2022-04-08 22:40:01 +01:00
|
|
|
from collections import OrderedDict
|
|
|
|
from progress.bar import IncrementalBar
|
|
|
|
import math
|
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
2022-04-16 09:43:51 +01:00
|
|
|
from bookstack import Bookstack
|
2022-04-08 22:40:01 +01:00
|
|
|
from imdb_utils import IMDbUtils
|
2022-12-09 20:45:50 +00:00
|
|
|
from markdown_utils import MarkdownUtils
|
2022-04-08 22:40:01 +01:00
|
|
|
from vcinema_utils import VCinemaUtils
|
|
|
|
|
|
|
|
# Page ID of https://wiki.jacknet.io/books/vcinema/page/keyword-scores
|
2022-04-15 17:29:19 +01:00
|
|
|
PAGE_ID = 23
|
2022-04-08 22:40:01 +01:00
|
|
|
|
|
|
|
|
|
|
|
def get_keyword_scores(viewings, min_vcinema_count=2, min_imdb_count=4):
    """Build the per-keyword data (films, IMDb total, score) for a set of viewings.

    The viewings are passed through ``VCinemaUtils.filter_viewings`` with the
    ``"keywords"`` filter, each resulting value is wrapped in a dict under the
    ``"vcinema_films"`` key, and ``add_keyword_totals`` / ``add_keyword_scores``
    then fill in the ``"total"`` and ``"score"`` entries in place.

    Args:
        viewings: Viewing records, forwarded unchanged to
            ``VCinemaUtils.filter_viewings``.
        min_vcinema_count: Minimum number of VCinema films a keyword needs
            before its IMDb total is looked up and a score is considered.
        min_imdb_count: Minimum IMDb total a keyword needs to receive a score.

    Returns:
        The mapping produced by ``VCinemaUtils.filter_viewings``, with every
        value replaced by a dict holding ``"vcinema_films"`` and, for keywords
        that meet the thresholds, ``"total"`` and ``"score"``.
    """
    keyword_data = VCinemaUtils.filter_viewings(viewings, "keywords")

    # Only existing keys are reassigned, so the mapping keeps its size while
    # it is being iterated. The loop variable deliberately does not reuse the
    # name of the ``viewings`` parameter.
    for keyword, keyword_viewings in keyword_data.items():
        keyword_data[keyword] = {"vcinema_films": keyword_viewings}

    add_keyword_totals(keyword_data, min_vcinema_count)
    add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count)

    return keyword_data
|
|
|
|
|
|
|
|
|
2022-04-16 09:43:51 +01:00
|
|
|
def update_page(token_id, token_secret, keyword_data):
    """Render the keyword scores and push them to the wiki page ``PAGE_ID``.

    Args:
        token_id: API token id forwarded to ``Bookstack.update_page``.
        token_secret: API token secret forwarded to ``Bookstack.update_page``.
        keyword_data: Per-keyword data handed to ``build_page``.
    """
    # NOTE(review): build_page returns a MarkdownTable object rather than a
    # string; presumably Bookstack.update_page serialises it - confirm.
    rendered_table = build_page(keyword_data)
    Bookstack.update_page(
        VCinemaUtils.JACKNET_WIKI_URL,
        token_id,
        token_secret,
        PAGE_ID,
        markdown=rendered_table,
    )
|
|
|
|
|
|
|
|
|
2022-04-08 22:40:01 +01:00
|
|
|
def add_keyword_totals(keywords, min_vcinema_count):
    """Look up the total for every keyword with enough VCinema films.

    Each keyword whose ``'vcinema_films'`` list has at least
    ``min_vcinema_count`` entries is submitted to a pool of 6 worker threads
    running ``add_keyword_total``, which stores the result in ``keywords``.
    A progress bar sized to the number of eligible keywords is advanced by
    the workers.

    Args:
        keywords: Mapping of keyword to a dict containing ``'vcinema_films'``;
            updated in place by the workers.
        min_vcinema_count: Minimum number of VCinema films required for a
            keyword to be looked up.

    NOTE(review): the futures returned by ``submit`` are discarded, so an
    exception raised inside a worker is not re-raised here; that keyword is
    simply left without a ``'total'`` entry.
    """
    eligible_keywords = [
        name
        for name, entry in keywords.items()
        if len(entry['vcinema_films']) >= min_vcinema_count
    ]

    with IncrementalBar(message='%(percent).1f%% - %(eta)ds remaining', max=len(eligible_keywords), check_tty=False) as bar, \
            ThreadPoolExecutor(6) as executor:
        for name in eligible_keywords:
            executor.submit(add_keyword_total, name, keywords, bar)
|
|
|
|
|
|
|
|
|
|
|
|
def add_keyword_total(keyword, keywords, progress_bar=None):
    """Store the IMDb count for ``keyword`` under ``keywords[keyword]['total']``.

    Args:
        keyword: Keyword to look up with ``IMDbUtils.get_keyword_count``.
        keywords: Mapping of keyword to its data dict; updated in place.
        progress_bar: Optional object with a ``next()`` method, advanced once
            after the total has been stored.
    """
    keywords[keyword]['total'] = IMDbUtils.get_keyword_count(keyword)

    if progress_bar is None:
        return

    progress_bar.next()
|
|
|
|
|
|
|
|
|
|
|
|
def add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count):
    """Add a ``'score'`` entry to every keyword that meets both thresholds.

    The score is the number of VCinema films divided by the natural logarithm
    of the keyword's total. Keywords without a ``'total'`` entry, or that fall
    below either threshold, are left without a ``'score'``.

    Args:
        keyword_data: Mapping of keyword to a dict with ``'vcinema_films'``
            and, optionally, ``'total'``; updated in place.
        min_vcinema_count: Minimum number of VCinema films required.
        min_imdb_count: Minimum total required.
    """
    for data in keyword_data.values():
        if 'total' not in data:
            continue

        vcinema_count = len(data['vcinema_films'])
        total_count = data['total']

        if vcinema_count < min_vcinema_count or total_count < min_imdb_count:
            continue

        # log(1) is 0 (division by zero) and log is undefined for values
        # <= 0, so no score can be computed when min_imdb_count lets such a
        # total through.
        if total_count <= 1:
            continue

        data['score'] = vcinema_count / math.log(total_count)
|
|
|
|
|
|
|
|
|
2022-04-08 23:27:09 +01:00
|
|
|
def build_page(keyword_data, minimum_score=1.0):
    """Build the keyword-score table, highest score first.

    Args:
        keyword_data: Mapping of keyword to a dict with ``'vcinema_films'``,
            ``'total'`` and, optionally, ``'score'``.
        minimum_score: Keywords without a score, or with a score below this
            value, are left out of the table.

    Returns:
        A ``MarkdownUtils.MarkdownTable`` with one row per remaining keyword:
        keyword, number of VCinema films, total, and score rounded to 3
        decimal places.
    """
    scored_entries = [
        (keyword, data)
        for keyword, data in keyword_data.items()
        if 'score' in data and data['score'] >= minimum_score
    ]
    # Stable sort: keywords with equal scores keep their original order.
    scored_entries.sort(key=lambda entry: entry[1]['score'], reverse=True)

    table = MarkdownUtils.MarkdownTable(["Keyword", "Number of VCinema Films", "Total IMDb entries", "Score"])

    for keyword, data in scored_entries:
        table.add_row([
            keyword,
            len(data['vcinema_films']),
            data['total'],
            round(data['score'], 3),
        ])

    return table
|