diff --git a/update_films_by_year_page.py b/update_films_by_year_page.py
index 2c1a313..c921b11 100644
--- a/update_films_by_year_page.py
+++ b/update_films_by_year_page.py
@@ -1,5 +1,7 @@
 import argparse
 from collections import OrderedDict
+from progress.bar import Bar
+import sys
 
 from bookstack import Bookstack
 from vcinema_utils import VCinemaUtils
@@ -9,10 +11,18 @@ def build_table(films_by_year):
     films_by_year_sorted = OrderedDict(sorted(films_by_year.items(), key=lambda t: t[0], reverse=True))
 
     page_table = "| Year | Films |\n| - | - |\n"
-    for year in films_by_year_sorted.keys():
-        page_table += str(year) + " | "
-        page_table += "<br>".join("[{}](https://www.imdb.com/title/tt{}/)".format(film['title'], film['imdb_id']) for film in films_by_year_sorted[year])
-        page_table += "\n"
+
+    sys.stdout.write("\rGenerating table")
+    sys.stdout.flush()
+
+    viewing_count = len(films_by_year_sorted)
+
+    with Bar('Generating table', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining') as bar:
+        for year in films_by_year_sorted.keys():
+            page_table += str(year) + " | "
+            page_table += "<br>".join("[{}](https://www.imdb.com/title/tt{}/)".format(film['title'], film['imdb_id']) for film in films_by_year_sorted[year])
+            page_table += "\n"
+            bar.next()
 
     return page_table
 
@@ -21,13 +31,10 @@ def update_films_by_year_page(token_id, token_secret):
     print("Retrieving VCinema viewings")
     viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
 
-    print("Retrieving movie data")
     VCinemaUtils.add_imdb_data_to_viewings(viewings, ['year'])
 
-    print("Processing viewing data")
     viewings_by_year = VCinemaUtils.filter_viewings(viewings, 'year')
 
-    print("Generating table")
     film_by_year_table = build_table(viewings_by_year)
 
     # Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-release-year
diff --git a/vcinema_utils/VCinemaUtils.py b/vcinema_utils/VCinemaUtils.py
index 0adaf17..dc98bcb 100644
--- a/vcinema_utils/VCinemaUtils.py
+++ b/vcinema_utils/VCinemaUtils.py
@@ -1,14 +1,17 @@
+from collections import Counter
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from progress.bar import Bar
+import sys
 
 from imdb_utils import IMDbUtils
-from wiki_utils import WikiUtils
+from bookstack import Bookstack
 
 
 JACKNET_WIKI_URL = "https://wiki.jacknet.io"
 
 
 def get_viewings_csv_attachment_id(token_id, token_secret):
-    attachments = WikiUtils.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)
+    attachments = Bookstack.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)
 
     # Page ID of "https://wiki.jacknet.io/books/vcinema/page/csv"
     page_id = 11
@@ -17,43 +20,87 @@
     return next((x['id'] for x in attachments if x['uploaded_to'] == page_id and x['name'] == viewings_csv_file_name), None)
 
 
-def get_vcinema_viewings(token_id, token_secret):
+def get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=True):
    attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
-    viewings_csv = WikiUtils.get_attachment_contents(attachment_id, JACKNET_WIKI_URL, token_id, token_secret)
+    viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
 
     viewings_csv = viewings_csv.decode("utf-8")
 
     viewings_csv_rows = viewings_csv.strip().split("\n")
 
     headers = viewings_csv_rows.pop(0).split(",")
 
     viewings = [dict(zip(headers, row.split(","))) for row in viewings_csv_rows]
 
+    if combine_repeat_viewings:
+        watch_counts = Counter([x['imdb_id'] for x in viewings])
+        repeat_watches = [k for k, v in watch_counts.items() if v > 1]
+
+        for film in repeat_watches:
+            viewing_indexes = [index for index, viewing in enumerate(viewings) if viewing['imdb_id'] == film]
+
+            first_watch = viewings[viewing_indexes[0]]
+            first_watch['date_watched'] = [first_watch['date_watched']]
+
+            for index in viewing_indexes[1::]:
+                first_watch['date_watched'].append(viewings[index]['date_watched'])
+
+            for index in reversed(viewing_indexes[1::]):
+                viewings.pop(index)
+
     return viewings
 
 
-def add_imdb_data_to_viewings(viewings, field_name):
+def get_imdb(imdb_id, bar):
+    imdb_entry = IMDbUtils.get_movie(imdb_id)
+
+    bar.next()
+
+    return imdb_entry
+
+
+def add_imdb_data_to_viewings(viewings, field_names):
+    sys.stdout.write("\rRetrieving movie data")
+    sys.stdout.flush()
+
     viewing_count = len(viewings)
-    with Bar('Processing', max=viewing_count) as bar:
-        bar.message = "Processing"
-        bar.suffix = '%(percent).1f%% - %(eta)ds'
+    with Bar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining') as bar:
+        with ThreadPoolExecutor(4) as executor:
+            future_to_url = {executor.submit(get_imdb, viewing['imdb_id'], bar) for viewing in viewings}
 
-        for (viewing_num, viewing) in enumerate(viewings):
-            imdb_entry = IMDbUtils.get_movie(viewing['imdb_id'])
+            for future in as_completed(future_to_url):
+                imdb_data = future.result()
 
-            viewing[field_name] = imdb_entry[field_name]
-            bar.next()
-    bar.finish()
+                for viewing in viewings:
+                    if viewing['imdb_id'] == imdb_data.movieID:
+                        for field_name in field_names:
+                            if field_name in imdb_data:
+                                viewing[field_name] = imdb_data[field_name]
 
 
-def filter_viewings(viewings, filter_field, remove_duplicates=True):
+def filter_viewings(viewings, filter_field):
+    sys.stdout.write("\rProcessing viewing data")
+    sys.stdout.flush()
+
     viewings_filtered = {}
 
-    for viewing in viewings:
-        viewing_field = viewing[filter_field]
-        if viewing_field in viewings_filtered.keys():
-            if not remove_duplicates or not any(x['imdb_id'] == viewing['imdb_id'] for x in viewings_filtered[viewing_field]):
-                viewings_filtered[viewing_field] += [viewing]
-        else:
-            viewings_filtered[viewing[filter_field]] = [viewing]
+    viewing_count = len(viewings)
+
+    with Bar('Processing viewing data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining') as bar:
+        for viewing in viewings:
+            if filter_field in viewing:
+                viewing_field = viewing[filter_field]
+                if isinstance(viewing_field, list):
+                    for fve in list(viewing_field):
+                        if fve in viewings_filtered.keys():
+                            viewings_filtered[fve] += [viewing]
+                        else:
+                            viewings_filtered[fve] = [viewing]
+                else:
+                    if viewing_field in viewings_filtered.keys():
+                        viewings_filtered[viewing_field] += [viewing]
+                    else:
+                        viewings_filtered[viewing_field] = [viewing]
+            bar.next()
+
     return viewings_filtered
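
Reviewer note: a minimal sketch of how the reworked helpers are expected to chain together after this change; the token values below are placeholders and the final page-update call is omitted.

    from vcinema_utils import VCinemaUtils

    token_id = "example-token-id"          # placeholder BookStack token ID
    token_secret = "example-token-secret"  # placeholder BookStack token secret

    # Repeat watches are merged by default, so 'date_watched' becomes a list for those rows.
    viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)

    # Each film is fetched once on a small thread pool; every requested IMDb field is copied in.
    VCinemaUtils.add_imdb_data_to_viewings(viewings, ['year'])

    # Viewings are bucketed by the requested field before the table is built.
    viewings_by_year = VCinemaUtils.filter_viewings(viewings, 'year')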
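
With combine_repeat_viewings left at its default of True, repeat watches collapse into a single row whose date_watched holds every date. The rows and dates below are made up for illustration:

    # The same film logged twice in the CSV:
    viewings = [
        {'imdb_id': '0000001', 'date_watched': '2021-01-01'},
        {'imdb_id': '0000001', 'date_watched': '2021-06-01'},
    ]
    # After the merge step inside get_vcinema_viewings, a single row remains:
    merged = [
        {'imdb_id': '0000001', 'date_watched': ['2021-01-01', '2021-06-01']},
    ]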
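
Likewise, filter_viewings now skips rows that lack the field instead of raising a KeyError, and fans a list-valued field out into one bucket per value. Again with made-up rows and a hypothetical 'countries' field:

    rows = [
        {'imdb_id': '0000001', 'title': 'Film A', 'year': 1995},
        {'imdb_id': '0000002', 'title': 'Film B'},  # no 'year' field: skipped, not an error
    ]
    # VCinemaUtils.filter_viewings(rows, 'year') is expected to give {1995: [Film A row]}.
    # A row such as {'imdb_id': '0000003', 'countries': ['X', 'Y']} would be indexed under
    # both 'X' and 'Y' when filtering on 'countries'.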