diff --git a/update_films_by_year_page.py b/update_films_by_year_page.py
index 2c1a313..c921b11 100644
--- a/update_films_by_year_page.py
+++ b/update_films_by_year_page.py
@@ -1,5 +1,7 @@
import argparse
from collections import OrderedDict
+from progress.bar import Bar
+import sys
from bookstack import Bookstack
from vcinema_utils import VCinemaUtils
@@ -9,10 +11,18 @@ def build_table(films_by_year):
films_by_year_sorted = OrderedDict(sorted(films_by_year.items(), key=lambda t: t[0], reverse=True))
page_table = "| Year | Films |\n| - | - |\n"
- for year in films_by_year_sorted.keys():
- page_table += str(year) + " | "
- page_table += "\n".join("[{}](https://www.imdb.com/title/tt{}/)".format(film['title'], film['imdb_id']) for film in films_by_year_sorted[year])
- page_table += "\n"
+
+ sys.stdout.write("\rGenerating table")
+ sys.stdout.flush()
+
+ year_count = len(films_by_year_sorted)
+
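+ # One table row per year, newest first; each film is rendered as an IMDb link and films are joined with newlines.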
+ with Bar('Generating table', max=year_count, suffix='%(percent).1f%% - %(eta)ds remaining') as bar:
+ for year in films_by_year_sorted.keys():
+ page_table += str(year) + " | "
+ page_table += "\n".join("[{}](https://www.imdb.com/title/tt{}/)".format(film['title'], film['imdb_id']) for film in films_by_year_sorted[year])
+ page_table += "\n"
+ bar.next()
return page_table
@@ -21,13 +31,10 @@ def update_films_by_year_page(token_id, token_secret):
print("Retrieving VCinema viewings")
viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
- print("Retrieving movie data")
VCinemaUtils.add_imdb_data_to_viewings(viewings, ['year'])
- print("Processing viewing data")
viewings_by_year = VCinemaUtils.filter_viewings(viewings, 'year')
- print("Generating table")
film_by_year_table = build_table(viewings_by_year)
# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-release-year
diff --git a/vcinema_utils/VCinemaUtils.py b/vcinema_utils/VCinemaUtils.py
index 0adaf17..dc98bcb 100644
--- a/vcinema_utils/VCinemaUtils.py
+++ b/vcinema_utils/VCinemaUtils.py
@@ -1,14 +1,17 @@
+from collections import Counter
+from concurrent.futures import ThreadPoolExecutor, as_completed
from progress.bar import Bar
+import sys
from imdb_utils import IMDbUtils
-from wiki_utils import WikiUtils
+from bookstack import Bookstack
JACKNET_WIKI_URL = "https://wiki.jacknet.io"
def get_viewings_csv_attachment_id(token_id, token_secret):
- attachments = WikiUtils.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)
+ attachments = Bookstack.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)
# Page ID of "https://wiki.jacknet.io/books/vcinema/page/csv"
page_id = 11
@@ -17,43 +20,87 @@ def get_viewings_csv_attachment_id(token_id, token_secret):
return next((x['id'] for x in attachments if x['uploaded_to'] == page_id and x['name'] == viewings_csv_file_name), None)
-def get_vcinema_viewings(token_id, token_secret):
+def get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=True):
attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
- viewings_csv = WikiUtils.get_attachment_contents(attachment_id, JACKNET_WIKI_URL, token_id, token_secret)
+ viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
viewings_csv = viewings_csv.decode("utf-8")
viewings_csv_rows = viewings_csv.strip().split("\n")
headers = viewings_csv_rows.pop(0).split(",")
viewings = [dict(zip(headers, row.split(","))) for row in viewings_csv_rows]
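+ # Optionally collapse repeat viewings of the same film into one entry whose date_watched becomes a list of dates.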
+ if combine_repeat_viewings:
+ watch_counts = Counter([x['imdb_id'] for x in viewings])
+ repeat_watches = [k for k, v in watch_counts.items() if v > 1]
+
+ for film in repeat_watches:
+ viewing_indexes = [index for index, viewing in enumerate(viewings) if viewing['imdb_id'] == film]
+
+ first_watch = viewings[viewing_indexes[0]]
+ first_watch['date_watched'] = [first_watch['date_watched']]
+
+ for index in viewing_indexes[1:]:
+ first_watch['date_watched'].append(viewings[index]['date_watched'])
+
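+ # Remove the now-merged duplicate rows, popping from the end so earlier indexes stay valid.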
+ for index in reversed(viewing_indexes[1:]):
+ viewings.pop(index)
+
return viewings
-def add_imdb_data_to_viewings(viewings, field_name):
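+ # Thread-pool worker: fetch a single IMDb record and advance the shared progress bar.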
+def get_imdb(imdb_id, bar):
+ imdb_entry = IMDbUtils.get_movie(imdb_id)
+
+ bar.next()
+
+ return imdb_entry
+
+
+def add_imdb_data_to_viewings(viewings, field_names):
+ sys.stdout.write("\rRetrieving movie data")
+ sys.stdout.flush()
+
viewing_count = len(viewings)
- with Bar('Processing', max=viewing_count) as bar:
- bar.message = "Processing"
- bar.suffix = '%(percent).1f%% - %(eta)ds'
+ with Bar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining') as bar:
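+ # Fetch IMDb data for all viewings concurrently on four worker threads.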
+ with ThreadPoolExecutor(4) as executor:
+ futures = {executor.submit(get_imdb, viewing['imdb_id'], bar) for viewing in viewings}
- for (viewing_num, viewing) in enumerate(viewings):
- imdb_entry = IMDbUtils.get_movie(viewing['imdb_id'])
+ for future in as_completed(futures):
+ imdb_data = future.result()
- viewing[field_name] = imdb_entry[field_name]
- bar.next()
- bar.finish()
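+ # As each lookup completes, copy the requested fields onto the matching viewing(s).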
+ for viewing in viewings:
+ if viewing['imdb_id'] == imdb_data.movieID:
+ for field_name in field_names:
+ if field_name in imdb_data:
+ viewing[field_name] = imdb_data[field_name]
-def filter_viewings(viewings, filter_field, remove_duplicates=True):
+def filter_viewings(viewings, filter_field):
+ sys.stdout.write("\rProcessing viewing data")
+ sys.stdout.flush()
+
viewings_filtered = {}
- for viewing in viewings:
- viewing_field = viewing[filter_field]
- if viewing_field in viewings_filtered.keys():
- if not remove_duplicates or not any(x['imdb_id'] == viewing['imdb_id'] for x in viewings_filtered[viewing_field]):
- viewings_filtered[viewing_field] += [viewing]
- else:
- viewings_filtered[viewing[filter_field]] = [viewing]
+ viewing_count = len(viewings)
+
+ with Bar('Processing viewing data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining') as bar:
+ for viewing in viewings:
+ if filter_field in viewing:
+ viewing_field = viewing[filter_field]
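+ # List-valued fields (e.g. the combined date_watched dates) index the viewing under each value.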
+ if isinstance(viewing_field, list):
+ for fve in list(viewing_field):
+ if fve in viewings_filtered.keys():
+ viewings_filtered[fve] += [viewing]
+ else:
+ viewings_filtered[fve] = [viewing]
+ else:
+ if viewing_field in viewings_filtered.keys():
+ viewings_filtered[viewing_field] += [viewing]
+ else:
+ viewings_filtered[viewing_field] = [viewing]
+ bar.next()
+
return viewings_filtered