"""Load VCinema viewings from the JackNet wiki and enrich/group them."""

import csv
import functools
import io
import sys
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed

from progress.bar import Bar

from bookstack import Bookstack
from imdb_utils import IMDbUtils

JACKNET_WIKI_URL = "https://wiki.jacknet.io"


def get_viewings_csv_attachment_id(token_id, token_secret):
    """Return the ID of the viewings CSV attachment on the wiki.

    Args:
        token_id: API token ID passed through to ``Bookstack.get_attachments``.
        token_secret: API token secret passed through to
            ``Bookstack.get_attachments``.

    Returns:
        The ``'id'`` of the first attachment named ``vcinema.csv`` that is
        uploaded to the CSV page, or ``None`` when no attachment matches.
    """
    attachments = Bookstack.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)

    # Page ID of "https://wiki.jacknet.io/books/vcinema/page/csv"
    page_id = 11
    viewings_csv_file_name = "vcinema.csv"

    return next(
        (
            attachment['id']
            for attachment in attachments
            if attachment['uploaded_to'] == page_id
            and attachment['name'] == viewings_csv_file_name
        ),
        None,
    )


def _parse_viewings_csv(csv_text):
    """Parse CSV text into a list of dicts keyed by the header row."""
    # csv.reader (rather than str.split) copes with quoted fields that contain
    # commas and with "\r\n" line endings.
    reader = csv.reader(io.StringIO(csv_text.strip(), newline=''))
    headers = next(reader, None)
    if headers is None:
        return []

    # Blank lines come back as empty rows; they carry no viewing, so skip them.
    return [dict(zip(headers, row)) for row in reader if row]


def _combine_repeat_viewings(viewings):
    """Merge viewings sharing an ``imdb_id`` into the first such viewing."""
    watch_counts = Counter(viewing['imdb_id'] for viewing in viewings)

    first_watch_by_id = {}
    combined = []
    for viewing in viewings:
        imdb_id = viewing['imdb_id']

        # Films watched only once keep 'date_watched' as a plain string.
        if watch_counts[imdb_id] == 1:
            combined.append(viewing)
            continue

        first_watch = first_watch_by_id.get(imdb_id)
        if first_watch is None:
            # First of several viewings: it stays in the result and its
            # 'date_watched' becomes the list collecting every date.
            viewing['date_watched'] = [viewing['date_watched']]
            first_watch_by_id[imdb_id] = viewing
            combined.append(viewing)
        else:
            first_watch['date_watched'].append(viewing['date_watched'])

    return combined


def get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=True):
    """Download the viewings CSV from the wiki and return it as a list of dicts.

    Args:
        token_id: API token ID passed through to the ``Bookstack`` helpers.
        token_secret: API token secret passed through to the ``Bookstack``
            helpers.
        combine_repeat_viewings: When true, viewings that share an
            ``imdb_id`` are merged into the first such viewing, whose
            ``date_watched`` becomes a list of every date in file order.
            Films seen once keep ``date_watched`` as a string.

    Returns:
        A list of dicts, one per viewing, keyed by the CSV header row.

    Raises:
        KeyError: If ``combine_repeat_viewings`` is true and a row has no
            ``imdb_id`` column (or a repeated film has no ``date_watched``).
    """
    attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
    # NOTE(review): attachment_id is None when the CSV attachment is missing;
    # how Bookstack.get_attachment reacts to that is not visible here.
    viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)

    # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading BOM that
    # would otherwise end up glued to the first header name.
    viewings = _parse_viewings_csv(viewings_csv.decode("utf-8-sig"))

    if combine_repeat_viewings:
        viewings = _combine_repeat_viewings(viewings)

    return viewings


def increment_progressbar(bar, _):
    """Advance ``bar`` by one step; the second argument is ignored.

    The two-argument shape lets this be bound with ``functools.partial`` and
    used as a future done-callback, which is called with the future.
    """
    bar.next()


def add_imdb_data_to_viewings(viewings, field_names, progressbar=None):
    """Copy selected IMDb fields onto each viewing, in place.

    One ``IMDbUtils.get_movie`` lookup is submitted per viewing on a pool of
    four worker threads. Each result is matched back to viewings by comparing
    its ``movieID`` with the viewing's ``imdb_id``.

    Args:
        viewings: List of viewing dicts, each with an ``imdb_id`` key. The
            dicts are mutated.
        field_names: Names of fields to copy from the lookup result; names
            not present in a result are skipped for that film.
        progressbar: Optional object with a ``next()`` method, advanced once
            per finished lookup.

    Raises:
        Whatever a lookup raised: ``future.result()`` re-raises it here.
    """
    # Index viewings by ID once so each finished lookup is matched directly
    # instead of rescanning the whole list.
    viewings_by_id = {}
    for viewing in viewings:
        viewings_by_id.setdefault(viewing['imdb_id'], []).append(viewing)

    with ThreadPoolExecutor(4) as executor:
        futures = {
            executor.submit(IMDbUtils.get_movie, viewing['imdb_id'])
            for viewing in viewings
        }

        if progressbar is not None:
            on_done = functools.partial(increment_progressbar, progressbar)
            for pending in futures:
                pending.add_done_callback(on_done)

        for future in as_completed(futures):
            imdb_data = future.result()

            for viewing in viewings_by_id.get(imdb_data.movieID, ()):
                for field_name in field_names:
                    if field_name in imdb_data:
                        viewing[field_name] = imdb_data[field_name]


def filter_viewings(viewings, filter_field):
    """Group viewings by the value(s) of ``filter_field``.

    A progress bar is drawn on the terminal while the viewings are processed.

    Args:
        viewings: List of viewing dicts.
        filter_field: Key to group by. Viewings without this key are left
            out. When the value is a list, the viewing is filed under every
            element of that list.

    Returns:
        A dict mapping each distinct field value to the list of viewings
        that carry it, in input order.
    """
    sys.stdout.write("\rProcessing viewing data")
    sys.stdout.flush()

    viewings_filtered = {}

    with Bar('Processing viewing data', max=len(viewings),
             suffix='%(percent).1f%% - %(eta)ds remaining') as bar:
        for viewing in viewings:
            if filter_field in viewing:
                viewing_field = viewing[filter_field]
                # Treat a scalar as a one-element list so both shapes share
                # the same grouping code.
                if isinstance(viewing_field, list):
                    group_keys = viewing_field
                else:
                    group_keys = [viewing_field]

                for group_key in group_keys:
                    viewings_filtered.setdefault(group_key, []).append(viewing)

            bar.next()

    return viewings_filtered