2022-02-20 21:26:38 +00:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from progress.bar import Bar
|
|
|
|
|
|
|
|
from imdb_utils import IMDbUtils
|
|
|
|
from wiki_utils import WikiUtils
|
|
|
|
|
|
|
|
|
|
|
|
def get_vcinema_viewings(token_id, token_secret):
    """Fetch the VCinema viewings CSV from the wiki and parse it into dicts.

    The CSV text is read from the first ``<code>`` element of the HTML export
    of wiki page 11 (/Vcinema/CSV). The first CSV row supplies the column
    names; every following non-blank row becomes one dict keyed by them.

    Args:
        token_id: Forwarded to ``WikiUtils.get_page_export_html``
            (presumably the wiki API token ID -- confirm against WikiUtils).
        token_secret: Forwarded to ``WikiUtils.get_page_export_html``
            (presumably the wiki API token secret -- confirm against WikiUtils).

    Returns:
        A list of dicts, one per CSV data row, mapping column name to the
        cell text. Extra cells beyond the header count are dropped.

    Raises:
        AttributeError: If the exported page contains no ``<code>`` element.
    """
    # Local import keeps this function self-contained.
    import csv

    # Page ID of /Vcinema/CSV
    page_id = 11
    wiki_base_url = "https://wiki.jacknet.io"

    html_page = WikiUtils.get_page_export_html(page_id, wiki_base_url, token_id, token_secret)

    soup = BeautifulSoup(html_page, 'html.parser')
    csv_lines = soup.find("code").text.strip().split("\n")

    # csv.reader (rather than str.split(",")) so that quoted fields that
    # contain commas stay intact and CRLF line endings are handled.
    reader = csv.reader(csv_lines)
    headers = next(reader, [])

    # Blank lines parse to an empty row; skip them instead of emitting a
    # viewing with no usable fields.
    return [dict(zip(headers, row)) for row in reader if row]
|
|
|
|
|
|
|
|
|
|
|
|
def add_imdb_data_to_viewings(viewings, field_name):
    """Copy one IMDb field onto every viewing, displaying a progress bar.

    For each viewing, the IMDb entry is fetched with ``IMDbUtils.get_movie``
    using the viewing's ``'imdb_id'``, and ``entry[field_name]`` is stored
    under ``viewing[field_name]``. The viewings are modified in place and
    nothing is returned.

    Args:
        viewings: Sized collection of dict-like viewings, each holding an
            ``'imdb_id'`` key.
        field_name: Key read from the IMDb entry and written to the viewing.
    """
    # The constructor's first argument already sets the bar message.
    with Bar('Processing', max=len(viewings)) as bar:
        bar.suffix = '%(percent).1f%% - %(eta)ds'

        for viewing in viewings:
            imdb_entry = IMDbUtils.get_movie(viewing['imdb_id'])
            viewing[field_name] = imdb_entry[field_name]
            bar.next()
        # No explicit bar.finish(): leaving the ``with`` block finishes the
        # bar, so calling it here as well would finish it twice.
|
|
|
|
|
|
|
|
|
|
|
|
def filter_viewings(viewings, pivot_field, remove_duplicates=True):
    """Group viewings into lists keyed by the value of ``pivot_field``.

    Args:
        viewings: Iterable of dict-like viewings.
        pivot_field: Key whose value selects the group a viewing lands in.
        remove_duplicates: When truthy, a viewing is not added to a group
            that already holds a viewing with the same ``'imdb_id'``.

    Returns:
        A dict mapping each distinct ``pivot_field`` value to the list of
        viewings in that group, in the order they were encountered.
    """
    grouped = {}

    for entry in viewings:
        group_key = entry[pivot_field]
        bucket = grouped.get(group_key)

        # First viewing for this key: start a new group.
        if bucket is None:
            grouped[group_key] = [entry]
            continue

        # 'imdb_id' is only consulted when duplicate removal is requested.
        if remove_duplicates and any(
            seen['imdb_id'] == entry['imdb_id'] for seen in bucket
        ):
            continue

        bucket.append(entry)

    return grouped
|