Compare commits
4 Commits
master
...
task/refac
Author | SHA1 | Date |
---|---|---|
Sarah | 732b32f39b | |
Sarah | 08c22b1277 | |
Sarah | 15f04eabd4 | |
Sarah | 2cfee5c23a | |
@ -1,5 +1,5 @@
|
|||
from wiki_pages import FilmsByCountry, FilmsByReference, FilmsByYear, HiddenThemes, KeywordScores, ViewingsCsv
|
||||
from vcinema_utils import VCinemaUtils
|
||||
from vcinema_utils.VCinemaUtils import *
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
@ -12,7 +12,7 @@ def update_wiki(token_id, token_secret, update_csv, pages):
|
|||
ViewingsCsv.update_viewings_csv(token_id, token_secret)
|
||||
|
||||
print("Getting viewings")
|
||||
viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
|
||||
films = get_vcinema_films(token_id, token_secret)
|
||||
|
||||
update_films_by_year = 'years' in pages
|
||||
update_films_by_country = 'countries' in pages
|
||||
|
@ -30,26 +30,26 @@ def update_wiki(token_id, token_secret, update_csv, pages):
|
|||
if update_film_references or update_hidden_themes or update_keyword_scores:
|
||||
data_fields.append("keywords")
|
||||
|
||||
viewing_count = len(viewings)
|
||||
with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
|
||||
VCinemaUtils.add_imdb_data_to_viewings(viewings, data_fields, bar)
|
||||
films_count = len(films)
|
||||
with IncrementalBar('Retrieving movie data', max=films_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
|
||||
add_imdb_data_to_films(films, data_fields, bar)
|
||||
|
||||
print("Processing viewing data")
|
||||
|
||||
if update_films_by_year:
|
||||
films_by_year = FilmsByYear.get_films_by_year(viewings)
|
||||
films_by_year = FilmsByYear.get_films_by_year(films)
|
||||
FilmsByYear.update_page(token_id, token_secret, films_by_year)
|
||||
if update_films_by_country:
|
||||
films_by_country = FilmsByCountry.get_films_by_country(viewings)
|
||||
films_by_country = FilmsByCountry.get_films_by_country(films)
|
||||
FilmsByCountry.update_page(token_id, token_secret, films_by_country)
|
||||
if update_film_references:
|
||||
films_by_reference = FilmsByReference.get_films_by_reference(viewings)
|
||||
films_by_reference = FilmsByReference.get_films_by_reference(films)
|
||||
FilmsByReference.update_page(token_id, token_secret, films_by_reference)
|
||||
if update_hidden_themes:
|
||||
hidden_themes = HiddenThemes.get_hidden_themes(viewings, token_id, token_secret)
|
||||
hidden_themes = HiddenThemes.get_hidden_themes(films, token_id, token_secret)
|
||||
HiddenThemes.update_page(token_id, token_secret, hidden_themes)
|
||||
if update_keyword_scores:
|
||||
keyword_scores = KeywordScores.get_keyword_scores(viewings)
|
||||
keyword_scores = KeywordScores.get_keyword_scores(films)
|
||||
KeywordScores.update_page(token_id, token_secret, keyword_scores)
|
||||
|
||||
print("Done!")
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
from vcinema_utils.VCinemaUtils import *
|
||||
from vcinema_utils.Viewing import Viewing
|
||||
|
||||
|
||||
class VCinemaFilm:
    """A film identified by its IMDb ID, with its viewings and IMDb data.

    Viewings are stored as ``Viewing`` objects; IMDb data is a plain
    ``field name -> value`` mapping filled in through ``add_imdb_data``.
    """

    def __init__(self, title, imdb_id):
        self._title = title
        self._imdb_id = imdb_id
        self._viewings = []   # Viewing objects, in the order they were added
        self._imdb_data = {}  # IMDb field name -> value

    def __repr__(self):
        return "{}(title={!r}, imdb_id={!r})".format(
            type(self).__name__, self._title, self._imdb_id)

    def add_viewing(self, date, season, rating):
        """Append a ``Viewing`` built from *date*, *season* and *rating*."""
        self._viewings.append(Viewing(date, season, rating))

    def add_imdb_data(self, field, value):
        """Store *value* under *field*, replacing any earlier value."""
        self._imdb_data[field] = value

    def get_imdb_data(self, field):
        """Return the stored value for *field*, or ``None`` if it is absent."""
        return self._imdb_data.get(field)

    def get_imdb_url(self):
        """Return the IMDb title URL for this film's ID."""
        return "https://www.imdb.com/title/tt{}/".format(self._imdb_id)

    def get_imdb_link(self):
        """Return a Markdown link ``[title](imdb url)`` for this film."""
        # Formatted inline rather than through generate_markdown_link: that
        # helper only reaches this module via a star import of VCinemaUtils,
        # and VCinemaUtils itself imports this module, so the star import can
        # run before the helper is defined and leave the name unbound here.
        return "[{}]({})".format(self._title, self.get_imdb_url())

    def get_title(self):
        """Return the film title."""
        return self._title

    def get_imdb_id(self):
        """Return the IMDb ID exactly as it was passed to the constructor."""
        return self._imdb_id
|
|
@ -1,7 +1,8 @@
|
|||
from collections import Counter
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import csv
|
||||
from datetime import datetime
|
||||
|
||||
from vcinema_utils.VCinemaFilm import VCinemaFilm
|
||||
from imdb_utils import IMDbUtils
|
||||
from bookstack import Bookstack
|
||||
|
||||
|
@ -20,75 +21,74 @@ def get_viewings_csv_attachment_id(token_id, token_secret):
|
|||
return next((x['id'] for x in attachments if x['uploaded_to'] == CSV_PAGE_ID and x['name'] == viewings_csv_file_name), None)
|
||||
|
||||
|
||||
def get_vcinema_viewings(token_id, token_secret, viewings_csv=None, combine_repeat_viewings=True):
|
||||
if viewings_csv is None:
|
||||
attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
|
||||
viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
|
||||
def get_vcinema_viewings(token_id, token_secret):
|
||||
attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
|
||||
|
||||
viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
|
||||
viewings_csv = viewings_csv.decode("utf-8")
|
||||
|
||||
viewings_csv_rows = viewings_csv.strip().split("\n")
|
||||
|
||||
viewings = list(csv.DictReader(viewings_csv_rows, quotechar='"'))
|
||||
|
||||
if combine_repeat_viewings:
|
||||
for viewing in viewings:
|
||||
viewing['viewings'] = [
|
||||
{'date_watched': viewing['date_watched'], 'season': viewing['season'], 'rating': viewing['rating']}]
|
||||
viewing.pop('date_watched')
|
||||
viewing.pop('season')
|
||||
viewing.pop('rating')
|
||||
|
||||
watch_counts = Counter([x['imdb_id'] for x in viewings])
|
||||
repeat_watches = [k for k, v in watch_counts.items() if v > 1]
|
||||
|
||||
for film in repeat_watches:
|
||||
viewing_indexes = [index for index, viewing in enumerate(viewings) if viewing['imdb_id'] == film]
|
||||
|
||||
first_watch = viewings[viewing_indexes[0]]
|
||||
|
||||
for index in viewing_indexes[1::]:
|
||||
first_watch['viewings'].extend(viewings[index]['viewings'])
|
||||
|
||||
for index in reversed(viewing_indexes[1::]):
|
||||
viewings.pop(index)
|
||||
|
||||
return viewings
|
||||
|
||||
|
||||
def add_imdb_data(imdb_id, viewings, data_fields, progressbar=None):
|
||||
def get_vcinema_films(token_id, token_secret):
    """Group the viewings rows into one ``VCinemaFilm`` per IMDb ID.

    Each row returned by ``get_vcinema_viewings`` is read for its
    ``imdb_id``, ``title``, ``date_watched``, ``season`` and ``rating``
    entries. The first row seen for an ID creates the film (using that row's
    title); every row, including the first, is added to it as a viewing.

    Returns:
        A list of films in the order their IMDb IDs first appear.

    Raises:
        KeyError: a row lacks one of the entries listed above.
        ValueError: a ``date_watched`` value is not in ``YYYY-MM-DD`` form.
    """
    films = {}

    for viewing in get_vcinema_viewings(token_id, token_secret):
        imdb_id = viewing["imdb_id"]
        title = viewing["title"]

        if imdb_id not in films:
            films[imdb_id] = VCinemaFilm(imdb_id=imdb_id, title=title)

        date_watched = datetime.strptime(viewing['date_watched'], "%Y-%m-%d")
        films[imdb_id].add_viewing(date_watched, viewing['season'], viewing['rating'])

    return list(films.values())
|
||||
|
||||
|
||||
def add_imdb_data(imdb_id, films, data_fields, progressbar=None):
|
||||
movie = IMDbUtils.get_movie(imdb_id)
|
||||
|
||||
for viewing in viewings:
|
||||
if viewing['imdb_id'] == movie.movieID:
|
||||
for film in films:
|
||||
if film.get_imdb_id() == movie.movieID:
|
||||
for field_name in data_fields:
|
||||
if field_name in movie:
|
||||
viewing[field_name] = movie[field_name]
|
||||
film.add_imdb_data(field_name, movie[field_name])
|
||||
|
||||
if progressbar is not None:
|
||||
progressbar.next()
|
||||
|
||||
|
||||
def add_imdb_keywords(imdb_id, viewings, progressbar=None):
|
||||
def add_imdb_keywords(imdb_id, films, progressbar=None):
|
||||
movie = IMDbUtils.get_movie_keywords(imdb_id)
|
||||
|
||||
for viewing in viewings:
|
||||
if viewing['imdb_id'] == movie.movieID:
|
||||
for film in films:
|
||||
if film.get_imdb_id() == movie.movieID:
|
||||
if 'keywords' in movie:
|
||||
viewing['keywords'] = movie['keywords']
|
||||
film.add_imdb_data('keywords', movie['keywords'])
|
||||
|
||||
if progressbar is not None:
|
||||
progressbar.next()
|
||||
|
||||
|
||||
def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
|
||||
def add_imdb_data_to_films(films, field_names, progress_bar=None):
|
||||
with ThreadPoolExecutor(4) as executor:
|
||||
future_imdb_tasks = set()
|
||||
|
||||
if ('keywords' in field_names and len(field_names) > 1) or ('keywords' not in field_names and len(field_names) > 0):
|
||||
future_imdb_tasks.update(executor.submit(add_imdb_data, viewing['imdb_id'], viewings, field_names, progress_bar) for viewing in viewings)
|
||||
future_imdb_tasks.update(executor.submit(add_imdb_data, film.get_imdb_id(), films, field_names, progress_bar) for film in films)
|
||||
|
||||
if 'keywords' in field_names:
|
||||
future_imdb_tasks.update(executor.submit(add_imdb_keywords, viewing['imdb_id'], viewings, progress_bar) for viewing in viewings)
|
||||
future_imdb_tasks.update(executor.submit(add_imdb_keywords, film.get_imdb_id(), films, progress_bar) for film in films)
|
||||
|
||||
progress_bar.max = len(future_imdb_tasks)
|
||||
|
||||
|
@ -96,32 +96,32 @@ def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
|
|||
progress_bar.finish()
|
||||
|
||||
|
||||
def filter_viewings(viewings, filter_field):
|
||||
viewings_filtered = {}
|
||||
def filter_films(films: [VCinemaFilm], field: str) -> [VCinemaFilm]:
|
||||
films_filtered = {}
|
||||
|
||||
for viewing in viewings:
|
||||
if filter_field in viewing:
|
||||
viewing_field = viewing[filter_field]
|
||||
if isinstance(viewing_field, list):
|
||||
for fve in list(viewing_field):
|
||||
if fve in viewings_filtered.keys():
|
||||
viewings_filtered[fve] += [viewing]
|
||||
for film in films:
|
||||
if film.get_imdb_data(field) is not None:
|
||||
field_value = film.get_imdb_data(field)
|
||||
if isinstance(field_value, list):
|
||||
for value in list(field_value):
|
||||
if value in films_filtered.keys():
|
||||
films_filtered[value] += [film]
|
||||
else:
|
||||
viewings_filtered[fve] = [viewing]
|
||||
films_filtered[value] = [film]
|
||||
else:
|
||||
if viewing_field in viewings_filtered.keys():
|
||||
viewings_filtered[viewing_field] += [viewing]
|
||||
if field_value in films_filtered.keys():
|
||||
films_filtered[field_value] += [film]
|
||||
else:
|
||||
viewings_filtered[viewing_field] = [viewing]
|
||||
films_filtered[field_value] = [film]
|
||||
|
||||
return viewings_filtered
|
||||
return films_filtered
|
||||
|
||||
|
||||
def get_film_list(films):
|
||||
def get_film_list(films: [VCinemaFilm]) -> str:
|
||||
film_links = []
|
||||
|
||||
for film in films:
|
||||
film_link = generate_imdb_film_link(film)
|
||||
film_link = film.get_imdb_link()
|
||||
film_links.append(film_link)
|
||||
|
||||
if len(film_links) > 0:
|
||||
|
@ -130,21 +130,13 @@ def get_film_list(films):
|
|||
return ""
|
||||
|
||||
|
||||
def generate_markdown_link(text, url) -> str:
    """Return the Markdown inline link ``[text](url)``.

    Neither argument is escaped; both are inserted as given.
    """
    return "[{}]({})".format(text, url)
|
||||
|
||||
|
||||
def generate_imdb_film_link(film):
    """Return a Markdown link to a film's IMDb page.

    *film* is indexed with ``'title'`` (the link text) and ``'imdb_id'``
    (passed to ``generate_imdb_url`` for the link target).
    """
    return generate_markdown_link(film['title'], generate_imdb_url(film['imdb_id']))
|
||||
|
||||
|
||||
def generate_wikipedia_page_link(page_title):
    """Return a Markdown link whose text is *page_title* and whose target is
    the URL produced by ``generate_wikipedia_url`` for that title."""
    return generate_markdown_link(page_title, generate_wikipedia_url(page_title))
|
||||
|
||||
|
||||
def generate_imdb_url(imdb_id):
    """Return the IMDb title URL for *imdb_id*.

    The ``tt`` prefix is added here, so *imdb_id* is inserted as given
    after it.
    """
    return "https://www.imdb.com/title/tt{}/".format(imdb_id)
|
||||
|
||||
|
||||
def generate_wikipedia_url(page_title):
    """Return the English Wikipedia URL for *page_title*.

    Spaces become underscores; no other character is altered or escaped.
    """
    return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
class Viewing:
    """One viewing of a film: when it was watched, in which season, and the
    rating it received. Values are stored exactly as passed in."""

    def __init__(self, date_watched, season, rating):
        self.date_watched = date_watched
        self.season = season
        self.rating = rating

    def __repr__(self):
        return "{}(date_watched={!r}, season={!r}, rating={!r})".format(
            type(self).__name__, self.date_watched, self.season, self.rating)
|
|
@ -16,7 +16,7 @@ PAGE_ID = 34
|
|||
|
||||
|
||||
def get_films_by_country(viewings):
|
||||
viewings_filtered_by_country = VCinemaUtils.filter_viewings(viewings, "countries")
|
||||
viewings_filtered_by_country = VCinemaUtils.filter_films(viewings, "countries")
|
||||
|
||||
if "Czechia" in viewings_filtered_by_country.keys():
|
||||
viewings_filtered_by_country["Czech Republic"] = viewings_filtered_by_country["Czechia"]
|
||||
|
|
|
@ -2,23 +2,23 @@ from collections import OrderedDict
|
|||
import wikipedia
|
||||
|
||||
from bookstack import Bookstack
|
||||
from vcinema_utils import VCinemaUtils
|
||||
from vcinema_utils import VCinemaUtils, VCinemaFilm
|
||||
|
||||
# Page ID of https://wiki.jacknet.io/books/vcinema/page/references
|
||||
PAGE_ID = 62
|
||||
|
||||
|
||||
def get_films_by_reference(viewings):
|
||||
def get_films_by_reference(films: [VCinemaFilm]):
|
||||
films_by_reference = {}
|
||||
|
||||
for viewing in viewings:
|
||||
if "keywords" in viewing.keys():
|
||||
for keyword in viewing["keywords"]:
|
||||
for film in films:
|
||||
if (film_keywords := film.get_imdb_data("keywords")) is not None:
|
||||
for keyword in film_keywords:
|
||||
if keyword.startswith("reference-to-"):
|
||||
|
||||
for reference in films_by_reference:
|
||||
if keyword in films_by_reference[reference]["keywords"]:
|
||||
films_by_reference[reference]["films"].append(viewing)
|
||||
films_by_reference[reference]["films"].append(film)
|
||||
break
|
||||
else:
|
||||
keyword = keyword[13:]
|
||||
|
@ -31,13 +31,13 @@ def get_films_by_reference(viewings):
|
|||
|
||||
referenced = keyword.replace("-", " ")
|
||||
|
||||
searches = wikipedia.search(referenced, suggestion=False)
|
||||
|
||||
try:
|
||||
searches = wikipedia.search(referenced, suggestion=False)
|
||||
referenced_page = wikipedia.page(title=referenced, auto_suggest=False)
|
||||
|
||||
page_title = referenced_page.title
|
||||
page_url = referenced_page.url
|
||||
|
||||
except wikipedia.DisambiguationError as e:
|
||||
page_title = e.title
|
||||
page_title = page_title[0].upper() + page_title[1:]
|
||||
|
@ -60,13 +60,13 @@ def get_films_by_reference(viewings):
|
|||
if page_title in films_by_reference.keys():
|
||||
films_by_reference[page_title]["keywords"].append(keyword)
|
||||
|
||||
if viewing not in films_by_reference[page_title]["films"]:
|
||||
films_by_reference[page_title]["films"].append(viewing)
|
||||
if film not in films_by_reference[page_title]["films"]:
|
||||
films_by_reference[page_title]["films"].append(film)
|
||||
|
||||
else:
|
||||
films_by_reference[page_title] = {"url": page_url,
|
||||
"keywords": [keyword],
|
||||
"films": [viewing]}
|
||||
"films": [film]}
|
||||
|
||||
return films_by_reference
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ PAGE_ID = 24
|
|||
|
||||
|
||||
def get_films_by_year(viewings):
|
||||
viewings_filtered_by_year = VCinemaUtils.filter_viewings(viewings, "year")
|
||||
viewings_filtered_by_year = VCinemaUtils.filter_films(viewings, "year")
|
||||
|
||||
return viewings_filtered_by_year
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ PAGE_ID = 63
|
|||
|
||||
def get_hidden_themes(viewings, token_id, token_secret):
|
||||
# Bit horrible to need to request this again, but it affects the order of the result table
|
||||
viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)
|
||||
viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
|
||||
|
||||
# Copy keywords from grouped viewings to ungrouped viewings
|
||||
for viewing_ungrouped in viewings_ungrouped:
|
||||
|
|
|
@ -12,7 +12,7 @@ PAGE_ID = 23
|
|||
|
||||
|
||||
def get_keyword_scores(viewings):
|
||||
viewings_filtered_keyword = VCinemaUtils.filter_viewings(viewings, "keywords")
|
||||
viewings_filtered_keyword = VCinemaUtils.filter_films(viewings, "keywords")
|
||||
|
||||
for keyword, viewings in viewings_filtered_keyword.items():
|
||||
viewings_filtered_keyword[keyword] = {"vcinema_films": viewings}
|
||||
|
|
Loading…
Reference in New Issue