Compare commits

...

4 Commits

Author SHA1 Message Date
Sarah 732b32f39b Update update_wiki.py 2022-12-23 16:38:52 +00:00
Sarah 08c22b1277 fix for reference and fix imports 2022-12-23 16:36:49 +00:00
Sarah 15f04eabd4 apply changes 2022-12-19 22:26:38 +00:00
Sarah 2cfee5c23a add first classes 2022-12-18 17:15:47 +00:00
9 changed files with 123 additions and 88 deletions

View File

@ -1,5 +1,5 @@
from wiki_pages import FilmsByCountry, FilmsByReference, FilmsByYear, HiddenThemes, KeywordScores, ViewingsCsv
from vcinema_utils import VCinemaUtils
from vcinema_utils.VCinemaUtils import *
import argparse
import json
@ -12,7 +12,7 @@ def update_wiki(token_id, token_secret, update_csv, pages):
ViewingsCsv.update_viewings_csv(token_id, token_secret)
print("Getting viewings")
viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
films = get_vcinema_films(token_id, token_secret)
update_films_by_year = 'years' in pages
update_films_by_country = 'countries' in pages
@ -30,26 +30,26 @@ def update_wiki(token_id, token_secret, update_csv, pages):
if update_film_references or update_hidden_themes or update_keyword_scores:
data_fields.append("keywords")
viewing_count = len(viewings)
with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
VCinemaUtils.add_imdb_data_to_viewings(viewings, data_fields, bar)
films_count = len(films)
with IncrementalBar('Retrieving movie data', max=films_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
add_imdb_data_to_films(films, data_fields, bar)
print("Processing viewing data")
if update_films_by_year:
films_by_year = FilmsByYear.get_films_by_year(viewings)
films_by_year = FilmsByYear.get_films_by_year(films)
FilmsByYear.update_page(token_id, token_secret, films_by_year)
if update_films_by_country:
films_by_country = FilmsByCountry.get_films_by_country(viewings)
films_by_country = FilmsByCountry.get_films_by_country(films)
FilmsByCountry.update_page(token_id, token_secret, films_by_country)
if update_film_references:
films_by_reference = FilmsByReference.get_films_by_reference(viewings)
films_by_reference = FilmsByReference.get_films_by_reference(films)
FilmsByReference.update_page(token_id, token_secret, films_by_reference)
if update_hidden_themes:
hidden_themes = HiddenThemes.get_hidden_themes(viewings, token_id, token_secret)
hidden_themes = HiddenThemes.get_hidden_themes(films, token_id, token_secret)
HiddenThemes.update_page(token_id, token_secret, hidden_themes)
if update_keyword_scores:
keyword_scores = KeywordScores.get_keyword_scores(viewings)
keyword_scores = KeywordScores.get_keyword_scores(films)
KeywordScores.update_page(token_id, token_secret, keyword_scores)
print("Done!")

View File

@ -0,0 +1,37 @@
from vcinema_utils.VCinemaUtils import *
from vcinema_utils.Viewing import Viewing
class VCinemaFilm:
    """A film together with its recorded viewings and attached IMDb data.

    IMDb data is kept as a plain ``field -> value`` mapping that is filled
    in through :meth:`add_imdb_data`.
    """

    def __init__(self, title, imdb_id):
        """Create a film with no viewings and no IMDb data yet.

        :param title: display title, also used as the markdown link text.
        :param imdb_id: IMDb identifier; the URL builder prepends ``tt`` to
            it, so presumably it is stored without that prefix.
        """
        self._viewings = []
        self._imdb_data = {}
        self._title = title
        self._imdb_id = imdb_id

    def add_viewing(self, date, season, rating):
        """Record one more viewing of this film as a ``Viewing`` object."""
        self._viewings.append(Viewing(date, season, rating))

    def add_imdb_data(self, field, value):
        """Store ``value`` under ``field``, replacing any earlier value."""
        self._imdb_data[field] = value

    def get_imdb_data(self, field):
        """Return the value stored under ``field``, or ``None`` if absent."""
        return self._imdb_data.get(field)

    def get_imdb_url(self):
        """Return the IMDb title-page URL built from this film's id."""
        url_template = "https://www.imdb.com/title/tt{}/"
        return url_template.format(self._imdb_id)

    def get_imdb_link(self):
        """Return a markdown link to the IMDb page, labelled with the title."""
        return generate_markdown_link(self._title, self.get_imdb_url())

    def get_title(self):
        """Return the film title given at construction."""
        return self._title

    def get_imdb_id(self):
        """Return the IMDb id given at construction."""
        return self._imdb_id

View File

@ -1,7 +1,8 @@
from collections import Counter
from concurrent.futures import ThreadPoolExecutor
import csv
from datetime import datetime
from vcinema_utils.VCinemaFilm import VCinemaFilm
from imdb_utils import IMDbUtils
from bookstack import Bookstack
@ -20,75 +21,74 @@ def get_viewings_csv_attachment_id(token_id, token_secret):
return next((x['id'] for x in attachments if x['uploaded_to'] == CSV_PAGE_ID and x['name'] == viewings_csv_file_name), None)
def get_vcinema_viewings(token_id, token_secret, viewings_csv=None, combine_repeat_viewings=True):
if viewings_csv is None:
def get_vcinema_viewings(token_id, token_secret):
attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
viewings_csv = viewings_csv.decode("utf-8")
viewings_csv_rows = viewings_csv.strip().split("\n")
viewings = list(csv.DictReader(viewings_csv_rows, quotechar='"'))
if combine_repeat_viewings:
for viewing in viewings:
viewing['viewings'] = [
{'date_watched': viewing['date_watched'], 'season': viewing['season'], 'rating': viewing['rating']}]
viewing.pop('date_watched')
viewing.pop('season')
viewing.pop('rating')
watch_counts = Counter([x['imdb_id'] for x in viewings])
repeat_watches = [k for k, v in watch_counts.items() if v > 1]
for film in repeat_watches:
viewing_indexes = [index for index, viewing in enumerate(viewings) if viewing['imdb_id'] == film]
first_watch = viewings[viewing_indexes[0]]
for index in viewing_indexes[1::]:
first_watch['viewings'].extend(viewings[index]['viewings'])
for index in reversed(viewing_indexes[1::]):
viewings.pop(index)
return viewings
def add_imdb_data(imdb_id, viewings, data_fields, progressbar=None):
def get_vcinema_films(token_id, token_secret):
    """Fetch all viewings and group them into one ``VCinemaFilm`` per IMDb id.

    Each viewing row must provide the keys ``imdb_id``, ``title``,
    ``date_watched`` (parsed as ``YYYY-MM-DD``), ``season`` and ``rating``.
    A film's title is taken from the first row seen for its IMDb id.

    :param token_id: passed through to ``get_vcinema_viewings``.
    :param token_secret: passed through to ``get_vcinema_viewings``.
    :return: list of ``VCinemaFilm`` objects, in order of first appearance.
    """
    films_by_id = {}

    for row in get_vcinema_viewings(token_id, token_secret):
        film_id = row["imdb_id"]
        film_title = row["title"]

        # Only the first row for an id creates the film; later rows for the
        # same id just add another viewing to it.
        if film_id not in films_by_id:
            films_by_id[film_id] = VCinemaFilm(imdb_id=film_id, title=film_title)

        watched_on = datetime.strptime(row['date_watched'], "%Y-%m-%d")
        films_by_id[film_id].add_viewing(watched_on, row['season'], row['rating'])

    return list(films_by_id.values())
def add_imdb_data(imdb_id, films, data_fields, progressbar=None):
movie = IMDbUtils.get_movie(imdb_id)
for viewing in viewings:
if viewing['imdb_id'] == movie.movieID:
for film in films:
if film.get_imdb_id() == movie.movieID:
for field_name in data_fields:
if field_name in movie:
viewing[field_name] = movie[field_name]
film.add_imdb_data(field_name, movie[field_name])
if progressbar is not None:
progressbar.next()
def add_imdb_keywords(imdb_id, viewings, progressbar=None):
def add_imdb_keywords(imdb_id, films, progressbar=None):
movie = IMDbUtils.get_movie_keywords(imdb_id)
for viewing in viewings:
if viewing['imdb_id'] == movie.movieID:
for film in films:
if film.get_imdb_id() == movie.movieID:
if 'keywords' in movie:
viewing['keywords'] = movie['keywords']
film.add_imdb_data('keywords', movie['keywords'])
if progressbar is not None:
progressbar.next()
def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
def add_imdb_data_to_films(films, field_names, progress_bar=None):
with ThreadPoolExecutor(4) as executor:
future_imdb_tasks = set()
if ('keywords' in field_names and len(field_names) > 1) or ('keywords' not in field_names and len(field_names) > 0):
future_imdb_tasks.update(executor.submit(add_imdb_data, viewing['imdb_id'], viewings, field_names, progress_bar) for viewing in viewings)
future_imdb_tasks.update(executor.submit(add_imdb_data, film.get_imdb_id(), films, field_names, progress_bar) for film in films)
if 'keywords' in field_names:
future_imdb_tasks.update(executor.submit(add_imdb_keywords, viewing['imdb_id'], viewings, progress_bar) for viewing in viewings)
future_imdb_tasks.update(executor.submit(add_imdb_keywords, film.get_imdb_id(), films, progress_bar) for film in films)
progress_bar.max = len(future_imdb_tasks)
@ -96,32 +96,32 @@ def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
progress_bar.finish()
def filter_viewings(viewings, filter_field):
viewings_filtered = {}
def filter_films(films: [VCinemaFilm], field: str) -> [VCinemaFilm]:
films_filtered = {}
for viewing in viewings:
if filter_field in viewing:
viewing_field = viewing[filter_field]
if isinstance(viewing_field, list):
for fve in list(viewing_field):
if fve in viewings_filtered.keys():
viewings_filtered[fve] += [viewing]
for film in films:
if film.get_imdb_data(field) is not None:
field_value = film.get_imdb_data(field)
if isinstance(field_value, list):
for value in list(field_value):
if value in films_filtered.keys():
films_filtered[value] += [film]
else:
viewings_filtered[fve] = [viewing]
films_filtered[value] = [film]
else:
if viewing_field in viewings_filtered.keys():
viewings_filtered[viewing_field] += [viewing]
if field_value in films_filtered.keys():
films_filtered[field_value] += [film]
else:
viewings_filtered[viewing_field] = [viewing]
films_filtered[field_value] = [film]
return viewings_filtered
return films_filtered
def get_film_list(films):
def get_film_list(films: [VCinemaFilm]) -> str:
film_links = []
for film in films:
film_link = generate_imdb_film_link(film)
film_link = film.get_imdb_link()
film_links.append(film_link)
if len(film_links) > 0:
@ -130,21 +130,13 @@ def get_film_list(films):
return ""
def generate_markdown_link(text, url):
def generate_markdown_link(text, url) -> str:
return "[{}]({})".format(text, url)
def generate_imdb_film_link(film):
return generate_markdown_link(film['title'], generate_imdb_url(film['imdb_id']))
def generate_wikipedia_page_link(page_title):
return generate_markdown_link(page_title, generate_wikipedia_url(page_title))
def generate_imdb_url(imdb_id):
return "https://www.imdb.com/title/tt{}/".format(imdb_id)
def generate_wikipedia_url(page_title):
return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))

6
vcinema_utils/Viewing.py Normal file
View File

@ -0,0 +1,6 @@
class Viewing:
    """A single viewing of a film: when it was watched, in which season, and its rating."""

    def __init__(self, date_watched, season, rating):
        """Store the three values unchanged as public attributes."""
        self.date_watched, self.season, self.rating = date_watched, season, rating

View File

@ -16,7 +16,7 @@ PAGE_ID = 34
def get_films_by_country(viewings):
viewings_filtered_by_country = VCinemaUtils.filter_viewings(viewings, "countries")
viewings_filtered_by_country = VCinemaUtils.filter_films(viewings, "countries")
if "Czechia" in viewings_filtered_by_country.keys():
viewings_filtered_by_country["Czech Republic"] = viewings_filtered_by_country["Czechia"]

View File

@ -2,23 +2,23 @@ from collections import OrderedDict
import wikipedia
from bookstack import Bookstack
from vcinema_utils import VCinemaUtils
from vcinema_utils import VCinemaUtils, VCinemaFilm
# Page ID of https://wiki.jacknet.io/books/vcinema/page/references
PAGE_ID = 62
def get_films_by_reference(viewings):
def get_films_by_reference(films: [VCinemaFilm]):
films_by_reference = {}
for viewing in viewings:
if "keywords" in viewing.keys():
for keyword in viewing["keywords"]:
for film in films:
if (film_keywords := film.get_imdb_data("keywords")) is not None:
for keyword in film_keywords:
if keyword.startswith("reference-to-"):
for reference in films_by_reference:
if keyword in films_by_reference[reference]["keywords"]:
films_by_reference[reference]["films"].append(viewing)
films_by_reference[reference]["films"].append(film)
break
else:
keyword = keyword[13:]
@ -31,13 +31,13 @@ def get_films_by_reference(viewings):
referenced = keyword.replace("-", " ")
try:
searches = wikipedia.search(referenced, suggestion=False)
try:
referenced_page = wikipedia.page(title=referenced, auto_suggest=False)
page_title = referenced_page.title
page_url = referenced_page.url
except wikipedia.DisambiguationError as e:
page_title = e.title
page_title = page_title[0].upper() + page_title[1:]
@ -60,13 +60,13 @@ def get_films_by_reference(viewings):
if page_title in films_by_reference.keys():
films_by_reference[page_title]["keywords"].append(keyword)
if viewing not in films_by_reference[page_title]["films"]:
films_by_reference[page_title]["films"].append(viewing)
if film not in films_by_reference[page_title]["films"]:
films_by_reference[page_title]["films"].append(film)
else:
films_by_reference[page_title] = {"url": page_url,
"keywords": [keyword],
"films": [viewing]}
"films": [film]}
return films_by_reference

View File

@ -8,7 +8,7 @@ PAGE_ID = 24
def get_films_by_year(viewings):
viewings_filtered_by_year = VCinemaUtils.filter_viewings(viewings, "year")
viewings_filtered_by_year = VCinemaUtils.filter_films(viewings, "year")
return viewings_filtered_by_year

View File

@ -9,7 +9,7 @@ PAGE_ID = 63
def get_hidden_themes(viewings, token_id, token_secret):
# Bit horrible to need to request this again, but it affects the order of the result table
viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)
viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
# Copy keywords from grouped viewings to ungrouped viewings
for viewing_ungrouped in viewings_ungrouped:

View File

@ -12,7 +12,7 @@ PAGE_ID = 23
def get_keyword_scores(viewings):
viewings_filtered_keyword = VCinemaUtils.filter_viewings(viewings, "keywords")
viewings_filtered_keyword = VCinemaUtils.filter_films(viewings, "keywords")
for keyword, viewings in viewings_filtered_keyword.items():
viewings_filtered_keyword[keyword] = {"vcinema_films": viewings}