Compare commits
No commits in common. "task/refactor-viewings-representation" and "master" have entirely different histories.
task/refac
...
master
|
@ -1,5 +1,5 @@
|
||||||
from wiki_pages import FilmsByCountry, FilmsByReference, FilmsByYear, HiddenThemes, KeywordScores, ViewingsCsv
|
from wiki_pages import FilmsByCountry, FilmsByReference, FilmsByYear, HiddenThemes, KeywordScores, ViewingsCsv
|
||||||
from vcinema_utils.VCinemaUtils import *
|
from vcinema_utils import VCinemaUtils
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
@ -12,7 +12,7 @@ def update_wiki(token_id, token_secret, update_csv, pages):
|
||||||
ViewingsCsv.update_viewings_csv(token_id, token_secret)
|
ViewingsCsv.update_viewings_csv(token_id, token_secret)
|
||||||
|
|
||||||
print("Getting viewings")
|
print("Getting viewings")
|
||||||
films = get_vcinema_films(token_id, token_secret)
|
viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
|
||||||
|
|
||||||
update_films_by_year = 'years' in pages
|
update_films_by_year = 'years' in pages
|
||||||
update_films_by_country = 'countries' in pages
|
update_films_by_country = 'countries' in pages
|
||||||
|
@ -30,26 +30,26 @@ def update_wiki(token_id, token_secret, update_csv, pages):
|
||||||
if update_film_references or update_hidden_themes or update_keyword_scores:
|
if update_film_references or update_hidden_themes or update_keyword_scores:
|
||||||
data_fields.append("keywords")
|
data_fields.append("keywords")
|
||||||
|
|
||||||
films_count = len(films)
|
viewing_count = len(viewings)
|
||||||
with IncrementalBar('Retrieving movie data', max=films_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
|
with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
|
||||||
add_imdb_data_to_films(films, data_fields, bar)
|
VCinemaUtils.add_imdb_data_to_viewings(viewings, data_fields, bar)
|
||||||
|
|
||||||
print("Processing viewing data")
|
print("Processing viewing data")
|
||||||
|
|
||||||
if update_films_by_year:
|
if update_films_by_year:
|
||||||
films_by_year = FilmsByYear.get_films_by_year(films)
|
films_by_year = FilmsByYear.get_films_by_year(viewings)
|
||||||
FilmsByYear.update_page(token_id, token_secret, films_by_year)
|
FilmsByYear.update_page(token_id, token_secret, films_by_year)
|
||||||
if update_films_by_country:
|
if update_films_by_country:
|
||||||
films_by_country = FilmsByCountry.get_films_by_country(films)
|
films_by_country = FilmsByCountry.get_films_by_country(viewings)
|
||||||
FilmsByCountry.update_page(token_id, token_secret, films_by_country)
|
FilmsByCountry.update_page(token_id, token_secret, films_by_country)
|
||||||
if update_film_references:
|
if update_film_references:
|
||||||
films_by_reference = FilmsByReference.get_films_by_reference(films)
|
films_by_reference = FilmsByReference.get_films_by_reference(viewings)
|
||||||
FilmsByReference.update_page(token_id, token_secret, films_by_reference)
|
FilmsByReference.update_page(token_id, token_secret, films_by_reference)
|
||||||
if update_hidden_themes:
|
if update_hidden_themes:
|
||||||
hidden_themes = HiddenThemes.get_hidden_themes(films, token_id, token_secret)
|
hidden_themes = HiddenThemes.get_hidden_themes(viewings, token_id, token_secret)
|
||||||
HiddenThemes.update_page(token_id, token_secret, hidden_themes)
|
HiddenThemes.update_page(token_id, token_secret, hidden_themes)
|
||||||
if update_keyword_scores:
|
if update_keyword_scores:
|
||||||
keyword_scores = KeywordScores.get_keyword_scores(films)
|
keyword_scores = KeywordScores.get_keyword_scores(viewings)
|
||||||
KeywordScores.update_page(token_id, token_secret, keyword_scores)
|
KeywordScores.update_page(token_id, token_secret, keyword_scores)
|
||||||
|
|
||||||
print("Done!")
|
print("Done!")
|
||||||
|
|
|
@ -1,37 +0,0 @@
|
||||||
from vcinema_utils.VCinemaUtils import *
|
|
||||||
from vcinema_utils.Viewing import Viewing
|
|
||||||
|
|
||||||
|
|
||||||
class VCinemaFilm:
|
|
||||||
|
|
||||||
def __init__(self, title, imdb_id):
|
|
||||||
self._title = title
|
|
||||||
self._imdb_id = imdb_id
|
|
||||||
self._viewings = []
|
|
||||||
self._imdb_data = {}
|
|
||||||
|
|
||||||
def add_viewing(self, date, season, rating):
|
|
||||||
viewing = Viewing(date, season, rating)
|
|
||||||
|
|
||||||
self._viewings.append(viewing)
|
|
||||||
|
|
||||||
def add_imdb_data(self, field, value):
|
|
||||||
self._imdb_data[field] = value
|
|
||||||
|
|
||||||
def get_imdb_data(self, field):
|
|
||||||
if field in self._imdb_data:
|
|
||||||
return self._imdb_data[field]
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_imdb_url(self):
|
|
||||||
return "https://www.imdb.com/title/tt{}/".format(self._imdb_id)
|
|
||||||
|
|
||||||
def get_imdb_link(self):
|
|
||||||
return generate_markdown_link(self._title, self.get_imdb_url())
|
|
||||||
|
|
||||||
def get_title(self):
|
|
||||||
return self._title
|
|
||||||
|
|
||||||
def get_imdb_id(self):
|
|
||||||
return self._imdb_id
|
|
|
@ -1,8 +1,7 @@
|
||||||
|
from collections import Counter
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
import csv
|
import csv
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from vcinema_utils.VCinemaFilm import VCinemaFilm
|
|
||||||
from imdb_utils import IMDbUtils
|
from imdb_utils import IMDbUtils
|
||||||
from bookstack import Bookstack
|
from bookstack import Bookstack
|
||||||
|
|
||||||
|
@ -21,74 +20,75 @@ def get_viewings_csv_attachment_id(token_id, token_secret):
|
||||||
return next((x['id'] for x in attachments if x['uploaded_to'] == CSV_PAGE_ID and x['name'] == viewings_csv_file_name), None)
|
return next((x['id'] for x in attachments if x['uploaded_to'] == CSV_PAGE_ID and x['name'] == viewings_csv_file_name), None)
|
||||||
|
|
||||||
|
|
||||||
def get_vcinema_viewings(token_id, token_secret):
|
def get_vcinema_viewings(token_id, token_secret, viewings_csv=None, combine_repeat_viewings=True):
|
||||||
|
if viewings_csv is None:
|
||||||
attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
|
attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
|
||||||
|
|
||||||
viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
|
viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
|
||||||
viewings_csv = viewings_csv.decode("utf-8")
|
|
||||||
|
|
||||||
|
viewings_csv = viewings_csv.decode("utf-8")
|
||||||
viewings_csv_rows = viewings_csv.strip().split("\n")
|
viewings_csv_rows = viewings_csv.strip().split("\n")
|
||||||
|
|
||||||
viewings = list(csv.DictReader(viewings_csv_rows, quotechar='"'))
|
viewings = list(csv.DictReader(viewings_csv_rows, quotechar='"'))
|
||||||
|
|
||||||
|
if combine_repeat_viewings:
|
||||||
|
for viewing in viewings:
|
||||||
|
viewing['viewings'] = [
|
||||||
|
{'date_watched': viewing['date_watched'], 'season': viewing['season'], 'rating': viewing['rating']}]
|
||||||
|
viewing.pop('date_watched')
|
||||||
|
viewing.pop('season')
|
||||||
|
viewing.pop('rating')
|
||||||
|
|
||||||
|
watch_counts = Counter([x['imdb_id'] for x in viewings])
|
||||||
|
repeat_watches = [k for k, v in watch_counts.items() if v > 1]
|
||||||
|
|
||||||
|
for film in repeat_watches:
|
||||||
|
viewing_indexes = [index for index, viewing in enumerate(viewings) if viewing['imdb_id'] == film]
|
||||||
|
|
||||||
|
first_watch = viewings[viewing_indexes[0]]
|
||||||
|
|
||||||
|
for index in viewing_indexes[1::]:
|
||||||
|
first_watch['viewings'].extend(viewings[index]['viewings'])
|
||||||
|
|
||||||
|
for index in reversed(viewing_indexes[1::]):
|
||||||
|
viewings.pop(index)
|
||||||
|
|
||||||
return viewings
|
return viewings
|
||||||
|
|
||||||
|
|
||||||
def get_vcinema_films(token_id, token_secret):
|
def add_imdb_data(imdb_id, viewings, data_fields, progressbar=None):
|
||||||
viewings = get_vcinema_viewings(token_id, token_secret)
|
|
||||||
films = {}
|
|
||||||
|
|
||||||
for viewing in viewings:
|
|
||||||
imdb_id = viewing["imdb_id"]
|
|
||||||
title = viewing["title"]
|
|
||||||
|
|
||||||
if imdb_id not in films.keys():
|
|
||||||
film = VCinemaFilm(imdb_id=imdb_id, title=title)
|
|
||||||
films[imdb_id] = film
|
|
||||||
|
|
||||||
date_watched = datetime.strptime(viewing['date_watched'], "%Y-%m-%d")
|
|
||||||
season = viewing['season']
|
|
||||||
rating = viewing['rating']
|
|
||||||
|
|
||||||
films[imdb_id].add_viewing(date_watched, season, rating)
|
|
||||||
|
|
||||||
return list(films.values())
|
|
||||||
|
|
||||||
|
|
||||||
def add_imdb_data(imdb_id, films, data_fields, progressbar=None):
|
|
||||||
movie = IMDbUtils.get_movie(imdb_id)
|
movie = IMDbUtils.get_movie(imdb_id)
|
||||||
|
|
||||||
for film in films:
|
for viewing in viewings:
|
||||||
if film.get_imdb_id() == movie.movieID:
|
if viewing['imdb_id'] == movie.movieID:
|
||||||
for field_name in data_fields:
|
for field_name in data_fields:
|
||||||
if field_name in movie:
|
if field_name in movie:
|
||||||
film.add_imdb_data(field_name, movie[field_name])
|
viewing[field_name] = movie[field_name]
|
||||||
|
|
||||||
if progressbar is not None:
|
if progressbar is not None:
|
||||||
progressbar.next()
|
progressbar.next()
|
||||||
|
|
||||||
|
|
||||||
def add_imdb_keywords(imdb_id, films, progressbar=None):
|
def add_imdb_keywords(imdb_id, viewings, progressbar=None):
|
||||||
movie = IMDbUtils.get_movie_keywords(imdb_id)
|
movie = IMDbUtils.get_movie_keywords(imdb_id)
|
||||||
|
|
||||||
for film in films:
|
for viewing in viewings:
|
||||||
if film.get_imdb_id() == movie.movieID:
|
if viewing['imdb_id'] == movie.movieID:
|
||||||
if 'keywords' in movie:
|
if 'keywords' in movie:
|
||||||
film.add_imdb_data('keywords', movie['keywords'])
|
viewing['keywords'] = movie['keywords']
|
||||||
|
|
||||||
if progressbar is not None:
|
if progressbar is not None:
|
||||||
progressbar.next()
|
progressbar.next()
|
||||||
|
|
||||||
|
|
||||||
def add_imdb_data_to_films(films, field_names, progress_bar=None):
|
def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
|
||||||
with ThreadPoolExecutor(4) as executor:
|
with ThreadPoolExecutor(4) as executor:
|
||||||
future_imdb_tasks = set()
|
future_imdb_tasks = set()
|
||||||
|
|
||||||
if ('keywords' in field_names and len(field_names) > 1) or ('keywords' not in field_names and len(field_names) > 0):
|
if ('keywords' in field_names and len(field_names) > 1) or ('keywords' not in field_names and len(field_names) > 0):
|
||||||
future_imdb_tasks.update(executor.submit(add_imdb_data, film.get_imdb_id(), films, field_names, progress_bar) for film in films)
|
future_imdb_tasks.update(executor.submit(add_imdb_data, viewing['imdb_id'], viewings, field_names, progress_bar) for viewing in viewings)
|
||||||
|
|
||||||
if 'keywords' in field_names:
|
if 'keywords' in field_names:
|
||||||
future_imdb_tasks.update(executor.submit(add_imdb_keywords, film.get_imdb_id(), films, progress_bar) for film in films)
|
future_imdb_tasks.update(executor.submit(add_imdb_keywords, viewing['imdb_id'], viewings, progress_bar) for viewing in viewings)
|
||||||
|
|
||||||
progress_bar.max = len(future_imdb_tasks)
|
progress_bar.max = len(future_imdb_tasks)
|
||||||
|
|
||||||
|
@ -96,32 +96,32 @@ def add_imdb_data_to_films(films, field_names, progress_bar=None):
|
||||||
progress_bar.finish()
|
progress_bar.finish()
|
||||||
|
|
||||||
|
|
||||||
def filter_films(films: [VCinemaFilm], field: str) -> [VCinemaFilm]:
|
def filter_viewings(viewings, filter_field):
|
||||||
films_filtered = {}
|
viewings_filtered = {}
|
||||||
|
|
||||||
for film in films:
|
for viewing in viewings:
|
||||||
if film.get_imdb_data(field) is not None:
|
if filter_field in viewing:
|
||||||
field_value = film.get_imdb_data(field)
|
viewing_field = viewing[filter_field]
|
||||||
if isinstance(field_value, list):
|
if isinstance(viewing_field, list):
|
||||||
for value in list(field_value):
|
for fve in list(viewing_field):
|
||||||
if value in films_filtered.keys():
|
if fve in viewings_filtered.keys():
|
||||||
films_filtered[value] += [film]
|
viewings_filtered[fve] += [viewing]
|
||||||
else:
|
else:
|
||||||
films_filtered[value] = [film]
|
viewings_filtered[fve] = [viewing]
|
||||||
else:
|
else:
|
||||||
if field_value in films_filtered.keys():
|
if viewing_field in viewings_filtered.keys():
|
||||||
films_filtered[field_value] += [film]
|
viewings_filtered[viewing_field] += [viewing]
|
||||||
else:
|
else:
|
||||||
films_filtered[field_value] = [film]
|
viewings_filtered[viewing_field] = [viewing]
|
||||||
|
|
||||||
return films_filtered
|
return viewings_filtered
|
||||||
|
|
||||||
|
|
||||||
def get_film_list(films: [VCinemaFilm]) -> str:
|
def get_film_list(films):
|
||||||
film_links = []
|
film_links = []
|
||||||
|
|
||||||
for film in films:
|
for film in films:
|
||||||
film_link = film.get_imdb_link()
|
film_link = generate_imdb_film_link(film)
|
||||||
film_links.append(film_link)
|
film_links.append(film_link)
|
||||||
|
|
||||||
if len(film_links) > 0:
|
if len(film_links) > 0:
|
||||||
|
@ -130,13 +130,21 @@ def get_film_list(films: [VCinemaFilm]) -> str:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def generate_markdown_link(text, url) -> str:
|
def generate_markdown_link(text, url):
|
||||||
return "[{}]({})".format(text, url)
|
return "[{}]({})".format(text, url)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_imdb_film_link(film):
|
||||||
|
return generate_markdown_link(film['title'], generate_imdb_url(film['imdb_id']))
|
||||||
|
|
||||||
|
|
||||||
def generate_wikipedia_page_link(page_title):
|
def generate_wikipedia_page_link(page_title):
|
||||||
return generate_markdown_link(page_title, generate_wikipedia_url(page_title))
|
return generate_markdown_link(page_title, generate_wikipedia_url(page_title))
|
||||||
|
|
||||||
|
|
||||||
|
def generate_imdb_url(imdb_id):
|
||||||
|
return "https://www.imdb.com/title/tt{}/".format(imdb_id)
|
||||||
|
|
||||||
|
|
||||||
def generate_wikipedia_url(page_title):
|
def generate_wikipedia_url(page_title):
|
||||||
return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))
|
return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
class Viewing:
|
|
||||||
|
|
||||||
def __init__(self, date_watched, season, rating):
|
|
||||||
self.date_watched = date_watched
|
|
||||||
self.season = season
|
|
||||||
self.rating = rating
|
|
|
@ -16,7 +16,7 @@ PAGE_ID = 34
|
||||||
|
|
||||||
|
|
||||||
def get_films_by_country(viewings):
|
def get_films_by_country(viewings):
|
||||||
viewings_filtered_by_country = VCinemaUtils.filter_films(viewings, "countries")
|
viewings_filtered_by_country = VCinemaUtils.filter_viewings(viewings, "countries")
|
||||||
|
|
||||||
if "Czechia" in viewings_filtered_by_country.keys():
|
if "Czechia" in viewings_filtered_by_country.keys():
|
||||||
viewings_filtered_by_country["Czech Republic"] = viewings_filtered_by_country["Czechia"]
|
viewings_filtered_by_country["Czech Republic"] = viewings_filtered_by_country["Czechia"]
|
||||||
|
|
|
@ -2,23 +2,23 @@ from collections import OrderedDict
|
||||||
import wikipedia
|
import wikipedia
|
||||||
|
|
||||||
from bookstack import Bookstack
|
from bookstack import Bookstack
|
||||||
from vcinema_utils import VCinemaUtils, VCinemaFilm
|
from vcinema_utils import VCinemaUtils
|
||||||
|
|
||||||
# Page ID of https://wiki.jacknet.io/books/vcinema/page/references
|
# Page ID of https://wiki.jacknet.io/books/vcinema/page/references
|
||||||
PAGE_ID = 62
|
PAGE_ID = 62
|
||||||
|
|
||||||
|
|
||||||
def get_films_by_reference(films: [VCinemaFilm]):
|
def get_films_by_reference(viewings):
|
||||||
films_by_reference = {}
|
films_by_reference = {}
|
||||||
|
|
||||||
for film in films:
|
for viewing in viewings:
|
||||||
if (film_keywords := film.get_imdb_data("keywords")) is not None:
|
if "keywords" in viewing.keys():
|
||||||
for keyword in film_keywords:
|
for keyword in viewing["keywords"]:
|
||||||
if keyword.startswith("reference-to-"):
|
if keyword.startswith("reference-to-"):
|
||||||
|
|
||||||
for reference in films_by_reference:
|
for reference in films_by_reference:
|
||||||
if keyword in films_by_reference[reference]["keywords"]:
|
if keyword in films_by_reference[reference]["keywords"]:
|
||||||
films_by_reference[reference]["films"].append(film)
|
films_by_reference[reference]["films"].append(viewing)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
keyword = keyword[13:]
|
keyword = keyword[13:]
|
||||||
|
@ -31,13 +31,13 @@ def get_films_by_reference(films: [VCinemaFilm]):
|
||||||
|
|
||||||
referenced = keyword.replace("-", " ")
|
referenced = keyword.replace("-", " ")
|
||||||
|
|
||||||
searches = wikipedia.search(referenced, suggestion=False)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
searches = wikipedia.search(referenced, suggestion=False)
|
||||||
referenced_page = wikipedia.page(title=referenced, auto_suggest=False)
|
referenced_page = wikipedia.page(title=referenced, auto_suggest=False)
|
||||||
|
|
||||||
page_title = referenced_page.title
|
page_title = referenced_page.title
|
||||||
page_url = referenced_page.url
|
page_url = referenced_page.url
|
||||||
|
|
||||||
except wikipedia.DisambiguationError as e:
|
except wikipedia.DisambiguationError as e:
|
||||||
page_title = e.title
|
page_title = e.title
|
||||||
page_title = page_title[0].upper() + page_title[1:]
|
page_title = page_title[0].upper() + page_title[1:]
|
||||||
|
@ -60,13 +60,13 @@ def get_films_by_reference(films: [VCinemaFilm]):
|
||||||
if page_title in films_by_reference.keys():
|
if page_title in films_by_reference.keys():
|
||||||
films_by_reference[page_title]["keywords"].append(keyword)
|
films_by_reference[page_title]["keywords"].append(keyword)
|
||||||
|
|
||||||
if film not in films_by_reference[page_title]["films"]:
|
if viewing not in films_by_reference[page_title]["films"]:
|
||||||
films_by_reference[page_title]["films"].append(film)
|
films_by_reference[page_title]["films"].append(viewing)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
films_by_reference[page_title] = {"url": page_url,
|
films_by_reference[page_title] = {"url": page_url,
|
||||||
"keywords": [keyword],
|
"keywords": [keyword],
|
||||||
"films": [film]}
|
"films": [viewing]}
|
||||||
|
|
||||||
return films_by_reference
|
return films_by_reference
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ PAGE_ID = 24
|
||||||
|
|
||||||
|
|
||||||
def get_films_by_year(viewings):
|
def get_films_by_year(viewings):
|
||||||
viewings_filtered_by_year = VCinemaUtils.filter_films(viewings, "year")
|
viewings_filtered_by_year = VCinemaUtils.filter_viewings(viewings, "year")
|
||||||
|
|
||||||
return viewings_filtered_by_year
|
return viewings_filtered_by_year
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ PAGE_ID = 63
|
||||||
|
|
||||||
def get_hidden_themes(viewings, token_id, token_secret):
|
def get_hidden_themes(viewings, token_id, token_secret):
|
||||||
# Bit horrible to need to request this again, but it affects the order of the result table
|
# Bit horrible to need to request this again, but it affects the order of the result table
|
||||||
viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
|
viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)
|
||||||
|
|
||||||
# Copy keywords from grouped viewings to ungrouped viewings
|
# Copy keywords from grouped viewings to ungrouped viewings
|
||||||
for viewing_ungrouped in viewings_ungrouped:
|
for viewing_ungrouped in viewings_ungrouped:
|
||||||
|
|
|
@ -12,7 +12,7 @@ PAGE_ID = 23
|
||||||
|
|
||||||
|
|
||||||
def get_keyword_scores(viewings):
|
def get_keyword_scores(viewings):
|
||||||
viewings_filtered_keyword = VCinemaUtils.filter_films(viewings, "keywords")
|
viewings_filtered_keyword = VCinemaUtils.filter_viewings(viewings, "keywords")
|
||||||
|
|
||||||
for keyword, viewings in viewings_filtered_keyword.items():
|
for keyword, viewings in viewings_filtered_keyword.items():
|
||||||
viewings_filtered_keyword[keyword] = {"vcinema_films": viewings}
|
viewings_filtered_keyword[keyword] = {"vcinema_films": viewings}
|
||||||
|
|
Loading…
Reference in New Issue