Compare commits
No commits in common. "master" and "056e3b474f202fcc9090dbe63fbd6ac710b565bd" have entirely different histories.
master...056e3b474f
.gitignore (vendored) · 5 changes
@@ -1,5 +0,0 @@
-
-*.pyc
-.idea/*
-__pycache__/*
-.DS_Store
.gitmodules (vendored) · 6 changes
@@ -1,3 +1,3 @@
-[submodule "bookstack"]
-    path = bookstack
-    url = git@git.jacknet.io:sarah/bookstack.git
+[submodule "wiki_utils"]
+    path = wiki_utils
+    url = gitea@git.jacknet.io:sarah/wiki_utils.git
bookstack (submodule) · 1 change
@@ -1 +0,0 @@
-Subproject commit 8f6e38cb337bcf51b0790f2db7001681ceb9338f
@@ -1,258 +0,0 @@
-Country,Flag
-Afghanistan,🇦🇫
-Åland Islands,🇦🇽
-Albania,🇦🇱
-Algeria,🇩🇿
-American Samoa,🇦🇸
-Andorra,🇦🇩
-Angola,🇦🇴
-Anguilla,🇦🇮
-Antarctica,🇦🇶
-Antigua and Barbuda,🇦🇬
-Argentina,🇦🇷
-Armenia,🇦🇲
-Aruba,🇦🇼
-Australia,🇦🇺
-Austria,🇦🇹
-Azerbaijan,🇦🇿
-Bahamas,🇧🇸
-Bahrain,🇧🇭
-Bangladesh,🇧🇩
-Barbados,🇧🇧
-Belarus,🇧🇾
-Belgium,🇧🇪
-Belize,🇧🇿
-Benin,🇧🇯
-Bermuda,🇧🇲
-Bhutan,🇧🇹
-Bolivia,🇧🇴
-"Bonaire, Sint Eustatius and Saba",🇧🇶
-Bosnia and Herzegovina,🇧🇦
-Botswana,🇧🇼
-Bouvet Island,🇧🇻
-Brazil,🇧🇷
-British Indian Ocean Territory,🇮🇴
-British Virgin Islands,🇻🇬
-Brunei Darussalamm,🇧🇳
-Bulgaria,🇧🇬
-Burkina Faso,🇧🇫
-Burma,🇲🇲
-Burundi,🇧🇮
-Cambodia,🇰🇭
-Cameroon,🇨🇲
-Canada,🇨🇦
-Cape Verde,🇨🇻
-Cayman Islands,🇰🇾
-Central African Republic,🇨🇫
-Chad,🇹🇩
-Chile,🇨🇱
-China,🇨🇳
-Christmas Island,🇨🇽
-Cocos (Keeling) Islands,🇨🇨
-Colombia,🇨🇴
-Comoros,🇰🇲
-Congo,🇨🇩
-Cook Islands,🇨🇰
-Costa Rica,🇨🇷
-Côte d'Ivoire,🇨🇮
-Croatia,🇭🇷
-Cuba,🇨🇺
-Cyprus,🇨🇾
-Czech Republic,🇨🇿
-Czechoslovakia,🇨🇿
-Democratic Republic of the Congo,🇨🇩
-Denmark,🇩🇰
-Djibouti,🇩🇯
-Dominica,🇩🇲
-Dominican Republic,🇩🇴
-East Germany,➡️🇩🇪
-Ecuador,🇪🇨
-Egypt,🇪🇬
-El Salvador,🇸🇻
-Equatorial Guinea,🇬🇶
-Eritrea,🇪🇷
-Estonia,🇪🇪
-Ethiopia,🇪🇹
-Falkland Islands,🇫🇰
-Faroe Islands,🇫🇴
-Federated States of Micronesia,🇫🇲
-Fiji,🇫🇯
-Finland,🇫🇮
-France,🇫🇷
-French Guiana,🇬🇫
-French Polynesia,🇵🇫
-French Southern Territories,🇹🇫
-Gabon,🇬🇦
-Gambia,🇬🇲
-Georgia,🇬🇪
-Germany,🇩🇪
-Ghana,🇬🇭
-Gibraltar,🇬🇮
-Greece,🇬🇷
-Greenland,🇬🇱
-Grenada,🇬🇩
-Guadeloupe,🇬🇵
-Guam,🇬🇺
-Guatemala,🇬🇹
-Guernsey,🇬🇬
-Guinea,🇬🇳
-Guinea-Bissau,🇬🇼
-Guyana,🇬🇾
-Haiti,🇭🇹
-Heard Island and McDonald Islands,🇭🇲
-Holy See (Vatican City State),🇻🇦
-Honduras,🇭🇳
-Hong Kong,🇭🇰
-Hungary,🇭🇺
-Iceland,🇮🇸
-India,🇮🇳
-Indonesia,🇮🇩
-Iran,🇮🇷
-Iraq,🇮🇶
-Ireland,🇮🇪
-Isle of Man,🇮🇲
-Israel,🇮🇱
-Italy,🇮🇹
-Jamaica,🇯🇲
-Japan,🇯🇵
-Jersey,🇯🇪
-Jordan,🇯🇴
-Kazakhstan,🇰🇿
-Kenya,🇰🇪
-Kiribati,🇰🇮
-Korea,🇰🇵🇰🇷
-Kosovo,🇽🇰
-Kuwait,🇰🇼
-Kyrgyzstan,🇰🇬
-Laos,🇱🇦
-Latvia,🇱🇻
-Lebanon,🇱🇧
-Lesotho,🇱🇸
-Liberia,🇱🇷
-Libya,🇱🇾
-Liechtenstein,🇱🇮
-Lithuania,🇱🇹
-Luxembourg,🇱🇺
-Macao,🇲🇴
-Madagascar,🇲🇬
-Malawi,🇲🇼
-Malaysia,🇲🇾
-Maldives,🇲🇻
-Mali,🇲🇱
-Malta,🇲🇹
-Marshall Islands,🇲🇭
-Martinique,🇲🇶
-Mauritania,🇲🇷
-Mauritius,🇲🇺
-Mayotte,🇾🇹
-Mexico,🇲🇽
-Moldova,🇲🇩
-Monaco,🇲🇨
-Mongolia,🇲🇳
-Montenegro,🇲🇪
-Montserrat,🇲🇸
-Morocco,🇲🇦
-Mozambique,🇲🇿
-Myanmar,🇲🇲
-Namibia,🇳🇦
-Nauru,🇳🇷
-Nepal,🇳🇵
-Netherlands,🇳🇱
-Netherlands Antilles,🇳🇱
-New Caledonia,🇳🇨
-New Zealand,🇳🇿
-Nicaragua,🇳🇮
-Niger,🇳🇪
-Nigeria,🇳🇬
-Niue,🇳🇺
-Norfolk Island,🇳🇫
-North Korea,🇰🇵
-North Vietnam,🇻🇳
-Northern Mariana Islands,🇲🇵
-Norway,🇳🇴
-Oman,🇴🇲
-Pakistan,🇵🇰
-Palau,🇵🇼
-Palestine,🇵🇸
-Palestinian Territory,🇵🇸
-Panama,🇵🇦
-Papua New Guinea,🇵🇬
-Paraguay,🇵🇾
-Peru,🇵🇪
-Philippines,🇵🇭
-Poland,🇵🇱
-Portugal,🇵🇹
-Pitcairn,🇵🇳
-Puerto Rico,🇵🇷
-Qatar,🇶🇦
-Republic of Macedonia,🇲🇰
-Réunion,🇷🇪
-Romania,🇷🇴
-Russia,🇷🇺
-Rwanda,🇷🇼
-Saint Barthélemy,🇧🇱
-Saint Helena,🇸🇭
-Saint Kitts and Nevis,🇰🇳
-Saint Lucia,🇱🇨
-Saint Martin (French part),🇫🇷
-Saint Pierre and Miquelon,🇵🇲
-Saint Vincent and the Grenadines,🇻🇨
-Samoa,🇼🇸
-San Marino,🇸🇲
-Sao Tome and Principe,🇸🇹
-Saudi Arabia,🇸🇦
-Senegal,🇸🇳
-Serbia,🇷🇸
-Serbia and Montenegro,🇷🇸🇲🇪
-Seychelles,🇸🇨
-Siam,🇹🇭
-Sierra Leone,🇸🇱
-Singapore,🇸🇬
-Slovakia,🇸🇰
-Slovenia,🇸🇮
-Solomon Islands,🇸🇧
-Somalia,🇸🇴
-South Africa,🇿🇦
-South Georgia and the South Sandwich Islands,🇬🇸
-South Korea,🇰🇷
-Spain,🇪🇸
-Sri Lanka,🇱🇰
-Sudan,🇸🇩
-Suriname,🇸🇷
-Svalbard and Jan Mayen,🇸🇯
-Swaziland,🇸🇿
-Sweden,🇸🇪
-Switzerland,🇨🇭
-Syria,🇸🇾
-Taiwan,🇹🇼
-Tajikistan,🇹🇯
-Tanzania,🇹🇿
-Thailand,🇹🇭
-Timor-Leste,🇹🇱
-Togo,🇹🇬
-Tokelau,🇹🇰
-Tonga,🇹🇴
-Trinidad and Tobago,🇹🇹
-Tunisia,🇹🇳
-Turkey,🇹🇷
-Turkmenistan,🇹🇲
-Turks and Caicos Islands,🇹🇨
-Tuvalu,🇹🇻
-U.S. Virgin Islands,🇻🇮
-Uganda,🇺🇬
-Ukraine,🇺🇦
-United Arab Emirates,🇦🇪
-United Kingdom,🇬🇧
-United States,🇺🇸
-United States Minor Outlying Islands,🇺🇲
-Uruguay,🇺🇾
-Uzbekistan,🇺🇿
-Vanuatu,🇻🇺
-Venezuela,🇻🇪
-Vietnam,🇻🇳
-Wallis and Futuna,🇫🇷
-West Germany,⬅️🇩🇪
-Western Sahara,🇪🇭
-Yemen,🇾🇪
-Zambia,🇿🇲
-Zimbabwe,🇿🇼
@@ -1,73 +0,0 @@
-from wiki_pages import FilmsByCountry
-from vcinema_utils import VCinemaUtils
-
-import argparse
-from collections import OrderedDict
-import imageio
-from progress.bar import IncrementalBar
-from pygifsicle import optimize
-from PIL import Image, ImageDraw, ImageFont
-import io
-
-
-def generate_map_timelapse(token_id, token_secret, filename):
-    print("Getting viewings")
-    viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)
-
-    viewing_count = len(viewings)
-    with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
-        VCinemaUtils.add_imdb_data_to_viewings(viewings, ['countries'], bar)
-
-    date_viewings = VCinemaUtils.filter_viewings(viewings, "date_watched")
-
-    date_viewings = OrderedDict(sorted(date_viewings.items(), key=lambda t: t[0]))
-
-    running_country_counts = {}
-    print(len(date_viewings.keys()))
-
-    with imageio.get_writer(filename, mode='I', duration=0.1) as writer:
-        for date, viewings in date_viewings.items():
-            date_viewings_countries = VCinemaUtils.filter_viewings(viewings, "countries")
-
-            for country in date_viewings_countries:
-                if country in running_country_counts.keys():
-                    running_country_counts[country] += date_viewings_countries[country]
-                else:
-                    running_country_counts[country] = date_viewings_countries[country]
-
-            map = FilmsByCountry.draw_map(running_country_counts, file_name="map-{}.svg".format(date))
-
-            stream = io.BytesIO(map)
-            img = Image.open(stream)
-
-            map_editable = ImageDraw.Draw(img)
-
-            # macos font path
-            font = ImageFont.truetype("/System/Library/Fonts/Supplemental/Arial.ttf", 48)
-
-            # image is 655 high
-            map_editable.text((2, 605), "{}".format(date), (255, 64, 0), font=font)
-
-            img_byte_arr = io.BytesIO()
-            img.save(img_byte_arr, format='PNG')
-            img_byte_arr = img_byte_arr.getvalue()
-
-            image = imageio.imread(img_byte_arr)
-            writer.append_data(image)
-
-    print("optimizing")
-
-    optimize(filename)
-
-    print("done")
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Create timelapse gif of vcinema countries')
-    parser.add_argument('token_id', help='API token ID.')
-    parser.add_argument('token_secret', help='API token secret.')
-    parser.add_argument('filename', help='Name of output gif')
-
-    args = parser.parse_args()
-
-    generate_map_timelapse(args.token_id, args.token_secret, args.filename)
@@ -1,33 +0,0 @@
-from imdb_utils import IMDbUtils
-
-import argparse
-from progress.bar import IncrementalBar
-
-
-def get_hidden_themes(imdb_ids):
-    film_keywords = []
-
-    with IncrementalBar('Retrieving movie data', max=len(imdb_ids), suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
-        for imdb_id in imdb_ids:
-            movie_data = IMDbUtils.get_movie_keywords(imdb_id)
-
-            if 'keywords' in movie_data:
-                keywords = set(movie_data['keywords'])
-                film_keywords.append(keywords)
-
-            bar.next()
-
-    hidden_themes = set.intersection(*film_keywords)
-
-    return hidden_themes
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('imdb_ids', nargs="+", default=[])
-
-    args = parser.parse_args()
-
-    hidden_themes = get_hidden_themes(args.imdb_ids)
-
-    print(hidden_themes)
@@ -1,60 +0,0 @@
-from imdb import IMDb
-import requests
-from bs4 import BeautifulSoup
-import re
-
-
-def get_movie(imdb_id):
-    ia = IMDb()
-
-    movie = ia.get_movie(imdb_id)
-
-    return movie
-
-
-def get_movie_keywords(imdb_id):
-    ia = IMDb()
-
-    movie = ia.get_movie(imdb_id, info="keywords")
-
-    return movie
-
-
-def get_api_keyword_count(keyword):
-    ia = IMDb()
-
-    count = len(ia.get_keyword(keyword))
-
-    return count
-
-
-def get_website_keyword_count(keyword):
-    try:
-        page = requests.get("https://www.imdb.com/search/keyword/?keywords=" + keyword)
-    except ConnectionError:
-        raise
-
-    soup = BeautifulSoup(page.content, 'html.parser')
-    elements = soup.findAll("div", class_="desc")
-
-    pagination_label = elements[0].text.replace("\n", "")
-
-    pagination_label_reg = r"(\d+,?\d*) titles"
-    pattern_match = re.compile(pagination_label_reg).search(pagination_label)
-
-    if pattern_match is not None:
-        return int(pattern_match.group(1).replace(',', ''))
-    else:
-        return 1
-
-
-def get_keyword_count(keyword):
-    count = get_api_keyword_count(keyword)
-
-    if count == 50:
-        try:
-            count = get_website_keyword_count(keyword)
-        except Exception as e:
-            raise
-
-    return count
@@ -1,16 +0,0 @@
-import json
-
-from wiki_pages import ViewingsCsv
-
-
-def update_viewings_csv(token_id, token_secret):
-    print("Updating CSV")
-    ViewingsCsv.update_viewings_csv(token_id, token_secret)
-    print("Done!")
-
-
-if __name__ == '__main__':
-    with open('token.json') as json_file:
-        token = json.load(json_file)
-
-    update_viewings_csv(token['token_id'], token['token_secret'])
@@ -1,69 +0,0 @@
-from wiki_pages import FilmsByCountry, FilmsByReference, FilmsByYear, HiddenThemes, KeywordScores, ViewingsCsv
-from vcinema_utils import VCinemaUtils
-
-import argparse
-import json
-from progress.bar import IncrementalBar
-
-
-def update_wiki(token_id, token_secret, update_csv, pages):
-    if update_csv:
-        print("Updating CSV")
-        ViewingsCsv.update_viewings_csv(token_id, token_secret)
-
-    print("Getting viewings")
-    viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
-
-    update_films_by_year = 'years' in pages
-    update_films_by_country = 'countries' in pages
-    update_film_references = 'references' in pages
-    update_hidden_themes = 'themes' in pages
-    update_keyword_scores = 'scores' in pages
-
-    data_fields = []
-    if update_films_by_year:
-        data_fields.append("year")
-
-    if update_films_by_country:
-        data_fields.append("countries")
-
-    if update_film_references or update_hidden_themes or update_keyword_scores:
-        data_fields.append("keywords")
-
-    viewing_count = len(viewings)
-    with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
-        VCinemaUtils.add_imdb_data_to_viewings(viewings, data_fields, bar)
-
-    print("Processing viewing data")
-
-    if update_films_by_year:
-        films_by_year = FilmsByYear.get_films_by_year(viewings)
-        FilmsByYear.update_page(token_id, token_secret, films_by_year)
-    if update_films_by_country:
-        films_by_country = FilmsByCountry.get_films_by_country(viewings)
-        FilmsByCountry.update_page(token_id, token_secret, films_by_country)
-    if update_film_references:
-        films_by_reference = FilmsByReference.get_films_by_reference(viewings)
-        FilmsByReference.update_page(token_id, token_secret, films_by_reference)
-    if update_hidden_themes:
-        hidden_themes = HiddenThemes.get_hidden_themes(viewings, token_id, token_secret)
-        HiddenThemes.update_page(token_id, token_secret, hidden_themes)
-    if update_keyword_scores:
-        keyword_scores = KeywordScores.get_keyword_scores(viewings)
-        KeywordScores.update_page(token_id, token_secret, keyword_scores)
-
-    print("Done!")
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Update wiki pages.')
-
-    parser.add_argument('--pages', nargs="+", default=['years', 'countries', 'references', 'themes', 'scores'], required=False)
-    parser.add_argument("--do_not_update_csv", action="store_true")
-
-    args = parser.parse_args()
-
-    with open('token.json') as json_file:
-        token = json.load(json_file)
-
-    update_wiki(token['token_id'], token['token_secret'], not args.do_not_update_csv, args.pages)
@@ -1,150 +0,0 @@
-from collections import Counter
-from concurrent.futures import ThreadPoolExecutor
-import csv
-
-from imdb_utils import IMDbUtils
-from bookstack import Bookstack
-
-
-JACKNET_WIKI_URL = "https://wiki.jacknet.io"
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/csv
-CSV_PAGE_ID = 11
-
-
-def get_viewings_csv_attachment_id(token_id, token_secret):
-    attachments = Bookstack.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)
-
-    viewings_csv_file_name = "vcinema.csv"
-
-    return next((x['id'] for x in attachments if x['uploaded_to'] == CSV_PAGE_ID and x['name'] == viewings_csv_file_name), None)
-
-
-def get_vcinema_viewings(token_id, token_secret, viewings_csv=None, combine_repeat_viewings=True):
-    if viewings_csv is None:
-        attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
-        viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
-
-    viewings_csv = viewings_csv.decode("utf-8")
-    viewings_csv_rows = viewings_csv.strip().split("\n")
-
-    viewings = list(csv.DictReader(viewings_csv_rows, quotechar='"'))
-
-    if combine_repeat_viewings:
-        for viewing in viewings:
-            viewing['viewings'] = [
-                {'date_watched': viewing['date_watched'], 'season': viewing['season'], 'rating': viewing['rating']}]
-            viewing.pop('date_watched')
-            viewing.pop('season')
-            viewing.pop('rating')
-
-        watch_counts = Counter([x['imdb_id'] for x in viewings])
-        repeat_watches = [k for k, v in watch_counts.items() if v > 1]
-
-        for film in repeat_watches:
-            viewing_indexes = [index for index, viewing in enumerate(viewings) if viewing['imdb_id'] == film]
-
-            first_watch = viewings[viewing_indexes[0]]
-
-            for index in viewing_indexes[1::]:
-                first_watch['viewings'].extend(viewings[index]['viewings'])
-
-            for index in reversed(viewing_indexes[1::]):
-                viewings.pop(index)
-
-    return viewings
-
-
-def add_imdb_data(imdb_id, viewings, data_fields, progressbar=None):
-    movie = IMDbUtils.get_movie(imdb_id)
-
-    for viewing in viewings:
-        if viewing['imdb_id'] == movie.movieID:
-            for field_name in data_fields:
-                if field_name in movie:
-                    viewing[field_name] = movie[field_name]
-
-    if progressbar is not None:
-        progressbar.next()
-
-
-def add_imdb_keywords(imdb_id, viewings, progressbar=None):
-    movie = IMDbUtils.get_movie_keywords(imdb_id)
-
-    for viewing in viewings:
-        if viewing['imdb_id'] == movie.movieID:
-            if 'keywords' in movie:
-                viewing['keywords'] = movie['keywords']
-
-    if progressbar is not None:
-        progressbar.next()
-
-
-def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
-    with ThreadPoolExecutor(4) as executor:
-        future_imdb_tasks = set()
-
-        if ('keywords' in field_names and len(field_names) > 1) or ('keywords' not in field_names and len(field_names) > 0):
-            future_imdb_tasks.update(executor.submit(add_imdb_data, viewing['imdb_id'], viewings, field_names, progress_bar) for viewing in viewings)
-
-        if 'keywords' in field_names:
-            future_imdb_tasks.update(executor.submit(add_imdb_keywords, viewing['imdb_id'], viewings, progress_bar) for viewing in viewings)
-
-        progress_bar.max = len(future_imdb_tasks)
-
-    if progress_bar is not None:
-        progress_bar.finish()
-
-
-def filter_viewings(viewings, filter_field):
-    viewings_filtered = {}
-
-    for viewing in viewings:
-        if filter_field in viewing:
-            viewing_field = viewing[filter_field]
-            if isinstance(viewing_field, list):
-                for fve in list(viewing_field):
-                    if fve in viewings_filtered.keys():
-                        viewings_filtered[fve] += [viewing]
-                    else:
-                        viewings_filtered[fve] = [viewing]
-            else:
-                if viewing_field in viewings_filtered.keys():
-                    viewings_filtered[viewing_field] += [viewing]
-                else:
-                    viewings_filtered[viewing_field] = [viewing]
-
-    return viewings_filtered
-
-
-def get_film_list(films):
-    film_links = []
-
-    for film in films:
-        film_link = generate_imdb_film_link(film)
-        film_links.append(film_link)
-
-    if len(film_links) > 0:
-        return "<br>".join(film_links)
-    else:
-        return ""
-
-
-def generate_markdown_link(text, url):
-    return "[{}]({})".format(text, url)
-
-
-def generate_imdb_film_link(film):
-    return generate_markdown_link(film['title'], generate_imdb_url(film['imdb_id']))
-
-
-def generate_wikipedia_page_link(page_title):
-    return generate_markdown_link(page_title, generate_wikipedia_url(page_title))
-
-
-def generate_imdb_url(imdb_id):
-    return "https://www.imdb.com/title/tt{}/".format(imdb_id)
-
-
-def generate_wikipedia_url(page_title):
-    return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))
@@ -1,104 +0,0 @@
-import base64
-from collections import Counter, OrderedDict
-import csv
-import os
-import pyvips
-import worldmap
-import warnings
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-warnings.filterwarnings("ignore")
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-country
-PAGE_ID = 34
-
-
-def get_films_by_country(viewings):
-    viewings_filtered_by_country = VCinemaUtils.filter_viewings(viewings, "countries")
-
-    if "Czechia" in viewings_filtered_by_country.keys():
-        viewings_filtered_by_country["Czech Republic"] = viewings_filtered_by_country["Czechia"]
-        viewings_filtered_by_country.pop("Czechia")
-
-    return viewings_filtered_by_country
-
-
-def update_page(token_id, token_secret, films_by_country):
-    page = build_page(films_by_country)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(films_by_country):
-    table = build_table(films_by_country)
-
-    country_counter = Counter(films_by_country)
-    png_data = draw_map(country_counter)
-    encoded = base64.b64encode(png_data).decode("utf-8")
-    image = '<img src="data:image/png;base64,{}">'.format(encoded)
-    page = image + "\n" + table
-
-    return page
-
-
-def get_flags_dict():
-    flags = {}
-
-    with open('country-flags.csv', newline='') as f:
-        reader = csv.reader(f, quotechar="\"")
-        next(reader, None)  # skip the headers
-
-        for row in reader:
-            flags[row[0]] = row[1]
-
-    return flags
-
-
-def build_table(films_by_country):
-    films_by_country_sorted = OrderedDict(sorted(films_by_country.items(), key=lambda t: t[0]))
-
-    flags = get_flags_dict()
-
-    table = "| Country | Films |\n| - | - |"
-
-    for country, films in films_by_country_sorted.items():
-        table += "\n"
-
-        row_data = []
-
-        country_label = country
-        if country in flags.keys():
-            country_label += " "
-            country_label += flags[country]
-
-        row_data.append(country_label)
-        row_data.append(VCinemaUtils.get_film_list(films))
-
-        table += " | ".join(row_data)
-
-    return table
-
-
-def draw_map(films_by_country, file_name="vcinema_map.svg"):
-    films_by_country['Germany'] += films_by_country['West Germany']
-    del films_by_country['West Germany']
-
-    counter = Counter(films_by_country)
-    countries = [k for k, v in counter.items()]
-    counts = [len(v) for _, v in counter.items()]
-
-    max_count = max(counts)
-
-    opacity = [0.5 + (x / (float(max_count))/2.0) for x in counts]
-
-    worldmap.plot(countries, cmap=["#FF4000"], opacity=opacity, filename=file_name, verbose=False)
-
-    image = pyvips.Image.new_from_file(file_name)
-    image = image.thumbnail_image(1000, crop=pyvips.Interesting.ALL)
-
-    png_data = image.write_to_buffer(".png")
-
-    os.remove(file_name)
-
-    return png_data
@@ -1,100 +0,0 @@
-from collections import OrderedDict
-import wikipedia
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/references
-PAGE_ID = 62
-
-
-def get_films_by_reference(viewings):
-    films_by_reference = {}
-
-    for viewing in viewings:
-        if "keywords" in viewing.keys():
-            for keyword in viewing["keywords"]:
-                if keyword.startswith("reference-to-"):
-
-                    for reference in films_by_reference:
-                        if keyword in films_by_reference[reference]["keywords"]:
-                            films_by_reference[reference]["films"].append(viewing)
-                            break
-                    else:
-                        keyword = keyword[13:]
-
-                        if keyword.startswith("a-"):
-                            keyword = keyword[2:]
-
-                        if keyword.endswith("-character"):
-                            keyword = keyword[:-10]
-
-                        referenced = keyword.replace("-", " ")
-
-                        try:
-                            searches = wikipedia.search(referenced, suggestion=False)
-                            referenced_page = wikipedia.page(title=referenced, auto_suggest=False)
-
-                            page_title = referenced_page.title
-                            page_url = referenced_page.url
-
-                        except wikipedia.DisambiguationError as e:
-                            page_title = e.title
-                            page_title = page_title[0].upper() + page_title[1:]
-                            page_url = VCinemaUtils.generate_wikipedia_url(page_title)
-                        except wikipedia.PageError as _:
-                            if len(searches) > 0:
-                                try:
-                                    referenced_page = wikipedia.page(title=searches[0], auto_suggest=False)
-
-                                    page_title = referenced_page.title
-                                    page_url = referenced_page.url
-                                except wikipedia.DisambiguationError as e:
-                                    page_title = e.title
-                                    page_title = page_title[0].upper() + page_title[1:]
-                                    page_url = VCinemaUtils.generate_wikipedia_url(page_title)
-                            else:
-                                page_title = referenced.title()
-                                page_url = None
-
-                        if page_title in films_by_reference.keys():
-                            films_by_reference[page_title]["keywords"].append(keyword)
-
-                            if viewing not in films_by_reference[page_title]["films"]:
-                                films_by_reference[page_title]["films"].append(viewing)
-
-                        else:
-                            films_by_reference[page_title] = {"url": page_url,
-                                                              "keywords": [keyword],
-                                                              "films": [viewing]}
-
-    return films_by_reference
-
-
-def update_page(token_id, token_secret, films_by_reference_keyword):
-    page = build_page(films_by_reference_keyword)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(films_by_reference):
-    films_by_reference = OrderedDict(sorted(films_by_reference.items(), key=lambda t: t[0]))
-
-    table = "| Referenced | Films |\n| - | - |"
-
-    for reference, referenced in films_by_reference.items():
-        table += "\n"
-
-        row_data = []
-
-        reference_url = referenced["url"]
-        referenced_films = referenced["films"]
-
-        if reference_url is None:
-            row_data.append(reference)
-        else:
-            row_data.append(VCinemaUtils.generate_markdown_link(reference, reference_url))
-        row_data.append(VCinemaUtils.get_film_list(referenced_films))
-
-        table += " | ".join(row_data)
-
-    return table
@@ -1,35 +0,0 @@
-from collections import OrderedDict
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-release-year
-PAGE_ID = 24
-
-
-def get_films_by_year(viewings):
-    viewings_filtered_by_year = VCinemaUtils.filter_viewings(viewings, "year")
-
-    return viewings_filtered_by_year
-
-
-def update_page(token_id, token_secret, films_by_year):
-    page = build_page(films_by_year)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(films_by_year):
-    films_by_year_sorted = OrderedDict(sorted(films_by_year.items(), key=lambda t: t[0], reverse=True))
-
-    page = "| Year | Films |\n| - | - |"
-
-    for year in films_by_year_sorted.keys():
-        page += "\n"
-
-        row_data = []
-        row_data.append(str(year))
-        row_data.append(VCinemaUtils.get_film_list(films_by_year_sorted[year]))
-
-        page += " | ".join(row_data)
-
-    return page
@@ -1,86 +0,0 @@
-from collections import OrderedDict
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-reference
-PAGE_ID = 63
-
-
-def get_hidden_themes(viewings, token_id, token_secret):
-    # Bit horrible to need to request this again, but it affects the order of the result table
-    viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)
-
-    # Copy keywords from grouped viewings to ungrouped viewings
-    for viewing_ungrouped in viewings_ungrouped:
-        for viewing in viewings:
-            if viewing['imdb_id'] == viewing_ungrouped['imdb_id']:
-                if 'keywords' in viewing:
-                    viewing_ungrouped['keywords'] = viewing['keywords']
-                break
-
-    viewings_filtered_watch_date = VCinemaUtils.filter_viewings(viewings_ungrouped, "date_watched")
-
-    for date, viewings in viewings_filtered_watch_date.items():
-        viewing_dict = {"viewings": viewings}
-
-        viewings_filtered_watch_date[date] = viewing_dict
-
-    # Add hidden themes
-    for date, data in viewings_filtered_watch_date.items():
-        keyword_counts = {}
-
-        if len(data['viewings']) > 1:
-            for viewing in data['viewings']:
-                if 'keywords' in viewing:
-                    for keyword in viewing['keywords']:
-                        if keyword in keyword_counts.keys():
-                            keyword_counts[keyword] += 1
-                        else:
-                            keyword_counts[keyword] = 1
-
-            keyword_counts = {k: v for k, v in sorted(keyword_counts.items(), key=lambda item: item[1], reverse=True)}
-            hidden_themes = {}
-
-            for keyword in keyword_counts:
-                rating = float(keyword_counts[keyword]) / float(len(data['viewings']))
-                if rating > 0.5:
-                    hidden_themes[keyword] = rating
-
-            viewings_filtered_watch_date[date]['hidden_themes'] = hidden_themes
-
-    return viewings_filtered_watch_date
-
-
-def update_page(token_id, token_secret, hidden_themes):
-    page = build_page(hidden_themes)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(hidden_themes):
-    hidden_themes = OrderedDict(sorted(hidden_themes.items(), key=lambda t: t[0]))
-
-    table = "| Date | Films | Hidden Themes |\n| - | - | - |"
-
-    for date, data in hidden_themes.items():
-        table += "\n"
-
-        row_data = []
-        row_data.append(str(date))
-        row_data.append(VCinemaUtils.get_film_list(data['viewings']))
-        if 'hidden_themes' in data and data['hidden_themes'] != {}:
-            hidden_theme_labels = []
-
-            for hidden_theme in sorted(data['hidden_themes'].keys()):
-                if data['hidden_themes'][hidden_theme] == 1:
-                    hidden_theme_labels.append(hidden_theme)
-                else:
-                    hidden_theme_labels.append("<i>{} ({}%)</i>".format(hidden_theme, round(data['hidden_themes'][hidden_theme] * 100)))
-
-            row_data.append("<br>".join(hidden_theme_labels))
-        else:
-            row_data.append("N/A")
-
-        table += " | ".join(row_data)
-
-    return table
@@ -1,81 +0,0 @@
-from collections import OrderedDict
-from progress.bar import IncrementalBar
-import math
-from concurrent.futures import ThreadPoolExecutor
-
-from bookstack import Bookstack
-from imdb_utils import IMDbUtils
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/keyword-scores
-PAGE_ID = 23
-
-
-def get_keyword_scores(viewings):
-    viewings_filtered_keyword = VCinemaUtils.filter_viewings(viewings, "keywords")
-
-    for keyword, viewings in viewings_filtered_keyword.items():
-        viewings_filtered_keyword[keyword] = {"vcinema_films": viewings}
-
-    min_vcinema_count = 2
-    min_imdb_count = 4
-
-    add_keyword_totals(viewings_filtered_keyword, min_vcinema_count)
-    add_keyword_scores(viewings_filtered_keyword, min_vcinema_count, min_imdb_count)
-
-    return viewings_filtered_keyword
-
-
-def update_page(token_id, token_secret, keyword_data):
-    page = build_page(keyword_data)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def add_keyword_totals(keywords, min_vcinema_count):
-    keyword_count = len([keyword for keyword in keywords.keys() if len(keywords[keyword]['vcinema_films']) >= min_vcinema_count])
-
-    with IncrementalBar(message='%(percent).1f%% - %(eta)ds remaining', max=keyword_count, check_tty=False) as bar:
-        with ThreadPoolExecutor(6) as executor:
-            for keyword, data in keywords.items():
-                if len(data['vcinema_films']) >= min_vcinema_count:
-                    executor.submit(add_keyword_total, keyword, keywords, bar)
-
-
-def add_keyword_total(keyword, keywords, progress_bar=None):
-    keyword_total = IMDbUtils.get_keyword_count(keyword)
-
-    keywords[keyword]['total'] = keyword_total
-
-    if progress_bar is not None:
-        progress_bar.next()
-
-
-def add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count):
-    for keyword in keyword_data.keys():
-        if 'total' in keyword_data[keyword]:
-            vcinema_count = len(keyword_data[keyword]['vcinema_films'])
-            total_count = keyword_data[keyword]['total']
-
-            if vcinema_count >= min_vcinema_count and total_count >= min_imdb_count:
-                score = vcinema_count / math.log(total_count)
-
-                keyword_data[keyword]['score'] = score
-
-
-def build_page(keyword_data, minimum_score=1.0):
-    keyword_data = {k: v for k, v in keyword_data.items() if 'score' in v and v['score'] >= minimum_score}
-    keyword_data = OrderedDict(sorted(keyword_data.items(), key=lambda t: t[1]['score'], reverse=True))
-
-    table = "| Keyword | Number of VCinema Films | Total IMDb entries | Score |\n| - | - | - | - |"
-
-    for keyword, data in keyword_data.items():
-        table += "\n"
-
-        row_data = []
-        row_data.append(str(keyword))
-        row_data.append(str(len(data['vcinema_films'])))
-        row_data.append(str(data['total']))
-        row_data.append(str(round(data['score'], 3)))
-        table += " | ".join(row_data)
-
-    return table
@@ -1,20 +0,0 @@
-from bs4 import BeautifulSoup
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/csv
-PAGE_ID = 11
-
-
-def update_viewings_csv(token_id, token_secret):
-    print("Retrieving viewings page")
-    html_page = Bookstack.get_page_html(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID)
-
-    soup = BeautifulSoup(html_page, 'html.parser')
-    csv_data = soup.find("code").text.strip().encode('utf-8')
-
-    existing_attachment_id = VCinemaUtils.get_viewings_csv_attachment_id(token_id, token_secret)
-    print("Updating file")
-    Bookstack.update_attachment(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, existing_attachment_id, "vcinema.csv", csv_data, PAGE_ID)
-    print("File updated")
wiki_utils (submodule) · 1 change
@@ -0,0 +1 @@
+Subproject commit 88be36d5cd7378a01d9861726bf123715fe81d4a