Compare commits
No commits in common. "master" and "056e3b474f202fcc9090dbe63fbd6ac710b565bd" have entirely different histories.
master ... 056e3b474f
.gitignore (vendored, 5 changed lines)
@@ -1,5 +0,0 @@
-
-*.pyc
-.idea/*
-__pycache__/*
-.DS_Store
.gitmodules (vendored, 6 changed lines)
@@ -1,3 +1,3 @@
-[submodule "bookstack"]
-	path = bookstack
-	url = git@git.jacknet.io:sarah/bookstack.git
+[submodule "wiki_utils"]
+	path = wiki_utils
+	url = gitea@git.jacknet.io:sarah/wiki_utils.git
bookstack (submodule, deleted)
@@ -1 +0,0 @@
-Subproject commit 8f6e38cb337bcf51b0790f2db7001681ceb9338f
country-flags.csv (deleted)
@@ -1,258 +0,0 @@
-Country,Flag
-Afghanistan,🇦🇫
-Åland Islands,🇦🇽
-Albania,🇦🇱
-Algeria,🇩🇿
-American Samoa,🇦🇸
-Andorra,🇦🇩
-Angola,🇦🇴
-Anguilla,🇦🇮
-Antarctica,🇦🇶
-Antigua and Barbuda,🇦🇬
-Argentina,🇦🇷
-Armenia,🇦🇲
-Aruba,🇦🇼
-Australia,🇦🇺
-Austria,🇦🇹
-Azerbaijan,🇦🇿
-Bahamas,🇧🇸
-Bahrain,🇧🇭
-Bangladesh,🇧🇩
-Barbados,🇧🇧
-Belarus,🇧🇾
-Belgium,🇧🇪
-Belize,🇧🇿
-Benin,🇧🇯
-Bermuda,🇧🇲
-Bhutan,🇧🇹
-Bolivia,🇧🇴
-"Bonaire, Sint Eustatius and Saba",🇧🇶
-Bosnia and Herzegovina,🇧🇦
-Botswana,🇧🇼
-Bouvet Island,🇧🇻
-Brazil,🇧🇷
-British Indian Ocean Territory,🇮🇴
-British Virgin Islands,🇻🇬
-Brunei Darussalam,🇧🇳
-Bulgaria,🇧🇬
-Burkina Faso,🇧🇫
-Burma,🇲🇲
-Burundi,🇧🇮
-Cambodia,🇰🇭
-Cameroon,🇨🇲
-Canada,🇨🇦
-Cape Verde,🇨🇻
-Cayman Islands,🇰🇾
-Central African Republic,🇨🇫
-Chad,🇹🇩
-Chile,🇨🇱
-China,🇨🇳
-Christmas Island,🇨🇽
-Cocos (Keeling) Islands,🇨🇨
-Colombia,🇨🇴
-Comoros,🇰🇲
-Congo,🇨🇩
-Cook Islands,🇨🇰
-Costa Rica,🇨🇷
-Côte d'Ivoire,🇨🇮
-Croatia,🇭🇷
-Cuba,🇨🇺
-Cyprus,🇨🇾
-Czech Republic,🇨🇿
-Czechoslovakia,🇨🇿
-Democratic Republic of the Congo,🇨🇩
-Denmark,🇩🇰
-Djibouti,🇩🇯
-Dominica,🇩🇲
-Dominican Republic,🇩🇴
-East Germany,➡️🇩🇪
-Ecuador,🇪🇨
-Egypt,🇪🇬
-El Salvador,🇸🇻
-Equatorial Guinea,🇬🇶
-Eritrea,🇪🇷
-Estonia,🇪🇪
-Ethiopia,🇪🇹
-Falkland Islands,🇫🇰
-Faroe Islands,🇫🇴
-Federated States of Micronesia,🇫🇲
-Fiji,🇫🇯
-Finland,🇫🇮
-France,🇫🇷
-French Guiana,🇬🇫
-French Polynesia,🇵🇫
-French Southern Territories,🇹🇫
-Gabon,🇬🇦
-Gambia,🇬🇲
-Georgia,🇬🇪
-Germany,🇩🇪
-Ghana,🇬🇭
-Gibraltar,🇬🇮
-Greece,🇬🇷
-Greenland,🇬🇱
-Grenada,🇬🇩
-Guadeloupe,🇬🇵
-Guam,🇬🇺
-Guatemala,🇬🇹
-Guernsey,🇬🇬
-Guinea,🇬🇳
-Guinea-Bissau,🇬🇼
-Guyana,🇬🇾
-Haiti,🇭🇹
-Heard Island and McDonald Islands,🇭🇲
-Holy See (Vatican City State),🇻🇦
-Honduras,🇭🇳
-Hong Kong,🇭🇰
-Hungary,🇭🇺
-Iceland,🇮🇸
-India,🇮🇳
-Indonesia,🇮🇩
-Iran,🇮🇷
-Iraq,🇮🇶
-Ireland,🇮🇪
-Isle of Man,🇮🇲
-Israel,🇮🇱
-Italy,🇮🇹
-Jamaica,🇯🇲
-Japan,🇯🇵
-Jersey,🇯🇪
-Jordan,🇯🇴
-Kazakhstan,🇰🇿
-Kenya,🇰🇪
-Kiribati,🇰🇮
-Korea,🇰🇵🇰🇷
-Kosovo,🇽🇰
-Kuwait,🇰🇼
-Kyrgyzstan,🇰🇬
-Laos,🇱🇦
-Latvia,🇱🇻
-Lebanon,🇱🇧
-Lesotho,🇱🇸
-Liberia,🇱🇷
-Libya,🇱🇾
-Liechtenstein,🇱🇮
-Lithuania,🇱🇹
-Luxembourg,🇱🇺
-Macao,🇲🇴
-Madagascar,🇲🇬
-Malawi,🇲🇼
-Malaysia,🇲🇾
-Maldives,🇲🇻
-Mali,🇲🇱
-Malta,🇲🇹
-Marshall Islands,🇲🇭
-Martinique,🇲🇶
-Mauritania,🇲🇷
-Mauritius,🇲🇺
-Mayotte,🇾🇹
-Mexico,🇲🇽
-Moldova,🇲🇩
-Monaco,🇲🇨
-Mongolia,🇲🇳
-Montenegro,🇲🇪
-Montserrat,🇲🇸
-Morocco,🇲🇦
-Mozambique,🇲🇿
-Myanmar,🇲🇲
-Namibia,🇳🇦
-Nauru,🇳🇷
-Nepal,🇳🇵
-Netherlands,🇳🇱
-Netherlands Antilles,🇳🇱
-New Caledonia,🇳🇨
-New Zealand,🇳🇿
-Nicaragua,🇳🇮
-Niger,🇳🇪
-Nigeria,🇳🇬
-Niue,🇳🇺
-Norfolk Island,🇳🇫
-North Korea,🇰🇵
-North Vietnam,🇻🇳
-Northern Mariana Islands,🇲🇵
-Norway,🇳🇴
-Oman,🇴🇲
-Pakistan,🇵🇰
-Palau,🇵🇼
-Palestine,🇵🇸
-Palestinian Territory,🇵🇸
-Panama,🇵🇦
-Papua New Guinea,🇵🇬
-Paraguay,🇵🇾
-Peru,🇵🇪
-Philippines,🇵🇭
-Poland,🇵🇱
-Portugal,🇵🇹
-Pitcairn,🇵🇳
-Puerto Rico,🇵🇷
-Qatar,🇶🇦
-Republic of Macedonia,🇲🇰
-Réunion,🇷🇪
-Romania,🇷🇴
-Russia,🇷🇺
-Rwanda,🇷🇼
-Saint Barthélemy,🇧🇱
-Saint Helena,🇸🇭
-Saint Kitts and Nevis,🇰🇳
-Saint Lucia,🇱🇨
-Saint Martin (French part),🇫🇷
-Saint Pierre and Miquelon,🇵🇲
-Saint Vincent and the Grenadines,🇻🇨
-Samoa,🇼🇸
-San Marino,🇸🇲
-Sao Tome and Principe,🇸🇹
-Saudi Arabia,🇸🇦
-Senegal,🇸🇳
-Serbia,🇷🇸
-Serbia and Montenegro,🇷🇸🇲🇪
-Seychelles,🇸🇨
-Siam,🇹🇭
-Sierra Leone,🇸🇱
-Singapore,🇸🇬
-Slovakia,🇸🇰
-Slovenia,🇸🇮
-Solomon Islands,🇸🇧
-Somalia,🇸🇴
-South Africa,🇿🇦
-South Georgia and the South Sandwich Islands,🇬🇸
-South Korea,🇰🇷
-Spain,🇪🇸
-Sri Lanka,🇱🇰
-Sudan,🇸🇩
-Suriname,🇸🇷
-Svalbard and Jan Mayen,🇸🇯
-Swaziland,🇸🇿
-Sweden,🇸🇪
-Switzerland,🇨🇭
-Syria,🇸🇾
-Taiwan,🇹🇼
-Tajikistan,🇹🇯
-Tanzania,🇹🇿
-Thailand,🇹🇭
-Timor-Leste,🇹🇱
-Togo,🇹🇬
-Tokelau,🇹🇰
-Tonga,🇹🇴
-Trinidad and Tobago,🇹🇹
-Tunisia,🇹🇳
-Turkey,🇹🇷
-Turkmenistan,🇹🇲
-Turks and Caicos Islands,🇹🇨
-Tuvalu,🇹🇻
-U.S. Virgin Islands,🇻🇮
-Uganda,🇺🇬
-Ukraine,🇺🇦
-United Arab Emirates,🇦🇪
-United Kingdom,🇬🇧
-United States,🇺🇸
-United States Minor Outlying Islands,🇺🇲
-Uruguay,🇺🇾
-Uzbekistan,🇺🇿
-Vanuatu,🇻🇺
-Venezuela,🇻🇪
-Vietnam,🇻🇳
-Wallis and Futuna,🇫🇷
-West Germany,⬅️🇩🇪
-Western Sahara,🇪🇭
-Yemen,🇾🇪
-Zambia,🇿🇲
-Zimbabwe,🇿🇼
@@ -1,73 +0,0 @@
-from wiki_pages import FilmsByCountry
-from vcinema_utils import VCinemaUtils
-
-import argparse
-from collections import OrderedDict
-import imageio
-from progress.bar import IncrementalBar
-from pygifsicle import optimize
-from PIL import Image, ImageDraw, ImageFont
-import io
-
-
-def generate_map_timelapse(token_id, token_secret, filename):
-    print("Getting viewings")
-    viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)
-
-    viewing_count = len(viewings)
-    with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
-        VCinemaUtils.add_imdb_data_to_viewings(viewings, ['countries'], bar)
-
-    date_viewings = VCinemaUtils.filter_viewings(viewings, "date_watched")
-
-    date_viewings = OrderedDict(sorted(date_viewings.items(), key=lambda t: t[0]))
-
-    running_country_counts = {}
-    print(len(date_viewings.keys()))
-
-    with imageio.get_writer(filename, mode='I', duration=0.1) as writer:
-        for date, viewings in date_viewings.items():
-            date_viewings_countries = VCinemaUtils.filter_viewings(viewings, "countries")
-
-            for country in date_viewings_countries:
-                if country in running_country_counts.keys():
-                    running_country_counts[country] += date_viewings_countries[country]
-                else:
-                    running_country_counts[country] = date_viewings_countries[country]
-
-            map = FilmsByCountry.draw_map(running_country_counts, file_name="map-{}.svg".format(date))
-
-            stream = io.BytesIO(map)
-            img = Image.open(stream)
-
-            map_editable = ImageDraw.Draw(img)
-
-            # macos font path
-            font = ImageFont.truetype("/System/Library/Fonts/Supplemental/Arial.ttf", 48)
-
-            # image is 655 high
-            map_editable.text((2, 605), "{}".format(date), (255, 64, 0), font=font)
-
-            img_byte_arr = io.BytesIO()
-            img.save(img_byte_arr, format='PNG')
-            img_byte_arr = img_byte_arr.getvalue()
-
-            image = imageio.imread(img_byte_arr)
-            writer.append_data(image)
-
-    print("optimizing")
-
-    optimize(filename)
-
-    print("done")
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Create timelapse gif of vcinema countries')
-    parser.add_argument('token_id', help='API token ID.')
-    parser.add_argument('token_secret', help='API token secret.')
-    parser.add_argument('filename', help='Name of output gif')
-
-    args = parser.parse_args()
-
-    generate_map_timelapse(args.token_id, args.token_secret, args.filename)
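Note on the script above: duration=0.1 in imageio.get_writer gives each date's frame a tenth of a second, roughly a 10 fps timelapse, and pygifsicle's optimize(filename) then recompresses the finished gif in place. Both readings come from the two libraries' defaults rather than anything stated in the diff.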
@@ -1,33 +0,0 @@
-from imdb_utils import IMDbUtils
-
-import argparse
-from progress.bar import IncrementalBar
-
-
-def get_hidden_themes(imdb_ids):
-    film_keywords = []
-
-    with IncrementalBar('Retrieving movie data', max=len(imdb_ids), suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
-        for imdb_id in imdb_ids:
-            movie_data = IMDbUtils.get_movie_keywords(imdb_id)
-
-            if 'keywords' in movie_data:
-                keywords = set(movie_data['keywords'])
-                film_keywords.append(keywords)
-
-            bar.next()
-
-    hidden_themes = set.intersection(*film_keywords)
-
-    return hidden_themes
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('imdb_ids', nargs="+", default=[])
-
-    args = parser.parse_args()
-
-    hidden_themes = get_hidden_themes(args.imdb_ids)
-
-    print(hidden_themes)
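Aside: set.intersection(*film_keywords) is the whole "hidden theme" computation above; it keeps only the keywords common to every film. A minimal standalone illustration (the keyword values are invented for the example):

    film_keywords = [
        {"cult-film", "dystopia", "robot"},
        {"dystopia", "robot", "time-travel"},
        {"robot", "dystopia", "neo-noir"},
    ]

    # Keywords shared by all three films: the "hidden themes".
    print(set.intersection(*film_keywords))  # {'dystopia', 'robot'}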
imdb_utils.IMDbUtils (deleted)
@@ -1,60 +0,0 @@
-from imdb import IMDb
-import requests
-from bs4 import BeautifulSoup
-import re
-
-
-def get_movie(imdb_id):
-    ia = IMDb()
-
-    movie = ia.get_movie(imdb_id)
-
-    return movie
-
-
-def get_movie_keywords(imdb_id):
-    ia = IMDb()
-
-    movie = ia.get_movie(imdb_id, info="keywords")
-
-    return movie
-
-
-def get_api_keyword_count(keyword):
-    ia = IMDb()
-
-    count = len(ia.get_keyword(keyword))
-
-    return count
-
-
-def get_website_keyword_count(keyword):
-    try:
-        page = requests.get("https://www.imdb.com/search/keyword/?keywords=" + keyword)
-    except ConnectionError:
-        raise
-
-    soup = BeautifulSoup(page.content, 'html.parser')
-    elements = soup.findAll("div", class_="desc")
-
-    pagination_label = elements[0].text.replace("\n", "")
-
-    pagination_label_reg = r"(\d+,?\d*) titles"
-    pattern_match = re.compile(pagination_label_reg).search(pagination_label)
-
-    if pattern_match is not None:
-        return int(pattern_match.group(1).replace(',', ''))
-    else:
-        return 1
-
-
-def get_keyword_count(keyword):
-    count = get_api_keyword_count(keyword)
-
-    if count == 50:
-        try:
-            count = get_website_keyword_count(keyword)
-        except Exception as e:
-            raise
-
-    return count
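Note: the count == 50 check in get_keyword_count reads as a truncation guard. IMDbPY's get_keyword appears to return at most 50 titles, so a result of exactly 50 is treated as possibly capped and the true total is re-read from the "N titles" label on IMDb's keyword search page. The 50-item limit is an inference from the code, not something stated in it.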
@@ -1,16 +0,0 @@
-import json
-
-from wiki_pages import ViewingsCsv
-
-
-def update_viewings_csv(token_id, token_secret):
-    print("Updating CSV")
-    ViewingsCsv.update_viewings_csv(token_id, token_secret)
-    print("Done!")
-
-
-if __name__ == '__main__':
-    with open('token.json') as json_file:
-        token = json.load(json_file)
-
-    update_viewings_csv(token['token_id'], token['token_secret'])
@@ -1,69 +0,0 @@
-from wiki_pages import FilmsByCountry, FilmsByReference, FilmsByYear, HiddenThemes, KeywordScores, ViewingsCsv
-from vcinema_utils import VCinemaUtils
-
-import argparse
-import json
-from progress.bar import IncrementalBar
-
-
-def update_wiki(token_id, token_secret, update_csv, pages):
-    if update_csv:
-        print("Updating CSV")
-        ViewingsCsv.update_viewings_csv(token_id, token_secret)
-
-    print("Getting viewings")
-    viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)
-
-    update_films_by_year = 'years' in pages
-    update_films_by_country = 'countries' in pages
-    update_film_references = 'references' in pages
-    update_hidden_themes = 'themes' in pages
-    update_keyword_scores = 'scores' in pages
-
-    data_fields = []
-    if update_films_by_year:
-        data_fields.append("year")
-
-    if update_films_by_country:
-        data_fields.append("countries")
-
-    if update_film_references or update_hidden_themes or update_keyword_scores:
-        data_fields.append("keywords")
-
-    viewing_count = len(viewings)
-    with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
-        VCinemaUtils.add_imdb_data_to_viewings(viewings, data_fields, bar)
-
-    print("Processing viewing data")
-
-    if update_films_by_year:
-        films_by_year = FilmsByYear.get_films_by_year(viewings)
-        FilmsByYear.update_page(token_id, token_secret, films_by_year)
-    if update_films_by_country:
-        films_by_country = FilmsByCountry.get_films_by_country(viewings)
-        FilmsByCountry.update_page(token_id, token_secret, films_by_country)
-    if update_film_references:
-        films_by_reference = FilmsByReference.get_films_by_reference(viewings)
-        FilmsByReference.update_page(token_id, token_secret, films_by_reference)
-    if update_hidden_themes:
-        hidden_themes = HiddenThemes.get_hidden_themes(viewings, token_id, token_secret)
-        HiddenThemes.update_page(token_id, token_secret, hidden_themes)
-    if update_keyword_scores:
-        keyword_scores = KeywordScores.get_keyword_scores(viewings)
-        KeywordScores.update_page(token_id, token_secret, keyword_scores)
-
-    print("Done!")
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Update wiki pages.')
-
-    parser.add_argument('--pages', nargs="+", default=['years', 'countries', 'references', 'themes', 'scores'], required=False)
-    parser.add_argument("--do_not_update_csv", action="store_true")
-
-    args = parser.parse_args()
-
-    with open('token.json') as json_file:
-        token = json.load(json_file)
-
-    update_wiki(token['token_id'], token['token_secret'], not args.do_not_update_csv, args.pages)
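Note: the CSV refresh defaults to on. The final call passes not args.do_not_update_csv as update_csv, so supplying --do_not_update_csv is what disables the ViewingsCsv update.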
vcinema_utils.VCinemaUtils (deleted)
@@ -1,150 +0,0 @@
-from collections import Counter
-from concurrent.futures import ThreadPoolExecutor
-import csv
-
-from imdb_utils import IMDbUtils
-from bookstack import Bookstack
-
-
-JACKNET_WIKI_URL = "https://wiki.jacknet.io"
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/csv
-CSV_PAGE_ID = 11
-
-
-def get_viewings_csv_attachment_id(token_id, token_secret):
-    attachments = Bookstack.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)
-
-    viewings_csv_file_name = "vcinema.csv"
-
-    return next((x['id'] for x in attachments if x['uploaded_to'] == CSV_PAGE_ID and x['name'] == viewings_csv_file_name), None)
-
-
-def get_vcinema_viewings(token_id, token_secret, viewings_csv=None, combine_repeat_viewings=True):
-    if viewings_csv is None:
-        attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
-        viewings_csv = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
-
-    viewings_csv = viewings_csv.decode("utf-8")
-    viewings_csv_rows = viewings_csv.strip().split("\n")
-
-    viewings = list(csv.DictReader(viewings_csv_rows, quotechar='"'))
-
-    if combine_repeat_viewings:
-        for viewing in viewings:
-            viewing['viewings'] = [
-                {'date_watched': viewing['date_watched'], 'season': viewing['season'], 'rating': viewing['rating']}]
-            viewing.pop('date_watched')
-            viewing.pop('season')
-            viewing.pop('rating')
-
-        watch_counts = Counter([x['imdb_id'] for x in viewings])
-        repeat_watches = [k for k, v in watch_counts.items() if v > 1]
-
-        for film in repeat_watches:
-            viewing_indexes = [index for index, viewing in enumerate(viewings) if viewing['imdb_id'] == film]
-
-            first_watch = viewings[viewing_indexes[0]]
-
-            for index in viewing_indexes[1::]:
-                first_watch['viewings'].extend(viewings[index]['viewings'])
-
-            for index in reversed(viewing_indexes[1::]):
-                viewings.pop(index)
-
-    return viewings
-
-
-def add_imdb_data(imdb_id, viewings, data_fields, progressbar=None):
-    movie = IMDbUtils.get_movie(imdb_id)
-
-    for viewing in viewings:
-        if viewing['imdb_id'] == movie.movieID:
-            for field_name in data_fields:
-                if field_name in movie:
-                    viewing[field_name] = movie[field_name]
-
-    if progressbar is not None:
-        progressbar.next()
-
-
-def add_imdb_keywords(imdb_id, viewings, progressbar=None):
-    movie = IMDbUtils.get_movie_keywords(imdb_id)
-
-    for viewing in viewings:
-        if viewing['imdb_id'] == movie.movieID:
-            if 'keywords' in movie:
-                viewing['keywords'] = movie['keywords']
-
-    if progressbar is not None:
-        progressbar.next()
-
-
-def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
-    with ThreadPoolExecutor(4) as executor:
-        future_imdb_tasks = set()
-
-        if ('keywords' in field_names and len(field_names) > 1) or ('keywords' not in field_names and len(field_names) > 0):
-            future_imdb_tasks.update(executor.submit(add_imdb_data, viewing['imdb_id'], viewings, field_names, progress_bar) for viewing in viewings)
-
-        if 'keywords' in field_names:
-            future_imdb_tasks.update(executor.submit(add_imdb_keywords, viewing['imdb_id'], viewings, progress_bar) for viewing in viewings)
-
-        if progress_bar is not None:
-            progress_bar.max = len(future_imdb_tasks)
-
-    if progress_bar is not None:
-        progress_bar.finish()
-
-
-def filter_viewings(viewings, filter_field):
-    viewings_filtered = {}
-
-    for viewing in viewings:
-        if filter_field in viewing:
-            viewing_field = viewing[filter_field]
-            if isinstance(viewing_field, list):
-                for fve in list(viewing_field):
-                    if fve in viewings_filtered.keys():
-                        viewings_filtered[fve] += [viewing]
-                    else:
-                        viewings_filtered[fve] = [viewing]
-            else:
-                if viewing_field in viewings_filtered.keys():
-                    viewings_filtered[viewing_field] += [viewing]
-                else:
-                    viewings_filtered[viewing_field] = [viewing]
-
-    return viewings_filtered
-
-
-def get_film_list(films):
-    film_links = []
-
-    for film in films:
-        film_link = generate_imdb_film_link(film)
-        film_links.append(film_link)
-
-    if len(film_links) > 0:
-        return "<br>".join(film_links)
-    else:
-        return ""
-
-
-def generate_markdown_link(text, url):
-    return "[{}]({})".format(text, url)
-
-
-def generate_imdb_film_link(film):
-    return generate_markdown_link(film['title'], generate_imdb_url(film['imdb_id']))
-
-
-def generate_wikipedia_page_link(page_title):
-    return generate_markdown_link(page_title, generate_wikipedia_url(page_title))
-
-
-def generate_imdb_url(imdb_id):
-    return "https://www.imdb.com/title/tt{}/".format(imdb_id)
-
-
-def generate_wikipedia_url(page_title):
-    return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))
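Aside: filter_viewings above is the pivot most of the wiki pages are built on. It groups a list of viewing dicts by one field, fanning out when the field holds a list. A self-contained sketch of the same behaviour (function name and data invented for the example):

    def group_viewings(viewings, field):
        # Compact equivalent of filter_viewings: index viewings by a field,
        # treating list-valued fields as one entry per element.
        grouped = {}
        for viewing in viewings:
            if field in viewing:
                values = viewing[field]
                if not isinstance(values, list):
                    values = [values]
                for value in values:
                    grouped.setdefault(value, []).append(viewing)
        return grouped

    viewings = [
        {"imdb_id": "0000001", "countries": ["Japan", "France"]},
        {"imdb_id": "0000002", "countries": ["Japan"]},
    ]

    print(sorted(group_viewings(viewings, "countries")))  # ['France', 'Japan']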
wiki_pages.FilmsByCountry (deleted)
@@ -1,104 +0,0 @@
-import base64
-from collections import Counter, OrderedDict
-import csv
-import os
-import pyvips
-import worldmap
-import warnings
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-warnings.filterwarnings("ignore")
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-country
-PAGE_ID = 34
-
-
-def get_films_by_country(viewings):
-    viewings_filtered_by_country = VCinemaUtils.filter_viewings(viewings, "countries")
-
-    if "Czechia" in viewings_filtered_by_country.keys():
-        viewings_filtered_by_country["Czech Republic"] = viewings_filtered_by_country["Czechia"]
-        viewings_filtered_by_country.pop("Czechia")
-
-    return viewings_filtered_by_country
-
-
-def update_page(token_id, token_secret, films_by_country):
-    page = build_page(films_by_country)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(films_by_country):
-    table = build_table(films_by_country)
-
-    country_counter = Counter(films_by_country)
-    png_data = draw_map(country_counter)
-    encoded = base64.b64encode(png_data).decode("utf-8")
-    image = "<img src=\"data:image/png;base64,{}\">".format(encoded)
-    page = image + "\n" + table
-
-    return page
-
-
-def get_flags_dict():
-    flags = {}
-
-    with open('country-flags.csv', newline='') as f:
-        reader = csv.reader(f, quotechar="\"")
-        next(reader, None)  # skip the headers
-
-        for row in reader:
-            flags[row[0]] = row[1]
-
-    return flags
-
-
-def build_table(films_by_country):
-    films_by_country_sorted = OrderedDict(sorted(films_by_country.items(), key=lambda t: t[0]))
-
-    flags = get_flags_dict()
-
-    table = "| Country | Films |\n| - | - |"
-
-    for country, films in films_by_country_sorted.items():
-        table += "\n"
-
-        row_data = []
-
-        country_label = country
-        if country in flags.keys():
-            country_label += " "
-            country_label += flags[country]
-
-        row_data.append(country_label)
-        row_data.append(VCinemaUtils.get_film_list(films))
-
-        table += " | ".join(row_data)
-
-    return table
-
-
-def draw_map(films_by_country, file_name="vcinema_map.svg"):
-    films_by_country['Germany'] += films_by_country['West Germany']
-    del films_by_country['West Germany']
-
-    counter = Counter(films_by_country)
-    countries = [k for k, v in counter.items()]
-    counts = [len(v) for _, v in counter.items()]
-
-    max_count = max(counts)
-
-    opacity = [0.5 + (x / (float(max_count))/2.0) for x in counts]
-
-    worldmap.plot(countries, cmap=["#FF4000"], opacity=opacity, filename=file_name, verbose=False)
-
-    image = pyvips.Image.new_from_file(file_name)
-    image = image.thumbnail_image(1000, crop=pyvips.Interesting.ALL)
-
-    png_data = image.write_to_buffer(".png")
-
-    os.remove(file_name)
-
-    return png_data
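Aside: the opacity expression in draw_map maps each country's film count x into the range (0.5, 1.0], so every visited country stays clearly visible and the most-watched country is fully opaque. A worked instance (counts invented):

    counts = [1, 2, 4]
    max_count = max(counts)

    # Same arithmetic as the list comprehension in draw_map.
    opacity = [0.5 + (x / float(max_count)) / 2.0 for x in counts]
    print(opacity)  # [0.625, 0.75, 1.0]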
wiki_pages.FilmsByReference (deleted)
@@ -1,100 +0,0 @@
-from collections import OrderedDict
-import wikipedia
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/references
-PAGE_ID = 62
-
-
-def get_films_by_reference(viewings):
-    films_by_reference = {}
-
-    for viewing in viewings:
-        if "keywords" in viewing.keys():
-            for keyword in viewing["keywords"]:
-                if keyword.startswith("reference-to-"):
-
-                    for reference in films_by_reference:
-                        if keyword in films_by_reference[reference]["keywords"]:
-                            films_by_reference[reference]["films"].append(viewing)
-                            break
-                    else:
-                        keyword = keyword[13:]
-
-                        if keyword.startswith("a-"):
-                            keyword = keyword[2:]
-
-                        if keyword.endswith("-character"):
-                            keyword = keyword[:-10]
-
-                        referenced = keyword.replace("-", " ")
-
-                        try:
-                            searches = wikipedia.search(referenced, suggestion=False)
-                            referenced_page = wikipedia.page(title=referenced, auto_suggest=False)
-
-                            page_title = referenced_page.title
-                            page_url = referenced_page.url
-
-                        except wikipedia.DisambiguationError as e:
-                            page_title = e.title
-                            page_title = page_title[0].upper() + page_title[1:]
-                            page_url = VCinemaUtils.generate_wikipedia_url(page_title)
-                        except wikipedia.PageError as _:
-                            if len(searches) > 0:
-                                try:
-                                    referenced_page = wikipedia.page(title=searches[0], auto_suggest=False)
-
-                                    page_title = referenced_page.title
-                                    page_url = referenced_page.url
-                                except wikipedia.DisambiguationError as e:
-                                    page_title = e.title
-                                    page_title = page_title[0].upper() + page_title[1:]
-                                    page_url = VCinemaUtils.generate_wikipedia_url(page_title)
-                            else:
-                                page_title = referenced.title()
-                                page_url = None
-
-                        if page_title in films_by_reference.keys():
-                            films_by_reference[page_title]["keywords"].append(keyword)
-
-                            if viewing not in films_by_reference[page_title]["films"]:
-                                films_by_reference[page_title]["films"].append(viewing)
-
-                        else:
-                            films_by_reference[page_title] = {"url": page_url,
-                                                              "keywords": [keyword],
-                                                              "films": [viewing]}
-
-    return films_by_reference
-
-
-def update_page(token_id, token_secret, films_by_reference_keyword):
-    page = build_page(films_by_reference_keyword)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(films_by_reference):
-    films_by_reference = OrderedDict(sorted(films_by_reference.items(), key=lambda t: t[0]))
-
-    table = "| Referenced | Films |\n| - | - |"
-
-    for reference, referenced in films_by_reference.items():
-        table += "\n"
-
-        row_data = []
-
-        reference_url = referenced["url"]
-        referenced_films = referenced["films"]
-
-        if reference_url is None:
-            row_data.append(reference)
-        else:
-            row_data.append(VCinemaUtils.generate_markdown_link(reference, reference_url))
-        row_data.append(VCinemaUtils.get_film_list(referenced_films))
-
-        table += " | ".join(row_data)
-
-    return table
wiki_pages.FilmsByYear (deleted)
@@ -1,35 +0,0 @@
-from collections import OrderedDict
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-release-year
-PAGE_ID = 24
-
-
-def get_films_by_year(viewings):
-    viewings_filtered_by_year = VCinemaUtils.filter_viewings(viewings, "year")
-
-    return viewings_filtered_by_year
-
-
-def update_page(token_id, token_secret, films_by_year):
-    page = build_page(films_by_year)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(films_by_year):
-    films_by_year_sorted = OrderedDict(sorted(films_by_year.items(), key=lambda t: t[0], reverse=True))
-
-    page = "| Year | Films |\n| - | - |"
-
-    for year in films_by_year_sorted.keys():
-        page += "\n"
-
-        row_data = []
-        row_data.append(str(year))
-        row_data.append(VCinemaUtils.get_film_list(films_by_year_sorted[year]))
-
-        page += " | ".join(row_data)
-
-    return page
wiki_pages.HiddenThemes (deleted)
@@ -1,86 +0,0 @@
-from collections import OrderedDict
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-reference
-PAGE_ID = 63
-
-
-def get_hidden_themes(viewings, token_id, token_secret):
-    # Bit horrible to need to request this again, but it affects the order of the result table
-    viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)
-
-    # Copy keywords from grouped viewings to ungrouped viewings
-    for viewing_ungrouped in viewings_ungrouped:
-        for viewing in viewings:
-            if viewing['imdb_id'] == viewing_ungrouped['imdb_id']:
-                if 'keywords' in viewing:
-                    viewing_ungrouped['keywords'] = viewing['keywords']
-                break
-
-    viewings_filtered_watch_date = VCinemaUtils.filter_viewings(viewings_ungrouped, "date_watched")
-
-    for date, viewings in viewings_filtered_watch_date.items():
-        viewing_dict = {"viewings": viewings}
-
-        viewings_filtered_watch_date[date] = viewing_dict
-
-    # Add hidden themes
-    for date, data in viewings_filtered_watch_date.items():
-        keyword_counts = {}
-
-        if len(data['viewings']) > 1:
-            for viewing in data['viewings']:
-                if 'keywords' in viewing:
-                    for keyword in viewing['keywords']:
-                        if keyword in keyword_counts.keys():
-                            keyword_counts[keyword] += 1
-                        else:
-                            keyword_counts[keyword] = 1
-
-        keyword_counts = {k: v for k, v in sorted(keyword_counts.items(), key=lambda item: item[1], reverse=True)}
-        hidden_themes = {}
-
-        for keyword in keyword_counts:
-            rating = float(keyword_counts[keyword]) / float(len(data['viewings']))
-            if rating > 0.5:
-                hidden_themes[keyword] = rating
-
-        viewings_filtered_watch_date[date]['hidden_themes'] = hidden_themes
-
-    return viewings_filtered_watch_date
-
-
-def update_page(token_id, token_secret, hidden_themes):
-    page = build_page(hidden_themes)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def build_page(hidden_themes):
-    hidden_themes = OrderedDict(sorted(hidden_themes.items(), key=lambda t: t[0]))
-
-    table = "| Date | Films | Hidden Themes |\n| - | - | - |"
-
-    for date, data in hidden_themes.items():
-        table += "\n"
-
-        row_data = []
-        row_data.append(str(date))
-        row_data.append(VCinemaUtils.get_film_list(data['viewings']))
-        if 'hidden_themes' in data and data['hidden_themes'] != {}:
-            hidden_theme_labels = []
-
-            for hidden_theme in sorted(data['hidden_themes'].keys()):
-                if data['hidden_themes'][hidden_theme] == 1:
-                    hidden_theme_labels.append(hidden_theme)
-                else:
-                    hidden_theme_labels.append("<i>{} ({}%)</i>".format(hidden_theme, round(data['hidden_themes'][hidden_theme] * 100)))
-
-            row_data.append("<br>".join(hidden_theme_labels))
-        else:
-            row_data.append("N/A")
-
-        table += " | ".join(row_data)
-
-    return table
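Aside: a keyword counts as a hidden theme for a night only when it appears in more than half of that night's viewings; a keyword present in every viewing is listed plainly, a partial one in italics with a percentage. A quick worked instance (counts invented):

    keyword_count = 2   # viewings that night tagged with the keyword
    viewing_count = 3   # viewings that night
    rating = float(keyword_count) / float(viewing_count)

    print(rating > 0.5)          # True, so the keyword is kept
    print(round(rating * 100))   # 67, rendered as "<i>keyword (67%)</i>"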
wiki_pages.KeywordScores (deleted)
@@ -1,81 +0,0 @@
-from collections import OrderedDict
-from progress.bar import IncrementalBar
-import math
-from concurrent.futures import ThreadPoolExecutor
-
-from bookstack import Bookstack
-from imdb_utils import IMDbUtils
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/keyword-scores
-PAGE_ID = 23
-
-
-def get_keyword_scores(viewings):
-    viewings_filtered_keyword = VCinemaUtils.filter_viewings(viewings, "keywords")
-
-    for keyword, viewings in viewings_filtered_keyword.items():
-        viewings_filtered_keyword[keyword] = {"vcinema_films": viewings}
-
-    min_vcinema_count = 2
-    min_imdb_count = 4
-
-    add_keyword_totals(viewings_filtered_keyword, min_vcinema_count)
-    add_keyword_scores(viewings_filtered_keyword, min_vcinema_count, min_imdb_count)
-
-    return viewings_filtered_keyword
-
-
-def update_page(token_id, token_secret, keyword_data):
-    page = build_page(keyword_data)
-    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
-
-
-def add_keyword_totals(keywords, min_vcinema_count):
-    keyword_count = len([keyword for keyword in keywords.keys() if len(keywords[keyword]['vcinema_films']) >= min_vcinema_count])
-
-    with IncrementalBar(message='%(percent).1f%% - %(eta)ds remaining', max=keyword_count, check_tty=False) as bar:
-        with ThreadPoolExecutor(6) as executor:
-            for keyword, data in keywords.items():
-                if len(data['vcinema_films']) >= min_vcinema_count:
-                    executor.submit(add_keyword_total, keyword, keywords, bar)
-
-
-def add_keyword_total(keyword, keywords, progress_bar=None):
-    keyword_total = IMDbUtils.get_keyword_count(keyword)
-
-    keywords[keyword]['total'] = keyword_total
-
-    if progress_bar is not None:
-        progress_bar.next()
-
-
-def add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count):
-    for keyword in keyword_data.keys():
-        if 'total' in keyword_data[keyword]:
-            vcinema_count = len(keyword_data[keyword]['vcinema_films'])
-            total_count = keyword_data[keyword]['total']
-
-            if vcinema_count >= min_vcinema_count and total_count >= min_imdb_count:
-                score = vcinema_count / math.log(total_count)
-
-                keyword_data[keyword]['score'] = score
-
-
-def build_page(keyword_data, minimum_score=1.0):
-    keyword_data = {k: v for k, v in keyword_data.items() if 'score' in v and v['score'] >= minimum_score}
-    keyword_data = OrderedDict(sorted(keyword_data.items(), key=lambda t: t[1]['score'], reverse=True))
-
-    table = "| Keyword | Number of VCinema Films | Total IMDb entries | Score |\n| - | - | - | - |"
-
-    for keyword, data in keyword_data.items():
-        table += "\n"
-
-        row_data = []
-        row_data.append(str(keyword))
-        row_data.append(str(len(data['vcinema_films'])))
-        row_data.append(str(data['total']))
-        row_data.append(str(round(data['score'], 3)))
-        table += " | ".join(row_data)
-
-    return table
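Aside: the score in add_keyword_scores rewards keywords that recur across VCinema films while staying rare on IMDb overall, dividing the VCinema count by the log of the total IMDb count. A worked instance (counts invented):

    import math

    vcinema_count = 3   # VCinema films with the keyword (must be >= 2)
    total_count = 20    # IMDb titles with the keyword (must be >= 4)

    score = vcinema_count / math.log(total_count)
    print(round(score, 3))  # 1.001, just above the build_page cut-off of 1.0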
wiki_pages.ViewingsCsv (deleted)
@@ -1,20 +0,0 @@
-from bs4 import BeautifulSoup
-
-from bookstack import Bookstack
-from vcinema_utils import VCinemaUtils
-
-# Page ID of https://wiki.jacknet.io/books/vcinema/page/csv
-PAGE_ID = 11
-
-
-def update_viewings_csv(token_id, token_secret):
-    print("Retrieving viewings page")
-    html_page = Bookstack.get_page_html(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID)
-
-    soup = BeautifulSoup(html_page, 'html.parser')
-    csv_data = soup.find("code").text.strip().encode('utf-8')
-
-    existing_attachment_id = VCinemaUtils.get_viewings_csv_attachment_id(token_id, token_secret)
-    print("Updating file")
-    Bookstack.update_attachment(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, existing_attachment_id, "vcinema.csv", csv_data, PAGE_ID)
-    print("File updated")
wiki_utils (submodule, 1 changed line)
@@ -0,0 +1 @@
+Subproject commit 88be36d5cd7378a01d9861726bf123715fe81d4a