Compare commits

..

No commits in common. "master" and "056e3b474f202fcc9090dbe63fbd6ac710b565bd" have entirely different histories.

20 changed files with 4 additions and 1094 deletions

5
.gitignore vendored
View File

@ -1,5 +0,0 @@
*.pyc
.idea/*
__pycache__/*
.DS_Store

6
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "bookstack"] [submodule "wiki_utils"]
path = bookstack path = wiki_utils
url = git@git.jacknet.io:sarah/bookstack.git url = gitea@git.jacknet.io:sarah/wiki_utils.git

@ -1 +0,0 @@
Subproject commit 8f6e38cb337bcf51b0790f2db7001681ceb9338f

View File

@ -1,258 +0,0 @@
Country,Flag
Afghanistan,🇦🇫
Åland Islands,🇦🇽
Albania,🇦🇱
Algeria,🇩🇿
American Samoa,🇦🇸
Andorra,🇦🇩
Angola,🇦🇴
Anguilla,🇦🇮
Antarctica,🇦🇶
Antigua and Barbuda,🇦🇬
Argentina,🇦🇷
Armenia,🇦🇲
Aruba,🇦🇼
Australia,🇦🇺
Austria,🇦🇹
Azerbaijan,🇦🇿
Bahamas,🇧🇸
Bahrain,🇧🇭
Bangladesh,🇧🇩
Barbados,🇧🇧
Belarus,🇧🇾
Belgium,🇧🇪
Belize,🇧🇿
Benin,🇧🇯
Bermuda,🇧🇲
Bhutan,🇧🇹
Bolivia,🇧🇴
"Bonaire, Sint Eustatius and Saba",🇧🇶
Bosnia and Herzegovina,🇧🇦
Botswana,🇧🇼
Bouvet Island,🇧🇻
Brazil,🇧🇷
British Indian Ocean Territory,🇮🇴
British Virgin Islands,🇻🇬
Brunei Darussalam,🇧🇳
Bulgaria,🇧🇬
Burkina Faso,🇧🇫
Burma,🇲🇲
Burundi,🇧🇮
Cambodia,🇰🇭
Cameroon,🇨🇲
Canada,🇨🇦
Cape Verde,🇨🇻
Cayman Islands,🇰🇾
Central African Republic,🇨🇫
Chad,🇹🇩
Chile,🇨🇱
China,🇨🇳
Christmas Island,🇨🇽
Cocos (Keeling) Islands,🇨🇨
Colombia,🇨🇴
Comoros,🇰🇲
Congo,🇨🇩
Cook Islands,🇨🇰
Costa Rica,🇨🇷
Côte d'Ivoire,🇨🇮
Croatia,🇭🇷
Cuba,🇨🇺
Cyprus,🇨🇾
Czech Republic,🇨🇿
Czechoslovakia,🇨🇿
Democratic Republic of the Congo,🇨🇩
Denmark,🇩🇰
Djibouti,🇩🇯
Dominica,🇩🇲
Dominican Republic,🇩🇴
East Germany,➡️🇩🇪
Ecuador,🇪🇨
Egypt,🇪🇬
El Salvador,🇸🇻
Equatorial Guinea,🇬🇶
Eritrea,🇪🇷
Estonia,🇪🇪
Ethiopia,🇪🇹
Falkland Islands,🇫🇰
Faroe Islands,🇫🇴
Federated States of Micronesia,🇫🇲
Fiji,🇫🇯
Finland,🇫🇮
France,🇫🇷
French Guiana,🇬🇫
French Polynesia,🇵🇫
French Southern Territories,🇹🇫
Gabon,🇬🇦
Gambia,🇬🇲
Georgia,🇬🇪
Germany,🇩🇪
Ghana,🇬🇭
Gibraltar,🇬🇮
Greece,🇬🇷
Greenland,🇬🇱
Grenada,🇬🇩
Guadeloupe,🇬🇵
Guam,🇬🇺
Guatemala,🇬🇹
Guernsey,🇬🇬
Guinea,🇬🇳
Guinea-Bissau,🇬🇼
Guyana,🇬🇾
Haiti,🇭🇹
Heard Island and McDonald Islands,🇭🇲
Holy See (Vatican City State),🇻🇦
Honduras,🇭🇳
Hong Kong,🇭🇰
Hungary,🇭🇺
Iceland,🇮🇸
India,🇮🇳
Indonesia,🇮🇩
Iran,🇮🇷
Iraq,🇮🇶
Ireland,🇮🇪
Isle of Man,🇮🇲
Israel,🇮🇱
Italy,🇮🇹
Jamaica,🇯🇲
Japan,🇯🇵
Jersey,🇯🇪
Jordan,🇯🇴
Kazakhstan,🇰🇿
Kenya,🇰🇪
Kiribati,🇰🇮
Korea,🇰🇵🇰🇷
Kosovo,🇽🇰
Kuwait,🇰🇼
Kyrgyzstan,🇰🇬
Laos,🇱🇦
Latvia,🇱🇻
Lebanon,🇱🇧
Lesotho,🇱🇸
Liberia,🇱🇷
Libya,🇱🇾
Liechtenstein,🇱🇮
Lithuania,🇱🇹
Luxembourg,🇱🇺
Macao,🇲🇴
Madagascar,🇲🇬
Malawi,🇲🇼
Malaysia,🇲🇾
Maldives,🇲🇻
Mali,🇲🇱
Malta,🇲🇹
Marshall Islands,🇲🇭
Martinique,🇲🇶
Mauritania,🇲🇷
Mauritius,🇲🇺
Mayotte,🇾🇹
Mexico,🇲🇽
Moldova,🇲🇩
Monaco,🇲🇨
Mongolia,🇲🇳
Montenegro,🇲🇪
Montserrat,🇲🇸
Morocco,🇲🇦
Mozambique,🇲🇿
Myanmar,🇲🇲
Namibia,🇳🇦
Nauru,🇳🇷
Nepal,🇳🇵
Netherlands,🇳🇱
Netherlands Antilles,🇳🇱
New Caledonia,🇳🇨
New Zealand,🇳🇿
Nicaragua,🇳🇮
Niger,🇳🇪
Nigeria,🇳🇬
Niue,🇳🇺
Norfolk Island,🇳🇫
North Korea,🇰🇵
North Vietnam,🇻🇳
Northern Mariana Islands,🇲🇵
Norway,🇳🇴
Oman,🇴🇲
Pakistan,🇵🇰
Palau,🇵🇼
Palestine,🇵🇸
Palestinian Territory,🇵🇸
Panama,🇵🇦
Papua New Guinea,🇵🇬
Paraguay,🇵🇾
Peru,🇵🇪
Philippines,🇵🇭
Poland,🇵🇱
Portugal,🇵🇹
Pitcairn,🇵🇳
Puerto Rico,🇵🇷
Qatar,🇶🇦
Republic of Macedonia,🇲🇰
Réunion,🇷🇪
Romania,🇷🇴
Russia,🇷🇺
Rwanda,🇷🇼
Saint Barthélemy,🇧🇱
Saint Helena,🇸🇭
Saint Kitts and Nevis,🇰🇳
Saint Lucia,🇱🇨
Saint Martin (French part),🇫🇷
Saint Pierre and Miquelon,🇵🇲
Saint Vincent and the Grenadines,🇻🇨
Samoa,🇼🇸
San Marino,🇸🇲
Sao Tome and Principe,🇸🇹
Saudi Arabia,🇸🇦
Senegal,🇸🇳
Serbia,🇷🇸
Serbia and Montenegro,🇷🇸🇲🇪
Seychelles,🇸🇨
Siam,🇹🇭
Sierra Leone,🇸🇱
Singapore,🇸🇬
Slovakia,🇸🇰
Slovenia,🇸🇮
Solomon Islands,🇸🇧
Somalia,🇸🇴
South Africa,🇿🇦
South Georgia and the South Sandwich Islands,🇬🇸
South Korea,🇰🇷
Spain,🇪🇸
Sri Lanka,🇱🇰
Sudan,🇸🇩
Suriname,🇸🇷
Svalbard and Jan Mayen,🇸🇯
Swaziland,🇸🇿
Sweden,🇸🇪
Switzerland,🇨🇭
Syria,🇸🇾
Taiwan,🇹🇼
Tajikistan,🇹🇯
Tanzania,🇹🇿
Thailand,🇹🇭
Timor-Leste,🇹🇱
Togo,🇹🇬
Tokelau,🇹🇰
Tonga,🇹🇴
Trinidad and Tobago,🇹🇹
Tunisia,🇹🇳
Turkey,🇹🇷
Turkmenistan,🇹🇲
Turks and Caicos Islands,🇹🇨
Tuvalu,🇹🇻
U.S. Virgin Islands,🇻🇮
Uganda,🇺🇬
Ukraine,🇺🇦
United Arab Emirates,🇦🇪
United Kingdom,🇬🇧
United States,🇺🇸
United States Minor Outlying Islands,🇺🇲
Uruguay,🇺🇾
Uzbekistan,🇺🇿
Vanuatu,🇻🇺
Venezuela,🇻🇪
Vietnam,🇻🇳
Wallis and Futuna,🇫🇷
West Germany,⬅️🇩🇪
Western Sahara,🇪🇭
Yemen,🇾🇪
Zambia,🇿🇲
Zimbabwe,🇿🇼
1 Country Flag
2 Afghanistan 🇦🇫
3 Åland Islands 🇦🇽
4 Albania 🇦🇱
5 Algeria 🇩🇿
6 American Samoa 🇦🇸
7 Andorra 🇦🇩
8 Angola 🇦🇴
9 Anguilla 🇦🇮
10 Antarctica 🇦🇶
11 Antigua and Barbuda 🇦🇬
12 Argentina 🇦🇷
13 Armenia 🇦🇲
14 Aruba 🇦🇼
15 Australia 🇦🇺
16 Austria 🇦🇹
17 Azerbaijan 🇦🇿
18 Bahamas 🇧🇸
19 Bahrain 🇧🇭
20 Bangladesh 🇧🇩
21 Barbados 🇧🇧
22 Belarus 🇧🇾
23 Belgium 🇧🇪
24 Belize 🇧🇿
25 Benin 🇧🇯
26 Bermuda 🇧🇲
27 Bhutan 🇧🇹
28 Bolivia 🇧🇴
29 Bonaire, Sint Eustatius and Saba 🇧🇶
30 Bosnia and Herzegovina 🇧🇦
31 Botswana 🇧🇼
32 Bouvet Island 🇧🇻
33 Brazil 🇧🇷
34 British Indian Ocean Territory 🇮🇴
35 British Virgin Islands 🇻🇬
36 Brunei Darussalam 🇧🇳
37 Bulgaria 🇧🇬
38 Burkina Faso 🇧🇫
39 Burma 🇲🇲
40 Burundi 🇧🇮
41 Cambodia 🇰🇭
42 Cameroon 🇨🇲
43 Canada 🇨🇦
44 Cape Verde 🇨🇻
45 Cayman Islands 🇰🇾
46 Central African Republic 🇨🇫
47 Chad 🇹🇩
48 Chile 🇨🇱
49 China 🇨🇳
50 Christmas Island 🇨🇽
51 Cocos (Keeling) Islands 🇨🇨
52 Colombia 🇨🇴
53 Comoros 🇰🇲
54 Congo 🇨🇩
55 Cook Islands 🇨🇰
56 Costa Rica 🇨🇷
57 Côte d'Ivoire 🇨🇮
58 Croatia 🇭🇷
59 Cuba 🇨🇺
60 Cyprus 🇨🇾
61 Czech Republic 🇨🇿
62 Czechoslovakia 🇨🇿
63 Democratic Republic of the Congo 🇨🇩
64 Denmark 🇩🇰
65 Djibouti 🇩🇯
66 Dominica 🇩🇲
67 Dominican Republic 🇩🇴
68 East Germany ➡️🇩🇪
69 Ecuador 🇪🇨
70 Egypt 🇪🇬
71 El Salvador 🇸🇻
72 Equatorial Guinea 🇬🇶
73 Eritrea 🇪🇷
74 Estonia 🇪🇪
75 Ethiopia 🇪🇹
76 Falkland Islands 🇫🇰
77 Faroe Islands 🇫🇴
78 Federated States of Micronesia 🇫🇲
79 Fiji 🇫🇯
80 Finland 🇫🇮
81 France 🇫🇷
82 French Guiana 🇬🇫
83 French Polynesia 🇵🇫
84 French Southern Territories 🇹🇫
85 Gabon 🇬🇦
86 Gambia 🇬🇲
87 Georgia 🇬🇪
88 Germany 🇩🇪
89 Ghana 🇬🇭
90 Gibraltar 🇬🇮
91 Greece 🇬🇷
92 Greenland 🇬🇱
93 Grenada 🇬🇩
94 Guadeloupe 🇬🇵
95 Guam 🇬🇺
96 Guatemala 🇬🇹
97 Guernsey 🇬🇬
98 Guinea 🇬🇳
99 Guinea-Bissau 🇬🇼
100 Guyana 🇬🇾
101 Haiti 🇭🇹
102 Heard Island and McDonald Islands 🇭🇲
103 Holy See (Vatican City State) 🇻🇦
104 Honduras 🇭🇳
105 Hong Kong 🇭🇰
106 Hungary 🇭🇺
107 Iceland 🇮🇸
108 India 🇮🇳
109 Indonesia 🇮🇩
110 Iran 🇮🇷
111 Iraq 🇮🇶
112 Ireland 🇮🇪
113 Isle of Man 🇮🇲
114 Israel 🇮🇱
115 Italy 🇮🇹
116 Jamaica 🇯🇲
117 Japan 🇯🇵
118 Jersey 🇯🇪
119 Jordan 🇯🇴
120 Kazakhstan 🇰🇿
121 Kenya 🇰🇪
122 Kiribati 🇰🇮
123 Korea 🇰🇵🇰🇷
124 Kosovo 🇽🇰
125 Kuwait 🇰🇼
126 Kyrgyzstan 🇰🇬
127 Laos 🇱🇦
128 Latvia 🇱🇻
129 Lebanon 🇱🇧
130 Lesotho 🇱🇸
131 Liberia 🇱🇷
132 Libya 🇱🇾
133 Liechtenstein 🇱🇮
134 Lithuania 🇱🇹
135 Luxembourg 🇱🇺
136 Macao 🇲🇴
137 Madagascar 🇲🇬
138 Malawi 🇲🇼
139 Malaysia 🇲🇾
140 Maldives 🇲🇻
141 Mali 🇲🇱
142 Malta 🇲🇹
143 Marshall Islands 🇲🇭
144 Martinique 🇲🇶
145 Mauritania 🇲🇷
146 Mauritius 🇲🇺
147 Mayotte 🇾🇹
148 Mexico 🇲🇽
149 Moldova 🇲🇩
150 Monaco 🇲🇨
151 Mongolia 🇲🇳
152 Montenegro 🇲🇪
153 Montserrat 🇲🇸
154 Morocco 🇲🇦
155 Mozambique 🇲🇿
156 Myanmar 🇲🇲
157 Namibia 🇳🇦
158 Nauru 🇳🇷
159 Nepal 🇳🇵
160 Netherlands 🇳🇱
161 Netherlands Antilles 🇳🇱
162 New Caledonia 🇳🇨
163 New Zealand 🇳🇿
164 Nicaragua 🇳🇮
165 Niger 🇳🇪
166 Nigeria 🇳🇬
167 Niue 🇳🇺
168 Norfolk Island 🇳🇫
169 North Korea 🇰🇵
170 North Vietnam 🇻🇳
171 Northern Mariana Islands 🇲🇵
172 Norway 🇳🇴
173 Oman 🇴🇲
174 Pakistan 🇵🇰
175 Palau 🇵🇼
176 Palestine 🇵🇸
177 Palestinian Territory 🇵🇸
178 Panama 🇵🇦
179 Papua New Guinea 🇵🇬
180 Paraguay 🇵🇾
181 Peru 🇵🇪
182 Philippines 🇵🇭
183 Poland 🇵🇱
184 Portugal 🇵🇹
185 Pitcairn 🇵🇳
186 Puerto Rico 🇵🇷
187 Qatar 🇶🇦
188 Republic of Macedonia 🇲🇰
189 Réunion 🇷🇪
190 Romania 🇷🇴
191 Russia 🇷🇺
192 Rwanda 🇷🇼
193 Saint Barthélemy 🇧🇱
194 Saint Helena 🇸🇭
195 Saint Kitts and Nevis 🇰🇳
196 Saint Lucia 🇱🇨
197 Saint Martin (French part) 🇫🇷
198 Saint Pierre and Miquelon 🇵🇲
199 Saint Vincent and the Grenadines 🇻🇨
200 Samoa 🇼🇸
201 San Marino 🇸🇲
202 Sao Tome and Principe 🇸🇹
203 Saudi Arabia 🇸🇦
204 Senegal 🇸🇳
205 Serbia 🇷🇸
206 Serbia and Montenegro 🇷🇸🇲🇪
207 Seychelles 🇸🇨
208 Siam 🇹🇭
209 Sierra Leone 🇸🇱
210 Singapore 🇸🇬
211 Slovakia 🇸🇰
212 Slovenia 🇸🇮
213 Solomon Islands 🇸🇧
214 Somalia 🇸🇴
215 South Africa 🇿🇦
216 South Georgia and the South Sandwich Islands 🇬🇸
217 South Korea 🇰🇷
218 Spain 🇪🇸
219 Sri Lanka 🇱🇰
220 Sudan 🇸🇩
221 Suriname 🇸🇷
222 Svalbard and Jan Mayen 🇸🇯
223 Swaziland 🇸🇿
224 Sweden 🇸🇪
225 Switzerland 🇨🇭
226 Syria 🇸🇾
227 Taiwan 🇹🇼
228 Tajikistan 🇹🇯
229 Tanzania 🇹🇿
230 Thailand 🇹🇭
231 Timor-Leste 🇹🇱
232 Togo 🇹🇬
233 Tokelau 🇹🇰
234 Tonga 🇹🇴
235 Trinidad and Tobago 🇹🇹
236 Tunisia 🇹🇳
237 Turkey 🇹🇷
238 Turkmenistan 🇹🇲
239 Turks and Caicos Islands 🇹🇨
240 Tuvalu 🇹🇻
241 U.S. Virgin Islands 🇻🇮
242 Uganda 🇺🇬
243 Ukraine 🇺🇦
244 United Arab Emirates 🇦🇪
245 United Kingdom 🇬🇧
246 United States 🇺🇸
247 United States Minor Outlying Islands 🇺🇲
248 Uruguay 🇺🇾
249 Uzbekistan 🇺🇿
250 Vanuatu 🇻🇺
251 Venezuela 🇻🇪
252 Vietnam 🇻🇳
253 Wallis and Futuna 🇫🇷
254 West Germany ⬅️🇩🇪
255 Western Sahara 🇪🇭
256 Yemen 🇾🇪
257 Zambia 🇿🇲
258 Zimbabwe 🇿🇼

View File

@ -1,73 +0,0 @@
from wiki_pages import FilmsByCountry
from vcinema_utils import VCinemaUtils

import argparse
from collections import OrderedDict
import io

import imageio
# NOTE(review): the original imported ImageFont twice; deduplicated here.
from PIL import Image, ImageDraw, ImageFont
from progress.bar import IncrementalBar
from pygifsicle import optimize


def generate_map_timelapse(token_id, token_secret, filename):
    """Build an animated GIF of the VCinema world map over time.

    One frame is rendered per viewing date, showing the cumulative
    per-country film counts up to and including that date, with the date
    stamped in the corner of each frame.

    :param token_id: Bookstack API token ID.
    :param token_secret: Bookstack API token secret.
    :param filename: path of the GIF file to write.
    """
    print("Getting viewings")
    # Repeat viewings are kept separate so every watch date gets a frame.
    viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)

    viewing_count = len(viewings)
    with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
        VCinemaUtils.add_imdb_data_to_viewings(viewings, ['countries'], bar)

    date_viewings = VCinemaUtils.filter_viewings(viewings, "date_watched")
    date_viewings = OrderedDict(sorted(date_viewings.items(), key=lambda t: t[0]))

    running_country_counts = {}
    print(len(date_viewings.keys()))

    with imageio.get_writer(filename, mode='I', duration=0.1) as writer:
        # Renamed from the original's `viewings`/`map`, which shadowed the
        # outer variable and the `map` builtin respectively.
        for date, day_viewings in date_viewings.items():
            date_viewings_countries = VCinemaUtils.filter_viewings(day_viewings, "countries")

            # Accumulate the running per-country totals up to this date.
            for country in date_viewings_countries:
                if country in running_country_counts.keys():
                    running_country_counts[country] += date_viewings_countries[country]
                else:
                    running_country_counts[country] = date_viewings_countries[country]

            map_png = FilmsByCountry.draw_map(running_country_counts, file_name="map-{}.svg".format(date))
            stream = io.BytesIO(map_png)
            img = Image.open(stream)

            map_editable = ImageDraw.Draw(img)
            # macos font path
            font = ImageFont.truetype("/System/Library/Fonts/Supplemental/Arial.ttf", 48)
            # image is 655 high; stamp the date near the bottom-left corner.
            map_editable.text((2, 605), "{}".format(date), (255, 64, 0), font=font)

            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format='PNG')
            img_byte_arr = img_byte_arr.getvalue()

            image = imageio.imread(img_byte_arr)
            writer.append_data(image)

    print("optimizing")
    optimize(filename)
    print("done")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Create timelapse gif of vcinema countries')
    parser.add_argument('token_id', help='API token ID.')
    parser.add_argument('token_secret', help='API token secret.')
    parser.add_argument('filename', help='Name of output gif')
    args = parser.parse_args()

    generate_map_timelapse(args.token_id, args.token_secret, args.filename)

View File

@ -1,33 +0,0 @@
from imdb_utils import IMDbUtils

import argparse

from progress.bar import IncrementalBar


def get_hidden_themes(imdb_ids):
    """Return the keywords shared by every one of the given IMDb titles.

    :param imdb_ids: iterable of IMDb title IDs.
    :return: set of keywords common to all films that had keyword data;
        an empty set when none of the films had keyword data.
    """
    film_keywords = []
    with IncrementalBar('Retrieving movie data', max=len(imdb_ids), suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
        for imdb_id in imdb_ids:
            movie_data = IMDbUtils.get_movie_keywords(imdb_id)
            # Films with no keyword data are skipped rather than allowed
            # to empty the intersection.
            if 'keywords' in movie_data:
                keywords = set(movie_data['keywords'])
                film_keywords.append(keywords)
            bar.next()

    # BUG FIX: set.intersection(*[]) raises TypeError; treat "no keyword
    # data at all" as "no common themes".
    if not film_keywords:
        return set()

    hidden_themes = set.intersection(*film_keywords)
    return hidden_themes


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('imdb_ids', nargs="+", default=[])
    args = parser.parse_args()

    hidden_themes = get_hidden_themes(args.imdb_ids)
    print(hidden_themes)

View File

@ -1,60 +0,0 @@
from imdb import IMDb
import re

import requests
from bs4 import BeautifulSoup


def get_movie(imdb_id):
    """Fetch the main IMDb record for a title."""
    ia = IMDb()
    movie = ia.get_movie(imdb_id)
    return movie


def get_movie_keywords(imdb_id):
    """Fetch an IMDb title with its keyword data loaded."""
    ia = IMDb()
    movie = ia.get_movie(imdb_id, info="keywords")
    return movie


def get_api_keyword_count(keyword):
    """Return the number of titles the IMDb API lists for a keyword.

    NOTE(review): the API appears to cap results at 50 — see
    get_keyword_count, which falls back to scraping in that case.
    """
    ia = IMDb()
    count = len(ia.get_keyword(keyword))
    return count


def get_website_keyword_count(keyword):
    """Scrape imdb.com for the total number of titles with a keyword.

    :raises requests.exceptions.ConnectionError: if the page fetch fails
        (the original re-raised explicitly; letting it propagate is equivalent).
    """
    page = requests.get("https://www.imdb.com/search/keyword/?keywords=" + keyword)

    soup = BeautifulSoup(page.content, 'html.parser')
    # `findAll` is the deprecated bs4 alias of `find_all`.
    elements = soup.find_all("div", class_="desc")
    pagination_label = elements[0].text.replace("\n", "")

    # e.g. "1-50 of 1,234 titles" -> capture "1,234". Raw string so \d is
    # a regex digit class, not a (deprecated) string escape.
    pagination_label_reg = r"(\d+,?\d*) titles"
    pattern_match = re.compile(pagination_label_reg).search(pagination_label)

    if pattern_match is not None:
        return int(pattern_match.group(1).replace(',', ''))
    else:
        # No pagination label is shown for a single page of one result.
        return 1


def get_keyword_count(keyword):
    """Return the title count for a keyword, scraping when the API caps out.

    The API count is trusted unless it is exactly 50 (the apparent API
    page limit), in which case the website total is used instead.
    """
    count = get_api_keyword_count(keyword)
    if count == 50:
        count = get_website_keyword_count(keyword)
    return count

View File

View File

@ -1,16 +0,0 @@
import json

from wiki_pages import ViewingsCsv


def update_viewings_csv(token_id, token_secret):
    """Refresh the vcinema.csv wiki attachment from the viewings page."""
    print("Updating CSV")
    ViewingsCsv.update_viewings_csv(token_id, token_secret)
    print("Done!")


if __name__ == '__main__':
    # API credentials are read from a local token.json file.
    with open('token.json') as json_file:
        token = json.load(json_file)

    update_viewings_csv(token['token_id'], token['token_secret'])

View File

@ -1,69 +0,0 @@
from wiki_pages import FilmsByCountry, FilmsByReference, FilmsByYear, HiddenThemes, KeywordScores, ViewingsCsv
from vcinema_utils import VCinemaUtils

import argparse
import json

from progress.bar import IncrementalBar


def update_wiki(token_id, token_secret, update_csv, pages):
    """Regenerate the requested VCinema wiki pages.

    :param token_id: Bookstack API token ID.
    :param token_secret: Bookstack API token secret.
    :param update_csv: when True, refresh the viewings CSV attachment first.
    :param pages: page keys to rebuild; any of 'years', 'countries',
        'references', 'themes', 'scores'.
    """
    if update_csv:
        print("Updating CSV")
        ViewingsCsv.update_viewings_csv(token_id, token_secret)

    print("Getting viewings")
    viewings = VCinemaUtils.get_vcinema_viewings(token_id, token_secret)

    update_films_by_year = 'years' in pages
    update_films_by_country = 'countries' in pages
    update_film_references = 'references' in pages
    update_hidden_themes = 'themes' in pages
    update_keyword_scores = 'scores' in pages

    # Only request the IMDb fields the selected pages actually need;
    # keywords feed three different pages.
    data_fields = []
    if update_films_by_year:
        data_fields.append("year")
    if update_films_by_country:
        data_fields.append("countries")
    if update_film_references or update_hidden_themes or update_keyword_scores:
        data_fields.append("keywords")

    viewing_count = len(viewings)
    with IncrementalBar('Retrieving movie data', max=viewing_count, suffix='%(percent).1f%% - %(eta)ds remaining', check_tty=False) as bar:
        VCinemaUtils.add_imdb_data_to_viewings(viewings, data_fields, bar)

    print("Processing viewing data")
    if update_films_by_year:
        films_by_year = FilmsByYear.get_films_by_year(viewings)
        FilmsByYear.update_page(token_id, token_secret, films_by_year)
    if update_films_by_country:
        films_by_country = FilmsByCountry.get_films_by_country(viewings)
        FilmsByCountry.update_page(token_id, token_secret, films_by_country)
    if update_film_references:
        films_by_reference = FilmsByReference.get_films_by_reference(viewings)
        FilmsByReference.update_page(token_id, token_secret, films_by_reference)
    if update_hidden_themes:
        hidden_themes = HiddenThemes.get_hidden_themes(viewings, token_id, token_secret)
        HiddenThemes.update_page(token_id, token_secret, hidden_themes)
    if update_keyword_scores:
        keyword_scores = KeywordScores.get_keyword_scores(viewings)
        KeywordScores.update_page(token_id, token_secret, keyword_scores)
    print("Done!")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Update wiki pages.')
    parser.add_argument('--pages', nargs="+", default=['years', 'countries', 'references', 'themes', 'scores'], required=False)
    parser.add_argument("--do_not_update_csv", action="store_true")
    args = parser.parse_args()

    with open('token.json') as json_file:
        token = json.load(json_file)

    update_wiki(token['token_id'], token['token_secret'], not args.do_not_update_csv, args.pages)

View File

@ -1,150 +0,0 @@
from collections import Counter
from concurrent.futures import ThreadPoolExecutor
import csv
from imdb_utils import IMDbUtils
from bookstack import Bookstack
JACKNET_WIKI_URL = "https://wiki.jacknet.io"
# Page ID of https://wiki.jacknet.io/books/vcinema/page/csv
CSV_PAGE_ID = 11
def get_viewings_csv_attachment_id(token_id, token_secret):
    """Return the attachment ID of vcinema.csv on the CSV wiki page.

    :return: the attachment ID, or None if no such attachment exists.
    """
    attachments = Bookstack.get_attachments(JACKNET_WIKI_URL, token_id, token_secret)
    target_name = "vcinema.csv"
    candidates = (a['id'] for a in attachments
                  if a['uploaded_to'] == CSV_PAGE_ID and a['name'] == target_name)
    return next(candidates, None)
def get_vcinema_viewings(token_id, token_secret, viewings_csv=None, combine_repeat_viewings=True):
    """Load VCinema viewings, optionally merging repeat watches of a film.

    :param token_id: Bookstack API token ID (unused when viewings_csv given).
    :param token_secret: Bookstack API token secret (unused when viewings_csv given).
    :param viewings_csv: CSV text to parse; fetched from the wiki when None.
    :param combine_repeat_viewings: when True, rows sharing an imdb_id are
        merged into one record whose 'viewings' key lists each watch's
        date_watched/season/rating.
    :return: list of viewing dicts.
    """
    if viewings_csv is None:
        attachment_id = get_viewings_csv_attachment_id(token_id, token_secret)
        raw = Bookstack.get_attachment(JACKNET_WIKI_URL, token_id, token_secret, attachment_id)
        viewings_csv = raw.decode("utf-8")

    csv_rows = viewings_csv.strip().split("\n")
    viewings = list(csv.DictReader(csv_rows, quotechar='"'))

    if combine_repeat_viewings:
        # Fold each row's per-watch fields into a one-element 'viewings' list.
        for viewing in viewings:
            viewing['viewings'] = [{'date_watched': viewing.pop('date_watched'),
                                    'season': viewing.pop('season'),
                                    'rating': viewing.pop('rating')}]

        watch_counts = Counter(v['imdb_id'] for v in viewings)
        repeat_watches = [film for film, count in watch_counts.items() if count > 1]

        for film in repeat_watches:
            indexes = [i for i, v in enumerate(viewings) if v['imdb_id'] == film]
            first_watch = viewings[indexes[0]]
            # Collect later watches onto the first record...
            for i in indexes[1:]:
                first_watch['viewings'].extend(viewings[i]['viewings'])
            # ...then drop them back-to-front so indexes stay valid.
            for i in reversed(indexes[1:]):
                viewings.pop(i)

    return viewings
def add_imdb_data(imdb_id, viewings, data_fields, progressbar=None):
    """Fetch one film's IMDb record and copy data_fields onto its viewings.

    :param progressbar: optional progress bar; advanced once per call.
    """
    movie = IMDbUtils.get_movie(imdb_id)
    for viewing in viewings:
        if viewing['imdb_id'] != movie.movieID:
            continue
        for field_name in data_fields:
            if field_name in movie:
                viewing[field_name] = movie[field_name]
    if progressbar is not None:
        progressbar.next()


def add_imdb_keywords(imdb_id, viewings, progressbar=None):
    """Fetch one film's IMDb keywords and copy them onto its viewings.

    :param progressbar: optional progress bar; advanced once per call.
    """
    movie = IMDbUtils.get_movie_keywords(imdb_id)
    for viewing in viewings:
        if viewing['imdb_id'] == movie.movieID and 'keywords' in movie:
            viewing['keywords'] = movie['keywords']
    if progressbar is not None:
        progressbar.next()
def add_imdb_data_to_viewings(viewings, field_names, progress_bar=None):
    """Concurrently annotate every viewing with IMDb data for field_names.

    'keywords' requires a separate IMDb request, so it is fetched by its
    own task per film; all other fields come from the main record task.

    :param viewings: list of viewing dicts (mutated in place).
    :param field_names: IMDb field names to fetch.
    :param progress_bar: optional progress.bar-style bar; its max is set
        to the number of scheduled tasks, the workers advance it, and it
        is finished here.
    """
    with ThreadPoolExecutor(4) as executor:
        future_imdb_tasks = set()
        # Equivalent to the original compound condition: schedule the main
        # record task only when a non-keyword field was requested.
        if any(field != 'keywords' for field in field_names):
            future_imdb_tasks.update(
                executor.submit(add_imdb_data, viewing['imdb_id'], viewings, field_names, progress_bar)
                for viewing in viewings)
        if 'keywords' in field_names:
            future_imdb_tasks.update(
                executor.submit(add_imdb_keywords, viewing['imdb_id'], viewings, progress_bar)
                for viewing in viewings)
        # BUG FIX: the original set progress_bar.max unconditionally and
        # crashed with the default progress_bar=None; guard like the
        # sibling functions do.
        if progress_bar is not None:
            progress_bar.max = len(future_imdb_tasks)
    if progress_bar is not None:
        progress_bar.finish()
def filter_viewings(viewings, filter_field):
    """Group viewings by the value(s) of filter_field.

    List-valued fields (e.g. 'countries') add the viewing to one bucket
    per element; viewings missing the field are dropped.

    :return: dict mapping field value -> list of viewings.
    """
    grouped = {}
    for viewing in viewings:
        if filter_field not in viewing:
            continue
        value = viewing[filter_field]
        # Normalise to a list of bucket keys (copying list values, as the
        # original iterated a copy).
        bucket_keys = list(value) if isinstance(value, list) else [value]
        for key in bucket_keys:
            grouped.setdefault(key, []).append(viewing)
    return grouped
def get_film_list(films):
    """Return '<br>'-separated markdown IMDb links for films ('' if none)."""
    film_links = [generate_imdb_film_link(film) for film in films]
    if not film_links:
        return ""
    return "<br>".join(film_links)


def generate_markdown_link(text, url):
    """Return a markdown link: [text](url)."""
    return "[{}]({})".format(text, url)


def generate_imdb_film_link(film):
    """Return a markdown link to a film's IMDb title page."""
    return generate_markdown_link(film['title'], generate_imdb_url(film['imdb_id']))


def generate_wikipedia_page_link(page_title):
    """Return a markdown link to an English Wikipedia page."""
    return generate_markdown_link(page_title, generate_wikipedia_url(page_title))


def generate_imdb_url(imdb_id):
    """Return the IMDb title URL for an IMDb ID (digits only, no 'tt')."""
    return "https://www.imdb.com/title/tt{}/".format(imdb_id)


def generate_wikipedia_url(page_title):
    """Return the English Wikipedia URL for a page title."""
    return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))

View File

@ -1,104 +0,0 @@
import base64
from collections import Counter, OrderedDict
import csv
import os
import pyvips
import worldmap
import warnings
from bookstack import Bookstack
from vcinema_utils import VCinemaUtils
warnings.filterwarnings("ignore")
# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-country
PAGE_ID = 34
def get_films_by_country(viewings):
    """Group viewings by country, folding 'Czechia' into 'Czech Republic'."""
    by_country = VCinemaUtils.filter_viewings(viewings, "countries")
    # IMDb reports 'Czechia'; the flag table and map use 'Czech Republic'.
    if "Czechia" in by_country.keys():
        by_country["Czech Republic"] = by_country.pop("Czechia")
    return by_country


def update_page(token_id, token_secret, films_by_country):
    """Render and publish the films-by-country wiki page."""
    page = build_page(films_by_country)
    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)


def build_page(films_by_country):
    """Return the page markdown: an inline base64 PNG map above a table."""
    table = build_table(films_by_country)
    country_counter = Counter(films_by_country)
    png_data = draw_map(country_counter)
    encoded = base64.b64encode(png_data).decode("utf-8")
    image = "![](data:image/png;base64,{})".format(encoded)
    return image + "\n" + table
def get_flags_dict():
    """Load the country-name -> flag-emoji mapping from country-flags.csv."""
    with open('country-flags.csv', newline='') as f:
        reader = csv.reader(f, quotechar="\"")
        next(reader, None)  # skip the header row
        return {row[0]: row[1] for row in reader}
def build_table(films_by_country):
    """Return a markdown table of countries (with flag emoji) and films."""
    flags = get_flags_dict()
    table = "| Country | Films |\n| - | - |"
    # Rows are sorted alphabetically by country name.
    for country, films in sorted(films_by_country.items(), key=lambda t: t[0]):
        country_label = country
        if country in flags.keys():
            country_label += " " + flags[country]
        row = [country_label, VCinemaUtils.get_film_list(films)]
        table += "\n" + " | ".join(row)
    return table
def draw_map(films_by_country, file_name="vcinema_map.svg"):
    """Render a world-map PNG shaded by per-country film counts.

    Opacity scales with each country's film count relative to the busiest
    country (range (0.5, 1.0]).

    :param films_by_country: dict mapping country name -> list of films;
        mutated: 'West Germany' is merged into 'Germany'.
    :param file_name: temporary SVG path; deleted after rasterising.
    :return: PNG image bytes.
    """
    # BUG FIX: the original did films_by_country['Germany'] +=
    # films_by_country['West Germany'] unconditionally, raising KeyError
    # whenever either key was absent from the data.
    if 'West Germany' in films_by_country:
        west_germany = films_by_country.pop('West Germany')
        films_by_country['Germany'] = films_by_country.get('Germany', []) + west_germany

    counter = Counter(films_by_country)
    countries = [k for k, v in counter.items()]
    counts = [len(v) for _, v in counter.items()]
    max_count = max(counts)
    opacity = [0.5 + (x / (float(max_count)) / 2.0) for x in counts]

    worldmap.plot(countries, cmap=["#FF4000"], opacity=opacity, filename=file_name, verbose=False)

    image = pyvips.Image.new_from_file(file_name)
    image = image.thumbnail_image(1000, crop=pyvips.Interesting.ALL)
    png_data = image.write_to_buffer(".png")

    os.remove(file_name)
    return png_data

View File

@ -1,100 +0,0 @@
from collections import OrderedDict
import wikipedia
from bookstack import Bookstack
from vcinema_utils import VCinemaUtils
# Page ID of https://wiki.jacknet.io/books/vcinema/page/references
PAGE_ID = 62
def get_films_by_reference(viewings):
    """Group films by the work or person their 'reference-to-*' keywords cite.

    Each new reference keyword is resolved to a Wikipedia page where
    possible (falling back to the top search result, then to a plain
    title with no URL).

    :return: dict mapping resolved page title -> {'url': str or None,
        'keywords': [str], 'films': [viewing]}.
    """
    films_by_reference = {}
    for viewing in viewings:
        if "keywords" not in viewing.keys():
            continue
        for keyword in viewing["keywords"]:
            if not keyword.startswith("reference-to-"):
                continue
            # Keyword already attributed to a reference: just attach the film.
            for reference in films_by_reference:
                if keyword in films_by_reference[reference]["keywords"]:
                    films_by_reference[reference]["films"].append(viewing)
                    break
            else:
                # Strip the prefix and common decorations to get a search term.
                keyword = keyword[13:]
                if keyword.startswith("a-"):
                    keyword = keyword[2:]
                if keyword.endswith("-character"):
                    keyword = keyword[:-10]
                referenced = keyword.replace("-", " ")
                try:
                    searches = wikipedia.search(referenced, suggestion=False)
                    referenced_page = wikipedia.page(title=referenced, auto_suggest=False)
                    page_title = referenced_page.title
                    page_url = referenced_page.url
                except wikipedia.DisambiguationError as e:
                    page_title = e.title
                    page_title = page_title[0].upper() + page_title[1:]
                    page_url = VCinemaUtils.generate_wikipedia_url(page_title)
                except wikipedia.PageError:
                    # No exact page: fall back to the top search result.
                    if len(searches) > 0:
                        try:
                            referenced_page = wikipedia.page(title=searches[0], auto_suggest=False)
                            page_title = referenced_page.title
                            page_url = referenced_page.url
                        except wikipedia.DisambiguationError as e:
                            page_title = e.title
                            page_title = page_title[0].upper() + page_title[1:]
                            page_url = VCinemaUtils.generate_wikipedia_url(page_title)
                    else:
                        page_title = referenced.title()
                        page_url = None
                if page_title in films_by_reference.keys():
                    films_by_reference[page_title]["keywords"].append(keyword)
                    if viewing not in films_by_reference[page_title]["films"]:
                        films_by_reference[page_title]["films"].append(viewing)
                else:
                    films_by_reference[page_title] = {"url": page_url,
                                                      "keywords": [keyword],
                                                      "films": [viewing]}
    return films_by_reference
def update_page(token_id, token_secret, films_by_reference_keyword):
    """Render and publish the references wiki page."""
    page = build_page(films_by_reference_keyword)
    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)


def build_page(films_by_reference):
    """Return a markdown table of referenced works and the films citing them."""
    table = "| Referenced | Films |\n| - | - |"
    # Rows are sorted alphabetically by the referenced title.
    for reference, referenced in sorted(films_by_reference.items(), key=lambda t: t[0]):
        reference_url = referenced["url"]
        # Plain text when no Wikipedia page could be resolved.
        if reference_url is None:
            name_cell = reference
        else:
            name_cell = VCinemaUtils.generate_markdown_link(reference, reference_url)
        films_cell = VCinemaUtils.get_film_list(referenced["films"])
        table += "\n" + " | ".join([name_cell, films_cell])
    return table

View File

@ -1,35 +0,0 @@
from collections import OrderedDict
from bookstack import Bookstack
from vcinema_utils import VCinemaUtils
# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-release-year
PAGE_ID = 24
def get_films_by_year(viewings):
    """Group viewings by release year."""
    return VCinemaUtils.filter_viewings(viewings, "year")


def update_page(token_id, token_secret, films_by_year):
    """Render and publish the films-by-release-year wiki page."""
    page = build_page(films_by_year)
    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)


def build_page(films_by_year):
    """Return a markdown table of years (newest first) and their films."""
    page = "| Year | Films |\n| - | - |"
    for year, films in sorted(films_by_year.items(), key=lambda t: t[0], reverse=True):
        page += "\n" + " | ".join([str(year), VCinemaUtils.get_film_list(films)])
    return page

View File

@ -1,86 +0,0 @@
from collections import OrderedDict
from bookstack import Bookstack
from vcinema_utils import VCinemaUtils
# Page ID of https://wiki.jacknet.io/books/vcinema/page/films-by-reference
PAGE_ID = 63
def get_hidden_themes(viewings, token_id, token_secret):
    """Find keywords shared by most films watched on the same date.

    A keyword counts as a 'hidden theme' for a date when more than half of
    that date's films (and at least two films) carry it.

    :param viewings: grouped viewings already annotated with 'keywords'.
    :param token_id: Bookstack API token ID.
    :param token_secret: Bookstack API token secret.
    :return: dict mapping date -> {'viewings': [...], 'hidden_themes':
        {keyword: fraction}} ('hidden_themes' only on multi-film dates).
    """
    # Bit horrible to need to request this again, but it affects the order
    # of the result table.
    viewings_ungrouped = VCinemaUtils.get_vcinema_viewings(token_id, token_secret, combine_repeat_viewings=False)

    # Copy keywords from the grouped viewings onto the ungrouped ones.
    for viewing_ungrouped in viewings_ungrouped:
        for viewing in viewings:
            if viewing['imdb_id'] == viewing_ungrouped['imdb_id']:
                if 'keywords' in viewing:
                    viewing_ungrouped['keywords'] = viewing['keywords']
                break

    by_date = VCinemaUtils.filter_viewings(viewings_ungrouped, "date_watched")
    for date, date_viewings in by_date.items():
        by_date[date] = {"viewings": date_viewings}

    # Add hidden themes for dates with more than one film.
    for date, data in by_date.items():
        if len(data['viewings']) <= 1:
            continue
        keyword_counts = {}
        for viewing in data['viewings']:
            for keyword in viewing.get('keywords', []):
                keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
        # Most-frequent first, so hidden_themes insertion order follows count.
        keyword_counts = {k: v for k, v in sorted(keyword_counts.items(), key=lambda item: item[1], reverse=True)}
        hidden_themes = {}
        for keyword, count in keyword_counts.items():
            rating = float(count) / float(len(data['viewings']))
            if rating > 0.5:
                hidden_themes[keyword] = rating
        by_date[date]['hidden_themes'] = hidden_themes

    return by_date
def update_page(token_id, token_secret, hidden_themes):
    """Render and publish the hidden-themes wiki page."""
    page = build_page(hidden_themes)
    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)


def build_page(hidden_themes):
    """Return a markdown table of dates, films, and their hidden themes.

    Themes shared by every film on a date appear plain; partial themes
    are italicised with their percentage. Dates without themes get 'N/A'.
    """
    table = "| Date | Films | Hidden Themes |\n| - | - | - |"
    for date, data in sorted(hidden_themes.items(), key=lambda t: t[0]):
        cells = [str(date), VCinemaUtils.get_film_list(data['viewings'])]
        if 'hidden_themes' in data and data['hidden_themes'] != {}:
            theme_labels = []
            for theme in sorted(data['hidden_themes'].keys()):
                rating = data['hidden_themes'][theme]
                if rating == 1:
                    theme_labels.append(theme)
                else:
                    theme_labels.append("<i>{} ({}%)</i>".format(theme, round(rating * 100)))
            cells.append("<br>".join(theme_labels))
        else:
            cells.append("N/A")
        table += "\n" + " | ".join(cells)
    return table

View File

@ -1,81 +0,0 @@
from collections import OrderedDict
from progress.bar import IncrementalBar
import math
from concurrent.futures import ThreadPoolExecutor
from bookstack import Bookstack
from imdb_utils import IMDbUtils
from vcinema_utils import VCinemaUtils
# Page ID of https://wiki.jacknet.io/books/vcinema/page/keyword-scores
PAGE_ID = 23
def get_keyword_scores(viewings):
    """Score keywords by how over-represented they are among VCinema films.

    :return: dict keyword -> {'vcinema_films': [...], 'total': int,
        'score': float} ('total' and 'score' only where the thresholds
        below are met).
    """
    keyword_films = VCinemaUtils.filter_viewings(viewings, "keywords")
    for keyword, films in keyword_films.items():
        keyword_films[keyword] = {"vcinema_films": films}

    # Thresholds: at least 2 VCinema films and 4 IMDb titles for a score.
    min_vcinema_count = 2
    min_imdb_count = 4

    add_keyword_totals(keyword_films, min_vcinema_count)
    add_keyword_scores(keyword_films, min_vcinema_count, min_imdb_count)

    return keyword_films


def update_page(token_id, token_secret, keyword_data):
    """Render and publish the keyword-scores wiki page."""
    page = build_page(keyword_data)
    Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)
def add_keyword_totals(keywords, min_vcinema_count):
    """Concurrently fetch IMDb-wide totals for frequently-seen keywords.

    Only keywords on at least min_vcinema_count VCinema films are looked
    up; others never get a 'total' key.
    """
    eligible_count = len([k for k in keywords.keys()
                          if len(keywords[k]['vcinema_films']) >= min_vcinema_count])
    with IncrementalBar(message='%(percent).1f%% - %(eta)ds remaining', max=eligible_count, check_tty=False) as bar:
        with ThreadPoolExecutor(6) as executor:
            for keyword, data in keywords.items():
                if len(data['vcinema_films']) >= min_vcinema_count:
                    executor.submit(add_keyword_total, keyword, keywords, bar)


def add_keyword_total(keyword, keywords, progress_bar=None):
    """Store the IMDb-wide title count for keyword under 'total'."""
    keywords[keyword]['total'] = IMDbUtils.get_keyword_count(keyword)
    if progress_bar is not None:
        progress_bar.next()
def add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count):
    """Attach 'score' (= vcinema count / ln(total)) where thresholds are met.

    Entries without a 'total' (never looked up), with too few VCinema
    films, or with too few IMDb titles are left unscored.
    """
    for keyword, data in keyword_data.items():
        if 'total' not in data:
            continue
        vcinema_count = len(data['vcinema_films'])
        total_count = data['total']
        if vcinema_count >= min_vcinema_count and total_count >= min_imdb_count:
            data['score'] = vcinema_count / math.log(total_count)
def build_page(keyword_data, minimum_score=1.0):
    """Return a markdown table of scored keywords, highest score first.

    :param minimum_score: keywords scoring below this are omitted.
    """
    scored = {k: v for k, v in keyword_data.items()
              if 'score' in v and v['score'] >= minimum_score}
    table = "| Keyword | Number of VCinema Films | Total IMDb entries | Score |\n| - | - | - | - |"
    for keyword, data in sorted(scored.items(), key=lambda t: t[1]['score'], reverse=True):
        row = [str(keyword),
               str(len(data['vcinema_films'])),
               str(data['total']),
               str(round(data['score'], 3))]
        table += "\n" + " | ".join(row)
    return table

View File

@ -1,20 +0,0 @@
from bs4 import BeautifulSoup
from bookstack import Bookstack
from vcinema_utils import VCinemaUtils
# Page ID of https://wiki.jacknet.io/books/vcinema/page/csv
PAGE_ID = 11
def update_viewings_csv(token_id, token_secret):
    """Re-upload the vcinema.csv attachment from the CSV wiki page's content.

    The canonical viewings data lives in a <code> block on the wiki page;
    this extracts it and overwrites the existing attachment.
    """
    print("Retrieving viewings page")
    html_page = Bookstack.get_page_html(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID)

    soup = BeautifulSoup(html_page, 'html.parser')
    csv_data = soup.find("code").text.strip().encode('utf-8')

    existing_attachment_id = VCinemaUtils.get_viewings_csv_attachment_id(token_id, token_secret)

    print("Updating file")
    Bookstack.update_attachment(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, existing_attachment_id, "vcinema.csv", csv_data, PAGE_ID)
    print("File updated")

View File

1
wiki_utils Submodule

@ -0,0 +1 @@
Subproject commit 88be36d5cd7378a01d9861726bf123715fe81d4a