From e89eb332308fd1c599a2e2802c9445886d29ccfb Mon Sep 17 00:00:00 2001 From: Sarah Date: Sat, 3 Dec 2022 12:05:52 +0000 Subject: [PATCH 1/9] use page of wikipedia page for table row --- wiki_pages/FilmsByReference.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/wiki_pages/FilmsByReference.py b/wiki_pages/FilmsByReference.py index b3e5964..9cfd8bc 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -1,7 +1,6 @@ from collections import OrderedDict -import string +import wikipedia -from bookstack import Bookstack from vcinema_utils import VCinemaUtils # Page ID of https://wiki.jacknet.io/books/vcinema/page/references @@ -25,12 +24,32 @@ def build_page(films_by_reference_keyword): table = "| Referenced | Films |\n| - | - |" - for year in reference_keywords_sorted.keys(): + for reference_keyword in reference_keywords_sorted.keys(): table += "\n" row_data = [] - row_data.append("[{}](https://en.wikipedia.org/wiki/{})".format(str(string.capwords(year[13:].replace("-", " "))), str(string.capwords(year[13:].replace("-", " ")).replace(" ", "_")))) - row_data.append(VCinemaUtils.get_film_list(reference_keywords_sorted[year])) + + referenced = reference_keyword[13:].replace("-", " ") + + if referenced.startswith("a "): + referenced = referenced[2:] + else: + referenced = referenced + + try: + searches = wikipedia.search(referenced, suggestion=False) + referenced_page = wikipedia.page(title=referenced, auto_suggest=False) + row_data.append("[{}]({}) ()".format(referenced_page.title, referenced_page.url, reference_keyword)) + except wikipedia.DisambiguationError as e1: + row_data.append("[{}](https://en.wikipedia.org/wiki/{})) ()".format(e1.title, e1.title.replace(" ", "_"), reference_keyword)) + except wikipedia.PageError as _: + try: + referenced_page = wikipedia.page(title=searches[0], auto_suggest=False) + row_data.append("[{}]({}) ()".format(referenced_page.title, referenced_page.url, 
reference_keyword)) + except wikipedia.DisambiguationError as e2: + row_data.append( + "[{}](https://en.wikipedia.org/wiki/{})) ()".format(e2.title, e2.title.replace(" ", "_"),reference_keyword)) + row_data.append(VCinemaUtils.get_film_list(reference_keywords_sorted[reference_keyword])) table += " | ".join(row_data) -- 2.47.2 From 9de7ec31c2e423676118266e377b9d97f8079268 Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 2022 12:16:26 +0000 Subject: [PATCH 2/9] get references before generating table --- wiki_pages/FilmsByReference.py | 49 +++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/wiki_pages/FilmsByReference.py b/wiki_pages/FilmsByReference.py index 9cfd8bc..dd0a2fb 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -1,6 +1,7 @@ from collections import OrderedDict import wikipedia +from bookstack import Bookstack from vcinema_utils import VCinemaUtils # Page ID of https://wiki.jacknet.io/books/vcinema/page/references @@ -20,14 +21,11 @@ def update_page(token_id, token_secret, films_by_reference_keyword): def build_page(films_by_reference_keyword): - reference_keywords_sorted = OrderedDict(sorted(films_by_reference_keyword.items(), key=lambda t: t[0])) + references = {} - table = "| Referenced | Films |\n| - | - |" + for reference_keyword in films_by_reference_keyword: - for reference_keyword in reference_keywords_sorted.keys(): - table += "\n" - - row_data = [] + print(reference_keyword) referenced = reference_keyword[13:].replace("-", " ") @@ -39,17 +37,44 @@ def build_page(films_by_reference_keyword): try: searches = wikipedia.search(referenced, suggestion=False) referenced_page = wikipedia.page(title=referenced, auto_suggest=False) - row_data.append("[{}]({}) ()".format(referenced_page.title, referenced_page.url, reference_keyword)) + + page_title = referenced_page.title + page_url = referenced_page.url + except wikipedia.DisambiguationError as e1: - 
row_data.append("[{}](https://en.wikipedia.org/wiki/{})) ()".format(e1.title, e1.title.replace(" ", "_"), reference_keyword)) + page_title = e1.title + page_url = "https://en.wikipedia.org/wiki/{}".format(e1.title.replace(" ", "_")) except wikipedia.PageError as _: try: referenced_page = wikipedia.page(title=searches[0], auto_suggest=False) - row_data.append("[{}]({}) ()".format(referenced_page.title, referenced_page.url, reference_keyword)) + + page_title = referenced_page.title + page_url = referenced_page.url except wikipedia.DisambiguationError as e2: - row_data.append( - "[{}](https://en.wikipedia.org/wiki/{})) ()".format(e2.title, e2.title.replace(" ", "_"),reference_keyword)) - row_data.append(VCinemaUtils.get_film_list(reference_keywords_sorted[reference_keyword])) + page_title = e2.title + page_url = "https://en.wikipedia.org/wiki/{}".format(e2.title.replace(" ", "_")) + + if page_title in references.keys(): + references[page_title]["films"].extend(films_by_reference_keyword[reference_keyword]) + else: + references[page_title] = {"url": page_url, + "films": films_by_reference_keyword[reference_keyword]} + + references_sorted = OrderedDict(sorted(references.items(), key=lambda t: t[0])) + + table = "| Referenced | Films |\n| - | - |" + + for reference, referenced in references_sorted.items(): + table += "\n" + + row_data = [] + + reference_url = referenced["url"] + reference_title = reference + referenced_films = referenced["films"] + + row_data.append("[{}]({})".format(reference_title, reference_url)) + row_data.append(VCinemaUtils.get_film_list(referenced_films)) table += " | ".join(row_data) -- 2.47.2 From 95e7804a4accbea78778989b192196e1968562cb Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 2022 13:00:58 +0000 Subject: [PATCH 3/9] get references in the processing func --- wiki_pages/FilmsByReference.py | 93 ++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/wiki_pages/FilmsByReference.py 
b/wiki_pages/FilmsByReference.py index dd0a2fb..9c7e30d 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -9,10 +9,52 @@ PAGE_ID = 62 def get_films_by_reference(viewings): - viewings_filtered_by_keyword = VCinemaUtils.filter_viewings(viewings, "keywords") - viewings_filtered_by_reference_keyword = {k: v for k, v in viewings_filtered_by_keyword.items() if k.startswith("reference-to")} + films_by_reference = {} - return viewings_filtered_by_reference_keyword + for viewing in viewings: + if "keywords" in viewing.keys(): + for keyword in viewing["keywords"]: + if keyword.startswith("reference-to-"): + + for reference in films_by_reference: + if keyword in films_by_reference[reference]["keywords"]: + films_by_reference[reference]["films"].append(viewing) + break + else: + referenced = keyword[13:].replace("-", " ") + + if referenced.startswith("a "): + referenced = referenced[2:] + + try: + searches = wikipedia.search(referenced, suggestion=False) + referenced_page = wikipedia.page(title=referenced, auto_suggest=False) + + page_title = referenced_page.title + page_url = referenced_page.url + + except wikipedia.DisambiguationError as e1: + page_title = e1.title + page_url = "https://en.wikipedia.org/wiki/{}".format(e1.title.replace(" ", "_")) + except wikipedia.PageError as _: + try: + referenced_page = wikipedia.page(title=searches[0], auto_suggest=False) + + page_title = referenced_page.title + page_url = referenced_page.url + except wikipedia.DisambiguationError as e2: + page_title = e2.title + page_url = "https://en.wikipedia.org/wiki/{}".format(e2.title.replace(" ", "_")) + + if page_title in films_by_reference.keys(): + films_by_reference[page_title]["films"].append(viewing) + films_by_reference[page_title]["keywords"].append(keyword) + else: + films_by_reference[page_title] = {"url": page_url, + "keywords": [keyword], + "films": [viewing]} + + return films_by_reference def update_page(token_id, token_secret, 
films_by_reference_keyword): @@ -20,51 +62,12 @@ def update_page(token_id, token_secret, films_by_reference_keyword): Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page) -def build_page(films_by_reference_keyword): - references = {} - - for reference_keyword in films_by_reference_keyword: - - print(reference_keyword) - - referenced = reference_keyword[13:].replace("-", " ") - - if referenced.startswith("a "): - referenced = referenced[2:] - else: - referenced = referenced - - try: - searches = wikipedia.search(referenced, suggestion=False) - referenced_page = wikipedia.page(title=referenced, auto_suggest=False) - - page_title = referenced_page.title - page_url = referenced_page.url - - except wikipedia.DisambiguationError as e1: - page_title = e1.title - page_url = "https://en.wikipedia.org/wiki/{}".format(e1.title.replace(" ", "_")) - except wikipedia.PageError as _: - try: - referenced_page = wikipedia.page(title=searches[0], auto_suggest=False) - - page_title = referenced_page.title - page_url = referenced_page.url - except wikipedia.DisambiguationError as e2: - page_title = e2.title - page_url = "https://en.wikipedia.org/wiki/{}".format(e2.title.replace(" ", "_")) - - if page_title in references.keys(): - references[page_title]["films"].extend(films_by_reference_keyword[reference_keyword]) - else: - references[page_title] = {"url": page_url, - "films": films_by_reference_keyword[reference_keyword]} - - references_sorted = OrderedDict(sorted(references.items(), key=lambda t: t[0])) +def build_page(films_by_reference): + films_by_reference_sorted = OrderedDict(sorted(films_by_reference.items(), key=lambda t: t[0])) table = "| Referenced | Films |\n| - | - |" - for reference, referenced in references_sorted.items(): + for reference, referenced in films_by_reference_sorted.items(): table += "\n" row_data = [] -- 2.47.2 From 19365a77d58de215c24f44fa0e9219eba0bf336d Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 
2022 14:14:56 +0000 Subject: [PATCH 4/9] use helper function --- wiki_pages/FilmsByReference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wiki_pages/FilmsByReference.py b/wiki_pages/FilmsByReference.py index 9c7e30d..2f714a8 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -76,7 +76,7 @@ def build_page(films_by_reference): reference_title = reference referenced_films = referenced["films"] - row_data.append("[{}]({})".format(reference_title, reference_url)) + row_data.append(VCinemaUtils.generate_markdown_link(reference_title, reference_url)) row_data.append(VCinemaUtils.get_film_list(referenced_films)) table += " | ".join(row_data) -- 2.47.2 From b8128be130934cb8f97a75d8a09445305eaa418a Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 2022 14:18:30 +0000 Subject: [PATCH 5/9] refactor and use helper function --- wiki_pages/FilmsByReference.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/wiki_pages/FilmsByReference.py b/wiki_pages/FilmsByReference.py index 2f714a8..1a00556 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -33,18 +33,18 @@ def get_films_by_reference(viewings): page_title = referenced_page.title page_url = referenced_page.url - except wikipedia.DisambiguationError as e1: - page_title = e1.title - page_url = "https://en.wikipedia.org/wiki/{}".format(e1.title.replace(" ", "_")) + except wikipedia.DisambiguationError as e: + page_title = e.title + page_url = VCinemaUtils.generate_wikipedia_url(page_title) except wikipedia.PageError as _: try: referenced_page = wikipedia.page(title=searches[0], auto_suggest=False) page_title = referenced_page.title page_url = referenced_page.url - except wikipedia.DisambiguationError as e2: - page_title = e2.title - page_url = "https://en.wikipedia.org/wiki/{}".format(e2.title.replace(" ", "_")) + except wikipedia.DisambiguationError as e: + page_title = e.title + page_url = 
VCinemaUtils.generate_wikipedia_url(page_title) if page_title in films_by_reference.keys(): films_by_reference[page_title]["films"].append(viewing) -- 2.47.2 From 6e182deb6effe92aefadd6bea4c26491b23ef4e2 Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 2022 14:20:36 +0000 Subject: [PATCH 6/9] use helper function --- vcinema_utils/VCinemaUtils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcinema_utils/VCinemaUtils.py b/vcinema_utils/VCinemaUtils.py index 1e80148..f1e0bb0 100644 --- a/vcinema_utils/VCinemaUtils.py +++ b/vcinema_utils/VCinemaUtils.py @@ -135,7 +135,7 @@ def generate_markdown_link(text, url): def generate_imdb_film_link(film): - return generate_markdown_link(film['title'], "https://www.imdb.com/title/tt{}/".format(film['imdb_id'])) + return generate_markdown_link(film['title'], generate_imdb_url(film['imdb_id'])) def generate_wikipedia_page_link(page_title): -- 2.47.2 From 3c8afec7753665c0884df2616d015f254e9cd736 Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 2022 14:20:49 +0000 Subject: [PATCH 7/9] refactor --- wiki_pages/FilmsByReference.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/wiki_pages/FilmsByReference.py b/wiki_pages/FilmsByReference.py index 1a00556..a0e072a 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -51,8 +51,8 @@ def get_films_by_reference(viewings): films_by_reference[page_title]["keywords"].append(keyword) else: films_by_reference[page_title] = {"url": page_url, - "keywords": [keyword], - "films": [viewing]} + "keywords": [keyword], + "films": [viewing]} return films_by_reference @@ -73,10 +73,9 @@ def build_page(films_by_reference): row_data = [] reference_url = referenced["url"] - reference_title = reference referenced_films = referenced["films"] - row_data.append(VCinemaUtils.generate_markdown_link(reference_title, reference_url)) + row_data.append(VCinemaUtils.generate_markdown_link(reference, reference_url)) 
row_data.append(VCinemaUtils.get_film_list(referenced_films)) table += " | ".join(row_data) -- 2.47.2 From 0f59b2cd67054f178f98b849fcc3dd4dd8b5ac1f Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 2022 14:25:17 +0000 Subject: [PATCH 8/9] add missing helper functions --- vcinema_utils/VCinemaUtils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vcinema_utils/VCinemaUtils.py b/vcinema_utils/VCinemaUtils.py index f1e0bb0..7fbf576 100644 --- a/vcinema_utils/VCinemaUtils.py +++ b/vcinema_utils/VCinemaUtils.py @@ -139,4 +139,12 @@ def generate_imdb_film_link(film): def generate_wikipedia_page_link(page_title): - return generate_markdown_link(page_title, "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_"))) + return generate_markdown_link(page_title, generate_wikipedia_url(page_title)) + + +def generate_imdb_url(imdb_id): + return "https://www.imdb.com/title/tt{}/".format(imdb_id) + + +def generate_wikipedia_url(page_title): + return "https://en.wikipedia.org/wiki/{}".format(page_title.replace(" ", "_")) -- 2.47.2 From 9c1e4caf6851fb06721b1fabd17b03f61b81ce7f Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 9 Dec 2022 20:25:19 +0000 Subject: [PATCH 9/9] refactor --- wiki_pages/FilmsByReference.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/wiki_pages/FilmsByReference.py b/wiki_pages/FilmsByReference.py index a0e072a..6d554c4 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -21,10 +21,12 @@ def get_films_by_reference(viewings): films_by_reference[reference]["films"].append(viewing) break else: - referenced = keyword[13:].replace("-", " ") + keyword = keyword[13:] - if referenced.startswith("a "): - referenced = referenced[2:] + if keyword.startswith("a-"): + keyword = keyword[2:] + + referenced = keyword.replace("-", " ") try: searches = wikipedia.search(referenced, suggestion=False) @@ -63,11 +65,11 @@ def update_page(token_id, token_secret, 
films_by_reference_keyword): def build_page(films_by_reference): - films_by_reference_sorted = OrderedDict(sorted(films_by_reference.items(), key=lambda t: t[0])) + films_by_reference = OrderedDict(sorted(films_by_reference.items(), key=lambda t: t[0])) table = "| Referenced | Films |\n| - | - |" - for reference, referenced in films_by_reference_sorted.items(): + for reference, referenced in films_by_reference.items(): table += "\n" row_data = [] -- 2.47.2