From 95e7804a4accbea78778989b192196e1968562cb Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 4 Dec 2022 13:00:58 +0000 Subject: [PATCH] get references in the processing func --- wiki_pages/FilmsByReference.py | 93 ++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/wiki_pages/FilmsByReference.py b/wiki_pages/FilmsByReference.py index dd0a2fb..9c7e30d 100644 --- a/wiki_pages/FilmsByReference.py +++ b/wiki_pages/FilmsByReference.py @@ -9,10 +9,52 @@ PAGE_ID = 62 def get_films_by_reference(viewings): - viewings_filtered_by_keyword = VCinemaUtils.filter_viewings(viewings, "keywords") - viewings_filtered_by_reference_keyword = {k: v for k, v in viewings_filtered_by_keyword.items() if k.startswith("reference-to")} + films_by_reference = {} - return viewings_filtered_by_reference_keyword + for viewing in viewings: + if "keywords" in viewing.keys(): + for keyword in viewing["keywords"]: + if keyword.startswith("reference-to-"): + + for reference in films_by_reference: + if keyword in films_by_reference[reference]["keywords"]: + films_by_reference[reference]["films"].append(viewing) + break + else: + referenced = keyword[13:].replace("-", " ") + + if referenced.startswith("a "): + referenced = referenced[2:] + + try: + searches = wikipedia.search(referenced, suggestion=False) + referenced_page = wikipedia.page(title=referenced, auto_suggest=False) + + page_title = referenced_page.title + page_url = referenced_page.url + + except wikipedia.DisambiguationError as e1: + page_title = e1.title + page_url = "https://en.wikipedia.org/wiki/{}".format(e1.title.replace(" ", "_")) + except wikipedia.PageError as _: + try: + referenced_page = wikipedia.page(title=searches[0], auto_suggest=False) + + page_title = referenced_page.title + page_url = referenced_page.url + except wikipedia.DisambiguationError as e2: + page_title = e2.title + page_url = "https://en.wikipedia.org/wiki/{}".format(e2.title.replace(" ", "_")) + + if page_title in films_by_reference.keys(): + films_by_reference[page_title]["films"].append(viewing) + films_by_reference[page_title]["keywords"].append(keyword) + else: + films_by_reference[page_title] = {"url": page_url, + "keywords": [keyword], + "films": [viewing]} + + return films_by_reference def update_page(token_id, token_secret, films_by_reference_keyword): @@ -20,51 +62,12 @@ def update_page(token_id, token_secret, films_by_reference_keyword): Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page) -def build_page(films_by_reference_keyword): - references = {} - - for reference_keyword in films_by_reference_keyword: - - print(reference_keyword) - - referenced = reference_keyword[13:].replace("-", " ") - - if referenced.startswith("a "): - referenced = referenced[2:] - else: - referenced = referenced - - try: - searches = wikipedia.search(referenced, suggestion=False) - referenced_page = wikipedia.page(title=referenced, auto_suggest=False) - - page_title = referenced_page.title - page_url = referenced_page.url - - except wikipedia.DisambiguationError as e1: - page_title = e1.title - page_url = "https://en.wikipedia.org/wiki/{}".format(e1.title.replace(" ", "_")) - except wikipedia.PageError as _: - try: - referenced_page = wikipedia.page(title=searches[0], auto_suggest=False) - - page_title = referenced_page.title - page_url = referenced_page.url - except wikipedia.DisambiguationError as e2: - page_title = e2.title - page_url = "https://en.wikipedia.org/wiki/{}".format(e2.title.replace(" ", "_")) - - if page_title in references.keys(): - references[page_title]["films"].extend(films_by_reference_keyword[reference_keyword]) - else: - references[page_title] = {"url": page_url, - "films": films_by_reference_keyword[reference_keyword]} - - references_sorted = OrderedDict(sorted(references.items(), key=lambda t: t[0])) +def build_page(films_by_reference): + films_by_reference_sorted = OrderedDict(sorted(films_by_reference.items(), key=lambda t: t[0])) table = "| Referenced | Films |\n| - | - |" - for reference, referenced in references_sorted.items(): + for reference, referenced in films_by_reference_sorted.items(): table += "\n" row_data = []