vcinema/wiki_pages/KeywordScores.py

from collections import OrderedDict
from progress.bar import IncrementalBar
import math
from concurrent.futures import ThreadPoolExecutor

from bookstack import Bookstack
from imdb_utils import IMDbUtils
from markdown_utils import MarkdownUtils
from vcinema_utils import VCinemaUtils

# Page ID of https://wiki.jacknet.io/books/vcinema/page/keyword-scores
PAGE_ID = 23


def get_keyword_scores(viewings, min_vcinema_count=2, min_imdb_count=4):
    """Group viewings by keyword and attach IMDb totals and scores.

    Args:
        viewings: Viewing records, passed straight to
            ``VCinemaUtils.filter_viewings(viewings, "keywords")``.
        min_vcinema_count: Minimum number of VCinema films a keyword must
            have before its IMDb total is looked up and a score is computed.
        min_imdb_count: Minimum IMDb total a keyword must have before a
            score is computed.

    Returns:
        The mapping returned by ``filter_viewings``, with every value
        replaced by a dict holding ``'vcinema_films'`` and, where the
        thresholds are met, ``'total'`` and ``'score'``.
    """
    # NOTE(review): presumably a mapping of keyword -> viewings with that
    # keyword; it is only used through .items() and item assignment here.
    keyword_data = VCinemaUtils.filter_viewings(viewings, "keywords")

    # Wrap each keyword's viewings in a dict so the total and score can be
    # stored alongside them. Only values of existing keys are replaced, so
    # mutating while iterating is safe.
    for keyword, keyword_viewings in keyword_data.items():
        keyword_data[keyword] = {"vcinema_films": keyword_viewings}

    add_keyword_totals(keyword_data, min_vcinema_count)
    add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count)

    return keyword_data


def update_page(token_id, token_secret, keyword_data):
    """Build the keyword-scores table and upload it to wiki page PAGE_ID.

    Args:
        token_id: API token ID forwarded to ``Bookstack.update_page``.
        token_secret: API token secret forwarded to ``Bookstack.update_page``.
        keyword_data: Keyword mapping handed to ``build_page``.
    """
    rendered = build_page(keyword_data)
    Bookstack.update_page(
        VCinemaUtils.JACKNET_WIKI_URL,
        token_id,
        token_secret,
        PAGE_ID,
        markdown=rendered,
    )


def add_keyword_totals(keywords, min_vcinema_count):
    """Look up the IMDb total for every keyword with enough VCinema films.

    Each qualifying keyword is submitted to a pool of 6 worker threads that
    run ``add_keyword_total``, which stores the result under ``'total'`` in
    that keyword's dict. A progress bar sized to the number of qualifying
    keywords is passed to each worker.

    Args:
        keywords: Mapping of keyword -> dict containing ``'vcinema_films'``.
        min_vcinema_count: Keywords with fewer VCinema films are skipped.
    """
    # Decide once which keywords qualify; the same list sizes the progress
    # bar and drives the submissions.
    eligible = [
        name
        for name, entry in keywords.items()
        if len(entry['vcinema_films']) >= min_vcinema_count
    ]

    # NOTE(review): the futures returned by submit() are not kept, so an
    # exception raised inside a worker is never re-raised here.
    with IncrementalBar(message='%(percent).1f%% - %(eta)ds remaining', max=len(eligible), check_tty=False) as bar, \
            ThreadPoolExecutor(6) as executor:
        for name in eligible:
            executor.submit(add_keyword_total, name, keywords, bar)


def add_keyword_total(keyword, keywords, progress_bar=None):
    """Store the IMDb count for ``keyword`` under ``keywords[keyword]['total']``.

    Args:
        keyword: Keyword passed to ``IMDbUtils.get_keyword_count``.
        keywords: Mapping of keyword -> dict; the entry for ``keyword`` is
            updated in place.
        progress_bar: Optional object with a ``next()`` method, advanced once
            after the total has been stored.
    """
    keywords[keyword]['total'] = IMDbUtils.get_keyword_count(keyword)

    if progress_bar is None:
        return

    progress_bar.next()


def add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count):
    """Add a ``'score'`` to every keyword that meets both thresholds.

    The score is ``vcinema_count / ln(total_count)``, where ``vcinema_count``
    is the number of entries in ``'vcinema_films'`` and ``total_count`` is the
    value stored under ``'total'``.

    Keywords are left without a score when they have no ``'total'``, when
    either count is below its threshold, or when ``total_count`` is 1 or less
    (the logarithm would be zero or undefined).

    Args:
        keyword_data: Mapping of keyword -> dict; updated in place.
        min_vcinema_count: Minimum number of VCinema films required.
        min_imdb_count: Minimum IMDb total required.
    """
    for data in keyword_data.values():
        if 'total' not in data:
            continue

        vcinema_count = len(data['vcinema_films'])
        total_count = data['total']

        if vcinema_count < min_vcinema_count or total_count < min_imdb_count:
            continue

        # log(1) == 0 would raise ZeroDivisionError and log of a non-positive
        # number raises ValueError; both are reachable if min_imdb_count <= 1.
        if total_count <= 1:
            continue

        data['score'] = vcinema_count / math.log(total_count)


def build_page(keyword_data, minimum_score=1.0):
    """Build the keyword-scores table, highest score first.

    Only keywords that have a ``'score'`` of at least ``minimum_score`` are
    included. Each row holds the keyword, its number of VCinema films, its
    IMDb total and its score rounded to 3 decimal places.

    Args:
        keyword_data: Mapping of keyword -> dict with ``'vcinema_films'``,
            ``'total'`` and (optionally) ``'score'``.
        minimum_score: Lowest score a keyword may have and still be listed.

    Returns:
        The populated ``MarkdownUtils.MarkdownTable``.
    """
    ranked = [
        (keyword, data)
        for keyword, data in keyword_data.items()
        if 'score' in data and data['score'] >= minimum_score
    ]
    # Stable descending sort: ties keep their original relative order.
    ranked.sort(key=lambda entry: entry[1]['score'], reverse=True)

    table = MarkdownUtils.MarkdownTable(["Keyword", "Number of VCinema Films", "Total IMDb entries", "Score"])

    for keyword, data in ranked:
        table.add_row([
            keyword,
            len(data['vcinema_films']),
            data['total'],
            round(data['score'], 3),
        ])

    return table
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00			`from collections import OrderedDict`
			`from progress.bar import IncrementalBar`
			`import math`
			`from concurrent.futures import ThreadPoolExecutor`

move update 2022-04-16 09:43:51 +01:00			`from bookstack import Bookstack`
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00			`from imdb_utils import IMDbUtils`
add markdown table 2022-12-09 20:45:50 +00:00			`from markdown_utils import MarkdownUtils`
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00			`from vcinema_utils import VCinemaUtils`

			`# Page ID of https://wiki.jacknet.io/books/vcinema/page/keyword-scores`
moved other files to dir 2022-04-15 17:29:19 +01:00			`PAGE_ID = 23`
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00

			`def get_keyword_scores(viewings):`
			`viewings_filtered_keyword = VCinemaUtils.filter_viewings(viewings, "keywords")`

			`for keyword, viewings in viewings_filtered_keyword.items():`
			`viewings_filtered_keyword[keyword] = {"vcinema_films": viewings}`

			`min_vcinema_count = 2`
			`min_imdb_count = 4`

			`add_keyword_totals(viewings_filtered_keyword, min_vcinema_count)`
			`add_keyword_scores(viewings_filtered_keyword, min_vcinema_count, min_imdb_count)`

			`return viewings_filtered_keyword`


move update 2022-04-16 09:43:51 +01:00			`def update_page(token_id, token_secret, keyword_data):`
			`page = build_page(keyword_data)`
			`Bookstack.update_page(VCinemaUtils.JACKNET_WIKI_URL, token_id, token_secret, PAGE_ID, markdown=page)`


Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00			`def add_keyword_totals(keywords, min_vcinema_count):`
			`keyword_count = len([keyword for keyword in keywords.keys() if len(keywords[keyword]['vcinema_films']) >= min_vcinema_count])`

			`with IncrementalBar(message='%(percent).1f%% - %(eta)ds remaining', max=keyword_count, check_tty=False) as bar:`
			`with ThreadPoolExecutor(6) as executor:`
			`for keyword, data in keywords.items():`
			`if len(data['vcinema_films']) >= min_vcinema_count:`
			`executor.submit(add_keyword_total, keyword, keywords, bar)`


			`def add_keyword_total(keyword, keywords, progress_bar=None):`
			`keyword_total = IMDbUtils.get_keyword_count(keyword)`

			`keywords[keyword]['total'] = keyword_total`

			`if progress_bar is not None:`
			`progress_bar.next()`


			`def add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count):`
			`for keyword in keyword_data.keys():`
			`if 'total' in keyword_data[keyword]:`
			`vcinema_count = len(keyword_data[keyword]['vcinema_films'])`
			`total_count = keyword_data[keyword]['total']`

			`if vcinema_count >= min_vcinema_count and total_count >= min_imdb_count:`
			`score = vcinema_count / math.log(total_count)`

			`keyword_data[keyword]['score'] = score`


add hidden_themes and keyword_scores in update_Wiki script 2022-04-08 23:27:09 +01:00			`def build_page(keyword_data, minimum_score=1.0):`
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00			`keyword_data = {k: v for k, v in keyword_data.items() if 'score' in v and v['score'] >= minimum_score}`
			`keyword_data = OrderedDict(sorted(keyword_data.items(), key=lambda t: t[1]['score'], reverse=True))`

add markdown table 2022-12-09 20:45:50 +00:00			`table = MarkdownUtils.MarkdownTable(["Keyword", "Number of VCinema Films", "Total IMDb entries", "Score"])`
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00
			`for keyword, data in keyword_data.items():`
fix scores page 2022-12-10 17:10:24 +00:00			`number_of_vcinema_films = len(data['vcinema_films'])`
			`number_of_imdb_films = data['total']`
			`keyword_score = round(data['score'], 3)`

			`row_data = [keyword, number_of_vcinema_films, number_of_imdb_films, keyword_score]`
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00
add markdown table 2022-12-09 20:45:50 +00:00			`table.add_row(row_data)`
Create update_keywords_scores.py 2022-04-08 22:40:01 +01:00
			`return table`