Create update_keywords_scores.py
This commit is contained in:
		
							parent
							
								
									4207e89665
								
							
						
					
					
						commit
						e0dff08c14
					
				
							
								
								
									
										75
									
								
								update_keywords_scores.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								update_keywords_scores.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,75 @@ | |||||||
|  | from collections import OrderedDict | ||||||
|  | from progress.bar import IncrementalBar | ||||||
|  | import math | ||||||
|  | from concurrent.futures import ThreadPoolExecutor | ||||||
|  | 
 | ||||||
|  | from imdb_utils import IMDbUtils | ||||||
|  | from vcinema_utils import VCinemaUtils | ||||||
|  | 
 | ||||||
|  | # Page ID of https://wiki.jacknet.io/books/vcinema/page/keyword-scores | ||||||
|  | KEYWORD_SCORES_PAGE_ID = 23 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def get_keyword_scores(viewings, min_vcinema_count=2, min_imdb_count=4):
    """Build per-keyword film lists, IMDb totals and scores from viewings.

    Args:
        viewings: Viewings handed to ``VCinemaUtils.filter_viewings`` with
            the ``"keywords"`` filter (presumably grouping them by keyword;
            confirm against ``VCinemaUtils``).
        min_vcinema_count: Minimum number of VCinema films a keyword needs
            before its IMDb total is fetched and a score is considered.
        min_imdb_count: Minimum IMDb total a keyword needs to be scored.

    Returns:
        The mapping produced by ``VCinemaUtils.filter_viewings``, with each
        value replaced by a dict holding ``'vcinema_films'`` and, where the
        thresholds are met, ``'total'`` and ``'score'``.
    """
    keyword_data = VCinemaUtils.filter_viewings(viewings, "keywords")

    # Wrap each keyword's viewings in a dict so totals and scores can be
    # attached later. Only existing keys are reassigned, so updating the
    # mapping while iterating over it is safe.
    for keyword, keyword_viewings in keyword_data.items():
        keyword_data[keyword] = {"vcinema_films": keyword_viewings}

    add_keyword_totals(keyword_data, min_vcinema_count)
    add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count)

    return keyword_data
|  | 
 | ||||||
|  | 
 | ||||||
def add_keyword_totals(keywords, min_vcinema_count):
    """Fetch the total for every keyword that has enough VCinema films.

    Args:
        keywords: Mapping of keyword to a dict containing at least
            ``'vcinema_films'``; ``add_keyword_total`` stores each result
            under that keyword's ``'total'`` key.
        min_vcinema_count: Keywords with fewer films than this are skipped.
    """
    # Decide once which keywords qualify; the same list sizes the progress
    # bar and drives the submissions.
    eligible = [
        keyword
        for keyword, data in keywords.items()
        if len(data['vcinema_films']) >= min_vcinema_count
    ]

    progress = IncrementalBar(
        message='%(percent).1f%% - %(eta)ds remaining',
        max=len(eligible),
        check_tty=False,
    )

    # Leaving the executor's ``with`` block waits for all submitted work.
    # NOTE: the returned futures are discarded, so an exception raised inside
    # a worker is not re-raised here; that keyword simply gets no 'total'.
    with progress as bar, ThreadPoolExecutor(6) as executor:
        for keyword in eligible:
            executor.submit(add_keyword_total, keyword, keywords, bar)
|  | 
 | ||||||
|  | 
 | ||||||
def add_keyword_total(keyword, keywords, progress_bar=None):
    """Store the count from ``IMDbUtils.get_keyword_count`` for one keyword.

    Args:
        keyword: Keyword to look up; must already be a key of ``keywords``.
        keywords: Mapping whose ``keywords[keyword]['total']`` is set.
        progress_bar: Optional object with a ``next()`` method, advanced by
            one step once the total has been stored.
    """
    keywords[keyword]['total'] = IMDbUtils.get_keyword_count(keyword)

    if progress_bar is None:
        return

    progress_bar.next()
|  | 
 | ||||||
|  | 
 | ||||||
def add_keyword_scores(keyword_data, min_vcinema_count, min_imdb_count):
    """Add a ``'score'`` to every keyword that meets both thresholds.

    The score is ``len(vcinema_films) / ln(total)``.

    Args:
        keyword_data: Mapping of keyword to a dict with ``'vcinema_films'``
            and, optionally, ``'total'``. Entries without ``'total'`` are
            left untouched. Qualifying entries gain a ``'score'`` key.
        min_vcinema_count: Minimum number of VCinema films required.
        min_imdb_count: Minimum ``'total'`` required.
    """
    for data in keyword_data.values():
        if 'total' not in data:
            continue

        vcinema_count = len(data['vcinema_films'])
        total_count = data['total']

        if vcinema_count < min_vcinema_count or total_count < min_imdb_count:
            continue

        # ln(1) is 0 and ln(x) is undefined for x <= 0, so no score can be
        # computed for such totals. Skip them instead of raising when a
        # caller passes a min_imdb_count of 1 or lower.
        if total_count <= 1:
            continue

        data['score'] = vcinema_count / math.log(total_count)
|  | 
 | ||||||
|  | 
 | ||||||
def build_table(keyword_data, minimum_score=1.0):
    """Render scored keywords as a Markdown table, highest score first.

    Args:
        keyword_data: Mapping of keyword to a dict with ``'vcinema_films'``,
            ``'total'`` and ``'score'``. Entries without a ``'score'``, or
            with a score below ``minimum_score``, are left out.
        minimum_score: Lowest score a keyword needs to appear in the table.

    Returns:
        The table as a string: a header row, a separator row, then one row
        per keyword with the film count, the total and the score rounded to
        three decimal places.
    """
    scored = [
        (keyword, data)
        for keyword, data in keyword_data.items()
        if 'score' in data and data['score'] >= minimum_score
    ]
    # sorted() is stable even with reverse=True, so ties keep input order.
    scored.sort(key=lambda item: item[1]['score'], reverse=True)

    lines = ["| Keyword | Number of VCinema Films | Total IMDb entries | Score |\n| - | - | - | - |"]

    for keyword, data in scored:
        row_data = [
            str(keyword),
            str(len(data['vcinema_films'])),
            # 'total' is already a numeric count, not a collection, so it is
            # formatted directly; calling len() on it raises TypeError.
            str(data['total']),
            str(round(data['score'], 3)),
        ]
        lines.append(" | ".join(row_data))

    return "\n".join(lines)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Sarah
						Sarah