add keyword count methods
This commit is contained in:
		
							parent
							
								
									a59e7f0b3c
								
							
						
					
					
						commit
						0ab9d81a88
					
				| @ -1,4 +1,7 @@ | ||||
| from imdb import IMDb | ||||
| import requests | ||||
| from bs4 import BeautifulSoup | ||||
| import re | ||||
| 
 | ||||
| 
 | ||||
| def get_movie(imdb_id): | ||||
| @ -16,3 +19,42 @@ def get_movie_keywords(imdb_id): | ||||
| 
 | ||||
|     return movie | ||||
| 
 | ||||
| 
 | ||||
def get_api_keyword_count(keyword):
    """Return how many entries the IMDb API client yields for *keyword*.

    A fresh ``IMDb`` client is created on every call; the result is simply
    the length of whatever ``get_keyword`` returns for the given keyword.
    """
    return len(IMDb().get_keyword(keyword))
| 
 | ||||
| 
 | ||||
def get_website_keyword_count(keyword, timeout=10):
    """Return the title count shown on IMDb's keyword search page.

    The page is fetched from ``https://www.imdb.com/search/keyword/`` and the
    first ``<div class="desc">`` element is scanned for text of the form
    ``"<number> titles"`` (the number may contain thousands separators).

    Args:
        keyword: The raw (not yet URL-encoded) keyword to search for.
        timeout: Seconds to wait for the HTTP request before giving up;
            passed straight to ``requests.get``.

    Returns:
        The parsed count as an ``int``, or ``1`` when the label does not
        contain a ``"<number> titles"`` fragment.

    Raises:
        IndexError: If the page has no ``<div class="desc">`` element.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Let requests build the query string so characters such as '&', '#' or
    # '+' in the keyword are percent-encoded instead of corrupting the URL.
    page = requests.get(
        "https://www.imdb.com/search/keyword/",
        params={"keywords": keyword},
        timeout=timeout,
    )

    soup = BeautifulSoup(page.content, 'html.parser')
    label_element = soup.find("div", class_="desc")
    if label_element is None:
        # Same exception type the former `elements[0]` lookup produced, but
        # with a message that says what actually went wrong.
        raise IndexError(
            "no <div class=\"desc\"> element found on the IMDb keyword page"
        )

    pagination_label = label_element.text.replace("\n", "")

    # Accept any number of thousands separators ("1,234,567 titles"); the old
    # pattern allowed only one comma and silently dropped the leading group.
    pattern_match = re.search(r"(\d[\d,]*) titles", pagination_label)
    if pattern_match is None:
        return 1
    return int(pattern_match.group(1).replace(",", ""))
| 
 | ||||
| 
 | ||||
def get_keyword_count(keyword):
    """Return the number of titles associated with *keyword*.

    The count from ``get_api_keyword_count`` is used as-is unless it equals
    exactly 50, in which case the count scraped by
    ``get_website_keyword_count`` replaces it. Any exception raised by either
    helper propagates unchanged to the caller.
    """
    count = get_api_keyword_count(keyword)

    # NOTE(review): 50 is presumably the maximum number of results the API
    # returns per keyword, so hitting it means the real total may be larger
    # -- confirm against the IMDb client's behavior.
    if count == 50:
        count = get_website_keyword_count(keyword)

    return count
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Sarah
						Sarah