add keyword count methods
This commit is contained in:
		
							parent
							
								
									a59e7f0b3c
								
							
						
					
					
						commit
						0ab9d81a88
					
				@ -1,4 +1,7 @@
 | 
			
		||||
from imdb import IMDb
 | 
			
		||||
import requests
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_movie(imdb_id):
 | 
			
		||||
@ -16,3 +19,42 @@ def get_movie_keywords(imdb_id):
 | 
			
		||||
 | 
			
		||||
    return movie
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_api_keyword_count(keyword):
    """Return how many entries the IMDb API client lists for *keyword*.

    A new ``IMDb`` client is created on every call and the length of
    whatever ``get_keyword`` returns is used as the count.
    """
    return len(IMDb().get_keyword(keyword))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_website_keyword_count(keyword, timeout=10):
    """Scrape the IMDb keyword search page for the number of matching titles.

    The page is fetched, the first ``<div class="desc">`` element is read,
    and the number in front of the word "titles" is extracted from its text.

    Args:
        keyword: Keyword to search for. It is sent as the ``keywords`` query
            parameter so that special characters are URL-encoded.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The title count parsed from the label, with thousands separators
        removed. If the label does not contain "<number> titles", 1 is
        returned.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
        IndexError: If the page contains no ``<div class="desc">`` element.
    """
    response = requests.get(
        "https://www.imdb.com/search/keyword/",
        params={"keywords": keyword},
        timeout=timeout,
    )
    # Do not try to parse an error page as if it were a result page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    label_node = soup.find("div", class_="desc")
    if label_node is None:
        # Same exception type the previous ``elements[0]`` lookup produced,
        # but with a message that explains what went wrong.
        raise IndexError(
            "no 'desc' element found on the IMDb keyword page for %r" % (keyword,)
        )

    pagination_label = label_node.text.replace("\n", "")

    # Accept any number of thousands separators ("1,234,567 titles"); the
    # earlier pattern allowed only one comma and dropped leading groups.
    match = re.search(r"(\d[\d,]*) titles", pagination_label)
    if match is None:
        # NOTE(review): presumably the label reads differently when there is
        # a single result; the fallback of 1 is kept from the original code.
        return 1
    return int(match.group(1).replace(",", ""))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_keyword_count(keyword):
    """Return the title count for *keyword*.

    The API-based count is used unless it is exactly 50, in which case the
    count scraped from the website is returned instead. Any exception raised
    by either lookup propagates to the caller unchanged.
    """
    api_count = get_api_keyword_count(keyword)
    if api_count != 50:
        return api_count

    # NOTE(review): 50 looks like the API's result cap, so the website is
    # consulted for the real total; confirm against the IMDb client.
    return get_website_keyword_count(keyword)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user