"""Helpers for fetching IMDb movies and counting titles tagged with a keyword."""

import re

import requests
from bs4 import BeautifulSoup
from imdb import IMDb

# Endpoint scraped for the total number of titles carrying a keyword.
_KEYWORD_SEARCH_URL = "https://www.imdb.com/search/keyword/"

# Matches counts such as "57 titles" or "1,234,567 titles".  The digit/comma
# class accepts any number of thousands separators.
_TITLE_COUNT_RE = re.compile(r"(\d[\d,]*) titles")

# When the API returns exactly this many results the real total may be
# larger, so the website is consulted instead.
# NOTE(review): presumably the API's page-size cap -- confirm against the
# installed IMDb package.
_API_RESULT_CAP = 50


def get_movie(imdb_id):
    """Return the movie object the IMDb API yields for ``imdb_id``."""
    return IMDb().get_movie(imdb_id)


def get_movie_keywords(imdb_id):
    """Return the movie for ``imdb_id``, requested with the "keywords" info set.

    Note that the movie object itself is returned, not a bare keyword list.
    """
    return IMDb().get_movie(imdb_id, info="keywords")


def get_api_keyword_count(keyword):
    """Return how many results the IMDb API lists for ``keyword``."""
    return len(IMDb().get_keyword(keyword))


def get_website_keyword_count(keyword, timeout=10.0):
    """Scrape the IMDb keyword search page for the number of matching titles.

    Args:
        keyword: Keyword to search for.  It is sent as a query parameter so
            that reserved characters are escaped correctly.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The title count parsed from the page's description label, or ``1``
        when the label contains no "<n> titles" text.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        IndexError: If the page contains no ``div`` with class ``desc``.
    """
    page = requests.get(
        _KEYWORD_SEARCH_URL,
        params={"keywords": keyword},
        timeout=timeout,
    )
    soup = BeautifulSoup(page.content, "html.parser")

    description = soup.find("div", class_="desc")
    if description is None:
        # Same exception type the bare ``elements[0]`` lookup used to raise,
        # but with a message that explains what was missing.
        raise IndexError(
            f"no 'desc' element found on the IMDb keyword page for {keyword!r}"
        )

    pagination_label = description.text.replace("\n", "")
    match = _TITLE_COUNT_RE.search(pagination_label)
    if match is None:
        # No "<n> titles" text in the label; fall back to a count of one.
        return 1
    return int(match.group(1).replace(",", ""))


def get_keyword_count(keyword):
    """Return the number of titles tagged with ``keyword``.

    The API count is used unless it equals the result cap, in which case the
    count is re-read from the website.  Exceptions from either lookup
    propagate unchanged.
    """
    count = get_api_keyword_count(keyword)
    if count == _API_RESULT_CAP:
        count = get_website_keyword_count(keyword)
    return count