# vcinema/imdb_utils/IMDbUtils.py

from imdb import IMDb
import requests
from bs4 import BeautifulSoup
import re


def get_movie(imdb_id):
    """Look up a movie by its IMDb ID.

    A new ``IMDb`` client is created for each call.

    :param imdb_id: identifier passed straight through to ``IMDb.get_movie``.
    :return: whatever ``IMDb.get_movie`` returns for that identifier.
    """
    return IMDb().get_movie(imdb_id)


def get_movie_keywords(imdb_id):
    """Look up a movie by its IMDb ID, requesting the "keywords" info set.

    A new ``IMDb`` client is created for each call.

    :param imdb_id: identifier passed straight through to ``IMDb.get_movie``.
    :return: the result of ``IMDb.get_movie`` called with ``info="keywords"``
        (the movie result itself, not an extracted keyword list).
    """
    client = IMDb()
    return client.get_movie(imdb_id, info="keywords")


def get_api_keyword_count(keyword):
    """Count the results the ``IMDb`` client returns for a keyword.

    A new ``IMDb`` client is created for each call.

    :param keyword: keyword passed straight through to ``IMDb.get_keyword``.
    :return: ``len()`` of the collection returned by ``IMDb.get_keyword``.
    """
    matches = IMDb().get_keyword(keyword)
    return len(matches)


# Finds "<number> titles" in the pagination label; the number may contain any
# amount of comma-separated digit groups (e.g. "1,234" or "1,234,567").
_TITLE_COUNT_RE = re.compile(r"(\d+(?:,\d+)*) titles")


def get_website_keyword_count(keyword, timeout=10):
    """Scrape the IMDb keyword search page for the number of titles it reports.

    The page at ``https://www.imdb.com/search/keyword/`` is fetched with the
    keyword as the ``keywords`` query parameter, and the text of the first
    ``<div class="desc">`` is searched for a "<number> titles" phrase.

    :param keyword: keyword to search for; it is URL-encoded by ``requests``.
    :param timeout: seconds to wait for the server before ``requests`` gives
        up, so an unresponsive server cannot block the caller forever.
    :return: the title count as an ``int``, or ``1`` when the label contains
        no "<number> titles" phrase (presumably the single-result case --
        TODO confirm against the live page).
    :raises requests.exceptions.RequestException: if the HTTP request fails
        or times out.
    :raises IndexError: if the page contains no ``<div class="desc">``.
    """
    # Pass the keyword via ``params`` so characters such as spaces, '&' or '#'
    # are encoded instead of corrupting the query string.
    page = requests.get(
        "https://www.imdb.com/search/keyword/",
        params={"keywords": keyword},
        timeout=timeout,
    )

    soup = BeautifulSoup(page.content, 'html.parser')
    desc = soup.find("div", class_="desc")
    if desc is None:
        # Same exception type the previous ``elements[0]`` lookup produced,
        # but with a message that explains what went wrong.
        raise IndexError(
            "no <div class=\"desc\"> found on the IMDb keyword page for "
            "keyword %r" % (keyword,)
        )

    pagination_label = desc.text.replace("\n", "")
    pattern_match = _TITLE_COUNT_RE.search(pagination_label)

    if pattern_match is None:
        return 1
    return int(pattern_match.group(1).replace(',', ''))


def get_keyword_count(keyword):
    """Return the number of titles for a keyword, preferring the API count.

    The count from ``get_api_keyword_count`` is used unless it is exactly 50,
    in which case the count scraped by ``get_website_keyword_count`` replaces
    it.

    :param keyword: keyword to count titles for.
    :return: the title count as an ``int``.
    :raises Exception: any error raised by either lookup propagates unchanged.
    """
    count = get_api_keyword_count(keyword)

    # NOTE(review): 50 is presumably the maximum number of results the API
    # returns, so a count of exactly 50 may be truncated -- confirm.
    if count == 50:
        count = get_website_keyword_count(keyword)

    return count