"""Grabber for the BBC News business RSS feed."""

import csv
import logging
import xml.etree.ElementTree as et
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime

from requests.exceptions import HTTPError

from grabber import Grabber


class BBCBusiness(Grabber):
    """Collect articles from the BBC business RSS feed.

    Each article is stored as a ``(timestamp, title, description)`` tuple,
    where ``timestamp`` is an integer number of seconds since the Unix epoch.
    """

    # NOTE: class-level list, so it is shared by every instance that does not
    # assign its own ``articles`` attribute. Kept as-is for compatibility.
    articles = []
    feed_url = "http://feeds.bbci.co.uk/news/business/rss.xml"

    def grab(self):
        """Fetch the feed and record any articles not seen before.

        HTTP and XML parsing failures are logged and otherwise ignored, so a
        failed refresh leaves ``articles`` untouched.
        """
        try:
            # ``request`` is not defined in this class; presumably inherited
            # from ``Grabber`` and returning the raw feed XML -- TODO confirm.
            feed = et.fromstring(self.request())
            self.process(feed)
        except (HTTPError, et.ParseError):
            logging.error(f"Unable to get updated news from {self.__class__.__name__}.")

    def process(self, feed):
        """Append every new, parsable ``<item>`` of *feed* to ``articles``.

        Args:
            feed: Root element of the parsed RSS document.
        """
        for item in feed.iter("item"):
            article = self.parse(item)
            if article is None:
                # parse() has already logged the failure; skipping here keeps
                # None out of ``articles`` and avoids indexing into it below.
                continue
            if article not in self.articles:
                self.articles.append(article)
                logging.info(
                    f"New article from {self.__class__.__name__} at {datetime.fromtimestamp(article[0])}"
                )

    def parse(self, item):
        """Convert one RSS ``<item>`` element into an article tuple.

        Args:
            item: Element expected to contain ``pubDate``, ``title`` and
                ``description`` children.

        Returns:
            ``(timestamp, title, description)``, or ``None`` when a required
            child is missing or the publication date cannot be parsed.
        """
        try:
            # RSS dates are RFC 822 style, e.g. "Fri, 17 Jan 2020 19:09:40 GMT".
            # parsedate_to_datetime is locale-independent and honours the
            # zone/offset, unlike strptime with %a/%b/%Z.
            published = parsedate_to_datetime(item.find("pubDate").text)
            if published.tzinfo is None:
                # A "-0000" zone yields a naive datetime that represents UTC;
                # make that explicit so timestamp() does not assume local time.
                published = published.replace(tzinfo=timezone.utc)
            article = (
                int(published.timestamp()),
                item.find("title").text,
                item.find("description").text,
            )
        except (AttributeError, TypeError, ValueError):
            # AttributeError: a child element is missing (find() returned None).
            # TypeError/ValueError: empty or malformed pubDate text.
            logging.error(f"Received non-parsable news article from {self.__class__.__name__}.")
            return None
        return article