40 lines
1.4 KiB
Python
40 lines
1.4 KiB
Python
|
import csv
import logging
import xml.etree.ElementTree as et
from datetime import datetime, timezone

from requests.exceptions import HTTPError

from grabber import Grabber
|
||
|
|
||
|
|
||
|
class BBCBusiness(Grabber):
    """Grabber for the BBC Business RSS feed.

    Polls the feed via `grab()` and accumulates previously-unseen articles
    as `(timestamp, title, description)` tuples in `articles`.
    """

    # NOTE(review): class-level list is shared by every instance of this
    # class — fine while a single grabber exists per feed, but move to an
    # __init__ if multiple instances are ever created.
    articles = []

    feed_url = "http://feeds.bbci.co.uk/news/business/rss.xml"

    def grab(self):
        """Fetch the feed and record any new articles.

        Network failures (HTTPError from `self.request()`) and malformed
        XML (ParseError) are logged and swallowed so one bad poll does not
        kill the caller's loop.
        """
        try:
            feed = et.fromstring(self.request())
        except (HTTPError, et.ParseError):
            logging.error("Unable to get updated news from %s.", self.__class__.__name__)
        else:
            self.process(feed)

    def process(self, feed):
        """Append each previously-unseen <item> in *feed* to `self.articles`.

        Items that fail to parse (parse() returns None) are skipped —
        previously a None was appended and the subsequent log line crashed
        with TypeError on `article[0]`.
        """
        for item in feed.iter("item"):
            article = self.parse(item)
            if article is None:
                continue
            if article not in self.articles:
                self.articles.append(article)
                logging.info(
                    "New article from %s at %s",
                    self.__class__.__name__,
                    datetime.fromtimestamp(article[0]),
                )

    def parse(self, item):
        """Parse one RSS <item> element.

        Returns a `(timestamp, title, description)` tuple, where timestamp
        is an int Unix time, or None when the item is missing an expected
        child element or carries an unparsable date.
        """
        try:
            date = item.find("pubDate").text
            # pubDate is RFC-822 style, e.g. "Fri, 17 Jan 2020 19:09:40 GMT".
            parsed = datetime.strptime(date, "%a, %d %b %Y %H:%M:%S %Z")
            # strptime leaves the result naive even when %Z matches "GMT",
            # and .timestamp() interprets naive datetimes in the LOCAL
            # timezone — pin UTC explicitly so timestamps are correct on
            # any host.
            timestamp = int(parsed.replace(tzinfo=timezone.utc).timestamp())
            title = item.find("title").text
            description = item.find("description").text
        except (AttributeError, ValueError):
            # AttributeError: a .find() returned None (missing element).
            # ValueError: pubDate did not match the expected format —
            # previously swallowed silently by a return inside `finally`.
            logging.error("Received non-parsable news article from %s.", self.__class__.__name__)
            return None
        return (timestamp, title, description)