From c92e82b23e00a86ef714ca365caa530c531cc398 Mon Sep 17 00:00:00 2001
From: psy <psy@darmstadt.ccc.de>
Date: Mon, 16 Oct 2023 19:09:55 +0200
Subject: [PATCH] do http calls with requests

Download each feed with requests.get() and a 10 second timeout instead
of letting feedparser open the URL itself, then hand the downloaded
body to feedparser. Request errors, non-200 responses and feeds that
cannot be parsed are logged and skipped. The catch-all
"except Exception" around the whole method is removed.
---
Review notes (text in this section is ignored by git am):

* The line structure of this patch was restored from a copy that had
  been collapsed onto two lines. The indentation of the context line
  "if url in feeds]" is a best guess; if the hunk does not apply
  cleanly, use "git apply --ignore-whitespace".
* Three added lines differ from the original commit c92e82b, so the
  blob ids on the "index" line no longer describe the result:
  - feedparser now receives response.content (bytes) rather than
    response.text, so its own encoding detection sees the raw document
    instead of a string already decoded with a guessed charset.
  - A set bozo flag only aborts when no entries were parsed; feedparser
    also sets it for recoverable problems.
  - A feed without a title falls back to its URL instead of raising,
    now that the catch-all handler is gone.
* Not addressed: exceptions from set_account_data()/send_html() and
  entries without an id are no longer caught inside _fetch_feed;
  confirm the caller of _fetch_feed copes with that.

 rssbot.py | 77 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/rssbot.py b/rssbot.py
index 9cc9647..9ff1824 100755
--- a/rssbot.py
+++ b/rssbot.py
@@ -4,6 +4,7 @@ import argparse
 import time
 import traceback
 import feedparser
+import requests
 from threading import Thread, Condition
 from matrix_client.errors import MatrixRequestError
 from matrix_client.client import MatrixClient
@@ -149,44 +150,54 @@ Invite me to any room on {homeserver} and use the custom state event <code>de.ev
                 if url in feeds]
 
     def _fetch_feed(self, url):
-        # FIXME: one site with slow response times can block all feeds
         print('Fetching updates from {}'.format(url))
         try:
-            feed = feedparser.parse(url)
-            feed_title = feed.feed.title
-            to_be_sent = []
-            any_knowns = False
-            for entry in feed.entries:
-                guid = entry.id
-                if guid not in self._known_guids:
-                    self._known_guids.add(guid)
-                    to_be_sent.append(entry)
-                else:
-                    any_knowns = True
+            response = requests.get(url, timeout=10)
+        except requests.RequestException as e:
+            print(f'error fetching feed {url}: {e}')
+            return
+
+        if response.status_code != 200:
+            print(f'error fetching feed {url}, got status {response.status_code}.')
+            return
 
-            if not to_be_sent:
-                return
+        feed = feedparser.parse(response.content)
 
-            self.client.api.set_account_data(
-                self.client.user_id,
-                ACCOUNT_DATA_TYPE,
-                {'known_guids': list(self._known_guids)}
-            )
+        if feed.bozo and not feed.entries:
+            print(f'error parsing feed {url}: {feed.bozo_exception}.')
+            return
+
+        feed_title = feed.feed.get('title', url)
+        to_be_sent = []
+        any_knowns = False
+        for entry in feed.entries:
+            guid = entry.id
+            if guid not in self._known_guids:
+                self._known_guids.add(guid)
+                to_be_sent.append(entry)
+            else:
+                any_knowns = True
+
+        if not to_be_sent:
+            return
+
+        self.client.api.set_account_data(
+            self.client.user_id,
+            ACCOUNT_DATA_TYPE,
+            {'known_guids': list(self._known_guids)}
+        )
+
+        if not any_knowns:
+            return
 
-            if not any_knowns:
-                return
-
-            for entry in reversed(to_be_sent):
-                html = '[<a href="{}">{}</a>] {}'\
-                    .format(entry.link, feed_title, entry.title)
-                raw = '[{}][{}] {}'\
-                    .format(feed_title, entry.link, entry.title)
-                print(raw)
-                for room in self.get_rooms_for_feed(url):
-                    room.send_html(html, raw, 'm.notice')
-        except Exception:
-            print('Failed to parse feed {}: {}'
-                  .format(url, traceback.format_exc()))
+        for entry in reversed(to_be_sent):
+            html = '[<a href="{}">{}</a>] {}'\
+                .format(entry.link, feed_title, entry.title)
+            raw = '[{}][{}] {}'\
+                .format(feed_title, entry.link, entry.title)
+            print(raw)
+            for room in self.get_rooms_for_feed(url):
+                room.send_html(html, raw, 'm.notice')
 
     def run(self):
         self._fetch_thread.start()
-- 
GitLab