# HG changeset patch # User paulo # Date 1533188002 25200 # Node ID e2817e78989546f114c41d2aef8f9f0063b284ac # Parent 9bd400576469599fb641c5ad0516b7cc45ac99c8 myrss: update FEEDS, and add filter_feed() function to remove bad character from the new feed diff -r 9bd400576469 -r e2817e789895 myrss/FEEDS --- a/myrss/FEEDS Mon Jul 09 00:50:35 2018 -0600 +++ b/myrss/FEEDS Wed Aug 01 22:33:22 2018 -0700 @@ -35,4 +35,5 @@ http://feeds.kottke.org/main http://feeds.feedburner.com/shorpy?q=rss.xml http://feeds.feedburner.com/codinghorror +https://danielmiessler.com/feed/ http://syndication.thedailywtf.com/TheDailyWtf diff -r 9bd400576469 -r e2817e789895 myrss/myrss_app.py --- a/myrss/myrss_app.py Mon Jul 09 00:50:35 2018 -0600 +++ b/myrss/myrss_app.py Wed Aug 01 22:33:22 2018 -0700 @@ -7,6 +7,7 @@ import datetime import time import traceback +import StringIO import logging #logging.basicConfig( @@ -168,13 +169,24 @@ logging.info("(%s) %s" % (url, e)) return None - return feed + return feed.read() + + +def _filter_feed(feed): + ret = feed + + filter_out = ["\x16"] + for i in filter_out: + ret = ret.replace(i, "") + + return ret def _process_feed(feed): ret = None - elementTree = xml.etree.ElementTree.parse(feed) + feed_sio = StringIO.StringIO(feed) + elementTree = xml.etree.ElementTree.parse(feed_sio) root = elementTree.getroot() parsed_root_tag = _parse_root_tag(root.tag) @@ -209,7 +221,7 @@ try: feed = _fetch_url(url) if feed is not None: - docfeed = _process_feed(feed) + docfeed = _process_feed(_filter_feed(feed)) except Exception as e: logging.info("(%s) exception: (%s) %s" % (url, type(e), e)) self._output_queue.put((idx, docfeed)) diff -r 9bd400576469 -r e2817e789895 myrss/myrss_test_feed.py --- a/myrss/myrss_test_feed.py Mon Jul 09 00:50:35 2018 -0600 +++ b/myrss/myrss_test_feed.py Wed Aug 01 22:33:22 2018 -0700 @@ -3,7 +3,9 @@ import myrss_app -x = myrss_app._process_feed(open("yahoo_blogs.rss.xml")) +feed = open("testfeed.xml").read() +filtered_feed = myrss_app._filter_feed(feed) +x = myrss_app._process_feed(filtered_feed) y = myrss_app._to_html(datetime.datetime.now(), [x]) print y