changeset 98:e2817e789895

myrss: update FEEDS, and add _filter_feed() function to remove bad characters from the new feed
author paulo
date Wed, 01 Aug 2018 22:33:22 -0700
parents 9bd400576469
children fcbc8635abf8
files myrss/FEEDS myrss/myrss_app.py myrss/myrss_test_feed.py
diffstat 3 files changed, 19 insertions(+), 4 deletions(-) [+]
line diff
     1.1 --- a/myrss/FEEDS	Mon Jul 09 00:50:35 2018 -0600
     1.2 +++ b/myrss/FEEDS	Wed Aug 01 22:33:22 2018 -0700
     1.3 @@ -35,4 +35,5 @@
     1.4  http://feeds.kottke.org/main
     1.5  http://feeds.feedburner.com/shorpy?q=rss.xml
     1.6  http://feeds.feedburner.com/codinghorror
     1.7 +https://danielmiessler.com/feed/
     1.8  http://syndication.thedailywtf.com/TheDailyWtf
     2.1 --- a/myrss/myrss_app.py	Mon Jul 09 00:50:35 2018 -0600
     2.2 +++ b/myrss/myrss_app.py	Wed Aug 01 22:33:22 2018 -0700
     2.3 @@ -7,6 +7,7 @@
     2.4  import datetime
     2.5  import time
     2.6  import traceback
     2.7 +import StringIO
     2.8  
     2.9  import logging
    2.10  #logging.basicConfig(
    2.11 @@ -168,13 +169,24 @@
    2.12  		logging.info("(%s) %s" % (url, e))
    2.13  		return None
    2.14  
    2.15 -	return feed
    2.16 +	return feed.read()
    2.17 +
    2.18 +
    2.19 +def _filter_feed(feed):
    2.20 +	ret = feed
    2.21 +
    2.22 +	filter_out = ["\x16"]
    2.23 +	for i in filter_out:
    2.24 +		ret = ret.replace(i, "")
    2.25 +
    2.26 +	return ret
    2.27  
    2.28  
    2.29  def _process_feed(feed):
    2.30  	ret = None
    2.31  
    2.32 -	elementTree = xml.etree.ElementTree.parse(feed)
    2.33 +	feed_sio = StringIO.StringIO(feed)
    2.34 +	elementTree = xml.etree.ElementTree.parse(feed_sio)
    2.35  	root = elementTree.getroot()
    2.36  
    2.37  	parsed_root_tag = _parse_root_tag(root.tag) 
    2.38 @@ -209,7 +221,7 @@
    2.39  			try:
    2.40  				feed = _fetch_url(url)
    2.41  				if feed is not None:
    2.42 -					docfeed = _process_feed(feed)
    2.43 +					docfeed = _process_feed(_filter_feed(feed))
    2.44  			except Exception as e:
    2.45  				logging.info("(%s) exception: (%s) %s" % (url, type(e), e))
    2.46  			self._output_queue.put((idx, docfeed))
     3.1 --- a/myrss/myrss_test_feed.py	Mon Jul 09 00:50:35 2018 -0600
     3.2 +++ b/myrss/myrss_test_feed.py	Wed Aug 01 22:33:22 2018 -0700
     3.3 @@ -3,7 +3,9 @@
     3.4  import myrss_app
     3.5  
     3.6  
     3.7 -x = myrss_app._process_feed(open("yahoo_blogs.rss.xml"))
     3.8 +feed = open("testfeed.xml").read()
     3.9 +filtered_feed = myrss_app._filter_feed(feed)
    3.10 +x = myrss_app._process_feed(filtered_feed)
    3.11  y = myrss_app._to_html(datetime.datetime.now(), [x])
    3.12  
    3.13  print y