diff myrss/myrss_app.py @ 98:e2817e789895

myrss: update FEEDS, and add _filter_feed() function to remove a bad character from the new feed
author paulo
date Wed, 01 Aug 2018 22:33:22 -0700
parents 9bd400576469
children 083e09bb913a
line diff
     1.1 --- a/myrss/myrss_app.py	Mon Jul 09 00:50:35 2018 -0600
     1.2 +++ b/myrss/myrss_app.py	Wed Aug 01 22:33:22 2018 -0700
     1.3 @@ -7,6 +7,7 @@
     1.4  import datetime
     1.5  import time
     1.6  import traceback
     1.7 +import StringIO
     1.8  
     1.9  import logging
    1.10  #logging.basicConfig(
    1.11 @@ -168,13 +169,24 @@
    1.12  		logging.info("(%s) %s" % (url, e))
    1.13  		return None
    1.14  
    1.15 -	return feed
    1.16 +	return feed.read()
    1.17 +
    1.18 +
    1.19 +def _filter_feed(feed):
    1.20 +	ret = feed
    1.21 +
    1.22 +	filter_out = ["\x16"]
    1.23 +	for i in filter_out:
    1.24 +		ret = ret.replace(i, "")
    1.25 +
    1.26 +	return ret
    1.27  
    1.28  
    1.29  def _process_feed(feed):
    1.30  	ret = None
    1.31  
    1.32 -	elementTree = xml.etree.ElementTree.parse(feed)
    1.33 +	feed_sio = StringIO.StringIO(feed)
    1.34 +	elementTree = xml.etree.ElementTree.parse(feed_sio)
    1.35  	root = elementTree.getroot()
    1.36  
    1.37  	parsed_root_tag = _parse_root_tag(root.tag) 
    1.38 @@ -209,7 +221,7 @@
    1.39  			try:
    1.40  				feed = _fetch_url(url)
    1.41  				if feed is not None:
    1.42 -					docfeed = _process_feed(feed)
    1.43 +					docfeed = _process_feed(_filter_feed(feed))
    1.44  			except Exception as e:
    1.45  				logging.info("(%s) exception: (%s) %s" % (url, type(e), e))
    1.46  			self._output_queue.put((idx, docfeed))