Mercurial > hg > index.fcgi > www > www-1
comparison myrss/myrss_app.py @ 101:2b010bd970c2
deer_crash: correction: it was actually more than *three* years ago
author | paulo |
---|---|
date | Wed, 29 Aug 2018 23:28:09 -0700 |
parents | 9bd400576469 |
children | 083e09bb913a |
comparison
equal
deleted
inserted
replaced
13:7cf39f3fd427 | 14:6a023f65511a |
---|---|
5 import threading | 5 import threading |
6 import Queue | 6 import Queue |
7 import datetime | 7 import datetime |
8 import time | 8 import time |
9 import traceback | 9 import traceback |
| 10 import StringIO |
10 | 11 |
11 import logging | 12 import logging |
12 #logging.basicConfig( | 13 #logging.basicConfig( |
13 # level=logging.DEBUG, | 14 # level=logging.DEBUG, |
14 # filename="_LOG", | 15 # filename="_LOG", |
166 feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": "Mozilla/5.0 Browser"}), timeout=URLOPEN_TIMEOUT) | 167 feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": "Mozilla/5.0 Browser"}), timeout=URLOPEN_TIMEOUT) |
167 except urllib2.HTTPError as e: | 168 except urllib2.HTTPError as e: |
168 logging.info("(%s) %s" % (url, e)) | 169 logging.info("(%s) %s" % (url, e)) |
169 return None | 170 return None |
170 | 171 |
171 return feed | 172 return feed.read() |
| 173 |
| 174 |
| 175 def _filter_feed(feed): |
| 176 ret = feed |
| 177 |
| 178 filter_out = ["\x16"] |
| 179 for i in filter_out: |
| 180 ret = ret.replace(i, "") |
| 181 |
| 182 return ret |
172 | 183 |
173 | 184 |
174 def _process_feed(feed): | 185 def _process_feed(feed): |
175 ret = None | 186 ret = None |
176 | 187 |
177 elementTree = xml.etree.ElementTree.parse(feed) | 188 feed_sio = StringIO.StringIO(feed) |
| 189 elementTree = xml.etree.ElementTree.parse(feed_sio) |
178 root = elementTree.getroot() | 190 root = elementTree.getroot() |
179 | 191 |
180 parsed_root_tag = _parse_root_tag(root.tag) | 192 parsed_root_tag = _parse_root_tag(root.tag) |
181 | 193 |
182 if parsed_root_tag == (None, "rss"): | 194 if parsed_root_tag == (None, "rss"): |
207 (idx, url) = self._input_queue.get() | 219 (idx, url) = self._input_queue.get() |
208 docfeed = None | 220 docfeed = None |
209 try: | 221 try: |
210 feed = _fetch_url(url) | 222 feed = _fetch_url(url) |
211 if feed is not None: | 223 if feed is not None: |
212 docfeed = _process_feed(feed) | 224 docfeed = _process_feed(_filter_feed(feed)) |
213 except Exception as e: | 225 except Exception as e: |
214 logging.info("(%s) exception: (%s) %s" % (url, type(e), e)) | 226 logging.info("(%s) exception: (%s) %s" % (url, type(e), e)) |
215 self._output_queue.put((idx, docfeed)) | 227 self._output_queue.put((idx, docfeed)) |
216 | 228 |
217 | 229 |