Mercurial > hg > index.fcgi > www > www-1
diff myrss/myrss_parser.py @ 42:a1456ecd25b9
fix bug with threadpool; create WSGI app and test server; use logging
author | paulo |
---|---|
date | Mon, 04 Feb 2013 23:58:02 -0800 |
parents | 5f9bc02e9caf |
children |
line diff
```diff
--- a/myrss/myrss_parser.py	Fri Feb 01 01:26:07 2013 -0800
+++ b/myrss/myrss_parser.py	Mon Feb 04 23:58:02 2013 -0800
@@ -6,6 +6,8 @@
 import Queue
 import datetime
 import time
+import logging
+logging.basicConfig(level=logging.INFO)
 
 import html
 import xml.etree.ElementTree
@@ -78,7 +80,7 @@
     datetime_str = dtnow.strftime("%Y-%m-%d %H:%M %Z")
     page_title = "myrss -- %s" % datetime_str
 
-    root = html.HTML()
+    root = html.HTML("html")
 
     header = root.header
     header.title(page_title)
@@ -113,10 +115,10 @@
     ret = None
 
     try:
-        print >> sys.stderr, "--> processing %s" % url
+        logging.info("processing %s" % url)
         feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": ''}))
     except urllib2.HTTPError as e:
-        print >> sys.stderr, "--> (%s) %s" % (url, e)
+        logging.info("(%s) %s" % (url, e))
         return ret
 
     elementTree = xml.etree.ElementTree.parse(feed)
@@ -152,12 +154,11 @@
             try:
                 docfeed = _process_url(url)
             except Exception as e:
-                print >> sys.stderr, "--> (%s) exception: %s" % (url, e)
+                logging.info("(%s) exception: %s" % (url, e))
             self._output_queue.put((idx, docfeed))
-            self._input_queue.task_done()
 
 
-def main():
+def main(input_queue, output_queue):
     ret = ''
 
     epoch_now = time.time()
@@ -172,25 +173,16 @@
         feedlines = feeds_file.readlines()
 
     docstruct = [None]*len(feedlines)
-    iq = Queue.Queue(feedlines)
-    oq = Queue.Queue(feedlines)
-
-    for _ in range(MAX_THREADS):
-        WorkerThread(input_queue=iq, output_queue=oq).start()
-
+    num_input = 0
     for (i, l) in enumerate(feedlines):
         if l[0] != '#':
             l = l.strip()
-            iq.put((i, l))
+            input_queue.put((i, l))
+            num_input += 1
 
-    iq.join()
-
-    while True:
-        try:
-            (idx, docfeed) = oq.get_nowait()
-            docstruct[idx] = docfeed
-        except Queue.Empty:
-            break
+    for _ in range(num_input):
+        (idx, docfeed) = output_queue.get()
+        docstruct[idx] = docfeed
 
     ret = _to_html(dtnow, docstruct)
 
@@ -200,6 +192,22 @@
     return ret
 
 
-if __name__ == "__main__":
-    print main()
+class MyRssApp:
+    def __init__(self):
+        self._iq = Queue.Queue(MAX_THREADS)
+        self._oq = Queue.Queue(MAX_THREADS)
 
+        for _ in range(MAX_THREADS):
+            WorkerThread(input_queue=self._iq, output_queue=self._oq).start()
+
+    def __call__(self, environ, start_response):
+        response_body = main(self._iq, self._oq)
+        response_headers = [
+            ("Content-Type", "text/html"),
+            ("Content-Length", str(len(response_body))),
+        ]
+        start_response("200 OK", response_headers)
+
+        return [response_body]
+
+
```