# HG changeset patch
# User paulo
# Date 1360051082 28800
# Node ID a1456ecd25b98c63a616c01ac868a66d0ebc443c
# Parent 5f9bc02e9cafdda4b0a31b33693d5af5f5111a31
fix bug with threadpool; create WSGI app and test server; use logging

diff -r 5f9bc02e9caf -r a1456ecd25b9 myrss/myrss_parser.py
--- a/myrss/myrss_parser.py	Fri Feb 01 01:26:07 2013 -0800
+++ b/myrss/myrss_parser.py	Mon Feb 04 23:58:02 2013 -0800
@@ -6,6 +6,8 @@
 import Queue
 import datetime
 import time
+import logging
+logging.basicConfig(level=logging.INFO)
 
 import html
 import xml.etree.ElementTree
@@ -78,7 +80,7 @@
     datetime_str = dtnow.strftime("%Y-%m-%d %H:%M %Z")
     page_title = "myrss -- %s" % datetime_str
 
-    root = html.HTML()
+    root = html.HTML("html")
 
     header = root.header
     header.title(page_title)
@@ -113,10 +115,10 @@
     ret = None
 
     try:
-        print >> sys.stderr, "--> processing %s" % url
+        logging.info("processing %s" % url)
         feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": ''}))
     except urllib2.HTTPError as e:
-        print >> sys.stderr, "--> (%s) %s" % (url, e)
+        logging.info("(%s) %s" % (url, e))
         return ret
 
     elementTree = xml.etree.ElementTree.parse(feed)
@@ -152,12 +154,11 @@
         try:
             docfeed = _process_url(url)
         except Exception as e:
-            print >> sys.stderr, "--> (%s) exception: %s" % (url, e)
+            logging.info("(%s) exception: %s" % (url, e))
         self._output_queue.put((idx, docfeed))
-        self._input_queue.task_done()
 
 
-def main():
+def main(input_queue, output_queue):
     ret = ''
 
     epoch_now = time.time()
@@ -172,25 +173,16 @@
     feedlines = feeds_file.readlines()
 
     docstruct = [None]*len(feedlines)
-    iq = Queue.Queue(feedlines)
-    oq = Queue.Queue(feedlines)
-
-    for _ in range(MAX_THREADS):
-        WorkerThread(input_queue=iq, output_queue=oq).start()
-
+    num_input = 0
     for (i, l) in enumerate(feedlines):
         if l[0] != '#':
             l = l.strip()
-            iq.put((i, l))
+            input_queue.put((i, l))
+            num_input += 1
 
-    iq.join()
-
-    while True:
-        try:
-            (idx, docfeed) = oq.get_nowait()
-            docstruct[idx] = docfeed
-        except Queue.Empty:
-            break
+    for _ in range(num_input):
+        (idx, docfeed) = output_queue.get()
+        docstruct[idx] = docfeed
 
     ret = _to_html(dtnow, docstruct)
 
@@ -200,6 +192,22 @@
     return ret
 
 
-if __name__ == "__main__":
-    print main()
+class MyRssApp:
+    def __init__(self):
+        self._iq = Queue.Queue(MAX_THREADS)
+        self._oq = Queue.Queue(MAX_THREADS)
+        for _ in range(MAX_THREADS):
+            WorkerThread(input_queue=self._iq, output_queue=self._oq).start()
+
+    def __call__(self, environ, start_response):
+        response_body = main(self._iq, self._oq)
+        response_headers = [
+            ("Content-Type", "text/html"),
+            ("Content-Length", str(len(response_body))),
+        ]
+        start_response("200 OK", response_headers)
+
+        return [response_body]
+
+
 
diff -r 5f9bc02e9caf -r a1456ecd25b9 myrss/myrss_test_server.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/myrss/myrss_test_server.py	Mon Feb 04 23:58:02 2013 -0800
@@ -0,0 +1,14 @@
+import wsgiref.simple_server
+import SocketServer
+
+import myrss_parser
+
+
+class ThreadingWSGIServer(SocketServer.ThreadingMixIn, wsgiref.simple_server.WSGIServer):
+    pass
+
+
+if __name__ == "__main__":
+    httpd = ThreadingWSGIServer(('', 8000), wsgiref.simple_server.WSGIRequestHandler)
+    httpd.set_app(myrss_parser.MyRssApp())
+    httpd.serve_forever()