diff myrss/myrss_parser.py @ 42:a1456ecd25b9

fix bug with threadpool; create WSGI app and test server; use logging
author paulo
date Mon, 04 Feb 2013 23:58:02 -0800
parents 5f9bc02e9caf
children
line diff
--- a/myrss/myrss_parser.py	Fri Feb 01 01:26:07 2013 -0800
+++ b/myrss/myrss_parser.py	Mon Feb 04 23:58:02 2013 -0800
@@ -6,6 +6,8 @@
 import Queue
 import datetime
 import time
+import logging
+logging.basicConfig(level=logging.INFO)
 
 import html
 import xml.etree.ElementTree
@@ -78,7 +80,7 @@
 	datetime_str = dtnow.strftime("%Y-%m-%d %H:%M %Z")
 	page_title = "myrss -- %s" % datetime_str
 
-	root = html.HTML()
+	root = html.HTML("html")
 
 	header = root.header
 	header.title(page_title)
@@ -113,10 +115,10 @@
 	ret = None
 
 	try:
-		print >> sys.stderr, "--> processing %s" % url
+		logging.info("processing %s" % url)
 		feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": ''}))
 	except urllib2.HTTPError as e:
-		print >> sys.stderr, "--> (%s) %s" % (url, e)
+		logging.info("(%s) %s" % (url, e))
 		return ret
 
 	elementTree = xml.etree.ElementTree.parse(feed)
@@ -152,12 +154,11 @@
 			try:
 				docfeed = _process_url(url)
 			except Exception as e:
-				print >> sys.stderr, "--> (%s) exception: %s" % (url, e)
+				logging.info("(%s) exception: %s" % (url, e))
 			self._output_queue.put((idx, docfeed))
-			self._input_queue.task_done()
 
 
-def main():
+def main(input_queue, output_queue):
 	ret = ''
 
 	epoch_now = time.time()
@@ -172,25 +173,16 @@
 			feedlines = feeds_file.readlines()
 
 		docstruct = [None]*len(feedlines)
-		iq = Queue.Queue(feedlines)
-		oq = Queue.Queue(feedlines)
-
-		for _ in range(MAX_THREADS):
-			WorkerThread(input_queue=iq, output_queue=oq).start()
-
+		num_input = 0
 		for (i, l) in enumerate(feedlines):
 			if l[0] != '#':
 				l = l.strip()
-				iq.put((i, l))
+				input_queue.put((i, l))
+				num_input += 1
 
-		iq.join()
-
-		while True:
-			try:
-				(idx, docfeed) = oq.get_nowait()
-				docstruct[idx] = docfeed
-			except Queue.Empty:
-				break
+		for _ in range(num_input):
+			(idx, docfeed) = output_queue.get()
+			docstruct[idx] = docfeed
 
 		ret = _to_html(dtnow, docstruct)
 
@@ -200,6 +192,22 @@
 	return ret
 
 
-if __name__ == "__main__":
-	print main()
+class MyRssApp:
+	def __init__(self):
+		self._iq = Queue.Queue(MAX_THREADS)
+		self._oq = Queue.Queue(MAX_THREADS)
 
+		for _ in range(MAX_THREADS):
+			WorkerThread(input_queue=self._iq, output_queue=self._oq).start()
+
+	def __call__(self, environ, start_response):
+		response_body = main(self._iq, self._oq)
+		response_headers = [
+			("Content-Type", "text/html"),
+			("Content-Length", str(len(response_body))),
+		]
+		start_response("200 OK", response_headers)
+
+		return [response_body]
+
+
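
The test server named in the commit message is not part of this file's diff. As a rough sketch, assuming the module is importable as myrss_parser and that the standard-library wsgiref server is acceptable for local testing, MyRssApp could be served like this (file name, host, and port are illustrative, not taken from the changeset):

# test_server.py -- hypothetical local harness, not part of this diff
from wsgiref.simple_server import make_server

from myrss_parser import MyRssApp   # assumes running from the myrss/ directory

if __name__ == "__main__":
	# Instantiate the app once: __init__ starts MAX_THREADS WorkerThread
	# workers, and every request reuses the same input/output queues.
	application = MyRssApp()
	httpd = make_server("0.0.0.0", 8080, application)
	httpd.serve_forever()

Creating a single application object matters here because MyRssApp.__init__ starts the worker pool; constructing the app per request would spawn a new set of threads each time.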
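The threadpool change in main() follows a submit-and-count pattern: count how many work items go onto the shared input queue, then issue exactly that many blocking get() calls on the output queue, slotting each result back by index so completion order does not matter. A stripped-down sketch of the same pattern, with illustrative names that are not taken from the module:

import Queue
import threading

def worker(input_queue, output_queue):
	while True:
		idx, item = input_queue.get()           # block until work arrives
		output_queue.put((idx, item.upper()))   # emit exactly one result per item

input_queue = Queue.Queue()
output_queue = Queue.Queue()

t = threading.Thread(target=worker, args=(input_queue, output_queue))
t.daemon = True   # let the process exit when the main thread finishes
t.start()

items = ["a", "b", "c"]
results = [None] * len(items)

num_input = 0
for i, item in enumerate(items):
	input_queue.put((i, item))
	num_input += 1

# One blocking get() per submitted item; no task_done()/join() bookkeeping needed.
for _ in range(num_input):
	idx, value = output_queue.get()
	results[idx] = value

print results   # ['A', 'B', 'C']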