comparison myrss/myrss_parser.py @ 42:a1456ecd25b9
fix bug with threadpool; create WSGI app and test server; use logging
author | paulo |
---|---|
date | Mon, 04 Feb 2013 23:58:02 -0800 |
parents | 5f9bc02e9caf |
children | |
2:226e506e9c82 | 3:a5668d7bdf5b |
---|---|
4 import urllib2 | 4 import urllib2 |
5 import threading | 5 import threading |
6 import Queue | 6 import Queue |
7 import datetime | 7 import datetime |
8 import time | 8 import time |
| 9 import logging |
| 10 logging.basicConfig(level=logging.INFO) |
9 | 11 |
10 import html | 12 import html |
11 import xml.etree.ElementTree | 13 import xml.etree.ElementTree |
12 | 14 |
13 | 15 |
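The right-hand column introduces the standard logging module in place of the print >> sys.stderr calls used elsewhere in the file. A minimal sketch of that pattern (the function name is only illustrative):

    import logging

    # basicConfig installs a handler that writes to stderr by default,
    # so INFO lines end up where the old print >> sys.stderr output went
    logging.basicConfig(level=logging.INFO)

    def note(url):
        logging.info("processing %s", url)

Calling basicConfig once at import time, as the diff does, configures the root logger, so every module that logs through it shares the same INFO threshold.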
76 | 78 |
77 def _to_html(dtnow, docstruct): | 79 def _to_html(dtnow, docstruct): |
78 datetime_str = dtnow.strftime("%Y-%m-%d %H:%M %Z") | 80 datetime_str = dtnow.strftime("%Y-%m-%d %H:%M %Z") |
79 page_title = "myrss -- %s" % datetime_str | 81 page_title = "myrss -- %s" % datetime_str |
80 | 82 |
81 root = html.HTML() | 83 root = html.HTML("html") |
82 | 84 |
83 header = root.header | 85 header = root.header |
84 header.title(page_title) | 86 header.title(page_title) |
85 header.link(rel="stylesheet", type="text/css", href="index.css") | 87 header.link(rel="stylesheet", type="text/css", href="index.css") |
86 | 88 |
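The change to html.HTML("html") asks the tag builder for an explicit <html> root element instead of a bare container. Assuming the html.py builder behaves as this file uses it (attribute access creates a nested tag, a call sets text and attributes), a small standalone sketch:

    import html

    root = html.HTML("html")                  # explicit <html> root tag
    head = root.header                        # attribute access creates a <header> child
    head.title("myrss -- 2013-02-04 23:58")   # sample title text, not taken from the diff
    head.link(rel="stylesheet", type="text/css", href="index.css")
    page = str(root)                          # render the whole tree to markup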
111 | 113 |
112 def _process_url(url): | 114 def _process_url(url): |
113 ret = None | 115 ret = None |
114 | 116 |
115 try: | 117 try: |
116 print >> sys.stderr, "--> processing %s" % url | 118 logging.info("processing %s" % url) |
117 feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": ''})) | 119 feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": ''})) |
118 except urllib2.HTTPError as e: | 120 except urllib2.HTTPError as e: |
119 print >> sys.stderr, "--> (%s) %s" % (url, e) | 121 logging.info("(%s) %s" % (url, e)) |
120 return ret | 122 return ret |
121 | 123 |
122 elementTree = xml.etree.ElementTree.parse(feed) | 124 elementTree = xml.etree.ElementTree.parse(feed) |
123 root = elementTree.getroot() | 125 root = elementTree.getroot() |
124 | 126 |
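_process_url fetches the feed with urllib2 (empty User-Agent header) and hands the response straight to xml.etree.ElementTree.parse. The element handling itself sits outside this hunk; the sketch below shows one plausible way to pull titles out of a plain RSS 2.0 feed, so the channel/item/title layout is an assumption:

    import urllib2
    import xml.etree.ElementTree

    def fetch_titles(url):
        req = urllib2.Request(url, headers={"User-Agent": ''})
        feed = urllib2.urlopen(req)            # may raise urllib2.HTTPError, as handled above
        tree = xml.etree.ElementTree.parse(feed)
        root = tree.getroot()
        # RSS 2.0 assumed: <rss><channel><item><title>...; Atom feeds would need namespaced paths
        return [item.findtext("title") for item in root.findall("channel/item")]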
150 (idx, url) = self._input_queue.get() | 152 (idx, url) = self._input_queue.get() |
151 docfeed = None | 153 docfeed = None |
152 try: | 154 try: |
153 docfeed = _process_url(url) | 155 docfeed = _process_url(url) |
154 except Exception as e: | 156 except Exception as e: |
155 print >> sys.stderr, "--> (%s) exception: %s" % (url, e) | 157 logging.info("(%s) exception: %s" % (url, e)) |
156 self._output_queue.put((idx, docfeed)) | 158 self._output_queue.put((idx, docfeed)) |
157 self._input_queue.task_done() | |
158 | 159 |
159 | 160 |
160 def main(): | 161 def main(input_queue, output_queue): |
161 ret = '' | 162 ret = '' |
162 | 163 |
163 epoch_now = time.time() | 164 epoch_now = time.time() |
164 dtnow = datetime.datetime.fromtimestamp(epoch_now) | 165 dtnow = datetime.datetime.fromtimestamp(epoch_now) |
165 | 166 |
170 else: | 171 else: |
171 with open(FEEDS_FILE) as feeds_file: | 172 with open(FEEDS_FILE) as feeds_file: |
172 feedlines = feeds_file.readlines() | 173 feedlines = feeds_file.readlines() |
173 | 174 |
174 docstruct = [None]*len(feedlines) | 175 docstruct = [None]*len(feedlines) |
175 iq = Queue.Queue(feedlines) | 176 num_input = 0 |
176 oq = Queue.Queue(feedlines) | |
177 | |
178 for _ in range(MAX_THREADS): | |
179 WorkerThread(input_queue=iq, output_queue=oq).start() | |
180 | |
181 for (i, l) in enumerate(feedlines): | 177 for (i, l) in enumerate(feedlines): |
182 if l[0] != '#': | 178 if l[0] != '#': |
183 l = l.strip() | 179 l = l.strip() |
184 iq.put((i, l)) | 180 input_queue.put((i, l)) |
185 | 181 num_input += 1 |
186 iq.join() | 182 |
187 | 183 for _ in range(num_input): |
188 while True: | 184 (idx, docfeed) = output_queue.get() |
189 try: | 185 docstruct[idx] = docfeed |
190 (idx, docfeed) = oq.get_nowait() | |
191 docstruct[idx] = docfeed | |
192 except Queue.Empty: | |
193 break | |
194 | 186 |
195 ret = _to_html(dtnow, docstruct) | 187 ret = _to_html(dtnow, docstruct) |
196 | 188 |
197 with open(CACHE_HTML_FILE, 'w') as cache_html_file: | 189 with open(CACHE_HTML_FILE, 'w') as cache_html_file: |
198 cache_html_file.write(ret) | 190 cache_html_file.write(ret) |
199 | 191 |
200 return ret | 192 return ret |
201 | 193 |
202 | 194 |
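The threadpool fix is in this hunk: instead of iq.join() followed by a get_nowait() drain of the output queue, main() now counts how many URLs it enqueued and blocks for exactly that many results, and the queues plus worker threads are created once by the caller rather than on every call. A self-contained sketch of that counted fan-out/fan-in pattern (names like _worker and gather are illustrative, and the queues are left unbounded to keep the sketch simple, whereas the diff bounds them at MAX_THREADS):

    import Queue
    import threading

    def _worker(input_queue, output_queue):
        # long-lived worker: one answer per queued item, failures reported as None
        while True:
            (idx, url) = input_queue.get()
            try:
                result = len(url)              # placeholder for real per-URL work
            except Exception:
                result = None
            output_queue.put((idx, result))

    def gather(urls, num_threads=4):
        iq = Queue.Queue()
        oq = Queue.Queue()
        for _ in range(num_threads):
            t = threading.Thread(target=_worker, args=(iq, oq))
            t.daemon = True                    # workers die with the process
            t.start()

        num_input = 0
        for (i, url) in enumerate(urls):
            iq.put((i, url))
            num_input += 1

        results = [None] * num_input
        for _ in range(num_input):             # block for exactly as many answers as questions
            (idx, value) = oq.get()
            results[idx] = value
        return results

Because every worker always puts a result (even None on failure), the collector can rely on the count matching and never needs task_done()/join() bookkeeping.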
203 if __name__ == "__main__": | 195 class MyRssApp: |
204 print main() | 196 def __init__(self): |
205 | 197 self._iq = Queue.Queue(MAX_THREADS) |
| 198 self._oq = Queue.Queue(MAX_THREADS) |
| 199 |
| 200 for _ in range(MAX_THREADS): |
| 201 WorkerThread(input_queue=self._iq, output_queue=self._oq).start() |
| 202 |
| 203 def __call__(self, environ, start_response): |
| 204 response_body = main(self._iq, self._oq) |
| 205 response_headers = [ |
| 206 ("Content-Type", "text/html"), |
| 207 ("Content-Length", str(len(response_body))), |
| 208 ] |
| 209 start_response("200 OK", response_headers) |
| 210 |
| 211 return [response_body] |
| 212 |
| 213 |
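MyRssApp follows the WSGI callable convention (environ, start_response), which is what lets the commit's test server serve it. The test-server script itself is not part of this diff; assuming the module is importable as myrss_parser, a minimal way to run it with the standard library's reference server would be:

    from wsgiref.simple_server import make_server

    import myrss_parser                        # the module shown in this diff

    httpd = make_server("localhost", 8000, myrss_parser.MyRssApp())
    print "serving on http://localhost:8000/"  # port 8000 is an arbitrary choice
    httpd.serve_forever()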