comparison myrss/myrss_parser.py @ 41:5f9bc02e9caf

add datetimestamp and caching
author paulo
date Fri, 01 Feb 2013 01:26:07 -0800
parents 62464a0034d1
children a1456ecd25b9
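
This changeset does two things to the page generator: it stamps the rendered page with the generation time (the datetime is threaded through _to_html() and shown in the <title> and a new <h1>), and it caches the rendered HTML in __cache__.html, reusing it while the file is younger than CACHE_LIFE seconds. Along the way, the urllib2 fetch gains an empty User-Agent header and the old inline __main__ block is folded into a main() function.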
--- a/myrss/myrss_parser.py
+++ b/myrss/myrss_parser.py
@@ -2,15 +2,23 @@
 import sys
 import re
 import urllib2
 import threading
 import Queue
+import datetime
+import time
+import os  # needed by the cache-freshness check in main() below
 
 import html
 import xml.etree.ElementTree
 
 
+FEEDS_FILE = "FEEDS"
+CACHE_HTML_FILE = "__cache__.html"
+
+#CACHE_LIFE = 1200 # [seconds]
+CACHE_LIFE = 30 # [seconds]
 MAX_ITEMS = 30
 MAX_LINK_Z = 4
 MAX_THREADS = 20
 
 
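Two details worth noting in the new constants: the cache is keyed purely on the file named by CACHE_HTML_FILE, and the commented-out 1200-second CACHE_LIFE next to the active 30-second value reads like a production setting parked while the new code path was being tested.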
@@ -65,28 +72,34 @@
         items.append((it_title, it_link))
 
     return (title, link, items)
 
 
-def _to_html(docstruct):
+def _to_html(dtnow, docstruct):
+    datetime_str = dtnow.strftime("%Y-%m-%d %H:%M %Z")
+    page_title = "myrss -- %s" % datetime_str
+
     root = html.HTML()
 
     header = root.header
-    header.title("myrss")
+    header.title(page_title)
     header.link(rel="stylesheet", type="text/css", href="index.css")
+
+    body = root.body
+    body.h1(page_title)
 
     link_z = 0
 
     for feed in docstruct:
         if feed is None:
             continue
 
         (title, link, items) = feed
 
-        root.h1.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
+        body.h2.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
         link_z += 1
-        p = root.p
+        p = body.p
 
         for (i, (it_title, it_link)) in enumerate(items):
             if i > 0:
                 p += " - "
 
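A small caveat in the new timestamp: datetime.datetime.fromtimestamp() returns a naive datetime, so the %Z in the strftime() format renders as an empty string and the page title carries no timezone name. As for the markup, the html module used here is the third-party tag-builder package in which attribute access creates nested elements and klass stands in for the reserved keyword class. A minimal sketch of the pattern, with a made-up feed title and URL (not from this repo):

    import html

    root = html.HTML()                       # root of the tag tree
    body = root.body
    body.h2.a("Example feed", href="http://feed.example.com/", klass="z0")
    p = body.p
    p += "item one"                          # appending text, as the item loop above does
    print str(root)                          # serialize the whole tree to markup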
@@ -99,11 +112,11 @@
 def _process_url(url):
     ret = None
 
     try:
         print >> sys.stderr, "--> processing %s" % url
-        feed = urllib2.urlopen(url)
+        feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": ''}))
     except urllib2.HTTPError as e:
         print >> sys.stderr, "--> (%s) %s" % (url, e)
         return ret
 
     elementTree = xml.etree.ElementTree.parse(feed)
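The new Request object exists only to blank out the User-Agent: urllib2 otherwise announces itself as Python-urllib/2.x, presumably a string that some feed hosts reject or throttle. Sending an empty agent is a blunt but effective workaround.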
@@ -142,31 +155,51 @@
                 print >> sys.stderr, "--> (%s) exception: %s" % (url, e)
             self._output_queue.put((idx, docfeed))
             self._input_queue.task_done()
 
 
+def main():
+    ret = ''
+
+    epoch_now = time.time()
+    dtnow = datetime.datetime.fromtimestamp(epoch_now)
+
+    if os.path.exists(CACHE_HTML_FILE) and (epoch_now - os.stat(CACHE_HTML_FILE).st_mtime) < float(CACHE_LIFE):
+        with open(CACHE_HTML_FILE) as cache_html_file:
+            ret = cache_html_file.read()
+
+    else:
+        with open(FEEDS_FILE) as feeds_file:
+            feedlines = feeds_file.readlines()
+
+        docstruct = [None]*len(feedlines)
+        iq = Queue.Queue()  # Queue.Queue takes an int maxsize; unbounded here
+        oq = Queue.Queue()
+
+        for _ in range(MAX_THREADS):
+            WorkerThread(input_queue=iq, output_queue=oq).start()
+
+        for (i, l) in enumerate(feedlines):
+            if l[0] != '#':
+                l = l.strip()
+                iq.put((i, l))
+
+        iq.join()
+
+        while True:
+            try:
+                (idx, docfeed) = oq.get_nowait()
+                docstruct[idx] = docfeed
+            except Queue.Empty:
+                break
+
+        ret = _to_html(dtnow, docstruct)
+
+        with open(CACHE_HTML_FILE, 'w') as cache_html_file:
+            cache_html_file.write(ret)
+
+    return ret
+
+
 if __name__ == "__main__":
-    with open("FEEDS") as feeds_file:
-        feedlines = feeds_file.readlines()
-
-    docstruct = [None]*len(feedlines)
-    iq = Queue.Queue(feedlines)
-    oq = Queue.Queue(feedlines)
-
-    for _ in range(MAX_THREADS):
-        WorkerThread(input_queue=iq, output_queue=oq).start()
-
-    for (i, l) in enumerate(feedlines):
-        if l[0] != '#':
-            l = l.strip()
-            iq.put((i, l))
-
-    iq.join()
-
-    while True:
-        try:
-            (idx, docfeed) = oq.get_nowait()
-            docstruct[idx] = docfeed
-        except Queue.Empty:
-            break
-
-    print _to_html(docstruct)
+    print main()
+
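The caching logic in main() leans on the filesystem instead of storing a timestamp anywhere: the cache file's own mtime, refreshed by the write at the end of the miss path, is the clock. A standalone sketch of the same check, with a hypothetical helper name:

    import os
    import time

    def cache_is_fresh(path, max_age_seconds):
        # fresh while the file exists and its mtime is younger than max_age_seconds
        return os.path.exists(path) and (time.time() - os.stat(path).st_mtime) < float(max_age_seconds)

Because the page is rewritten on every cache miss, os.stat(path).st_mtime always reflects the last regeneration, so no separate bookkeeping is needed.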