# HG changeset patch
# User paulo
# Date 1359710767 28800
# Node ID 5f9bc02e9cafdda4b0a31b33693d5af5f5111a31
# Parent  62464a0034d148907fc74fe57c4ecad864aa881d
add datetimestamp and caching

diff -r 62464a0034d1 -r 5f9bc02e9caf myrss/myrss_parser.py
--- a/myrss/myrss_parser.py	Thu Jan 31 02:19:39 2013 -0800
+++ b/myrss/myrss_parser.py	Fri Feb 01 01:26:07 2013 -0800
@@ -4,11 +4,18 @@
 import urllib2
 import threading
 import Queue
+import datetime
+import time
 
 import html
 import xml.etree.ElementTree
 
 
+FEEDS_FILE = "FEEDS"
+CACHE_HTML_FILE = "__cache__.html"
+
+#CACHE_LIFE = 1200 # [seconds]
+CACHE_LIFE = 30 # [seconds]
 MAX_ITEMS = 30
 MAX_LINK_Z = 4
 MAX_THREADS = 20
@@ -67,13 +74,19 @@
     return (title, link, items)
 
 
-def _to_html(docstruct):
+def _to_html(dtnow, docstruct):
+    datetime_str = dtnow.strftime("%Y-%m-%d %H:%M %Z")
+    page_title = "myrss -- %s" % datetime_str
+
     root = html.HTML()
 
     header = root.header
-    header.title("myrss")
+    header.title(page_title)
     header.link(rel="stylesheet", type="text/css", href="index.css")
 
+    body = root.body
+    body.h1(page_title)
+
     link_z = 0
 
     for feed in docstruct:
@@ -82,9 +95,9 @@
 
         (title, link, items) = feed
 
-        root.h1.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
+        body.h2.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
         link_z += 1
-        p = root.p
+        p = body.p
 
         for (i, (it_title, it_link)) in enumerate(items):
             if i > 0:
@@ -101,7 +114,7 @@
 
     try:
         print >> sys.stderr, "--> processing %s" % url
-        feed = urllib2.urlopen(url)
+        feed = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": ''}))
     except urllib2.HTTPError as e:
         print >> sys.stderr, "--> (%s) %s" % (url, e)
         return ret
@@ -144,29 +157,49 @@
             self._input_queue.task_done()
 
 
+def main():
+    ret = ''
+
+    epoch_now = time.time()
+    dtnow = datetime.datetime.fromtimestamp(epoch_now)
+
+    if os.path.exists(CACHE_HTML_FILE) and (epoch_now - os.stat(CACHE_HTML_FILE).st_mtime) < float(CACHE_LIFE):
+        with open(CACHE_HTML_FILE) as cache_html_file:
+            ret = cache_html_file.read()
+
+    else:
+        with open(FEEDS_FILE) as feeds_file:
+            feedlines = feeds_file.readlines()
+
+        docstruct = [None]*len(feedlines)
+        iq = Queue.Queue(feedlines)
+        oq = Queue.Queue(feedlines)
+
+        for _ in range(MAX_THREADS):
+            WorkerThread(input_queue=iq, output_queue=oq).start()
+
+        for (i, l) in enumerate(feedlines):
+            if l[0] != '#':
+                l = l.strip()
+                iq.put((i, l))
+
+        iq.join()
+
+        while True:
+            try:
+                (idx, docfeed) = oq.get_nowait()
+                docstruct[idx] = docfeed
+            except Queue.Empty:
+                break
+
+        ret = _to_html(dtnow, docstruct)
+
+        with open(CACHE_HTML_FILE, 'w') as cache_html_file:
+            cache_html_file.write(ret)
+
+    return ret
+
+
 if __name__ == "__main__":
-    with open("FEEDS") as feeds_file:
-        feedlines = feeds_file.readlines()
+    print main()
 
-    docstruct = [None]*len(feedlines)
-    iq = Queue.Queue(feedlines)
-    oq = Queue.Queue(feedlines)
-
-    for _ in range(MAX_THREADS):
-        WorkerThread(input_queue=iq, output_queue=oq).start()
-
-    for (i, l) in enumerate(feedlines):
-        if l[0] != '#':
-            l = l.strip()
-            iq.put((i, l))
-
-    iq.join()
-
-    while True:
-        try:
-            (idx, docfeed) = oq.get_nowait()
-            docstruct[idx] = docfeed
-        except Queue.Empty:
-            break
-
-    print _to_html(docstruct)
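
# For reference, a minimal standalone sketch (Python 2) of the mtime-based
# cache check this changeset adds to main(): serve the cached HTML while it is
# younger than CACHE_LIFE, otherwise rebuild it and rewrite the cache file.
# The cached_or_rebuild() helper and the lambda below are illustrative only,
# not part of the patch; the constants mirror the ones introduced above.
import os
import time

CACHE_HTML_FILE = "__cache__.html"
CACHE_LIFE = 30  # [seconds]


def cached_or_rebuild(rebuild):
    now = time.time()

    # cache hit: file exists and is fresh enough
    if os.path.exists(CACHE_HTML_FILE) and (now - os.stat(CACHE_HTML_FILE).st_mtime) < float(CACHE_LIFE):
        with open(CACHE_HTML_FILE) as f:
            return f.read()

    # cache miss: regenerate the page and refresh the cache file
    page = rebuild()
    with open(CACHE_HTML_FILE, 'w') as f:
        f.write(page)
    return page


if __name__ == "__main__":
    print cached_or_rebuild(lambda: "<html>generated at %s</html>" % time.ctime())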