changeset 39:915032dd35f4

add myrss2
author paulo
date Wed, 30 Jan 2013 02:32:22 -0800
parents 90be3f88005f
children 62464a0034d1
files myrss/index.css myrss/myrss_parser.py
diffstat 2 files changed, 130 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- a/myrss/index.css	Wed Jan 30 00:07:02 2013 -0800
     1.2 +++ b/myrss/index.css	Wed Jan 30 02:32:22 2013 -0800
     1.3 @@ -20,6 +20,21 @@
     1.4  	text-decoration: underline;
     1.5  }
     1.6  
     1.7 +a.z1
     1.8 +{
     1.9 +	color: #A31;
    1.10 +}
    1.11 +
    1.12 +a.z2
    1.13 +{
    1.14 +	color: #851;
    1.15 +}
    1.16 +
    1.17 +a.z3
    1.18 +{
    1.19 +	color: #833;
    1.20 +}
    1.21 +
    1.22  p:first-line
    1.23  {
    1.24  	font-weight: bold;
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/myrss/myrss_parser.py	Wed Jan 30 02:32:22 2013 -0800
     2.3 @@ -0,0 +1,115 @@
     2.4 +import os
     2.5 +import re
     2.6 +
     2.7 +import html
     2.8 +import xml.etree.ElementTree 
     2.9 +
    2.10 +
    2.11 +MAX_ITEMS = 30
    2.12 +MAX_LINK_Z = 4
    2.13 +
    2.14 +
    2.15 +_PARSE_ROOT_TAG_RE = re.compile(r"(\{(.+)\})?(.+)")
    2.16 +
    2.17 +def _parse_root_tag(root_tag):
    2.18 +	re_match = _PARSE_ROOT_TAG_RE.match(root_tag)
    2.19 +
    2.20 +	if re_match is None:
    2.21 +		return (None, None)
    2.22 +	else:
    2.23 +		return re_match.group(2, 3)
    2.24 +	
    2.25 +
    2.26 +def _go_rss(elementTree):
    2.27 +	title = elementTree.find("channel/title").text.strip()
    2.28 +	link = elementTree.find("channel/link").text
    2.29 +
    2.30 +	items = []
    2.31 +
    2.32 +	for i in elementTree.findall("channel/item")[:MAX_ITEMS]:
    2.33 +		it_title = i.find("title").text.strip()
    2.34 +		it_link = i.find("link").text
    2.35 +
    2.36 +		items.append((it_title, it_link))
    2.37 +
    2.38 +	return (title, link, items)
    2.39 +
    2.40 +
    2.41 +def _go_atom(elementTree):
    2.42 +	ns = "http://www.w3.org/2005/Atom"
    2.43 +
    2.44 +	title = elementTree.find("{%s}title" % ns).text.strip()
    2.45 +	link = ''
    2.46 +
    2.47 +	for i in elementTree.findall("{%s}link" % ns):
    2.48 +		if i.get("type") == "text/html" and i.get("rel") == "alternate":
    2.49 +			link = i.get("href")
    2.50 +			break
    2.51 +
    2.52 +	items = []
    2.53 +
    2.54 +	for i in elementTree.findall("{%s}entry" % ns)[:MAX_ITEMS]:
    2.55 +		it_title = i.find("{%s}title" % ns).text.strip()
    2.56 +		it_link = ''
    2.57 +		
    2.58 +		for j in i.findall("{%s}link" % ns):
    2.59 +			if j.get("type") == "text/html" and j.get("rel") == "alternate":
    2.60 +				it_link = j.get("href")
    2.61 +				break
    2.62 +
    2.63 +		items.append((it_title, it_link))
    2.64 +
    2.65 +	return (title, link, items)
    2.66 +
    2.67 +
    2.68 +def _to_html(docstruct):
    2.69 +	root = html.HTML()
    2.70 +
    2.71 +	header = root.header
    2.72 +	header.title("myrss")
    2.73 +	header.link(rel="stylesheet", type="text/css", href="index.css")
    2.74 +
    2.75 +	link_z = 0
    2.76 +
    2.77 +	for feed in docstruct:
    2.78 +		(title, link, items) = feed
    2.79 +
    2.80 +		root.h1.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
    2.81 +		link_z += 1
    2.82 +		p = root.p
    2.83 +
    2.84 +		for (i, (it_title, it_link)) in enumerate(items):
    2.85 +			if i > 0:
    2.86 +				p += " - "
    2.87 +
    2.88 +			p.a(it_title, href=it_link, klass="z%d" % (link_z % MAX_LINK_Z))
    2.89 +			link_z += 1
    2.90 +
    2.91 +	return unicode(root).encode("utf-8")
    2.92 +
    2.93 +
    2.94 +if __name__ == "__main__":
    2.95 +
    2.96 +	docstruct = []
    2.97 +	XMLFILE = "%d.feedtmp"
    2.98 +	
    2.99 +	for i in range(31):
   2.100 +		if os.path.exists(XMLFILE % i):
   2.101 +			elementTree = xml.etree.ElementTree.parse(XMLFILE % i)
   2.102 +			root = elementTree.getroot()
   2.103 +
   2.104 +			if _parse_root_tag(root.tag) == (None, "rss"):
   2.105 +				version = float(root.get("version", 0.0))
   2.106 +				if version >= 2.0:
   2.107 +					docstruct.append(_go_rss(elementTree))
   2.108 +				else:
   2.109 +					raise NotImplementedError("Unsupported rss version")
   2.110 +			elif _parse_root_tag(root.tag) == ("http://www.w3.org/2005/Atom", "feed"):
   2.111 +				docstruct.append(_go_atom(elementTree))
   2.112 +			else:
   2.113 +				raise NotImplementedError("Unknown root tag")
   2.114 +
   2.115 +	if len(docstruct) > 0:
   2.116 +		print _to_html(docstruct)
   2.117 +	else:
   2.118 +		raise RuntimeError("Could not produce docstruct")