# HG changeset patch
# User paulo
# Date 1359541942 28800
# Node ID 915032dd35f4b27f9e4759118ac93b1c941161ce
# Parent 90be3f88005f5a465265bed8dcb89798000b1e73
# add myrss2
#
# This is myrss/myrss_parser.py as introduced by the changeset above.
#
# The same changeset also adds the following link colour rules to
# myrss/index.css (between the existing "text-decoration: underline;" rule
# and the "p:first-line" rule); _to_html() below emits the matching classes:
#
#     a.z1 { color: #A31; }
#     a.z2 { color: #851; }
#     a.z3 { color: #833; }
"""Turn downloaded RSS 2.x / Atom feed files into one HTML page.

Run as a script, it looks for ``0.feedtmp`` .. ``30.feedtmp`` in the current
directory, parses every file that exists and prints the resulting HTML
(UTF-8 encoded) to standard output.
"""

import os
import re

# NOTE(review): ``html.HTML`` is not provided by the standard-library ``html``
# module; presumably this is the third-party "html" package -- confirm.
import html
import xml.etree.ElementTree


# Maximum number of items/entries taken from a single feed.
MAX_ITEMS = 30
# Link CSS classes cycle through "z0" .. "z<MAX_LINK_Z - 1>".
MAX_LINK_Z = 4

_ATOM_NS = "http://www.w3.org/2005/Atom"

_PARSE_ROOT_TAG_RE = re.compile(r"(\{(.+)\})?(.+)")


def _parse_root_tag(root_tag):
    """Split an ElementTree tag name into ``(namespace, local_name)``.

    ``namespace`` is ``None`` for an unqualified tag.  ``(None, None)`` is
    returned when the tag does not match at all (e.g. an empty string).
    """
    re_match = _PARSE_ROOT_TAG_RE.match(root_tag)
    if re_match is None:
        return (None, None)
    return re_match.group(2, 3)


def _child_text(node, path):
    """Return the stripped text of the first child matching *path*.

    An empty string is returned when the child is missing or has no text, so
    that feeds omitting optional elements do not abort the whole run.
    """
    child = node.find(path)
    if child is None or child.text is None:
        return ""
    return child.text.strip()


def _atom_alternate_href(node):
    """Return the href of the HTML "alternate" link below *node*, or ``""``.

    A link explicitly marked ``rel="alternate" type="text/html"`` wins.
    Otherwise the first link whose ``rel`` is absent or "alternate" and whose
    ``type`` is absent or "text/html" is used, because Atom treats a missing
    ``rel`` as "alternate" and ``type`` is optional.
    """
    links = node.findall("{%s}link" % _ATOM_NS)

    for candidate in links:
        if (candidate.get("type") == "text/html"
                and candidate.get("rel") == "alternate"):
            return candidate.get("href") or ""

    for candidate in links:
        if (candidate.get("rel", "alternate") == "alternate"
                and candidate.get("type") in (None, "text/html")):
            return candidate.get("href") or ""

    return ""


def _go_rss(elementTree):
    """Extract ``(title, link, items)`` from a parsed RSS document.

    ``items`` is a list of ``(title, link)`` tuples, at most ``MAX_ITEMS``
    long.  Missing or empty elements yield empty strings.
    """
    title = _child_text(elementTree, "channel/title")
    link = _child_text(elementTree, "channel/link")

    items = [
        (_child_text(item, "title"), _child_text(item, "link"))
        for item in elementTree.findall("channel/item")[:MAX_ITEMS]
    ]

    return (title, link, items)


def _go_atom(elementTree):
    """Extract ``(title, link, items)`` from a parsed Atom document.

    ``items`` is a list of ``(title, link)`` tuples, at most ``MAX_ITEMS``
    long.  Missing or empty elements yield empty strings.
    """
    title_path = "{%s}title" % _ATOM_NS

    title = _child_text(elementTree, title_path)
    link = _atom_alternate_href(elementTree)

    items = [
        (_child_text(entry, title_path), _atom_alternate_href(entry))
        for entry in elementTree.findall("{%s}entry" % _ATOM_NS)[:MAX_ITEMS]
    ]

    return (title, link, items)


def _to_html(docstruct):
    """Render a list of ``(title, link, items)`` feeds as UTF-8 encoded HTML.

    Each feed becomes an ``<h1>`` link followed by a paragraph of item links
    separated by " - ".  Every link gets a CSS class ``z<N>`` where N cycles
    through ``0 .. MAX_LINK_Z - 1``.
    """
    root = html.HTML()

    # NOTE(review): this emits a <header> element, not <head>; kept as in the
    # original because the output markup may be relied upon -- confirm.
    header = root.header
    header.title("myrss")
    header.link(rel="stylesheet", type="text/css", href="index.css")

    link_z = 0

    for feed in docstruct:
        (title, link, items) = feed

        root.h1.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
        link_z += 1
        p = root.p

        for (i, (it_title, it_link)) in enumerate(items):
            if i > 0:
                p += " - "

            p.a(it_title, href=it_link, klass="z%d" % (link_z % MAX_LINK_Z))
            # NOTE(review): the original indentation was lost; the counter is
            # assumed to advance once per item link -- confirm.
            link_z += 1

    # Python 2 only: ``unicode`` does not exist on Python 3.
    return unicode(root).encode("utf-8")


def _load_feed(path):
    """Parse the feed file at *path* and return its ``(title, link, items)``.

    Raises ``NotImplementedError`` for RSS versions below 2.0 and for root
    elements that are neither ``rss`` nor an Atom ``feed``.
    """
    elementTree = xml.etree.ElementTree.parse(path)
    root = elementTree.getroot()
    root_tag = _parse_root_tag(root.tag)

    if root_tag == (None, "rss"):
        version = float(root.get("version", 0.0))
        if version >= 2.0:
            return _go_rss(elementTree)
        raise NotImplementedError("Unsupported rss version")

    if root_tag == (_ATOM_NS, "feed"):
        return _go_atom(elementTree)

    raise NotImplementedError("Unknown root tag")


def main():
    """Convert every existing ``<n>.feedtmp`` (n = 0..30) and print the page."""
    xml_file = "%d.feedtmp"

    docstruct = []
    for i in range(31):
        path = xml_file % i
        if os.path.exists(path):
            docstruct.append(_load_feed(path))

    if not docstruct:
        raise RuntimeError("Could not produce docstruct")

    # Parenthesised single argument: identical output under the Python 2
    # print statement, and still valid syntax on Python 3.
    print(_to_html(docstruct))


if __name__ == "__main__":
    main()