"""Render a set of downloaded RSS/Atom feeds as one HTML page of links.

Run as a script, this reads ``0.feedtmp`` .. ``30.feedtmp`` (relative
paths, files that do not exist are skipped), extracts each feed's title,
link and first ``MAX_ITEMS`` entries, and prints the generated HTML,
encoded as UTF-8, to standard output.

NOTE(review): the code targets Python 2 (it calls ``unicode``) and relies
on an ``html`` module exposing an ``HTML`` builder class -- presumably the
third-party ``html`` package, not the Python 3 standard-library module of
the same name. Confirm before porting.
"""

import os
import re

import html
import xml.etree.ElementTree


# Maximum number of items/entries taken from each feed.
MAX_ITEMS = 30
# The generated links cycle through the CSS classes "z0" .. "z<MAX_LINK_Z-1>".
MAX_LINK_Z = 4

# XML namespace of Atom documents.
_ATOM_NS = "http://www.w3.org/2005/Atom"

# Matches an ElementTree tag of the form "{namespace}local", the
# "{namespace}" part being optional: group 2 is the namespace, group 3 the
# local name.
_PARSE_ROOT_TAG_RE = re.compile(r"(\{(.+)\})?(.+)")


def _parse_root_tag(root_tag):
    """Split an ElementTree tag into ``(namespace, local_name)``.

    ``namespace`` is ``None`` when the tag has no ``{...}`` prefix.
    ``(None, None)`` is returned when the tag does not match the pattern
    at all (for example an empty string).
    """
    re_match = _PARSE_ROOT_TAG_RE.match(root_tag)

    if re_match is None:
        return (None, None)

    return re_match.group(2, 3)


def _child_text(parent, path, strip=False):
    """Return the text of the first element matching *path* under *parent*.

    ``''`` is returned when the element is missing or has no text, so that
    feeds omitting an optional element do not abort the whole run. When
    *strip* is true, surrounding whitespace is removed.
    """
    node = parent.find(path)

    if node is None or node.text is None:
        return ''

    if strip:
        return node.text.strip()

    return node.text


def _atom_link(node):
    """Return the href of the HTML "alternate" link found under *node*.

    A link that explicitly declares ``rel="alternate"`` and
    ``type="text/html"`` wins. Otherwise the first link whose ``rel`` is
    absent or "alternate" and whose ``type`` is absent or "text/html" is
    used, because Atom treats a missing ``rel`` as "alternate" and makes
    ``type`` optional. ``''`` is returned when no link qualifies.
    """
    fallback = None

    for candidate in node.findall("{%s}link" % _ATOM_NS):
        rel = candidate.get("rel")
        link_type = candidate.get("type")

        if rel == "alternate" and link_type == "text/html":
            return candidate.get("href")

        if (fallback is None
                and rel in (None, "alternate")
                and link_type in (None, "text/html")):
            fallback = candidate.get("href")

    return fallback or ''


def _go_rss(elementTree):
    """Extract ``(title, link, items)`` from a parsed RSS document.

    *elementTree* is searched with paths relative to the ``<rss>`` root
    (``channel/title``, ``channel/link``, ``channel/item``). ``items`` is a
    list of ``(title, link)`` tuples for at most ``MAX_ITEMS`` items.
    Titles are stripped of surrounding whitespace; a missing or empty
    title or link becomes ``''``.
    """
    title = _child_text(elementTree, "channel/title", strip=True)
    link = _child_text(elementTree, "channel/link")

    items = [
        (_child_text(item, "title", strip=True), _child_text(item, "link"))
        for item in elementTree.findall("channel/item")[:MAX_ITEMS]
    ]

    return (title, link, items)


def _go_atom(elementTree):
    """Extract ``(title, link, items)`` from a parsed Atom document.

    ``items`` is a list of ``(title, link)`` tuples for at most
    ``MAX_ITEMS`` entries. Titles are stripped of surrounding whitespace
    and default to ``''``; links are chosen by ``_atom_link``.
    """
    title_path = "{%s}title" % _ATOM_NS

    title = _child_text(elementTree, title_path, strip=True)
    link = _atom_link(elementTree)

    items = [
        (_child_text(entry, title_path, strip=True), _atom_link(entry))
        for entry in elementTree.findall("{%s}entry" % _ATOM_NS)[:MAX_ITEMS]
    ]

    return (title, link, items)


def _parse_feed(elementTree):
    """Dispatch a parsed feed document to the matching extractor.

    Returns the ``(title, link, items)`` tuple built by ``_go_rss`` or
    ``_go_atom``.

    Raises ``NotImplementedError`` for an RSS document whose ``version``
    attribute is below 2.0 (or absent) and for any other root tag.
    """
    root = elementTree.getroot()
    root_tag = _parse_root_tag(root.tag)

    if root_tag == (None, "rss"):
        version = float(root.get("version", 0.0))
        if version >= 2.0:
            return _go_rss(elementTree)
        raise NotImplementedError("Unsupported rss version")

    if root_tag == (_ATOM_NS, "feed"):
        return _go_atom(elementTree)

    raise NotImplementedError("Unknown root tag")


def _to_html(docstruct):
    """Render *docstruct* as an HTML document and return it UTF-8 encoded.

    *docstruct* is a list of ``(title, link, items)`` tuples as produced by
    ``_go_rss`` / ``_go_atom``. Each feed becomes an ``h1`` holding a link,
    followed by a ``p`` holding one link per item, separated by " - ".

    NOTE(review): ``klass`` is presumably how the ``html`` builder spells
    the ``class`` attribute -- confirm against that library.
    """
    root = html.HTML()

    # NOTE(review): this presumably emits a <header> element; <head> may
    # have been intended for the title and stylesheet -- confirm.
    header = root.header
    header.title("myrss")
    header.link(rel="stylesheet", type="text/css", href="index.css")

    # Running counter over every generated link (feed titles and items
    # alike); it selects the "zN" class, wrapping at MAX_LINK_Z.
    link_z = 0

    for (title, link, items) in docstruct:
        root.h1.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
        link_z += 1
        p = root.p

        for (i, (it_title, it_link)) in enumerate(items):
            if i > 0:
                p += " - "

            p.a(it_title, href=it_link, klass="z%d" % (link_z % MAX_LINK_Z))
            link_z += 1

    return unicode(root).encode("utf-8")


if __name__ == "__main__":

    docstruct = []
    XMLFILE = "%d.feedtmp"

    for i in range(31):
        if os.path.exists(XMLFILE % i):
            elementTree = xml.etree.ElementTree.parse(XMLFILE % i)
            docstruct.append(_parse_feed(elementTree))

    if len(docstruct) > 0:
        # Single parenthesised argument: behaves exactly like the Python 2
        # print statement while remaining parseable by newer interpreters.
        print(_to_html(docstruct))
    else:
        raise RuntimeError("Could not produce docstruct")