view myrss/myrss_parser.py @ 39:915032dd35f4

add myrss2
author paulo
date Wed, 30 Jan 2013 02:32:22 -0800
parents
children 62464a0034d1
line source
1 import os
2 import re
4 import html
5 import xml.etree.ElementTree
# Upper bound on how many items/entries are taken from a single feed
# (used to slice the item lists in _go_rss and _go_atom).
MAX_ITEMS = 30

# Modulus applied to the running link counter in _to_html to pick the
# "z<N>" class name given to each rendered link.
MAX_LINK_Z = 4
# Matches an ElementTree-style tag: an optional "{namespace}" prefix
# (group 2 is the text between the braces) followed by the remaining
# tag text (group 3).
_PARSE_ROOT_TAG_RE = re.compile(r"(\{(.+)\})?(.+)")


def _parse_root_tag(root_tag):
    """Split an ElementTree tag string into ``(namespace, name)``.

    ``root_tag`` is matched against ``_PARSE_ROOT_TAG_RE``.  For a tag
    such as ``"{uri}feed"`` the result is ``("uri", "feed")``; for a tag
    without a ``{...}`` prefix the namespace slot is ``None``.  When the
    pattern does not match at all (e.g. an empty string) the result is
    ``(None, None)``.
    """
    found = _PARSE_ROOT_TAG_RE.match(root_tag)

    if found is not None:
        return found.group(2, 3)

    return (None, None)
def _rss_text(parent, path):
    """Return the stripped text of the first ``path`` match under ``parent``.

    Yields ``''`` when the element is absent or has no text, so callers
    never have to deal with ``None``.
    """
    node = parent.find(path)

    # Compare against None explicitly: an Element without children is falsy.
    if node is None or node.text is None:
        return ''

    return node.text.strip()


def _go_rss(elementTree, max_items=None):
    """Extract the channel title, channel link and items from an RSS tree.

    Args:
        elementTree: parsed document exposing ``find``/``findall`` relative
            to the ``<rss>`` root (e.g. ``xml.etree.ElementTree.ElementTree``).
        max_items: maximum number of ``<item>`` elements to keep; defaults
            to the module-level ``MAX_ITEMS`` when omitted.

    Returns:
        ``(title, link, items)`` where ``items`` is a list of
        ``(item_title, item_link)`` tuples in document order.  Any title or
        link that is missing or empty is returned as ``''`` (matching the
        ``''`` default used for Atom links) instead of raising, since a
        single incomplete item should not abort the whole feed.
    """
    if max_items is None:
        max_items = MAX_ITEMS

    title = _rss_text(elementTree, "channel/title")
    link = _rss_text(elementTree, "channel/link")

    items = [
        (_rss_text(item, "title"), _rss_text(item, "link"))
        for item in elementTree.findall("channel/item")[:max_items]
    ]

    return (title, link, items)
def _atom_title(node, ns):
    """Return the stripped text content of ``node``'s Atom ``title`` child.

    All nested text is joined, so a title whose content is wrapped in
    child markup still yields its text.  Returns ``''`` when there is no
    title element or it carries no text.
    """
    title = node.find("{%s}title" % ns)

    if title is None:
        return ''

    return "".join(title.itertext()).strip()


def _atom_html_link(node, ns):
    """Return the href of ``node``'s HTML "alternate" link, or ``''``.

    A link that explicitly declares ``rel="alternate"`` and
    ``type="text/html"`` wins outright.  Otherwise the first link whose
    ``rel`` is absent or "alternate" and whose ``type`` is absent or
    "text/html" is used, because an Atom link without ``rel`` is to be
    read as "alternate" and ``type`` is only an optional hint.
    """
    fallback = ''

    for candidate in node.findall("{%s}link" % ns):
        href = (candidate.get("href") or '').strip()
        rel = candidate.get("rel")
        mime = candidate.get("type")

        if rel == "alternate" and mime == "text/html":
            return href

        if (not fallback and rel in (None, "alternate")
                and mime in (None, "text/html")):
            fallback = href

    return fallback


def _go_atom(elementTree, max_items=None):
    """Extract the feed title, feed link and entries from an Atom tree.

    Args:
        elementTree: parsed document exposing ``find``/``findall`` relative
            to the Atom ``<feed>`` root.
        max_items: maximum number of ``<entry>`` elements to keep; defaults
            to the module-level ``MAX_ITEMS`` when omitted.

    Returns:
        ``(title, link, items)`` where ``items`` is a list of
        ``(entry_title, entry_link)`` tuples in document order.  Missing
        titles or links are returned as ``''`` rather than raising.
    """
    ns = "http://www.w3.org/2005/Atom"

    if max_items is None:
        max_items = MAX_ITEMS

    title = _atom_title(elementTree, ns)
    link = _atom_html_link(elementTree, ns)

    items = [
        (_atom_title(entry, ns), _atom_html_link(entry, ns))
        for entry in elementTree.findall("{%s}entry" % ns)[:max_items]
    ]

    return (title, link, items)
def _to_html(docstruct):
    """Render the collected feeds as a UTF-8 encoded HTML byte string.

    ``docstruct`` is an iterable of ``(title, link, items)`` tuples, where
    ``items`` is a sequence of ``(item_title, item_link)`` pairs.  Each feed
    becomes an ``h1`` holding a link to the feed, followed by a ``p`` whose
    item links are separated by " - ".  Every link (feed and item alike)
    receives a ``klass`` of ``"z<N>"`` with N cycling through
    ``0 .. MAX_LINK_Z - 1`` across the whole page.

    NOTE(review): ``html.HTML`` is assumed to be the attribute-driven
    builder from the third-party ``html`` package -- confirm.
    NOTE(review): the title/stylesheet are placed in a ``header`` element;
    ``head`` may have been intended -- confirm before changing output.
    """
    page = html.HTML()

    head = page.header
    head.title("myrss")
    head.link(rel="stylesheet", type="text/css", href="index.css")

    # Running count of links emitted so far; drives the class rotation.
    emitted = 0

    for (title, link, items) in docstruct:
        page.h1.a(title, href=link, klass="z%d" % (emitted % MAX_LINK_Z))
        emitted += 1

        paragraph = page.p
        need_separator = False

        for (it_title, it_link) in items:
            # Separator goes between items, never before the first one.
            if need_separator:
                paragraph += " - "
            need_separator = True

            paragraph.a(it_title, href=it_link,
                        klass="z%d" % (emitted % MAX_LINK_Z))
            emitted += 1

    return unicode(page).encode("utf-8")
if __name__ == "__main__":
    # Script entry point: read the numbered "<n>.feedtmp" files found in the
    # current directory, parse each as RSS (version >= 2.0) or Atom, and
    # print the combined HTML page.  Any unrecognised feed aborts the run.
    XMLFILE = "%d.feedtmp"
    docstruct = []

    for i in range(31):
        feed_path = XMLFILE % i

        # Gaps in the numbering are fine; just skip absent files.
        if not os.path.exists(feed_path):
            continue

        elementTree = xml.etree.ElementTree.parse(feed_path)
        root = elementTree.getroot()
        root_tag = _parse_root_tag(root.tag)

        if root_tag == (None, "rss"):
            # A missing version attribute counts as 0.0 and is rejected.
            version = float(root.get("version", 0.0))
            if version >= 2.0:
                docstruct.append(_go_rss(elementTree))
            else:
                raise NotImplementedError("Unsupported rss version")
        elif root_tag == ("http://www.w3.org/2005/Atom", "feed"):
            docstruct.append(_go_atom(elementTree))
        else:
            raise NotImplementedError("Unknown root tag")

    if len(docstruct) == 0:
        raise RuntimeError("Could not produce docstruct")

    # Single parenthesised argument: same output as the print statement.
    print(_to_html(docstruct))