Mercurial > hg > index.fcgi > www > www-1
comparison myrss/myrss_parser.py @ 39:915032dd35f4
add myrss2
author | paulo |
---|---|
date | Wed, 30 Jan 2013 02:32:22 -0800 |
parents | |
children | 62464a0034d1 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9aec8c0eb6b1 |
---|---|
1 import os | |
2 import re | |
3 | |
4 import html | |
5 import xml.etree.ElementTree | |
6 | |
7 | |
# Upper bound on how many items/entries are taken from a single feed
# (applied as a slice limit in _go_rss and _go_atom).
MAX_ITEMS = 30
# Modulus for the running link counter in _to_html: each link is given the
# class name "z<N>" with N = counter % MAX_LINK_Z.
MAX_LINK_Z = 4
10 | |
11 | |
# Splits an ElementTree-style tag such as "{namespace}name" into its optional
# namespace part (group 2) and its local name (group 3).
_PARSE_ROOT_TAG_RE = re.compile(r"(\{(.+)\})?(.+)")


def _parse_root_tag(root_tag):
    """Split a root element tag into a (namespace, local_name) pair.

    The namespace is None when the tag carries no "{...}" prefix.  When the
    tag does not match the pattern at all (for example an empty string),
    (None, None) is returned.
    """
    found = _PARSE_ROOT_TAG_RE.match(root_tag)

    if found is not None:
        return (found.group(2), found.group(3))

    return (None, None)
21 | |
22 | |
def _rss_child_text(parent, path):
    """Return the text of the first element at *path* under *parent*.

    Returns "" when the element is missing or has no text, so callers never
    have to deal with None.
    """
    node = parent.find(path)
    if node is None or node.text is None:
        return ""
    return node.text


def _go_rss(elementTree, max_items=None):
    """Extract the channel title, channel link and items from an RSS tree.

    Args:
        elementTree: object offering ElementTree-style find()/findall(),
            searched with paths relative to the <rss> root.
        max_items: maximum number of items to return; None (the default)
            means the module-level MAX_ITEMS.

    Returns:
        A (title, link, items) tuple.  items is a list of
        (item_title, item_link) tuples in document order.  Titles are
        whitespace-stripped; links are returned as found.  A missing or
        empty <title>/<link> yields "" instead of raising AttributeError.
    """
    if max_items is None:
        max_items = MAX_ITEMS

    title = _rss_child_text(elementTree, "channel/title").strip()
    link = _rss_child_text(elementTree, "channel/link")

    items = [
        (_rss_child_text(item, "title").strip(), _rss_child_text(item, "link"))
        for item in elementTree.findall("channel/item")[:max_items]
    ]

    return (title, link, items)
36 | |
37 | |
def _atom_child_text(parent, path):
    """Return the stripped text of the first element at *path*, or "".

    "" is returned both when the element is missing and when it has no
    text, which would otherwise surface as an AttributeError on None.
    """
    node = parent.find(path)
    if node is None or node.text is None:
        return ""
    return node.text.strip()


def _atom_html_link(parent, ns):
    """Return the href of the first text/html "alternate" link of *parent*.

    Returns "" when no such <link> child exists or it has no href.
    """
    for candidate in parent.findall("{%s}link" % ns):
        # RFC 4287 section 4.2.7.2: a link without a "rel" attribute must be
        # interpreted as rel="alternate".
        rel = candidate.get("rel", "alternate")
        if candidate.get("type") == "text/html" and rel == "alternate":
            return candidate.get("href", "")
    return ""


def _go_atom(elementTree, max_items=None):
    """Extract the feed title, feed link and entries from an Atom tree.

    Args:
        elementTree: object offering ElementTree-style find()/findall(),
            searched with paths relative to the Atom <feed> root.
        max_items: maximum number of entries to return; None (the default)
            means the module-level MAX_ITEMS.

    Returns:
        A (title, link, items) tuple.  items is a list of
        (entry_title, entry_link) tuples in document order.  Titles are
        whitespace-stripped and "" when missing or empty; a link is the
        href of the first text/html alternate <link>, or "" when there is
        none.
    """
    ns = "http://www.w3.org/2005/Atom"

    if max_items is None:
        max_items = MAX_ITEMS

    title = _atom_child_text(elementTree, "{%s}title" % ns)
    link = _atom_html_link(elementTree, ns)

    items = [
        (_atom_child_text(entry, "{%s}title" % ns), _atom_html_link(entry, ns))
        for entry in elementTree.findall("{%s}entry" % ns)[:max_items]
    ]

    return (title, link, items)
63 | |
64 | |
def _to_html(docstruct):
    """Render the collected feeds as a UTF-8 encoded HTML string.

    docstruct is an iterable of (title, link, items) tuples, where items is
    a sequence of (item_title, item_link) pairs.  For every feed an h1 link
    is added, followed by a p holding the item links separated by " - ".
    Each link receives the class "z<N>", with N cycling through
    0 .. MAX_LINK_Z - 1 across all links in the order they are emitted.
    """
    page = html.HTML()

    # NOTE(review): presumably this produces a <header> element through the
    # `html` builder; confirm that <head> was not the intended tag.
    head = page.header
    head.title("myrss")
    head.link(rel="stylesheet", type="text/css", href="index.css")

    # Running count of links emitted so far; drives the cycling class name.
    links_seen = 0

    for (feed_title, feed_link, feed_items) in docstruct:
        feed_class = "z%d" % (links_seen % MAX_LINK_Z)
        page.h1.a(feed_title, href=feed_link, klass=feed_class)
        links_seen += 1

        paragraph = page.p

        for (position, (item_title, item_link)) in enumerate(feed_items):
            # Separator goes between items, never before the first one.
            if position > 0:
                paragraph += " - "

            item_class = "z%d" % (links_seen % MAX_LINK_Z)
            paragraph.a(item_title, href=item_link, klass=item_class)
            links_seen += 1

    return unicode(page).encode("utf-8")
89 | |
90 | |
if __name__ == "__main__":
    # Script entry point: parse every existing "<n>.feedtmp" file for
    # n in 0..30 from the current directory, convert each one to a
    # (title, link, items) tuple and print the combined HTML page.

    XMLFILE = "%d.feedtmp"
    docstruct = []

    for index in range(31):
        feed_path = XMLFILE % index

        # Feed files that are absent are simply skipped.
        if not os.path.exists(feed_path):
            continue

        elementTree = xml.etree.ElementTree.parse(feed_path)
        root = elementTree.getroot()
        root_id = _parse_root_tag(root.tag)

        if root_id == (None, "rss"):
            # Only RSS 2.0 and later is handled; a missing version counts
            # as 0.0 and is rejected.
            version = float(root.get("version", 0.0))
            if not (version >= 2.0):
                raise NotImplementedError("Unsupported rss version")
            docstruct.append(_go_rss(elementTree))
        elif root_id == ("http://www.w3.org/2005/Atom", "feed"):
            docstruct.append(_go_atom(elementTree))
        else:
            raise NotImplementedError("Unknown root tag")

    if not docstruct:
        raise RuntimeError("Could not produce docstruct")

    print(_to_html(docstruct))