comparison myrss/myrss_parser.py @ 39:915032dd35f4

add myrss2
author paulo
date Wed, 30 Jan 2013 02:32:22 -0800
parents
children 62464a0034d1
comparison
equal deleted inserted replaced
-1:000000000000 0:9aec8c0eb6b1
1 import os
2 import re
3
4 import html
5 import xml.etree.ElementTree
6
7
8 MAX_ITEMS = 30  # upper bound on the items/entries taken from each feed
9 MAX_LINK_Z = 4  # number of "z%d" CSS classes the rendered links cycle through
10
11
12 _PARSE_ROOT_TAG_RE = re.compile(r"(\{(.+)\})?(.+)")
13
14 def _parse_root_tag(root_tag):
15 re_match = _PARSE_ROOT_TAG_RE.match(root_tag)
16
17 if re_match is None:
18 return (None, None)
19 else:
20 return re_match.group(2, 3)
21
22
23 def _go_rss(elementTree):
24 title = elementTree.find("channel/title").text.strip()
25 link = elementTree.find("channel/link").text
26
27 items = []
28
29 for i in elementTree.findall("channel/item")[:MAX_ITEMS]:
30 it_title = i.find("title").text.strip()
31 it_link = i.find("link").text
32
33 items.append((it_title, it_link))
34
35 return (title, link, items)
36
37
38 def _go_atom(elementTree):
39 ns = "http://www.w3.org/2005/Atom"
40
41 title = elementTree.find("{%s}title" % ns).text.strip()
42 link = ''
43
44 for i in elementTree.findall("{%s}link" % ns):
45 if i.get("type") == "text/html" and i.get("rel") == "alternate":
46 link = i.get("href")
47 break
48
49 items = []
50
51 for i in elementTree.findall("{%s}entry" % ns)[:MAX_ITEMS]:
52 it_title = i.find("{%s}title" % ns).text.strip()
53 it_link = ''
54
55 for j in i.findall("{%s}link" % ns):
56 if j.get("type") == "text/html" and j.get("rel") == "alternate":
57 it_link = j.get("href")
58 break
59
60 items.append((it_title, it_link))
61
62 return (title, link, items)
63
64
def _to_html(docstruct):
    """Render the parsed feeds as a UTF-8 encoded HTML byte string.

    Args:
        docstruct: iterable of ``(title, link, items)`` tuples as produced by
            ``_go_rss`` / ``_go_atom``; ``items`` is a list of
            ``(item_title, item_link)`` tuples.

    Returns:
        The page serialized with ``unicode(...)`` and encoded as UTF-8.
        Each feed becomes an ``<h1>`` link followed by one ``<p>`` holding
        its item links separated by " - ".  Every link gets a CSS class
        ``z0`` .. ``z<MAX_LINK_Z - 1>`` assigned round-robin over the whole
        page.
    """
    root = html.HTML()

    # <title> and the stylesheet <link> are document metadata and belong in
    # <head>; <header> is a body-level sectioning element.
    head = root.head
    head.title("myrss")
    head.link(rel="stylesheet", type="text/css", href="index.css")

    # Running counter shared by feed titles and item links so consecutive
    # links cycle through the z-classes.
    link_z = 0

    for (title, link, items) in docstruct:
        root.h1.a(title, href=link, klass="z%d" % (link_z % MAX_LINK_Z))
        link_z += 1

        # NOTE(review): this relies on the third-party `html` package
        # creating a new child element on attribute access, so `root.p`
        # must be read exactly once per feed -- confirm against its docs.
        p = root.p

        for (i, (it_title, it_link)) in enumerate(items):
            if i > 0:
                p += " - "

            p.a(it_title, href=it_link, klass="z%d" % (link_z % MAX_LINK_Z))
            link_z += 1

    return unicode(root).encode("utf-8")
89
90
if __name__ == "__main__":
    # Script entry point: parse every "<n>.feedtmp" file (n = 0..30) found in
    # the current directory and print the combined HTML page to stdout.

    XMLFILE = "%d.feedtmp"
    docstruct = []

    for index in range(31):
        feed_path = XMLFILE % index

        if not os.path.exists(feed_path):
            continue

        elementTree = xml.etree.ElementTree.parse(feed_path)
        root = elementTree.getroot()
        root_tag = _parse_root_tag(root.tag)

        if root_tag == (None, "rss"):
            rss_version = float(root.get("version", 0.0))

            # Only RSS 2.0 and later are handled.
            if not rss_version >= 2.0:
                raise NotImplementedError("Unsupported rss version")

            docstruct.append(_go_rss(elementTree))
        elif root_tag == ("http://www.w3.org/2005/Atom", "feed"):
            docstruct.append(_go_atom(elementTree))
        else:
            raise NotImplementedError("Unknown root tag")

    if not docstruct:
        raise RuntimeError("Could not produce docstruct")

    print(_to_html(docstruct))