comparison myrss/myrss_app.py @ 69:ae0f2f438a95

myrss: add support for new "purl" RSS type
author paulo
date Thu, 11 Jun 2015 22:03:34 -0700
parents 66a232bae83c
children 3456dd3e8660
comparison
equal deleted inserted replaced
4:217205b1f74b 5:4ab4372f17fc
85 items.append((it_title, it_link)) 85 items.append((it_title, it_link))
86 86
87 return (title, link, items) 87 return (title, link, items)
88 88
89 89
def _go_purl_rss(elementTree):
    """Pull the channel title, channel link and items out of an RSS 1.0 feed.

    Every element is looked up in the ``http://purl.org/rss/1.0/`` namespace.
    ``item`` elements are searched for directly under ``elementTree`` (not
    inside ``channel``), and at most ``MAX_ITEMS`` of them are used.

    Titles are passed through ``_strip_if_not_none``; links are returned
    exactly as found in the document.

    Returns a ``(title, link, items)`` tuple, where ``items`` is a list of
    ``(item_title, item_link)`` pairs in document order.
    """
    ns = "http://purl.org/rss/1.0/"

    channel_title_node = elementTree.find("{%s}channel/{%s}title" % (ns, ns))
    title = _strip_if_not_none(channel_title_node.text)

    channel_link_node = elementTree.find("{%s}channel/{%s}link" % (ns, ns))
    link = channel_link_node.text

    # Cap the number of entries before touching any of them.
    entries = elementTree.findall("{%s}item" % ns)[:MAX_ITEMS]

    # The per-item lookup paths do not change between entries.
    item_title_path = "{%s}title" % ns
    item_link_path = "{%s}link" % ns

    items = [
        (
            _strip_if_not_none(entry.find(item_title_path).text),
            entry.find(item_link_path).text,
        )
        for entry in entries
    ]

    return (title, link, items)
105
106
90 _STRIP_HTML_RE = re.compile(r"<.*?>") 107 _STRIP_HTML_RE = re.compile(r"<.*?>")
91 _htmlParser = HTMLParser.HTMLParser() 108 _htmlParser = HTMLParser.HTMLParser()
92 109
def _strip_html(txt):
    """Return ``txt`` with tag-like ``<...>`` spans removed, then unescaped.

    Spans matching the module-level ``_STRIP_HTML_RE`` pattern are deleted
    first; what is left is passed through ``_htmlParser.unescape``.
    """
    without_tags = _STRIP_HTML_RE.sub('', txt)
    return _htmlParser.unescape(without_tags)
157 ret = _go_rss(elementTree) 174 ret = _go_rss(elementTree)
158 else: 175 else:
159 raise NotImplementedError("Unsupported rss version") 176 raise NotImplementedError("Unsupported rss version")
160 elif parsed_root_tag == ("http://www.w3.org/2005/Atom", "feed"): 177 elif parsed_root_tag == ("http://www.w3.org/2005/Atom", "feed"):
161 ret = _go_atom(elementTree) 178 ret = _go_atom(elementTree)
179 elif parsed_root_tag == ("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "RDF"):
180 ret = _go_purl_rss(elementTree)
162 else: 181 else:
163 raise NotImplementedError("Unknown root tag") 182 raise NotImplementedError("Unknown root tag")
164 183
165 return ret 184 return ret
166 185