Mercurial > hg > index.fcgi > www > www-1
diff myrss2/myrss_app.py @ 110:1a5c0fc5627a
myrss2: fix gzip and leading whitespace handling; add test feed, test server; update FEEDS
author | paulo |
---|---|
date | Wed, 03 Jun 2020 01:20:20 -0700 |
parents | cffd95813b82 |
children | 2ed8cf5f36bf |
line diff
1.1 --- a/myrss2/myrss_app.py Sun May 24 00:22:32 2020 -0700
1.2 +++ b/myrss2/myrss_app.py Wed Jun 03 01:20:20 2020 -0700
1.3 @@ -1,14 +1,15 @@
1.4 +import datetime
1.5 +import gzip
1.6 import io
1.7 import os
1.8 +import queue
1.9 +import re
1.10 import sys
1.11 -import re
1.12 -import urllib.request
1.13 -import urllib.error
1.14 import threading
1.15 -import queue
1.16 -import datetime
1.17 import time
1.18 import traceback
1.19 +import urllib.error
1.20 +import urllib.request
1.21
1.22 import logging
1.23 LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
1.24 @@ -165,15 +166,21 @@
1.25 try:
1.26 logging.info("processing %s" % url)
1.27 feed = urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0 Browser"}), timeout=URLOPEN_TIMEOUT)
1.28 + response_headers = feed.info().as_string().splitlines()
1.29 + if 'Content-Encoding: gzip' in response_headers:
1.30 + body = gzip.decompress(feed.read())
1.31 + else:
1.32 + body = feed.read()
1.33 +
1.34 except urllib.error.HTTPError as e:
1.35 logging.info("(%s) %s" % (url, e))
1.36 return None
1.37
1.38 - return str(feed.read(), encoding="utf-8")
1.39 + return str(body, encoding="utf-8")
1.40
1.41
1.42 def _filter_feed(feed):
1.43 - ret = feed
1.44 + ret = feed.strip()
1.45
1.46 filter_out = ["\x16"]
1.47 for i in filter_out:
1.48 @@ -275,7 +282,7 @@
1.49 logging.debug("Starting thread: %d" % i)
1.50 WorkerThread(input_queue=self._iq, output_queue=self._oq).start()
1.51
1.52 - # Raw WSGI
1.53 + # Raw WSGI
1.54 def __call__(self, environ, start_response):
1.55 response_code = "500 Internal Server Error"
1.56 response_type = "text/plain; charset=UTF-8"