www/www-1: myrss2/myrss_app.py comparison

myrss2: FEEDS: Add theconversation.com, remove Coding Horror

comparison

Legend: equal, deleted, inserted, replaced

-:a1a5d4af7d3e
+:ad9a97e0db19
+import datetime
+import gzip
 import io
 import os
+import queue
+import re
 import sys
-import re
-import urllib.request
-import urllib.error
 import threading
-import queue
-import datetime
 import time
 import traceback
+import urllib.error
+import urllib.request
 import logging
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
 logging.basicConfig(
 level=getattr(logging, LOG_LEVEL),
 def _fetch_url(url):
 try:
 logging.info("processing %s" % url)
 feed = urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0 Browser"}), timeout=URLOPEN_TIMEOUT)
+response_headers = feed.info().as_string().splitlines()
+if 'Content-Encoding: gzip' in response_headers:
+body = gzip.decompress(feed.read())
+else:
+body = feed.read()
 except urllib.error.HTTPError as e:
 logging.info("(%s) %s" % (url, e))
 return None
-return str(feed.read(), encoding="utf-8")
+return str(body, encoding="utf-8")
 def _filter_feed(feed):
-ret = feed
+ret = feed.strip()
 filter_out = ["\x16"]
 for i in filter_out:
 ret = ret.replace(i, "")
 for i in range(MAX_THREADS):
 logging.debug("Starting thread: %d" % i)
 WorkerThread(input_queue=self._iq, output_queue=self._oq).start()
 # Raw WSGI
 def __call__(self, environ, start_response):
 response_code = "500 Internal Server Error"
 response_type = "text/plain; charset=UTF-8"
 try:

Mercurial > hg > index.fcgi > www > www-1