comparison myrss2/myrss_app.py @ 139:cdbeddce6b10

myrss2: FEEDS: Update new.google.com URLs
author paulo
date Tue, 02 May 2023 05:09:32 +0000
parents cffd95813b82
children 2ed8cf5f36bf
comparison
equal deleted inserted replaced
0:a1a5d4af7d3e 1:ad9a97e0db19
1 import datetime
2 import gzip
1 import io 3 import io
2 import os 4 import os
5 import queue
6 import re
3 import sys 7 import sys
4 import re
5 import urllib.request
6 import urllib.error
7 import threading 8 import threading
8 import queue
9 import datetime
10 import time 9 import time
11 import traceback 10 import traceback
11 import urllib.error
12 import urllib.request
12 13
13 import logging 14 import logging
14 LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO") 15 LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
15 logging.basicConfig( 16 logging.basicConfig(
16 level=getattr(logging, LOG_LEVEL), 17 level=getattr(logging, LOG_LEVEL),
163 164
164 def _fetch_url(url): 165 def _fetch_url(url):
165 try: 166 try:
166 logging.info("processing %s" % url) 167 logging.info("processing %s" % url)
167 feed = urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0 Browser"}), timeout=URLOPEN_TIMEOUT) 168 feed = urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0 Browser"}), timeout=URLOPEN_TIMEOUT)
169 response_headers = feed.info().as_string().splitlines()
170 if 'Content-Encoding: gzip' in response_headers:
171 body = gzip.decompress(feed.read())
172 else:
173 body = feed.read()
174
168 except urllib.error.HTTPError as e: 175 except urllib.error.HTTPError as e:
169 logging.info("(%s) %s" % (url, e)) 176 logging.info("(%s) %s" % (url, e))
170 return None 177 return None
171 178
172 return str(feed.read(), encoding="utf-8") 179 return str(body, encoding="utf-8")
173 180
174 181
175 def _filter_feed(feed): 182 def _filter_feed(feed):
176 ret = feed 183 ret = feed.strip()
177 184
178 filter_out = ["\x16"] 185 filter_out = ["\x16"]
179 for i in filter_out: 186 for i in filter_out:
180 ret = ret.replace(i, "") 187 ret = ret.replace(i, "")
181 188
273 280
274 for i in range(MAX_THREADS): 281 for i in range(MAX_THREADS):
275 logging.debug("Starting thread: %d" % i) 282 logging.debug("Starting thread: %d" % i)
276 WorkerThread(input_queue=self._iq, output_queue=self._oq).start() 283 WorkerThread(input_queue=self._iq, output_queue=self._oq).start()
277 284
278 # Raw WSGI 285 # Raw WSGI
279 def __call__(self, environ, start_response): 286 def __call__(self, environ, start_response):
280 response_code = "500 Internal Server Error" 287 response_code = "500 Internal Server Error"
281 response_type = "text/plain; charset=UTF-8" 288 response_type = "text/plain; charset=UTF-8"
282 289
283 try: 290 try: