Mercurial > hg > index.fcgi > www > www-1
comparison myrss2/myrss_app.py @ 139:cdbeddce6b10
myrss2: FEEDS: Update new.google.com URLs
author | paulo |
---|---|
date | Tue, 02 May 2023 05:09:32 +0000 |
parents | cffd95813b82 |
children | 2ed8cf5f36bf |
comparison
equal
deleted
inserted
replaced
0:a1a5d4af7d3e | 1:ad9a97e0db19 |
---|---|
1 import datetime | |
2 import gzip | |
1 import io | 3 import io |
2 import os | 4 import os |
5 import queue | |
6 import re | |
3 import sys | 7 import sys |
4 import re | |
5 import urllib.request | |
6 import urllib.error | |
7 import threading | 8 import threading |
8 import queue | |
9 import datetime | |
10 import time | 9 import time |
11 import traceback | 10 import traceback |
11 import urllib.error | |
12 import urllib.request | |
12 | 13 |
13 import logging | 14 import logging |
14 LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO") | 15 LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO") |
15 logging.basicConfig( | 16 logging.basicConfig( |
16 level=getattr(logging, LOG_LEVEL), | 17 level=getattr(logging, LOG_LEVEL), |
163 | 164 |
def _fetch_url(url):
    """Fetch *url* and return the response body as a UTF-8 string.

    Sends a browser-like User-Agent (some feed hosts reject the default
    urllib agent) and transparently decompresses gzip-encoded responses.

    Returns None on an HTTP error response; other failures (URLError,
    socket timeout) propagate to the caller, matching the original
    behavior.
    """
    try:
        logging.info("processing %s" % url)
        request = urllib.request.Request(
            url, headers={"User-Agent": "Mozilla/5.0 Browser"})
        # Context manager ensures the response is closed even on a read error.
        with urllib.request.urlopen(request, timeout=URLOPEN_TIMEOUT) as feed:
            body = feed.read()
            # Header lookup via .get() is case-insensitive — more robust than
            # scanning the raw header lines for an exact string match.
            if feed.info().get("Content-Encoding") == "gzip":
                body = gzip.decompress(body)
    except urllib.error.HTTPError as e:
        logging.info("(%s) %s" % (url, e))
        return None

    return str(body, encoding="utf-8")
173 | 180 |
174 | 181 |
175 def _filter_feed(feed): | 182 def _filter_feed(feed): |
176 ret = feed | 183 ret = feed.strip() |
177 | 184 |
178 filter_out = ["\x16"] | 185 filter_out = ["\x16"] |
179 for i in filter_out: | 186 for i in filter_out: |
180 ret = ret.replace(i, "") | 187 ret = ret.replace(i, "") |
181 | 188 |
273 | 280 |
274 for i in range(MAX_THREADS): | 281 for i in range(MAX_THREADS): |
275 logging.debug("Starting thread: %d" % i) | 282 logging.debug("Starting thread: %d" % i) |
276 WorkerThread(input_queue=self._iq, output_queue=self._oq).start() | 283 WorkerThread(input_queue=self._iq, output_queue=self._oq).start() |
277 | 284 |
278 # Raw WSGI | 285 # Raw WSGI |
279 def __call__(self, environ, start_response): | 286 def __call__(self, environ, start_response): |
280 response_code = "500 Internal Server Error" | 287 response_code = "500 Internal Server Error" |
281 response_type = "text/plain; charset=UTF-8" | 288 response_type = "text/plain; charset=UTF-8" |
282 | 289 |
283 try: | 290 try: |