from collections import defaultdict
import re
import subprocess
-import requests # URL checking
+import requests # NVD database download
import json
import ijson
import distutils.version
import time
import gzip
import sys
-from multiprocessing import Pool
sys.path.append('utils/')
from getdeveloperlib import parse_developers # noqa: E402
Package.all_ignored_cves[pkgvar] = value.split()
-def check_url_status_worker(url, url_status):
- if url_status[0] == 'ok':
- try:
- url_status_code = requests.head(url, timeout=30).status_code
- if url_status_code >= 400:
- return ("error", "invalid {}".format(url_status_code))
- except requests.exceptions.RequestException:
- return ("error", "invalid (err)")
- return ("ok", "valid")
- return url_status
+async def check_url_status(session, pkg, retry=True):
+    """Probe pkg.url with a GET request and record the outcome.
+
+    The result is stored in pkg.status['url'] as a (state, message) tuple:
+    ("error", "invalid <code>") when the HTTP status is 400 or above,
+    ("error", "invalid (err)") when the request fails twice with a client
+    error or a timeout, and ("ok", "valid") otherwise.
+
+    When retry is true, a failed request is attempted exactly one more
+    time before the error is recorded.
+    """
+    try:
+        async with session.get(pkg.url) as resp:
+            if resp.status >= 400:
+                pkg.status['url'] = ("error", "invalid {}".format(resp.status))
+                return
+    except (aiohttp.ClientError, asyncio.TimeoutError):
+        if not retry:
+            # Second failure in a row: give up and flag the URL
+            pkg.status['url'] = ("error", "invalid (err)")
+            return
+        return await check_url_status(session, pkg, retry=False)
+    else:
+        # Only reached when the request completed with a status below 400
+        pkg.status['url'] = ("ok", "valid")
-def check_package_urls(packages):
- pool = Pool(processes=64)
- for pkg in packages:
- pkg.url_worker = pool.apply_async(check_url_status_worker, (pkg.url, pkg.status['url']))
- for pkg in packages:
- pkg.status['url'] = pkg.url_worker.get(timeout=3600)
- del pkg.url_worker
- pool.terminate()
+
+async def check_package_urls(packages):
+    """Check the URLs of the given packages concurrently.
+
+    Only packages whose pkg.status['url'] state is currently 'ok' are
+    checked; check_url_status() then overwrites pkg.status['url'] with
+    the result of the check. All requests share one client session that
+    allows at most 5 simultaneous connections per host.
+    """
+    packages = [p for p in packages if p.status['url'][0] == 'ok']
+    if not packages:
+        # asyncio.wait() raises ValueError on an empty set of awaitables
+        return
+    connector = aiohttp.TCPConnector(limit_per_host=5)
+    async with aiohttp.ClientSession(connector=connector, trust_env=True) as sess:
+        # asyncio.wait() no longer accepts bare coroutine objects
+        # (deprecated in Python 3.8, rejected since 3.11), so wrap each
+        # check in a task first.
+        tasks = [asyncio.ensure_future(check_url_status(sess, pkg))
+                 for pkg in packages]
+        await asyncio.wait(tasks)
def check_package_latest_version_set_status(pkg, status, version, identifier):
pkg.set_url()
pkg.set_developers(developers)
print("Checking URL status")
- check_package_urls(packages)
+ loop = asyncio.get_event_loop()
+ loop.run_until_complete(check_package_urls(packages))
print("Getting latest versions ...")
loop = asyncio.get_event_loop()
loop.run_until_complete(check_package_latest_version(packages))