requests

mac2022-06-30  26

import asyncio import functools from concurrent.futures.thread import ThreadPoolExecutor from requests_html import HTMLSession import sys session = HTMLSession() async def get_response(executor, *, url, loop: asyncio.AbstractEventLoop = None, ): if not loop: loop = asyncio.get_running_loop() request = functools.partial(session.get, url) return loop.run_in_executor(executor, request) async def bulk_requests(executor, *, urls, loop: asyncio.AbstractEventLoop = None, ): for url in urls: yield await get_response(executor, url=url, loop=loop) def filter_unsuccesful_requests(responses_and_exceptions): return filter( lambda url_and_response: not isinstance(url_and_response[1], Exception), responses_and_exceptions.items() ) async def main(): executor = ThreadPoolExecutor(10) urls = [ "https://baidu.com", "https://cnblogs.com", "https://163.com", ] requests = [request async for request in bulk_requests(executor, urls=urls, )] responses_and_exceptions = dict(zip(urls, await asyncio.gather(*requests, return_exceptions=True))) responses = {url: resp.html for (url, resp) in filter_unsuccesful_requests(responses_and_exceptions)} for res in responses.items(): print(res[1].xpath("//head//title//text()")[0]) for url in urls: if url not in responses: print(f"No successful request could be made to {url}. Reason: {responses_and_exceptions[url]}", file=sys.stderr) asyncio.run(main())

转载于:https://www.cnblogs.com/c-x-a/p/11028356.html

最新回复(0)