The thrill of concurrent crawling in Python


import time

from tomorrow import threads
from requests_html import HTMLSession

session = HTMLSession()


@threads(50)  # with this decorator, calls to download() run asynchronously in a pool of 50 threads
def download(url):
    return session.get(url)


def main():
    start = time.time()
    # The original post pads the list by repeating these five URLs over and over;
    # the multiplier below reproduces that padding (it only changes how many
    # requests are fired, not the technique).
    urls = [
        'https://pypi.org/project/tomorrow/0.2.0/',
        'https://www.cnblogs.com/pyld/p/4716744.html',
        'http://www.xicidaili.com/nn/10',
        'http://baidu.com',
        'http://www.bubuko.com/infodetail-1028793.html?yyue=a21bo.50862.201879',
    ] * 17
    req_list = []
    for url in urls:
        req_list.append(download(url))  # returns immediately with a future-like Tomorrow object
    print(req_list)
    # Accessing .html on a Tomorrow object blocks until that request has finished.
    responses = [r.html.xpath("//title/text()") for r in req_list]
    print(responses)
    end = time.time()
    print("Time: %f seconds" % (end - start))


if __name__ == "__main__":
    main()
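The whole trick above is the @threads(50) decorator from the tomorrow library: download() returns a future-like proxy right away, the HTTP requests run in a pool of 50 worker threads, and execution only blocks when an attribute on the proxy is touched (here, .html). tomorrow is essentially a thin wrapper over the standard library's concurrent.futures, so if you would rather avoid the extra dependency, a rough equivalent can be written directly against ThreadPoolExecutor. The sketch below is mine, not from the original post, and it uses requests plus lxml instead of requests_html; the worker count, timeout, and URL list are illustrative only.

import time
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import html

URLS = [
    'https://pypi.org/project/tomorrow/0.2.0/',
    'https://www.cnblogs.com/pyld/p/4716744.html',
    'http://baidu.com',
] * 17  # illustrative padding, mirroring the repetition in the post above

def download(url):
    # Each call runs in a worker thread; the GIL is released while waiting on network I/O.
    return requests.get(url, timeout=10)

def main():
    start = time.time()
    with ThreadPoolExecutor(max_workers=50) as pool:
        # map() fans the URLs out to the pool and yields responses in input order
        responses = list(pool.map(download, URLS))
    titles = [html.fromstring(r.content).xpath("//title/text()") for r in responses]
    print(titles)
    print("Time: %f seconds" % (time.time() - start))

if __name__ == "__main__":
    main()

One thing pool.map hides is error handling: if any request raises (timeout, connection error), the exception surfaces when the results are consumed, so for real crawling you would usually wrap download() in try/except, or use submit() with as_completed() and handle failures per URL.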

Reposted from: https://www.cnblogs.com/c-x-a/p/9485734.html
