# Download a resource from the internet and save it to a local file.
import urllib.parse
import urllib.request
# Fetch the page and write the response body to the given local path.
urllib.request.urlretrieve(
    "https://www.baidu.com",
    "C:/Users/10167/Desktop/address.html",
)
# Clear the cache: urlcleanup() removes temporary files that earlier
# urlretrieve() calls may have left behind.
urllib.request.urlcleanup()
# Open a page and print metadata about the response.  The `with` block
# makes sure the underlying connection is closed once the three values
# have been printed.
with urllib.request.urlopen(
    "https://blog.csdn.net/qq_40666620/article/details/102834104"
) as data:
    # Summary information about the fetched page (the response headers).
    print(data.info())
    # HTTP status code; useful for spotting dead links and similar problems.
    print(data.getcode())
    # The URL that was actually fetched.
    print(data.geturl())
# Timeout setting: request the same page 100 times with a 0.1 second
# timeout and report, per attempt, whether it succeeded or why it failed.
for _ in range(0, 100):
    try:
        # Close each response explicitly so 100 attempts do not leave
        # 100 open connections behind.
        with urllib.request.urlopen(
            "https://blog.csdn.net/qq_40666620/article/details/102834104",
            timeout=0.1,
        ) as response:
            data = response.read()
        print("success")
    except Exception as error:
        # Deliberately broad: this demo only reports whatever went wrong
        # (timeout, connection error, HTTP error) and moves on to the
        # next attempt.
        print(error)
# Simulate an HTTP search request automatically: query Baidu for a keyword
# and print every title matched on each of the first ten result pages.
import re

keyword = "python"
# Percent-encode the keyword so it is safe inside the query string.
# quote() is documented in urllib.parse; the urllib.request.quote spelling
# is an undocumented re-export.
keyword = urllib.parse.quote(keyword)
url = "http://www.baidu.com/s?wd=" + keyword
target = 'title":"(.*?)"'
# Compile once, outside the loop: the pattern is the same for every page.
title_pattern = re.compile(target)
for pn in range(0, 10):
    # NOTE(review): the offset advances by 9 per page; confirm this matches
    # the search engine's page size, otherwise pages may overlap.
    with urllib.request.urlopen(url + "&pn=" + str(9 * pn)) as response:
        data = response.read().decode("utf-8")
    result = title_pattern.findall(data)
    for title in result:
        print(title)
# When reposting, please credit the original source: https://mac.8miu.com/read-497610.html