import os
import posixpath
from urllib.parse import urlsplit

import requests
from lxml import etree
class TieBa(object):
    """Download the images found in the threads of one Baidu Tieba forum.

    The forum is selected by ``query_string`` (sent as the ``kw`` query
    parameter).  ``run`` fetches the forum listing page, follows every thread
    link matched by an XPath rule, and stores each ``BDE_Image`` picture found
    on those thread pages in ``save_dir``.
    """

    def __init__(self, query_string, save_dir="D:/img/"):
        """Store the search keyword, target directory and request settings.

        Args:
            query_string: Forum keyword sent as the ``kw`` parameter.
            save_dir: Directory that receives the downloaded images.  The
                default keeps the location the script has always used.
        """
        self.query_string = query_string
        self.save_dir = save_dir
        self.base_url = 'https://tieba.baidu.com/f'
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                        }

    def params(self):
        """Return the query parameters for the forum listing request."""
        return {"kw": self.query_string}

    def send_request(self, url, parms=None, timeout=10):
        """GET ``url`` and return the raw response body as bytes.

        Args:
            url: Address to fetch.
            parms: Optional mapping of query parameters.  Defaults to ``None``
                instead of a shared mutable ``{}``.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the crawl forever.

        Raises:
            requests.RequestException: On connection errors or timeouts.
        """
        response = requests.get(url, params=parms, headers=self.headers,
                                timeout=timeout)
        return response.content

    # 2. Convert the raw page into a tree and extract data from it
    def parse_data(self, data, rule):
        """Parse ``data`` as HTML and return the results of XPath ``rule``."""
        html_data = etree.HTML(data)
        # Guard against a missing tree so callers always receive a list.
        if html_data is None:
            return []
        return html_data.xpath(rule)

    @staticmethod
    def _image_name(image_url):
        """Derive a local file name from ``image_url``.

        Only the last path segment is considered, so a query string or a
        ``/`` can never leak into the file name.  The last 12 characters of
        that segment are kept, matching the names produced for URLs that end
        in a long bare file name.
        """
        file_name = posixpath.basename(urlsplit(image_url).path)
        return file_name[-12:]

    # 3. Save the data
    def save_data(self, data, name):
        """Write ``data`` (bytes) to the file ``name`` inside ``save_dir``."""
        print(name)
        if self.save_dir:
            # Create the target folder on demand instead of failing on open().
            os.makedirs(self.save_dir, exist_ok=True)
        image_path = os.path.join(self.save_dir, name)
        with open(image_path, 'wb') as f:
            f.write(data)

    # Main entry method
    def run(self):
        """Crawl the forum listing and save every image of every thread."""
        listing_html = self.send_request(self.base_url, self.params())

        # XPath rules: thread links on the listing page, pictures on a thread.
        detail_rule = '//div[@class="t_con cleafix"]/div/div/div/a/@href'
        image_rule = '//img[@class="BDE_Image"]/@src'

        for label in self.parse_data(listing_html, detail_rule):
            detail_data = self.send_request('https://tieba.baidu.com' + label)
            # Extract the picture addresses from the thread page
            for image_url in self.parse_data(detail_data, image_rule):
                image_name = self._image_name(image_url)
                if not image_name:
                    # No usable file name in the URL path; nothing to save to.
                    continue
                image_data = self.send_request(image_url)
                # Save the picture
                self.save_data(image_data, image_name)
if __name__ == '__main__':
    # The prompt first reminds the user to create a folder named "img" on
    # drive D, then asks for the forum keyword to search for.
    prompt_text = ('请先在D盘创建一个名为img的文件夹来接收图片\n'
                   '接下来请输入你要查询的关键字: ')
    keyword = input(prompt_text)
    # Build the crawler for that keyword and start downloading.
    TieBa(keyword).run()
# Reposted from: https://www.cnblogs.com/wshr210/p/11302299.html
# Related resources: hundreds of Java example programs and open-source projects