http://image.baidu.com/search/index?ct=201326592&cl=2&st=-1&lm=-1&nc=1&ie=utf-8&tn=baiduimage&ipn=r&rps=1&pv=&fm=rs7&word=风景
# Download the images shown on a Baidu image-search result page into a local
# directory.  For every result entry that carries both an 'os' and a 'cs'
# field, the script requests the matching detail page, pulls the image URL out
# of that page's HTML and saves the image under its original file name.
import os
import re
import time

import requests

# Search keyword (used in both the result-page and the detail-page URL) and
# the directory the images are written to.
KEYWORD = '风景'
SAVE_DIR = '百度图片'

# Seconds to wait for each HTTP response; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Locates the image URL inside a detail page.  Compiled once because it is
# applied to every detail page in the loop below.
IMG_SRC_RE = re.compile(r'''\('firstSc'\);" src="(.*?)"''')

os.makedirs(SAVE_DIR, exist_ok=True)

# Fetch the search-result page.
response = requests.get(
    f'http://image.baidu.com/search/index?ct=201326592&cl=2&st=-1&lm=-1&nc=1&ie=utf-8&tn=baiduimage&ipn=r&rps=1&pv=&fm=rs7&word={KEYWORD}',
    timeout=REQUEST_TIMEOUT)
data = response.text

# The page embeds its result list in a call of the form
# app.setData('imgData', ...); the group captures the parenthesised arguments.
img_desc_match = re.search(r"app.setData(\('imgData.*?\));", data, re.S)
if img_desc_match is None:
    raise SystemExit('imgData block not found in the search result page')

# SECURITY NOTE: eval() executes text taken straight from a remote response.
# It is kept because the script relies on it to parse the captured arguments,
# but a tampered or hostile response could run arbitrary code here.
# TODO(review): switch to a data-only parser (ast.literal_eval / json.loads)
# once the payload is confirmed to contain nothing but literals.
img_desc_dics = eval(img_desc_match.group(1))

# Element 1 of the evaluated arguments holds the per-image records under 'data'.
img_datas = img_desc_dics[1]['data']

count = 0
for img_data in img_datas:
    # Parameters that identify the image on its detail page.
    os_ = img_data.get('os')
    cs_ = img_data.get('cs')
    if not (os_ and cs_):
        continue

    # Fetch the detail page for this image.
    img_search_url = f'http://image.baidu.com/search/detail?ct=503316480&z=0&ipn=d&word={KEYWORD}&step_word=&hs=0&pn=1&spn=0&di=195030&pi=0&rn=1&tn=baiduimagedetail&is=0,0&istype=0&ie=utf-8&oe=utf-8&in=&cl=2&lm=-1&st=-1&cs={cs_}&os={os_}'
    img_search_response = requests.get(img_search_url, timeout=REQUEST_TIMEOUT)
    img_search_data = img_search_response.text

    # Extract the image URL; skip this entry instead of aborting the whole run
    # when the detail page does not contain the expected markup.
    img_url_match = IMG_SRC_RE.search(img_search_data)
    if img_url_match is None:
        continue
    img_url = img_url_match.group(1)

    # Build the local path, e.g. 百度图片/3822951_144045377000_2.jpg
    img_name = os.path.join(SAVE_DIR, img_url.split('/')[-1])

    # Download the image and save it; the with-block closes the file even if
    # the write fails.
    img_response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
    with open(img_name, 'wb') as fw:
        fw.write(img_response.content)

    # Progress report.
    count += 1
    print(f'{img_name}保存成功,成功保存{count}张')

    # Slow down a little so Baidu does not ban the IP.
    time.sleep(0.01)

# Reposted from: https://www.cnblogs.com/dadazunzhe/p/11232559.html
相关资源:java爬虫爬取百度图片