Python 爬取 umei 网 17 万美女图片

mac2025-07-28  15

直接上代码

# Crawl the paginated umei.cc gallery listing, visit every album it links to,
# and store the URL of each album image as a document in MongoDB.
import re
import threading
from urllib.parse import urljoin

import pymongo
from pyquery import PyQuery as pq

# Listing pages are numbered 1.htm, 2.htm, ... under this path.
LIST_URL_TEMPLATE = 'http://www.umei.cc/meinvtupian/rentiyishu/{}.htm'
# Selector of the main image on an album page.
IMAGE_SELECTOR = '#ArticleId22 > p > a > img'
# Selector of the first pager entry, whose text contains a page number.
PAGER_SELECTOR = 'body > div.wrap > div.NewPages > ul > li:nth-child(1) > a'
PAGE_SUFFIX = '.htm'

client = pymongo.MongoClient(host='localhost', port=27017)
table = client.taobao.mei


def save_to_mongo(result):
    """Insert ``result`` into the ``taobao.mei`` collection.

    Args:
        result: The document (a dict) to store.

    Returns:
        True when the insert succeeded, False otherwise.
    """
    try:
        # insert_one replaces Collection.insert, which PyMongo 4 removed.
        table.insert_one(result)
    except Exception as exc:
        # Best effort: a single failed write must not stop the crawl.
        print('存储到Mongo失败', result, exc)
        return False
    print('存储到Mongo成功')
    return True


def _fetch(url):
    """Download and parse ``url``; report the error and return None on failure."""
    try:
        return pq(url=url, encoding='utf-8')
    except Exception as exc:
        # Network and parse errors are reported and skipped so that one bad
        # page does not abort the remaining pages.
        print('fetch failed:', url, exc)
        return None


def _save_image(doc, success_message):
    """Store the main image URL found in ``doc``, if the page has one."""
    image_url = doc(IMAGE_SELECTOR).attr('src')
    if not image_url:
        # Nothing matched the selector; avoid storing {'image': None}.
        return
    if save_to_mongo({'image': image_url}):
        print(success_message)


def _page_count(doc):
    """Return the first integer in the pager label of ``doc``, or 1 if none."""
    label = doc(PAGER_SELECTOR).text() or ''
    match = re.search(r'\d+', label)
    return int(match.group()) if match else 1


def _crawl_album(album_url):
    """Store the image of every page belonging to the album at ``album_url``."""
    first_page = _fetch(album_url)
    if first_page is None:
        return
    # The image must be read from the album page itself, not from the listing.
    _save_image(first_page, 'save number 1 success!')

    if not album_url.endswith(PAGE_SUFFIX):
        # Follow-up page URLs are built by rewriting the '.htm' suffix.
        return
    stem = album_url[:-len(PAGE_SUFFIX)]
    # NOTE(review): assumes the pager label holds the total page count, so the
    # last page number is inclusive - confirm against the live markup.
    for page in range(2, _page_count(first_page) + 1):
        doc = _fetch('{}_{}{}'.format(stem, page, PAGE_SUFFIX))
        if doc is not None:
            _save_image(doc, 'save success!')


def father_link(first_page=1, last_page=42):
    """Walk the gallery listing pages and crawl every album they link to.

    Args:
        first_page: Number of the first listing page to visit.
        last_page: Number of the last listing page to visit (inclusive).
    """
    for index in range(first_page, last_page + 1):
        list_url = LIST_URL_TEMPLATE.format(index)
        listing = _fetch(list_url)
        if listing is None:
            continue
        for item in listing('.TypeList li').items():
            href = item.find('a').attr('href')
            if not href:
                continue
            # urljoin leaves absolute links untouched and resolves relative ones.
            _crawl_album(urljoin(list_url, href))


def main():
    """Run the full crawl."""
    father_link()


if __name__ == '__main__':
    main()
最新回复(0)