Python crawler example: scraping basic article info and saving it to Excel
import requests  # HTTP library used to fetch the list pages
from bs4 import BeautifulSoup
import openpyxl

# Create a workbook and write the header row
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = '文章'
sheet['A1'] = '标题'
sheet['B1'] = '摘要'
sheet['C1'] = 'url'
sheet['D1'] = '发布时间'
sheet['E1'] = 'pic地址'

# Crawl the first 10 list pages (list551_0 ... list551_9)
for i in range(10):
    res = requests.get('https://www.jb51.net/html5/list551_' + str(i) + '.html')
    res.encoding = 'gbk'  # the site serves GBK-encoded pages, so decode accordingly
    html = res.text       # the Response body as a string
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all(class_='item-inner')
    for item in items:
        title = item.find('p').text
        zhaiyao = item.find(class_='item-infode').text
        url = 'https://www.jb51.net' + item.find('p').find('a')['href']
        addtime = item.find(class_='lbtn').text
        try:
            picadress = item.find('img')['src']
        except (TypeError, KeyError):
            picadress = ''  # some articles have no thumbnail
        print('标题', title)
        print('摘要', zhaiyao)
        print('url', url)
        print('发布时间', addtime)
        print('图片地址', picadress)
        # Append one row per article to the worksheet
        sheet.append([title, zhaiyao, url, addtime, picadress])
        print('----------------')

wb.save(r'c:/Users/Administrator/Desktop/文章.xlsx')
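The same export can also be done with pandas instead of appending rows to openpyxl by hand, which is convenient if you want to deduplicate or sort before saving. Below is a minimal sketch under the assumption that the scraped values are first collected into a list of tuples named rows (a hypothetical name, not part of the original script); pandas writes .xlsx files through the openpyxl engine, so no extra dependency is needed.

import pandas as pd

# rows is a hypothetical stand-in for the values collected in the loop above;
# the single tuple here is placeholder data for illustration only.
rows = [
    ('示例标题', '示例摘要', 'https://www.jb51.net/html5/example.html',
     '2023-01-01', 'https://img.jb51.net/example.jpg'),
]

df = pd.DataFrame(rows, columns=['标题', '摘要', 'url', '发布时间', 'pic地址'])
# to_excel uses openpyxl for .xlsx output; index=False drops the row index column
df.to_excel(r'c:/Users/Administrator/Desktop/文章.xlsx', index=False)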