import urllib.request
from bs4 import BeautifulSoup
"""
1.抓取糗事百科所有纯文本段子
2.保存的本地文件
"""
class QiuShi():
    def __init__(self):
        # Fake a browser User-Agent so the site does not reject the request
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        self.headers = {'User-Agent': user_agent}

    def query(self, page=1):
        # Build the URL for the requested page of text-only jokes
        self.url = 'http://www.qiushibaike.com/text/page/' + str(page)
        print(self.url)
        req = urllib.request.Request(self.url, headers=self.headers)
        html = urllib.request.urlopen(req)
        bsoup = BeautifulSoup(html, 'html.parser')
        # Each joke sits in a <div class="content"> element
        for content in bsoup.find_all('div', {'class': 'content'}):
            print(content.get_text())
if __name__ == '__main__':
    qiushi = QiuShi()
    # Pages are numbered from 1, so iterate over pages 1..35
    for i in range(1, 36):
        qiushi.query(i)
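
The docstring above also promises saving the jokes to a local file, but query() only prints to stdout. Below is a minimal sketch of one way to add that, by subclassing QiuShi and appending each page's jokes to a text file; the QiuShiSaver class name and the qiushi.txt filename are illustrative assumptions, not part of the original post.

class QiuShiSaver(QiuShi):
    """Variant of QiuShi that writes each page's jokes to a local file."""
    def __init__(self, filename='qiushi.txt'):
        super().__init__()
        self.filename = filename

    def query(self, page=1):
        self.url = 'http://www.qiushibaike.com/text/page/' + str(page)
        req = urllib.request.Request(self.url, headers=self.headers)
        html = urllib.request.urlopen(req)
        bsoup = BeautifulSoup(html, 'html.parser')
        # Append each joke to the file, separated by a blank line
        with open(self.filename, 'a', encoding='utf-8') as f:
            for content in bsoup.find_all('div', {'class': 'content'}):
                f.write(content.get_text().strip() + '\n\n')

Usage is the same as before: create a QiuShiSaver and call query(i) for each page.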