import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def GetHTML(url, timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup document.

    The response text is re-encoded with the encoding detected by
    ``requests`` and then decoded as GB2312 (characters that cannot be
    converted are dropped) before being parsed with the ``lxml`` parser.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The parsed ``BeautifulSoup`` object, or ``None`` if downloading,
        decoding or parsing failed (a failure notice is printed first).
    """
    headers = {'User-Agent': "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        html = r.text.encode(r.encoding, "ignore").decode("gb2312", "ignore")
        return BeautifulSoup(html, "lxml")
    except Exception:
        # Deliberately broad (best-effort fetch), but not a bare ``except`` so
        # that Ctrl-C and SystemExit still terminate the program.
        print("获取失败")
        return None
def PageCollect(url):
    """Append the article links found on one listing page to ``PageList``.

    Each ``<dt>`` element of the page is inspected and the ``href`` and
    ``title`` attributes of its first ``<a>`` are stored in the module-level
    ``PageList`` as ``[href, title]``. ``<dt>`` elements that contain no link,
    or whose link lacks one of the two attributes, are skipped instead of
    aborting the whole page.

    Args:
        url: Address of the listing page to scan.

    Raises:
        AttributeError: If ``GetHTML`` could not fetch the page and returned
            ``None``.
    """
    soup = GetHTML(url)
    # find_all is the current name of the deprecated findAll alias.
    for entry in soup.find_all("dt"):
        link = entry.find("a")
        if link is None:
            continue
        href = link.get("href")
        title = link.get("title")
        if href is None or title is None:
            continue
        PageList.append([href, title])
def handover_Collect(url):
    """Follow the "下一页" (next page) link of *url* and collect that page.

    The link target is resolved against the page it was found on, so the
    function no longer depends on a module-level ``base`` variable and also
    copes with absolute links. The resolved address is printed, its article
    links are gathered with ``PageCollect``, and the address is returned so
    the caller can continue from there.

    Args:
        url: Address of the listing page whose successor should be visited.

    Returns:
        The absolute URL of the next listing page.

    Raises:
        AttributeError: If the page could not be fetched or contains no
            "下一页" link. Callers rely on an exception to detect the last page.
        KeyError: If the "下一页" link has no ``href`` attribute.
    """
    soup = GetHTML(url)
    # ``string=`` is the current spelling of the deprecated ``text=`` filter.
    next_link = soup.find("a", string="下一页")
    href = next_link.attrs["href"]
    next_url = urljoin(url, href)
    print(next_url)
    PageCollect(next_url)
    return next_url
def Pages_Collect(base):
    """Collect article links from a category's first page and all following pages.

    The first page is scanned with ``PageCollect``; afterwards
    ``handover_Collect`` is called repeatedly, each time starting from the
    page it returned, until it raises (which is how the end of the pagination
    shows up) or returns a page that was already visited.

    Args:
        base: Address of the first listing page of the category.
    """
    PageCollect(base)
    url = base
    visited = {base}
    while True:
        try:
            # Every hop, including the first one, is inside the try so that a
            # category consisting of a single page ends normally instead of
            # propagating the "no next page" error to the caller.
            url = handover_Collect(url)
        except Exception:
            break
        if url in visited:
            # A "next" link that points back to a known page would otherwise
            # loop forever.
            break
        visited.add(url)
def GetTEXT(soup):
    """Extract the text fragments of an article page.

    The text of the ``<ul class="arzw">`` element is taken, the leading
    ``"\\n\\n\\n\\r\\n\\r\\n "`` filler sequence is removed wherever it occurs,
    and the remainder is split on ``"\\n\\r\\n"``.

    Args:
        soup: Parsed page as returned by ``GetHTML`` (may be ``None`` when the
            download failed).

    Returns:
        A list of text fragments; an empty list when *soup* is ``None`` or the
        page has no ``ul.arzw`` element.
    """
    if soup is None:
        return []
    container = soup.find("ul", class_="arzw")
    if container is None:
        return []
    # The separator is a fixed string, so plain str methods replace the regex.
    content = container.text.replace("\n\n\n\r\n\r\n ", "")
    return content.split("\n\r\n")
def SaveTEXT(TextList, title, num, save_root="E:/段子/", category=None):
    """Write the text fragments of one article to ``<title>.doc``.

    The file is placed in the folder ``<save_root>/<num>.<category>/`` and is
    written as UTF-8 text, one fragment per line. Missing folders (including
    *save_root* itself) are created. A completion message is printed.

    Args:
        TextList: Text fragments to write, in order.
        title: Article title; used as the file name. Characters that are not
            allowed in Windows file names are replaced with ``_``.
        num: Group number that prefixes the folder name.
        save_root: Folder under which the group folders are created.
        category: Category name used in the folder name. Defaults to the
            module-level ``filename`` variable.
    """
    if category is None:
        category = filename
    target_dir = os.path.join(save_root, str(num) + "." + category)
    # makedirs also creates missing parents and tolerates an existing folder,
    # unlike the exists()/mkdir() pair.
    os.makedirs(target_dir, exist_ok=True)
    safe_title = re.sub(r'[\\/:*?"<>|]', "_", title)
    target_file = os.path.join(target_dir, safe_title + ".doc")
    with open(target_file, mode="w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in TextList)
    print(title, "下载完成")
def PageDownload(url, title, num):
    """Download one article page and save its text.

    Args:
        url: Address of the article page.
        title: Article title, forwarded to ``SaveTEXT``.
        num: Group number, forwarded to ``SaveTEXT``.
    """
    soup = GetHTML(url)
    if soup is None:
        # GetHTML already reported the failure; skip this article instead of
        # passing None on to the text extraction.
        return
    SaveTEXT(GetTEXT(soup), title, num)
def Pages_Download():
    """Download every article recorded in the module-level ``PageList``.

    Each entry's link (element 0) is resolved against the site root and
    passed to ``PageDownload`` together with its title (element 1). Articles
    are numbered in groups of ten: the first ten get group 1, the next ten
    group 2, and so on.
    """
    site_root = "http://www.qqssly.com"
    for index, page in enumerate(PageList):
        # urljoin handles absolute and relative links alike, where plain
        # concatenation only works for links that start with "/".
        url = urljoin(site_root, page[0])
        PageDownload(url, page[1], index // 10 + 1)
def Show_Download_List():
    """Read the category menu from the home page and print the choices.

    Every link inside ``<ul class="dht2">`` is appended to the module-level
    ``Download_Option`` list as ``[href, text]``. All entries except the last
    two are then printed, numbered from 1. If the home page cannot be fetched
    or has no such menu, nothing is added and nothing is printed.
    """
    soup = GetHTML("http://www.qqssly.com/")
    menu = soup.find("ul", class_="dht2") if soup is not None else None
    if menu is None:
        return
    # find_all is the current name of the deprecated findAll alias.
    for link in menu.find_all("a"):
        Download_Option.append([link.attrs["href"], link.text])
    print("请输入要下载的说说的类型")
    # The last two menu entries are intentionally not listed.
    for number, option in enumerate(Download_Option[:-2], start=1):
        print(number, option[1])
# Module-level state shared by the functions above:
#   Download_Option - [href, title] pairs of the selectable categories
#   PageList        - [href, title] pairs of the articles to download
Download_Option = []
PageList = []
Show_Download_List()
try:
    Number = int(input("请输入要下载说说类型的编号"))
    # Reject 0 and negative numbers explicitly; Python would otherwise index
    # from the end of the list and silently pick an unintended category.
    if not 1 <= Number <= len(Download_Option):
        raise IndexError(Number)
    base = Download_Option[Number - 1][0]
    filename = Download_Option[Number - 1][1]
except (ValueError, IndexError, EOFError):
    print("输入错误")
else:
    # Download problems are reported separately so that they are not
    # mislabelled as an input error; Ctrl-C is no longer swallowed.
    try:
        Pages_Collect(base)
        Pages_Download()
    except Exception as err:
        print("下载失败:", err)
# When reposting, please credit the original source: https://mac.8miu.com/read-506546.html