"""
Created on Mon Oct 28 21:10:15 2019
@author: asus
"""
"""
requests web-scraping exercise: Douban Top 250 movie data
"""
import requests
from bs4 import BeautifulSoup
def get_movies(pages=10, page_size=25):
    """Collect movie titles from the Douban Top 250 listing pages.

    Requests ``pages`` consecutive listing pages, prints the HTTP status code
    of each response, and gathers the stripped text of the first ``<span>``
    found inside the link of every ``<div class="hd">`` element.

    Args:
        pages: Number of listing pages to request. Defaults to 10.
        page_size: Step used for the ``start`` query parameter between
            consecutive pages. Defaults to 25.

    Returns:
        A list of title strings in page order. Pages whose markup contains
        no matching elements simply contribute nothing.

    Note:
        Errors raised by ``requests.get`` (for example on timeout) are not
        caught here and propagate to the caller.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
        'Host': 'movie.douban.com',
    }
    movies_list = []
    for i in range(pages):
        link = 'https://movie.douban.com/top250?start=' + str(i * page_size) + '&filter='
        r = requests.get(link, headers=headers, timeout=10)
        # Report the status per page; the body is parsed regardless of status.
        print(str(i + 1), "页响应状态码 :", r.status_code)
        soup = BeautifulSoup(r.text, "lxml")
        for each in soup.find_all('div', class_='hd'):
            # Tag-name navigation yields None when the child is missing, so
            # guard each step instead of crashing on an unexpected entry and
            # losing every title collected so far.
            anchor = each.a
            title_span = anchor.span if anchor is not None else None
            if title_span is None:
                continue
            movies_list.append(title_span.text.strip())
    return movies_list
# Script driver: fetch every title and print the resulting list.
movies = get_movies()
print(movies)
# Reposted content; please cite the original source: https://mac.8miu.com/read-499758.html