Data Scraping
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch one month of Beijing AQI data from tianqihoubao.com.
url = 'http://tianqihoubao.com/aqi/beijing-201909.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
}
responses = requests.get(url, headers=headers)
soup = BeautifulSoup(responses.text, 'lxml')      # parse the returned HTML
table = soup.find('table')                        # the monthly data sits in the page's first <table>
df = pd.read_html(table.prettify(), header=0)     # read_html returns a list of DataFrames
df[0]
df[0] holds the 30 daily records for September 2019, with the site's Chinese column headers: 日期 (date), 质量等级 (quality level), AQI指数 (AQI), 当天AQI排名 (AQI rank that day), PM2.5, PM10, So2, No2, Co, O3. Abbreviated output:

    日期          质量等级  AQI指数  当天AQI排名  PM2.5  PM10  So2  No2  Co    O3
0   2019-09-01   良        54       297         19     51     2    31   0.43   96
1   2019-09-02   良        66       341         24     58     2    34   0.51  113
..  ...          ...       ...      ...         ...    ...    ...  ...   ...   ...
29  2019-09-30   良        97       350         71     96     3    33   0.93   96
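The raw frame keeps the site's Chinese headers and stores the dates as plain strings. As a minimal sketch (the tidy_aqi helper and its English column names are my own assumptions, not part of the original), the table could be normalised before analysis:

def tidy_aqi(raw):
    """Hypothetical helper: normalise one month of scraped AQI data."""
    out = raw.copy()
    # Strip stray whitespace that prettify()/read_html may leave in the headers.
    out.columns = [str(c).strip() for c in out.columns]
    out = out.rename(columns={'日期': 'date', '质量等级': 'quality',
                              'AQI指数': 'aqi', '当天AQI排名': 'rank'})
    out['date'] = pd.to_datetime(out['date'], errors='coerce')
    # Pollutant readings become numbers; anything unparseable turns into NaN.
    for col in out.columns.drop(['date', 'quality']):
        out[col] = pd.to_numeric(out[col], errors='coerce')
    return out

clean = tidy_aqi(df[0])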
def getAQI(url):
    """Download one monthly AQI page and return its table as a DataFrame."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    table = soup.find('table')
    df = pd.read_html(table.prettify(), header=0)
    return df[0]
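getAQI assumes every request succeeds and every page contains a table. As a hedged sketch (getAQI_safe is my own variant, not the original code), a more defensive version could look like this:

def getAQI_safe(url):
    """Hypothetical defensive variant of getAQI."""
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()        # surface 4xx/5xx errors instead of parsing an error page
    soup = BeautifulSoup(response.text, 'lxml')
    table = soup.find('table')
    if table is None:                  # layout changed, or the month is not published yet
        return None
    return pd.read_html(table.prettify(), header=0)[0]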
# Download a full year of monthly tables for each city and append them to one CSV per city.
year = 2019
cities = ['beijing', 'shenzhen']
for city in cities:
    dfs = []
    filename = 'testpandasdata/' + city + '_' + str(year) + '_AQI.csv'
    print(filename)
    for month in range(1, 13):
        url = 'http://tianqihoubao.com/aqi/' + city + '-' + str(year) + str('%02d' % month) + '.html'
        data = getAQI(url)
        dfs.append(data)
    for df in dfs:
        # mode='a' appends month after month; header=None means no header row is written.
        df.to_csv(filename, header=None, encoding='utf-8', mode='a')
testpandasdata/beijing_2019_AQI.csv
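Because to_csv is called with mode='a' and header=None, each city's file ends up as twelve appended blocks with no header row and a repeated index column. One possible alternative (my own sketch, not the author's code) is to concatenate the monthly frames and write each file once:

import time

for city in cities:
    frames = []
    for month in range(1, 13):
        frames.append(getAQI('http://tianqihoubao.com/aqi/%s-%d%02d.html' % (city, year, month)))
        time.sleep(1)                  # be polite to the server between requests
    yearly = pd.concat(frames, ignore_index=True)
    yearly.to_csv('testpandasdata/%s_%d_AQI.csv' % (city, year), index=False, encoding='utf-8')

Written this way, each CSV keeps a single header row and can be read back with pd.read_csv without supplying column names by hand.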