import requests
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame, Series
import json
# Zhaopin search API endpoint and default query parameters
url = "https://fe-api.zhaopin.com/c/i/sou"
params = {
    'start': '90',
    'pageSize': '90',
    'cityId': '489',
    'workExperience': '-1',
    'education': '-1',
    'companyType': '-1',
    'employmentType': '-1',
    'jobWelfareTag': '-1',
    'kw': 'python',
    'kt': '3',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
}
# Job detail page URL template; %s is filled with the job's "number" id
url1 = "https://jobs.zhaopin.com/%s.htm"
class Spider():
    def __init__(self):
        self.word = input("请输入关键词:")   # search keyword
        page = input("请输入页数:")          # number of result pages to fetch
        params['kw'] = self.word
        self.df = DataFrame()
        # columns: job title, salary, update time, location, company name,
        # openings, education, work experience, responsibilities
        self.columns = ['职位名称', '工资', '更新时间', '地点', '公司名称', '需求人数', '学历', '工作经验', '职责']
        # The API is paged in steps of 90 results
        for i in range(0, int(page) * 90, 90):
            print(i)
            params['start'] = i
            try:
                r = requests.get(url, params=params, headers=headers, timeout=10)
            except requests.RequestException:
                continue
            r.encoding = 'utf-8'
            self.get_data(r.text)
    def get_data(self, text):
        # Parse one page of API results and append each job to the DataFrame
        js = json.loads(text)
        for i in js['data']['results']:
            idd = i['number']
            title = i['jobName']
            salary = i['salary']
            updateDate = i['updateDate']
            place = i['city']['display']
            company = i['company']['name']
            recruitCount = i['recruitCount']
            eduLevel = i['eduLevel']['name']
            workingExp = i['workingExp']['name']
            u1 = url1 % idd                 # detail page URL for this job
            zhize = self.get_zhize(u1)      # responsibilities scraped from the detail page
            data = [title, salary, updateDate, place, company, recruitCount, eduLevel, workingExp, zhize]
            # Note: DataFrame.append was removed in pandas 2.0; on newer versions use
            # pd.concat([self.df, Series(data, index=self.columns).to_frame().T], ignore_index=True)
            self.df = self.df.append(Series(data, index=self.columns), ignore_index=True)
            print(title)
        self.df.to_csv('%s.csv' % self.word)
    def get_zhize(self, link):
        # Fetch a job detail page and extract the responsibilities text
        try:
            r = requests.get(link, headers=headers, timeout=10)
        except requests.RequestException:
            return "暂无"   # "not available"
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml')
        text = soup.select('div.describtion__detail-content')[0].text.strip()
        return text
if __name__ == "__main__":
    Spider()
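
For reference, a minimal sketch of the JSON shape that get_data assumes for one entry of js['data']['results']. The field names come from the code above; the values below are made up purely for illustration.

# Illustrative only: assumed shape of a single search result; all values are placeholders.
sample_result = {
    'number': 'CC000000000',                 # job id used to build the detail-page URL
    'jobName': 'Python Developer',
    'salary': '10K-15K',
    'updateDate': '2019-04-01 10:00:00',
    'city': {'display': 'Beijing'},
    'company': {'name': 'Example Tech Co.'},
    'recruitCount': 3,
    'eduLevel': {'name': 'Bachelor'},
    'workingExp': {'name': '1-3 years'},
}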