get
# Fetch a page with GET and decode the body as UTF-8 text.
url = ''  # placeholder: fill in the target URL
# NOTE(review): verify=False disables TLS certificate verification; keep it
# only for hosts whose certificates are known to be self-signed or broken.
# The timeout (seconds) keeps the call from blocking forever when the server
# never answers; requests applies no timeout by default.
resp = requests.get(url, verify=False, timeout=10)
page_source = resp.content.decode('utf-8')

# post
# Send a form-encoded POST marked as an XMLHttpRequest and parse the JSON body.
url = ''  # placeholder: fill in the target URL
data = {
    '': ''  # placeholder form field
}
header = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'X-Requested-With': 'XMLHttpRequest'
}
# cookies=resp.cookies forwards the cookies of the response currently bound
# to `resp` (the previous request).
# NOTE(review): verify=False disables TLS certificate verification.
# The timeout (seconds) keeps the call from blocking forever when the server
# never answers; requests applies no timeout by default.
resp = requests.post(url, headers=header, data=data, cookies=resp.cookies,
                     verify=False, timeout=10)
resp_json = json.loads(resp.content.decode('utf-8'))

# session
# Issue several requests through one Session so that cookies set by the
# server are carried over from one request to the next.
ck = 'a=b'  # raw cookie string in "name=value" form
session = requests.Session()
# NOTE(review): cookies passed per request are not stored on the session;
# only cookies set by the server's responses persist to later requests.
# NOTE(review): verify=False disables TLS certificate verification.
# The timeout (seconds) keeps each call from blocking forever when the server
# never answers; requests applies no timeout by default.
resp = session.post(url, data=data, cookies=str_to_cookie(ck), verify=False,
                    timeout=10)
# `url2` is not defined in this snippet; it must be set before this line runs.
resp2 = session.post(url2, data=data, verify=False, timeout=10)

# Cookie utilities
def cookie_to_str(cookies):
    """Serialize cookie objects into a ``name=value;`` string.

    Args:
        cookies: Iterable of objects exposing string ``name`` and ``value``
            attributes.

    Returns:
        Each cookie rendered as ``name=value;`` and concatenated in iteration
        order; an empty string when ``cookies`` is empty.
    """
    return ''.join(c.name + '=' + c.value + ';' for c in cookies)


def dict_to_str(cookies):
    """Serialize a cookie dict into a ``name=value;`` string.

    Args:
        cookies: Mapping of cookie name to cookie value (both strings).

    Returns:
        Each item rendered as ``name=value;`` and concatenated in the
        mapping's iteration order; an empty string for an empty mapping.
    """
    return ''.join(name + '=' + value + ';' for name, value in cookies.items())


def str_to_cookie(cookies):
    """Parse a ``name=value; name2=value2`` cookie string into a dict.

    Args:
        cookies: Cookie string whose pairs are separated by ``;``.

    Returns:
        Dict mapping each cookie name to its value. Segments without an
        ``=`` (for example the empty piece after a trailing ``;``) are
        skipped.
    """
    parsed = {}
    for pair in cookies.split(';'):
        # Split on the first '=' only, so values that themselves contain '='
        # (such as base64 padding) are kept whole instead of being dropped.
        name, sep, value = pair.partition('=')
        if sep:
            # Pairs are commonly separated by "; ", so trim the whitespace
            # that would otherwise end up inside the key or the value.
            parsed[name.strip()] = value.strip()
    return parsed

# pip install lxml
# Build an lxml element tree from the HTML text in `page_source`
# (presumably the decoded body of an earlier response -- confirm), then print
# the text of every element matched by the XPath expression.
html = etree.HTML(page_source)
html_data = html.xpath('/html/body/div/ul/li/a')
for i in html_data:
    print(i.text)

# XPath: `//` is a relative path, `/` is an absolute path
例子
.xpath('//li/a/text()')
.xpath('//li/a//@href')
.xpath('//li/a[@href="link.html"]')
.xpath('//li[last()]/a/text()')
.xpath('//a[contains(@href, "link")]')
.xpath(r'//a[re:test(@id, "i\d+")]/text()', namespaces={'re': 'http://exslt.org/regular-expressions'})