js逆向入门

mac2025-08-23 17

中国土地市场网

1.分析请求过程

在控制台中获取相应的参数

2.分析js并修改参数

构建相应的爬虫代码

# -*- coding: utf-8 -*-
# @Time : 2019/11/1 9:45
# @Author :
"""Fetch the listing table of https://www.landchina.com/default.aspx?tabid=226.

The site answers the first request with a verification cookie and expects
values computed by ``landchina.js`` (run through Node.js) before it serves
the real page.  ``landchina()`` walks through that three-request handshake
and prints every listing link it finds.
"""
import os
import re
import subprocess

import requests
from lxml import etree

# Request constants shared by all three requests of the handshake.
_LIST_URL = "https://www.landchina.com/default.aspx?tabid=226"
_ACCEPT = ('text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,'
           'image/apng,*/*;q=0.8,application/signed-exchange;v=b3')
_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
               '(KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36')


def generate_signature(value):
    """Run ``node landchina.js <value>`` and return its first output line.

    The script is looked up in the directory that contains this file.

    :param value: argument handed to ``landchina.js``; an empty string runs
        the script without any argument (same as the former shell command)
    :return: first line of the script's standard output, including its
        trailing newline when one was printed
    :raises RuntimeError: if the script printed nothing on standard output
    """
    # abspath() guards against an empty dirname when the file is started
    # through a bare relative name (the old "cd  && node ..." then ran in
    # $HOME instead of the script directory).
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Argument list instead of a shell string: the URL contains characters
    # such as "?" and "=" that a shell may interpret.
    command = ["node", "landchina.js"]
    if value:
        command.append(value)

    completed = subprocess.run(
        command,
        cwd=script_dir,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    if not completed.stdout:
        raise RuntimeError(
            "node landchina.js produced no output (exit code %s)"
            % completed.returncode
        )

    first_line, newline, _ = completed.stdout.partition("\n")
    return first_line + newline


def _extract_cookie(set_cookie_header, name):
    """Return every ``<name>=<word chars>;`` fragment of the header, joined."""
    pattern = r"(%s=\w+;)" % re.escape(name)
    return "".join(re.findall(pattern, set_cookie_header))


def landchina():
    """Pass the site's cookie verification and print the listing links.

    Prints one ``{"title": ..., "c_url": ...}`` dict per link found in the
    ``TAB_contentTable`` table.  Returns nothing.

    :raises KeyError: if one of the first two responses has no
        ``Set-Cookie`` header
    """
    # Build the session.
    s = requests.Session()
    s.headers = {
        'Accept': _ACCEPT,
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Host': 'www.landchina.com',
        'Pragma': 'no-cache',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': _USER_AGENT,
    }

    # First request: obtain the security_session_verify cookie.
    url = _LIST_URL
    resp1 = s.get(url=url)
    security_session_verify = _extract_cookie(
        resp1.headers["Set-Cookie"], "security_session_verify")

    # The srcurl cookie value is whatever follows the last "=" in the
    # string landchina.js prints for the page URL.
    signed_url = generate_signature(_LIST_URL).strip()
    srcurl = signed_url.rpartition("=")[2]

    s.headers = {
        'Accept': _ACCEPT,
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Cookie': security_session_verify + " srcurl=" + srcurl,
        'Host': 'www.landchina.com',
        'Pragma': 'no-cache',
        'Referer': _LIST_URL,
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': _USER_AGENT,
    }

    # Without an argument the script's output is searched for the
    # "security_verify_data..." query fragment.
    var = "".join(re.findall(r"(security_verify_data.*)", generate_signature("")))

    # Second request: submit the verification data to obtain the
    # security_session_mid_verify cookie.
    url2 = _LIST_URL + "&" + var
    resp2 = s.get(url2)
    security_session_mid_verify = _extract_cookie(
        resp2.headers["Set-Cookie"], "security_session_mid_verify")

    s.headers = {
        'Accept': _ACCEPT,
        # NOTE(review): this advertises "br"; requests only decodes Brotli
        # bodies when a brotli package is installed - confirm the target
        # environment has one.
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        # Both fragments already end with ";"; the format is kept exactly
        # as it was originally sent.
        'Cookie': '%s; %s' % (security_session_verify, security_session_mid_verify),
        'Host': 'www.landchina.com',
        'Pragma': 'no-cache',
        'Referer': url2,
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': _USER_AGENT,
    }

    # Third request: fetch the page itself with both cookies set.
    rep = s.get(url=url)
    mytree = etree.HTML(rep.text)
    urls = mytree.xpath('//*[@id="TAB_contentTable"]//tr/td[@class="queryCellBordy"]/a')
    for ur in urls:
        title = "".join(ur.xpath(".//text()"))
        c_url = "https://www.landchina.com/" + "".join(ur.xpath("./@href"))
        data = {
            "title": title,
            "c_url": c_url,
        }
        print(data)


if __name__ == '__main__':
    landchina()

最新回复(0)