1.返回的json数据为空:原因是requests的请求对象没有加请求头和cookies
# 1. The returned JSON data is empty: the requests call carries no request
#    headers and no cookies, so Toutiao answers with "data": null.
import requests
from urllib.parse import urlencode


def get_page_index():
    """Query the Toutiao search API without headers/cookies and print the raw body."""
    params = {
        "aid": "24",
        "app_name": "web_search",
        "offset": "0",
        "format": "json",
        "keyword": "街拍",
        "autoload": "true",
        "count": "20",
        "en_qc": "1",
        "cur_tab": "1",
        "from": "search_tab",
        "pd": "synthesis",
        "timestamp": "1568883030289",
    }
    url = "https://www.toutiao.com/api/search/content/?" + urlencode(params)
    response = requests.get(url)
    if response.status_code == 200:
        print(response.text)


if __name__ == '__main__':
    get_page_index()

# Result -- note "count":0 and "data":null:
# {"count":0,"return_count":0,"query_id":"6537385837821170952","has_more":0,
#  "request_id":"20190919170154010017090029827CF0A", ... "message":"success",
#  "pd":"synthesis","keyword":"街拍","city":"西安", ... "data":null, ...}
# 2. Getting the data correctly:
# 2. Getting the data correctly: the same request, but with a User-Agent
#    header and the session cookies attached.
import requests
from urllib.parse import urlencode


def get_page_index():
    """Query the Toutiao search API with headers/cookies and print the decoded body."""
    data = {
        "aid": "24",
        "app_name": "web_search",
        "offset": "0",
        "format": "json",
        "keyword": "街拍",
        "autoload": "true",
        "count": "20",
        "en_qc": "1",
        "cur_tab": "1",
        "from": "search_tab",
        "pd": "synthesis",
        "timestamp": "1568883030289",
    }
    url = "https://www.toutiao.com/api/search/content/?" + urlencode(data)
    response = requests.get(url, headers=headers, cookies=cookies)
    if response.status_code == 200:
        # Decode explicitly as UTF-8 to avoid mojibake from a mis-guessed charset.
        print(response.content.decode("utf-8"))


if __name__ == '__main__':
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"}
    # BUG FIX: requests' `cookies=` parameter expects a mapping of cookie
    # NAME -> VALUE. The original passed {"Cookie": "<whole header string>"},
    # which sends a single cookie literally named "Cookie". Parse the raw
    # Cookie header string into individual name/value pairs instead.
    _raw_cookie = "tt_webid=6719272225969096196; WEATHER_CITY=北京; tt_webid=6719272225969096196; csrftoken=b28e41c77cd4f268af393de7d3e9d47a; UM_distinctid=16c4159a9ae7e3-04be696c185f6c-3f385c06-1fa400-16c4159a9afa94; CNZZDATA1259612802=1303724616-1564459685-https%3A%2F%2Fwww.toutiao.com%2F|1564459685; WIN_WH=1536_710; s_v_web_id=e588fb5c6570d79a16b67e84decce3d8; __tasessionId=y99fyeyyt1568882979794"
    cookies = {name: value for name, _, value in (part.partition("=") for part in _raw_cookie.split("; "))}
    get_page_index()

# Result: {"count":20,"return_count":20,"query_id":"6537385837821170952",
# "has_more":1, ... "message":"success","keyword":"街拍","city":"西安",
# "data":[{"ala_src":"user","app_info":{"query_type":"AladdinRpcQueryType"},
# "cell_type ... (truncated)
图片地址定位:要先请求这个网址,获得响应后从中解析出对应的image_url
解析报错:SyntaxError: Non-UTF-8 code starting with '\xe5'。解决办法:在程序最上方添加 # -*- coding:utf-8 -*-
json中的键值对要求用双引号而不是单引号;用单引号会导致解析报错(此处报错由正则提取的内容引起):
urlencode是urllib.parse中的一个方法:将字典变成url的查询参数
# urlencode (from urllib.parse) turns a dict into a URL query string.
from urllib.parse import urlencode

data = {"a": 1, "b": 2}
url = "http:www.baidu.com/?"
query = urlencode(data)
print(f"{url}{query}")
# Output: http:www.baidu.com/?a=1&b=2

# --- Next topic: inconsistent md5 hashing results ---
一直以来都是用 hashlib 中的 md5 进行加密:md5.update(二进制)、md5.hexdigest()。可能会出现对相同的字符串进行加密,加密结果却不一样的问题,原因出在 update 方法上。
# The usual pattern -- hashlib's md5 with md5.update(bytes) then
# md5.hexdigest() -- can produce DIFFERENT digests for the same string.
# The culprit is how update() is being used.
from hashlib import md5

fp = md5()
demo = ["1", "1", "3", "3"]
for item in demo:
    fp.update(item.encode("utf-8"))
    print(fp.hexdigest())
# Output:
# c4ca4238a0b923820dcc509a6f75849b
# 6512bd43d9caa6e02c990b0a82652dca
# 73278a4a86960eeb576a8fd4c9ec6997
# fd06b8ea02fe5b1c2496fe1700e9d16c
# Cause: md5.update() APPENDS each call's data to what was fed before, so the
# strings actually hashed are "1", "11", "113", "1133" -- all different, hence
# different digests. To get identical digests for identical inputs, create a
# fresh md5() before every hash. Previously this code lived inside a function,
# so md5 was re-instantiated on each call and the problem never surfaced;
# inside a loop it does. The code above can be rewritten as:
# Fixed version: instantiate a brand-new md5 object for every string, so
# equal inputs always yield equal digests.
demo = ["1", "1", "3", "3"]
for text in demo:
    fp = md5(text.encode("utf-8"))  # fresh instance per string
    print(fp.hexdigest())
# 结果为:
c4ca4238a0b923820dcc509a6f75849b
c4ca4238a0b923820dcc509a6f75849b
eccbc87e4b5ce2fe28308fd9f2a7baf3
eccbc87e4b5ce2fe28308fd9f2a7baf3
# This inline form also works, because a new md5 instance is created on
# every iteration.
for s in demo:
    print(md5(s.encode("utf-8")).hexdigest())
# Output:
# c4ca4238a0b923820dcc509a6f75849b
# c4ca4238a0b923820dcc509a6f75849b
# eccbc87e4b5ce2fe28308fd9f2a7baf3
# eccbc87e4b5ce2fe28308fd9f2a7baf3
# The docs only say update() "updates the object with the string"; the key
# point is that repeated update() calls concatenate their input.
# --- Next topic: how to use the os module ---
# Basics of the os module.
import os

# 1. os.getcwd(): the current working directory.
current_path = os.getcwd()
print(current_path)
# Example output:
# C:\Users\Administrator\AppData\Roaming\Sublime Text 3\Packages\User

# 2. os.listdir(path): list all entries of a directory; returns a list.
# BUG FIX: the original called os.listdir(current), but the variable defined
# above is named current_path.
dir_list = os.listdir(current_path)
print(dir_list)
# Example output:
# ['11.py', 'cuiqingcai.py', 'Localization.sublime-settings',
#  'oscrypto-ca-bundle.crt', 'Package Control.cache', 'Package Control.last-run',
#  'Package Control.merged-ca-bundle', 'Package Control.sublime-settings',
#  'Package Control.user-ca-bundle', 'Preferences.sublime-settings',
#  'reids分布式锁', 'sha1.py', 'test.py', 'untitled.sublime-build']
# More details: https://www.cnblogs.com/yufeihlf/p/6179547.html

# --- Next topic: MongoDB and Python interaction ---
# MongoDB <-> Python interaction.
# BUG FIX: the original wrote `import pyongo`; the module is `pymongo`.
import pymongo  # interaction module

# Step 1: build a client connected to the Mongo server (host and port), then
# pick a collection:
#   client = MongoClient(host, port)
#   collection = client[db_name][collection_name]
#   (db_name acts like the database name, collection_name like a table name)
# Step 2: insert data:
#   ret = collection.insert_one({"name": "test10010", "age": 33})
#   print(ret)
#   if ret: ...   # the return value can be used as a success check

# Example:
client = pymongo.MongoClient("localhost")
# Selecting a database/collection that does not exist creates it on first write.
collection = client["test"]["new"]
# Use insert_one(), consistent with step 2 above; the original example used
# the long-deprecated insert().
ret = collection.insert_one({"new": "python"})
print(ret.inserted_id)
# Result: 5d85ce978a808f42364b045c
# (before insert / after insert screenshots omitted)

# --- Next topic: regular expression recap ---
# re.compile("rule", re.S) returns a reusable pattern object, to be used
# together with search / find / match and friends.
import re

pattern = re.compile("匹配规则", re.S)

a = """aaaaaaabbbbbbbb
111111ccccc"""

# Without re.S the dot does not cross the newline, so the search fails:
pattern1 = re.compile("aaaaaaa(.*?)cccc")
print(pattern1.search(a))  # -> None

# re.S lets "." match across the whole text, newlines included:
pattern2 = pattern1 = re.compile("aaaaaaa(.*?)cccc", re.S)
print(pattern2.search(a))
# -> <re.Match object; span=(0, 26), match='aaaaaaabbbbbbbb\n111111cccc'>

# posted on 2019-09-19 17:18 by Zhang Jingmo (blog footer: views / comments / edit / favorite)
转载于:https://www.cnblogs.com/meloncodezhang/p/11551139.html
