python 金数据API调用数据

mac2024-05-17 45

需求：

前期使用金数据收集数据，现在使用金数据提供的API抓取之前的数据

问题：

金数据提供了 curl 和 Ruby 示例，没有提供 Python 版本的示例。得到的数据只有数据，没有字段名；只能一次性获取前五十条数据，超过五十条只能逐一获取。

curl用法：

curl -u api_key:api_secret https://jinshuju.net/api/v1/forms/ex27t2

中文乱码问题：

curl -u key:secret https://jinshuju.net/api/v1/forms/XXX/entries|iconv -f utf-8 -t gbk

python:

问题解决：

# How the three problems were solved:
# - Authentication: pass the API key/secret via ``requests.get(auth=(user,
#   password))``; they do not go into the request headers as the official
#   documentation says.
# - Missing field names: export the spreadsheet beforehand, take one record
#   from each source, and recover the field names by comparing the values.
# - Fetch limit: when there are more than 50 entries, the remaining pages have
#   to be requested one after another using the ``next`` cursor.

import json

import pandas as pd

REFERENCE_CSV = r'D:\python_code\ruijin_metaboliaze\CODE_API\info_patients.csv'
REFERENCE_ID = '130403197605251817'


def get_data(user='key', password='secret', form_id='Lo8qfz', timeout=30):
    """Download every entry of a Jinshuju form through the v1 REST API.

    Args:
        user: API key, sent as the HTTP basic-auth user name.
        password: API secret, sent as the HTTP basic-auth password.
        form_id: Identifier of the form whose entries are fetched.
        timeout: Per-request timeout in seconds.

    Returns:
        A list with the ``data`` items of all pages, in the order received.

    Raises:
        requests.RequestException: If a request fails or returns an HTTP
            error status. "数据获取失败" is printed before re-raising.
    """
    # Imported here so the pandas-only helpers below stay usable (and
    # testable) on machines without the HTTP client installed.
    import requests

    # No 'Authorization' entry here: requests builds that header from ``auth``.
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Mobile Safari/537.36',
        'Content-Type': 'application/json',
    }
    host = 'https://jinshuju.net'
    use_class = '/api/v1'
    entries_path = '/forms/' + form_id + '/entries'
    url = host + use_class + entries_path

    entries = []
    params = None
    while True:
        try:
            response = requests.get(
                url,
                auth=(user, password),
                headers=headers,
                params=params,
                timeout=timeout,
            )
            response.raise_for_status()
        except requests.RequestException:
            print("数据获取失败")
            raise
        # Decode from the raw bytes; json detects the UTF encoding itself, so
        # no charset guessing is needed.
        page = json.loads(response.content)
        entries.extend(page.get('data') or [])

        # The cursor of the page just received selects the next page.
        # NOTE(review): assumes the API leaves ``next`` empty on the last
        # page, which is the same stop signal the recursive version used.
        cursor = page.get('next')
        if not cursor:
            return entries
        params = {'next': cursor}


def deal_data(data):
    """Flatten raw API entries into a DataFrame with one row per entry.

    ``creator_name`` and ``serial_number`` are dropped. Every other key is
    renamed to its second ``_``-separated part (``field_12`` -> ``12``,
    ``created_at`` -> ``at``); keys without an underscore are skipped.
    List values are collapsed to text: a list of strings is joined with
    ``，``, a list of ``{'statement', 'choice'}`` dicts becomes
    ``statement,choice`` pairs joined with ``;``, and an empty list becomes
    ``''``. Lists of any other element type are left out of the row.

    Args:
        data: Iterable of entry dicts. The dicts are not modified.

    Returns:
        A ``pandas.DataFrame``; cells are NaN where an entry lacks a column.
    """
    skipped = ('creator_name', 'serial_number')
    rows = []
    for entry in data:
        # A fresh dict per entry so values cannot leak from one row to the next.
        row = {}
        for key, value in entry.items():
            if key in skipped:
                continue
            parts = key.split('_')
            if len(parts) < 2:
                continue
            column = parts[1]
            if not isinstance(value, list):
                row[column] = value
            elif not value:
                row[column] = ''
            elif isinstance(value[0], str):
                row[column] = '，'.join(value)
            elif isinstance(value[0], dict):
                row[column] = ';'.join(
                    ','.join([item['statement'], item['choice']])
                    for item in value
                )
        rows.append(row)
    return pd.DataFrame(rows)


def _is_same_value(api_value, ref_value):
    """Return True when an API cell and a reference cell hold the same value."""
    # Blank API answers were exported as missing and then filled with 0.
    if api_value == '' and ref_value == 0:
        return True
    return str(api_value) == str(ref_value).strip()


def match_colname(match_df, oldmatch_df):
    """Map reference column names to API column names by comparing one record.

    The first row of each frame must describe the same person. Each reference
    column is paired with the first still-unused API column whose value is
    equal; an empty API string is treated as equal to a reference ``0``.

    Args:
        match_df: Reference frame (named columns); only its first row is read.
        oldmatch_df: API frame (numbered columns); only its first row is read.

    Returns:
        Dict mapping reference column name -> API column name. Columns with
        no counterpart are absent. Empty if either frame has no rows.
    """
    mapping = {}
    if match_df.empty or oldmatch_df.empty:
        return mapping

    # ``iloc[0]`` reads the first row whatever the index labels are.
    candidates = {
        column: series.iloc[0] for column, series in oldmatch_df.items()
    }
    for ref_column, ref_series in match_df.items():
        ref_value = ref_series.iloc[0]
        matched = [
            column
            for column, api_value in candidates.items()
            if _is_same_value(api_value, ref_value)
        ]
        if matched:
            mapping[ref_column] = matched[0]
            # Each API column may be claimed only once.
            del candidates[matched[0]]
    return mapping


def main():
    """Fetch the entries and rename the numbered columns to readable names.

    Returns:
        The entry DataFrame with every matched column renamed after the
        corresponding column of the reference CSV.
    """
    data = get_data()
    df2 = deal_data(data)
    df3 = df2.drop('at', axis=1)

    # Sort the numbered columns numerically rather than lexicographically.
    numbers = sorted(int(name) for name in df3.columns.to_list() if name)
    namelist = [str(number) for number in numbers if number]
    df4 = df3[namelist].drop('20', axis=1)

    # Load the reference export; IDs are kept as text so the lookup below
    # works even when every ID in the file is purely numeric.
    excel_data = pd.read_csv(REFERENCE_CSV, dtype={'ident_id': str})
    match_df = excel_data[excel_data['ident_id'].isin([REFERENCE_ID])]
    oldmatch_df = df4[df4['22'].isin([REFERENCE_ID])].reset_index(drop=True)

    # Pre-process both one-record frames so their values are comparable.
    oldmatch_df = oldmatch_df.fillna(0)
    match_df = match_df.fillna(0)
    match_df[['weight', 'food_preference']] = (
        match_df[['weight', 'food_preference']].astype(int)
    )

    m_dict = match_colname(match_df, oldmatch_df)
    f_dict = {v: k for k, v in m_dict.items()}  # API column -> readable name
    return df4.rename(columns=f_dict)


if __name__ == '__main__':
    df5 = main()

最新回复(0)