笔记:主要掌握根据字典内容同时替换多个字符串

mac2026-04-05  2

import json import re import os import pandas as pd from sqlalchemy import create_engine # 根据字典同时替换多个字符串 class make_xlat: def __init__(self, *args, **kwargs): self.adict = dict(*args, **kwargs) self.rx = self.make_rx() def make_rx(self): return re.compile('|'.join(map(re.escape, self.adict))) def one_xlat(self, match): return str(self.adict[match.group(0)]) def __call__(self, text): return self.rx.sub(self.one_xlat, text) def get_file_path(rootdir): for (dirpath, dirnames, filenames) in os.walk(rootdir): pathname = [os.path.join(dirpath, filename) for filename in filenames] return pathname def write2sql(df, tableName): con = create_engine('mysql+pymysql://user:password@host:port/database?charset=utf8') df.index = range(1, len(df) + 1) df.to_sql(tableName, con, schema='database', if_exists='append', index=True, index_label='id') # chunksize=10000 def get_data(filename_list): data_li = [] for filename in filename_list: print('filename==>', filename) with open(filename, 'r+', encoding='utf-8') as f: content = f.read().split('=')[1] datas = json.loads(re.match(r'.*data: (.*),font:', content).group(1)) # print('datas==>', datas) if datas: font = json.loads(re.match(r'.*font:(.*)}', content).group(1)) adict = {font_map['code']: font_map['value'] for font_map in font['FontMapping']} print('adic===>', adict) translate = make_xlat(adict) expenses = ['sumasset', 'fixedasset', 'monetaryfund', 'accountrec', 'inventory', 'sumliab', 'accountpay', 'advancereceive', 'sumshequity', 'cashanddepositcbank', 'loanadvances', 'saleablefasset', 'borrowfromcbank', 'acceptdeposit', 'sellbuybackfasset', 'settlementprovision', 'borrowfund', 'agenttradesecurity', 'premiumrec', 'stborrow', 'premiumadvance'] tbs = ['accountrec_tb', 'inventory_tb', 'monetaryfund_tb', 'accountpay_tb', 'advancereceive_tb', 'sumshequity_tb', 'tsatz', 'tdetz', 'cashanddepositcbank_tb', 'loanadvances_tb', 'saleablefasset_tb', 'borrowfromcbank_tb', 'acceptdeposit_tb', 'sellbuybackfasset_tb', 'settlementprovision_tb', 'borrowfund_tb', 'agenttradesecurity_tb', 'premiumrec_tb', 'stborrow_tb', 'premiumadvance_tb', 'ld', 'zcfzl'] for data in datas: expenses_dic = {} expenses_dic['gp_code'] = filename.split('.')[0].split('\\')[-1] expenses_dic['reportdate'] = data['reportdate'] expenses_dic['noticedate'] = data['noticedate'] for expense in expenses: if data[expense] == '-': expenses_dic[expense] = data[expense] else: # print(translate(data[expense])) expenses_dic[expense] = round(float(translate(data[expense])) / 100000000, 2) # 单位亿 for tb in tbs: if data[tb] == '-' or '无穷大' in data[tb] or '非数字' in data[tb]: expenses_dic[tb] = data[tb] else: expenses_dic[tb] = round(float(translate(data[tb])) * 100, 2) # 百分比 print('expenses_dic=====>', expenses_dic) data_li.append(expenses_dic) df = pd.DataFrame(data_li) return df if __name__ == '__main__': rootdir = 'dir' filename_list = get_file_path(rootdir) df = get_data(filename_list) print('=========================df') print(df) # write2sql(df, 'profit_tb') # write2sql(df, 'xjll_tb')
最新回复(0)