import json
import re
import os
import pandas
as pd
from sqlalchemy
import create_engine
class make_xlat:
def __init__(self
, *args
, **kwargs
):
self
.adict
= dict(*args
, **kwargs
)
self
.rx
= self
.make_rx
()
def make_rx(self
):
return re
.compile('|'.join
(map(re
.escape
, self
.adict
)))
def one_xlat(self
, match
):
return str(self
.adict
[match
.group
(0)])
def __call__(self
, text
):
return self
.rx
.sub
(self
.one_xlat
, text
)
def get_file_path(rootdir
):
for (dirpath
, dirnames
, filenames
) in os
.walk
(rootdir
):
pathname
= [os
.path
.join
(dirpath
, filename
) for filename
in filenames
]
return pathname
def write2sql(df
, tableName
):
con
= create_engine
('mysql+pymysql://user:password@host:port/database?charset=utf8')
df
.index
= range(1, len(df
) + 1)
df
.to_sql
(tableName
, con
, schema
='database', if_exists
='append', index
=True, index_label
='id')
def get_data(filename_list
):
data_li
= []
for filename
in filename_list
:
print('filename==>', filename
)
with open(filename
, 'r+', encoding
='utf-8') as f
:
content
= f
.read
().split
('=')[1]
datas
= json
.loads
(re
.match
(r
'.*data: (.*),font:', content
).group
(1))
if datas
:
font
= json
.loads
(re
.match
(r
'.*font:(.*)}', content
).group
(1))
adict
= {font_map
['code']: font_map
['value'] for font_map
in font
['FontMapping']}
print('adic===>', adict
)
translate
= make_xlat
(adict
)
expenses
= ['sumasset', 'fixedasset', 'monetaryfund', 'accountrec', 'inventory', 'sumliab',
'accountpay', 'advancereceive', 'sumshequity', 'cashanddepositcbank',
'loanadvances', 'saleablefasset', 'borrowfromcbank', 'acceptdeposit', 'sellbuybackfasset',
'settlementprovision', 'borrowfund', 'agenttradesecurity', 'premiumrec', 'stborrow',
'premiumadvance']
tbs
= ['accountrec_tb', 'inventory_tb', 'monetaryfund_tb', 'accountpay_tb', 'advancereceive_tb',
'sumshequity_tb', 'tsatz', 'tdetz', 'cashanddepositcbank_tb', 'loanadvances_tb',
'saleablefasset_tb', 'borrowfromcbank_tb', 'acceptdeposit_tb', 'sellbuybackfasset_tb',
'settlementprovision_tb', 'borrowfund_tb', 'agenttradesecurity_tb', 'premiumrec_tb',
'stborrow_tb', 'premiumadvance_tb', 'ld', 'zcfzl']
for data
in datas
:
expenses_dic
= {}
expenses_dic
['gp_code'] = filename
.split
('.')[0].split
('\\')[-1]
expenses_dic
['reportdate'] = data
['reportdate']
expenses_dic
['noticedate'] = data
['noticedate']
for expense
in expenses
:
if data
[expense
] == '-':
expenses_dic
[expense
] = data
[expense
]
else:
expenses_dic
[expense
] = round(float(translate
(data
[expense
])) / 100000000, 2)
for tb
in tbs
:
if data
[tb
] == '-' or '无穷大' in data
[tb
] or '非数字' in data
[tb
]:
expenses_dic
[tb
] = data
[tb
]
else:
expenses_dic
[tb
] = round(float(translate
(data
[tb
])) * 100, 2)
print('expenses_dic=====>', expenses_dic
)
data_li
.append
(expenses_dic
)
df
= pd
.DataFrame
(data_li
)
return df
if __name__
== '__main__':
rootdir
= 'dir'
filename_list
= get_file_path
(rootdir
)
df
= get_data
(filename_list
)
print('=========================df')
print(df
)
转载请注明原文地址: https://mac.8miu.com/read-512876.html