美文网首页
python--文件对比,整理,合并excel脚本

python--文件对比,整理,合并excel脚本

作者: w_dll | 来源:发表于2019-08-22 16:40 被阅读0次

描述

  • 有许多文件夹,里面可能有成百上千个文件
  • 内容主要分为sql文件 和 excel文件
  • excel文件记录sql文件信息,同时是sql文件的描述

需求

  • 由于许多人提交不规范 找出sql文件和excel文件记录信息有差异的地方,主要是名称
  • 将sql文件从这些文件夹中汇总,加上数字前缀,同时若为特定脚本,在汇总文件夹中存放时,需要另建文件夹加以区分
  • 将所有excel汇总,同时将不规范格式的excel进行处理汇总

1 sql文件和excel信息比对脚本

#!/usr/bin/python  
# -*- coding:utf8 -*- 
import os
import openpyxl
from openpyxl import Workbook,load_workbook
sql_list=[]
excel_list=[]
for root,dirs,files in os.walk(r"."):
    for file in files:
        this_file=os.path.join(root,file)
        if ".sql" in this_file or ".dmp" in this_file:
            index=file.find('.')
            sql_list.append(file[:index])
        if ".xl" in this_file and "~" not in this_file:
            excel_list.append(this_file)
sql_list_in_excel=[]
for excel_name in excel_list:
    wb = load_workbook(excel_name)
    #ws = wb.get_sheet_by_name(wb.sheetnames[0])
    ws=wb.worksheets[0]
    rows=ws.max_row
    cols=ws.max_column
    col_number=5
    for i in range(2,cols+1):
        val=ws.cell(row=1,column=i).value
        if val and '脚本' in val:
            col_number=i
            break
    for i in range(2,rows+1):
        val=ws.cell(row=i,column=col_number).value
        if val and 'None' not in val:
            index=val.find('.')
            if index != -1:
                sql_list_in_excel.append(val[:index])
            else:
                sql_list_in_excel.append(val)
    wb.close()
#print(val[:index])
print("-----------sql文件比excel中多-----------")
for this_sql in sql_list:
    if this_sql not in sql_list_in_excel:
        print(this_sql)
print("-----------excel比sql文件多-------------")
for this_excel in sql_list_in_excel:
    if this_excel not in sql_list:
        print(this_excel)
#pyinstaller打包后,防止闪退
wait=input()

2 汇总文件脚本

#!/usr/bin/python
# -*- coding:utf8 -*-
import os
import shutil
print('just waiting...')
if os.path.exists('scripts'):
    shutil.rmtree('scripts')
os.mkdir('scripts')
if os.path.exists('excels'):
    shutil.rmtree('excels')
os.mkdir('excels')
i=0
j=0
for root,dirs,files in os.walk(r'.'):
    dirs[:] = [d for d in dirs if d not in "scripts"]
    dirs[:] = [d for d in dirs if d not in "excels"]
    for f in files:
        f_name=os.path.join(root,f)
        index=f_name.rfind('\\')
        this_dir=f_name[:index]
        index=this_dir.rfind('\\')
        this_dir=this_dir[index+1:]
        #print(this_dir)
        if '.sql' in f_name or '.dmp' in f_name:
            if '仅' in f_name or '执行' in f_name:
                this_path='scripts'+'/'+this_dir
                if not os.path.exists(this_path):
                    os.mkdir(this_path)
                i=i+1
                tar_file_name=str(i)+'_'+f
                f_tar=this_path+'/'+tar_file_name
                shutil.copyfile(f_name,f_tar)
            else:
                i=i+1
                tar_file_name=str(i)+'_'+f
                f_tar='scripts/'+tar_file_name
                shutil.copyfile(f_name,f_tar)
        if '.xl' in f_name and '~' not in f_name:
            j=j+1
            tar_file_name=str(j)+'_'+f
            f_tar='excels/'+tar_file_name
            shutil.copyfile(f_name,f_tar)

3 按格式合并excel脚本

#!/usr/bin/python
# -*- coding: utf8 -*-
import os
import openpyxl
from openpyxl import load_workbook
def format_date(this_date):
    this_date=str(this_date)
    this_date=this_date.rstrip()
    this_len=len(this_date)
    #print(this_len)
    if this_len==19:
        return(this_date[:10])
    if this_len==10:
        return(this_date[0:4]+'-'+this_date[5:7]+'-'+this_date[8:])
    if this_len==8:
        return(this_date[0:4]+'-'+this_date[4:6]+'-'+this_date[6:])
    else:
        return(this_date)
if os.path.exists('all_xl.xlsx'):
    os.remove('all_xl.xlsx')
#存放excel文件的目录为./excels
excel_list=[]
for root,dirs,files in os.walk(r"./excels"):
    excel_list[:]=files
#print(excel_list)

s_wb = openpyxl.Workbook()
s_ws = s_wb.active
s_ws.append(['主体域','对象列表','表名','对象修改类型','脚本','变更内容','发起人','修改时间','脚本类型','小组审核人'])
for this_excel in excel_list:
    xl_file_name="./excels/"+this_excel
    wb = load_workbook(xl_file_name)
    ws=wb.worksheets[0]
    #print(ws.merged_cells)
    #resolve irregularities
    if ws.max_column == 14:
        for row in range(2,ws.max_row+1):
            val_row_list=[]
            none_count=0
            for i in range(1,ws.max_column+1):
                if str(ws.cell(row,i).value) == 'None':
                    none_count+=1
            if none_count == 14:
                break
            val_row_list.append(ws.cell(row,1).value)
            val_row_list.append('表')
            val_row_list.append(ws.cell(row,2).value)
            val_row_list.append(ws.cell(row,4).value)
            val_row_list.append(ws.cell(row,3).value)
            val_row_list.append(ws.cell(row,4).value)
            val_row_list.append(ws.cell(row,5).value)
            val_row_list.append(ws.cell(row,8).value)
            val_row_list.append(ws.cell(row,9).value)
            val_row_list.append(ws.cell(row,11).value)
            #print(val_row_list)
            s_ws.append(val_row_list)
        wb.close()
    else:
        for row in range(2,ws.max_row+1):
            val_row_list=[]
            none_count=0
            for i in range(1,ws.max_column+1):
                if str(ws.cell(row,i).value) == 'None':
                    none_count+=1
            if none_count == 10:
                break
            for col in range(1,ws.max_column+1):
                this_val=str(ws.cell(row,col).value)
                #print(this_val)
                if "None" == str(this_val):
                    number=row
                    while "None" == str(this_val) and number>1:
                        number=number-1
                        this_val=str(ws.cell(int(number),col).value) 
                    val_row_list.append(this_val)
                else:
                    val_row_list.append(this_val)
            s_ws.append(val_row_list)
        wb.close()
#start merging
rows=['']+list(s_ws.rows)  
index=2
row_count=len(rows)
while index < row_count:
    value=rows[index][4].value
    for this_index,row in enumerate(rows[index+1:],index+1):
        if not (row[4].value==None or row[4].value==value):
            break
    else:
        s_ws.merge_cells('E'+str(index)+':E'+str(this_index))
        break
    s_ws.merge_cells('E'+str(index)+':E'+str(this_index-1))
    index=this_index
for row in range(2,s_ws.max_row+1):
    value_addr='H'+str(row)
    s_ws[value_addr]=format_date(s_ws[value_addr].value)
s_wb.save('all_xl.xlsx')

说明

  • 使用pyinstaller打包成exe文件
  • 脚本1单独打包 脚本2和3一起打包
  • 打包时候确保有相关库: openpyxl


    workspace.png

相关文章

网友评论

      本文标题:python--文件对比,整理,合并excel脚本

      本文链接:https://www.haomeiwen.com/subject/cvxosctx.html