r_sql

作者: codeduck1 | 来源:发表于2023-12-27 11:39 被阅读0次
    from IPython.display import Image
    import numpy as np
    import pandas as pd
    from datetime import datetime
    import time
    import subprocess
    import shlex
    from io import BytesIO
    # Lift pandas' display limits so DataFrames are shown without row/column truncation.
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    from datetime import datetime
    from pytz import timezone
    import re
    
    def del_colnpoint(strr):
        """Strip the leading ``table.`` qualifier from a column header.

        Headers printed by the Hive CLI look like ``table.column``; only the
        part after the first dot is kept.  Everything after that first dot is
        preserved, so ``t.a.b`` becomes ``a.b`` instead of being truncated to
        ``a`` (which could collapse distinct headers into duplicates).

        Args:
            strr: Column header string.

        Returns:
            The text after the first ``.``, or ``strr`` unchanged when it
            contains no dot.
        """
        _, dot, remainder = strr.partition('.')
        return remainder if dot else strr
        
    def run_sql(cmd):
        """Run a HiveQL statement through the ``hive`` CLI and load the result.

        The statement is prefixed with ``set hive.cli.print.header=true;`` so
        the tab-separated output starts with a header row, which is then parsed
        into a DataFrame.  ``table.`` prefixes are removed from the column
        names via ``del_colnpoint``.

        Args:
            cmd: HiveQL text to execute.  It is wrapped in double quotes and
                re-split with ``shlex``, so any double quote or backslash
                inside ``cmd`` is interpreted by that quoting layer first.

        Returns:
            A ``pandas.DataFrame`` with the query result, or ``None`` when the
            ``hive`` process exits with a non-zero status.

        Raises:
            pandas.errors.EmptyDataError: If hive succeeds but writes nothing
                to stdout (there is no header row to parse).
        """
        cmd = 'set hive.cli.print.header=true;' + cmd
        hivecmd = 'hive -S -e ' + '"{0}"'.format(cmd)
        args = shlex.split(hivecmd)
        # Capture stderr as well: without it ``coutput.stderr`` is always None
        # and the failure branch has nothing useful to print.
        coutput = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if coutput.returncode != 0:
            print("select data error!")
            print(coutput.stderr.decode('utf-8', errors='replace'))
            data = None
        else:
            raw = coutput.stdout
            try:
                # ``on_bad_lines='warn'`` is the replacement for the removed
                # ``error_bad_lines=False`` (skip malformed rows, emit a warning).
                data = pd.read_csv(BytesIO(raw), sep='\t', low_memory=False, on_bad_lines='warn')
            except TypeError:
                # Older pandas (< 1.3) does not know ``on_bad_lines``.
                data = pd.read_csv(BytesIO(raw), sep='\t', low_memory=False, error_bad_lines=False)
            data.columns = [del_colnpoint(strr) for strr in data.columns]
            print('本DataFrame的行数为:'+str(data.shape[0]))
            print('本DataFrame的列数为:'+str(data.shape[1]))
        # Previously placed after both ``return`` statements and never executed.
        print("Run End:   ", datetime.today().astimezone(timezone('Asia/Shanghai')))
        return data
    
    def run_sql_f(f):
        """Run a HiveQL script file through the ``hive`` CLI and load the result.

        The tab-separated stdout of ``hive -S -f <f>`` is parsed into a
        DataFrame and ``table.`` prefixes are removed from the column names via
        ``del_colnpoint``.

        NOTE(review): unlike ``run_sql`` this does not enable
        ``hive.cli.print.header``; the first output line is used as the header
        row, so the script presumably has to enable it itself -- confirm.

        Args:
            f: Path of the script file.  The value is interpolated into the
                command line and split with ``shlex``, so a path containing
                whitespace must be quoted by the caller.

        Returns:
            A ``pandas.DataFrame`` with the script output, or ``None`` when the
            ``hive`` process exits with a non-zero status.

        Raises:
            pandas.errors.EmptyDataError: If hive succeeds but writes nothing
                to stdout.
        """
        hivecmd = 'hive -S -f {}'.format(f)
        args = shlex.split(hivecmd)
        # Capture stderr as well: without it ``coutput.stderr`` is always None
        # and the failure branch has nothing useful to print.
        coutput = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if coutput.returncode != 0:
            print("select data error!")
            print(coutput.stderr.decode('utf-8', errors='replace'))
            return None

        raw = coutput.stdout
        try:
            # ``on_bad_lines='warn'`` is the replacement for the removed
            # ``error_bad_lines=False`` (skip malformed rows, emit a warning).
            data = pd.read_csv(BytesIO(raw), sep='\t', low_memory=False, on_bad_lines='warn')
        except TypeError:
            # Older pandas (< 1.3) does not know ``on_bad_lines``.
            data = pd.read_csv(BytesIO(raw), sep='\t', low_memory=False, error_bad_lines=False)
        data.columns = [del_colnpoint(strr) for strr in data.columns]
        return data
    
    

    相关文章

      网友评论

          本文标题:r_sql

          本文链接:https://www.haomeiwen.com/subject/icmrndtx.html