python - Save a pandas dataframe as table in Image or pdf document with nice multi index display -

I'm trying to include a data frame with a multi-index in a PDF report, and I would like to have a nice table output.

I have found these 2 solutions:

pandas.df -> html -> pdf

    # Export per-cluster summary statistics as an HTML table, then convert
    # that HTML file to PDF with pdfkit.
    import pandas as pd
    from IPython.display import HTML
    import pdfkit

    # df generation
    # `path_to_csv` must be defined before this snippet runs.
    df = pd.read_csv(path_to_csv, sep=',')
    groupeddf = df.groupby('cluster')
    # NOTE(review): the steps below assume describe() returns one row per
    # (cluster, statistic) pair, i.e. a two-level row index -- confirm this
    # holds for the installed pandas version.
    res = groupeddf.describe([0.05, 0.5, 0.95])
    res.index.rename(['cluster', 'stats'], inplace=True)

    # Copy both index levels into columns so they can be reused when the
    # index is rebuilt below.
    stats_level = res.index.get_level_values('stats')
    res['cluster'] = res.index.get_level_values('cluster')
    res['stats'] = stats_level

    # Values of the 'count' rows in the first column, one per cluster.
    populations = res.iloc[stats_level == 'count', 0].values.tolist()
    # Give every row the population of the cluster it belongs to.
    # NOTE(review): MultiIndex.labels is called MultiIndex.codes in later
    # pandas releases -- adjust if the installed version no longer has it.
    res['population'] = [populations[i] for i in res.index.labels[0].values()]
    total_pop = sum(populations)
    res['frequency'] = (res['population'] / total_pop).round(3)
    res.set_index(['cluster', 'population', 'frequency', 'stats'], inplace=True)

    # Keep only the statistics that should appear in the report.
    kept_rows = res.index.get_level_values('stats').isin(
        ['5%', 'mean', '50%', '95%'])
    res1 = res.iloc[kept_rows]
    res1 = res1.round(2)

    # saving df
    h = HTML(res1.to_html())
    with open('test.html', 'w') as my_file:
        my_file.write(h.data)

    options = {
        'orientation': 'landscape'
    }
    with open('test.html') as f:
        pdfkit.from_file(f, 'out.pdf', options=options)

But this has a dependence on pdfkit, which makes it difficult for us. That's why I'm trying to use pandas.df -> tex -> pdf (as mentioned in "Export a pandas dataframe as a table image").

    # Export per-cluster summary statistics as a LaTeX table and compile it
    # to PDF with pdflatex.
    import pandas as pd
    import os

    # df generation
    # `path_to_csv` must be defined before this snippet runs.
    df = pd.read_csv(path_to_csv, sep=',')
    groupeddf = df.groupby('cluster')
    # NOTE(review): the steps below assume describe() returns one row per
    # (cluster, statistic) pair, i.e. a two-level row index -- confirm this
    # holds for the installed pandas version.
    res = groupeddf.describe([0.05, 0.5, 0.95])
    res.index.rename(['cluster', 'stats'], inplace=True)

    # Copy both index levels into columns so they can be reused when the
    # index is rebuilt below.
    stats_level = res.index.get_level_values('stats')
    res['cluster'] = res.index.get_level_values('cluster')
    res['stats'] = stats_level

    # Values of the 'count' rows in the first column, one per cluster.
    populations = res.iloc[stats_level == 'count', 0].values.tolist()
    # Give every row the population of the cluster it belongs to.
    # NOTE(review): MultiIndex.labels is called MultiIndex.codes in later
    # pandas releases -- adjust if the installed version no longer has it.
    res['population'] = [populations[i] for i in res.index.labels[0].values()]
    total_pop = sum(populations)
    res['frequency'] = (res['population'] / total_pop).round(3)
    res.set_index(['cluster', 'population', 'frequency', 'stats'], inplace=True)

    # Keep only the statistics that should appear in the report.
    kept_rows = res.index.get_level_values('stats').isin(
        ['5%', 'mean', '50%', '95%'])
    res1 = res.iloc[kept_rows]
    res1 = res1.round(2)
    # Replace underscores in column names with spaces before LaTeX export.
    res1.rename(columns=lambda x: x.replace('_', ' '), inplace=True)

    # latex
    # Braces are doubled because the template goes through str.format().
    template = r'''\documentclass[preview]{{standalone}}
    \usepackage{{booktabs}}
    \begin{{document}}
    {}
    \end{{document}}
    '''

    # to_latex() returns text, so the file is opened in text mode.
    with open("outputfile.tex", "w") as afile:
        afile.write(template.format(res1.to_latex()))
    os.system("pdflatex outputfile.tex")

However, I'm not familiar with LaTeX, and I get this error:

  ! LaTeX Error: File `standalone.cls' not found.

  Type X to quit or <RETURN> to proceed,
  or enter new name. (Default extension: cls)

Any idea what causes this error, or is there a standard way to go from pandas.df -> pdf?

The solution that worked for me: with pandas >= 0.17, I installed pdflatex and copied the required LaTeX packages, such as booktabs.sty, geography.sty and pdflscape.sty.

"""Render per-cluster summary statistics of a CSV file as a PDF table.

Command line: ``path_to_csv outputfolder``. A ``pdflatex`` executable must
be reachable on the PATH.
"""
import argparse
import math
import os
import shutil
import subprocess

import pandas as pd

# Number of data rows written into each LaTeX table.
ROWS_PER_PAGE = 40

# Base name shared by the generated .tex file and pdflatex's output files.
OUTPUT_BASENAME = 'clustering_summary_table'

# Style files staged next to the .tex file so pdflatex can load them.
# NOTE(review): the template below loads the `geometry` package, yet the
# file staged here is named geography.sty -- confirm the intended file name.
LATEX_STYLE_FILES = ('booktabs.sty', 'geography.sty', 'pdflscape.sty')

TEMPLATE_TOP = r'''\documentclass[a3paper, 5pt]{article}
    \usepackage{booktabs}
    \usepackage{pdflscape}
    \usepackage[a4paper,bindingoffset=0.2in,%
            left=0.25in,right=0.25in,top=1in,bottom=1in,%
            footskip=.25in]{geometry}
    \begin{document}
    \begin{landscape}
    \pagenumbering{gobble}
    \oddsidemargin = 0pt
    \hoffset = -0.25in
    \topmargin = 1pt
    \headheight = 0pt
    \headsep = 0pt
    '''

TEMPLATE_BOTTOM = r'''
    \end{landscape}
    \end{document}
    '''


def _build_summary_table(path_to_csv):
    """Return the rounded per-cluster summary table built from *path_to_csv*."""
    df = pd.read_csv(path_to_csv, sep=',')

    # data preparation
    groupeddf = df.groupby('cluster')
    # NOTE(review): the steps below assume describe() returns one row per
    # (cluster, statistic) pair, i.e. a two-level row index -- confirm this
    # holds for the installed pandas version.
    res = groupeddf.describe([0.05, 0.5, 0.95])
    res.index.rename(['cluster', 'stats'], inplace=True)

    # Copy both index levels into columns so they can be reused when the
    # index is rebuilt below.
    stats_level = res.index.get_level_values('stats')
    res['cluster'] = res.index.get_level_values('cluster')
    res['stats'] = stats_level

    # Values of the 'count' rows in the first column, one per cluster.
    populations = res.iloc[stats_level == 'count', 0].values.tolist()
    # Give every row the population of the cluster it belongs to.
    # NOTE(review): MultiIndex.labels is called MultiIndex.codes in later
    # pandas releases -- adjust if the installed version no longer has it.
    res['population'] = [populations[i] for i in res.index.labels[0].values()]
    total_pop = sum(populations)
    res['frequency'] = (res['population'] / total_pop).round(3)
    res.set_index(['cluster', 'population', 'frequency', 'stats'], inplace=True)

    # Keep only the statistics that should appear in the report.
    kept_rows = res.index.get_level_values('stats').isin(
        ['5%', 'mean', '50%', '95%'])
    res1 = res.iloc[kept_rows].round(2)
    # Replace underscores in column names with spaces before LaTeX export.
    res1.rename(columns=lambda x: x.replace('_', ' '), inplace=True)
    return res1


def save_summary_table_as_pdf(path_to_csv, path_to_output_folder):
    """Write the cluster summary table of a CSV file as a PDF.

    The table is written to ``clustering_summary_table.tex`` inside
    *path_to_output_folder* in chunks of ``ROWS_PER_PAGE`` rows, compiled
    with ``pdflatex`` run from that folder, and the intermediate ``.tex``,
    ``.aux`` and ``.log`` files are removed afterwards.

    Args:
        path_to_csv: Path of the comma-separated input file; it must
            contain a ``cluster`` column.
        path_to_output_folder: Existing folder that receives the output.
    """
    res1 = _build_summary_table(path_to_csv)

    # latex
    nbpages = int(math.ceil(res1.shape[0] * 1.0 / ROWS_PER_PAGE))
    tex_name = OUTPUT_BASENAME + '.tex'
    output_tex = os.path.join(path_to_output_folder, tex_name)

    # to_latex() returns text, so the file is opened in text mode.
    with open(output_tex, 'w') as afile:
        afile.write(TEMPLATE_TOP + '\n')
        for i in range(nbpages):
            page = res1.iloc[i * ROWS_PER_PAGE:(i + 1) * ROWS_PER_PAGE, :]
            afile.write(page.to_latex() + '\n' + r'\pagenumbering{gobble}')
        afile.write(TEMPLATE_BOTTOM + '\n')

    # Run pdflatex from the output folder without changing this process's
    # own working directory.
    subprocess.call(['pdflatex', tex_name], cwd=path_to_output_folder)

    # Remove the intermediate files; skip any that pdflatex never produced.
    for extension in ('.tex', '.aux', '.log'):
        leftover = os.path.join(path_to_output_folder,
                                OUTPUT_BASENAME + extension)
        if os.path.exists(leftover):
            os.remove(leftover)


def main():
    """Parse the command line, stage the style files and build the PDF."""
    print('begin generate pdf table clustering')
    parser = argparse.ArgumentParser()
    parser.add_argument("path_to_csv")
    parser.add_argument("outputfolder")
    args = vars(parser.parse_args())

    filedir = os.path.abspath(os.path.dirname(__file__))
    output_folder_path_abs = os.path.abspath(args['outputfolder'])
    input_folder_path_abs = os.path.abspath(args['path_to_csv'])

    # copy user package latex folder
    package_dir = os.path.join(filedir, 'userpackagelatex')
    for style_file in LATEX_STYLE_FILES:
        shutil.copy(os.path.join(package_dir, style_file),
                    output_folder_path_abs)
    try:
        save_summary_table_as_pdf(input_folder_path_abs,
                                  output_folder_path_abs)
    finally:
        # Remove the staged style files even when PDF generation fails.
        for style_file in LATEX_STYLE_FILES:
            os.remove(os.path.join(output_folder_path_abs, style_file))


if __name__ == "__main__":
    main()

Search This Blog

M16

python - Save a pandas dataframe as table in Image or pdf document with nice multi index display -

Comments

Post a Comment

Popular posts from this blog

Failed to execute goal org.apache.maven.plugins:maven-surefire-plugin:2.12:test (default-test) on project.Error occurred in starting fork -

windows - Debug iNetMgr.exe unhandle exception System.Management.Automation.CmdletInvocationException -

configurationsection - activeMq-5.13.3 setup configurations for wildfly 10.0.0 -