# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 15:52:53 2015
@author: chris sewell
"""
from math import log10, floor
from io import BytesIO
from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.section import WD_ORIENT
from docx.shared import Cm
from pandas import Index, MultiIndex
[docs]class MSDocument(object):
"""a class to output a Microsoft Word Document
NB: docx.api.Document can't be directly inherited as it is a function which
returns various classes dependent on the *docx* parameter
"""
def __init__(self, docx=None):
"""a class to output a Microsoft Word Document
inherited api details for :py:class:`docx.document.Document` can be
found at; https://python-docx.readthedocs.org/en/latest/api/document.html
Parameters
----------
docx : str or file-like object
can be either a path to a .docx file (a string) or a file-like object.
If docx is missing or None, the built-in default document “template”
is loaded.
"""
self._docx = Document(docx=docx)
[docs] def __getattr__(self, name):
""" required to get docx.Document methods """
return getattr(self._docx, name)
[docs] def __dir__(self):
""" required to have docx.Document methods in ipython tab completion"""
dirlist = self.__class__.__dict__.keys() + self._docx.__class__.__dict__.keys()
return sorted(dirlist)
[docs] def add_markdown(self, text='', style='Body Text'):
"""adds a paragraph to the document, allowing for
markdown style **bold** and *italic* text
"""
p = self._docx.add_paragraph(style=style)
if not text:
return p
bold_split = text.split('**')
for i, text in enumerate(bold_split):
if i % 2 == 0:
italic_split = text.split('*')
for j, other in enumerate(italic_split):
if j % 2 == 0:
p.add_run(other)
else:
p.add_run(other).italic = True
else:
p.add_run(text).bold = True
return p
[docs] def add_list(self, text_list=[], numbered=False):
"""adds a list """
if numbered:
style='List Number'
else:
style='List Bullet'
return [self._docx.add_paragraph(tx, style=style) for tx in text_list]
[docs] def add_mpl(self, fig, dpi=None, width=None, height=None, pad_inches=0.2):
"""add matplotlib figure to the document, width/height in cm
Amount of padding around the figure """
stream = BytesIO()
fig.savefig(stream, format='png', dpi=dpi,
bbox_inches='tight', pad_inches=pad_inches,
transparent=True)
width = Cm(width) if width else None
height = Cm(height) if height else None
pic = self._docx.add_picture(stream, width=width, height=height)
self._docx.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER
return pic
[docs] def add_dataframe(self, df, incl_indx=True, autofit=True, sig_figures=5,
style='Medium List 1 Accent 1'):
"""add dataframe as a table
"""
rows, cols = df.shape
if type(df.columns) == Index:
hrows = 1
elif type(df.columns) == MultiIndex:
hrows = len(df.columns.levels)
else:
raise TypeError('df does not have standard or multi column index')
if incl_indx:
if type(df.index) == Index:
icols = 1
elif type(df.index) == MultiIndex:
icols = len(df.index.levels)
else:
raise TypeError('df does not have standard or multi row index')
else:
icols = 0
table = self._docx.add_table(rows=rows+hrows, cols=cols+icols,
style=style)
table.alignment = WD_TABLE_ALIGNMENT.CENTER
table.autofit = autofit
if type(df.columns) == MultiIndex:
merge_cells=[None, None]
prev_val = None
for col, vals in enumerate(df.columns.tolist()):
for hrw, val in enumerate(vals):
cell = table.rows[hrw].cells[col+icols]
if not hrw==0:
p=cell.add_paragraph(str(val))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
elif not merge_cells[0]:
merge_cells[0] = cell
prev_val = val
elif not merge_cells[1] and not val==prev_val:
p=merge_cells[0].add_paragraph(str(prev_val))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
merge_cells[0] = cell
prev_val = val
elif not merge_cells[1]:
merge_cells[1] = cell
prev_val = val
elif val == prev_val:
merge_cells[1] = cell
prev_val = val
else:
m = merge_cells[0].merge(merge_cells[1])
m.add_paragraph(str(prev_val))
m.alignment = WD_ALIGN_PARAGRAPH.CENTER
merge_cells=[cell, None]
prev_val = val
if merge_cells[0] != None and merge_cells[1] != None:
m = merge_cells[0].merge(merge_cells[1])
m.add_paragraph(str(prev_val))
m.alignment = WD_ALIGN_PARAGRAPH.CENTER
else:
p=merge_cells[0].add_paragraph(str(prev_val))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
else:
for col, head in enumerate(df.keys()):
p=table.rows[hrows-1].cells[col+icols].add_paragraph(str(head))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
if incl_indx and type(df.index) == MultiIndex:
for col, name in enumerate(df.index.names):
p=table.rows[hrows-1].cells[col].add_paragraph(str(name))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
def rnd(val):
try:
if val >= 0.:
return round(val, -int(floor(log10(val))) + (sig_figures - 1))
else:
return -round(-val, -int(floor(log10(-val))) + (sig_figures - 1))
except Exception:
return val
for row, id_series in enumerate(df.iterrows()):
if incl_indx and type(df.index) == Index:
p=table.rows[row+hrows].cells[hrows-1].add_paragraph(str(df.index[row]))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
if incl_indx and type(df.index) == MultiIndex:
for col, val in enumerate(df.index.tolist()[row]):
p=table.rows[row+hrows].cells[col].add_paragraph(str(val))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
for col, item in enumerate(id_series[1].iteritems()):
p=table.rows[row+hrows].cells[col+icols].add_paragraph(str(rnd(item[1])))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
return table