# Source code for pygauss.utils

# -*- coding: utf-8 -*-
"""
Created on Thu Apr 30 01:08:30 2015

@author: chris
"""
from numpy import bmat, hstack, dot, ones, zeros, sum, asarray
from numpy.linalg import solve

from pandas.core.index import MultiIndex

def is_wellcentered(pts, tol=1e-8):
    """Check whether the simplex spanned by *pts* is well-centered.

    The M rows of *pts* are points in N dimensions.  The simplex counts as
    well-centered when every barycentric coordinate of its circumcenter
    (see circumcenter_barycoords) is strictly greater than *tol*.
    """
    smallest = min(circumcenter_barycoords(pts))
    return smallest > tol
def circumcenter_barycoords(pts):
    """Return the barycentric coordinates of the circumcenter of *pts*.

    *pts* holds M points in N dimensions as the rows of an (M)x(N) array
    (1 <= M <= N + 1 and N >= 1).  The result is a length-M array of
    weights, one per input point.

    Uses an extension of the method described here:
    http://www.ics.uci.edu/~eppstein/junkyard/circumcenter.html
    """
    points = asarray(pts)
    n_pts, n_dims = points.shape
    assert n_pts <= n_dims + 1

    # Left-hand side: twice the matrix of pairwise dot products, bordered
    # by a column and a row of ones with a zero in the corner.
    gram = 2 * dot(points, points.T)
    upper = [gram, ones((n_pts, 1))]
    lower = [ones((1, n_pts)), zeros((1, 1))]
    lhs = bmat([upper, lower])

    # Right-hand side: squared length of every point, then a trailing 1
    # (the row of ones above therefore forces the weights to sum to one).
    sq_lengths = sum(points * points, axis=1)
    rhs = hstack((sq_lengths, ones((1))))

    solution = solve(lhs, rhs)
    # the final unknown belongs to the sum-to-one row and is not a weight
    return solution[:-1]
def circumcenter(pts):
    """Compute the circumcenter of M, N-dimensional points.

    The points are given by the rows of an (M)x(N) dimensional matrix
    pts (1 <= M <= N + 1 and N >= 1).

    In the case of four points in 3D, pts is a 4x3 matrix arranged as:

        pts = [ x0 y0 z0 ]
              [ x1 y1 z1 ]
              [ x2 y2 z2 ]
              [ x3 y3 z3 ]

    with return value [ cx cy cz ].

    Returns
    -------
    center : array of length N
        the circumcenter, formed as the combination of the input points
        weighted by their barycentric coordinates.  Only the center is
        returned; the circumradius is not computed.

    Uses an extension of the method described here:
    http://www.ics.uci.edu/~eppstein/junkyard/circumcenter.html
    """
    pts = asarray(pts)
    bary_coords = circumcenter_barycoords(pts)
    center = dot(bary_coords, pts)
    return center
import os # On OSx Conda creates its own environment with a reduced $PATH variable if os.path.exists(os.path.sep + os.path.join('usr', 'local', 'bin')): os.environ["PATH"] += os.pathsep + os.path.sep + os.path.join('usr', 'local', 'bin') if os.path.exists(os.path.sep + os.path.join('usr', 'texbin')): os.environ["PATH"] += os.pathsep + os.path.sep + os.path.join('usr', 'texbin') import subprocess from subprocess import PIPE try: from subprocess import DEVNULL # Python 3 except ImportError: DEVNULL = open(os.devnull, 'r+b', 0) from IPython.display import Image as IPy_Image #from PIL import Image #from io import BytesIO import random import warnings from scipy.interpolate import interp1d _IMGMAGIK = 'convert'
def set_imagik_exe(name):
    """Set the name of the ImageMagick executable used by df_to_img.

    name : str
        executable name (or path) stored in the module-level _IMGMAGIK

    Raises an AssertionError if name is not a string.
    """
    global _IMGMAGIK
    # isinstance (rather than an exact type match) also accepts str subclasses
    assert isinstance(name, str), 'name must be a str'
    _IMGMAGIK = name
# lookup table consulted by df_to_img to turn a requested font size
# (the keys) into a per-row height (the values); entries are listed in
# ascending key order
_PTS_TO_PIX = {
    6: 8,
    7: 9,
    7.5: 10,
    8: 11,
    9: 12,
    10: 13,
    10.5: 14,
    11: 15,
    12: 16,
    13: 17,
    13.5: 18,
    14: 19,
    14.5: 20,
    15: 21,
    16: 22,
    17: 23,
    18: 24,
    20: 26,
    22: 29,
    24: 32,
    26: 35,
    27: 36,
    28: 37,
    29: 38,
    30: 40,
    32: 42,
    34: 45,
    36: 48,
}
# default latex document wrapped around the table; doubled braces are
# literal braces, the single {} receives the table produced by pandas
_DF_LATEX_TEMPLATE = '\n'.join([
    r'\documentclass{{article}}',
    r'\usepackage{{graphicx}}',
    r'\usepackage{{booktabs}}',
    r'\pagenumbering{{gobble}}',
    r'\begin{{document}}',
    r'\begin{{table}}[ht]',
    r'\centering',
    r'\resizebox{{\textwidth}}{{!}}',
    r'{}',
    r'\end{{table}}',
    r'\end{{document}}',
    '',
])


def _remove_files(*paths):
    """Delete every file in *paths* that currently exists."""
    for path in paths:
        if os.path.exists(path):
            os.unlink(path)


def df_to_img(df, na_rep='-', other_temp=None, font_size=None,
              width=None, height=None, unconfined=False):
    """ converts a pandas Dataframe to an IPython image

    df : pandas.DataFrame
        the table to render
    na_rep : str
        how to represent empty (nan) cells
    other_temp : str
        a latex template to use for the table other than the default;
        it is filled with str.format, so it needs one {} placeholder and
        doubled braces everywhere else
    font_size : number
        if given, the image is resized to a height estimated from the
        number of rows and the _PTS_TO_PIX table; the value has to lie
        within the range of keys of that table (6 to 36)
    width, height, unconfined :
        passed on unchanged to IPython.display.Image

    The function uses pandas to convert the dataframe to a latex table,
    applies a template, converts to a PDF, converts to an image,
    and finally returns the image.  The intermediate files
    (df_to_pdf_out<random number>.*) are written to the current working
    directory and removed again before returning, also when an error
    occurs.

    to use this function you will need the pdflatex executable from a tex
    distribution, the convert executable from imagemagick,
    which also requires ghostscript;
    http://www.ghostscript.com/download/gsdnld.html
    http://www.imagemagick.org/script/binary-releases.php

    NB: on Windows some issues were found with convert being an already
    existing application. To overcome this change its filename and
    register the new name with set_imagik_exe.

    Raises a RuntimeError if pdflatex or imagemagick cannot be started,
    write to stderr without producing output, or produce no output file.
    """
    if font_size:
        # estimate height of table to give certain font size;
        # explicit sorted lists are used because dict views are not
        # accepted as array data on Python 3
        sizes = sorted(_PTS_TO_PIX)
        row_height = interp1d(sizes,
                              [_PTS_TO_PIX[s] for s in sizes])(font_size)
        if hasattr(df.columns, 'levels'):
            header_rows = len(df.columns.levels)
        else:
            header_rows = 1
        tbl_height = int((header_rows + df.shape[0]) * row_height * 1.15)

    # pandas 0.16 has a bug when using hierarchical row indexes
    use_indx = True
    if isinstance(df.index, MultiIndex):
        df = df.reset_index()
        use_indx = False

    latex_str = df.to_latex(index=use_indx, escape=False, na_rep=na_rep)

    rand = random.randint(1, 100000)
    filename = 'df_to_pdf_out{0}.tex'.format(rand)
    pdffile = 'df_to_pdf_out{0}.pdf'.format(rand)
    logname = 'df_to_pdf_out{0}.log'.format(rand)
    auxname = 'df_to_pdf_out{0}.aux'.format(rand)
    imgname = 'df_to_pdf_out{0}.png'.format(rand)

    template = other_temp if other_temp else _DF_LATEX_TEMPLATE
    # the extra braces make the table the final argument of \resizebox
    tex_source = template.format('{' + latex_str + '}')
    # the file is opened in binary mode, so text has to be encoded first
    if not isinstance(tex_source, bytes):
        tex_source = tex_source.encode('utf-8')

    try:
        with open(filename, 'wb') as f:
            f.write(tex_source)

        try:
            proc = subprocess.Popen(['pdflatex', filename],
                                    stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
        except OSError:
            # Popen raises OSError when the executable cannot be started
            raise RuntimeError('pdflatex not installed')

        if err:
            raise RuntimeError('error in pdflatex run:\n {0}'.format(err))
        if not os.path.exists(pdffile):
            raise RuntimeError('pdflatex did not produce a pdf file')

        inargs = [_IMGMAGIK, '-trim', '-density', '600', pdffile,
                  '-quality', '100', '-sharpen', '0x1.0', imgname]
        if font_size:
            inargs.insert(2, '-resize')
            inargs.insert(3, 'x{0}'.format(tbl_height))

        try:
            proc = subprocess.Popen(inargs, stdin=DEVNULL,
                                    stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
        except OSError:
            raise RuntimeError('imagemagick (convert) not installed')

        if err:
            # stderr output is only fatal when no image was written
            if not os.path.exists(imgname):
                raise RuntimeError(
                    'error in imagemagick run:\n {0}'.format(err))
            else:
                warnings.warn(
                    'non-fatal error in imagemagick run:\n {0}'.format(err))
        if not os.path.exists(imgname):
            raise RuntimeError('imagemagick did not produce a png file')

        # the image object is built before the png is deleted below
        return IPy_Image(filename=imgname, width=width, height=height,
                         unconfined=unconfined)
    finally:
        _remove_files(filename, logname, auxname, pdffile, imgname)
import matplotlib.pyplot as plt
def imgplot_kmean_groups(analysis, category, cat_name, groups, columns,
                         filters=None, output=None, max_cols=2, **kwargs):
    """ show one figure of molecule images per k-means category

    analysis :
        object providing calc_kmean_groups and plot_mol_images
        (presumably a pygauss analysis instance -- verify against caller)
    category, cat_name, groups, columns :
        passed on to analysis.calc_kmean_groups
    filters : dict
        passed on to analysis.calc_kmean_groups (default: empty dict)
    output : list
        passed to analysis.plot_mol_images as info_columns
        (default: empty list)
    max_cols : int
        passed on to analysis.plot_mol_images
    kwargs :
        any further keyword arguments for analysis.plot_mol_images

    The dataframe returned by calc_kmean_groups is grouped by its
    'Category' column; for every group a figure is created, titled,
    shown with matplotlib and followed by its printed caption.
    """
    # None sentinels avoid sharing one mutable default between calls
    filters = {} if filters is None else filters
    output = [] if output is None else output

    df = analysis.calc_kmean_groups(
        category, cat_name, groups,
        columns=columns, filters=filters)

    for cat, gf in df.groupby('Category'):
        fig, caption = analysis.plot_mol_images(
            max_cols=max_cols, info_columns=output,
            rows=gf.index.tolist(), **kwargs)
        # categories are displayed starting from 1
        fig.suptitle('Category {0}:'.format(cat+1),
                     fontsize=20, x=0, color='blue')
        fig.subplots_adjust(top=0.8)
        plt.show()
        # call form works as a statement on Python 2 and a function on 3
        print(caption)
if __name__ == '__main__':
    # quick manual check: the three unit points on the axes should give a
    # center close to (1/3, 1/3, 1/3); the call form of print is valid on
    # both Python 2 and Python 3
    print(circumcenter([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))