view talk/code/plots.py @ 53:874eb0823660

plots: ensure that adjusted boxplots use the same scale
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Mon, 16 May 2016 22:27:30 -0400
parents 4a669a51f49c
children bb6e5cf6aa83
line wrap: on
line source

import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import numpy as np

from medcouple import medcouple_1d

def _adjusted_whiskers(data, q1, q3, iqr, mc):
    """Return ``(whislo, whishi, fliers)`` for a medcouple-adjusted boxplot.

    The fences follow the Hubert & Vandervieren adjusted boxplot: the usual
    1.5*IQR reach is scaled by an exponential of the medcouple ``mc`` so that
    the fence on the long-tailed side is pushed outwards and the one on the
    short-tailed side is pulled in:

        mc >= 0:  [q1 - 1.5*iqr*exp(-4*mc),  q3 + 1.5*iqr*exp(3*mc)]
        mc <  0:  [q1 - 1.5*iqr*exp(-3*mc),  q3 + 1.5*iqr*exp(4*mc)]

    Whiskers end at the most extreme data point that is still inside the
    fences (never inside the box itself); points outside the fences are
    returned as fliers, in their original order.
    """
    if mc >= 0:
        lo_fence = q1 - 1.5*iqr*np.exp(-4*mc)
        hi_fence = q3 + 1.5*iqr*np.exp(3*mc)
    else:
        lo_fence = q1 - 1.5*iqr*np.exp(-3*mc)
        hi_fence = q3 + 1.5*iqr*np.exp(4*mc)

    values = np.asarray(data)

    # Clamp each whisker to real data so it never extends past every
    # observation; fall back to the box edge if nothing lies in between.
    below_hi = values[values <= hi_fence]
    if below_hi.size and below_hi.max() >= q3:
        whishi = below_hi.max()
    else:
        whishi = q3

    above_lo = values[values >= lo_fence]
    if above_lo.size and above_lo.min() <= q1:
        whislo = above_lo.min()
    else:
        whislo = q1

    fliers = values[(values < lo_fence) | (values > hi_fence)]
    return whislo, whishi, fliers


def boxhistplot(data, fig=None, rect=None, xticks=None, colour=None,
                bins=None, title=None, adjusted=False):
    """Draw a histogram of ``data`` with a horizontal boxplot stacked on top.

    The area ``rect`` of ``fig`` is split vertically: the lower 75% holds the
    histogram and the upper 25% holds the boxplot.  Both axes are given the
    same x limits so the two plots line up.  The number of outliers is
    printed to stdout.

    Parameters:
        data: 1-d sequence of numbers to plot.
        fig: figure to draw into; defaults to the current pyplot figure.
        rect: ``[left, bottom, width, height]`` in figure coordinates;
            defaults to ``[0.1, 0.1, 0.8, 0.8]``.
        xticks: tick positions for the histogram's x axis; when ``None`` the
            ticks are left as matplotlib chose them.
        colour: bar colour passed to ``hist``.
        bins: bin specification passed to ``hist``.
        title: title placed above the boxplot and used in the printed
            outlier summary.
        adjusted: when true, replace the standard whiskers and outliers
            with the medcouple-adjusted ones.
    """
    if fig is None:
        fig = plt.gcf()
    if rect is None:
        rect = [0.1, 0.1, 0.8, 0.8]

    data_stats = cbook.boxplot_stats(data)

    left, bottom, width, height = rect
    histheight = 0.75*height
    boxheight = 0.25*height

    # setup the figure and axes
    histAx = fig.add_axes([left, bottom, width, histheight])
    bpAx = fig.add_axes([left, bottom+histheight, width, boxheight])

    # plot stuff
    bpAx.bxp(data_stats, vert=False, flierprops={"marker": 'x'})
    histAx.hist(data, bins=bins, color=colour)

    # Record the limits of the *standard* boxplot and the histogram before
    # any adjustment, so adjusted and unadjusted figures share one scale.
    xlims = np.array([bpAx.get_xlim(), histAx.get_xlim()])

    # Do an adjusted boxplot
    if adjusted:
        stats = data_stats[0]
        # NOTE(review): medcouple_1d is assumed to return a scalar skewness
        # measure for 1-d input -- confirm against the medcouple module.
        mc = medcouple_1d(data)
        whislo, whishi, fliers = _adjusted_whiskers(
            data, stats['q1'], stats['q3'], stats['iqr'], mc)
        stats['whislo'] = whislo
        stats['whishi'] = whishi
        stats['fliers'] = fliers

        # Throw away the standard boxplot and redraw with the new stats.
        bpAx.cla()
        bpAx.bxp(data_stats, vert=False, flierprops={"marker": 'x'})

    # confirm that the axes line up
    for ax in [bpAx, histAx]:
        ax.set_xlim([xlims.min(), xlims.max()])

    bpAx.set_xticklabels([])  # clear out overlapping xlabels
    bpAx.set_yticks([])  # don't need that 1 tick mark
    bpAx.set_title(title, fontsize=20)

    if xticks is not None:
        histAx.set_xticks(xticks)
    histAx.get_xaxis().tick_bottom()
    bpAx.get_xaxis().tick_top()

    # Parenthesised single argument: prints identically on Python 2 and 3.
    print("%d outliers for %s" % (len(data_stats[0]['fliers']), title))

# Each data file holds one number per line; blank lines are skipped so a
# trailing newline at the end of the file does not break float().
with open("../../data/men") as f:
    men = [float(x) for x in f if x.strip()]

with open("../../data/women") as f:
    women = [float(x) for x in f if x.strip()]


xticks = np.arange(5,105,5)
# Bin edges at 0.5, 1.5, ..., 99.5, so bins are centred on whole numbers.
bins = 0.5 + np.arange(0,100)

# Axes rectangles in figure coordinates: [left, bottom, width, height].
boyrect = [0.05, 0.55, 0.9, 0.35]
boycolour = [0.3, 0.3, 1]

girlrect = [0.05, 0.1, 0.9, 0.35]
girlcolour = 'pink'

# Standard boxplots.  The figure has to be created before it is passed to
# boxhistplot; same size as the adjusted figure below.
fig = plt.figure(figsize=(12,8))
boxhistplot(men, fig=fig, rect=boyrect, xticks=xticks, colour=boycolour,
            bins=bins, title="Ages of actors")
boxhistplot(women, fig=fig, rect=girlrect, xticks=xticks, colour=girlcolour,
            bins=bins, title="Ages of actresses")
plt.savefig("boys-and-girls.pdf")

# Same plots again on a fresh figure, with adjusted boxplots.
fig = plt.figure(figsize=(12,8))
boxhistplot(men, fig=fig, rect=boyrect, xticks=xticks, colour=boycolour,
            bins=bins, title="Ages of actors", adjusted=True)
boxhistplot(women, fig=fig, rect=girlrect, xticks=xticks, colour=girlcolour,
            bins=bins, title="Ages of actresses", adjusted=True)
plt.savefig("boys-and-girls-adjusted.pdf")