changeset 50:4a669a51f49c

also do adjusted boxplots
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sun, 15 May 2016 16:00:34 -0400
parents 1676c032cde5
children 18e7cd6ff057
files talk/code/medcouple.py talk/code/plots.py
diffstat 2 files changed, 44 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
new file mode 120000
--- /dev/null
+++ b/talk/code/medcouple.py
@@ -0,0 +1,1 @@
+../../medcouple.py
\ No newline at end of file
--- a/talk/code/plots.py
+++ b/talk/code/plots.py
@@ -2,17 +2,41 @@
 import matplotlib.cbook as cbook
 import numpy as np
 
+from medcouple import medcouple_1d
+
 def boxhistplot(data, fig=None, rect=None, xticks=None, colour=None,
-                bins=None, title=None):
+                bins=None, title=None, adjusted=False):
     data_stats = cbook.boxplot_stats(data)
 
     left, bottom, width, height = rect
     histheight = 0.75*height
     boxheight = 0.25*height
 
+    # Adjusted boxplot (Hubert & Vandervieren): skew the fences by medcouple
+    if adjusted:
+        mc = medcouple_1d(data)
+        iqr = data_stats[0]['iqr']
+        q1 =  data_stats[0]['q1']
+        q3 =  data_stats[0]['q3']
+        if mc > 0:  # right skew: stretch the upper fence, shrink the lower
+            whishi = 1.5*iqr*np.exp(3*mc)
+            whislo = 1.5*iqr*np.exp(-4*mc)
+        else:  # left skew: stretch the lower fence, shrink the upper
+            whishi = 1.5*iqr*np.exp(4*mc)
+            whislo = 1.5*iqr*np.exp(-3*mc)
+        data_stats[0]['whishi'] = q3 + whishi
+        data_stats[0]['whislo'] = q1 - whislo
+
+        # Recompute the outliers
+        data_stats[0]['fliers'] = [
+            flier for flier in data
+            if (flier < data_stats[0]['whislo']  or
+                flier > data_stats[0]['whishi'])
+        ]
+
     # setup the figure and axes
+    histAx = fig.add_axes([left, bottom, width, histheight])
     bpAx = fig.add_axes([left, bottom+histheight, width, boxheight])
-    histAx = fig.add_axes([left, bottom, width, histheight])
 
     # plot stuff
     bpAx.bxp(data_stats, vert=False, flierprops={"marker": 'x'})
@@ -31,6 +55,8 @@
     histAx.get_xaxis().tick_bottom()
     bpAx.get_xaxis().tick_top()
 
+    print "%d outliers for %s" % (len(data_stats[0]['fliers']), title)
+
 with open("../../data/men") as f:
     men = [float(x) for x in f.readlines()]
 
@@ -41,12 +67,21 @@
 xticks = np.arange(5,105,5)
 bins = 0.5 + np.arange(0,100)
 
-boxhistplot(men, fig=fig, rect=[0.05, 0.55, 0.9, 0.35], xticks=xticks,
-            colour=[0.3, 0.3, 1],
-            bins=bins, title="Ages of actors")
+boyrect = [0.05, 0.55, 0.9, 0.35]
+boycolour = [0.3, 0.3, 1]
+
+girlrect = [0.05, 0.1, 0.9, 0.35]
+girlcolour = 'pink'
 
-boxhistplot(women, fig=fig, rect=[0.05, 0.1, 0.9, 0.35], xticks=xticks,
-            colour='pink',
+boxhistplot(men, fig=fig, rect=boyrect, xticks=xticks, colour=boycolour,
+            bins=bins, title="Ages of actors")
+boxhistplot(women, fig=fig, rect=girlrect, xticks=xticks, colour=girlcolour,
             bins=bins, title="Ages of actresses")
+plt.savefig("boys-and-girls.pdf")
 
-plt.savefig("boys-and-girls.pdf")
+fig = plt.figure(figsize=(12,8))
+boxhistplot(men, fig=fig, rect=boyrect, xticks=xticks, colour=boycolour,
+            bins=bins, title="Ages of actors", adjusted=True)
+boxhistplot(women, fig=fig, rect=girlrect, xticks=xticks, colour=girlcolour,
+            bins=bins, title="Ages of actresses", adjusted=True)
+plt.savefig("boys-and-girls-adjusted.pdf")