annotate talk/code/plots.py @ 50:4a669a51f49c

also do adjusted boxplots
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sun, 15 May 2016 16:00:34 -0400
parents 1baa6b0a7199
children 874eb0823660
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
1 import matplotlib.pyplot as plt
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
2 import matplotlib.cbook as cbook
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
3 import numpy as np
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
4
50
4a669a51f49c also do adjusted boxplots
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 48
diff changeset
5 from medcouple import medcouple_1d
4a669a51f49c also do adjusted boxplots
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 48
diff changeset
6
48
1baa6b0a7199 move duplicate code into function
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 46
diff changeset
def boxhistplot(data, fig=None, rect=None, xticks=None, colour=None,
                bins=None, title=None, adjusted=False):
    """Draw a histogram of `data` with a horizontal boxplot stacked on top.

    The rectangle `rect` is split vertically: the lower 75% holds the
    histogram and the upper 25% holds the boxplot.  Both axes are forced
    onto the same x-limits so that the box lines up with the bars.  The
    number of outliers is printed to stdout.

    Parameters:
      data     -- one-dimensional sequence of numbers to plot.
      fig      -- figure to add the two axes to; the current pyplot figure
                  is used when None.
      rect     -- [left, bottom, width, height] in figure coordinates;
                  defaults to (0.1, 0.1, 0.8, 0.8) when None.
      xticks   -- tick locations for the histogram axis; matplotlib's
                  automatic ticks are kept when None.
      colour   -- passed to hist() as `color`.
      bins     -- passed to hist() as `bins`.
      title    -- title shown above the boxplot and used in the printed
                  outlier count.
      adjusted -- if true, replace the standard whiskers and fliers with
                  those of the medcouple-based adjusted boxplot for skewed
                  distributions (Hubert & Vandervieren).

    Returns None.
    """
    if fig is None:
        fig = plt.gcf()
    if rect is None:
        rect = (0.1, 0.1, 0.8, 0.8)

    # boxplot_stats returns one dict of statistics per dataset; there is
    # only one dataset here, so everything lives in element 0.
    data_stats = cbook.boxplot_stats(data)
    stats = data_stats[0]

    left, bottom, width, height = rect
    histheight = 0.75*height
    boxheight = 0.25*height

    # Do an adjusted boxplot
    if adjusted:
        mc = medcouple_1d(data)
        iqr = stats['iqr']

        # Fences of the adjusted boxplot:
        #   mc >= 0:  [q1 - 1.5*exp(-4*mc)*iqr,  q3 + 1.5*exp(3*mc)*iqr]
        #   mc <  0:  [q1 - 1.5*exp(-3*mc)*iqr,  q3 + 1.5*exp(4*mc)*iqr]
        if mc >= 0:
            hi_coef, lo_coef = 3, -4
        else:
            hi_coef, lo_coef = 4, -3

        # The whiskers are drawn at the fences themselves.
        whishi = stats['q3'] + 1.5*iqr*np.exp(hi_coef*mc)
        whislo = stats['q1'] - 1.5*iqr*np.exp(lo_coef*mc)
        stats['whishi'] = whishi
        stats['whislo'] = whislo

        # Recompute the outliers
        stats['fliers'] = [
            point for point in data
            if point < whislo or point > whishi
        ]

    # setup the figure and axes
    histAx = fig.add_axes([left, bottom, width, histheight])
    bpAx = fig.add_axes([left, bottom+histheight, width, boxheight])

    # plot stuff
    bpAx.bxp(data_stats, vert=False, flierprops={"marker": 'x'})
    histAx.hist(data, bins=bins, color=colour)

    # confirm that the axes line up
    xlims = np.array([bpAx.get_xlim(), histAx.get_xlim()])
    for ax in [bpAx, histAx]:
        ax.set_xlim([xlims.min(), xlims.max()])

    bpAx.set_xticklabels([])  # clear out overlapping xlabels
    bpAx.set_yticks([])  # don't need that 1 tick mark
    bpAx.set_title(title, fontsize=20)

    if xticks is not None:
        histAx.set_xticks(xticks)
    histAx.get_xaxis().tick_bottom()
    bpAx.get_xaxis().tick_top()

    # Parenthesised single argument: prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("%d outliers for %s" % (len(stats['fliers']), title))
4a669a51f49c also do adjusted boxplots
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 48
diff changeset
59
45
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
# Load one number per line from each data file.
with open("../../data/men") as f:
    men = [float(line) for line in f]

with open("../../data/women") as f:
    women = [float(line) for line in f]

xticks = np.arange(5, 105, 5)
# Bin edges sit on half-integers (0.5, 1.5, ..., 99.5).
bins = np.arange(0, 100) + 0.5

# Upper panel of the figure.
boyrect = [0.05, 0.55, 0.9, 0.35]
boycolour = [0.3, 0.3, 1]

# Lower panel of the figure.
girlrect = [0.05, 0.1, 0.9, 0.35]
girlcolour = 'pink'

# Render the same two-panel figure twice: first with standard boxplots,
# then with adjusted ones, saving each to its own PDF.
for adjusted, outfile in [(False, "boys-and-girls.pdf"),
                          (True, "boys-and-girls-adjusted.pdf")]:
    fig = plt.figure(figsize=(12, 8))
    boxhistplot(men, fig=fig, rect=boyrect, xticks=xticks,
                colour=boycolour, bins=bins, title="Ages of actors",
                adjusted=adjusted)
    boxhistplot(women, fig=fig, rect=girlrect, xticks=xticks,
                colour=girlcolour, bins=bins, title="Ages of actresses",
                adjusted=adjusted)
    plt.savefig(outfile)