annotate talk/code/plots.py @ 54:bb6e5cf6aa83

plots: add geometric data
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Mon, 16 May 2016 22:29:33 -0400
parents 874eb0823660
children e2d6c57dc1f4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
1 import matplotlib.pyplot as plt
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
2 import matplotlib.cbook as cbook
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
3 import numpy as np
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
4
50
4a669a51f49c also do adjusted boxplots
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 48
diff changeset
5 from medcouple import medcouple_1d
4a669a51f49c also do adjusted boxplots
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 48
diff changeset
6
48
1baa6b0a7199 move duplicate code into function
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 46
diff changeset
def _adjusted_whiskers(values, q1, q3, iqr, mc):
    """Return ``(whislo, whishi, fliers)`` for a medcouple-adjusted boxplot.

    The fences follow the adjusted boxplot of Hubert & Vandervieren:
    for ``mc >= 0`` they are ``q1 - 1.5*exp(-4*mc)*iqr`` and
    ``q3 + 1.5*exp(3*mc)*iqr``; for ``mc < 0`` the exponents are mirrored
    to ``-3*mc`` and ``4*mc``.  With ``mc == 0`` this reduces to the usual
    ``1.5*iqr`` rule.
    """
    if mc >= 0:
        lo_fence = q1 - 1.5*iqr*np.exp(-4*mc)
        hi_fence = q3 + 1.5*iqr*np.exp(3*mc)
    else:
        lo_fence = q1 - 1.5*iqr*np.exp(-3*mc)
        hi_fence = q3 + 1.5*iqr*np.exp(4*mc)

    # Whiskers end at the most extreme observation inside each fence, not
    # at the fence itself; fall back to the quartile when no observation
    # lies between the quartile and its fence.
    inside_lo = values[values >= lo_fence]
    if inside_lo.size and inside_lo.min() <= q1:
        whislo = inside_lo.min()
    else:
        whislo = q1

    inside_hi = values[values <= hi_fence]
    if inside_hi.size and inside_hi.max() >= q3:
        whishi = inside_hi.max()
    else:
        whishi = q3

    # Everything strictly outside the fences is an outlier.
    fliers = values[(values < lo_fence) | (values > hi_fence)]
    return whislo, whishi, fliers


def boxhistplot(data, fig=None, rect=None, xticks=None, colour=None,
                bins=None, title=None, adjusted=False):
    """Draw a histogram of ``data`` with a horizontal boxplot stacked on top.

    The rectangle ``rect`` is split vertically: the lower 75% holds the
    histogram and the upper 25% holds the boxplot.  Both axes are given the
    same x-limits so that they line up.  The number of outliers is printed
    to standard output.

    Parameters
    ----------
    data : sequence of numbers
        A single one-dimensional sample.
    fig : matplotlib figure, optional
        Figure to add the two axes to; defaults to the current figure.
    rect : sequence of four floats, optional
        ``[left, bottom, width, height]`` in figure coordinates; defaults to
        ``[0.05, 0.1, 0.9, 0.7]``.
    xticks : sequence, optional
        Tick positions for the histogram x-axis; left untouched if omitted.
    colour : optional
        Passed to ``hist`` as ``color``.
    bins : optional
        Passed to ``hist`` as ``bins``.
    title : str, optional
        Title placed above the boxplot and used in the printed summary.
    adjusted : bool, optional
        If true, replace the standard whiskers and outliers by those of the
        medcouple-adjusted boxplot.
    """
    if fig is None:
        fig = plt.gcf()
    if rect is None:
        rect = [0.05, 0.1, 0.9, 0.7]

    data_stats = cbook.boxplot_stats(data)

    left, bottom, width, height = rect
    histheight = 0.75*height
    boxheight = 0.25*height

    # setup the figure and axes
    histAx = fig.add_axes([left, bottom, width, histheight])
    bpAx = fig.add_axes([left, bottom+histheight, width, boxheight])

    # plot stuff
    bpAx.bxp(data_stats, vert=False, flierprops={"marker": 'x'})
    histAx.hist(data, bins=bins, color=colour)

    # Record the limits of the *standard* plots before any adjustment, so
    # that adjusted and unadjusted figures end up on the same scale.
    xlims = np.array([bpAx.get_xlim(), histAx.get_xlim()])

    # Do an adjusted boxplot
    if adjusted:
        stats = data_stats[0]
        whislo, whishi, fliers = _adjusted_whiskers(
            np.asarray(data), stats['q1'], stats['q3'], stats['iqr'],
            medcouple_1d(data))
        stats['whislo'] = whislo
        stats['whishi'] = whishi
        stats['fliers'] = fliers

        # Redraw the boxplot with the adjusted statistics.
        bpAx.cla()
        bpAx.bxp(data_stats, vert=False, flierprops={"marker": 'x'})

    # confirm that the axes line up
    for ax in [bpAx, histAx]:
        ax.set_xlim([xlims.min(), xlims.max()])

    bpAx.set_xticklabels([])  # clear out overlapping xlabels
    bpAx.set_yticks([])  # don't need that 1 tick mark
    bpAx.set_title(title, fontsize=20)

    if xticks is not None:
        histAx.set_xticks(xticks)
    histAx.get_xaxis().tick_bottom()
    bpAx.get_xaxis().tick_top()

    # Parenthesised single argument: valid on both Python 2 and Python 3.
    print("%d outliers for %s" % (len(data_stats[0]['fliers']), title))
4a669a51f49c also do adjusted boxplots
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 48
diff changeset
63
45
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
# Each data file holds one number per line; parse every line as a float.
with open("../../data/men") as f:
    men = list(map(float, f))

with open("../../data/women") as f:
    women = list(map(float, f))
6daaf6a8e431 Add code to plot boys and girls
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
69
53
874eb0823660 plots: ensure that adjusted boxplots use the same scale
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 50
diff changeset
70
48
1baa6b0a7199 move duplicate code into function
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 46
diff changeset
# Axis settings shared by both panels: ticks every 5 units from 5 to 100 and
# one histogram bin edge at every half-integer from 0.5 to 99.5.
xticks = np.arange(5, 105, 5)
bins = np.arange(100) + 0.5

# Placement ([left, bottom, width, height]) and fill colour of each panel:
# men in the upper half of the figure, women in the lower half.
boyrect, boycolour = [0.05, 0.55, 0.9, 0.35], [0.3, 0.3, 1]
girlrect, girlcolour = [0.05, 0.1, 0.9, 0.35], 'pink'

panels = [
    (men, boyrect, boycolour, "Ages of actors"),
    (women, girlrect, girlcolour, "Ages of actresses"),
]

# One figure with standard boxplots, then one with adjusted boxplots.
for adjusted, outfile in [(False, "boys-and-girls.pdf"),
                          (True, "boys-and-girls-adjusted.pdf")]:
    fig = plt.figure(figsize=(12, 8))
    for sample, rect, colour, title in panels:
        boxhistplot(sample, fig=fig, rect=rect, xticks=xticks, colour=colour,
                    bins=bins, title=title, adjusted=adjusted)
    plt.savefig(outfile)
54
bb6e5cf6aa83 plots: add geometric data
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 53
diff changeset
93
bb6e5cf6aa83 plots: add geometric data
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 53
diff changeset
# Reproducible sample of 10000 draws from a geometric distribution (p = 0.2).
np.random.seed(0)
geo = np.random.geometric(0.20, size=10000)

# Raw points on a single horizontal line.
plt.close('all')
plt.plot(geo, [1] * len(geo), 'x')
plt.yticks([])
plt.savefig("geometric-points.pdf")

# Plain horizontal boxplot of the same sample.
plt.close('all')
plt.boxplot(geo, vert=False, flierprops={"marker": 'x'})
plt.savefig("geometric-boxplot.pdf")

# Histogram + boxplot, first with standard and then with adjusted whiskers.
geo_options = dict(rect=[0.05, 0.1, 0.9, 0.7], colour='yellow', bins=20,
                   title="MS lesion counts (simulated)",
                   xticks=[0, 10, 20, 30, 40, 50])
for adjusted, outfile in [(False, "geometric-boxhistplot.pdf"),
                          (True, "geometric-boxhistplot-adjusted.pdf")]:
    fig = plt.figure(figsize=(12, 8))
    boxhistplot(geo, fig=fig, adjusted=adjusted, **geo_options)
    plt.savefig(outfile)