# HG changeset patch # User Jordi Gutiérrez Hermoso # Date 1463536132 14400 # Node ID 73b369370665e53f47b19b511b109656b1fd2a16 # Parent cd940f75aab69cf281a2695423f162e2207f2ea5 Finish section 2 diff --git a/talk/talk.tex b/talk/talk.tex --- a/talk/talk.tex +++ b/talk/talk.tex @@ -2,8 +2,12 @@ \documentclass[blue]{beamer} \usepackage{bm, fourier, anyfontsize, xcolor} \newcommand{\MC}{\operatorname{MC}} +\newcommand{\signum}{\operatorname{signum}} \newcommand{\IQR}{\operatorname{IQR}} +\theoremstyle{definition} +\newtheorem*{defn}{Definition} + \mode { @@ -84,7 +88,7 @@ \begin{center} \pgfimage[height=2.5in]{img/normal-boxhistplot} \end{center} - + The boxplot identifies $10$ outliers out of $1000$ points ($1\%$) \end{frame} @@ -103,12 +107,12 @@ \begin{overlayarea}{\textwidth}{8cm} \only<1>{ \pgfimage[height=3in]{img/geometric-boxhistplot} - + $433$ outliers out of $10 000$ points ($4.3\%$) } \only<2>{ \pgfimage[height=3in]{img/boys-and-girls} - + $578$ and $644$ outliers for actors and actresses respectively ($1.2\%$ and $3\%$) } @@ -155,8 +159,8 @@ For the whiskers, Hubert and Vandervieren recommend: \[ \begin{cases} - [Q_1 - 1.5 \IQR e^{-3 \MC}, Q_3 + 1.5 \IQR e^{4 \MC}] &\text{if} \MC > 0 \\ - [Q_1 - 1.5 \IQR e^{-4 \MC}, Q_3 + 1.5 \IQR e^{3 \MC}] &\text{if} \MC < 0 + [Q_1 - 1.5 \IQR e^{-3 \MC}, Q_3 + 1.5 \IQR e^{4 \MC}] &\text{if } \MC > 0 \\ + [Q_1 - 1.5 \IQR e^{-4 \MC}, Q_3 + 1.5 \IQR e^{3 \MC}] &\text{if } \MC < 0 \end{cases} \] \pause @@ -171,12 +175,12 @@ \begin{overlayarea}{\textwidth}{8cm} \only<1>{ \pgfimage[height=3in]{img/geometric-boxhistplot} - + $433$ outliers out of $10 000$ points ($4.3\%$) } \only<2>{ \pgfimage[height=3in]{img/geometric-boxhistplot-adjusted} - + \textcolor{red}{$25$ outliers} out of $10 000$ points (\textcolor{red}{$0.25\%$}) (\textcolor{blue}{$\MC = 0.25$}) } @@ -187,12 +191,12 @@ \begin{overlayarea}{\textwidth}{8cm} \only<1>{ \pgfimage[height=3in]{img/normal-boxhistplot} - + $10$ outliers out of $1 000$ points ($1\%$) } 
\only<2>{ \pgfimage[height=3in]{img/normal-boxhistplot-adjusted} - + \textcolor{red}{$10$ outliers} out of $1 000$ points (\textcolor{red}{$1\%$}) (\textcolor{blue}{$\MC = 0.0006$}) } @@ -222,7 +226,69 @@ \section{The Medcouple} \begin{frame} - + \emph{G. Brys; M. Hubert; A. Struyf (November 2004). ``A Robust + Measure of Skewness''. Journal of Computational and Graphical + Statistics 13 (4): 996--1017. doi:10.1198/106186004X12632.} +\end{frame} + +\begin{frame}{Motivation} + Consider the quartile skewness: + \[ + B_1 = \frac{(Q_3 - Q_2) - (Q_2 - Q_1)}{Q_3 - Q_1} + \] + $Q_2 = \text{median}$ +\end{frame} + +\begin{frame}{Definition} + Idea: compute this kernel over all couples split along the median: + \[ + h(x_i, x_j) = + \begin{cases} + \frac{(x_i - m) - (m - x_j)}{x_i - x_j} & \text{if } x_i \neq x_j \\ + \signum(p - 1 - i - j) & \text{if } x_i = m = x_j + \end{cases} + \] + \pause + where + \begin{itemize} + \item $m = \text{median}$ + \item $x_i \geq m \geq x_j$ + \item $p = |\{x_i \geq m\}|$ + \end{itemize} + \pause + \begin{defn} + The \emph{medcouple} is the median of the kernel of all couples + above. + \end{defn} +\end{frame} + +\begin{frame}{Properties} + It is easy to see that the medcouple is + \pause + \begin{itemize} + \item location-invariant + \pause + \item scale-invariant + \pause + \item between $-1$ and $1$ + \pause + \item a measure of skewness + \end{itemize} +\end{frame} + +\begin{frame}{Properties} + The medcouple is a \emph{robust} measure of skewness. + \pause + \begin{defn} + A statistic is \emph{robust} if it does not depend on the values + of extreme observations (outliers). + \end{defn} + \pause + \begin{itemize} + \item The median has maximum robustness. Its breakdown point is $50\%$. + \pause + \item The medcouple's breakdown point is $25\%$. + \end{itemize} \end{frame} @@ -263,8 +329,8 @@ \end{center} Evaluate the kernel for all couples. 
\[ - \frac{ (x_i^+ - x_m) - (x_m - x_j^-)}{x_i - x_j}, - \quad x_i^+ \in X^+, \quad x_j^- \in X^- + \frac{ (x_i - x_m) - (x_m - x_j)}{x_i - x_j}, + \quad x_i \in X^+, \quad x_j \in X^- \]} \only<3>{% \begin{center}