changeset 61:73b369370665

Finish section 2
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Tue, 17 May 2016 21:48:52 -0400
parents cd940f75aab6
children 079a87062a98
files talk/talk.tex
diffstat 1 files changed, 78 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/talk/talk.tex
+++ b/talk/talk.tex
@@ -2,8 +2,12 @@
 \documentclass[blue]{beamer}
 \usepackage{bm, fourier, anyfontsize, xcolor}
 \newcommand{\MC}{\operatorname{MC}}
+\newcommand{\signum}{\operatorname{signum}}
 \newcommand{\IQR}{\operatorname{IQR}}
 
+\theoremstyle{definition}
+\newtheorem*{defn}{Definition}
+
 
 \mode<presentation>
 {
@@ -84,7 +88,7 @@
   \begin{center}
     \pgfimage[height=2.5in]{img/normal-boxhistplot}
   \end{center}
-  
+
   The boxplot identifies $10$ outliers out of $1000$ points ($1\%$)
 \end{frame}
 
@@ -103,12 +107,12 @@
   \begin{overlayarea}{\textwidth}{8cm}
     \only<1>{
       \pgfimage[height=3in]{img/geometric-boxhistplot}
-      
+
       $433$ outliers out of $10 000$ points ($4.3\%$)
     }
     \only<2>{
       \pgfimage[height=3in]{img/boys-and-girls}
-      
+
       $578$ and $644$ outliers for actors and actresses respectively
       ($1.2\%$ and $3\%$)
     }
@@ -155,8 +159,8 @@
   For the whiskers, Hubert and Vandervieren recommend:
   \[
   \begin{cases}
-    [Q_1 - 1.5 \IQR e^{-3 \MC},  Q_3 + 1.5 \IQR e^{4 \MC}] &\text{if} \MC > 0 \\
-    [Q_1 - 1.5 \IQR e^{-4 \MC},  Q_3 + 1.5 \IQR e^{3 \MC}] &\text{if} \MC < 0
+    [Q_1 - 1.5 \IQR e^{-3 \MC},  Q_3 + 1.5 \IQR e^{4 \MC}] &\text{if } \MC > 0 \\
+    [Q_1 - 1.5 \IQR e^{-4 \MC},  Q_3 + 1.5 \IQR e^{3 \MC}] &\text{if } \MC < 0
   \end{cases}
   \]
   \pause
@@ -171,12 +175,12 @@
   \begin{overlayarea}{\textwidth}{8cm}
     \only<1>{
       \pgfimage[height=3in]{img/geometric-boxhistplot}
-      
+
       $433$ outliers out of $10 000$ points ($4.3\%$)
     }
     \only<2>{
       \pgfimage[height=3in]{img/geometric-boxhistplot-adjusted}
-      
+
       \textcolor{red}{$25$ outliers} out of $10 000$ points
       (\textcolor{red}{$0.25\%$}) (\textcolor{blue}{$\MC = 0.25$})
     }
@@ -187,12 +191,12 @@
   \begin{overlayarea}{\textwidth}{8cm}
     \only<1>{
       \pgfimage[height=3in]{img/normal-boxhistplot}
-      
+
       $10$ outliers out of $1 000$ points ($1\%$)
     }
     \only<2>{
       \pgfimage[height=3in]{img/normal-boxhistplot-adjusted}
-      
+
       \textcolor{red}{$10$ outliers} out of $1 000$ points
       (\textcolor{red}{$1\%$}) (\textcolor{blue}{$\MC = 0.0006$})
     }
@@ -222,7 +226,69 @@
 \section{The Medcouple}
 
 \begin{frame}
-  
+  \emph{G. Brys; M. Hubert; A. Struyf (November 2004). ``A Robust
+    Measure of Skewness''. Journal of Computational and Graphical
+    Statistics 13 (4): 996--1017. doi:10.1198/106186004X12632.}
+\end{frame}
+
+\begin{frame}{Motivation}
+  Consider the quartile skewness:
+  \[
+  B_1 = \frac{(Q_3 - Q_2) - (Q_2 - Q_1)}{Q_3 - Q_1}
+  \]
+  $Q_2 = \text{median}$
+\end{frame}
+
+\begin{frame}{Definition}
+  Idea: compute this kernel over all couples split along the median:
+  \[
+  h(x_i, x_j) =
+  \begin{cases}
+    \frac{(x_i - m) - (m - x_j)}{x_i - x_j} & \text{if } x_i \neq x_j \\
+    \signum(p - 1 - i - j) & \text{if } x_i = m = x_j
+  \end{cases}
+  \]
+  \pause
+  where
+  \begin{itemize}
+    \item $m = \text{median}$
+    \item $x_i \geq m \geq x_j$
+    \item $p = |\{x_i \geq m\}|$
+  \end{itemize}
+  \pause
+  \begin{defn}
+    The \emph{medcouple} is the median of the kernel values over all
+    the couples above.
+  \end{defn}
+\end{frame}
+
+\begin{frame}{Properties}
+  It is easy to see that the medcouple is
+  \pause
+  \begin{itemize}
+    \item location-invariant
+    \pause
+    \item scale-invariant
+    \pause
+    \item between $-1$ and $1$
+    \pause
+    \item a measure of skewness
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Properties}
+  The medcouple is a \emph{robust} measure of skewness.
+  \pause
+  \begin{defn}
+    A statistic is \emph{robust} if it does not depend on the values
+    of extreme observations (outliers).
+  \end{defn}
+  \pause
+  \begin{itemize}
+    \item The median has maximum robustness. Its breakdown point is $50\%$.
+    \pause
+    \item The medcouple's breakdown point is $25\%$.
+  \end{itemize}
 \end{frame}
 
 
@@ -263,8 +329,8 @@
       \end{center}
       Evaluate the kernel for all couples.
       \[
-      \frac{ (x_i^+ - x_m) - (x_m - x_j^-)}{x_i - x_j},
-      \quad x_i^+ \in X^+, \quad x_j^- \in X^-
+      \frac{ (x_i - x_m) - (x_m - x_j)}{x_i - x_j},
+      \quad x_i \in X^+, \quad x_j \in X^-
       \]}
     \only<3>{%
       \begin{center}