How to proceed in this proof?

156 Views Asked by At

Let \begin{eqnarray} \eta(x) = \mathbf{P}(Y=1 \mid X= x) \end{eqnarray} and $Y\in \{0,1\}$. Assume $\tilde{\eta}_1(x)$ and $\tilde{\eta}_0(x)$ are some approximations of $\eta(x)$ and $1-\eta(x)$ respectively and sum of them is not one. Also, define $$ g(x) = \begin{cases} 0; & \text{if } \tilde{\eta}_1(x) \leq \tilde{\eta}_0(x)\\ 1; & \text{otherewise} \end{cases}, \quad g^*(x) = \begin{cases} 0; & \text{if } \eta(x) \leq 1-\eta(x)\\ 1 & \text{otherewise} \end{cases}. $$ How to prove the following theorem: $$ \mathbf{P}(Y\ne g(x) )-\mathbf{P}(Y\ne g^*(x) ) \leq \int_{\mathbb R^d} \vert 1-\eta(x)-\tilde{\eta}_0(x)\vert \mu(dx) + \int_{\mathbb R^d} \vert \eta(x)-\tilde{\eta}_1(x)\vert \mu(dx). $$ This is Theorem 2.3 from the book by Devroye: A probabilistic Theory of Pattern Recognition.

Edit:

We know $$ \mathbf{P}(Y\ne g(x) )-\mathbf{P}(Y\ne g^*(x) )= \int_{\mathbb R^d} \Bigl\{\mathbf{P}(Y\ne g(X)\vert X= x )-\mathbf{P}(Y\ne g^*(X)\vert X= x )\Bigr\}\mu(dx), $$ and \begin{align*} \mathbf{P}(Y\ne g(X) \mid X= x ) &= 1- \mathbf{P}(Y = g(x) \mid X= x)\\ & = 1- (\mathbf{P}(Y = 0,\ g(x)=0 \mid X= x) + \mathbf{P}(Y = 1,\ g(x)=1 \mid X= x))\\ & = 1- (I_{[g(x)=0]}\mathbf{P}(Y = 0 \mid X= x) + I_{[g(x)=1]}\mathbf{P}(Y = 1\mid X= x)). \end{align*} Therefore, \begin{align*} &\mathrel{\phantom{=}}{} \mathbf{P}(Y\ne g(X) \mid X= x )-\mathbf{P}(Y\ne g^*(X) \mid X= x )\\ & = (I_{[g^*(x)=0]}\mathbf{P}(Y = 0 \mid X= x) + I_{[g^*(x)=1]}\mathbf{P}(Y = 1 \mid X= x))\\ &\quad - (I_{[g(x)=0]}\mathbf{P}(Y = 0 \mid X= x) + I_{[g(x)=1]}\mathbf{P}(Y = 1 \mid X= x))\\ & = (I_{[g^*(x)=0]}(1-\eta(x)) + I_{[g^*(x)=1]}\eta(x))\\ &\quad - (I_{[g(x)=0]}\mathbf{P}(Y = 0 \mid X= x) + I_{[g(x)=1]}\mathbf{P}(Y = 1 \mid X= x)). \end{align*} But how to proceed?

1

There are 1 best solutions below

1
On BEST ANSWER

$\def\peq{\mathrel{\phantom{=}}{}}\def\d{\mathrm{d}}\def\Ω{{\mit Ω}}$Define $η_1(x) = η(x)$, $η_0(x) = 1 - η(x)$. It is given that$$ η_1(X) = E(I_{\{Y = 1\}} \mid X),\ η_0(X) = E(I_{\{Y = 0\}} \mid X),\\ g(X) = I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X)\}},\ g^*(X) = I_{\{η_1(X) > η_0(X)\}}, $$ thus$$ I_{\{g(X) = 1\}} = I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X)\}},\ I_{\{g(X) = 0\}} = I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X)\}},\\ I_{\{g^*(X) = 1\}} = I_{\{η_1(X) > η_0(X)\}},\ I_{\{g^*(X) = 0\}} = I_{\{η_1(X) \leqslant η_0(X)\}}, $$ and\begin{align*} P(Y \neq g(X)) &= E(I_{\{Y = 1,\ g(X) = 0\}}) + E(I_{\{Y = 0,\ g(X) = 1\}})\\ &= E(I_{\{Y = 1\}} I_{\{g(X) = 0\}}) + E(I_{\{Y = 0\}} I_{\{g(X) = 1\}})\\ &= E(E(I_{\{Y = 1\}} I_{\{g(X) = 0\}} \mid X)) + E(E(I_{\{Y = 0\}} I_{\{g(X) = 1\}} \mid X))\\ &= E(I_{\{g(X) = 0\}} E(I_{\{Y = 1\}} \mid X)) + E(I_{\{g(X) = 1\}} E(I_{\{Y = 0\}} \mid X))\\ &= E(I_{\{g(X) = 0\}} E(I_{\{Y = 1\}} \mid X)) + E(I_{\{g(X) = 1\}} E(I_{\{Y = 0\}} \mid X))\\ &= E(I_{\{g(X) = 0\}} η_1(X)) + E(I_{\{g(X) = 1\}} η_0(X))\\ &= E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X)\}} η_1(X)) + E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X)\}} η_0(X)), \end{align*} analogously,$$ P(Y \neq g^*(X)) = E(I_{\{η_1(X) \leqslant η_0(X)\}} η_1(X)) + E(I_{\{η_1(X) > η_0(X)\}} η_0(X)), $$ which implies\begin{align*} &\peq P(Y \neq g(X)) - P(Y \neq g^*(X))\\ &= (E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X)\}} η_1(X)) + E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X)\}} η_0(X)))\\ &\peq - (E(I_{\{η_1(X) \leqslant η_0(X)\}} η_1(X)) + E(I_{\{η_1(X) > η_0(X)\}} η_0(X)))\\ &= \bigl(\color{red}{E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} η_1(X))} + E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} η_1(X))\\ &\peq + E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} η_0(X)) + \color{blue}{E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} η_0(X))}\bigr)\\ &\peq - \bigl(\color{red}{E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} η_1(X))} + E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} η_1(X))\\ &\peq + E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} η_0(X)) + \color{blue}{E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} η_0(X))}\bigr)\\ &= (E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} η_1(X)) + E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} η_0(X)))\\ &\peq - (E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} η_1(X)) + E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} η_0(X)))\\ &= E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} (η_1(X) - η_0(X)))\\ &\peq + E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} (η_0(X) - η_1(X))). \end{align*}

Note that\begin{align*} &\peq \int\limits_{\mathbb{R}^d} |η_0(x) - \widetilde{η}_0(x)| \,μ(\d x) + \int\limits_{\mathbb{R}^d} |η_1(x) - \widetilde{η}_1(x)| \,μ(\d x)\\ &= \int\limits_\Ω |η_0(X) - \widetilde{η}_0(X)| \,\d P + \int\limits_\Ω |η_1(X) - \widetilde{η}_1(X)| \,\d P\\ &= E(|η_0(X) - \widetilde{η}_0(X)| + |η_1(X) - \widetilde{η}_1(X)|)\\ &\geqslant E(I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} (|η_0(X) - \widetilde{η}_0(X)| + |η_1(X) - \widetilde{η}_1(X)|))\\ &\peq + E(I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} (|η_0(X) - \widetilde{η}_0(X)| + |η_1(X) - \widetilde{η}_1(X)|)). \end{align*} On $\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}$ there is\begin{align*} &\peq |η_0(X) - \widetilde{η}_0(X)| + |η_1(X) - \widetilde{η}_1(X)| \geqslant (\widetilde{η}_0(X) - η_0(X)) + (η_1(X) - \widetilde{η}_1(X))\\ &= (η_1(X) - η_0(X)) + (\widetilde{η}_0(X) - \widetilde{η}_1(X)) \geqslant η_1(X) - η_0(X), \end{align*} and on $\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}$ there is\begin{align*} &\peq |η_0(X) - \widetilde{η}_0(X)| + |η_1(X) - \widetilde{η}_1(X)| \geqslant (η_0(X) - \widetilde{η}_0(X)) + (\widetilde{η}_1(X) - η_1(X))\\ &= (η_0(X) - η_1(X)) + (\widetilde{η}_1(X) - \widetilde{η}_0(X)) \geqslant η_0(X) - η_1(X). \end{align*} Thus,\begin{align*} &\peq I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} (|η_0(X) - \widetilde{η}_0(X)| + |η_1(X) - \widetilde{η}_1(X)|)\\ &\geqslant I_{\{\widetilde{η}_1(X) \leqslant \widetilde{η}_0(X),\ η_1(X) > η_0(X)\}} (η_1(X) - η_0(X)),\\ &\peq I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} (|η_0(X) - \widetilde{η}_0(X)| + |η_1(X) - \widetilde{η}_1(X)|)\\ &\geqslant I_{\{\widetilde{η}_1(X) > \widetilde{η}_0(X),\ η_1(X) \leqslant η_0(X)\}} (η_0(X) - η_1(X)), \end{align*} and\begin{align*} &\peq P(Y \neq g(X)) - P(Y \neq g^*(X))\\ &\leqslant \int\limits_{\mathbb{R}^d} |η_0(x) - \widetilde{η}_0(x)| \,μ(\d x) + \int\limits_{\mathbb{R}^d} |η_1(x) - \widetilde{η}_1(x)| \,μ(\d x). \end{align*}