機械学習基礎理論独習

誤りがあればご指摘いただけると幸いです。数式が整うまで少し時間がかかります。リンクフリーです。

勉強ログです。リンクフリーです
目次へ戻る

PRML演習問題 10.16(標準) www

問題

(10.70)で与えられる変分ガウス混合モデルの下界の、最初の二項についての結果(10.71)(10.72)を確かめよ。

参照

\begin{eqnarray}
p({\bf Z}|{\boldsymbol\pi})=\prod_{n=1}^N\prod_{k=1}^K\pi_k^{z_{nk}}\tag{10.37}
\end{eqnarray}

\begin{eqnarray}
p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})=\prod_{n=1}^N\prod_{k=1}^K{\mathcal N}({\bf x}_n|{\boldsymbol\mu}_k,{\bf\Lambda}_k^{-1})^{z_{nk}}\tag{10.38}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[z_{nk}]=r_{nk}\tag{10.50}
\end{eqnarray}

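\begin{eqnarray}
N_k=\sum_{n=1}^Nr_{nk}\tag{10.51}
\end{eqnarray}

\begin{eqnarray}
\overline{\bf x}_k=\frac{1}{N_k}\sum_{n=1}^Nr_{nk}{\bf x}_n\tag{10.52}
\end{eqnarray}

\begin{eqnarray}
{\bf S}_k=\frac{1}{N_k}\sum_{n=1}^Nr_{nk}({\bf x}_n-\overline{\bf x}_k)({\bf x}_n-\overline{\bf x}_k)^\top\tag{10.53}
\end{eqnarray}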

\begin{eqnarray}
{\mathbb E}_{{\boldsymbol\mu}_k,{\bf\Lambda}_k}[({\bf x}_n-{\boldsymbol\mu}_k)^\top{\bf\Lambda}_k({\bf x}_n-{\boldsymbol\mu}_k)]=D\beta_k^{-1}+\nu_k({\bf x}_n-{\bf m}_k)^\top{\bf W}_k({\bf x}_n-{\bf m}_k)\tag{10.64}
\end{eqnarray}

\begin{eqnarray}
\ln\widetilde{\Lambda}_k\equiv{\mathbb E}[\ln|{\bf\Lambda}_k|]=\sum_{i=1}^D\psi\left(\frac{\nu_k+1-i}{2}\right)+D\ln2+\ln|{\bf W}_k|\tag{10.65}
\end{eqnarray}

\begin{eqnarray}
\ln\tilde{\pi}_k\equiv{\mathbb E}[\ln\pi_k]=\psi(\alpha_k)-\psi(\widehat{\alpha})\tag{10.66}
\end{eqnarray}

\begin{eqnarray}
{\mathcal L}&=&\sum_{\bf Z}\iiint q({\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})\ln\left(\frac{p({\bf X},{\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})}{q({\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})}\right){\rm d}{\boldsymbol\pi}{\rm d}{\boldsymbol\mu}{\rm d}{\bf\Lambda}\\
&=&{\mathbb E}[\ln p({\bf X},{\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})]-{\mathbb E}[\ln q({\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})]\\
&=&{\mathbb E}[\ln p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})]+{\mathbb E}[\ln p({\bf Z}|{\boldsymbol\pi})]+{\mathbb E}[\ln p({\boldsymbol\pi})]+{\mathbb E}[\ln p({\boldsymbol\mu},{\bf\Lambda})]\\
&&-{\mathbb E}[\ln q({\bf Z})]-{\mathbb E}[\ln q({\boldsymbol\pi})]-{\mathbb E}[\ln q({\boldsymbol\mu},{\bf\Lambda})]\tag{10.70}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[\ln p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})]=\frac{1}{2}\sum_{k=1}^KN_k\left(\ln\widetilde{\Lambda}_k-D\beta_k^{-1}-\nu_k{\rm Tr}({\bf S}_k{\bf W}_k)-\nu_k(\overline{\bf x}_k-{\bf m}_k)^\top{\bf W}_k(\overline{\bf x}_k-{\bf m}_k)-D\ln(2\pi)\right)\tag{10.71}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[\ln p({\bf Z}|{\boldsymbol\pi})]=\sum_{n=1}^N\sum_{k=1}^Kr_{nk}\ln\tilde{\pi}_k\tag{10.72}
\end{eqnarray}

解答

{\mathbb E}[\ln p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})]=\left\langle\ln p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})\right\rangle_{q({\bf Z})q({\boldsymbol\mu},{\bf\Lambda})}を計算します。

\begin{eqnarray}
\langle\ln p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\bf Z})q({\boldsymbol\mu},{\bf\Lambda})}&=&\left\langle\ln \Big(\underbrace{\prod_{n=1}^N\prod_{k=1}^K{\mathcal N}({\bf x}_n|{\boldsymbol\mu}_k,{\bf\Lambda}_k^{-1})^{z_{nk}}}_{(10.38)}\Big)\right\rangle_{q({\bf Z})q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\left\langle\sum_{n=1}^N\sum_{k=1}^Kz_{nk}\ln{\mathcal N}({\bf x}_n|{\boldsymbol\mu}_k,{\bf\Lambda}_k^{-1})\right\rangle_{q({\bf Z})q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\left\langle z_{nk}\ln{\mathcal N}({\bf x}_n|{\boldsymbol\mu}_k,{\bf\Lambda}_k^{-1})\right\rangle_{q({\bf Z})q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\left\langle \langle z_{nk}\rangle_{q({\bf Z})}\ln{\mathcal N}({\bf x}_n|{\boldsymbol\mu}_k,{\bf\Lambda}_k^{-1})\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\left\langle \underbrace{r_{nk}}_{(10.50)}\ln{\mathcal N}({\bf x}_n|{\boldsymbol\mu}_k,{\bf\Lambda}_k^{-1})\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\sum_{n=1}^N\sum_{k=1}^Kr_{nk}\left\langle -\frac{D}{2}\ln(2\pi)+\frac{1}{2}\ln|{\bf\Lambda}_k|-\frac{1}{2}({\bf x}_n-{\boldsymbol\mu}_k)^\top{\bf\Lambda}_k({\bf x}_n-{\boldsymbol\mu}_k)\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\sum_{n=1}^N\sum_{k=1}^Kr_{nk}\left(-\frac{D}{2}\ln(2\pi)+\frac{1}{2}\underbrace{\ln\widetilde{\Lambda}_k}_{(10.65)}-\frac{1}{2}\left(\underbrace{D\beta_k^{-1}+\nu_k({\bf x}_n-{\bf m}_k)^\top{\bf W}_k({\bf x}_n-{\bf m}_k)}_{(10.64)}\right)\right)\\
&=&\frac{1}{2}\sum_{k=1}^K\underbrace{N_k}_{(10.51)}\left(-D\ln(2\pi)+\ln\widetilde{\Lambda}_k-D\beta_k^{-1}\right)-\frac{1}{2}\sum_{k=1}^K\nu_k\underbrace{\sum_{n=1}^Nr_{nk}({\bf x}_n-{\bf m}_k)^\top{\bf W}_k({\bf x}_n-{\bf m}_k)}_{=:X}\tag{1}
\end{eqnarray}

X=\displaystyle\sum_{n=1}^Nr_{nk}({\bf x}_n-{\bf m}_k)^\top{\bf W}_k({\bf x}_n-{\bf m}_k)とおきます。

\begin{eqnarray}
X&=&\sum_{n=1}^Nr_{nk}({\bf x}_n-{\bf m}_k)^\top{\bf W}_k({\bf x}_n-{\bf m}_k)\\
&=&\underbrace{{\rm Tr}\left(\sum_{n=1}^Nr_{nk}({\bf x}_n-{\bf m}_k)^\top{\bf W}_k({\bf x}_n-{\bf m}_k)\right)}_{x\in{\mathbb R},\ x={\rm Tr}(x)}\\
&=&{\rm Tr}\left(\sum_{n=1}^Nr_{nk}\underbrace{{\bf W}_k({\bf x}_n-{\bf m}_k)({\bf x}_n-{\bf m}_k)^\top}_{{\rm Tr}({\bf AB})={\rm Tr}({\bf BA})}\right)\\
&=&{\rm Tr}\left({\bf W}_k\left(\sum_{n=1}^Nr_{nk}({\bf x}_n-{\bf m}_k)({\bf x}_n-{\bf m}_k)^\top\right)\right)\\
&=&{\rm Tr}\left({\bf W}_k\left(\underbrace{\sum_{n=1}^Nr_{nk}{\bf x}_n{\bf x}_n^\top}_{=:Y}-2\sum_{n=1}^Nr_{nk}{\bf x}_n{\bf m}_k^\top+\sum_{n=1}^Nr_{nk}{\bf m}_k{\bf m}_k^\top\right)\right)\tag{2}
\end{eqnarray}

Y=\displaystyle\sum_{n=1}^Nr_{nk}{\bf x}_n{\bf x}_n^\topとおきます。

\begin{eqnarray}
Y&=&\sum_{n=1}^Nr_{nk}{\bf x}_n{\bf x}_n^\top\\
&=&\sum_{n=1}^Nr_{nk}\left(({\bf x}_n-\overline{\bf x}_k)({\bf x}_n-\overline{\bf x}_k)^\top+{\bf x}_n\overline{\bf x}_k^\top+\overline{\bf x}_k{\bf x}_n^\top-\overline{\bf x}_k\overline{\bf x}_k^\top\right)\\
&=&\sum_{n=1}^Nr_{nk}({\bf x}_n-\overline{\bf x}_k)({\bf x}_n-\overline{\bf x}_k)^\top+\sum_{n=1}^Nr_{nk}{\bf x}_n\overline{\bf x}_k^\top+\overline{\bf x}_k\sum_{n=1}^Nr_{nk}{\bf x}_n^\top-\sum_{n=1}^Nr_{nk}\overline{\bf x}_k\overline{\bf x}_k^\top\\
&=&\underbrace{N_k{\bf S}_k}_{(10.51),(10.53)}+\underbrace{N_k\overline{\bf x}_k}_{(10.52)}\overline{\bf x}_k^\top+\overline{\bf x}_k\underbrace{N_k\overline{\bf x}_k^\top}_{(10.52)}-\underbrace{N_k}_{(10.51)}\overline{\bf x}_k\overline{\bf x}_k^\top\\
&=&N_k{\bf S}_k+N_k\overline{\bf x}_k\overline{\bf x}_k^\top\tag{3}
\end{eqnarray}

(3)を式(2)に代入します。

\begin{eqnarray}
X&=&{\rm Tr}\left({\bf W}_k\left(N_k{\bf S}_k+N_k\overline{\bf x}_k\overline{\bf x}_k^\top-2\sum_{n=1}^Nr_{nk}{\bf x}_n{\bf m}_k^\top+\sum_{n=1}^Nr_{nk}{\bf m}_k{\bf m}_k^\top\right)\right)\\
&=&{\rm Tr}\left({\bf W}_k\left(N_k{\bf S}_k+N_k\overline{\bf x}_k\overline{\bf x}_k^\top-2\underbrace{N_k\overline{\bf x}_k}_{(10.52)}{\bf m}_k^\top+\underbrace{N_k}_{(10.51)}{\bf m}_k{\bf m}_k^\top\right)\right)\\
&=&N_k{\rm Tr}\left({\bf W}_k\left({\bf S}_k+\overline{\bf x}_k\overline{\bf x}_k^\top-2\overline{\bf x}_k{\bf m}_k^\top+{\bf m}_k{\bf m}_k^\top\right)\right)\\
&=&N_k{\rm Tr}\left({\bf W}_k\left({\bf S}_k+(\overline{\bf x}_k-{\bf m}_k)(\overline{\bf x}_k-{\bf m}_k)^\top\right)\right)\\
&=&N_k\left({\rm Tr}\left({\bf W}_k{\bf S}_k\right)+{\rm Tr}\left({\bf W}_k(\overline{\bf x}_k-{\bf m}_k)(\overline{\bf x}_k-{\bf m}_k)^\top\right)\right)\\
&=&N_k\left(\underbrace{{\rm Tr}\left({\bf S}_k{\bf W}_k\right)}_{{\rm Tr}({\bf A}{\bf B})={\rm Tr}({\bf B}{\bf A})}+\underbrace{{\rm Tr}\left((\overline{\bf x}_k-{\bf m}_k)^\top{\bf W}_k(\overline{\bf x}_k-{\bf m}_k)\right)}_{{\rm Tr}({\bf A}{\bf B})={\rm Tr}({\bf B}{\bf A})}\right)\tag{4}
\end{eqnarray}

(4)を式(1)に代入します。

\begin{eqnarray}
\langle\ln p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\bf Z})q({\boldsymbol\mu},{\bf\Lambda})}&=&\frac{1}{2}\sum_{k=1}^KN_k\left(-D\ln(2\pi)+\ln\widetilde{\Lambda}_k-D\beta_k^{-1}\right)-\frac{1}{2}\sum_{k=1}^K\nu_kN_k\left({\rm Tr}\left({\bf S}_k{\bf W}_k\right)+{\rm Tr}\left((\overline{\bf x}_k-{\bf m}_k)^\top{\bf W}_k(\overline{\bf x}_k-{\bf m}_k)\right)\right)\\
&=&\frac{1}{2}\sum_{k=1}^KN_k\left(\ln\widetilde{\Lambda}_k-D\beta_k^{-1}-\nu_k{\rm Tr}({\bf S}_k{\bf W}_k)-\nu_k(\overline{\bf x}_k-{\bf m}_k)^\top{\bf W}_k(\overline{\bf x}_k-{\bf m}_k)-D\ln(2\pi)\right)\tag{5}
\end{eqnarray}

(5)より、式(10.71)が示せました。

{\mathbb E}[\ln p({\bf Z}|{\boldsymbol\pi})]=\langle\ln p({\bf Z}|{\boldsymbol\pi})\rangle_{q({\bf Z})q({\boldsymbol\pi})}を計算します。

\begin{eqnarray}
\langle\ln p({\bf Z}|{\boldsymbol\pi})\rangle_{q({\bf Z})q({\boldsymbol\pi})}&=&\left\langle\ln\Big(\underbrace{\prod_{n=1}^N\prod_{k=1}^K\pi_k^{z_{nk}}}_{(10.37)}\Big)\right\rangle_{q({\bf Z})q({\boldsymbol\pi})}\\
&=&\left\langle\sum_{n=1}^N\sum_{k=1}^Kz_{nk}\ln\pi_k\right\rangle_{q({\bf Z})q({\boldsymbol\pi})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\left\langle z_{nk}\ln\pi_k\right\rangle_{q({\bf Z})q({\boldsymbol\pi})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\langle z_{nk}\rangle_{q({\bf Z})}\langle\ln\pi_k\rangle_{q({\boldsymbol\pi})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\underbrace{r_{nk}}_{(10.50)}\underbrace{\ln\tilde{\pi}_k}_{(10.66)}\tag{6}
\end{eqnarray}

(6)より、式(10.72)が示せました。

目次へ戻る