機械学習基礎理論独習

誤りがあればご指摘いただけると幸いです。数式が整うまで少し時間がかかります。リンクフリーです。

勉強ログです。リンクフリーです
目次へ戻る

PRML演習問題 10.17(難問)

問題

(10.70)で与えられる変分ガウス混合モデルの下界の、残りの項についての結果(10.73)-(10.77)を確かめよ。

参照

\begin{eqnarray}
p({\boldsymbol\pi})={\rm Dir}({\boldsymbol\pi}|{\boldsymbol\alpha}_0)=C({\boldsymbol\alpha}_0)\prod_{k=1}^K\pi_k^{\alpha_0-1}\tag{10.39}
\end{eqnarray}

\begin{eqnarray}
p({\boldsymbol\mu},{\bf\Lambda})&=&p({\boldsymbol\mu}|{\bf\Lambda})p({\bf\Lambda})\\
&=&\prod_{k=1}^K{\mathcal N}\left({\boldsymbol\mu}_k|{\bf m}_0,(\beta_0{\bf\Lambda}_k)^{-1}\right){\mathcal W}({\bf\Lambda}_k|{\bf W}_0,\nu_0)\tag{10.40}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[z_{nk}]=r_{nk}\tag{10.50}
\end{eqnarray}

\begin{eqnarray}
q^*({\boldsymbol\pi})={\rm Dir}({\boldsymbol\pi}|{\boldsymbol\alpha})\tag{10.57}
\end{eqnarray}

\begin{eqnarray}
q^*({\boldsymbol\mu}_k,{\bf\Lambda}_k)={\mathcal N}({\boldsymbol\mu}_k|{\bf m}_k,(\beta_k{\bf\Lambda}_k)^{-1}){\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)\tag{10.59}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}_{{\boldsymbol\mu}_k,{\bf\Lambda}_k}[({\bf x}_n-{\boldsymbol\mu}_k)^\top{\bf\Lambda}_k({\bf x}_n-{\boldsymbol\mu}_k)]=D\beta_k^{-1}+\nu_k({\bf x}_n-{\bf m}_k)^\top{\bf W}_k({\bf x}_n-{\bf m}_k)\tag{10.64}
\end{eqnarray}

\begin{eqnarray}
\ln\widetilde{\Lambda}_k\equiv{\mathbb E}[\ln|{\bf\Lambda}_k|]=\sum_{i=1}^D\psi\left(\frac{\nu_k+1-i}{2}\right)+D\ln2+\ln|{\bf W}_k|\tag{10.65}
\end{eqnarray}

\begin{eqnarray}
\ln\tilde{\pi}_k\equiv{\mathbb E}[\ln\pi_k]=\psi(\alpha_k)-\psi(\widehat{\alpha})\tag{10.66}
\end{eqnarray}

\begin{eqnarray}
{\mathcal L}&=&\sum_{\bf Z}\iiint q({\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})\ln\left(\frac{p({\bf X},{\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})}{q({\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})}\right){\rm d}{\boldsymbol\pi}{\rm d}{\boldsymbol\mu}{\rm d}{\bf\Lambda}\\
&=&{\mathbb E}[\ln p({\bf X},{\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})]-{\mathbb E}[\ln q({\bf Z},{\boldsymbol\pi},{\boldsymbol\mu},{\bf\Lambda})]\\
&=&{\mathbb E}[\ln p({\bf X}|{\bf Z},{\boldsymbol\mu},{\bf\Lambda})]+{\mathbb E}[\ln p({\bf Z}|{\boldsymbol\pi})]+{\mathbb E}[\ln p({\boldsymbol\pi})]+{\mathbb E}[\ln p({\boldsymbol\mu},{\bf\Lambda})]\\
&&-{\mathbb E}[\ln q({\bf Z})]-{\mathbb E}[\ln q({\boldsymbol\pi})]-{\mathbb E}[\ln q({\boldsymbol\mu},{\bf\Lambda})]\tag{10.70}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[\ln p({\boldsymbol\pi})]=\ln C({\boldsymbol\alpha}_0)+(\alpha_0-1)\sum_{k=1}^K\ln\tilde{\pi}_k\tag{10.73}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[\ln p({\boldsymbol\mu},{\bf\Lambda})]&=&\frac{1}{2}\sum_{k=1}^K\left(D\ln(\beta_0/2\pi)+\ln\widetilde{\Lambda}_k - \frac{D\beta_0}{{\beta}_k}-\beta_0\nu_k({\bf m}_k-{\bf m}_0)^\top{\bf W}_k({\bf m}_k-{\bf m}_0)\right)\\
&+&K\ln B({\bf W}_0,\nu_0)+\frac{\nu_0-D-1}{2}\sum_{k=1}^K\ln\widetilde{\Lambda}_k-\frac{1}{2}\sum_{k=1}^K\nu_k{\rm Tr}({\bf W}_0^{-1}{\bf W}_k)\tag{10.74}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[\ln q({\bf Z})]=\sum_{n=1}^N\sum_{k=1}^Kr_{nk}\ln r_{nk}\tag{10.75}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[\ln q({\boldsymbol\pi})]=\sum_{k=1}^K(\alpha_k-1)\ln\tilde{\pi}_k+\ln C({\boldsymbol\alpha})\tag{10.76}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[\ln q({\boldsymbol\mu},{\bf\Lambda})]=\sum_{k=1}^K\left(\frac{1}{2}\ln\widetilde{\Lambda}_k+\frac{D}{2}\ln\left(\frac{\beta_k}{2\pi}\right)-\frac{D}{2}-{\rm H}[q({\bf\Lambda}_k)]\right)\tag{10.77}
\end{eqnarray}

\begin{eqnarray}
{\mathcal W}({\bf\Lambda}|{\bf W},\nu)=B({\bf W},\nu)|{\bf\Lambda}|^{(\nu-D-1)/2}\exp\left(-\frac{1}{2}{\rm Tr}({\bf W}^{-1}{\bf\Lambda})\right)\tag{B.78}
\end{eqnarray}

\begin{eqnarray}
{\mathbb E}[{\bf\Lambda}]=\nu{\bf W}\tag{B.80}
\end{eqnarray}

\begin{eqnarray}
{\rm H}[{\bf\Lambda}]=-\ln B({\bf W},\nu)-\frac{(\nu-D-1)}{2}{\mathbb E}[\ln|{\bf\Lambda}|]+\frac{\nu D}{2}\tag{B.82}
\end{eqnarray}

解答

{\mathbb E}[\ln p({\boldsymbol\pi})]=\langle\ln p({\boldsymbol\pi})\rangle_{q({\boldsymbol\pi})}を計算します。

\begin{eqnarray}
\langle\ln p({\boldsymbol\pi})\rangle_{q({\boldsymbol\pi})}&=&\left\langle\ln\Big(\underbrace{C({\boldsymbol\alpha}_0)\prod_{k=1}^K\pi_k^{\alpha_0-1}}_{(10.39)}\Big)\right\rangle_{q({\boldsymbol\pi})}\\
&=&\left\langle\ln C({\boldsymbol\alpha}_0)+\sum_{k=1}^K(\alpha_0-1)\ln\pi_k\right\rangle_{q({\boldsymbol\pi})}\\
&=&\ln C({\boldsymbol\alpha}_0)+\sum_{k=1}^K(\alpha_0-1)\left\langle\ln\pi_k\right\rangle_{q({\boldsymbol\pi})}\\
&=&\ln C({\boldsymbol\alpha}_0)+\sum_{k=1}^K(\alpha_0-1)\underbrace{\ln\tilde{\pi}_k}_{(10.66)}\tag{1}
\end{eqnarray}

(1)より、式(10.73)が示せました。

{\mathbb E}[\ln p({\boldsymbol\mu},{\bf\Lambda})]=\langle\ln p({\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}を計算します。

\begin{eqnarray}
\langle\ln p({\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}&=&\langle\ln p({\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\left\langle\ln \Big(\underbrace{\prod_{k=1}^K{\mathcal N}\left({\boldsymbol\mu}_k|{\bf m}_0,(\beta_0{\bf\Lambda}_k)^{-1}\right){\mathcal W}({\bf\Lambda}_k|{\bf W}_0,\nu_0)}_{(10.40)}\Big)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\left\langle \sum_{k=1}^K\left(\ln{\mathcal N}\left({\boldsymbol\mu}_k|{\bf m}_0,(\beta_0{\bf\Lambda}_k)^{-1}\right)+\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_0,\nu_0)\right)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\sum_{k=1}^K\left\langle \ln{\mathcal N}\left({\boldsymbol\mu}_k|{\bf m}_0,(\beta_0{\bf\Lambda}_k)^{-1}\right)+\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_0,\nu_0)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\sum_{k=1}^K\underbrace{\left\langle \ln{\mathcal N}\left({\boldsymbol\mu}_k|{\bf m}_0,(\beta_0{\bf\Lambda}_k)^{-1}\right)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}}_{=:A}+\sum_{k=1}^K\underbrace{\left\langle\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_0,\nu_0)\right\rangle_{q({\bf\Lambda})}}_{=:B}\tag{2}
\end{eqnarray}

A=\left\langle \ln{\mathcal N}\left({\boldsymbol\mu}_k|{\bf m}_0,(\beta_0{\bf\Lambda}_k)^{-1}\right)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}とおきます。

\begin{eqnarray}
A&=&\left\langle \ln{\mathcal N}\left({\boldsymbol\mu}_k|{\bf m}_0,(\beta_0{\bf\Lambda}_k)^{-1}\right)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\left\langle -\frac{D}{2}\ln(2\pi)+\frac{1}{2}\ln|\beta_0{\bf\Lambda}_k|-\frac{1}{2}({\boldsymbol\mu}_k-{\bf m}_0)^\top(\beta_0{\bf\Lambda}_k)({\boldsymbol\mu}_k-{\bf m}_0)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\left\langle -\frac{D}{2}\ln(2\pi)+\frac{D}{2}\underbrace{\ln\beta_0}_{\beta_0\in{\mathbb R}^{+}}+\frac{1}{2}\ln|{\bf\Lambda}_k|-\frac{1}{2}\beta_0({\boldsymbol\mu}_k-{\bf m}_0)^\top{\bf\Lambda}_k({\boldsymbol\mu}_k-{\bf m}_0)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\frac{1}{2}\left\langle D\ln\left(\frac{\beta_0}{2\pi}\right)+\ln|{\bf\Lambda}_k|-\beta_0({\boldsymbol\mu}_k-{\bf m}_0)^\top{\bf\Lambda}_k({\boldsymbol\mu}_k-{\bf m}_0)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\\
&=&\frac{1}{2}\left(D\ln\left(\frac{\beta_0}{2\pi}\right)+\left\langle \ln|{\bf\Lambda}_k|\right\rangle_{q({\bf\Lambda})}-\beta_0\left\langle({\boldsymbol\mu}_k-{\bf m}_0)^\top{\bf\Lambda}_k({\boldsymbol\mu}_k-{\bf m}_0)\right\rangle_{q({\boldsymbol\mu}|{\bf\Lambda})q({\bf\Lambda})}\right)\\
&=&\frac{1}{2}\left(D\ln\left(\frac{\beta_0}{2\pi}\right)+\underbrace{\ln\widetilde{\Lambda}_k}_{(10.65)}-\beta_0\big(\underbrace{D\beta_k^{-1}+\nu_k({\bf m}_0-{\bf m}_k)^\top{\bf W}_k({\bf m}_0-{\bf m}_k)}_{(10.64)}\big)\right)\\
&=&\frac{1}{2}\left(D\ln\left(\frac{\beta_0}{2\pi}\right)+\ln\widetilde{\Lambda}_k-\frac{D\beta_0}{\beta_k}-\beta_0\nu_k({\bf m}_0-{\bf m}_k)^\top{\bf W}_k({\bf m}_0-{\bf m}_k)\right)\tag{3}
\end{eqnarray}

B=\left\langle\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_0,\nu_0)\right\rangle_{q({\bf\Lambda})}とおきます。

\begin{eqnarray}
B&=&\left\langle\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_0,\nu_0)\right\rangle_{q({\bf\Lambda})}\\
&=&\left\langle\ln\Bigg(\underbrace{B({\bf W_0},\nu_0)|{\bf\Lambda}_k|^{(\nu_0-D-1)/2}\exp\left(-\frac{1}{2}{\rm Tr}({\bf W}_0^{-1}{\bf\Lambda}_k)\right)}_{(B.78)}\Bigg)\right\rangle_{q({\bf\Lambda})}\\
&=&\left\langle\ln B({\bf W_0},\nu_0)+ \frac{\nu_0-D-1}{2}\ln|{\bf\Lambda}_k|-\frac{1}{2}{\rm Tr}({\bf W}_0^{-1}{\bf\Lambda}_k)\right\rangle_{q({\bf\Lambda})}\\
&=&\ln B({\bf W_0},\nu_0)+ \frac{\nu_0-D-1}{2}\left\langle\ln|{\bf\Lambda}_k|\right\rangle_{q({\bf\Lambda})}-\frac{1}{2}{\rm Tr}\left({\bf W}_0^{-1}\left\langle{\bf\Lambda}_k\right\rangle_{q({\bf\Lambda})}\right)\\
&=&\ln B({\bf W_0},\nu_0)+ \frac{\nu_0-D-1}{2}\underbrace{\ln\widetilde{\Lambda}_k}_{(10.65)}-\frac{1}{2}{\rm Tr}\left({\bf W}_0^{-1}\underbrace{\nu_k{\bf W}_k}_{(B.80)}\right)\\
&=&\ln B({\bf W_0},\nu_0)+ \frac{\nu_0-D-1}{2}\ln\widetilde{\Lambda}_k-\frac{1}{2}\nu_k{\rm Tr}\left({\bf W}_0^{-1}{\bf W}_k\right)\tag{4}
\end{eqnarray}

(3),(4)を式(2)に代入します。

\begin{eqnarray}
\langle\ln p({\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}&=&\sum_{k=1}^K\left(\frac{1}{2}\left(D\ln\left(\frac{\beta_0}{2\pi}\right)+\ln\widetilde{\Lambda}_k-\frac{D\beta_0}{\beta_k}-\beta_0\nu_k({\bf m}_0-{\bf m}_k)^\top{\bf W}_k({\bf m}_0-{\bf m}_k)\right)\right)\\
&+&\sum_{k=1}^K\left(\ln B({\bf W_0},\nu_0)+ \frac{\nu_0-D-1}{2}\ln\widetilde{\Lambda}_k-\frac{1}{2}\nu_k{\rm Tr}\left({\bf W}_0^{-1}{\bf W}_k\right)\right)\\
&=&\frac{1}{2}\sum_{k=1}^K\left(D\ln(\beta_0/2\pi)+\ln\widetilde{\Lambda}_k - \frac{D\beta_0}{{\beta}_k}-\beta_0\nu_k({\bf m}_k-{\bf m}_0)^\top{\bf W}_k({\bf m}_k-{\bf m}_0)\right)\\
&+&K\ln B({\bf W}_0,\nu_0)+\frac{\nu_0-D-1}{2}\sum_{k=1}^K\ln\widetilde{\Lambda}_k-\frac{1}{2}\sum_{k=1}^K\nu_k{\rm Tr}({\bf W}_0^{-1}{\bf W}_k)\tag{5}
\end{eqnarray}

(5)より、式(10.74)が示せました。

{\mathbb E}[\ln q({\bf Z})]=\langle\ln q({\bf Z})\rangle_{q({\bf Z})}を計算します。

\begin{eqnarray}
\langle\ln q({\bf Z})\rangle_{q({\bf Z})}&=&\left\langle\ln \left(\prod_{n=1}^N\prod_{k=1}^Kr_{nk}^{z_{nk}}\right)\right\rangle_{q({\bf Z})}\\
&=&\left\langle\sum_{n=1}^N\sum_{k=1}^Kz_{nk}\ln r_{nk}\right\rangle_{q({\bf Z})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\ln r_{nk}\left\langle z_{nk}\right\rangle_{q({\bf Z})}\\
&=&\sum_{n=1}^N\sum_{k=1}^K\ln r_{nk}\cdot \underbrace{r_{nk}}_{(10.50)}\\
&=&\sum_{n=1}^N\sum_{k=1}^Kr_{nk}\ln r_{nk}\tag{6}
\end{eqnarray}

(6)より、式(10.75)が示せました。

{\mathbb E}[\ln q({\boldsymbol\pi})]=\langle\ln q({\boldsymbol\pi})\rangle_{q({\boldsymbol\pi})}を計算します。

\begin{eqnarray}
\langle\ln q({\boldsymbol\pi})\rangle_{q({\boldsymbol\pi})}&=&\Big\langle\ln\underbrace{{\rm Dir}({\boldsymbol\pi}|{\boldsymbol\alpha})}_{(10.57)}\Big\rangle_{q({\boldsymbol\pi})}\\
&=&\left\langle\ln\Big(C({\boldsymbol\alpha})\prod_{k=1}^K\pi_k^{\alpha_k-1}\Big)\right\rangle_{q({\boldsymbol\pi})}\\
&=&\left\langle\ln C({\boldsymbol\alpha})+\sum_{k=1}^K(\alpha_k-1)\ln\pi_k\right\rangle_{q({\boldsymbol\pi})}\\
&=&\ln C({\boldsymbol\alpha})+\sum_{k=1}^K(\alpha_k-1)\left\langle\ln\pi_k\right\rangle_{q({\boldsymbol\pi})}\\
&=&\ln C({\boldsymbol\alpha})+\sum_{k=1}^K(\alpha_k-1)\underbrace{\ln\tilde{\pi}_k}_{(10.66)}\tag{7}
\end{eqnarray}

(7)より、式(10.76)が示せました。

{\mathbb E}[\ln q({\boldsymbol\mu},{\bf\Lambda})]=\langle\ln q({\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}を計算します。

\begin{eqnarray}
\langle\ln q({\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}&=&\left\langle\ln\left(\prod_{k=1}^Kq({\boldsymbol\mu}_k,{\bf\Lambda}_k)\right) \right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\left\langle\ln\Bigg(\prod_{k=1}^K\underbrace{{\mathcal N}({\boldsymbol\mu}_k|{\bf m}_k,(\beta_k{\bf\Lambda}_k)^{-1}){\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)}_{(10.59)}\Bigg) \right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\left\langle\sum_{k=1}^K\left(-\frac{D}{2}\ln(2\pi)+\frac{1}{2}\ln|\beta_k{\bf\Lambda}_k|-\frac{1}{2}({\boldsymbol\mu}_k-{\bf m}_k)^\top(\beta_k{\bf\Lambda}_k)({\boldsymbol\mu}_k-{\bf m}_k)+\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)\right) \right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\left\langle\sum_{k=1}^K\Bigg(-\frac{D}{2}\ln(2\pi)+\frac{D}{2}\underbrace{\ln\beta_k}_{\beta_k\in{\mathbb R}^+}+\frac{1}{2}\ln|{\bf\Lambda}_k|-\frac{1}{2}({\boldsymbol\mu}_k-{\bf m}_k)^\top(\beta_k{\bf\Lambda}_k)({\boldsymbol\mu}_k-{\bf m}_k)+\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)\Bigg) \right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\sum_{k=1}^K\left(\frac{D}{2}\ln\left(\frac{\beta_k}{2\pi}\right)+\frac{1}{2}\left\langle\ln|{\bf\Lambda}_k|\right\rangle_{q({\bf\Lambda})}-\frac{1}{2}\left\langle({\boldsymbol\mu}_k-{\bf m}_k)^\top(\beta_k{\bf\Lambda}_k)({\boldsymbol\mu}_k-{\bf m}_k)\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}+\left\langle\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)\right\rangle_{q({\bf\Lambda})}\right)\\
&=&\sum_{k=1}^K\left(\frac{D}{2}\ln\left(\frac{\beta_k}{2\pi}\right)+\frac{1}{2}\underbrace{\ln\widetilde{\Lambda}_k}_{(10.65)}-\frac{1}{2}\underbrace{\left\langle({\boldsymbol\mu}_k-{\bf m}_k)^\top(\beta_k{\bf\Lambda}_k)({\boldsymbol\mu}_k-{\bf m}_k)\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}}_{=:X}+\underbrace{\left\langle\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)\right\rangle_{q({\bf\Lambda})}}_{=:Y}\right)\tag{8}
\end{eqnarray}

X=\left\langle({\boldsymbol\mu}_k-{\bf m}_k)^\top(\beta_k{\bf\Lambda}_k)({\boldsymbol\mu}_k-{\bf m}_k)\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}とおきます。

\begin{eqnarray}
X&=&\left\langle({\boldsymbol\mu}_k-{\bf m}_k)^\top(\beta_k{\bf\Lambda}_k)({\boldsymbol\mu}_k-{\bf m}_k)\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\beta_k\left\langle({\boldsymbol\mu}_k-{\bf m}_k)^\top{\bf\Lambda}_k({\boldsymbol\mu}_k-{\bf m}_k)\right\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}\\
&=&\beta_k\Big(\underbrace{D\beta_k^{-1}+\nu_k({\bf m}_k-{\bf m}_k)^\top{\bf W}_k({\bf m}_k-{\bf m}_k)}_{(10.64)}\Big)\\
&=&\beta_k\left(D\beta_k^{-1}+0\right)\\
&=&\beta_kD\beta_k^{-1}\\
&=&D\tag{9}
\end{eqnarray}

Y=\left\langle\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)\right\rangle_{q({\bf\Lambda})}とおきます。

\begin{eqnarray}
Y&=&\left\langle\ln{\mathcal W}({\bf\Lambda}_k|{\bf W}_k,\nu_k)\right\rangle_{q({\bf\Lambda})}\\
&=&\left\langle\ln\Bigg(\underbrace{B({\bf W_k},\nu_k)|{\bf\Lambda}_k|^{(\nu_k-D-1)/2}\exp\left(-\frac{1}{2}{\rm Tr}({\bf W}_k^{-1}{\bf\Lambda}_k)\right)}_{(B.78)}\Bigg)\right\rangle_{q({\bf\Lambda})}\\
&=&\left\langle\ln B({\bf W_k},\nu_k)+ \frac{\nu_k-D-1}{2}\ln|{\bf\Lambda}_k|-\frac{1}{2}{\rm Tr}({\bf W}_k^{-1}{\bf\Lambda}_k)\right\rangle_{q({\bf\Lambda})}\\
&=&\ln B({\bf W_k},\nu_k)+ \frac{\nu_k-D-1}{2}\left\langle\ln|{\bf\Lambda}_k|\right\rangle_{q({\bf\Lambda})}-\frac{1}{2}{\rm Tr}\left({\bf W}_k^{-1}\left\langle{\bf\Lambda}_k\right\rangle_{q({\bf\Lambda})}\right)\\
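&=&\ln B({\bf W}_k,\nu_k)+ \frac{\nu_k-D-1}{2}\underbrace{\ln\widetilde{\Lambda}_k}_{(10.65)}-\frac{1}{2}{\rm Tr}\left({\bf W}_k^{-1}\underbrace{\nu_k{\bf W}_k}_{(B.80)}\right)\\
&=&\ln B({\bf W}_k,\nu_k)+ \frac{\nu_k-D-1}{2}\ln\widetilde{\Lambda}_k-\frac{\nu_kD}{2}\\
&=&-\underbrace{{\rm H}[q({\bf\Lambda}_k)]}_{(B.82)}\tag{10}
\end{eqnarray}

(9),(10)を式(8)に代入します。

\begin{eqnarray}
\langle\ln q({\boldsymbol\mu},{\bf\Lambda})\rangle_{q({\boldsymbol\mu},{\bf\Lambda})}&=&\sum_{k=1}^K\left(\frac{D}{2}\ln\left(\frac{\beta_k}{2\pi}\right)+\frac{1}{2}\ln\widetilde{\Lambda}_k-\frac{D}{2}-{\rm H}[q({\bf\Lambda}_k)]\right)\tag{11}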
\end{eqnarray}

(11)より、式(10.77)が示せました。

目次へ戻る