关于最大似然估计法,我们有以下直观想法:现在已经取到样本值$x_{1},x_{2},\cdots,x_{n}$了,这表明取到这一样本值的概率$L(\theta)$比较大。我们当然不会考虑那些不能使样本$x_{1},x_{2},\cdots,x_{n}$出现的$\theta \in \Theta$作为$\theta$的估计,再者,如果已知当$\theta=\theta_{0}\in \Theta$时使$L(\theta)$取很大值,而$\Theta$中的其他值使$L(\theta)$取很小值,我们自然认为取$\theta_{0}$作为未知参数$\theta$的估计值较为合理。
来源:《概率论与数理统计》高等教育出版社-P152
$$
\begin{gathered}
\text{Data}:X=(x_{1},x_{2},\cdots,x_{N})^{T}=\begin{pmatrix}
x_{1}^{T} \\ x_{2}^{T} \\ \vdots \\ x_{N}^{T}
\end{pmatrix}_{N \times p},\quad x_{i} \in \mathbb{R}^{p},\quad x_{i}\overset{\text{iid}}{\sim }N(\mu,\Sigma )\\
\text{MLE}:\theta_{\text{MLE}}=\mathop{argmax}\limits_{\theta}P(X|\theta),\quad \theta=(\mu,\Sigma )
\end{gathered}
$$
令$p=1,\theta=(\mu,\sigma^{2})$
$$
\begin{aligned}
p(x)&=\frac{1}{\sqrt{2\pi}\sigma}\exp\left(-\frac{(x-\mu)^{2}}{2\sigma^{2}}\right)\\
p(x)&=\frac{1}{(2\pi)^{\frac{p}{2}}\left|\Sigma \right|^{\frac{1}{2}}}\exp\left(- \frac{1}{2}(x-\mu )^{T}\Sigma ^{-1}(x-\mu)\right)
\end{aligned}
$$
这里先讨论一维的情况
$$
\begin{aligned}
\log P(X|\theta)&=\log \prod\limits_{i=1}^{N}p(x_{i}|\theta)\\
&=\sum\limits_{i=1}^{N}\log p(x_{i}|\theta)\\
&=\sum\limits_{i=1}^{N}\log \frac{1}{\sqrt{2\pi}\sigma}\exp\left(- \frac{(x_{i}-\mu)^{2}}{2\sigma^{2}}\right)\\
&=\sum\limits_{i=1}^{N}\left[\log \frac{1}{\sqrt{2\pi}}+\log \frac{1}{\sigma}- \frac{(x_{i}-\mu)^{2}}{2\sigma^{2}}\right]
\end{aligned}
$$
对于$\mu_\text{MLE}$
$$
\begin{aligned}
\mu_\text{MLE}&=\mathop{argmax}\limits_{\mu}\log P(X|\theta)\\
&=\mathop{argmax}\limits_{\mu}\sum\limits_{i=1}^{N}- \frac{(x_{i}-\mu)^{2}}{2\sigma^{2}}\\
&=\mathop{argmin}\limits_{\mu}\sum\limits_{i=1}^{N}(x_{i}-\mu)^{2}\\
\frac{\partial }{\partial \mu}\sum\limits_{i=1}^{N}(x_{i}-\mu)^{2}&=\sum\limits_{i=1}^{N}2(x_{i}-\mu)(-1)\\
\sum\limits_{i=1}^{N}2(x_{i}-\mu)(-1)&=0\\
\sum\limits_{i=1}^{N}(x_{i}-\mu)&=0\\
\mu_\text{MLE} &=\frac{1}{N}\sum\limits_{i=1}^{N}x_{i}
\end{aligned}
$$
对于$\sigma^{2}_\text{MLE}$
$$
\begin{aligned}
\sigma^{2}_\text{MLE}&=\mathop{argmax}\limits_{\sigma}\log P(X|\theta)\\
&=\mathop{argmax}\limits_{\sigma}\sum\limits_{i=1}^{N}\left(- \log \sigma- \frac{1}{2\sigma^{2}}(x_{i}-\mu)^{2}\right)\\
\frac{\partial }{\partial \sigma}\sum\limits_{i=1}^{N}\left(- \log \sigma- \frac{1}{2\sigma^{2}}(x_{i}-\mu)^{2}\right)&=\sum\limits_{i=1}^{N}\left[- \frac{1}{\sigma}+ \frac{1}{2}(x_{i}-\mu)^{2}(-2)\sigma^{-3}\right]\\
\sum\limits_{i=1}^{N}\left[- \frac{1}{\sigma}+ \frac{1}{2}(x_{i}-\mu)^{2}(-2)\sigma^{-3}\right]&=0\\
-\sum\limits_{i=1}^{N}\sigma^{2}+\sum\limits_{i=1}^{N}(x_{i}-\mu)^{2}&=0\\
\sum\limits_{i=1}^{N}\sigma^{2}&=\sum\limits_{i=1}^{N}(x_{i}-\mu)^{2}\\
\sigma^{2}_\text{MLE}&=\frac{1}{N}\sum\limits_{i=1}^{N}(x_{i}-\mu_\text{MLE})^{2}
\end{aligned}
$$
实际上,$\mu_\text{MLE}$是无偏估计,$\sigma^{2}_\text{MLE}$是有偏估计
对于$\mu_\text{MLE}$
$$
\begin{aligned}
E(\mu_\text{MLE})&=\frac{1}{N}\sum\limits_{i=1}^{N}E(x_{i})\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}\mu\\
&=\mu
\end{aligned}
$$
对于$\sigma^{2}_\text{MLE}$
$$
\begin{aligned}
\sigma_\text{MLE}^{2}&=\frac{1}{N}\sum\limits_{i=1}^{N}(x_{i}-\mu_\text{MLE})^{2}\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}(x_{i}^{2}-2x_{i}\mu_\text{MLE}+\mu_\text{MLE}^{2})\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}x_{i}^{2}-2\cdot \mu_\text{MLE}^{2}+\mu_\text{MLE}^{2}\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}x_{i}^{2}-\mu_\text{MLE}^{2}\\
E(\sigma_\text{MLE}^{2})&=E\left(\frac{1}{N}\sum\limits_{i=1}^{N}x_{i}^{2}-\mu_\text{MLE}^{2}\right)\\
&=E\left[ \left(\frac{1}{N}\sum\limits_{i=1}^{N}x_{i}^{2}-\mu^{2}\right)-(\mu_\text{MLE}^{2}-\mu^{2})\right]\\
&=E\left(\frac{1}{N}\sum\limits_{i=1}^{N}x_{i}^{2}-\mu^{2}\right)-E(\mu_\text{MLE}^{2}-\mu^{2})\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}E(x_{i}^{2}-\mu^{2})-[E(\mu_\text{MLE}^{2})-E(\mu^{2})]\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}[E(x_{i}^{2})-E(\mu^{2})]-[E(\mu_\text{MLE}^{2})-E(\mu^{2})]\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}[E(x_{i}^{2})-\mu^{2}]-[E(\mu_\text{MLE}^{2})-\mu^{2}]\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}[E(x_{i}^{2})-E(x_{i})^{2}]-[E(\mu_\text{MLE}^{2})-E(\mu_\text{MLE})^{2}]\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}\text{Var}(x_{i})-\text{Var}(\mu_\text{MLE})\\
&=\frac{1}{N}\sum\limits_{i=1}^{N}\sigma^{2}- \frac{\sigma^{2}}{N}\\
&=\frac{N-1}{N}\sigma^{2}
\end{aligned}
$$