隐马尔可夫模型python实现。
- 隐马尔可夫代码实现比数学计算要简单,只需要三个矩阵:转移概率矩阵、发射概率矩阵、初始概率矩阵。
- 维特比算法是用来推测最后结果的。
- HMM实现可以作为CRF实现的基础。
class HMM:
def __init__(self, hidden_states):
self.hidden_states = hidden_states
self.start = {k: 0.0 for k in hidden_states}
self.trans = {k: {s: 0.0 for s in hidden_states} for k in hidden_states}
self.emi = {k: {} for k in hidden_states}
def train_single(self, hidden_state_list, state_list):
'''
训练一条样本
:param hidden_state_list: 例如['B', 'M', 'E', 'S']
:param state_list: ['娃','哈','哈','啊']
'''
for idx, hidden_state in enumerate(hidden_state_list):
state = state_list[idx]
if idx == 0:
self.start[hidden_state] += 1
else:
front_hidden_state = hidden_state_list[idx-1]
self.trans[front_hidden_state][hidden_state] += 1
self.emi[hidden_state][state] = self.emi[hidden_state].get(state, 0) + 1
def train(self, train_data):
for hsl, sl in train_data:
self.train_single(hsl, sl)
self.start = {k: np.log(v / np.sum(list(self.start.values()))) if v != 0 else -3e+100 for k, v in self.start.items()}
self.trans = {k: {vk:np.log(vv / np.sum(list(v.values()))) if vv != 0 else -3e+100 for vk, vv in v.items()} for k, v in self.trans.items()}
self.emi = {k:{vk:np.log(vv / np.sum(list(v.values()))) if vv != 0 else -3e+100 for vk, vv in v.items()} for k, v in self.emi.items()}
def viterbi(self, state_list):
for idx, state in enumerate(state_list):
if idx == 0:
current_path = {hidden_state:[hidden_state] for hidden_state in self.hidden_states}
current_prob = {hidden_state:self.start[hidden_state] + self.emi[hidden_state].get(state, -3e+100) for hidden_state in self.hidden_states}
else:
temp_current_prob = {}
temp_current_path = {}
for cur_hs in self.hidden_states:
prob, hs = max([(current_prob[fr_hs] + self.trans[fr_hs].get(cur_hs, -3e+100) + self.emi[cur_hs].get(state, -3e+100), fr_hs) for fr_hs in self.hidden_states])
temp_current_path[cur_hs] = current_path[hs] + [cur_hs]
temp_current_prob[cur_hs] = prob
current_path = temp_current_path
current_prob = temp_current_prob
max_prob, max_state = max([(v, k) for k, v in current_prob.items()])
return current_path[max_state]