# --------------------------------------------------------- #
# #
# Train Language Recognition #
# #
# --------------------------------------------------------- #
# #
# Train Language Recognition #
# Copyright(c) iFlytek Corporation, 2018 #
# Hefei, Anhui, PRC #
# http://www.iflytek.com #
# #
# --------------------------------------------------------- #
# python : 2.7 version #
# cuda : Toolkit 9.1 #
# pytorch : 0.4.0 #
# --------------------------------------------------------- #
import os
import time
import codecs
import logging
# Module-level logger handle. NOTE: the script code visible in this file logs
# through the root-logger helpers (logging.info) rather than this object.
logger = logging.getLogger(__name__)
# Configure the root logger: emit DEBUG and above, each record prefixed with
# a timestamp and the level name.
logging.basicConfig(level = logging.DEBUG,
format = '%(asctime)s[%(levelname)s] ---- %(message)s',
)
import torch
import torch.utils.data as Data
from read_data import get_samples, get_data, TorchDataSet
from net_component import LanNet
## ======================================
# Configuration and hyper-parameters.

# List files handed to TorchDataSet (training and development sets).
train_list = "./label_train_list_fb.txt"
dev_list = "./label_dev_list_fb.txt"

# Device selection. `device` is always bound (CPU fallback) so that code
# referencing it cannot hit a NameError on hosts without CUDA; GPU transfers
# are still gated on `use_cuda`.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:2" if use_cuda else "cpu")

# Directory where the per-epoch model checkpoints are written.
model_dir = "/inference/models"
try:
    # Create-then-handle instead of exists()-then-create: the latter races
    # with any other process creating the same directory.
    os.makedirs(model_dir)
except OSError:
    if not os.path.isdir(model_dir):
        raise

# Network / training parameters.
dimension = 40          # feature dimension: dataset width and LanNet input_dim
language_nums = 6       # number of classes: LanNet output_dim
learning_rate = 0.1     # initial SGD learning rate
batch_size = 64
chunk_num = 10          # passed to TorchDataSet together with batch_size
train_iteration = 10    # number of epochs
display_fre = 50        # log training progress every `display_fre` steps
half = 4                # first epoch index at which the learning rate is halved
## ======================================
train_dataset = TorchDataSet(train_list, batch_size, chunk_num, dimension)
dev_dataset = TorchDataSet(dev_list, batch_size, chunk_num, dimension)
logging.info('finish reading all train data')

# Build the network and an SGD optimizer (with momentum) over its parameters.
train_module = LanNet(input_dim=dimension, hidden_dim=128, bn_dim=30, output_dim=language_nums)
logging.info(train_module)
optimizer = torch.optim.SGD(train_module.parameters(), lr=learning_rate, momentum=0.9)

# Move the model to the GPU when one is available.
if use_cuda:
    train_module = train_module.to(device)
def _prepare_batch(batch_x, batch_y):
    """Convert one dataset batch into the inputs passed to the model.

    Column 0 of ``batch_y`` is read as the class label and column 1 as the
    number of valid frames of each utterance.

    Returns:
        (data, mask, target, batch_size) where ``data`` is ``batch_x`` trimmed
        to the longest utterance of the batch, ``mask`` is 1.0 on valid
        frames and 0.0 on padding, and ``target`` is a (batch, 1) long tensor.
        Tensors are moved to ``device`` when ``use_cuda`` is set.
    """
    batch_target = batch_y[:, 0].contiguous().view(-1, 1).long()
    batch_frames = batch_y[:, 1].contiguous().view(-1, 1)
    max_batch_frames = int(batch_frames.max().item())
    # Drop the padding beyond the longest utterance in this batch.
    batch_data = batch_x[:, :max_batch_frames, :]
    step_batch_size = batch_target.size(0)
    batch_mask = torch.zeros(step_batch_size, max_batch_frames)
    for ii in range(step_batch_size):
        batch_mask[ii, :int(batch_frames[ii].item())] = 1.
    # Move the data to the GPU.
    if use_cuda:
        batch_data = batch_data.to(device)
        batch_mask = batch_mask.to(device)
        batch_target = batch_target.to(device)
    return batch_data, batch_mask, batch_target, step_batch_size


for epoch in range(train_iteration):
    if epoch >= half:
        learning_rate /= 2.
        # Adjust the learning rate in place. Building a new optimizer here
        # would throw away the accumulated momentum buffers every epoch.
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    ## train
    train_dataset.reset()
    train_module.train()
    epoch_tic = time.time()
    train_loss = 0.
    train_acc = 0.
    sum_batch_size = 0      # number of batches seen this epoch
    curr_batch_size = 0     # number of batches since the last progress log
    curr_batch_acc = 0
    for step, (batch_x, batch_y) in enumerate(train_dataset):
        tic = time.time()
        batch_train_data, batch_mask, batch_target, _ = _prepare_batch(batch_x, batch_y)

        acc, loss = train_module(batch_train_data, batch_mask, batch_target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step_time = time.time() - tic

        train_loss += loss.item()
        train_acc += acc
        curr_batch_acc += acc
        sum_batch_size += 1
        curr_batch_size += 1
        if step % display_fre == 0:
            logging.info('Epoch:%d, Batch:%d, acc:%.6f, loss:%.6f, cost time :%.6fs', epoch, step, curr_batch_acc/curr_batch_size, loss.item(), step_time)
            curr_batch_acc = 0.
            curr_batch_size = 0

    # Checkpoint the weights after every epoch.
    modelfile = '%s/model%d.model'%(model_dir, epoch)
    torch.save(train_module.state_dict(), modelfile)
    epoch_time = time.time() - epoch_tic
    if sum_batch_size:
        logging.info('Epoch:%d, train-acc:%.6f, train-loss:%.6f, cost time :%.6fs', epoch, train_acc/sum_batch_size, train_loss/sum_batch_size, epoch_time)
    else:
        # Avoid a ZeroDivisionError when the dataset yields no batch at all.
        logging.warning('Epoch:%d, training set yielded no batches', epoch)

    ## -----------------------------------------------------------------------------------------------------------------------------
    ## dev
    train_module.eval()
    epoch_tic = time.time()
    dev_loss = 0.
    dev_acc = 0.
    dev_batch_num = 0
    for step, (batch_x, batch_y) in enumerate(dev_dataset):
        batch_dev_data, batch_mask, batch_target, step_batch_size = _prepare_batch(batch_x, batch_y)

        with torch.no_grad():
            acc, loss = train_module(batch_dev_data, batch_mask, batch_target)
        # NOTE(review): the training loop uses `loss` as returned while this
        # path divides by the batch size -- confirm against LanNet's loss
        # reduction that the two reported losses are comparable.
        loss = loss.sum()/step_batch_size

        dev_loss += loss.item()
        dev_acc += acc
        dev_batch_num += 1

    epoch_time = time.time() - epoch_tic
    if dev_batch_num:
        logging.info('Epoch:%d, dev-acc:%.6f, dev-loss:%.6f, cost time :%.6fs', epoch, dev_acc/dev_batch_num, dev_loss/dev_batch_num, epoch_time)
    else:
        # Avoid a ZeroDivisionError when the dev set yields no batch at all.
        logging.warning('Epoch:%d, dev set yielded no batches', epoch)
# -*- coding:utf-8 -*-
import codecs
import copy
import random
import torch
from HTKfile import HTKfile
def get_samples(list):
    """Count the entries of a list file and find the longest utterance.

    Args:
        list: path of a UTF-8 list file. The first whitespace-separated
            field of every non-blank line is handed to ``HTKfile``.

    Returns:
        A ``(samples, max_frames)`` tuple: the number of non-blank lines and
        the largest ``get_frame_num()`` among them (``(0, 0)`` when the file
        has no entries).
    """
    frame_counts = []
    with codecs.open(list, 'r', 'utf-8') as listing:
        for raw_line in listing:
            fields = raw_line.split()
            if not fields:
                # Blank (or whitespace-only) line: nothing to count.
                continue
            frame_counts.append(HTKfile(fields[0]).get_frame_num())
    # Seed with 0 so an empty list still yields a maximum of 0.
    return len(frame_counts), max([0] + frame_counts)
def get_data(list, samples, max_frames, dimension):
    """Load every utterance of a list file into zero-padded tensors.

    Args:
        list: path of a UTF-8 list file; each non-blank line is split on
            whitespace into a feature path (field 0) and a label (field 1).
        samples: number of rows to allocate.
        max_frames: number of frames to allocate per row.
        dimension: feature dimension to allocate per frame.

    Returns:
        ``(data, target_frames, name_list)``: ``data`` is a
        ``(samples, max_frames, dimension)`` tensor holding the
        mean-subtracted features of each utterance (zero elsewhere),
        ``target_frames`` is a ``(samples, 2)`` tensor of
        ``[label, frame count]`` rows, and ``name_list`` holds the value of
        ``get_file_name()`` for each utterance in file order.
    """
    data = torch.zeros(samples, max_frames, dimension)
    target_frames = torch.zeros(samples, 2)
    name_list = []
    with codecs.open(list, 'r', 'utf-8') as listing:
        split_lines = (raw_line.split() for raw_line in listing)
        # Blank lines are skipped and do not consume a row index.
        for row, fields in enumerate(entry for entry in split_lines if entry):
            label = int(str(fields[1]))
            htk_file = HTKfile(fields[0])
            feature = torch.Tensor(htk_file.read_data())
            utt_name = htk_file.get_file_name()
            n_frames = htk_file.get_frame_num()
            # Subtract the per-dimension mean over the frames of the utterance.
            centred = feature - feature.mean(dim=0, keepdim=True)
            data[row, :n_frames, :] = centred
            target_frames[row] = torch.Tensor([label, n_frames])
            name_list.append(utt_name)
    return data, target_frames, name_list
class TorchDataSet(object):
    """Iterable that streams zero-padded mini-batches of HTK features.

    The list file is read once; each non-blank line is split on whitespace
    into a feature path (field 0) and an integer label (field 1). Iterating
    loads the features in chunks of ``chunk_num * batch_size`` utterances and
    yields ``(data, target)`` pairs:

    * ``data``: ``(batch_size, max_frames, dimension)`` tensor of
      mean-subtracted features, zero-padded; ``max_frames`` is the longest
      utterance of the chunk the batch belongs to.
    * ``target``: ``(batch_size, 2)`` tensor of ``[label, frame count]`` rows.

    Only full batches are yielded; utterances left over at the end that do
    not fill a batch are skipped for that pass.
    """

    def __init__(self, file_list, batch_size, chunk_num, dimension):
        self._batch_size = batch_size
        self._chunck_num = chunk_num
        self._chunck_size = self._chunck_num*self._batch_size
        self._dimension = dimension
        with codecs.open(file_list, 'r', 'utf-8') as list_file:
            # Drop blank lines (e.g. a trailing newline); they would
            # otherwise raise IndexError when the entry is parsed.
            self._dataset = [line for line in list_file.readlines() if line.strip()]
        random.shuffle(self._dataset)

    def reset(self):
        """Reshuffle the utterance order (call before each new pass)."""
        random.shuffle(self._dataset)

    def _load_sample(self, line):
        """Parse one list line; return (features, [label, frames] row, frames)."""
        fields = line.split()
        label = int(str(fields[1]))
        htk_file = HTKfile(fields[0])
        feature = torch.Tensor(htk_file.read_data())
        frames = htk_file.get_frame_num()
        # Subtract the per-dimension mean over the frames of the utterance.
        feature = feature - feature.mean(dim=0, keepdim=True)
        return feature, torch.Tensor([label, frames]), frames

    def _batches(self, samples, max_frames):
        """Yield every full batch that can be built from ``samples``."""
        full = len(samples) - len(samples) % self._batch_size
        for start in range(0, full, self._batch_size):
            # Fresh tensors per batch: the padding is guaranteed to be zero
            # and batches already handed to the caller are never overwritten.
            data = torch.zeros(self._batch_size, max_frames, self._dimension)
            target = torch.zeros(self._batch_size, 2)
            members = samples[start:start + self._batch_size]
            for row, (feature, info) in enumerate(members):
                data[row, :feature.size(0), :] = feature
                target[row, :] = info
            yield data, target

    def __iter__(self):
        chunk = []
        max_frames = 0
        for line in self._dataset:
            feature, info, frames = self._load_sample(line)
            max_frames = max(max_frames, frames)
            chunk.append((feature, info))
            if len(chunk) == self._chunck_size:
                for batch in self._batches(chunk, max_frames):
                    yield batch
                chunk = []
                max_frames = 0
        # Trailing partial chunk: still emit every full batch it contains
        # (including the case of exactly one batch).
        for batch in self._batches(chunk, max_frames):
            yield batch
# -*- coding:utf-8 -*-
"""a module to read the HTK format file"""
__author__ = 'yfhu3'
__email__ = 'yfhu3@iflytek.com'
__version__ = '2018.01.02 with python 2.7.14'
import numpy as np
import struct
import re
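# HTK file layout:
# number of frames: 4 bytes (bytes 0-3)
# sample period: 4 bytes (bytes 4-7)
# bytes per frame: 2 bytes (bytes 8-9)
# parameter kind: 2 bytes (bytes 10-11)
# data: N bytes (from byte 12 to the end of the file)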
class HTKfile(object):
    """Reader for an HTK feature file holding 4-byte float features.

    The 12-byte big-endian header (frame count, sample period, bytes per
    frame, parameter kind) is parsed on construction; the payload is read on
    demand by :meth:`read_data`. No file handle is kept open between calls.

    ``path`` may carry a trailing frame range such as ``feat.fea[2,56]`` or
    ``feat.fea[2, 56]``; the range is applied as the slice ``[start:end]``
    of the frames. An end of 0, or one past the frame count in the header,
    means "up to the last frame".
    """

    _HEADER_BYTES = 12

    def __init__(self, path):
        self.__start_frame = 0
        self.__end_frame = 0
        self.__new_path = path
        # Never populated by this reader; initialised so that
        # get_state_label() returns None instead of raising AttributeError.
        self.__state_label = None
        if path.endswith(']'):  # trailing frame range, e.g. [2, 56]
            # Empty pieces are discarded so that a space after the comma
            # does not turn into int('').
            pieces = [p for p in re.split(r'[\[,\s\]]', path) if p]
            self.__new_path = pieces[0]
            self.__start_frame = int(pieces[-2])
            self.__end_frame = int(pieces[-1])
        with open(self.__new_path, 'rb') as stream:
            header = stream.read(self._HEADER_BYTES)
        # HTK stores its header big-endian.
        (self.__frame_num,
         self.__sample_period,
         self.__bytes_of_one_frame,
         self.__sample_kind) = struct.unpack('>IIHh', header)
        # Floor division keeps the dimension an int on Python 2 and 3 alike.
        self.__feature_dim = self.__bytes_of_one_frame // 4
        # The name is the second-to-last piece of the path split on '/' and
        # '.', i.e. the base name without its extension for "dir/name.ext".
        self.__file_name = re.split(r'[/.]', path)[-2]
        if self.__end_frame == 0 or self.__end_frame > self.__frame_num:
            # No (usable) end frame given: read up to the last frame.
            self.__end_frame = self.__frame_num

    def read_data(self):
        """Return the selected frames as a float32 array (frames, feature_dim).

        Raises:
            struct.error: if the file holds fewer payload bytes than the
                header announces.
        """
        count = self.__frame_num * self.__feature_dim
        with open(self.__new_path, 'rb') as stream:
            stream.seek(self._HEADER_BYTES)
            payload = stream.read(self.__frame_num * self.__bytes_of_one_frame)
        values = struct.unpack('>%df' % count, payload)
        data = np.array(values, dtype='float32')
        data = data.reshape(self.__frame_num, self.__feature_dim)
        return data[self.__start_frame: self.__end_frame]

    # Accessors for the parsed values.
    def get_start_frame(self):
        """Return the index of the first selected frame."""
        return self.__start_frame

    def get_end_frame(self):
        """Return the (exclusive) index ending the selected frames."""
        return self.__end_frame

    def get_frame_num(self):
        """Return the number of selected frames (end frame - start frame)."""
        return (self.__end_frame - self.__start_frame)

    def get_sample_period(self):
        """Return the sample period stored in the header."""
        return self.__sample_period

    def get_bytes_of_one_frame(self):
        """Return the number of bytes per frame stored in the header."""
        return self.__bytes_of_one_frame

    def get_file_name(self):
        """Return the file name derived from the path (see ``__init__``)."""
        return self.__file_name

    def get_feature_dim(self):
        """Return the number of 4-byte values per frame."""
        return self.__feature_dim

    def get_state_label(self):
        """Return the state label; always None, as this reader never sets it."""
        return self.__state_label
# -*- coding:utf-8 -*-
import codecs
import copy
import random
import torch
from HTKfile import HTKfile
def get_samples(list):
    """Count the entries of a list file and find the longest utterance.

    Args:
        list: path of a UTF-8 list file. The first whitespace-separated
            field of every non-blank line is handed to ``HTKfile``.

    Returns:
        A ``(samples, max_frames)`` tuple: the number of non-blank lines and
        the largest ``get_frame_num()`` among them (``(0, 0)`` when the file
        has no entries).
    """
    frame_counts = []
    with codecs.open(list, 'r', 'utf-8') as listing:
        for raw_line in listing:
            fields = raw_line.split()
            if not fields:
                # Blank (or whitespace-only) line: nothing to count.
                continue
            frame_counts.append(HTKfile(fields[0]).get_frame_num())
    # Seed with 0 so an empty list still yields a maximum of 0.
    return len(frame_counts), max([0] + frame_counts)
def get_data(list, samples, max_frames, dimension):
    """Load every utterance of a list file into zero-padded tensors.

    Args:
        list: path of a UTF-8 list file; each non-blank line is split on
            whitespace into a feature path (field 0) and a label (field 1).
        samples: number of rows to allocate.
        max_frames: number of frames to allocate per row.
        dimension: feature dimension to allocate per frame.

    Returns:
        ``(data, target_frames, name_list)``: ``data`` is a
        ``(samples, max_frames, dimension)`` tensor holding the
        mean-subtracted features of each utterance (zero elsewhere),
        ``target_frames`` is a ``(samples, 2)`` tensor of
        ``[label, frame count]`` rows, and ``name_list`` holds the value of
        ``get_file_name()`` for each utterance in file order.
    """
    data = torch.zeros(samples, max_frames, dimension)
    target_frames = torch.zeros(samples, 2)
    name_list = []
    with codecs.open(list, 'r', 'utf-8') as listing:
        split_lines = (raw_line.split() for raw_line in listing)
        # Blank lines are skipped and do not consume a row index.
        for row, fields in enumerate(entry for entry in split_lines if entry):
            label = int(str(fields[1]))
            htk_file = HTKfile(fields[0])
            feature = torch.Tensor(htk_file.read_data())
            utt_name = htk_file.get_file_name()
            n_frames = htk_file.get_frame_num()
            # Subtract the per-dimension mean over the frames of the utterance.
            centred = feature - feature.mean(dim=0, keepdim=True)
            data[row, :n_frames, :] = centred
            target_frames[row] = torch.Tensor([label, n_frames])
            name_list.append(utt_name)
    return data, target_frames, name_list
class TorchDataSet(object):
    """Iterable that streams zero-padded mini-batches of HTK features.

    The list file is read once; each non-blank line is split on whitespace
    into a feature path (field 0) and an integer label (field 1). Iterating
    loads the features in chunks of ``chunk_num * batch_size`` utterances and
    yields ``(data, target)`` pairs:

    * ``data``: ``(batch_size, max_frames, dimension)`` tensor of
      mean-subtracted features, zero-padded; ``max_frames`` is the longest
      utterance of the chunk the batch belongs to.
    * ``target``: ``(batch_size, 2)`` tensor of ``[label, frame count]`` rows.

    Only full batches are yielded; utterances left over at the end that do
    not fill a batch are skipped for that pass.
    """

    def __init__(self, file_list, batch_size, chunk_num, dimension):
        self._batch_size = batch_size
        self._chunck_num = chunk_num
        self._chunck_size = self._chunck_num*self._batch_size
        self._dimension = dimension
        with codecs.open(file_list, 'r', 'utf-8') as list_file:
            # Drop blank lines (e.g. a trailing newline); they would
            # otherwise raise IndexError when the entry is parsed.
            self._dataset = [line for line in list_file.readlines() if line.strip()]
        random.shuffle(self._dataset)

    def reset(self):
        """Reshuffle the utterance order (call before each new pass)."""
        random.shuffle(self._dataset)

    def _load_sample(self, line):
        """Parse one list line; return (features, [label, frames] row, frames)."""
        fields = line.split()
        label = int(str(fields[1]))
        htk_file = HTKfile(fields[0])
        feature = torch.Tensor(htk_file.read_data())
        frames = htk_file.get_frame_num()
        # Subtract the per-dimension mean over the frames of the utterance.
        feature = feature - feature.mean(dim=0, keepdim=True)
        return feature, torch.Tensor([label, frames]), frames

    def _batches(self, samples, max_frames):
        """Yield every full batch that can be built from ``samples``."""
        full = len(samples) - len(samples) % self._batch_size
        for start in range(0, full, self._batch_size):
            # Fresh tensors per batch: the padding is guaranteed to be zero
            # and batches already handed to the caller are never overwritten.
            data = torch.zeros(self._batch_size, max_frames, self._dimension)
            target = torch.zeros(self._batch_size, 2)
            members = samples[start:start + self._batch_size]
            for row, (feature, info) in enumerate(members):
                data[row, :feature.size(0), :] = feature
                target[row, :] = info
            yield data, target

    def __iter__(self):
        chunk = []
        max_frames = 0
        for line in self._dataset:
            feature, info, frames = self._load_sample(line)
            max_frames = max(max_frames, frames)
            chunk.append((feature, info))
            if len(chunk) == self._chunck_size:
                for batch in self._batches(chunk, max_frames):
                    yield batch
                chunk = []
                max_frames = 0
        # Trailing partial chunk: still emit every full batch it contains
        # (including the case of exactly one batch).
        for batch in self._batches(chunk, max_frames):
            yield batch