<p>1. 试用了 Google 在线语音识别,长语音没有成功;思路:试用 <a href="https://github.com/Uberi/speech_recognition">GitHub</a> 上的 speech_recognition 项目。</p><p>2. 试用科大讯飞 SDK,识别成功,但背景噪音较大时基本无法识别。其中参数 ent=sms-en16k 用于识别英文。</p>
#include "stdlib.h" #include "stdio.h" #include <windows.h> #include <conio.h> #include <errno.h> #include "../../include/qisr.h" #include "../../include/msp_cmn.h" #include "../../include/msp_errors.h" #pragma comment(lib,"D:\\work\\Windows_voice_1.051_54a924a4\\lib\\msc.lib")//x86 int ran_iat() { int ret=0; int error=0; char rec_result[102400] = {0}; const char *sessionID = NULL; FILE *f_pcm = NULL; FILE* fout=NULL; char *pPCM = NULL; int lastAudio = 0 ; int audStat = MSP_AUDIO_SAMPLE_CONTINUE ; int epStatus = MSP_EP_LOOKING_FOR_SPEECH; int recStatus = MSP_REC_STATUS_SUCCESS ; long pcmCount = 0; long pcmSize = 0; int conutSession=0; char *fname = "D:\\work\\Windows_voice_1.051_54a924a4\\bin\\wav\\abcddel.wav"; f_pcm = fopen(fname, "rb"); if (NULL != f_pcm) { fseek(f_pcm, 0, SEEK_END); pcmSize = ftell(f_pcm); fseek(f_pcm, 0, SEEK_SET); pPCM = (char *)malloc(pcmSize); fread((void *)pPCM, pcmSize, 1, f_pcm); fclose(f_pcm); f_pcm = NULL; }//读取音频文件 else{ printf("Open audio failed"); return 0; } fout = fopen( "iat.txt" , "ab"); if( NULL == fout ) { printf("failed to open file,please check the file.\n"); } exit: sessionID = QISRSessionBegin(NULL,"sub=iat,auf=audio/L16;rate=16000,aue=speex-wb,ent=sms-en16k,rst=plain,rse=gb2312,vad_speech_tail=900",&error);//vad_speech_tail=900,这是vad后端点检测,就靠这个来断句,默认2s,感觉太大了,900ms时,就是说话停顿,结束此次会话,开启下次会话 if (error !=0) { printf("session begin error %d",error); return 0; } conutSession ++; while (1) { unsigned int len = 6400; int countLen=6400; if (pcmSize < 12800) { len = pcmSize; lastAudio = 1;//音频长度小于12800 } audStat = MSP_AUDIO_SAMPLE_CONTINUE;//有后继音频 if (pcmCount == 0) audStat = MSP_AUDIO_SAMPLE_FIRST; if (len<=0) { break; } printf("csid=%s,count=%d,aus=%d,",sessionID,pcmCount/countLen,audStat); ret = QISRAudioWrite(sessionID, (const void *)&pPCM[pcmCount], len, audStat, &epStatus, &recStatus);//写音频 printf("eps=%d,rss=%d,ret=%d\n",epStatus,recStatus,error); if (epStatus >=3)//开发文档中epStatus>=3时,就会检测到一句话的尾端点,然后认为这句话结束和会话session结束,就另开一路会话 { 
QISRAudioWrite(sessionID, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &epStatus, &recStatus);//写一个0,告诉云端,这次会话结束了 //必须拿完所有结果才开启下次会话,msc不能多线程调用,必须按sessionbegin、audiowrite、getresult、sessionend流程来,所以后面不能单独调getresult, while (recStatus != MSP_REC_STATUS_COMPLETE && 0 == error) { const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//获取结果 if (NULL != rslt) { strcat(rec_result,rslt); fwrite(rslt,1,strlen(rslt),fout); } } QISRSessionEnd(sessionID, NULL); goto exit; } if (ret != 0) break; pcmCount += (long)len; pcmSize -= (long)len; if (recStatus == MSP_REC_STATUS_SUCCESS) { const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//服务端已经有识别结果,可以获取 if (NULL != rslt) { strcat(rec_result,rslt); fwrite(rslt,1,strlen(rslt),fout); } } if (epStatus == MSP_EP_AFTER_SPEECH) break; _sleep(30);//需要sleep下, } QISRAudioWrite(sessionID, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &epStatus, &recStatus); free(pPCM); pPCM = NULL; while (recStatus != MSP_REC_STATUS_COMPLETE && 0 == error) { const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//获取结果 if (NULL != rslt) { strcat(rec_result,rslt); fwrite(rslt,1,strlen(rslt),fout); } _sleep(50); } QISRSessionEnd(sessionID, NULL); printf("=============================================================\n"); printf("The result is: %s\n",rec_result); printf("=============================================================\n"); printf("session count %d\n",conutSession); return 0; } int main() { int ret=0; ret = MSPLogin(NULL,NULL,"appid = 54a924a4"); ran_iat(); MSPLogout(); system("pause"); return 0; }