语音识别总结

时间:2021-12-31 20:32:47

<p>1. 试用了 Google 在线语音识别,长语音没有识别成功,思路是试用 `GitHub <https://github.com/Uberi/speech_recognition>`__ 上的项目。</p><p>2. 试用科大讯飞 SDK,识别成功,但背景噪音较大时基本无法识别。其中参数 ent=sms-en16k 用于识别英文。</p>
#include "stdlib.h"
#include "stdio.h"
#include <string.h>
#include <windows.h>
#include <conio.h>
#include <errno.h>


#include "../../include/qisr.h"
#include "../../include/msp_cmn.h"
#include "../../include/msp_errors.h"


#pragma comment(lib,"D:\\work\\Windows_voice_1.051_54a924a4\\lib\\msc.lib")//x86




int ran_iat()
{
        int ret=0;
        int error=0;
        char rec_result[102400] = {0};
        const char *sessionID = NULL;
        FILE *f_pcm = NULL;
        FILE* fout=NULL;
        char *pPCM = NULL;
        int lastAudio = 0 ;
        int audStat = MSP_AUDIO_SAMPLE_CONTINUE ;
        int epStatus = MSP_EP_LOOKING_FOR_SPEECH;
        int recStatus = MSP_REC_STATUS_SUCCESS ;
        long pcmCount = 0;
        long pcmSize = 0;
        int conutSession=0;
		char *fname = "D:\\work\\Windows_voice_1.051_54a924a4\\bin\\wav\\abcddel.wav";
         f_pcm = fopen(fname, "rb");
        if (NULL != f_pcm) {
                fseek(f_pcm, 0, SEEK_END);
                pcmSize = ftell(f_pcm);
                fseek(f_pcm, 0, SEEK_SET);
                pPCM = (char *)malloc(pcmSize);
                fread((void *)pPCM, pcmSize, 1, f_pcm);
                fclose(f_pcm);
                f_pcm = NULL;
        }//读取音频文件
        else{
                printf("Open audio failed");
                return 0;
        }


        fout = fopen( "iat.txt" , "ab");
        if( NULL == fout )
        {
                printf("failed to open file,please check the file.\n");
        }




        exit:
        sessionID = QISRSessionBegin(NULL,"sub=iat,auf=audio/L16;rate=16000,aue=speex-wb,ent=sms-en16k,rst=plain,rse=gb2312,vad_speech_tail=900",&error);//vad_speech_tail=900,这是vad后端点检测,就靠这个来断句,默认2s,感觉太大了,900ms时,就是说话停顿,结束此次会话,开启下次会话
        if (error !=0)
        {
                printf("session begin error %d",error);
                return 0;
        }
        conutSession ++;
        while (1) {
                unsigned int len = 6400;
                int countLen=6400;
                if (pcmSize < 12800) {
                        len = pcmSize;
                        lastAudio = 1;//音频长度小于12800
                }
                audStat = MSP_AUDIO_SAMPLE_CONTINUE;//有后继音频
                if (pcmCount == 0)
                        audStat = MSP_AUDIO_SAMPLE_FIRST;
                if (len<=0)
                {
                        break;
                }
                printf("csid=%s,count=%d,aus=%d,",sessionID,pcmCount/countLen,audStat);
                ret = QISRAudioWrite(sessionID, (const void *)&pPCM[pcmCount], len, audStat, &epStatus, &recStatus);//写音频
                printf("eps=%d,rss=%d,ret=%d\n",epStatus,recStatus,error);
                if (epStatus >=3)//开发文档中epStatus>=3时,就会检测到一句话的尾端点,然后认为这句话结束和会话session结束,就另开一路会话
                {
                        QISRAudioWrite(sessionID, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &epStatus, &recStatus);//写一个0,告诉云端,这次会话结束了
              //必须拿完所有结果才开启下次会话,msc不能多线程调用,必须按sessionbegin、audiowrite、getresult、sessionend流程来,所以后面不能单独调getresult,
                        while (recStatus != MSP_REC_STATUS_COMPLETE && 0 == error)
                        {
                                const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//获取结果
                                if (NULL != rslt)
                                {
                                        strcat(rec_result,rslt);
                                        fwrite(rslt,1,strlen(rslt),fout);
                                }
                        }
                        QISRSessionEnd(sessionID, NULL);
                        goto exit;
                }
                if (ret != 0)
                        break;
                pcmCount += (long)len;
                pcmSize -= (long)len;
                if (recStatus == MSP_REC_STATUS_SUCCESS) 
                {
                        const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//服务端已经有识别结果,可以获取
                        if (NULL != rslt)
                        {
                                strcat(rec_result,rslt);
                                fwrite(rslt,1,strlen(rslt),fout);
                        }
                }
                if (epStatus == MSP_EP_AFTER_SPEECH)
                        break;
                _sleep(30);//需要sleep下,
        }
        QISRAudioWrite(sessionID, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &epStatus, &recStatus);
        free(pPCM);
        pPCM = NULL;
        while (recStatus != MSP_REC_STATUS_COMPLETE && 0 == error) {
                const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//获取结果
                if (NULL != rslt)
                {
                        strcat(rec_result,rslt);
                        fwrite(rslt,1,strlen(rslt),fout);
                }
                _sleep(50);
        }
        QISRSessionEnd(sessionID, NULL);
        printf("=============================================================\n");
        printf("The result is: %s\n",rec_result);
        printf("=============================================================\n");
        printf("session count %d\n",conutSession);
        return 0;
}
        


/* Program entry point: logs in to the MSC service, runs one recognition pass
 * via ran_iat() when the login succeeded, logs out, and waits for a key press.
 * Returns 0 on success and 1 when MSPLogin reported an error. */
int main()
{
        int ret = MSPLogin(NULL, NULL, "appid = 54a924a4");

        if (0 != ret) {
                /* Without a successful login there is nothing to recognise with. */
                printf("MSPLogin failed, error code %d\n", ret);
        } else {
                ran_iat();
        }
        MSPLogout();
        system("pause"); /* keep the console window open until a key is pressed */
        return (0 != ret) ? 1 : 0;
}