声音采集的方式:直接对已有的声音(已经录制好的)进行处理;以及现场录制。这样的工具有:Windows recorder,Adobe audition,Linux的arecord。
- 判别:计算每个时刻(每一帧)的能量,设定一个阈值k,如果能量大于k,我们认为是1(1表示该点是语音),否则就是0。能量计算的公式就是:$E = 10\log_{10}\sum_{n=1}^{N} x_n^2$,其中 $x_n$ 是该帧内的第 n 个采样值,N 是帧长,E 的单位是分贝(dB)。
- 平滑:短于100ms的静音(silence)我们仍认为是语音的一部分,持续超过250ms的语音我们才认为是语音。在截取的语音信号前后各多截出250ms。这样做的前提是环境比较安静;如果不安静,那么就得另当别论,要看外界影响有多大。
- 算法一:先来一个比较简单的算法
- 算法二:更复杂一些的算法
#include <stdio.h> #include <stdlib.h> #include <math.h> //function log #include <conio.h> //kbhit() #include "portaudio.h" #include "readwave.h" //WriteWave() /* #define SAMPLE_RATE (17932) // Test failure to open with this value. */ //SAMPLE_RATE, FRAMES_PER_BUFFER, NUM_SECONDS, NUM_CHANNELS are modified by Yao Canwu #define SAMPLE_RATE (16000) #define FRAMES_PER_BUFFER (400) #define NUM_SECONDS (60) #define NUM_CHANNELS (1) /* #define DITHER_FLAG (paDitherOff) */ #define DITHER_FLAG (0) /**/ /** Set to 1 if you want to capture the recording to a file. */ #define WRITE_TO_FILE (0) /* Select sample format. */ #if 0 #define PA_SAMPLE_TYPE paFloat32 typedef float SAMPLE; #define SAMPLE_SILENCE (0.0f) #define PRINTF_S_FORMAT "%.8f" #elif 1 #define PA_SAMPLE_TYPE paInt16 typedef short SAMPLE; #define SAMPLE_SILENCE (0) #define PRINTF_S_FORMAT "%d" #elif 0 #define PA_SAMPLE_TYPE paInt8 typedef char SAMPLE; #define SAMPLE_SILENCE (0) #define PRINTF_S_FORMAT "%d" #else #define PA_SAMPLE_TYPE paUInt8 typedef unsigned char SAMPLE; #define SAMPLE_SILENCE (128) #define PRINTF_S_FORMAT "%d" #endif typedef struct { int frameIndex; /* Index into sample array. */ int maxFrameIndex; SAMPLE *recordedSamples; } paTestData; //calculate the energy in decibe of a frame segment //added by Yao Canwu float energyPerSampleInDecibe(const SAMPLE *ptr) { float energy = 0.0f; SAMPLE temp; for (unsigned long i = 0; i<FRAMES_PER_BUFFER; i++) { temp = *(ptr + i); energy += temp * temp; } energy = 10 * log(energy); return energy; } //An Adaptive Endpointing Algorithm //added by Yao Canwu const float forgetfactor = 1; const float adjustment = 0.05; //key value for classifyFrame(), need to adjust to different environment. 
const float threshold = 10; // float background = 0; float level = 0; int count = 0; bool classifyFrame(const SAMPLE *ptr) { float current = energyPerSampleInDecibe(ptr); bool isSpeech = false; level = ((level * forgetfactor) + current) / (forgetfactor + 1); if (current < background) background = current; else background += (current - background) * adjustment; if (level < background) level = background; if (level - background > threshold) isSpeech = true; return isSpeech; } /* This routine will be called by the PortAudio engine when audio is needed. ** It may be called at interrupt level on some machines so don't do anything ** that could mess up the system like calling malloc() or free(). */ static int recordCallback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer, const PaStreamCallbackTimeInfo* timeInfo, PaStreamCallbackFlags statusFlags, void *userData) { paTestData *data = (paTestData*)userData; const SAMPLE *rptr = (const SAMPLE*)inputBuffer; SAMPLE *wptr = &data->recordedSamples[data->frameIndex * NUM_CHANNELS]; long framesToCalc; long i; int finished; unsigned long framesLeft = data->maxFrameIndex - data->frameIndex; (void)outputBuffer; /* Prevent unused variable warnings. 
*/ (void)timeInfo; (void)statusFlags; (void)userData; if (framesLeft < framesPerBuffer) { framesToCalc = framesLeft; finished = paComplete; } else { framesToCalc = framesPerBuffer; finished = paContinue; } if (inputBuffer == NULL) { for (i = 0; i<framesToCalc; i++) { *wptr++ = SAMPLE_SILENCE; /* left */ if (NUM_CHANNELS == 2) *wptr++ = SAMPLE_SILENCE; /* right */ } } else { for (i = 0; i<framesToCalc; i++) { *wptr++ = *rptr++; /* left */ if (NUM_CHANNELS == 2) *wptr++ = *rptr++; /* right */ } } data->frameIndex += framesToCalc; /* calculate the initial background and initial level, ** which will be used for classify frame ** Added by Yao Canwu */ if (data->frameIndex == 0) { level = energyPerSampleInDecibe(&data->recordedSamples[0]); background = 0.0f; SAMPLE temp; for (i = 0; i < 10 * framesPerBuffer; i++) { temp = data->recordedSamples[i]; background += temp * temp; } background = log(background); } //Silence in 4 seconds means the end of audio capture if (classifyFrame(rptr)) count = 0; else count++; //printf("count = %d\n", count); if (count >= 80) data->maxFrameIndex = data->frameIndex; return finished; } /* This routine will be called by the PortAudio engine when audio is needed. ** It may be called at interrupt level on some machines so don't do anything ** that could mess up the system like calling malloc() or free(). */ static int playCallback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer, const PaStreamCallbackTimeInfo* timeInfo, PaStreamCallbackFlags statusFlags, void *userData) { paTestData *data = (paTestData*)userData; SAMPLE *rptr = &data->recordedSamples[data->frameIndex * NUM_CHANNELS]; SAMPLE *wptr = (SAMPLE*)outputBuffer; unsigned int i; int finished; unsigned int framesLeft = data->maxFrameIndex - data->frameIndex; (void)inputBuffer; /* Prevent unused variable warnings. */ (void)timeInfo; (void)statusFlags; (void)userData; if (framesLeft < framesPerBuffer) { /* final buffer... 
*/ for (i = 0; i<framesLeft; i++) { *wptr++ = *rptr++; /* left */ if (NUM_CHANNELS == 2) *wptr++ = *rptr++; /* right */ } for (; i<framesPerBuffer; i++) { *wptr++ = 0; /* left */ if (NUM_CHANNELS == 2) *wptr++ = 0; /* right */ } data->frameIndex += framesLeft; finished = paComplete; } else { for (i = 0; i<framesPerBuffer; i++) { *wptr++ = *rptr++; /* left */ if (NUM_CHANNELS == 2) *wptr++ = *rptr++; /* right */ } data->frameIndex += framesPerBuffer; finished = paContinue; } return finished; } /*******************************************************************/ int main(void) { PaStreamParameters inputParameters, outputParameters; PaStream* stream; PaError err = paNoError; paTestData data; int i; int totalFrames; int numSamples; int numBytes; SAMPLE max, val; double average; printf("patest_record.c\n"); fflush(stdout); data.maxFrameIndex = totalFrames = NUM_SECONDS * SAMPLE_RATE; /* Record for a few seconds. */ data.frameIndex = 0; numSamples = totalFrames * NUM_CHANNELS; numBytes = numSamples * sizeof(SAMPLE); data.recordedSamples = (SAMPLE *)malloc(numBytes); /* From now on, recordedSamples is initialised. */ if (data.recordedSamples == NULL) { printf("Could not allocate record array.\n"); goto done; } for (i = 0; i<numSamples; i++) data.recordedSamples[i] = 0; err = Pa_Initialize(); if (err != paNoError) goto done; inputParameters.device = Pa_GetDefaultInputDevice(); /* default input device */ if (inputParameters.device == paNoDevice) { fprintf(stderr, "Error: No default input device.\n"); goto done; } inputParameters.channelCount = 1; /* stereo input */ inputParameters.sampleFormat = PA_SAMPLE_TYPE; inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency; inputParameters.hostApiSpecificStreamInfo = NULL; //set a keyboard hit to start recording. Added by Yao Canwu printf("Press any key to start recording\n"); while (!kbhit()){} /* Record some audio. 
-------------------------------------------- */ err = Pa_OpenStream( &stream, &inputParameters, NULL, /* &outputParameters, */ SAMPLE_RATE, FRAMES_PER_BUFFER, paClipOff, /* we won't output out of range samples so don't bother clipping them */ recordCallback, &data); if (err != paNoError) goto done; err = Pa_StartStream(stream); if (err != paNoError) goto done; printf("\n=== Now start recording!!\n"); fflush(stdout); /* Pa_IsStreamActive: Determine whether the stream is active. A stream is active after a successful call to Pa_StartStream(), until it becomes inactive either as a result of a call to Pa_StopStream() or Pa_AbortStream(), or as a result of a return value other than paContinue from the stream callback. In the latter case, the stream is considered inactive after the last buffer has finished playing. */ while ((err = Pa_IsStreamActive(stream)) == 1) { Pa_Sleep(1000); printf("index = %d\n", data.frameIndex); fflush(stdout); } if (err < 0) goto done; err = Pa_CloseStream(stream); if (err != paNoError) goto done; //Write wave to file in wav formate. Added by Yao Canwu printf("Waiting to save into file...\n"); char *path = "audio.wav"; WriteWave(path, data.recordedSamples, data.maxFrameIndex, SAMPLE_RATE); printf("Save successfully!\n"); /* Write recorded data to a file. */ #if WRITE_TO_FILE { FILE *fid; fid = fopen("recorded.raw", "wb"); if (fid == NULL) { printf("Could not open file."); } else { fwrite(data.recordedSamples, NUM_CHANNELS * sizeof(SAMPLE), totalFrames, fid); fclose(fid); printf("Wrote data to 'recorded.raw'\n"); } } #endif /* Playback recorded data. 
-------------------------------------------- */ data.frameIndex = 0; outputParameters.device = Pa_GetDefaultOutputDevice(); /* default output device */ if (outputParameters.device == paNoDevice) { fprintf(stderr, "Error: No default output device.\n"); goto done; } outputParameters.channelCount = 1; /* stereo output */ outputParameters.sampleFormat = PA_SAMPLE_TYPE; outputParameters.suggestedLatency = Pa_GetDeviceInfo(outputParameters.device)->defaultLowOutputLatency; outputParameters.hostApiSpecificStreamInfo = NULL; printf("\n=== Now playing back. ===\n"); fflush(stdout); err = Pa_OpenStream( &stream, NULL, /* no input */ &outputParameters, SAMPLE_RATE, FRAMES_PER_BUFFER, paClipOff, /* we won't output out of range samples so don't bother clipping them */ playCallback, &data); if (err != paNoError) goto done; if (stream) { err = Pa_StartStream(stream); if (err != paNoError) goto done; printf("Waiting for playback to finish.\n"); fflush(stdout); while ((err = Pa_IsStreamActive(stream)) == 1) Pa_Sleep(100); if (err < 0) goto done; err = Pa_CloseStream(stream); if (err != paNoError) goto done; printf("Done.\n"); fflush(stdout); } done: Pa_Terminate(); if (data.recordedSamples) /* Sure it is NULL or valid. */ free(data.recordedSamples); if (err != paNoError) { fprintf(stderr, "An error occured while using the portaudio stream\n"); fprintf(stderr, "Error number: %d\n", err); fprintf(stderr, "Error message: %s\n", Pa_GetErrorText(err)); err = 1; /* Always return 0 or 1, but no other return codes. */ } system("pause"); return err; }readwav:
#include <stdlib.h> #include <math.h> #include <memory.h> #include <assert.h> #include <string.h> #include "readwave.h" bool WaveRewind(FILE *wav_file, WavFileHead *wavFileHead) { char riff[8],wavefmt[8]; short i; rewind(wav_file); fread(wavFileHead,sizeof(struct WavFileHead),1,wav_file); for ( i=0;i<8;i++ ) { riff[i]=wavFileHead->RIFF[i]; wavefmt[i]=wavFileHead->WAVEfmt_[i]; } riff[4]='\0'; wavefmt[7]='\0'; if ( strcmp(riff,"RIFF")==0 && strcmp(wavefmt,"WAVEfmt")==0 ) return true; // It is WAV file. else { rewind(wav_file); return(false); } } short *ReadWave(const char *wavFile, int *numSamples, int *sampleRate ) { FILE *wavFp; WavFileHead wavHead; short *waveData; long numRead; wavFp = fopen(wavFile, "rb"); if (!wavFp) { printf("\nERROR:can't open %s!\n", wavFile); exit(0); } if (WaveRewind(wavFp, &wavHead) == false) { printf("\nERROR:%s is not a Windows wave file!\n", wavFile); exit(0); } waveData = new short [wavHead.RawDataFileLength/sizeof(short)]; numRead = fread(waveData, sizeof(short), wavHead.RawDataFileLength / 2, wavFp); assert(numRead * sizeof(short) == (unsigned long)wavHead.RawDataFileLength); fclose(wavFp); *numSamples = wavHead.RawDataFileLength/sizeof(short); *sampleRate = wavHead.SampleRate; return waveData; } void FillWaveHeader(void *buffer, int raw_wave_len, int sampleRate) { WavFileHead wavHead; strcpy(wavHead.RIFF, "RIFF"); strcpy(wavHead.WAVEfmt_, "WAVEfmt "); wavHead.FileLength = raw_wave_len + 36; wavHead.noUse = 16; wavHead.FormatCategory = 1; wavHead.NChannels = 1; wavHead.SampleRate = sampleRate; wavHead.SampleBytes = sampleRate*2; wavHead.BytesPerSample = 2; wavHead.NBitsPersample = 16; strcpy(wavHead.data, "data"); wavHead.RawDataFileLength = raw_wave_len; memcpy(buffer, &wavHead, sizeof(WavFileHead)); } void WriteWave(const char *wavFile, short *waveData, int numSamples, int sampleRate) { FILE *wavFp; WavFileHead wavHead; long numWrite; wavFp = fopen(wavFile, "wb"); if (!wavFp) { printf("\nERROR:can't open %s!\n", wavFile); exit(0); 
} FillWaveHeader(&wavHead, numSamples*sizeof(short), sampleRate); fwrite(&wavHead, sizeof(WavFileHead), 1, wavFp); numWrite = fwrite(waveData, sizeof(short), numSamples, wavFp); assert(numWrite == numSamples); fclose(wavFp); } void GetWavHeader(const char *wavFile, short *Bits, int *Rate, short *Format, int *Length, short *Channels) { FILE *wavFp; WavFileHead wavHead; char *waveData; long numRead,File_length; wavFp = fopen(wavFile, "rb"); if (!wavFp) { printf("\nERROR:can't open %s!\n", wavFile); exit(0); } fseek(wavFp,0,SEEK_END); File_length=ftell(wavFp); if (WaveRewind(wavFp, &wavHead) == false) { printf("\nERROR:%s is not a Windows wave file!\n", wavFile); exit(0); } waveData = new char[(File_length-sizeof(struct WavFileHead))/sizeof(char)]; numRead = fread(waveData, sizeof(char), File_length-sizeof(struct WavFileHead), wavFp); fclose(wavFp); *Bits = wavHead.NBitsPersample; *Format = wavHead.FormatCategory; *Rate = wavHead.SampleRate; *Length = (int)numRead; *Channels = wavHead.NChannels; delete [] waveData; } short *ReadWavFile(const char *wavFile, int *numSamples, int *sampleRate ) { FILE *wavFp; WavFileHead wavHead; short *waveData; long numRead,File_length; wavFp = fopen(wavFile, "rb"); if (!wavFp) { printf("\nERROR:can't open %s!\n", wavFile); exit(0); } fseek(wavFp,0,SEEK_END); File_length=ftell(wavFp); if (WaveRewind(wavFp, &wavHead) == false) { printf("\nERROR:%s is not a Windows wave file!\n", wavFile); exit(0); } waveData = new short [(File_length-sizeof(struct WavFileHead))/sizeof(short)]; numRead = fread(waveData, sizeof(short), (File_length-sizeof(struct WavFileHead))/sizeof(short), wavFp); fclose(wavFp); *numSamples = (int)numRead; *sampleRate = wavHead.SampleRate; return waveData; } void ReadWav(const char *wavFile, short *waveData, int *numSamples, int *sampleRate) { FILE *wavFp; WavFileHead wavHead; long numRead; wavFp = fopen(wavFile, "rb"); if (!wavFp) { printf("\nERROR:can't open %s!\n", wavFile); exit(0); } if (WaveRewind(wavFp, &wavHead) 
== false) { printf("\nERROR:%s is not a Windows PCM file!\n", wavFile); exit(0); } numRead = fread(waveData, sizeof(short), wavHead.RawDataFileLength/2, wavFp); assert(numRead*sizeof(short) == (unsigned long)wavHead.RawDataFileLength); fclose(wavFp); *numSamples = wavHead.RawDataFileLength/sizeof(short); *sampleRate = wavHead.SampleRate; }