Android Audio and Video Learning, Chapter 5: Implementing Live Video Streaming - Pushing Audio

Date: 2023-02-11 23:19:58

Audio push flow
1. Set the audio and video parameters
2. Start pushing the stream
3. Capture the audio and video
4. Encode (producer)
5. Push over RTMP (consumer; a sketch of the producer/consumer queue follows this list)
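Steps 4 and 5 form a producer/consumer pair: the encoding code produces RTMPPacket objects and a separate sending thread consumes them and pushes them to the server. The article only calls add_rtmp_packet() later on, so the following is a minimal sketch, assuming a simple linked-list queue guarded by a pthread mutex and condition variable; apart from the add_rtmp_packet() name and the librtmp calls, everything here (the node type, the push_thread function, the is_pushing flag) is an assumption about how such a queue could be wired up.

#include <pthread.h>
#include <stdlib.h>
#include "rtmp.h"   // librtmp; the include path may differ in your project

// Node of the hypothetical packet queue shared by producer and consumer
typedef struct packet_node {
    RTMPPacket *packet;
    struct packet_node *next;
} packet_node;

static packet_node *queue_head = NULL, *queue_tail = NULL;
static pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  queue_cond  = PTHREAD_COND_INITIALIZER;
static volatile int is_pushing = 1;    // cleared when the stream is stopped

// Producer side: called from the audio/video encoding code
void add_rtmp_packet(RTMPPacket *packet) {
    packet_node *node = malloc(sizeof(packet_node));
    node->packet = packet;
    node->next = NULL;
    pthread_mutex_lock(&queue_mutex);
    if (queue_tail) queue_tail->next = node; else queue_head = node;
    queue_tail = node;
    pthread_cond_signal(&queue_cond);   // wake the sending thread
    pthread_mutex_unlock(&queue_mutex);
}

// Consumer side: a dedicated thread that pops packets and sends them over RTMP
void *push_thread(void *arg) {
    RTMP *rtmp = (RTMP *) arg;          // assumed to be connected already
    while (is_pushing) {
        pthread_mutex_lock(&queue_mutex);
        while (is_pushing && queue_head == NULL)
            pthread_cond_wait(&queue_cond, &queue_mutex);
        packet_node *node = queue_head;
        if (node) {
            queue_head = node->next;
            if (!queue_head) queue_tail = NULL;
        }
        pthread_mutex_unlock(&queue_mutex);
        if (!node) continue;

        RTMPPacket *packet = node->packet;
        packet->m_nInfoField2 = rtmp->m_stream_id;  // bind the packet to the live stream
        RTMP_SendPacket(rtmp, packet, 1);           // 1 = let librtmp queue internally
        RTMPPacket_Free(packet);
        free(packet);
        free(node);
    }
    return NULL;
}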
/**
 * Audio encoder configuration
 */
setAudioOptions{

    audio_encode_handle = faacEncOpen(sampleRateInHz, numChannels, &nInputSamples, &nMaxOutputBytes);
    if (!audio_encode_handle) {
        LOGE("Failed to open the audio encoder");
        return;
    }
    // Set the audio encoding parameters
    faacEncConfigurationPtr p_config = faacEncGetCurrentConfiguration(audio_encode_handle);
    p_config->mpegVersion = MPEG4;
    p_config->allowMidside = 1;
    p_config->aacObjectType = LOW;
    p_config->outputFormat = 0;          // 0 = raw AAC, no ADTS header in the output
    p_config->useTns = 1;                // temporal noise shaping, roughly suppresses popping artifacts
    p_config->useLfe = 0;
    // p_config->inputFormat = FAAC_INPUT_16BIT;
    p_config->quantqual = 100;
    p_config->bandWidth = 0;             // bandwidth
    p_config->shortctl = SHORTCTL_NORMAL;

    if (!faacEncSetConfiguration(audio_encode_handle, p_config)) {
        LOGE("%s", "Audio encoder configuration failed..");
        throwNativeError(env, INIT_FAILED);
        return;
    }

    LOGI("%s", "Audio encoder configured successfully");

}
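The native functions in this article share a few globals that are never declared in the snippets. A minimal sketch, assuming they live in the same source file, with types taken from the faacEncOpen() signature (the static declarations themselves are an assumption):

#include <faac.h>

// Shared native state assumed by the snippets in this article
static faacEncHandle audio_encode_handle = NULL;  // encoder handle returned by faacEncOpen()
static unsigned long nInputSamples;    // PCM samples consumed per faacEncEncode() call
static unsigned long nMaxOutputBytes;  // upper bound on encoded bytes produced per call

Note that outputFormat = 0 asks FAAC for raw AAC frames without ADTS headers; that is what the RTMP audio packets expect, since the decoder configuration travels separately in the AAC sequence header packet described later in this article.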
/**
 * AAC-encode the sampled PCM audio data
 */
fireAudio{

    int *pcmbuf;
    unsigned char *bitbuf;
    jbyte *b_buffer = (*env)->GetByteArrayElements(env, buffer, 0);
    pcmbuf = (int *) malloc(nInputSamples * sizeof(int));
    bitbuf = (unsigned char *) malloc(nMaxOutputBytes * sizeof(unsigned char));
    unsigned int nByteCount = 0;
    unsigned int nBufferSize = (unsigned int) len / 2;   // len is in bytes, each PCM sample is 16 bits
    unsigned short *buf = (unsigned short *) b_buffer;
    while (nByteCount < nBufferSize) {
        int audioLength = nInputSamples;
        if ((nByteCount + nInputSamples) >= nBufferSize) {
            audioLength = nBufferSize - nByteCount;
        }
        int i;
        for (i = 0; i < audioLength; i++) {
            // Read one 16-bit PCM sample from the captured buffer
            int s = ((int16_t *) buf + nByteCount)[i];
            // Shift left 8 bits to scale the 16-bit sample for the encoder's 32-bit input format
            pcmbuf[i] = s << 8;
        }
        nByteCount += nInputSamples;
        // Encode with FAAC: pcmbuf is the converted PCM data, audioLength is the number of input
        // samples (at most the nInputSamples returned by faacEncOpen), bitbuf is the output buffer,
        // nMaxOutputBytes is the maximum output size returned by faacEncOpen
        int byteslen = faacEncEncode(audio_encode_handle, pcmbuf, audioLength,
                                     bitbuf, nMaxOutputBytes);
        if (byteslen < 1) {
            continue;
        }
        add_aac_body(bitbuf, byteslen);  // put the encoded AAC frame from bitbuf onto the packet queue
    }
    (*env)->ReleaseByteArrayElements(env, buffer, b_buffer, 0);
    if (bitbuf)
        free(bitbuf);
    if (pcmbuf)
        free(pcmbuf);

}
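The fireAudio body above reads env, buffer and len without showing where they come from. A plausible JNI entry point is sketched below; the class and package name are placeholders, only the parameter list (a PCM byte[] and its length in bytes) is implied by the code above.

#include <jni.h>

// Hypothetical JNI wrapper around the fireAudio body; the package/class name is a placeholder.
JNIEXPORT void JNICALL
Java_com_example_live_LivePusher_fireAudio(JNIEnv *env, jobject thiz,
                                           jbyteArray buffer, jint len) {
    // ... the fireAudio body shown above goes here ...
}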
AAC packet header structure (2 bytes; this is the FLV audio tag header written at the start of each RTMP audio packet body)
1. SoundFormat, 4 bits
0 = Linear PCM, platform endian
1 = ADPCM
2 = MP3
3 = Linear PCM, little endian
4 = Nellymoser 16 kHz mono
5 = Nellymoser 8 kHz mono
6 = Nellymoser
7 = G.711 A-law logarithmic PCM
8 = G.711 mu-law logarithmic PCM
9 = reserved
10 = AAC
11 = Speex
14 = MP3 8 kHz
15 = Device-specific sound
2. SoundRate, 2 bits, sampling rate
0 = 5.5 kHz
1 = 11 kHz
2 = 22 kHz
3 = 44 kHz
For AAC audio this field is always 3 (binary 11), i.e. 44 kHz.
3. SoundSize, 1 bit, sample size
0 = 8-bit samples
1 = 16-bit samples
For AAC this is always 1, i.e. 16-bit samples.
4. SoundType, 1 bit, channel layout
0 = Mono sound
1 = Stereo sound
5. AACPacketType, 8 bits
This field indicates the type of the AACAUDIODATA that follows: 0 = AAC sequence header, 1 = AAC raw. The first audio packet uses 0, every subsequent packet uses 1.

body[0] = 0xAF means AAC at 44100 Hz, 16-bit, stereo: 44100 samples are taken per second, each sample is stored in 16 bits (2 bytes), and there are two channels (stereo).
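As a quick sanity check, the 0xAF header byte used in add_aac_body() below can be rebuilt from the field values listed above:

// Composing the audio tag header byte from the fields listed above
unsigned char sound_format = 10;  // AAC
unsigned char sound_rate   = 3;   // 44 kHz
unsigned char sound_size   = 1;   // 16-bit samples
unsigned char sound_type   = 1;   // stereo
unsigned char header = (unsigned char) ((sound_format << 4) | (sound_rate << 2)
                                        | (sound_size << 1) | sound_type);
// header == 0xAF (binary 1010 11 1 1)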
The format of audio data determines its sound quality. Everyday sources such as the telephone or the radio are analog audio signals, so the concepts of sampling rate and sample size do not apply to them. As a rough comparison:
 44 kHz, 16-bit audio is called CD quality;
 22 kHz, 16-bit audio is close to FM stereo broadcast and is called broadcast quality;
 11 kHz, 8-bit audio is called telephone quality.

/**
 * Build an AAC RTMP packet and queue it
 */
add_aac_body{

    int body_size = 2 + len;
    RTMPPacket *packet = malloc(sizeof(RTMPPacket));
    // RTMPPacket initialization
    RTMPPacket_Alloc(packet, body_size);
    RTMPPacket_Reset(packet);
    unsigned char *body = (unsigned char *) packet->m_body;
    // Header configuration
    /* AF 01 + AAC raw data */
    body[0] = 0xAF;   // SoundFormat(4 bits): 10 = AAC, SoundRate(2 bits): 3 = 44 kHz,
                      // SoundSize(1 bit): 1 = 16-bit samples, SoundType(1 bit): 1 = stereo
    body[1] = 0x01;   // AACPacketType: 1 = AAC raw
    memcpy(&body[2], buf, len);   /* buf holds the raw AAC frame */
    packet->m_packetType = RTMP_PACKET_TYPE_AUDIO;
    packet->m_nBodySize = body_size;
    packet->m_nChannel = 0x04;
    packet->m_hasAbsTimestamp = 0;
    packet->m_headerType = RTMP_PACKET_SIZE_LARGE;
    packet->m_nTimeStamp = RTMP_GetTime() - start_time;
    add_rtmp_packet(packet);

}
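As described above, the very first audio packet must carry the AAC sequence header (AACPacketType = 0) with the decoder configuration, and only then can the raw packets built by add_aac_body() be sent. The article does not show that function; below is a minimal sketch under the same conventions, assuming FAAC's faacEncGetDecoderSpecificInfo() is used to obtain the AudioSpecificConfig (the name add_aac_sequence_header and the way it is called are assumptions, not the article's code).

/**
 * A sketch of sending the AAC sequence header once, before any add_aac_body() packet.
 */
void add_aac_sequence_header() {
    unsigned char *spec_buf = NULL;
    unsigned long  spec_len = 0;
    // Ask FAAC for the AudioSpecificConfig (typically 2 bytes for AAC-LC)
    if (faacEncGetDecoderSpecificInfo(audio_encode_handle, &spec_buf, &spec_len) != 0) {
        return;
    }

    int body_size = 2 + spec_len;
    RTMPPacket *packet = malloc(sizeof(RTMPPacket));
    RTMPPacket_Alloc(packet, body_size);
    RTMPPacket_Reset(packet);
    unsigned char *body = (unsigned char *) packet->m_body;

    /* AF 00 + AudioSpecificConfig */
    body[0] = 0xAF;                 // same header byte as the raw packets
    body[1] = 0x00;                 // AACPacketType: 0 = AAC sequence header
    memcpy(&body[2], spec_buf, spec_len);

    packet->m_packetType = RTMP_PACKET_TYPE_AUDIO;
    packet->m_nBodySize = body_size;
    packet->m_nChannel = 0x04;
    packet->m_hasAbsTimestamp = 0;
    packet->m_headerType = RTMP_PACKET_SIZE_LARGE;
    packet->m_nTimeStamp = 0;       // the sequence header is sent at timestamp 0
    add_rtmp_packet(packet);

    free(spec_buf);                 // FAAC allocates this buffer; the caller frees it
}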