一、概述
最近在学习ffmpeg解码的内容,参考了官方的教程http://dranger.com/ffmpeg/tutorial03.html,结果发现这个音频解码的教程有点问题。参考了各种博客,并同时啃ffplay.c的源码,发现avcodec_decode_audio4多了一个resample(重采样)的概念。
其解码以及播放音频的思路为:
首先,ffmpeg设置本机的audio播放参数(target format),如freq(频率)为44100,format为AV_SAMPLE_FMT_S16,channels为2。这个播放参数是SDL实际播放音频时使用的参数。
但是!我们的audio file(如mp3文件)的audio数据很可能有其自己的audio播放参数(source format),而这些参数不同于我们实际的SDL播放参数,于是ffmpeg在其中插入resample(重采样)的过程,将source format转换成target format。
简单的说就是一个audio参数设置思路的转变:
这个思路转变最大的好处,就是本机播放的格式可以不用再迁就audio file,而是可以根据自己的需要自行设定,缺点很显然就是ffmpeg的CPU开销会增大。
二、代码示例(源码见“附录”)
源码在官方教程基础上把其中视频部分删除,在main函数最后加上一个无限循环,并添加resample函数,最后将resample插入到sdl的回调函数之中。
源码中关于queue的代码为官网教程原版复制,其主要作用就是让main函数和SDL audio线程互斥的push queue和get queue,以下不再赘述。
1、main函数代码结构
main函数伪代码结构如下:
SDL Initialization
ffmpeg open audio file
Set SDL audio parameters
Set ffmpeg audio parameters(target format)
while(ffmpeg_read_frame(pkt)) {
packet_queue_put(pkt);
}
while(1) {
sleep();
}
ffmpeg从audio file中不停的读取数据,并将读出的packet放入queue中。此时我们要清楚,另外还有一个SDL audio线程在等待queue中的数据。
2、SDL audio线程
SDL audio线程主要执行一个回调函数,对应源码中的函数为audio_callback(void * userdata, Uint8 * stream, int len)。这个函数的使命就是将解码后的数据放入参数stream这个缓冲区中,以便SDL audio线程从stream缓冲区中获取数据play。这个缓冲区的大小为参数len,而userdata则是用户自定的参数。其伪代码结构如下:
audio_buf_index = 0;
while(len > 0) {
audio_size = audio_decode_frame(audio_buf_tmp);
memcpy(stream, audio_buf_tmp, audio_size);
len -= audio_size;
stream += audio_size;
audio_buf_index += audio_size;
}
其中audio_decode_frame函数会从queue中取出packet,并对packet中的frame进行解码和resample,然后将数据放入audio_buf_tmp缓冲区中。
3、Resample函数
Resample的过程和结构体SwrContext息息相关。使用这个结构体共需要2步。
1、先初始化SwrContext,指定target format和source format;
2、使用已初始化的SwrContext,对frame进行resample。
Resample的伪代码如下:
struct SwrContext * swr_ctx = NULL;
audio_hw_params_src = audio_hw_params_tgt
int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size)
{
if(audio_hw_params_src != audio_hw_params(af)) {
swr_ctx = swr_alloc_set_opts(audio_hw_params_tgt, audio_hw_params(af));
audio_hw_params_src = audio_hw_params(af);
}
in = af;
swr_convert(swr_ctx, out, in);
audio_buf = out;
}
一开始,audio_hw_params_src(source format)被初始化为target format。resample获得第一个frame后,会从该frame中提取source format,并将其赋值给audio_hw_params_src,同时初始化SwrContext这个结构体,指定target format和source format。然后调用swr_convert对输入的frame进行resample,并将resample后得到的数据放进resample函数指定的缓冲区(audio_buf)中。
附录:
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>

#include <SDL.h>
#include <SDL_thread.h>

#ifdef __MINGW32__
#undef main /* Prevents SDL from overriding main() */
#endif

#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

// compatibility with newer API
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
#define av_frame_alloc avcodec_alloc_frame
#define av_frame_free avcodec_free_frame
#endif

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE 192000

#include <signal.h>

/*
 * One complete description of an audio format. Two instances exist below:
 * the format handed to SDL for playback (target) and the format of the
 * frames most recently seen from the decoder (source).
 */
typedef struct AudioParams {
    int freq;                   /* sample rate, compared against AVFrame.sample_rate */
    int channels;               /* number of channels */
    int64_t channel_layout;     /* FFmpeg channel-layout bitmask */
    enum AVSampleFormat fmt;    /* sample format */
    int frame_size;             /* filled in by main() via av_samples_get_buffer_size() */
    int bytes_per_sec;          /* filled in by main() via av_samples_get_buffer_size() */
} AudioParams;

/* Stream parameters read from the opened codec context in main(). */
int sample_rate, nb_channels;
int64_t channel_layout;

/* Playback (target) format; populated in main() from the SDL audio spec. */
AudioParams audio_hw_params_tgt;
/* Format of the frames the resampler is currently configured for. */
AudioParams audio_hw_params_src;

int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size);

/* Resampler shared by all calls to resample(); created lazily. */
struct SwrContext * swr_ctx = NULL;

/*
 * Convert one decoded frame to the playback format (audio_hw_params_tgt)
 * and write the result into audio_buf.
 *
 * af             decoded frame to convert
 * audio_buf      caller-owned destination buffer
 * audio_buf_size points at the capacity of audio_buf in bytes (not modified)
 *
 * Returns the number of bytes written, or a negative value on failure
 * (resampler could not be created, conversion failed, or the converted
 * data would not fit in audio_buf).
 */
int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size)
{
    const uint8_t **in;
    uint8_t **out = &audio_buf;
    int64_t dec_channel_layout;
    int out_count;
    int out_size;
    int len2;

    /* Trust the frame's layout only if it agrees with its channel count. */
    dec_channel_layout =
        (af->channel_layout && av_frame_get_channels(af) == av_get_channel_layout_nb_channels(af->channel_layout)) ?
        af->channel_layout : av_get_default_channel_layout(av_frame_get_channels(af));

    /* (Re)build the resampler on first use or when the input format changes. */
    if (af->format        != audio_hw_params_src.fmt            ||
        af->sample_rate   != audio_hw_params_src.freq           ||
        dec_channel_layout != audio_hw_params_src.channel_layout ||
        !swr_ctx) {
        swr_free(&swr_ctx);
        swr_ctx = swr_alloc_set_opts(NULL,
                                     audio_hw_params_tgt.channel_layout, audio_hw_params_tgt.fmt, audio_hw_params_tgt.freq,
                                     dec_channel_layout, af->format, af->sample_rate,
                                     0, NULL);
        if (!swr_ctx || swr_init(swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                   af->sample_rate, av_get_sample_fmt_name(af->format), av_frame_get_channels(af),
                   audio_hw_params_tgt.freq, av_get_sample_fmt_name(audio_hw_params_tgt.fmt), audio_hw_params_tgt.channels);
            swr_free(&swr_ctx);
            return -1;
        }
        printf("swr_init\n");
        /* Remember every field the check above compares; leaving out the
         * layout would force a re-init on each frame whenever the decoder
         * layout differs from the target layout. */
        audio_hw_params_src.channel_layout = dec_channel_layout;
        audio_hw_params_src.channels = av_frame_get_channels(af);
        audio_hw_params_src.fmt = af->format;
        audio_hw_params_src.freq = af->sample_rate;
    }

    /* swr_ctx is always valid here: the block above either created it or
     * returned, so no pass-through path is needed. */
    in = (const uint8_t **)af->extended_data;

    /* Upper bound on output samples plus headroom. NOTE(review): the
     * headroom constant was lost in the published listing; 256 is the
     * value ffplay uses for the same computation. */
    out_count = (int64_t)af->nb_samples * audio_hw_params_tgt.freq / af->sample_rate + 256;
    out_size = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, out_count, audio_hw_params_tgt.fmt, 0);
    if (out_size < 0) {
        av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
        return -1;
    }

    /* audio_buf belongs to the caller and is not heap memory, so it must
     * not be reallocated here; refuse the frame if it cannot fit. */
    if (out_size > *audio_buf_size) {
        av_log(NULL, AV_LOG_ERROR, "resampled data (%d bytes) does not fit in audio buffer (%d bytes)\n",
               out_size, *audio_buf_size);
        return -1;
    }

    len2 = swr_convert(swr_ctx, out, out_count, in, af->nb_samples);
    if (len2 < 0) {
        av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
        return -1;
    }
    if (len2 == out_count) {
        /* Output was filled completely, so samples may still be buffered
         * inside the resampler; reset it. */
        av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
        if (swr_init(swr_ctx) < 0)
            swr_free(&swr_ctx);
    }

    return len2 * audio_hw_params_tgt.channels * av_get_bytes_per_sample(audio_hw_params_tgt.fmt);
}

/*
 * SIGINT/SIGTERM handler: terminate the process immediately.
 * NOTE(review): the exit status was lost in the published listing; 123 is
 * the value ffplay's handler of the same name uses - confirm.
 */
static void sigterm_handler(int sig)
{
    exit(123);
}

/*
 * Singly linked FIFO of demuxed packets, shared between main() (producer)
 * and the code running in the SDL audio callback (consumer).
 */
typedef struct PacketQueue {
    AVPacketList *first_pkt, *last_pkt; /* head and tail of the list */
    int nb_packets;                     /* number of queued packets */
    int size;                           /* sum of pkt.size over queued packets */
    SDL_mutex *mutex;                   /* guards every field above */
    SDL_cond *cond;                     /* signalled when a packet is added */
} PacketQueue;

PacketQueue audioq;

/* Set to 1 to make queue readers give up. */
int quit = 0;

/* Reset q to an empty queue and create its mutex and condition variable. */
void packet_queue_init(PacketQueue *q) {
    memset(q, 0, sizeof(PacketQueue));
    q->mutex = SDL_CreateMutex();
    q->cond = SDL_CreateCond();
}
/*
 * Append a copy of *pkt to the tail of q and wake one waiting reader.
 *
 * Returns 0 on success, -1 if the packet could not be duplicated or the
 * list node could not be allocated.
 */
int packet_queue_put(PacketQueue *q, AVPacket *pkt) {
    AVPacketList *pkt1;

    if (av_dup_packet(pkt) < 0) {
        return -1;
    }
    pkt1 = av_malloc(sizeof(AVPacketList));
    if (!pkt1)
        return -1;
    pkt1->pkt = *pkt;
    pkt1->next = NULL;

    SDL_LockMutex(q->mutex);

    /* Link the node in as the new tail (and head, if the queue was empty). */
    if (!q->last_pkt)
        q->first_pkt = pkt1;
    else
        q->last_pkt->next = pkt1;
    q->last_pkt = pkt1;
    q->nb_packets++;
    q->size += pkt1->pkt.size;

    /* Wake a reader blocked in packet_queue_get(). */
    SDL_CondSignal(q->cond);

    SDL_UnlockMutex(q->mutex);
    return 0;
}
/*
 * Remove the packet at the head of q and store it in *pkt.
 *
 * block  non-zero: wait on the queue's condition variable until a packet
 *        arrives; zero: return immediately if the queue is empty.
 *
 * Returns 1 when a packet was delivered, 0 when the queue was empty and
 * block was zero, -1 when the global quit flag is set.
 */
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
    AVPacketList *pkt1;
    int ret;

    SDL_LockMutex(q->mutex);

    for (;;) {
        if (quit) {
            ret = -1;
            break;
        }

        pkt1 = q->first_pkt;
        if (pkt1) {
            /* Unlink the head node and hand its packet to the caller. */
            q->first_pkt = pkt1->next;
            if (!q->first_pkt)
                q->last_pkt = NULL;
            q->nb_packets--;
            q->size -= pkt1->pkt.size;
            *pkt = pkt1->pkt;
            av_free(pkt1);
            ret = 1;
            break;
        } else if (!block) {
            ret = 0;
            break;
        } else {
            /* Releases the mutex while waiting and re-acquires it after. */
            SDL_CondWait(q->cond, q->mutex);
        }
    }
    SDL_UnlockMutex(q->mutex);
    return ret;
}

/* Frame that audio_decode_frame() decodes into. */
AVFrame frame;
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) { static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = ; int len1, data_size = ; for(;;) {
while(audio_pkt_size > ) {
int got_frame = ;
len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt);
if(len1 < ) {
/* if error, skip frame */
audio_pkt_size = ;
break;
}
audio_pkt_data += len1;
audio_pkt_size -= len1;
data_size = ;
if(got_frame) {
data_size = resample(&frame, audio_buf, &buf_size);
// data_size = av_samples_get_buffer_size(NULL,
// aCodecCtx->channels,
// frame.nb_samples,
// aCodecCtx->sample_fmt,
// 1);
assert(data_size <= buf_size);
// memcpy(audio_buf, frame.data[0], data_size);
}
if(data_size <= ) {
/* No data yet, get more frames */
continue;
}
// memcpy(audio_buf, frame.data[0], data_size); /* We have data, return it and come back for more later */
return data_size;
}
if(pkt.data)
av_free_packet(&pkt); if(quit) {
return -;
} if(packet_queue_get(&audioq, &pkt, ) < ) {
return -;
}
audio_pkt_data = pkt.data;
audio_pkt_size = pkt.size;
}
} void audio_callback(void *userdata, Uint8 *stream, int len) { AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size; static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * ) / ];
static unsigned int audio_buf_size = ;
static unsigned int audio_buf_index = ; while(len > ) {
if(audio_buf_index >= audio_buf_size) {
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < ) {
/* If error, output silence */
audio_buf_size = ; // arbitrary?
memset(audio_buf, , audio_buf_size);
} else {
audio_buf_size = audio_size;
}
audio_buf_index = ;
}
len1 = audio_buf_size - audio_buf_index;
if(len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
}
} int main(int argc, char *argv[]) { AVFormatContext *pFormatCtx = NULL;
int i, audioStream;
AVPacket packet; AVCodecContext *aCodecCtxOrig = NULL;
AVCodecContext *aCodecCtx = NULL;
AVCodec *aCodec = NULL; SDL_Event event;
SDL_AudioSpec wanted_spec, spec; signal(SIGINT , sigterm_handler); /* Interrupt (ANSI). */
signal(SIGTERM, sigterm_handler); /* Termination (ANSI). */ if(argc < ) {
fprintf(stderr, "Usage: test <file>\n");
exit();
}
// Register all formats and codecs
av_register_all(); if(SDL_Init(SDL_INIT_AUDIO)) {
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
exit();
} // Open video file
if(avformat_open_input(&pFormatCtx, argv[], NULL, NULL)!=)
return -; // Couldn't open file // Retrieve stream information
if(avformat_find_stream_info(pFormatCtx, NULL)<)
return -; // Couldn't find stream information // Dump information about file onto standard error
av_dump_format(pFormatCtx, , argv[], ); // Find the first video stream
audioStream=-;
for(i=; i<pFormatCtx->nb_streams; i++) {
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
audioStream < ) {
audioStream=i;
}
}
// if(videoStream==-1)
// return -1; // Didn't find a video stream
if(audioStream==-)
return -; aCodecCtxOrig=pFormatCtx->streams[audioStream]->codec;
aCodec = avcodec_find_decoder(aCodecCtxOrig->codec_id);
if(!aCodec) {
fprintf(stderr, "Unsupported codec!\n");
return -;
} // Copy context
aCodecCtx = avcodec_alloc_context3(aCodec);
if(avcodec_copy_context(aCodecCtx, aCodecCtxOrig) != ) {
fprintf(stderr, "Couldn't copy codec context");
return -; // Error copying codec context
} avcodec_open2(aCodecCtx, aCodec, NULL); sample_rate = aCodecCtx->sample_rate;
nb_channels = aCodecCtx->channels;
channel_layout = aCodecCtx->channel_layout; printf("channel_layout=%" PRId64 "\n", channel_layout);
printf("nb_channels=%d\n", nb_channels);
printf("freq=%d\n", sample_rate); if (!channel_layout || nb_channels != av_get_channel_layout_nb_channels(channel_layout)) {
channel_layout = av_get_default_channel_layout(nb_channels);
channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
printf("correction\n");
} // Set audio settings from codec info
wanted_spec.freq = sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = nb_channels;
wanted_spec.silence = ;
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = aCodecCtx; if(SDL_OpenAudio(&wanted_spec, &spec) < ) {
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
return -;
}
printf("freq: %d\tchannels: %d\n", spec.freq, spec.channels); audio_hw_params_tgt.fmt = AV_SAMPLE_FMT_S16;
audio_hw_params_tgt.freq = spec.freq;
audio_hw_params_tgt.channel_layout = channel_layout;
audio_hw_params_tgt.channels = spec.channels;
audio_hw_params_tgt.frame_size = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, , audio_hw_params_tgt.fmt, );
audio_hw_params_tgt.bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, audio_hw_params_tgt.freq, audio_hw_params_tgt.fmt, );
if (audio_hw_params_tgt.bytes_per_sec <= || audio_hw_params_tgt.frame_size <= ) {
printf("size error\n");
return -;
}
audio_hw_params_src = audio_hw_params_tgt; // audio_st = pFormatCtx->streams[index]
packet_queue_init(&audioq);
SDL_PauseAudio(); // Read frames and save first five frames to disk
i=;
while(av_read_frame(pFormatCtx, &packet)>=) {
if(packet.stream_index==audioStream) {
packet_queue_put(&audioq, &packet);
} else {
av_free_packet(&packet);
}
// Free the packet that was allocated by av_read_frame
SDL_PollEvent(&event);
switch(event.type) {
case SDL_QUIT:
quit = ;
SDL_Quit();
exit();
break;
default:
break;
} } while() SDL_Delay(); // Close the codecs
avcodec_close(aCodecCtxOrig);
avcodec_close(aCodecCtx); // Close the video file
avformat_close_input(&pFormatCtx); return ;
}
http://pan.baidu.com/s/1pJUXLZP