The Simplest FFMPEG+SDL-Based Audio/Video Player

Posted: 2022-10-23 12:05:25

I. Overview

        In "The Simplest FFMPEG+SDL-Based Audio Player", Part One, we implemented audio playback, and earlier, in "The Simplest FFMPEG+SDL-Based Video Player", Part One, we implemented video playback. For video we had to insert a 40 ms delay per frame, otherwise the video would play as fast as it could be decoded. For audio there are two places that control the playback speed. The first is the sample rate (http://wiki.libsdl.org/SDL_AudioSpec): it determines how many samples are sent to the audio device per second, so it is directly proportional to the playback speed. The second is that after calling SDL_PauseAudio(0) to start playback, a loop waits for the buffered data to finish playing before the next chunk is submitted.

                //Play
                SDL_PauseAudio(0);
                while (audio_len > 0) // Wait until the buffer has finished playing
                    SDL_Delay(1);     // ms
These two mechanisms keep the audio playing at the right speed, without the artificial delay we had to pick by hand for video playback. The sketch below shows the arithmetic behind the first point.
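As a standalone back-of-the-envelope sketch (not part of the player), this shows why the sample rate alone fixes the playback speed, assuming the 44100 Hz / stereo / 16-bit output used later in this program:

#include <stdio.h>

int main(void) {
	int sample_rate = 44100;   // wanted_spec.freq
	int channels = 2;          // stereo after conversion
	int bytes_per_sample = 2;  // AUDIO_S16SYS: 16-bit samples
	// The device drains this many bytes per second and the callback must keep up;
	// that is what paces audio playback without any explicit SDL_Delay().
	printf("%d bytes of PCM per second\n", sample_rate * channels * bytes_per_sample);  // 176400
	return 0;
}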

        This time we will play video and audio together, but the audio side is implemented differently. The core logic is the same as in the "audio player"; the difference is that demuxed packets are now stored in a queue, which separates demuxing from decoding. That keeps the two stages cleaner and makes it easier to add more features later.

II. Main Content

1. First, the structure we will use to hold queued packets:

/* Packet queue */
typedef struct PacketQueue {
	AVPacketList *first_pkt, *last_pkt;
	int nb_packets;    // number of packets in the queue
	int size;          // total size of all queued packets, in bytes
	SDL_mutex *mutex;  // mutex
	SDL_cond *cond;    // condition variable
} PacketQueue;
AVPacketList: a structure built into FFMPEG; it is a linked-list node holding one packet:

typedef struct AVPacketList {
    AVPacket pkt;
    struct AVPacketList *next;
} AVPacketList;
nb_packets: the number of packets in the queue.

size: the total size of all packets in the queue, in bytes.

mutex and cond: a mutex and a condition variable. Audio playback runs in another thread (think of the callback), so reads and writes of the queue, a shared resource, must be mutually exclusive. If mutexes and condition variables are unfamiliar, see the classic producer-consumer problem.

2. The operations on this queue are as follows:

2.1 Initializing the queue

/* Initialize the queue */
void packet_queue_init(PacketQueue *q) {
	memset(q, 0, sizeof(PacketQueue));  // zero the whole structure
	q->mutex = SDL_CreateMutex();       // create the mutex and the condition variable
	q->cond = SDL_CreateCond();
}
2.2 Enqueueing

/* Enqueue */
int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

	AVPacketList *pkt1;
	if (av_dup_packet(pkt) < 0) {  // give pkt its own copy of the data
		return -1;
	}
	pkt1 = (AVPacketList*)av_malloc(sizeof(AVPacketList));
	if (!pkt1) return -1;
	pkt1->pkt = *pkt;
	pkt1->next = NULL;

	SDL_LockMutex(q->mutex);  // lock the queue before modifying it
	if (!q->last_pkt)
		q->first_pkt = pkt1;
	else
		q->last_pkt->next = pkt1;
	q->last_pkt = pkt1;
	q->nb_packets++;
	q->size += pkt1->pkt.size;
	SDL_CondSignal(q->cond);     // wake up a thread blocked on the condition variable
	SDL_UnlockMutex(q->mutex);   // unlock
	return 0;
}

2.3 Dequeueing

/*
Dequeue
q:      the queue
pkt:    receives the dequeued packet
block:  whether to block when the queue is empty
return: -1 quit, 0 queue empty (non-blocking), 1 success
*/
int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
	AVPacketList *pkt1;
	int ret;

	SDL_LockMutex(q->mutex);  // lock the queue before removing from it
	for (;;) {

		if (quit) {  // quit is a global flag; ignore it for now
			ret = -1;
			break;
		}

		pkt1 = q->first_pkt;
		if (pkt1) {  // normal dequeue
			q->first_pkt = pkt1->next;
			if (!q->first_pkt)
				q->last_pkt = NULL;
			q->nb_packets--;
			q->size -= pkt1->pkt.size;
			*pkt = pkt1->pkt;
			av_free(pkt1);
			ret = 1;
			break;
		}
		else if (!block) {  // empty and non-blocking: return immediately
			ret = 0;
			break;
		}
		else {
			SDL_CondWait(q->cond, q->mutex);  // release the mutex and wait; re-lock it once signalled
		}
	}
	SDL_UnlockMutex(q->mutex);  // unlock
	return ret;
}

quit is a global variable that lets the program exit cleanly once playback is over; without it the process would never terminate. The relevant code:

// When SDL quits, set quit = 1
SDL_PollEvent(&event);
switch (event.type) {
case SDL_QUIT:
	quit = 1;
	....


The rest is the same as before: open the file, read the stream information, demux, decode, and so on. The parts that are new are putting the demuxed audio packets into the queue and taking them back out for decoding; the simplified extract below shows just those two call sites.
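For orientation, the two call sites look roughly like this (trimmed from the full code below, with error handling and the video path omitted):

// Producer side: the read loop in main() pushes demuxed audio packets into the queue
while (av_read_frame(pFormatCtx, &packet) >= 0) {
	if (packet.stream_index == audioStream)
		packet_queue_put(&audioq, &packet);  // enqueue; the audio thread will consume it
	// ... video packets are decoded and displayed here ...
}

// Consumer side: audio_decode_frame(), running on the SDL audio callback thread
if (packet_queue_get(&audioq, &pkt, 1) < 0)  // block until a packet arrives (or quit is set)
	return -1;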

The main function is as follows:

int main(int argc, char *argv[]) {

	struct SwsContext * sws_ctx = NULL;
	AVFormatContext *pFormatCtx = NULL;
	int             i, videoStream, audioStream;
	AVPacket        packet;
	int             frameFinished;
	AVCodecContext  *pCodecCtxOrig = NULL;
	AVCodecContext  *pCodecCtx = NULL;
	AVCodec         *pCodec = NULL;
	AVFrame         *pFrame = NULL;
	AVCodecContext  *aCodecCtxOrig = NULL;
	AVCodecContext  *aCodecCtx = NULL;
	AVCodec         *aCodec = NULL;
	SDL_Overlay     *bmp;
	SDL_Surface     *screen;
	SDL_Rect        rect;
	SDL_Event       event;
	SDL_AudioSpec   wanted_spec, spec;

	// Register all formats and codecs
	av_register_all();
	if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
		fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
		exit(1);
	}
	// file path
	char* filepath = "2.mp4";
	// Open video file
	if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0)
		return -1; // Couldn't open file
	// Retrieve stream information
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
		return -1; // Couldn't find stream information

	// Find the first video stream and audio stream
	videoStream = -1;
	audioStream = -1;
	for (i = 0; i < pFormatCtx->nb_streams; i++) {
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
			videoStream < 0) {
			videoStream = i;
		}
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
			audioStream < 0) {
			audioStream = i;
		}
	}
	if (videoStream == -1)
		return -1; // Didn't find a video stream
	if (audioStream == -1)
		return -1;

	//find the codeccontext
	aCodecCtxOrig = pFormatCtx->streams[audioStream]->codec;
	//get the decoder
	aCodec = avcodec_find_decoder(aCodecCtxOrig->codec_id);
	if (!aCodec) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1;
	}

	// Copy context
	aCodecCtx = avcodec_alloc_context3(aCodec);
	if (avcodec_copy_context(aCodecCtx, aCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Set audio settings from codec info
	wanted_spec.freq =   44100;
	wanted_spec.format = AUDIO_S16SYS;
	wanted_spec.channels = aCodecCtx->channels;
	wanted_spec.silence = 0;
	wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
	wanted_spec.callback = audio_callback;
	wanted_spec.userdata = aCodecCtx;

	//openaudio device
	if (SDL_OpenAudio(&wanted_spec, &spec) < 0) {
		fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
		return -1;
	}
	//open the audio decoder
	avcodec_open2(aCodecCtx, aCodec, NULL);

	packet_queue_init(&audioq);
	//play audio
	SDL_PauseAudio(0);
	//------------------------------------------------------------
	//video part
	// Get a pointer to the codec context for the video stream
	pCodecCtxOrig = pFormatCtx->streams[videoStream]->codec;
	// Find the decoder for the video stream
	pCodec = avcodec_find_decoder(pCodecCtxOrig->codec_id);
	if (pCodec == NULL) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1; // Codec not found
	}

	// Copy context
	pCodecCtx = avcodec_alloc_context3(pCodec);
	if (avcodec_copy_context(pCodecCtx, pCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Open codec
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
		return -1; // Could not open codec

	// Allocate video frame
	pFrame = av_frame_alloc();

	// Make a screen to put our video
	screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 0, 0);
	if (!screen) {
		fprintf(stderr, "SDL: could not set video mode - exiting\n");
		exit(1);
	}

	// Allocate a place to put our YUV image on that screen
	bmp = SDL_CreateYUVOverlay(pCodecCtx->width,
		pCodecCtx->height,
		SDL_YV12_OVERLAY,  // YV12: Y plane, then V, then U
		screen);

	// initialize SWS context for software scaling
	sws_ctx = sws_getContext(pCodecCtx->width,
		pCodecCtx->height,
		pCodecCtx->pix_fmt,
		pCodecCtx->width,
		pCodecCtx->height,
		PIX_FMT_YUV420P,
		SWS_BILINEAR,
		NULL,
		NULL,
		NULL
		);

	// Read packets: decode and display video, queue audio
	while (av_read_frame(pFormatCtx, &packet) >= 0) {
		// Is this a packet from the video stream?
		if (packet.stream_index == videoStream) {
			// Decode video frame
			avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);

			// Did we get a video frame?
			if (frameFinished) {
				SDL_LockYUVOverlay(bmp);

				AVPicture pict;
				pict.data[0] = bmp->pixels[0];
				pict.data[1] = bmp->pixels[2];
				pict.data[2] = bmp->pixels[1];

				pict.linesize[0] = bmp->pitches[0];
				pict.linesize[1] = bmp->pitches[2];
				pict.linesize[2] = bmp->pitches[1];

				// Convert the image into YUV format that SDL uses	
				sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
					pFrame->linesize, 0, pCodecCtx->height,
					pict.data, pict.linesize);

				SDL_UnlockYUVOverlay(bmp);

				rect.x = 0;
				rect.y = 0;
				rect.w = pCodecCtx->width;
				rect.h = pCodecCtx->height;
				SDL_DisplayYUVOverlay(bmp, &rect);
				av_free_packet(&packet);
				SDL_Delay(20);  // crude delay so the video does not play too fast
			}
		}
		else if (packet.stream_index == audioStream) {
			packet_queue_put(&audioq, &packet);  // enqueue the audio packet
		}
		else {
			// Free the packet that was allocated by av_read_frame
			av_free_packet(&packet);
		}
		// Event handling: closing the SDL window posts an SDL_QUIT event
		SDL_PollEvent(&event);
		switch (event.type) {
		case SDL_QUIT:
			quit = 1;
			SDL_Quit();
			exit(0);
			break;
		default:
			break;
		}

	}
	//close the video and audio context
	avcodec_close(pCodecCtxOrig);
	avcodec_close(pCodecCtx);
	avcodec_close(aCodecCtxOrig);
	avcodec_close(aCodecCtx);
	// Close the video file
	avformat_close_input(&pFormatCtx);
	return 0;
}


If anything in the video part of the code is unclear, go back and read:
The Simplest Video Player, Part One
The Simplest Video Player, Part Two

The audio callback is as follows:

/*
Audio callback
userdata: in  - the codec context
stream:   out - the device buffer to fill
len:      size of that buffer, in bytes
*/
void audio_callback(void *userdata, Uint8 *stream, int len) {

	AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
	int len1, audio_size;

	static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
	static unsigned int audio_buf_size = 0;
	static unsigned int audio_buf_index = 0;

	while (len > 0) {
		if (audio_buf_index >= audio_buf_size) {
			/* We have already sent all our data; get more */
			audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));  // decode a packet
			if (audio_size < 0) {
				/* If error, output silence */
				audio_buf_size = 1024;
				memset(audio_buf, 0, audio_buf_size);
			}
			else {
				audio_buf_size = audio_size;
			}
			audio_buf_index = 0;
		}
		len1 = audio_buf_size - audio_buf_index;  // decoded bytes still available
		if (len1 > len)                           // clamp to what the device asked for
			len1 = len;
		memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);  // copy into the output buffer
		// account for what was just consumed
		len -= len1;
		stream += len1;
		audio_buf_index += len1;
	}
}

The audio decode function is as follows:

/*
aCodecCtx: the codec context
audio_buf: out - buffer that receives the decoded (and converted) audio
buf_size:  size of that buffer
return:    -1 on error, otherwise data_size, the number of bytes of raw audio produced
*/
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) {

	static AVPacket pkt;
	static uint8_t *audio_pkt_data = NULL;
	static int audio_pkt_size = 0;
	static AVFrame frame;
	int len1;          // number of bytes of the packet consumed by the decoder
	int data_size = 0;
	// In newer FFMPEG versions, avcodec_decode_audio4() outputs samples as AV_SAMPLE_FMT_FLTP (float, planar)
	// rather than AV_SAMPLE_FMT_S16 (signed 16 bits), so the data cannot be handed to SDL directly:
	// it has to be converted with a SwrContext before playback.
	// Output parameters
	uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;  // stereo
	int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);  // channel count from the layout
	int out_nb_samples = 1024;  // samples per channel; adjusted per frame below, or some formats will sound noisy
	AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;  // a sample format SDL can play
	int out_sample_rate = 44100;  // output sample rate; CD audio is typically 44100 Hz
	// Rebuild the channel layout: some codecs lose channel_layout, so derive it from the channel count
	uint64_t in_channel_layout = av_get_default_channel_layout(aCodecCtx->channels);
	// Output buffer; *2 keeps it comfortably larger than the input data
	uint8_t *out_buffer_audio = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);
	// Set up the sample-format converter
	struct SwrContext *au_convert_ctx;
	au_convert_ctx = swr_alloc();
	au_convert_ctx = swr_alloc_set_opts(au_convert_ctx, out_channel_layout, out_sample_fmt, out_sample_rate,
		in_channel_layout, aCodecCtx->sample_fmt, aCodecCtx->sample_rate, 0, NULL);
	swr_init(au_convert_ctx);
	for (;;) {
		while (audio_pkt_size > 0) {  // keep decoding until the whole packet has been consumed
			int got_frame = 0;
			len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt);
			if (len1 < 0) {
				/* if error, skip frame */
				audio_pkt_size = 0;
				break;
			}
			audio_pkt_data += len1;  // start of the remaining data
			audio_pkt_size -= len1;  // bytes remaining in the packet
			data_size = 0;
			if (got_frame) {
				// track the decoder's sample count so some files do not sound noisy
				if (out_nb_samples != frame.nb_samples) {
					out_nb_samples = frame.nb_samples;
				}
				// space needed for the raw output data
				data_size = av_samples_get_buffer_size(NULL, aCodecCtx->channels, out_nb_samples, out_sample_fmt, 1);
				// convert the sample format; otherwise SDL cannot play it
				swr_convert(au_convert_ctx, &out_buffer_audio, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)frame.data, frame.nb_samples);
				assert(data_size <= buf_size);
				memcpy(audio_buf, out_buffer_audio, data_size);  // copy into the caller's buffer
			}
			if (data_size <= 0) {
				/* No data yet, get more frames */
				continue;
			}
			/* We have data, return it and come back for more later */
			swr_free(&au_convert_ctx);  // swr_free (not av_free) also releases the converter's internal buffers
			av_free(out_buffer_audio);  // release the temporary conversion buffer
			return data_size;
		}
		if (pkt.data)
			av_free_packet(&pkt);

		if (quit) {  // exit requested
			swr_free(&au_convert_ctx);
			av_free(out_buffer_audio);
			return -1;
		}
		// get the next packet from the queue (blocking)
		if (packet_queue_get(&audioq, &pkt, 1) < 0) {
			swr_free(&au_convert_ctx);
			av_free(out_buffer_audio);
			return -1;
		}
		audio_pkt_data = pkt.data;  // audio_pkt_data points at the packet's data
		audio_pkt_size = pkt.size;  // audio_pkt_size is the packet's size
	}
}

Result:

[Screenshot: the FFMPEG+SDL audio/video player running]

        When run, both video and audio play, but they are not in sync: the video uses a fixed 20 ms delay per frame, so it looks a little fast, while the audio plays at the correct speed because the sample rate paces it. Making the video delay longer would actually hurt the audio as well: the same loop that displays video also feeds the audio packet queue, so slowing it down starves the queue and the audio starts to stutter.

        So the next improvement would be to run video playback in its own thread and to synchronize the two streams, for example by synchronizing the video to the audio; a rough sketch of that idea follows.
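A very rough, hypothetical sketch of syncing video to audio (not part of this program; audio_clock and video_pts are illustrative names, and a real implementation also has to handle packets with no PTS):

// Maintained by the audio path: seconds of audio already handed to the device,
// e.g. updated from pkt.pts each time audio_decode_frame() dequeues a packet.
double audio_clock = 0;

// In the video loop, instead of a fixed SDL_Delay(20):
double video_pts = packet.pts * av_q2d(pFormatCtx->streams[videoStream]->time_base);  // this frame's time
double delay = video_pts - audio_clock;   // how far ahead of the audio this frame is
if (delay > 0)
	SDL_Delay((Uint32)(delay * 1000));    // wait until the audio catches up
// if delay <= 0 the video is behind: display immediately (or drop the frame)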

       Finally, the complete source code:

extern"C"{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include "include/sdl/SDL.h"
#include "include/sdl/SDL_thread.h"
#include "include/libavutil/time.h"
#include "include/libavutil/avstring.h"
#include "libswresample/swresample.h"
}

#pragma comment(lib, "lib/avformat.lib")
#pragma comment(lib, "lib/avcodec.lib")
#pragma comment(lib, "lib/avutil.lib")
#pragma comment(lib, "lib/swscale.lib")
#pragma comment(lib, "lib/swresample.lib")
#pragma comment(lib, "lib/SDL.lib")
#pragma comment(lib, "lib/SDLmain.lib")

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE  192000
/* Packet queue */
typedef struct PacketQueue {
	AVPacketList *first_pkt, *last_pkt;
	int nb_packets;    // number of packets in the queue
	int size;          // total size of all queued packets, in bytes
	SDL_mutex *mutex;  // mutex
	SDL_cond *cond;    // condition variable
} PacketQueue;


PacketQueue audioq;  // audio packet queue
int quit = 0;        // exit flag

void packet_queue_init(PacketQueue *q) {
	memset(q, 0, sizeof(PacketQueue));
	q->mutex = SDL_CreateMutex();
	q->cond = SDL_CreateCond();
}
int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

	AVPacketList *pkt1;
	if (av_dup_packet(pkt) < 0) {
		return -1;
	}
	pkt1 = (AVPacketList*)av_malloc(sizeof(AVPacketList));
	if (!pkt1)
		return -1;
	pkt1->pkt = *pkt;
	pkt1->next = NULL;

	SDL_LockMutex(q->mutex);
	if (!q->last_pkt)
		q->first_pkt = pkt1;
	else
		q->last_pkt->next = pkt1;
	q->last_pkt = pkt1;
	q->nb_packets++;
	q->size += pkt1->pkt.size;
	SDL_CondSignal(q->cond);
	SDL_UnlockMutex(q->mutex);
	return 0;
}
int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
	AVPacketList *pkt1;
	int ret;

	SDL_LockMutex(q->mutex);

	for (;;) {

		if (quit) {
			ret = -1;
			break;
		}

		pkt1 = q->first_pkt;
		if (pkt1) {
			q->first_pkt = pkt1->next;
			if (!q->first_pkt)
				q->last_pkt = NULL;
			q->nb_packets--;
			q->size -= pkt1->pkt.size;
			*pkt = pkt1->pkt;
			av_free(pkt1);
			ret = 1;
			break;
		}
		else if (!block) {
			ret = 0;
			break;
		}
		else {
			SDL_CondWait(q->cond, q->mutex);
		}
	}
	SDL_UnlockMutex(q->mutex);
	return ret;
}




int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) {

	static AVPacket pkt;
	static uint8_t *audio_pkt_data = NULL;
	static int audio_pkt_size = 0;
	static AVFrame frame;
	int len1, data_size = 0;
	// Convert the sample format; without this the output is just noise
	//---------------------------------------------------------------------------
	// Output parameters
	uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;  // stereo
	int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);  // channel count from the layout
	int out_nb_samples = 1024;  // samples per channel; adjusted per frame below, or playback speed differs between formats
	AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;  // sample format
	int out_sample_rate = 44100;  // output sample rate; CD audio is typically 44100 Hz
	// Rebuild the channel layout: some codecs lose channel_layout, so derive it from the channel count
	uint64_t in_channel_layout = av_get_default_channel_layout(aCodecCtx->channels);
	// Output buffer; *2 keeps it comfortably larger than the input data
	uint8_t *out_buffer_audio = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);
	// Set up the sample-format converter
	struct SwrContext *au_convert_ctx;
	au_convert_ctx = swr_alloc();
	au_convert_ctx = swr_alloc_set_opts(au_convert_ctx, out_channel_layout, out_sample_fmt, out_sample_rate,
		in_channel_layout, aCodecCtx->sample_fmt, aCodecCtx->sample_rate, 0, NULL);
	swr_init(au_convert_ctx);
	for (;;) {
		while (audio_pkt_size > 0) {
			int got_frame = 0;
			len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt);
			if (len1 < 0) {
				/* if error, skip frame */
				audio_pkt_size = 0;
				break;
			}
			audio_pkt_data += len1;
			audio_pkt_size -= len1;
			data_size = 0;
			if (got_frame) {
				//FIX:FLAC,MP3,AAC Different number of samples  
				if (out_nb_samples != frame.nb_samples){
					out_nb_samples = frame.nb_samples;
				}
				data_size = av_samples_get_buffer_size(NULL,
					aCodecCtx->channels,
					out_nb_samples,
					out_sample_fmt,
					1);
				// convert the sample format; otherwise the output is noise
				swr_convert(au_convert_ctx, &out_buffer_audio, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)frame.data, frame.nb_samples);
				assert(data_size <= buf_size);
				memcpy(audio_buf, out_buffer_audio, data_size);
			}
			if (data_size <= 0) {
				/* No data yet, get more frames */
				continue;
			}
			/* We have data, return it and come back for more later */
			swr_free(&au_convert_ctx);  // swr_free (not av_free) also releases the converter's internal buffers
			av_free(out_buffer_audio);  // release the temporary conversion buffer
			return data_size;
		}
		if (pkt.data)
			av_free_packet(&pkt);

		if (quit) {
			swr_free(&au_convert_ctx);
			av_free(out_buffer_audio);
			return -1;
		}

		if (packet_queue_get(&audioq, &pkt, 1) < 0) {
			swr_free(&au_convert_ctx);
			av_free(out_buffer_audio);
			return -1;
		}
		audio_pkt_data = pkt.data;
		audio_pkt_size = pkt.size;
	}
}

void audio_callback(void *userdata, Uint8 *stream, int len) {

	AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
	int len1, audio_size;

	static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
	static unsigned int audio_buf_size = 0;
	static unsigned int audio_buf_index = 0;

	while (len > 0) {
		if (audio_buf_index >= audio_buf_size) {
			/* We have already sent all our data; get more */
			audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
			if (audio_size < 0) {
				/* If error, output silence */
				audio_buf_size = 1024; // arbitrary?
				memset(audio_buf, 0, audio_buf_size);
			}
			else {
				audio_buf_size = audio_size;
			}
			audio_buf_index = 0;
		}
		len1 = audio_buf_size - audio_buf_index;
		if (len1 > len)
			len1 = len;
		memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
		len -= len1;
		stream += len1;
		audio_buf_index += len1;
	    SDL_Delay(1);
	}
}

int main(int argc, char *argv[]) {

	struct SwsContext * sws_ctx = NULL;
	AVFormatContext *pFormatCtx = NULL;
	int             i, videoStream, audioStream;
	AVPacket        packet;
	int             frameFinished;
	AVCodecContext  *pCodecCtxOrig = NULL;
	AVCodecContext  *pCodecCtx = NULL;
	AVCodec         *pCodec = NULL;
	AVFrame         *pFrame = NULL;
	AVCodecContext  *aCodecCtxOrig = NULL;
	AVCodecContext  *aCodecCtx = NULL;
	AVCodec         *aCodec = NULL;
	SDL_Overlay     *bmp;
	SDL_Surface     *screen;
	SDL_Rect        rect;
	SDL_Event       event;
	SDL_AudioSpec   wanted_spec, spec;

	// Register all formats and codecs
	av_register_all();
	if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
		fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
		exit(1);
	}
	// file path
	char* filepath = "2.mp4";
	// Open video file
	if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0)
		return -1; // Couldn't open file
	// Retrieve stream information
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
		return -1; // Couldn't find stream information

	// Find the first video stream and audio stream
	videoStream = -1;
	audioStream = -1;
	for (i = 0; i < pFormatCtx->nb_streams; i++) {
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
			videoStream < 0) {
			videoStream = i;
		}
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
			audioStream < 0) {
			audioStream = i;
		}
	}
	if (videoStream == -1)
		return -1; // Didn't find a video stream
	if (audioStream == -1)
		return -1;

	//find the codeccontext
	aCodecCtxOrig = pFormatCtx->streams[audioStream]->codec;
	//get the decoder
	aCodec = avcodec_find_decoder(aCodecCtxOrig->codec_id);
	if (!aCodec) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1;
	}

	// Copy context
	aCodecCtx = avcodec_alloc_context3(aCodec);
	if (avcodec_copy_context(aCodecCtx, aCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Set audio settings from codec info
	wanted_spec.freq =   44100;
	wanted_spec.format = AUDIO_S16SYS;
	wanted_spec.channels = aCodecCtx->channels;
	wanted_spec.silence = 0;
	wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
	wanted_spec.callback = audio_callback;
	wanted_spec.userdata = aCodecCtx;

	//openaudio device
	if (SDL_OpenAudio(&wanted_spec, &spec) < 0) {
		fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
		return -1;
	}
	//open the audio decoder
	avcodec_open2(aCodecCtx, aCodec, NULL);

	packet_queue_init(&audioq);
	//play audio
	SDL_PauseAudio(0);
	//------------------------------------------------------------
	//video part
	// Get a pointer to the codec context for the video stream
	pCodecCtxOrig = pFormatCtx->streams[videoStream]->codec;
	// Find the decoder for the video stream
	pCodec = avcodec_find_decoder(pCodecCtxOrig->codec_id);
	if (pCodec == NULL) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1; // Codec not found
	}

	// Copy context
	pCodecCtx = avcodec_alloc_context3(pCodec);
	if (avcodec_copy_context(pCodecCtx, pCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Open codec
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
		return -1; // Could not open codec

	// Allocate video frame
	pFrame = av_frame_alloc();

	// Make a screen to put our video
	screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 0, 0);
	if (!screen) {
		fprintf(stderr, "SDL: could not set video mode - exiting\n");
		exit(1);
	}

	// Allocate a place to put our YUV image on that screen
	bmp = SDL_CreateYUVOverlay(pCodecCtx->width,
		pCodecCtx->height,
		SDL_YV12_OVERLAY,  // YV12: Y plane, then V, then U
		screen);

	// initialize SWS context for software scaling
	sws_ctx = sws_getContext(pCodecCtx->width,
		pCodecCtx->height,
		pCodecCtx->pix_fmt,
		pCodecCtx->width,
		pCodecCtx->height,
		PIX_FMT_YUV420P,
		SWS_BILINEAR,
		NULL,
		NULL,
		NULL
		);

	// Read packets: decode and display video, queue audio
	while (av_read_frame(pFormatCtx, &packet) >= 0) {
		// Is this a packet from the video stream?
		if (packet.stream_index == videoStream) {
			// Decode video frame
			avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);

			// Did we get a video frame?
			if (frameFinished) {
				SDL_LockYUVOverlay(bmp);

				AVPicture pict;
				pict.data[0] = bmp->pixels[0];
				pict.data[1] = bmp->pixels[2];
				pict.data[2] = bmp->pixels[1];

				pict.linesize[0] = bmp->pitches[0];
				pict.linesize[1] = bmp->pitches[2];
				pict.linesize[2] = bmp->pitches[1];

				// Convert the image into YUV format that SDL uses	
				sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
					pFrame->linesize, 0, pCodecCtx->height,
					pict.data, pict.linesize);

				SDL_UnlockYUVOverlay(bmp);

				rect.x = 0;
				rect.y = 0;
				rect.w = pCodecCtx->width;
				rect.h = pCodecCtx->height;
				SDL_DisplayYUVOverlay(bmp, &rect);
				av_free_packet(&packet);
				SDL_Delay(20);  // crude delay so the video does not play too fast
			}
		}
		else if (packet.stream_index == audioStream) {
			packet_queue_put(&audioq, &packet);  // enqueue the audio packet
		}
		else {
			// Free the packet that was allocated by av_read_frame
			av_free_packet(&packet);
		}
		
		SDL_PollEvent(&event);
		switch (event.type) {
		case SDL_QUIT:
			quit = 1;
			SDL_Quit();
			exit(0);
			break;
		default:
			break;
		}

	}
	//close the video and audio context
	avcodec_close(pCodecCtxOrig);
	avcodec_close(pCodecCtx);
	avcodec_close(aCodecCtxOrig);
	avcodec_close(aCodecCtx);
	// Close the video file
	avformat_close_input(&pFormatCtx);
	return 0;
}

Reference:
http://dranger.com/ffmpeg/tutorial03.html