FFmpeg Advanced: Transcoding the Audio from a Video File

Date: 2022-12-19 08:54:21

Quite often, to make it easier to listen to the audio contained in a video file, we transcode the video's audio stream into a standalone audio file that can be played back on whatever platform we like. This article shows how to use FFmpeg to transcode the audio stream of a video file into an audio file with a specific encoding format.

During transcoding we first demultiplex the video file to obtain the audio stream, then decode it with the matching audio decoder to get raw audio frames. Once we have the raw frames, we can use FFmpeg's filters to adjust audio parameters such as the sample format, sample rate, and channel layout, while the target bit rate is set on the encoder.
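As a minimal sketch of that demux-and-decode step (error handling omitted; "input.mp4" is just a placeholder filename), the skeleton looks roughly like this before any filtering or encoding is added:

//Minimal demux + decode skeleton (a sketch only; error handling omitted)
AVFormatContext *fmtCtx = nullptr;
avformat_open_input(&fmtCtx, "input.mp4", nullptr, nullptr);
avformat_find_stream_info(fmtCtx, nullptr);

//Locate the audio stream and open its decoder
int audioIndex = av_find_best_stream(fmtCtx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
AVCodec *dec = avcodec_find_decoder(fmtCtx->streams[audioIndex]->codecpar->codec_id);
AVCodecContext *decCtx = avcodec_alloc_context3(dec);
avcodec_parameters_to_context(decCtx, fmtCtx->streams[audioIndex]->codecpar);
avcodec_open2(decCtx, dec, nullptr);

AVPacket pkt{};
AVFrame *frame = av_frame_alloc();
while (av_read_frame(fmtCtx, &pkt) >= 0) {
	if (pkt.stream_index == audioIndex) {
		avcodec_send_packet(decCtx, &pkt);
		while (avcodec_receive_frame(decCtx, frame) == 0) {
			//frame now holds raw audio samples, ready for filtering and encoding
		}
	}
	av_packet_unref(&pkt);
}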

The individual parameters are described below:
1. Sample format
The sample format is the data type of each audio sample point. The following formats are supported:

enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
    AV_SAMPLE_FMT_S64,         ///< signed 64 bits
    AV_SAMPLE_FMT_S64P,        ///< signed 64 bits, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};
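The P suffix marks planar formats, where each channel is stored in its own data plane rather than interleaved with the others. As a small illustrative sketch (not part of the main example below), the standard libavutil helpers can query these properties at runtime:

extern "C"
{
#include <libavutil/samplefmt.h>
}
#include <cstdio>

int main()
{
	AVSampleFormat fmt = AV_SAMPLE_FMT_FLTP;                          //e.g. the format used by the AAC encoder
	printf("name: %s\n", av_get_sample_fmt_name(fmt));                //prints "fltp"
	printf("bytes per sample: %d\n", av_get_bytes_per_sample(fmt));   //prints 4
	printf("planar: %d\n", av_sample_fmt_is_planar(fmt));             //prints 1
	return 0;
}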

2. Sample rate
The sample rate is the number of samples taken per second. The higher the rate, the more closely the discrete data approximates the continuous analog signal, so the better the sound quality and the more storage the audio takes. Commonly supported sample rates include 22.05 kHz, 44.1 kHz, and 48 kHz.
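For instance, uncompressed 16-bit stereo PCM at 48 kHz occupies 48000 * 2 * 2 = 192000 bytes per second; a tiny sketch of that arithmetic using av_get_bytes_per_sample:

extern "C"
{
#include <libavutil/samplefmt.h>
}
#include <cstdio>

int main()
{
	int sample_rate = 48000;                                               //samples per second
	int channels = 2;                                                      //stereo
	int bytes_per_sample = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);     //2 bytes per sample
	printf("PCM bytes per second: %d\n", sample_rate * channels * bytes_per_sample); //192000
	return 0;
}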

3. Channel layout
channels is the number of audio channels: 1, 2, 3, 4, 5, ...
channel_layout is the arrangement of those channels, e.g. mono, dual channel, stereo, and so on.
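A short sketch of the two helpers used later in the example, av_get_channel_layout_nb_channels and av_get_channel_layout_string, which map a layout mask to its channel count and name:

extern "C"
{
#include <libavutil/channel_layout.h>
}
#include <cstdio>

int main()
{
	uint64_t layout = AV_CH_LAYOUT_STEREO;
	char name[64];
	int channels = av_get_channel_layout_nb_channels(layout);            //2
	av_get_channel_layout_string(name, sizeof(name), channels, layout);  //"stereo"
	printf("channels=%d layout=%s\n", channels, name);
	return 0;
}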

4. Bit rate
The bit rate is the number of bits transferred per unit of time, usually expressed in kbps (kilobits per second). The higher the bit rate, the more data is kept for each second of audio, the higher the precision, and the closer the encoded file is to the original.
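As a rough estimate, the audio payload is about bitrate * duration / 8 bytes, not counting container overhead; a tiny sketch of that calculation:

#include <cstdio>
#include <cstdint>

int main()
{
	int64_t bit_rate = 128000;                    //128 kbps
	int duration_sec = 60;                        //one minute of audio
	int64_t bytes = bit_rate * duration_sec / 8;  //about 960000 bytes, container overhead excluded
	printf("approx. audio payload: %lld bytes\n", (long long)bytes);
	return 0;
}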

An example that uses FFmpeg to extract the audio stream, transcode it, and adjust the audio parameters on the fly is shown below:


extern "C" 
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavfilter/avfilter.h>
#include <libswresample/swresample.h>
}
#include <string>

#include "audio_filter.h"

//@1 output filename @2 input filename @3 sample format @4 sample rate @5 channel layout @6 bit rate
int extract_audio(const char *output_filename, const char *input_filename, AVSampleFormat sample_fmt,
	int sample_rate, uint64_t channel_layout, uint64_t bitrate) 
{
	
	AVFormatContext *inFmtCtx = nullptr;
	AVFormatContext *outFmtCtx = nullptr;

	AVCodecContext *aDecCtx = nullptr;
	AVCodecContext *aEncCtx = nullptr;

	AVStream *aOutStream = nullptr;

	int ret;
	int audioStreamIndex = -1;

	//Open the input, read the stream info, and allocate the output context
	ret = avformat_open_input(&inFmtCtx, input_filename, nullptr, nullptr);
	if (ret < 0)
		return -1;
	avformat_find_stream_info(inFmtCtx, nullptr);
	avformat_alloc_output_context2(&outFmtCtx, nullptr, nullptr, output_filename);
	
	for (int i = 0; i < inFmtCtx->nb_streams; ++i) 
	{
		AVStream *inStream = inFmtCtx->streams[i];
		if (inStream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
		{
			audioStreamIndex = i;

			//Open a decoder for the audio stream
			AVCodec *decoder = avcodec_find_decoder(inStream->codecpar->codec_id);
			aDecCtx = avcodec_alloc_context3(decoder);
			ret = avcodec_parameters_to_context(aDecCtx, inStream->codecpar);
			ret = avcodec_open2(aDecCtx, decoder, nullptr);


			//Create an encoder for the output container's default audio codec
			AVCodec *encoder = avcodec_find_encoder(outFmtCtx->oformat->audio_codec);
			aOutStream = avformat_new_stream(outFmtCtx, encoder);
			aOutStream->id = outFmtCtx->nb_streams - 1;
			aEncCtx = avcodec_alloc_context3(encoder);

			//Configure the encoder; AV_SAMPLE_FMT_NONE or 0 means "reuse the decoder's value"
			aEncCtx->codec_id = encoder->id; //codec ID
			aEncCtx->sample_fmt = sample_fmt != AV_SAMPLE_FMT_NONE ? sample_fmt : aDecCtx->sample_fmt; //sample format
			aEncCtx->sample_rate = sample_rate ? sample_rate : aDecCtx->sample_rate; //sample rate
			aEncCtx->channel_layout = channel_layout; //channel layout
			aEncCtx->channels = av_get_channel_layout_nb_channels(channel_layout); //channel count
			aEncCtx->bit_rate = bitrate ? bitrate : aDecCtx->bit_rate; //bit rate
			aEncCtx->time_base = { 1, aEncCtx->sample_rate }; //time base
			aOutStream->time_base = aEncCtx->time_base;


			if (outFmtCtx->oformat->flags & AVFMT_GLOBALHEADER)
				aEncCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
			ret = avcodec_open2(aEncCtx, encoder, nullptr);
			ret = avcodec_parameters_from_context(aOutStream->codecpar, aEncCtx);
			av_dict_copy(&aOutStream->metadata, inStream->metadata, 0);
			break;
		}
	}

	if (!(outFmtCtx->oformat->flags & AVFMT_NOFILE)) {
		
		ret = avio_open(&outFmtCtx->pb, output_filename, AVIO_FLAG_WRITE);
		if (ret < 0) 
		{
			return -1;
		}
	}

	//Write the container header
	ret = avformat_write_header(outFmtCtx, nullptr);
	if (ret < 0) 
	{
		return -1;
	}

	AVFrame *inAudioFrame = av_frame_alloc();
	AVFrame *outAudioFrame = av_frame_alloc();

	outAudioFrame->format = aEncCtx->sample_fmt;
	outAudioFrame->sample_rate = aEncCtx->sample_rate;
	outAudioFrame->channel_layout = aEncCtx->channel_layout;
	outAudioFrame->nb_samples = aEncCtx->frame_size;
	ret = av_frame_get_buffer(outAudioFrame, 0);
	
	int64_t audio_pts = 0;

	//Build an audio filter graph that converts the decoded frames to the encoder's parameters
	AudioFilter filter;
	char description[512];
	AudioConfig inConfig(aDecCtx->sample_fmt, aDecCtx->sample_rate, aDecCtx->channel_layout, aDecCtx->time_base);
	AudioConfig outConfig(aEncCtx->sample_fmt, aEncCtx->sample_rate, aEncCtx->channel_layout, aEncCtx->time_base);
	char ch_layout[64];
	av_get_channel_layout_string(ch_layout, sizeof(ch_layout),
		av_get_channel_layout_nb_channels(aEncCtx->channel_layout), aEncCtx->channel_layout);
	snprintf(description, sizeof(description),
		"[in]aresample=sample_rate=%d[res];[res]aformat=sample_fmts=%s:sample_rates=%d:channel_layouts=%s[out]",
		aEncCtx->sample_rate,
		av_get_sample_fmt_name(aEncCtx->sample_fmt),
		aEncCtx->sample_rate,
		ch_layout);
	filter.create(description, &inConfig, &outConfig);

	while (true) {
		AVPacket inPacket{ nullptr };
		av_init_packet(&inPacket);
		ret = av_read_frame(inFmtCtx, &inPacket);
		if (ret == AVERROR_EOF) {
			break;
		}
		else if (ret < 0) 
		{
			return -1;
		}

		//Decode packets from the selected audio stream, then filter and re-encode the frames
		if (inPacket.stream_index == audioStreamIndex) 
		{
			avcodec_send_packet(aDecCtx, &inPacket);
			ret = avcodec_receive_frame(aDecCtx, inAudioFrame);

			if (ret == 0) {
				
				ret = filter.addInput1(inAudioFrame);
				av_frame_unref(inAudioFrame);

				do {
					outAudioFrame->nb_samples = aEncCtx->frame_size;
					ret = filter.getFrame(outAudioFrame);
					if (ret == 0) {

						outAudioFrame->pts = audio_pts;
						audio_pts += outAudioFrame->nb_samples;
						ret = avcodec_send_frame(aEncCtx, outAudioFrame);	
					}
					else 
					{
						
						break;
					}

					do {
						AVPacket outPacket{ nullptr };
						av_init_packet(&outPacket);
						ret = avcodec_receive_packet(aEncCtx, &outPacket);
						if (ret == 0) {
							av_packet_rescale_ts(&outPacket, aEncCtx->time_base, aOutStream->time_base);
							outPacket.stream_index = aOutStream->index;
							
							ret = av_interleaved_write_frame(outFmtCtx, &outPacket);
							if (ret < 0) {
								
								break;
							}
						}
						else {
							
							break;
						}
					} while (true);

				} while (true);

			
			}
		}
		av_packet_unref(&inPacket);
	}

	//Finally flush the filter and the encoder
	int eof = 0;
	do {
		ret = filter.getFrame(outAudioFrame);
		if (ret == 0) {
			outAudioFrame->pts = audio_pts;
			audio_pts += outAudioFrame->nb_samples;
			
		}
		
		ret = avcodec_send_frame(aEncCtx, ret == 0 ? outAudioFrame : nullptr);
		
		do {
			AVPacket outPacket{ nullptr };
			ret = avcodec_receive_packet(aEncCtx, &outPacket);
			if (ret == 0) {
				av_packet_rescale_ts(&outPacket, aEncCtx->time_base, aOutStream->time_base);
				outPacket.stream_index = aOutStream->index;
				
				ret = av_interleaved_write_frame(outFmtCtx, &outPacket);
				if (ret < 0) {
					
					eof = 1;
					break;
				}
			}
			else if (ret == AVERROR_EOF) {
				
				eof = 1;
				break;
			}
			else 
			{
				break;
			}
		} while (true);

	} while (!eof);

	filter.destroy();

	//Write the trailer and release all resources
	av_write_trailer(outFmtCtx);
	av_frame_free(&inAudioFrame);
	av_frame_free(&outAudioFrame);
	avcodec_free_context(&aDecCtx);
	avcodec_free_context(&aEncCtx);
	avformat_close_input(&inFmtCtx);

	if (!(outFmtCtx->oformat->flags & AVFMT_NOFILE))
		avio_closep(&outFmtCtx->pb);
	avformat_free_context(outFmtCtx);

	return 0;
}

int main(int argc, char* argv[])
{
	if (argc < 3)
		return -1;

	std::string input_file_path = std::string(argv[1]);
	std::string output_file_path = std::string(argv[2]);
	//AV_SAMPLE_FMT_NONE and bit rate 0 keep the decoder's sample format and bit rate
	return extract_audio(output_file_path.c_str(), input_file_path.c_str(), AV_SAMPLE_FMT_NONE, 48000, AV_CH_LAYOUT_STEREO, 0);
}

The audio filter wrapper used here is covered in another article and is not repeated here; see:
FFmpeg Advanced: Adding Background Music to a Video
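For reference, the interface that the example assumes from audio_filter.h looks roughly like the declaration below. This is only a sketch reconstructed from the calls used above; the actual definition in that article may differ:

//A sketch of the wrapper assumed above, reconstructed from its usage (declaration only)
struct AudioConfig {
	AVSampleFormat sample_fmt;
	int sample_rate;
	uint64_t channel_layout;
	AVRational time_base;
	AudioConfig(AVSampleFormat fmt, int rate, uint64_t layout, AVRational tb)
		: sample_fmt(fmt), sample_rate(rate), channel_layout(layout), time_base(tb) {}
};

class AudioFilter {
public:
	int create(const char *description, const AudioConfig *in, const AudioConfig *out); //build the filter graph from the description string
	int addInput1(AVFrame *frame);  //push a decoded frame into the graph
	int getFrame(AVFrame *frame);   //pull a converted frame; non-zero when none is available
	void destroy();                 //release the filter graph
};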