很多时候为了方便收听视频文件中的音频信息,我们会将视频文件中的音频流转码输出成独立的音频文件,以便在对应的平台上进行播放。这里就介绍一下如何通过FFmpeg将视频文件中的音频流转码成特定编码格式的音频文件。
转码过程中我们先对视频文件进行解封装得到音频流,然后通过对应的音频解码器对音频流进行解码得到原始的音频帧。得到音频帧之后,我们就可以通过FFmpeg提供的各种滤镜对音频参数进行调整,最后再把调整后的音频帧交给目标编码器重新编码并写入输出文件。可以调整的参数包括:采样格式、采样率、通道布局、码率(比特率),其中码率是在编码器上设置的。
下面介绍一下各个参数:
1.采样格式
采样格式就是每个音频采样点的数据格式,FFmpeg支持下面的格式(名称以P结尾的是planar格式):
/* Audio sample formats as listed by FFmpeg; the numeric values are spelled out
 * explicitly so that integer casts such as (AVSampleFormat)0 are easy to read. */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1, /* no sample format */
    AV_SAMPLE_FMT_U8   = 0,  /* 8-bit unsigned samples */
    AV_SAMPLE_FMT_S16  = 1,  /* 16-bit signed samples */
    AV_SAMPLE_FMT_S32  = 2,  /* 32-bit signed samples */
    AV_SAMPLE_FMT_FLT  = 3,  /* float samples */
    AV_SAMPLE_FMT_DBL  = 4,  /* double samples */
    AV_SAMPLE_FMT_U8P  = 5,  /* 8-bit unsigned samples, planar */
    AV_SAMPLE_FMT_S16P = 6,  /* 16-bit signed samples, planar */
    AV_SAMPLE_FMT_S32P = 7,  /* 32-bit signed samples, planar */
    AV_SAMPLE_FMT_FLTP = 8,  /* float samples, planar */
    AV_SAMPLE_FMT_DBLP = 9,  /* double samples, planar */
    AV_SAMPLE_FMT_S64  = 10, /* 64-bit signed samples */
    AV_SAMPLE_FMT_S64P = 11, /* 64-bit signed samples, planar */
    AV_SAMPLE_FMT_NB   = 12  /* number of sample formats; DO NOT USE if linking dynamically */
};
2.采样率
采样的频率,指的是每秒钟进行采样的次数,频率越高,离散的数据和连续的模拟信号的拟合就越接近,声音的质量也就越高,占的存储也就越大。通常情况下,常用的采样率有22.05kHz/44.1kHz/48kHz等
3.通道布局
channels 为音频的通道数 1 2 3 4 5…
channel_layout 为音频通道的布局类型,如单声道(mono)、立体声(stereo)、5.1环绕声等等
4.码率
数据传输时单位时间传送的数据位数,一般我们用的单位是kbps即千位每秒。通俗一点的理解就是每秒钟音频所占用的数据量(注意它和采样率不是一回事):在同一种编码格式下,码率越大,压缩带来的损失就越小,精度就越高,处理出来的文件就越接近原始文件,同时文件体积也越大。
使用FFmpeg提取音频流进行转码并动态调整音频参数的示例如下所示:
extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/channel_layout.h>
#include <libavfilter/avfilter.h>
#include <libswresample/swresample.h>
}
#include <cstdio>
#include <string>
#include "audio_filter.h"
//@1输出文件名称 @2输入文件名称 @3采样格式 @4采样率 @5通道布局 @6码率
int extract_audio(const char *output_filename, const char *input_filename, AVSampleFormat sample_fmt,
int sample_rate, uint64_t channel_layout, uint64_t bitrate)
{
AVFormatContext *inFmtCtx = nullptr;
AVFormatContext *outFmtCtx = nullptr;
AVCodecContext *aDecCtx = nullptr;
AVCodecContext *aEncCtx = nullptr;
AVStream *aOutStream = nullptr;
int ret;
//打开文件获取流信息
ret = avformat_open_input(&inFmtCtx, input_filename, nullptr, nullptr);
avformat_find_stream_info(inFmtCtx, nullptr);
avformat_alloc_output_context2(&outFmtCtx, nullptr, nullptr, output_filename);
for (int i = 0; i < inFmtCtx->nb_streams; ++i)
{
AVStream *inStream = inFmtCtx->streams[i];
if (inStream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
{
//打开解码器
AVCodec *decoder = avcodec_find_decoder(inStream->codecpar->codec_id);
aDecCtx = avcodec_alloc_context3(decoder);
ret = avcodec_parameters_to_context(aDecCtx, inStream->codecpar);
ret = avcodec_open2(aDecCtx, decoder, nullptr);
//创建音频编码器
AVCodec *encoder = avcodec_find_encoder(outFmtCtx->oformat->audio_codec);
aOutStream = avformat_new_stream(outFmtCtx, encoder);
aOutStream->id = outFmtCtx->nb_streams - 1;
aEncCtx = avcodec_alloc_context3(encoder);
//指定编码器的参数
aEncCtx->codec_id = encoder->id; //编码器ID
aEncCtx->sample_fmt = sample_fmt ? sample_fmt : aDecCtx->sample_fmt; //采样格式
aEncCtx->sample_rate = sample_rate ? sample_rate : aDecCtx->sample_rate;//采样率
aEncCtx->channel_layout = channel_layout;//通道布局
aEncCtx->channels = av_get_channel_layout_nb_channels(channel_layout);//通道数
aEncCtx->bit_rate = bitrate ? bitrate : aDecCtx->bit_rate; //码率
aEncCtx->time_base = { 1, aEncCtx->sample_rate }; //时间基
aOutStream->time_base = aEncCtx->time_base;
if (outFmtCtx->oformat->flags & AVFMT_GLOBALHEADER)
aEncCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
ret = avcodec_open2(aEncCtx, encoder, nullptr);
ret = avcodec_parameters_from_context(aOutStream->codecpar, aEncCtx);
av_dict_copy(&aOutStream->metadata, inStream->metadata, 0);
break;
}
}
if (!(outFmtCtx->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&outFmtCtx->pb, output_filename, AVIO_FLAG_WRITE);
if (ret < 0)
{
return -1;
}
}
//写文件头
ret = avformat_write_header(outFmtCtx, nullptr);
if (ret < 0)
{
return -1;
}
AVFrame *inAudioFrame = av_frame_alloc();
AVFrame *outAudioFrame = av_frame_alloc();
outAudioFrame->format = aEncCtx->sample_fmt;
outAudioFrame->sample_rate = aEncCtx->sample_rate;
outAudioFrame->channel_layout = aEncCtx->channel_layout;
outAudioFrame->nb_samples = aEncCtx->frame_size;
ret = av_frame_get_buffer(outAudioFrame, 0);
int64_t audio_pts = 0;
//通过滤镜对音频帧进行处理操作
AudioFilter filter;
char description[512];
AudioConfig inConfig(aDecCtx->sample_fmt, aDecCtx->sample_rate, aDecCtx->channel_layout, aDecCtx->time_base);
AudioConfig outConfig(aEncCtx->sample_fmt, aEncCtx->sample_rate, aEncCtx->channel_layout, aEncCtx->time_base);
char ch_layout[64];
av_get_channel_layout_string(ch_layout, sizeof(ch_layout),
av_get_channel_layout_nb_channels(aEncCtx->channel_layout), aEncCtx->channel_layout);
snprintf(description, sizeof(description),
"[in]aresample=sample_rate=%d[res];[res]aformat=sample_fmts=%s:sample_rates=%d:channel_layouts=%s[out]",
aEncCtx->sample_rate,
av_get_sample_fmt_name(aEncCtx->sample_fmt),
aEncCtx->sample_rate,
ch_layout);
filter.create(description, &inConfig, &outConfig);
while (true) {
AVPacket inPacket{ nullptr };
av_init_packet(&inPacket);
ret = av_read_frame(inFmtCtx, &inPacket);
if (ret == AVERROR_EOF) {
break;
}
else if (ret < 0)
{
return -1;
}
//调整完音频参数之后对音频帧进行编码
if (inPacket.stream_index == AVMEDIA_TYPE_AUDIO)
{
avcodec_send_packet(aDecCtx, &inPacket);
avcodec_receive_frame(aDecCtx, inAudioFrame);
if (ret == 0) {
ret = filter.addInput1(inAudioFrame);
av_frame_unref(inAudioFrame);
do {
outAudioFrame->nb_samples = aEncCtx->frame_size;
ret = filter.getFrame(outAudioFrame);
if (ret == 0) {
outAudioFrame->pts = audio_pts;
audio_pts += outAudioFrame->nb_samples;
ret = avcodec_send_frame(aEncCtx, outAudioFrame);
}
else
{
break;
}
do {
AVPacket outPacket{ nullptr };
av_init_packet(&outPacket);
ret = avcodec_receive_packet(aEncCtx, &outPacket);
if (ret == 0) {
av_packet_rescale_ts(&outPacket, aEncCtx->time_base, aOutStream->time_base);
outPacket.stream_index = aOutStream->index;
ret = av_interleaved_write_frame(outFmtCtx, &outPacket);
if (ret < 0) {
break;
}
}
else {
break;
}
} while (true);
} while (true);
}
}
}
//最后刷新音频数据
int eof = 0;
do {
ret = filter.getFrame(outAudioFrame);
if (ret == 0) {
outAudioFrame->pts = audio_pts;
audio_pts += outAudioFrame->nb_samples;
}
ret = avcodec_send_frame(aEncCtx, ret == 0 ? outAudioFrame : nullptr);
do {
AVPacket outPacket{ nullptr };
ret = avcodec_receive_packet(aEncCtx, &outPacket);
if (ret == 0) {
av_packet_rescale_ts(&outPacket, aEncCtx->time_base, aOutStream->time_base);
outPacket.stream_index = aOutStream->index;
ret = av_interleaved_write_frame(outFmtCtx, &outPacket);
if (ret < 0) {
eof = 1;
break;
}
}
else if (ret == AVERROR_EOF) {
eof = 1;
break;
}
else
{
break;
}
} while (true);
} while (!eof);
filter.destroy();
//清理编码器和解码器
av_write_trailer(outFmtCtx);
avformat_close_input(&inFmtCtx);
av_frame_free(&inAudioFrame);
av_frame_free(&outAudioFrame);
avcodec_free_context(&aDecCtx);
avcodec_free_context(&aEncCtx);
avformat_free_context(inFmtCtx);
avformat_free_context(outFmtCtx);
return 0;
}
// Command-line entry point: argv[1] is the input media file, argv[2] the output audio file.
// The audio is resampled to 48 kHz stereo; passing 0 for the sample format and the
// bit rate is forwarded unchanged to extract_audio().
// Returns 0 on success and 1 on bad usage or when the transcode fails.
int main(int argc, char* argv[])
{
    // Both paths are required; reading argv[1]/argv[2] without this check is undefined behaviour.
    if (argc < 3)
    {
        fprintf(stderr, "usage: %s <input_file> <output_file>\n", argc > 0 ? argv[0] : "extract_audio");
        return 1;
    }
    std::string input_file_path = std::string(argv[1]);
    std::string output_file_path = std::string(argv[2]);
    int ret = extract_audio(output_file_path.c_str(), input_file_path.c_str(), (AVSampleFormat)0, 48000, AV_CH_LAYOUT_STEREO, 0);
    if (ret != 0)
    {
        fprintf(stderr, "failed to extract audio from %s\n", input_file_path.c_str());
        return 1;
    }
    return 0;
}
这里用到的音频滤镜封装类(即代码中audio_filter.h里的AudioFilter和AudioConfig),在另一篇文章里面有,这里就不重复列举了,可以参考另一篇文章:
FFmpeg进阶: 给视频添加背景音乐