added audio resampling, fixed some issues

This commit is contained in:
Selim Mustafaev 2016-11-06 17:36:09 +03:00
parent 2c1673538d
commit 01354ad13b
11 changed files with 93 additions and 22 deletions

View File

@ -104,13 +104,15 @@ FFMPEG_FIND(LIBAVDEVICE avdevice avdevice.h)
FFMPEG_FIND(LIBAVCODEC avcodec avcodec.h) FFMPEG_FIND(LIBAVCODEC avcodec avcodec.h)
FFMPEG_FIND(LIBAVUTIL avutil avutil.h) FFMPEG_FIND(LIBAVUTIL avutil avutil.h)
FFMPEG_FIND(LIBSWSCALE swscale swscale.h) FFMPEG_FIND(LIBSWSCALE swscale swscale.h)
FFMPEG_FIND(LIBSWRESAMPLE swresample swresample.h)
SET(FFMPEG_FOUND "NO") SET(FFMPEG_FOUND "NO")
IF (FFMPEG_LIBAVFORMAT_FOUND AND IF (FFMPEG_LIBAVFORMAT_FOUND AND
FFMPEG_LIBAVDEVICE_FOUND AND FFMPEG_LIBAVDEVICE_FOUND AND
FFMPEG_LIBAVCODEC_FOUND AND FFMPEG_LIBAVCODEC_FOUND AND
FFMPEG_LIBAVUTIL_FOUND AND FFMPEG_LIBAVUTIL_FOUND AND
FFMPEG_LIBSWSCALE_FOUND FFMPEG_LIBSWSCALE_FOUND AND
FFMPEG_LIBSWRESAMPLE_FOUND
) )
@ -126,6 +128,7 @@ IF (FFMPEG_LIBAVFORMAT_FOUND AND
${FFMPEG_LIBAVCODEC_LIBS} ${FFMPEG_LIBAVCODEC_LIBS}
${FFMPEG_LIBAVUTIL_LIBS} ${FFMPEG_LIBAVUTIL_LIBS}
${FFMPEG_LIBSWSCALE_LIBS} ${FFMPEG_LIBSWSCALE_LIBS}
${FFMPEG_LIBSWRESAMPLE_LIBS}
) )
ELSE () ELSE ()

View File

@ -12,11 +12,15 @@ namespace ffcpp {
} }
// NOTE: this span is a side-by-side diff rendering. On each line the text on the
// left is the pre-commit version (a single addSamples that reallocated and wrote
// directly from the frame); the text on the right is the post-commit version,
// which splits the work into the two overloads described below.
//
// addSamples(const Frame&): converts the Frame to a const AVFrame* and forwards
// its data pointers and nb_samples to the (void**, int) overload.
void FifoQueue::addSamples(const Frame &frame) { void FifoQueue::addSamples(const Frame &frame) {
int res = av_audio_fifo_realloc(_fifo, av_audio_fifo_size(_fifo) + frame.samplesCount()); const AVFrame* frameImpl = frame;
addSamples((void**)frameImpl->data, frameImpl->nb_samples);
}
// addSamples(void**, int): resizes the FIFO to its current size plus samplesCount,
// then writes samplesCount samples from data into it. The return code of each
// libav call is handed to throwIfError together with a descriptive message.
// NOTE(review): the layout expected for `data` (one pointer per plane vs. a single
// interleaved buffer) is not visible here — confirm against the FIFO's sample format.
void FifoQueue::addSamples(void **data, int samplesCount) {
int res = av_audio_fifo_realloc(_fifo, av_audio_fifo_size(_fifo) + samplesCount);
throwIfError(res, "cannot reallocate fifo queue"); throwIfError(res, "cannot reallocate fifo queue");
const AVFrame* frameImpl = frame; res = av_audio_fifo_write(_fifo, data, samplesCount);
res = av_audio_fifo_write(_fifo, (void**)frameImpl->data, frame.samplesCount());
throwIfError(res, "cannot add data from frame to fifo queue"); throwIfError(res, "cannot add data from frame to fifo queue");
} }

View File

@ -16,6 +16,7 @@ namespace ffcpp {
public: public:
FifoQueue(AVSampleFormat sampleFormat, int channels, int frameSize); FifoQueue(AVSampleFormat sampleFormat, int channels, int frameSize);
void addSamples(const Frame& frame); void addSamples(const Frame& frame);
void addSamples(void** data, int samplesCount);
bool enoughSamples() const; bool enoughSamples() const;
void readFrame(Frame& frame); void readFrame(Frame& frame);
}; };

View File

@ -102,13 +102,18 @@ namespace ffcpp {
AVCodecContext* ctx = stream->codec; AVCodecContext* ctx = stream->codec;
ctx->width = width; ctx->width = width;
ctx->height = height; ctx->height = height;
if(pixelFormat == AV_PIX_FMT_NONE) {
ctx->pix_fmt = codec->pix_fmts[0];
} else {
ctx->pix_fmt = pixelFormat; ctx->pix_fmt = pixelFormat;
}
_streams.emplace_back(stream, codec); _streams.emplace_back(stream, codec);
return _streams.back(); return _streams.back();
} }
Stream& MediaFile::addAudioStream(AVCodecID codecID, int channels, int sampleRate) { Stream& MediaFile::addAudioStream(AVCodecID codecID, int channels, int sampleRate, AVSampleFormat sampleFormat) {
AVCodec* codec = avcodec_find_encoder(codecID); AVCodec* codec = avcodec_find_encoder(codecID);
if(!codec) throw std::runtime_error("cannot find codec"); if(!codec) throw std::runtime_error("cannot find codec");
@ -117,7 +122,11 @@ namespace ffcpp {
// TODO: Here we need adjust encoder parameters // TODO: Here we need adjust encoder parameters
AVCodecContext* ctx = stream->codec; AVCodecContext* ctx = stream->codec;
if(sampleFormat == AV_SAMPLE_FMT_NONE) {
ctx->sample_fmt = codec->sample_fmts[0]; ctx->sample_fmt = codec->sample_fmts[0];
} else {
ctx->sample_fmt = sampleFormat;
}
ctx->global_quality = 10; ctx->global_quality = 10;
ctx->channels = channels; ctx->channels = channels;
ctx->channel_layout = (uint64_t)av_get_default_channel_layout(channels); ctx->channel_layout = (uint64_t)av_get_default_channel_layout(channels);

View File

@ -33,8 +33,8 @@ namespace ffcpp {
bool hasAudio() const; bool hasAudio() const;
Stream& videoStream(size_t index = 0); Stream& videoStream(size_t index = 0);
Stream& audioStream(size_t index = 0); Stream& audioStream(size_t index = 0);
Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat); Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat = AV_PIX_FMT_NONE);
Stream& addAudioStream(AVCodecID codecID, int channels, int sampleRate); Stream& addAudioStream(AVCodecID codecID, int channels, int sampleRate, AVSampleFormat sampleFormat = AV_SAMPLE_FMT_NONE);
Packet readPacket(); Packet readPacket();
AVMediaType packetType(const Packet& packet); AVMediaType packetType(const Packet& packet);

View File

@ -1,6 +1,8 @@
#include "Resampler.h" #include "Resampler.h"
#include "ffcpp.h" #include "ffcpp.h"
#include <stdexcept> #include <stdexcept>
#include <memory>
#include <iostream>
extern "C" { extern "C" {
#include <libavutil/opt.h> #include <libavutil/opt.h>
@ -10,6 +12,10 @@ namespace ffcpp {
Resampler::Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat, int outChannelLayout, Resampler::Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat, int outChannelLayout,
int outSampleRate, AVSampleFormat outSampleFormat) { int outSampleRate, AVSampleFormat outSampleFormat) {
_dstChannelLayout = outChannelLayout;
_dstSampleFormat = outSampleFormat;
_dstSampleRate = outSampleRate;
_swrContext = swr_alloc(); _swrContext = swr_alloc();
if(!_swrContext) { if(!_swrContext) {
throw new std::runtime_error("cannot create resampler"); throw new std::runtime_error("cannot create resampler");
@ -27,13 +33,34 @@ namespace ffcpp {
throwIfError(res, "cannot init resampler"); throwIfError(res, "cannot init resampler");
} }
// Convenience constructor added by this commit: delegates to the six-argument
// constructor, taking the input channel layout / sample rate / sample format from
// decoderCtx and the output ones from encoderCtx.
// NOTE(review): channel_layout is narrowed with a C-style (int) cast; FFmpeg
// declares that field as a 64-bit mask, so layouts using high bits would be
// truncated — TODO confirm this is acceptable for the supported layouts.
Resampler::Resampler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx)
: Resampler((int)decoderCtx->channel_layout, decoderCtx->sample_rate, decoderCtx->sample_fmt,
(int)encoderCtx->channel_layout, encoderCtx->sample_rate, encoderCtx->sample_fmt) {
}
// Destructor (identical in both diff columns, i.e. unchanged by this commit):
// if a SwrContext is held, it is released with swr_free, which receives the
// address of the member pointer.
Resampler::~Resampler() { Resampler::~Resampler() {
if(_swrContext) { if(_swrContext) {
swr_free(&_swrContext); swr_free(&_swrContext);
} }
} }
// NOTE: side-by-side diff rendering. The left column is the removed stub
// `Resample`, whose whole body was `return Frame();`. The right column is the new
// `resample` implementation described here.
//
// Converts inFrame through the SwrContext into a newly constructed Frame and
// returns it:
//   - the channel count is derived from _dstChannelLayout,
//   - the output sample count comes from swr_get_out_samples for the input's
//     nb_samples,
//   - the output Frame is built from that count plus the stored destination
//     sample format and sample rate,
//   - the swr_convert_frame result is passed to throwIfError.
// NOTE(review): outSamples is given to the Frame constructor unchecked; the FFmpeg
// documentation describes swr_get_out_samples as returning a negative value on
// error — TODO confirm and handle that case if it can occur here.
Frame Resampler::Resample(Frame& inFrame) { Frame Resampler::resample(Frame& inFrame) {
return Frame(); int channelsCount = av_get_channel_layout_nb_channels(_dstChannelLayout);
AVFrame* fin = inFrame;
int outSamples = swr_get_out_samples(_swrContext, fin->nb_samples);
Frame outFrame(outSamples, channelsCount, _dstSampleFormat, _dstSampleRate);
int res = swr_convert_frame(_swrContext, outFrame, inFrame);
throwIfError(res, "cannot convert audio frame");
return outFrame;
} }
// Returns true when the decoder and encoder contexts differ in any of: channel
// count, channel layout, sample format, or sample rate; returns false only when
// all four fields are equal. Both pointers are dereferenced without a null check.
bool Resampler::needResampling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) {
return (decoderCtx->channels != encoderCtx->channels ||
decoderCtx->channel_layout != encoderCtx->channel_layout ||
decoderCtx->sample_fmt != encoderCtx->sample_fmt ||
decoderCtx->sample_rate != encoderCtx->sample_rate);
}
} }

View File

@ -12,13 +12,18 @@ namespace ffcpp {
// NOTE: side-by-side diff rendering — left column is the pre-commit declaration,
// right column the post-commit one. Lines with a single column exist only in the
// post-commit version.
//
// Resampler wraps a SwrContext pointer. The post-commit version additionally
// stores the destination channel layout, sample format and sample rate, adds a
// constructor taking decoder/encoder codec contexts, renames Resample() to
// resample(), and adds the static needResampling() predicate.
// NOTE(review): the class holds a raw SwrContext* and declares a destructor, but
// no copy or move operations are declared in this view; if the destructor frees
// the context, copying a Resampler would free it twice — TODO confirm and
// delete or define those operations.
class Resampler { class Resampler {
private: private:
SwrContext* _swrContext; SwrContext* _swrContext;
// Destination parameters (added by this commit).
int _dstChannelLayout;
AVSampleFormat _dstSampleFormat;
int _dstSampleRate;
public: public:
// Constructs from explicit input and output channel layout, sample rate and sample format.
Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat, Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat,
int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat); int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat);
// Constructs from a decoder context and an encoder context.
Resampler(AVCodecContext* decoderCtx, AVCodecContext* encoderCtx);
~Resampler(); ~Resampler();
// Takes an input frame and returns a Frame (old name: Resample, new name: resample).
Frame Resample(Frame& inFrame); Frame resample(Frame& inFrame);
// Static predicate over a decoder/encoder context pair.
static bool needResampling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx);
};
} }

View File

@ -15,6 +15,12 @@ namespace ffcpp {
} }
} }
// Convenience constructor added by this commit: delegates to the six-argument
// constructor, using the decoder context's width, height and pixel format as the
// source and the encoder context's width, height and pixel format as the
// destination. Both pointers are dereferenced without a null check.
Scaler::Scaler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx)
: Scaler(decoderCtx->width, decoderCtx->height, decoderCtx->pix_fmt,
encoderCtx->width, encoderCtx->height, encoderCtx->pix_fmt) {
}
Frame Scaler::scale(Frame &inFrame) { Frame Scaler::scale(Frame &inFrame) {
Frame outFrame(_dstWidth, _dstHeight, _dstPixFmt); Frame outFrame(_dstWidth, _dstHeight, _dstPixFmt);
@ -30,4 +36,10 @@ namespace ffcpp {
return outFrame; return outFrame;
} }
// Returns true when the decoder and encoder contexts differ in width, height or
// pixel format; returns false only when all three are equal. Both pointers are
// dereferenced without a null check.
bool Scaler::needScaling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) {
return (decoderCtx->width != encoderCtx->width ||
decoderCtx->height != encoderCtx->height ||
decoderCtx->pix_fmt != encoderCtx->pix_fmt);
}
} }

View File

@ -18,7 +18,9 @@ namespace ffcpp {
public: public:
Scaler(int srcWidth, int srcHeight, AVPixelFormat srcPixFmt, int dstWidth, int dstHeight, AVPixelFormat dstPixFmt); Scaler(int srcWidth, int srcHeight, AVPixelFormat srcPixFmt, int dstWidth, int dstHeight, AVPixelFormat dstPixFmt);
Scaler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx);
Frame scale(Frame& inFrame); Frame scale(Frame& inFrame);
static bool needScaling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx);
}; };
} }

View File

@ -3,6 +3,7 @@
#include "ffcpp/MediaFile.h" #include "ffcpp/MediaFile.h"
#include "ffcpp/FifoQueue.h" #include "ffcpp/FifoQueue.h"
#include "ffcpp/Scaler.h" #include "ffcpp/Scaler.h"
#include "ffcpp/Resampler.h"
constexpr int VIDEO_STREAM_INDEX = 0; constexpr int VIDEO_STREAM_INDEX = 0;
constexpr int AUDIO_STREAM_INDEX = 1; constexpr int AUDIO_STREAM_INDEX = 1;
@ -12,14 +13,15 @@ constexpr int VIDEO_HEIGHT = 480;
namespace ff = ffcpp; namespace ff = ffcpp;
void flushEncoder(ff::MediaFile& file, ff::Codec& encoder, const ff::Stream& inStream, const ff::Stream& outStream, int streamIndex) { void flushEncoder(ff::MediaFile& file, ff::Codec& encoder, const ff::Stream& inStream, const ff::Stream& outStream,
int streamIndex, AVRational srcTimeBase, AVRational dstTimeBase) {
if(encoder.capabilities() & AV_CODEC_CAP_DELAY) { if(encoder.capabilities() & AV_CODEC_CAP_DELAY) {
while (1) { while (1) {
auto packet = encoder.encode(nullptr); auto packet = encoder.encode(nullptr);
if(!packet) break; if(!packet) break;
packet.setStreamIndex(streamIndex); packet.setStreamIndex(streamIndex);
packet.rescaleTimestamps(inStream.timeBase(), outStream.timeBase()); packet.rescaleTimestamps(srcTimeBase, dstTimeBase);
file.writePacket(packet); file.writePacket(packet);
} }
} }
@ -41,7 +43,7 @@ int main(int argc, char** argv) {
ff::Stream& outVStream = output.addVideoStream(AV_CODEC_ID_H264, VIDEO_WIDTH, outHeight, AV_PIX_FMT_YUV420P); ff::Stream& outVStream = output.addVideoStream(AV_CODEC_ID_H264, VIDEO_WIDTH, outHeight, AV_PIX_FMT_YUV420P);
ff::Codec& vEncoder = outVStream.codec(); ff::Codec& vEncoder = outVStream.codec();
ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, aDecoder.channels(), aDecoder.sampleRate()); ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, 2, 44100, AV_SAMPLE_FMT_FLTP);
ff::Codec& aEncoder = outAStream.codec(); ff::Codec& aEncoder = outAStream.codec();
auto aEncTimeBase = aEncoder.timeBase(); auto aEncTimeBase = aEncoder.timeBase();
@ -55,14 +57,19 @@ int main(int argc, char** argv) {
int64_t aPts = 0; int64_t aPts = 0;
ff::FifoQueue fifo(aEncoder.sampleFormat(), aEncoder.channels(), aEncoder.frameSize()); ff::FifoQueue fifo(aEncoder.sampleFormat(), aEncoder.channels(), aEncoder.frameSize());
ff::Scaler scaler(vDecoder.width(), vDecoder.height(), vDecoder.pixelFormat(), VIDEO_WIDTH, outHeight, AV_PIX_FMT_YUV420P); ff::Scaler scaler(vDecoder, vEncoder);
ff::Resampler resampler(aDecoder, aEncoder);
bool needScaling = ff::Scaler::needScaling(vDecoder, vEncoder);
bool needResampling = ff::Resampler::needResampling(aDecoder, aEncoder);
while(auto packet = input.readPacket()) { while(auto packet = input.readPacket()) {
AVMediaType packetType = input.packetType(packet); AVMediaType packetType = input.packetType(packet);
if(packetType == AVMEDIA_TYPE_AUDIO) { if(packetType == AVMEDIA_TYPE_AUDIO) {
auto frame = aDecoder.decode(packet); auto frame = aDecoder.decode(packet);
if(needResampling)
frame = resampler.resample(frame);
fifo.addSamples(frame); fifo.addSamples(frame);
if(!fifo.enoughSamples()) continue;
// FIXME: we're losing last samples in case when fifo queue isn't full enough for encoder
while(fifo.enoughSamples()) { while(fifo.enoughSamples()) {
auto frame = aEncoder.createAudioFrame(); auto frame = aEncoder.createAudioFrame();
fifo.readFrame(frame); fifo.readFrame(frame);
@ -71,11 +78,12 @@ int main(int argc, char** argv) {
auto encPacket = aEncoder.encode(frame); auto encPacket = aEncoder.encode(frame);
if(!encPacket) continue; if(!encPacket) continue;
encPacket.setStreamIndex(AUDIO_STREAM_INDEX); encPacket.setStreamIndex(AUDIO_STREAM_INDEX);
encPacket.rescaleTimestamps(aDecoder.timeBase(), outAStream.timeBase()); encPacket.rescaleTimestamps(aEncoder.timeBase(), outAStream.timeBase());
output.writePacket(encPacket); output.writePacket(encPacket);
} }
} else if(packetType == AVMEDIA_TYPE_VIDEO) { } else if(packetType == AVMEDIA_TYPE_VIDEO) {
auto frame = vDecoder.decode(packet); auto frame = vDecoder.decode(packet);
if(needScaling)
frame = scaler.scale(frame); frame = scaler.scale(frame);
frame.setPictureType(AV_PICTURE_TYPE_NONE); frame.setPictureType(AV_PICTURE_TYPE_NONE);
auto encPacket = vEncoder.encode(frame); auto encPacket = vEncoder.encode(frame);
@ -86,8 +94,8 @@ int main(int argc, char** argv) {
} }
} }
flushEncoder(output, vEncoder, vStream, outVStream, VIDEO_STREAM_INDEX); flushEncoder(output, vEncoder, vStream, outVStream, VIDEO_STREAM_INDEX, vStream.timeBase(), outVStream.timeBase());
flushEncoder(output, aEncoder, aStream, outAStream, AUDIO_STREAM_INDEX); flushEncoder(output, aEncoder, aStream, outAStream, AUDIO_STREAM_INDEX, aEncoder.timeBase(), outAStream.timeBase());
output.writeTrailer(); output.writeTrailer();
return 0; return 0;