diff --git a/cmake/modules/FindFFMPEG.cmake b/cmake/modules/FindFFMPEG.cmake index 5a56709..68b57f1 100644 --- a/cmake/modules/FindFFMPEG.cmake +++ b/cmake/modules/FindFFMPEG.cmake @@ -104,13 +104,15 @@ FFMPEG_FIND(LIBAVDEVICE avdevice avdevice.h) FFMPEG_FIND(LIBAVCODEC avcodec avcodec.h) FFMPEG_FIND(LIBAVUTIL avutil avutil.h) FFMPEG_FIND(LIBSWSCALE swscale swscale.h) +FFMPEG_FIND(LIBSWRESAMPLE swresample swresample.h) SET(FFMPEG_FOUND "NO") IF (FFMPEG_LIBAVFORMAT_FOUND AND FFMPEG_LIBAVDEVICE_FOUND AND FFMPEG_LIBAVCODEC_FOUND AND FFMPEG_LIBAVUTIL_FOUND AND - FFMPEG_LIBSWSCALE_FOUND + FFMPEG_LIBSWSCALE_FOUND AND + FFMPEG_LIBSWRESAMPLE_FOUND ) @@ -126,6 +128,7 @@ IF (FFMPEG_LIBAVFORMAT_FOUND AND ${FFMPEG_LIBAVCODEC_LIBS} ${FFMPEG_LIBAVUTIL_LIBS} ${FFMPEG_LIBSWSCALE_LIBS} + ${FFMPEG_LIBSWRESAMPLE_LIBS} ) ELSE () diff --git a/ffcpp/FifoQueue.cpp b/ffcpp/FifoQueue.cpp index cd60842..18327e1 100644 --- a/ffcpp/FifoQueue.cpp +++ b/ffcpp/FifoQueue.cpp @@ -12,11 +12,15 @@ namespace ffcpp { } void FifoQueue::addSamples(const Frame &frame) { - int res = av_audio_fifo_realloc(_fifo, av_audio_fifo_size(_fifo) + frame.samplesCount()); + const AVFrame* frameImpl = frame; + addSamples((void**)frameImpl->data, frameImpl->nb_samples); + } + + void FifoQueue::addSamples(void **data, int samplesCount) { + int res = av_audio_fifo_realloc(_fifo, av_audio_fifo_size(_fifo) + samplesCount); throwIfError(res, "cannot reallocate fifo queue"); - const AVFrame* frameImpl = frame; - res = av_audio_fifo_write(_fifo, (void**)frameImpl->data, frame.samplesCount()); + res = av_audio_fifo_write(_fifo, data, samplesCount); throwIfError(res, "cannot add data from frame to fifo queue"); } diff --git a/ffcpp/FifoQueue.h b/ffcpp/FifoQueue.h index c1c3cef..6404f05 100644 --- a/ffcpp/FifoQueue.h +++ b/ffcpp/FifoQueue.h @@ -16,6 +16,7 @@ namespace ffcpp { public: FifoQueue(AVSampleFormat sampleFormat, int channels, int frameSize); void addSamples(const Frame& frame); + void addSamples(void** data, int 
samplesCount); bool enoughSamples() const; void readFrame(Frame& frame); }; diff --git a/ffcpp/Frame.cpp b/ffcpp/Frame.cpp index 6653d93..bdcbcbd 100644 --- a/ffcpp/Frame.cpp +++ b/ffcpp/Frame.cpp @@ -58,7 +58,7 @@ namespace ffcpp { return _frame; } - void Frame::guessPts() { + void Frame::guessPts() { _frame->pts = av_frame_get_best_effort_timestamp(_frame); } diff --git a/ffcpp/MediaFile.cpp b/ffcpp/MediaFile.cpp index f7aba23..49b19bd 100644 --- a/ffcpp/MediaFile.cpp +++ b/ffcpp/MediaFile.cpp @@ -102,13 +102,18 @@ namespace ffcpp { AVCodecContext* ctx = stream->codec; ctx->width = width; ctx->height = height; - ctx->pix_fmt = pixelFormat; + + if(pixelFormat == AV_PIX_FMT_NONE) { + ctx->pix_fmt = codec->pix_fmts[0]; + } else { + ctx->pix_fmt = pixelFormat; + } _streams.emplace_back(stream, codec); return _streams.back(); } - Stream& MediaFile::addAudioStream(AVCodecID codecID, int channels, int sampleRate) { + Stream& MediaFile::addAudioStream(AVCodecID codecID, int channels, int sampleRate, AVSampleFormat sampleFormat) { AVCodec* codec = avcodec_find_encoder(codecID); if(!codec) throw std::runtime_error("cannot find codec"); @@ -117,7 +122,11 @@ namespace ffcpp { // TODO: Here we need adjust encoder parameters AVCodecContext* ctx = stream->codec; - ctx->sample_fmt = codec->sample_fmts[0]; + if(sampleFormat == AV_SAMPLE_FMT_NONE) { + ctx->sample_fmt = codec->sample_fmts[0]; + } else { + ctx->sample_fmt = sampleFormat; + } ctx->global_quality = 10; ctx->channels = channels; ctx->channel_layout = (uint64_t)av_get_default_channel_layout(channels); diff --git a/ffcpp/MediaFile.h b/ffcpp/MediaFile.h index e0105ad..8069d99 100644 --- a/ffcpp/MediaFile.h +++ b/ffcpp/MediaFile.h @@ -33,8 +33,8 @@ namespace ffcpp { bool hasAudio() const; Stream& videoStream(size_t index = 0); Stream& audioStream(size_t index = 0); - Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat); - Stream& addAudioStream(AVCodecID codecID, int channels, int 
sampleRate); + Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat = AV_PIX_FMT_NONE); + Stream& addAudioStream(AVCodecID codecID, int channels, int sampleRate, AVSampleFormat sampleFormat = AV_SAMPLE_FMT_NONE); Packet readPacket(); AVMediaType packetType(const Packet& packet); diff --git a/ffcpp/Resampler.cpp b/ffcpp/Resampler.cpp index 9ed9e75..6676c0f 100644 --- a/ffcpp/Resampler.cpp +++ b/ffcpp/Resampler.cpp @@ -1,6 +1,8 @@ #include "Resampler.h" #include "ffcpp.h" #include +#include +#include extern "C" { #include @@ -10,6 +12,10 @@ namespace ffcpp { Resampler::Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat, int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat) { + _dstChannelLayout = outChannelLayout; + _dstSampleFormat = outSampleFormat; + _dstSampleRate = outSampleRate; + _swrContext = swr_alloc(); if(!_swrContext) { throw new std::runtime_error("cannot create resampler"); @@ -27,13 +33,34 @@ namespace ffcpp { throwIfError(res, "cannot init resampler"); } + Resampler::Resampler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) + : Resampler((int)decoderCtx->channel_layout, decoderCtx->sample_rate, decoderCtx->sample_fmt, + (int)encoderCtx->channel_layout, encoderCtx->sample_rate, encoderCtx->sample_fmt) { + } + Resampler::~Resampler() { if(_swrContext) { swr_free(&_swrContext); } } - Frame Resampler::Resample(Frame& inFrame) { - return Frame(); + Frame Resampler::resample(Frame& inFrame) { + int channelsCount = av_get_channel_layout_nb_channels(_dstChannelLayout); + AVFrame* fin = inFrame; + int outSamples = swr_get_out_samples(_swrContext, fin->nb_samples); + + Frame outFrame(outSamples, channelsCount, _dstSampleFormat, _dstSampleRate); + int res = swr_convert_frame(_swrContext, outFrame, inFrame); + throwIfError(res, "cannot convert audio frame"); + + return outFrame; } + + bool Resampler::needResampling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) { 
+ return (decoderCtx->channels != encoderCtx->channels || + decoderCtx->channel_layout != encoderCtx->channel_layout || + decoderCtx->sample_fmt != encoderCtx->sample_fmt || + decoderCtx->sample_rate != encoderCtx->sample_rate); + } + } diff --git a/ffcpp/Resampler.h b/ffcpp/Resampler.h index ab08691..186d36c 100644 --- a/ffcpp/Resampler.h +++ b/ffcpp/Resampler.h @@ -12,13 +12,18 @@ namespace ffcpp { class Resampler { private: SwrContext* _swrContext; + int _dstChannelLayout; + AVSampleFormat _dstSampleFormat; + int _dstSampleRate; public: Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat, int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat); + Resampler(AVCodecContext* decoderCtx, AVCodecContext* encoderCtx); ~Resampler(); - Frame Resample(Frame& inFrame); + Frame resample(Frame& inFrame); + static bool needResampling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx); }; } diff --git a/ffcpp/Scaler.cpp b/ffcpp/Scaler.cpp index e163faf..3f6a230 100644 --- a/ffcpp/Scaler.cpp +++ b/ffcpp/Scaler.cpp @@ -15,6 +15,12 @@ namespace ffcpp { } } + Scaler::Scaler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) + : Scaler(decoderCtx->width, decoderCtx->height, decoderCtx->pix_fmt, + encoderCtx->width, encoderCtx->height, encoderCtx->pix_fmt) { + } + + Frame Scaler::scale(Frame &inFrame) { Frame outFrame(_dstWidth, _dstHeight, _dstPixFmt); @@ -30,4 +36,10 @@ namespace ffcpp { return outFrame; } + bool Scaler::needScaling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) { + return (decoderCtx->width != encoderCtx->width || + decoderCtx->height != encoderCtx->height || + decoderCtx->pix_fmt != encoderCtx->pix_fmt); + } + } diff --git a/ffcpp/Scaler.h b/ffcpp/Scaler.h index e528497..e7f0569 100644 --- a/ffcpp/Scaler.h +++ b/ffcpp/Scaler.h @@ -18,7 +18,9 @@ namespace ffcpp { public: Scaler(int srcWidth, int srcHeight, AVPixelFormat srcPixFmt, int dstWidth, int dstHeight, AVPixelFormat dstPixFmt); + 
Scaler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx); Frame scale(Frame& inFrame); + static bool needScaling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx); }; } diff --git a/main.cpp b/main.cpp index 42d78e6..e85ce97 100644 --- a/main.cpp +++ b/main.cpp @@ -3,6 +3,7 @@ #include "ffcpp/MediaFile.h" #include "ffcpp/FifoQueue.h" #include "ffcpp/Scaler.h" +#include "ffcpp/Resampler.h" constexpr int VIDEO_STREAM_INDEX = 0; constexpr int AUDIO_STREAM_INDEX = 1; @@ -12,14 +13,15 @@ constexpr int VIDEO_HEIGHT = 480; namespace ff = ffcpp; -void flushEncoder(ff::MediaFile& file, ff::Codec& encoder, const ff::Stream& inStream, const ff::Stream& outStream, int streamIndex) { +void flushEncoder(ff::MediaFile& file, ff::Codec& encoder, const ff::Stream& inStream, const ff::Stream& outStream, + int streamIndex, AVRational srcTimeBase, AVRational dstTimeBase) { if(encoder.capabilities() & AV_CODEC_CAP_DELAY) { while (1) { auto packet = encoder.encode(nullptr); if(!packet) break; packet.setStreamIndex(streamIndex); - packet.rescaleTimestamps(inStream.timeBase(), outStream.timeBase()); + packet.rescaleTimestamps(srcTimeBase, dstTimeBase); file.writePacket(packet); } } @@ -41,7 +43,7 @@ int main(int argc, char** argv) { ff::Stream& outVStream = output.addVideoStream(AV_CODEC_ID_H264, VIDEO_WIDTH, outHeight, AV_PIX_FMT_YUV420P); ff::Codec& vEncoder = outVStream.codec(); - ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, aDecoder.channels(), aDecoder.sampleRate()); + ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, 2, 44100, AV_SAMPLE_FMT_FLTP); ff::Codec& aEncoder = outAStream.codec(); auto aEncTimeBase = aEncoder.timeBase(); @@ -55,14 +57,19 @@ int main(int argc, char** argv) { int64_t aPts = 0; ff::FifoQueue fifo(aEncoder.sampleFormat(), aEncoder.channels(), aEncoder.frameSize()); - ff::Scaler scaler(vDecoder.width(), vDecoder.height(), vDecoder.pixelFormat(), VIDEO_WIDTH, outHeight, AV_PIX_FMT_YUV420P); + ff::Scaler 
scaler(vDecoder, vEncoder); + ff::Resampler resampler(aDecoder, aEncoder); + bool needScaling = ff::Scaler::needScaling(vDecoder, vEncoder); + bool needResampling = ff::Resampler::needResampling(aDecoder, aEncoder); while(auto packet = input.readPacket()) { AVMediaType packetType = input.packetType(packet); if(packetType == AVMEDIA_TYPE_AUDIO) { auto frame = aDecoder.decode(packet); + if(needResampling) + frame = resampler.resample(frame); fifo.addSamples(frame); - if(!fifo.enoughSamples()) continue; + // FIXME: samples left in the fifo queue are lost when there are too few of them to fill an encoder frame while(fifo.enoughSamples()) { auto frame = aEncoder.createAudioFrame(); fifo.readFrame(frame); @@ -71,12 +78,13 @@ int main(int argc, char** argv) { auto encPacket = aEncoder.encode(frame); if(!encPacket) continue; encPacket.setStreamIndex(AUDIO_STREAM_INDEX); - encPacket.rescaleTimestamps(aDecoder.timeBase(), outAStream.timeBase()); + encPacket.rescaleTimestamps(aEncoder.timeBase(), outAStream.timeBase()); output.writePacket(encPacket); } } else if(packetType == AVMEDIA_TYPE_VIDEO) { auto frame = vDecoder.decode(packet); - frame = scaler.scale(frame); + if(needScaling) + frame = scaler.scale(frame); frame.setPictureType(AV_PICTURE_TYPE_NONE); auto encPacket = vEncoder.encode(frame); if(!encPacket) continue; @@ -86,8 +94,8 @@ int main(int argc, char** argv) { } } - flushEncoder(output, vEncoder, vStream, outVStream, VIDEO_STREAM_INDEX); - flushEncoder(output, aEncoder, aStream, outAStream, AUDIO_STREAM_INDEX); + flushEncoder(output, vEncoder, vStream, outVStream, VIDEO_STREAM_INDEX, vStream.timeBase(), outVStream.timeBase()); + flushEncoder(output, aEncoder, aStream, outAStream, AUDIO_STREAM_INDEX, aEncoder.timeBase(), outAStream.timeBase()); output.writeTrailer(); return 0;