Added audio resampling (libswresample); fixed audio timestamp rescaling and added default pixel/sample format selection

2016-11-06 17:36:09 +03:00 · 2016-11-06 17:36:09 +03:00 · 01354ad13b
commit 01354ad13b
parent 2c1673538d
11 changed files with 93 additions and 22 deletions
--- a/cmake/modules/FindFFMPEG.cmake
+++ b/cmake/modules/FindFFMPEG.cmake
@ -104,13 +104,15 @@ FFMPEG_FIND(LIBAVDEVICE avdevice avdevice.h)
 FFMPEG_FIND(LIBAVCODEC  avcodec  avcodec.h)
 FFMPEG_FIND(LIBAVUTIL   avutil   avutil.h)
 FFMPEG_FIND(LIBSWSCALE  swscale  swscale.h)
+FFMPEG_FIND(LIBSWRESAMPLE swresample swresample.h)

 SET(FFMPEG_FOUND "NO")
 IF   (FFMPEG_LIBAVFORMAT_FOUND AND
        FFMPEG_LIBAVDEVICE_FOUND AND
        FFMPEG_LIBAVCODEC_FOUND AND
        FFMPEG_LIBAVUTIL_FOUND AND
-        FFMPEG_LIBSWSCALE_FOUND
+        FFMPEG_LIBSWSCALE_FOUND AND
+        FFMPEG_LIBSWRESAMPLE_FOUND
        )


@ -126,6 +128,7 @@ IF   (FFMPEG_LIBAVFORMAT_FOUND AND
            ${FFMPEG_LIBAVCODEC_LIBS}
            ${FFMPEG_LIBAVUTIL_LIBS}
            ${FFMPEG_LIBSWSCALE_LIBS}
+            ${FFMPEG_LIBSWRESAMPLE_LIBS}
            )

 ELSE ()
--- a/ffcpp/FifoQueue.cpp
+++ b/ffcpp/FifoQueue.cpp
@ -12,11 +12,15 @@ namespace ffcpp {
 	}

 	void FifoQueue::addSamples(const Frame &frame) {
-		int res = av_audio_fifo_realloc(_fifo, av_audio_fifo_size(_fifo) + frame.samplesCount());
+		const AVFrame* frameImpl = frame;
+		addSamples((void**)frameImpl->data, frameImpl->nb_samples);
+	}
+
+	void FifoQueue::addSamples(void **data, int samplesCount) {
+		int res = av_audio_fifo_realloc(_fifo, av_audio_fifo_size(_fifo) + samplesCount);
 		throwIfError(res, "cannot reallocate fifo queue");

-		const AVFrame* frameImpl = frame;
-		res = av_audio_fifo_write(_fifo, (void**)frameImpl->data, frame.samplesCount());
+		res = av_audio_fifo_write(_fifo, data, samplesCount);
 		throwIfError(res, "cannot add data from frame to fifo queue");
 	}

--- a/ffcpp/FifoQueue.h
+++ b/ffcpp/FifoQueue.h
@ -16,6 +16,7 @@ namespace ffcpp {
 	public:
 		FifoQueue(AVSampleFormat sampleFormat, int channels, int frameSize);
 		void addSamples(const Frame& frame);
+		void addSamples(void** data, int samplesCount);
 		bool enoughSamples() const;
 		void readFrame(Frame& frame);
 	};
--- a/ffcpp/MediaFile.cpp
+++ b/ffcpp/MediaFile.cpp
@ -102,13 +102,18 @@ namespace ffcpp {
 		AVCodecContext* ctx = stream->codec;
 		ctx->width = width;
 		ctx->height = height;
+
+		if(pixelFormat == AV_PIX_FMT_NONE) {
+			ctx->pix_fmt = codec->pix_fmts[0];
+		} else {
 			ctx->pix_fmt = pixelFormat;
+		}

 		_streams.emplace_back(stream, codec);
 		return _streams.back();
 	}

-	Stream& MediaFile::addAudioStream(AVCodecID codecID, int channels, int sampleRate) {
+	Stream& MediaFile::addAudioStream(AVCodecID codecID, int channels, int sampleRate, AVSampleFormat sampleFormat) {
 		AVCodec* codec = avcodec_find_encoder(codecID);
 		if(!codec) throw std::runtime_error("cannot find codec");

@ -117,7 +122,11 @@ namespace ffcpp {

 		// TODO: Here we need adjust encoder parameters
 		AVCodecContext* ctx = stream->codec;
+		if(sampleFormat == AV_SAMPLE_FMT_NONE) {
 			ctx->sample_fmt = codec->sample_fmts[0];
+		} else {
+			ctx->sample_fmt = sampleFormat;
+		}
 		ctx->global_quality = 10;
 		ctx->channels = channels;
 		ctx->channel_layout = (uint64_t)av_get_default_channel_layout(channels);
--- a/ffcpp/MediaFile.h
+++ b/ffcpp/MediaFile.h
@ -33,8 +33,8 @@ namespace ffcpp {
 		bool hasAudio() const;
 		Stream& videoStream(size_t index = 0);
 		Stream& audioStream(size_t index = 0);
-		Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat);
-		Stream& addAudioStream(AVCodecID codecID, int channels, int sampleRate);
+		Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat = AV_PIX_FMT_NONE);
+		Stream& addAudioStream(AVCodecID codecID, int channels, int sampleRate, AVSampleFormat sampleFormat = AV_SAMPLE_FMT_NONE);
 		Packet readPacket();
 		AVMediaType packetType(const Packet& packet);

--- a/ffcpp/Resampler.cpp
+++ b/ffcpp/Resampler.cpp
@ -1,6 +1,8 @@
 #include "Resampler.h"
 #include "ffcpp.h"
 #include <stdexcept>
+#include <memory>
+#include <iostream>

 extern "C" {
 	#include <libavutil/opt.h>
@ -10,6 +12,10 @@ namespace ffcpp {

 	Resampler::Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat, int outChannelLayout,
 	                     int outSampleRate, AVSampleFormat outSampleFormat) {
+		_dstChannelLayout = outChannelLayout;
+		_dstSampleFormat = outSampleFormat;
+		_dstSampleRate = outSampleRate;
+
 		_swrContext = swr_alloc();
 		if(!_swrContext) {
 			throw new std::runtime_error("cannot create resampler");
@ -27,13 +33,34 @@ namespace ffcpp {
 		throwIfError(res, "cannot init resampler");
 	}

+	Resampler::Resampler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx)
+			: Resampler((int)decoderCtx->channel_layout, decoderCtx->sample_rate, decoderCtx->sample_fmt,
+			            (int)encoderCtx->channel_layout, encoderCtx->sample_rate, encoderCtx->sample_fmt) {
+	}
+
 	Resampler::~Resampler() {
 		if(_swrContext) {
 			swr_free(&_swrContext);
 		}
 	}

-	Frame Resampler::Resample(Frame& inFrame) {
-		return Frame();
+	Frame Resampler::resample(Frame& inFrame) {
+		int channelsCount = av_get_channel_layout_nb_channels(_dstChannelLayout);
+		AVFrame* fin = inFrame;
+		int outSamples = swr_get_out_samples(_swrContext, fin->nb_samples);
+
+		Frame outFrame(outSamples, channelsCount, _dstSampleFormat, _dstSampleRate);
+		int res = swr_convert_frame(_swrContext, outFrame, inFrame);
+		throwIfError(res, "cannot convert audio frame");
+
+		return outFrame;
 	}
+
+	bool Resampler::needResampling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) {
+		return (decoderCtx->channels != encoderCtx->channels ||
+				decoderCtx->channel_layout != encoderCtx->channel_layout ||
+				decoderCtx->sample_fmt != encoderCtx->sample_fmt ||
+				decoderCtx->sample_rate != encoderCtx->sample_rate);
+	}
+
 }
--- a/ffcpp/Resampler.h
+++ b/ffcpp/Resampler.h
@ -12,13 +12,18 @@ namespace ffcpp {
 	class Resampler {
 	private:
 		SwrContext* _swrContext;
+		int _dstChannelLayout;
+		AVSampleFormat _dstSampleFormat;
+		int _dstSampleRate;

 	public:
 		Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat,
 		          int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat);
+		Resampler(AVCodecContext* decoderCtx, AVCodecContext* encoderCtx);
 		~Resampler();

-		Frame Resample(Frame& inFrame);
+		Frame resample(Frame& inFrame);
+		static bool needResampling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx);
 	};

 }
--- a/ffcpp/Scaler.cpp
+++ b/ffcpp/Scaler.cpp
@ -15,6 +15,12 @@ namespace ffcpp {
 		}
 	}

+	Scaler::Scaler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx)
+			: Scaler(decoderCtx->width, decoderCtx->height, decoderCtx->pix_fmt,
+			         encoderCtx->width, encoderCtx->height, encoderCtx->pix_fmt) {
+	}
+
+
 	Frame Scaler::scale(Frame &inFrame) {
 		Frame outFrame(_dstWidth, _dstHeight, _dstPixFmt);

@ -30,4 +36,10 @@ namespace ffcpp {
 		return outFrame;
 	}

+	bool Scaler::needScaling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx) {
+		return (decoderCtx->width != encoderCtx->width ||
+				decoderCtx->height != encoderCtx->height ||
+				decoderCtx->pix_fmt != encoderCtx->pix_fmt);
+	}
+
 }
--- a/ffcpp/Scaler.h
+++ b/ffcpp/Scaler.h
@ -18,7 +18,9 @@ namespace ffcpp {

 	public:
 		Scaler(int srcWidth, int srcHeight, AVPixelFormat srcPixFmt, int dstWidth, int dstHeight, AVPixelFormat dstPixFmt);
+		Scaler(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx);
 		Frame scale(Frame& inFrame);
+		static bool needScaling(AVCodecContext *decoderCtx, AVCodecContext *encoderCtx);
 	};

 }
--- a/main.cpp
+++ b/main.cpp
@ -3,6 +3,7 @@
 #include "ffcpp/MediaFile.h"
 #include "ffcpp/FifoQueue.h"
 #include "ffcpp/Scaler.h"
+#include "ffcpp/Resampler.h"

 constexpr int VIDEO_STREAM_INDEX = 0;
 constexpr int AUDIO_STREAM_INDEX = 1;
@ -12,14 +13,15 @@ constexpr int VIDEO_HEIGHT = 480;

 namespace ff = ffcpp;

-void flushEncoder(ff::MediaFile& file, ff::Codec& encoder, const ff::Stream& inStream, const ff::Stream& outStream, int streamIndex) {
+void flushEncoder(ff::MediaFile& file, ff::Codec& encoder, const ff::Stream& inStream, const ff::Stream& outStream,
+                  int streamIndex, AVRational srcTimeBase, AVRational dstTimeBase) {
 	if(encoder.capabilities() & AV_CODEC_CAP_DELAY) {
 		while (1) {
 			auto packet = encoder.encode(nullptr);
 			if(!packet) break;

 			packet.setStreamIndex(streamIndex);
-			packet.rescaleTimestamps(inStream.timeBase(), outStream.timeBase());
+			packet.rescaleTimestamps(srcTimeBase, dstTimeBase);
 			file.writePacket(packet);
 		}
 	}
@ -41,7 +43,7 @@ int main(int argc, char** argv) {
 	ff::Stream& outVStream = output.addVideoStream(AV_CODEC_ID_H264, VIDEO_WIDTH, outHeight, AV_PIX_FMT_YUV420P);
 	ff::Codec& vEncoder = outVStream.codec();

-	ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, aDecoder.channels(), aDecoder.sampleRate());
+	ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, 2, 44100, AV_SAMPLE_FMT_FLTP);
 	ff::Codec& aEncoder = outAStream.codec();

 	auto aEncTimeBase = aEncoder.timeBase();
@ -55,14 +57,19 @@ int main(int argc, char** argv) {

 	int64_t aPts = 0;
 	ff::FifoQueue fifo(aEncoder.sampleFormat(), aEncoder.channels(), aEncoder.frameSize());
-	ff::Scaler scaler(vDecoder.width(), vDecoder.height(), vDecoder.pixelFormat(), VIDEO_WIDTH, outHeight, AV_PIX_FMT_YUV420P);
+	ff::Scaler scaler(vDecoder, vEncoder);
+	ff::Resampler resampler(aDecoder, aEncoder);
+	bool needScaling = ff::Scaler::needScaling(vDecoder, vEncoder);
+	bool needResampling = ff::Resampler::needResampling(aDecoder, aEncoder);
 	while(auto packet = input.readPacket()) {
 		AVMediaType packetType = input.packetType(packet);
 		if(packetType == AVMEDIA_TYPE_AUDIO) {
 			auto frame = aDecoder.decode(packet);
+			if(needResampling)
+				frame = resampler.resample(frame);
 			fifo.addSamples(frame);
-			if(!fifo.enoughSamples()) continue;

+			// FIXME: the trailing samples are lost when the fifo queue holds fewer samples than the encoder needs for a full frame
 			while(fifo.enoughSamples()) {
 				auto frame = aEncoder.createAudioFrame();
 				fifo.readFrame(frame);
@ -71,11 +78,12 @@ int main(int argc, char** argv) {
 				auto encPacket = aEncoder.encode(frame);
 				if(!encPacket) continue;
 				encPacket.setStreamIndex(AUDIO_STREAM_INDEX);
-				encPacket.rescaleTimestamps(aDecoder.timeBase(), outAStream.timeBase());
+				encPacket.rescaleTimestamps(aEncoder.timeBase(), outAStream.timeBase());
 				output.writePacket(encPacket);
 			}
 		} else if(packetType == AVMEDIA_TYPE_VIDEO) {
 			auto frame = vDecoder.decode(packet);
+			if(needScaling)
 				frame = scaler.scale(frame);
 			frame.setPictureType(AV_PICTURE_TYPE_NONE);
 			auto encPacket = vEncoder.encode(frame);
@ -86,8 +94,8 @@ int main(int argc, char** argv) {
 		}
 	}

-	flushEncoder(output, vEncoder, vStream, outVStream, VIDEO_STREAM_INDEX);
-	flushEncoder(output, aEncoder, aStream, outAStream, AUDIO_STREAM_INDEX);
+	flushEncoder(output, vEncoder, vStream, outVStream, VIDEO_STREAM_INDEX, vStream.timeBase(), outVStream.timeBase());
+	flushEncoder(output, aEncoder, aStream, outAStream, AUDIO_STREAM_INDEX, aEncoder.timeBase(), outAStream.timeBase());
 	output.writeTrailer();

 	return 0;