very basic support of transcoding audio (incomplete and incorrect)

2016-10-23 20:38:42 +03:00 · 2016-10-23 20:38:42 +03:00 · 990527481d
commit 990527481d
parent 2a2ab38bf5
13 changed files with 169 additions and 37 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -11,6 +11,6 @@ link_directories(${FFMPEG_LIBRARY_DIRS})

 #message(FATAL_ERROR ${FFMPEG_LIBRARIES})

-set(SOURCE_FILES main.cpp ffcpp/MediaFile.cpp ffcpp/MediaFile.h ffcpp/ffcpp.cpp ffcpp/ffcpp.h ffcpp/Stream.cpp ffcpp/Stream.h ffcpp/Codec.cpp ffcpp/Codec.h ffcpp/Packet.cpp ffcpp/Packet.h ffcpp/Frame.cpp ffcpp/Frame.h)
+set(SOURCE_FILES main.cpp ffcpp/MediaFile.cpp ffcpp/MediaFile.h ffcpp/ffcpp.cpp ffcpp/ffcpp.h ffcpp/Stream.cpp ffcpp/Stream.h ffcpp/Codec.cpp ffcpp/Codec.h ffcpp/Packet.cpp ffcpp/Packet.h ffcpp/Frame.cpp ffcpp/Frame.h ffcpp/FifoQueue.cpp ffcpp/FifoQueue.h)
 add_executable(ffConv ${SOURCE_FILES})
 target_link_libraries(ffConv ${FFMPEG_LIBRARIES})
--- a/ffcpp/Codec.cpp
+++ b/ffcpp/Codec.cpp
@ -59,6 +59,15 @@ namespace ffcpp {
 		return _codecCtx->pix_fmt;
 	}

+	/**
+	 * Returns the sample format this codec works with.
+	 *
+	 * The first entry of the codec's list of supported sample formats is used.
+	 * FFmpeg allows that list to be absent (NULL); in that case the format
+	 * currently configured on the codec context is returned instead of
+	 * dereferencing a null pointer.
+	 */
+	AVSampleFormat Codec::sampleFormat() const {
+		const AVCodec* impl = _codecCtx->codec;
+		if(impl && impl->sample_fmts)
+			return impl->sample_fmts[0];
+		return _codecCtx->sample_fmt;
+	}
+
+	/** Returns the frame size stored in the codec context (AVCodecContext::frame_size). */
+	int Codec::frameSize() const {
+		const int contextFrameSize = _codecCtx->frame_size;
+		return contextFrameSize;
+	}
+
+
 	/** Stores the given width in the codec context. */
 	void Codec::setWidth(int width) {
 		AVCodecContext& ctx = *_codecCtx;
 		ctx.width = width;
 	}
@ -71,11 +80,11 @@ namespace ffcpp {
 		_codecCtx->pix_fmt = pixelFormat;
 	}

-	Codec::Codec(Codec&& c) {
+	Codec::Codec(Codec&& c) noexcept {
 		*this = std::move(c); // move construction is implemented via Codec::operator=(Codec&&)
 	}

-	Codec& Codec::operator=(Codec&& c) {
+	Codec& Codec::operator=(Codec&& c) noexcept {
 		_codec = c._codec;
 		_codecCtx = c._codecCtx;
 		c._codec = nullptr;
@ -108,4 +117,8 @@ namespace ffcpp {
 		return packet;
 	}

+	/**
+	 * Builds an audio Frame from this codec's settings: frame size, channel
+	 * count, sample format (as reported by sampleFormat()) and sample rate.
+	 */
+	Frame Codec::createAudioFrame() const {
+		const int samplesPerFrame = frameSize();
+		const int channelCount = _codecCtx->channels;
+		const AVSampleFormat format = sampleFormat();
+		const int rate = _codecCtx->sample_rate;
+		return Frame(samplesPerFrame, channelCount, format, rate);
+	}
+
 }
--- a/ffcpp/Codec.h
+++ b/ffcpp/Codec.h
@ -1,6 +1,7 @@
 #ifndef FFCONV_CODEC_H
 #define FFCONV_CODEC_H

+#include "ffcpp.h"
 #include "Packet.h"
 #include "Frame.h"

@ -15,7 +16,7 @@ namespace ffcpp {
 		Decoder
 	};

-	class Codec {
+	class Codec: public non_copyable {
 	private:
 		AVCodec* _codec;
 		AVCodecContext* _codecCtx;
@ -33,6 +34,8 @@ namespace ffcpp {
 		AVRational timeBase() const;
 		int capabilities() const;
 		AVPixelFormat pixelFormat() const;
+		AVSampleFormat sampleFormat() const;
+		int frameSize() const;

 		void setWidth(int width);
 		void setHeight(int height);
@ -40,10 +43,11 @@ namespace ffcpp {

 		Frame decode(Packet& packet);
 		Packet encode(AVFrame* frame);
+		Frame createAudioFrame() const;

 	public:
-		Codec(Codec&& c);
-		Codec& operator=(Codec&& c);
+		Codec(Codec&& c) noexcept;
+		Codec& operator=(Codec&& c) noexcept;
 	};

 }
--- a/ffcpp/FifoQueue.cpp
+++ b/ffcpp/FifoQueue.cpp
@ -0,0 +1,34 @@
+#include "FifoQueue.h"
+#include "ffcpp.h"
+#include <stdexcept>
+
+namespace ffcpp {
+
+	/**
+	 * Creates an audio FIFO for the given sample format and channel count.
+	 *
+	 * @param sampleFormat format of the samples that will be queued
+	 * @param channels     number of audio channels
+	 * @param frameSize    number of samples handed out by each readFrame() call
+	 * @throws std::runtime_error if frameSize is not positive or the FIFO
+	 *         cannot be allocated
+	 */
+	FifoQueue::FifoQueue(AVSampleFormat sampleFormat, int channels, int frameSize) {
+		// A zero frame size makes enoughSamples() permanently true while
+		// readFrame() consumes nothing, so a caller draining the queue in a
+		// loop would never finish; negative sizes are meaningless.
+		if(frameSize <= 0)
+			throw std::runtime_error("fifo queue requires a positive frame size");
+
+		_frameSize = frameSize;
+		// Start with room for a single sample; addSamples() grows the FIFO on demand.
+		_fifo = av_audio_fifo_alloc(sampleFormat, channels, 1);
+		if(!_fifo)
+			throw std::runtime_error("cannot create audio fifo queue");
+	}
+
+	/**
+	 * Appends all samples of the given frame to the queue.
+	 *
+	 * Frames that carry no samples are ignored.
+	 * Failures reported by FFmpeg are passed to throwIfError().
+	 *
+	 * @param frame source of the samples; it is not modified
+	 */
+	void FifoQueue::addSamples(const Frame &frame) {
+		const int samples = frame.samplesCount();
+		if(samples <= 0)
+			return;
+
+		int res = av_audio_fifo_realloc(_fifo, av_audio_fifo_size(_fifo) + samples);
+		throwIfError(res, "cannot reallocate fifo queue");
+
+		const AVFrame* frameImpl = frame;
+		// extended_data lists every channel plane, whereas data is a fixed-size
+		// array limited to AV_NUM_DATA_POINTERS entries.
+		res = av_audio_fifo_write(_fifo, reinterpret_cast<void**>(frameImpl->extended_data), samples);
+		throwIfError(res, "cannot add data from frame to fifo queue");
+	}
+
+	/** Tells whether at least _frameSize samples are currently queued. */
+	bool FifoQueue::enoughSamples() const {
+		const int queued = av_audio_fifo_size(_fifo);
+		return !(queued < _frameSize);
+	}
+
+	/**
+	 * Moves up to _frameSize samples from the queue into the given frame.
+	 *
+	 * After the call the frame's nb_samples holds the number of samples that
+	 * were actually read, which is smaller than _frameSize when the queue
+	 * held fewer samples.
+	 *
+	 * @param frame destination; its buffers must hold at least _frameSize samples
+	 * @throws std::runtime_error if the frame is too small; FFmpeg failures
+	 *         are passed to throwIfError()
+	 */
+	void FifoQueue::readFrame(Frame& frame) {
+		AVFrame* nativeFrame = frame;
+		// Reading more samples than the frame was allocated for would overrun its buffers.
+		if(nativeFrame->nb_samples < _frameSize)
+			throw std::runtime_error("frame is too small to receive data from fifo queue");
+
+		int res = av_audio_fifo_read(_fifo, reinterpret_cast<void**>(nativeFrame->extended_data), _frameSize);
+		throwIfError(res, "cannot read data from fifo queue");
+
+		// av_audio_fifo_read() returns the number of samples it delivered.
+		nativeFrame->nb_samples = res;
+	}
+
+}
--- a/ffcpp/FifoQueue.h
+++ b/ffcpp/FifoQueue.h
@ -0,0 +1,25 @@
+#ifndef FFCONV_FIFOQUEUE_H
+#define FFCONV_FIFOQUEUE_H
+
+#include "Frame.h"
+
+extern "C" {
+	#include "libavutil/audio_fifo.h"
+}
+
+namespace ffcpp {
+
+	/**
+	 * Queue of audio samples built on FFmpeg's AVAudioFifo.
+	 *
+	 * Samples are added frame by frame and handed back in chunks of the frame
+	 * size given at construction. The object owns the underlying AVAudioFifo
+	 * and releases it on destruction, so it can be moved but not copied.
+	 * A moved-from queue no longer owns a FIFO and must not be used.
+	 */
+	class FifoQueue {
+	private:
+		AVAudioFifo* _fifo;
+		int _frameSize;
+	public:
+		FifoQueue(AVSampleFormat sampleFormat, int channels, int frameSize);
+		~FifoQueue() {
+			if(_fifo) av_audio_fifo_free(_fifo);
+		}
+
+		// Copying would make two objects free the same AVAudioFifo.
+		FifoQueue(const FifoQueue&) = delete;
+		FifoQueue& operator=(const FifoQueue&) = delete;
+
+		FifoQueue(FifoQueue&& other) noexcept: _fifo(other._fifo), _frameSize(other._frameSize) {
+			other._fifo = nullptr;
+		}
+		FifoQueue& operator=(FifoQueue&& other) noexcept {
+			if(this != &other) {
+				if(_fifo) av_audio_fifo_free(_fifo);
+				_fifo = other._fifo;
+				_frameSize = other._frameSize;
+				other._fifo = nullptr;
+			}
+			return *this;
+		}
+
+		/** Appends the samples of the frame to the queue. */
+		void addSamples(const Frame& frame);
+		/** Tells whether at least frameSize samples are queued. */
+		bool enoughSamples() const;
+		/** Reads frameSize samples from the queue into the frame. */
+		void readFrame(Frame& frame);
+	};
+
+}
+
+#endif //FFCONV_FIFOQUEUE_H
--- a/ffcpp/Frame.cpp
+++ b/ffcpp/Frame.cpp
@ -1,3 +1,4 @@
+#include "ffcpp.h"
 #include "Frame.h"
 #include <stdexcept>

@ -7,6 +8,17 @@ namespace ffcpp {
 		_frame = av_frame_alloc();
 	}

+	/**
+	 * Creates an audio frame and asks FFmpeg to allocate its buffers.
+	 *
+	 * @param size         number of samples (stored as nb_samples)
+	 * @param channels     number of channels; the default layout for that count is used
+	 * @param sampleFormat sample format of the frame
+	 * @param sampleRate   sample rate of the frame
+	 */
+	Frame::Frame(int size, int channels, AVSampleFormat sampleFormat, int sampleRate): Frame() {
+		AVFrame& target = *_frame;
+		target.format = sampleFormat;
+		target.sample_rate = sampleRate;
+		target.channels = channels;
+		target.channel_layout = static_cast<uint64_t>(av_get_default_channel_layout(channels));
+		target.nb_samples = size;
+
+		const int status = av_frame_get_buffer(_frame, 0);
+		throwIfError(status, "cannot initialize buffer in audio frame");
+	}
+
 	Frame::Frame(Frame &&frame) {
 		*this = std::move(frame); // construction is implemented by assigning from the rvalue
 	}
@ -39,4 +51,8 @@ namespace ffcpp {
 		_frame->pict_type = type;
 	}

+	/** Returns the nb_samples value of the wrapped AVFrame. */
+	int Frame::samplesCount() const {
+		const AVFrame& wrapped = *_frame;
+		return wrapped.nb_samples;
+	}
+
 }
--- a/ffcpp/Frame.h
+++ b/ffcpp/Frame.h
@ -13,6 +13,7 @@ namespace ffcpp {

 	public:
 		Frame();
+		Frame(int size, int channels, AVSampleFormat sampleFormat, int sampleRate);
 		Frame(Frame&& frame);
 		~Frame();

@ -22,6 +23,7 @@ namespace ffcpp {

 		void guessPts();
 		void setPictureType(AVPictureType type);
+		int samplesCount() const;
 	};

 }
--- a/ffcpp/MediaFile.cpp
+++ b/ffcpp/MediaFile.cpp
@ -28,7 +28,11 @@ namespace ffcpp {
 			}

 			if(_formatCtx->oformat->flags & AVFMT_GLOBALHEADER)
-				_formatCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
+				_formatCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
+			// Reserve capacity up front: addVideoStream()/addAudioStream() return references
+			// into _streams, and a std::vector reallocation would invalidate them.
+			// NOTE: this only protects the first 100 streams.
+			_streams.reserve(100);

 			av_dump_format(_formatCtx, 0, src.c_str(), 1);
 		}
@ -88,7 +92,7 @@ namespace ffcpp {
 		}
 	}

-	Stream& MediaFile::addStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat) {
+	Stream& MediaFile::addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat) {
 		AVCodec* codec = avcodec_find_encoder(codecID);
 		if(!codec) throw std::runtime_error("cannot find codec");

@ -100,7 +104,26 @@ namespace ffcpp {
 		ctx->height = height;
 		ctx->pix_fmt = pixelFormat;

-		_streams.push_back(Stream(stream, codec));
+		_streams.emplace_back(stream, codec);
+		return _streams.back();
+	}
+
+	/**
+	 * Adds an audio stream that is encoded with the given codec.
+	 *
+	 * The encoder is configured with the codec's first supported sample format
+	 * and, for now, fixed settings: 6 channels, 48000 Hz, global quality 10.
+	 *
+	 * @param codecID id of the encoder to use
+	 * @return reference to the stream stored in this file
+	 * @throws std::runtime_error if the encoder is not found, reports no
+	 *         supported sample formats, or the stream cannot be created
+	 */
+	Stream& MediaFile::addAudioStream(AVCodecID codecID) {
+		AVCodec* codec = avcodec_find_encoder(codecID);
+		if(!codec) throw std::runtime_error("cannot find codec");
+		// The list of supported sample formats may be absent (NULL); check it
+		// before a stream is added to the format context.
+		if(!codec->sample_fmts) throw std::runtime_error("codec does not report supported sample formats");
+
+		AVStream* stream = avformat_new_stream(_formatCtx, codec);
+		if(!stream) throw  std::runtime_error("cannot create stream");
+
+		// TODO: take the encoder parameters from the caller instead of hard-coding them
+		AVCodecContext* ctx = stream->codec;
+		ctx->sample_fmt = codec->sample_fmts[0];
+		ctx->global_quality = 10;
+		ctx->channels = 6;
+		ctx->channel_layout = static_cast<uint64_t>(av_get_default_channel_layout(ctx->channels));
+		ctx->sample_rate = 48000;
+
+		_streams.emplace_back(stream, codec);
 		return _streams.back();
 	}

--- a/ffcpp/MediaFile.h
+++ b/ffcpp/MediaFile.h
@ -33,7 +33,8 @@ namespace ffcpp {
 		bool hasAudio() const;
 		Stream& videoStream(size_t index = 0);
 		Stream& audioStream(size_t index = 0);
-		Stream& addStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat);
+		Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat);
+		Stream& addAudioStream(AVCodecID codecID);
 		Packet readPacket();
 		AVMediaType packetType(const Packet& packet);

--- a/ffcpp/Stream.cpp
+++ b/ffcpp/Stream.cpp
@ -4,7 +4,6 @@
 namespace ffcpp {

 	Stream::Stream(): _stream(nullptr) { // default state: no underlying AVStream
-		int x = 0;
 	}

 	Stream::Stream(AVStream *stream): _stream(stream), _codec(_stream->codec, CodecType::Decoder) {
@ -31,11 +30,11 @@ namespace ffcpp {
 		_stream->time_base = timeBase;
 	}

-	Stream::Stream(Stream&& stream) {
+	Stream::Stream(Stream&& stream) noexcept {
 		*this = std::move(stream); // move construction is implemented via Stream::operator=(Stream&&)
 	}

-	Stream& Stream::operator=(Stream&& stream) {
+	Stream& Stream::operator=(Stream&& stream) noexcept {
 		_codec = std::move(stream._codec);
 		_stream = stream._stream;
 		stream._stream = nullptr;
--- a/ffcpp/Stream.h
+++ b/ffcpp/Stream.h
@ -25,8 +25,8 @@ namespace ffcpp {
 		void setTimeBase(AVRational timeBase);

 	public:
-		Stream(Stream&& stream);
-		Stream& operator=(Stream&& stream);
+		Stream(Stream&& stream) noexcept;
+		Stream& operator=(Stream&& stream) noexcept;
 	};

 }
--- a/ffcpp/ffcpp.h
+++ b/ffcpp/ffcpp.h
@ -8,6 +8,16 @@ namespace ffcpp {
 	void init();
 	void throwIfError(int result, const std::string& description);

+	/**
+	 * Mixin base class: deriving from it removes copy construction and copy
+	 * assignment from the derived class, while moving stays available.
+	 * Constructors and destructor are protected, so it is usable only as a base.
+	 */
+	class non_copyable
+	{
+	protected:
+		non_copyable() = default;
+		~non_copyable() = default;
+
+		non_copyable(non_copyable const &) = delete;
+		non_copyable& operator=(non_copyable const &) = delete;
+
+		// Declaring the copy operations suppresses the implicit move operations;
+		// default them explicitly so derived classes with defaulted moves remain movable.
+		non_copyable(non_copyable&&) noexcept = default;
+		non_copyable& operator=(non_copyable&&) noexcept = default;
+	};
+
 }

 #endif //FFCONV_FFCPP_H
--- a/main.cpp
+++ b/main.cpp
@ -1,9 +1,11 @@
 #include <iostream>
 #include "ffcpp/ffcpp.h"
 #include "ffcpp/MediaFile.h"
+#include "ffcpp/FifoQueue.h"

 constexpr int VIDEO_STREAM_INDEX = 0;
 constexpr int AUDIO_STREAM_INDEX = 1;
+constexpr int AUDIO_CHANNELS_COUNT = 6;

 namespace ff = ffcpp;

@ -26,40 +28,43 @@ int main(int argc, char** argv) {
 	ff::MediaFile output(argv[2], ff::Mode::Write);

 	ff::Stream& vStream = input.videoStream();
+	ff::Stream& aStream = input.audioStream();

 	ff::Codec& vDecoder = vStream.codec();
-	ff::Stream& outVStream = output.addStream(AV_CODEC_ID_H264, vDecoder.width(), vDecoder.height(), AV_PIX_FMT_YUV420P);
+	ff::Codec& aDecoder = aStream.codec();
+
+	ff::Stream& outVStream = output.addVideoStream(AV_CODEC_ID_H264, vDecoder.width(), vDecoder.height(),
+	                                               AV_PIX_FMT_YUV420P);
 	outVStream.setTimeBase(vDecoder.timeBase());
 	ff::Codec& vEncoder = outVStream.codec();
+	ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS);
+	ff::Codec& aEncoder = outAStream.codec();
+
 	output.writeHeader();

-	int64_t oldPts = 0, oldDts = 0;
+	ff::FifoQueue fifo(aEncoder.sampleFormat(), AUDIO_CHANNELS_COUNT, aEncoder.frameSize());
 	while(auto packet = input.readPacket()) {
 		AVMediaType packetType = input.packetType(packet);
-		if(packetType == AVMEDIA_TYPE_VIDEO) {
+		if(packetType == AVMEDIA_TYPE_AUDIO) {
+			auto frame = aDecoder.decode(packet);
+			fifo.addSamples(frame);
+			if(!fifo.enoughSamples()) continue;
+
+			while(fifo.enoughSamples()) {
+				auto frame = aEncoder.createAudioFrame();
+				fifo.readFrame(frame);
+				auto encPacket = aEncoder.encode(frame);
+				if(!encPacket) continue;
+				encPacket.setStreamIndex(AUDIO_STREAM_INDEX);
+				encPacket.rescaleTimestamps(vStream.timeBase(), outVStream.timeBase());
+				output.writePacket(encPacket);
+			}
+		} else if(packetType == AVMEDIA_TYPE_VIDEO) {
 			auto frame = vDecoder.decode(packet);
 			frame.setPictureType(AV_PICTURE_TYPE_NONE);
 			auto encPacket = vEncoder.encode(frame);
 			if(!encPacket) continue;
-
-			encPacket.setStreamIndex(packetType == AVMEDIA_TYPE_VIDEO ? VIDEO_STREAM_INDEX : AUDIO_STREAM_INDEX);
-
-			/*
-			// try to recover in case of bad pts/dts
-			if(encPacket.pts < encPacket.dts) {
-				encPacket.dts = encPacket.pts;
-			}
-
-			if(encPacket.pts < oldPts)
-				encPacket.pts = oldPts;
-
-			if(encPacket.dts < oldDts)
-				encPacket.dts = oldDts;
-
-			oldPts = encPacket.pts;
-			oldDts = encPacket.dts;
-			 */
-
+			encPacket.setStreamIndex(VIDEO_STREAM_INDEX);
 			encPacket.rescaleTimestamps(vStream.timeBase(), outVStream.timeBase());
 			output.writePacket(encPacket);
 		}