From a9e042eb4f68ab9fa21c5c19876ddbefccc09ed0 Mon Sep 17 00:00:00 2001
From: Selim Mustafaev <selim@fastmail.fm>
Date: Sun, 30 Oct 2016 01:30:53 +0300
Subject: [PATCH] transcoding of audio is now working, but without resampling

---
 ffcpp/Codec.cpp     | 12 +++++++++++-
 ffcpp/Codec.h       |  2 ++
 ffcpp/Frame.cpp     |  4 ++++
 ffcpp/Frame.h       |  1 +
 ffcpp/MediaFile.cpp |  8 ++++----
 ffcpp/MediaFile.h   |  2 +-
 main.cpp            | 23 ++++++++++++++++-------
 7 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/ffcpp/Codec.cpp b/ffcpp/Codec.cpp
index 212c70e..4c59790 100644
--- a/ffcpp/Codec.cpp
+++ b/ffcpp/Codec.cpp
@@ -67,6 +67,13 @@ namespace ffcpp {
 		return _codecCtx->frame_size;
 	}
 
+	int Codec::channels() const { // channel count as stored in the wrapped codec context
+		return _codecCtx->channels;
+	}
+
+	int Codec::sampleRate() const { // sample rate as stored in the wrapped codec context
+		return _codecCtx->sample_rate;
+	}
 
 	void Codec::setWidth(int width) {
 		_codecCtx->width = width;
@@ -102,7 +109,10 @@ namespace ffcpp {
 			if(res < 0) throw std::runtime_error("cannot decode packet");
 		}
 
-		frame.guessPts();
+		if(_codecCtx->codec_type == AVMEDIA_TYPE_VIDEO) {
+			frame.guessPts();
+		}
+
 		return frame;
 	}
 
diff --git a/ffcpp/Codec.h b/ffcpp/Codec.h
index 817ef19..ca29030 100644
--- a/ffcpp/Codec.h
+++ b/ffcpp/Codec.h
@@ -36,6 +36,8 @@ namespace ffcpp {
 		AVPixelFormat pixelFormat() const;
 		AVSampleFormat sampleFormat() const;
 		int frameSize() const;
+		int channels() const; // channel count read from the codec context
+		int sampleRate() const; // sample rate read from the codec context
 
 		void setWidth(int width);
 		void setHeight(int height);
diff --git a/ffcpp/Frame.cpp b/ffcpp/Frame.cpp
index a5394c2..9e4b5d2 100644
--- a/ffcpp/Frame.cpp
+++ b/ffcpp/Frame.cpp
@@ -55,4 +55,8 @@ namespace ffcpp {
 		return _frame->nb_samples;
 	}
 
+	void Frame::setPts(int64_t pts) { // 64-bit so the caller's int64_t sample counter is not narrowed
+		_frame->pts = pts;
+	}
+
 }
diff --git a/ffcpp/Frame.h b/ffcpp/Frame.h
index 8682ae8..6dce306 100644
--- a/ffcpp/Frame.h
+++ b/ffcpp/Frame.h
@@ -24,6 +24,7 @@ namespace ffcpp {
 		void guessPts();
 		void setPictureType(AVPictureType type);
 		int samplesCount() const;
+		void setPts(int64_t pts); // assigns the frame's presentation timestamp (64-bit)
 	};
 
 }
diff --git a/ffcpp/MediaFile.cpp b/ffcpp/MediaFile.cpp
index 6b9bc3f..f7aba23 100644
--- a/ffcpp/MediaFile.cpp
+++ b/ffcpp/MediaFile.cpp
@@ -108,7 +108,7 @@ namespace ffcpp {
 		return _streams.back();
 	}
 
-	Stream& MediaFile::addAudioStream(AVCodecID codecID) {
+	Stream& MediaFile::addAudioStream(AVCodecID codecID, int channels, int sampleRate) {
 		AVCodec* codec = avcodec_find_encoder(codecID);
 		if(!codec) throw std::runtime_error("cannot find codec");
 
@@ -119,9 +119,9 @@ namespace ffcpp {
 		AVCodecContext* ctx = stream->codec;
 		ctx->sample_fmt = codec->sample_fmts[0];
 		ctx->global_quality = 10;
-		ctx->channels = 6;
-		ctx->channel_layout = (uint64_t)av_get_default_channel_layout(ctx->channels);
-		ctx->sample_rate = 48000;
+		ctx->channels = channels;
+		ctx->channel_layout = (uint64_t)av_get_default_channel_layout(channels);
+		ctx->sample_rate = sampleRate;
 
 		_streams.emplace_back(stream, codec);
 		return _streams.back();
diff --git a/ffcpp/MediaFile.h b/ffcpp/MediaFile.h
index 2d2c7d5..e0105ad 100644
--- a/ffcpp/MediaFile.h
+++ b/ffcpp/MediaFile.h
@@ -34,7 +34,7 @@ namespace ffcpp {
 		Stream& videoStream(size_t index = 0);
 		Stream& audioStream(size_t index = 0);
 		Stream& addVideoStream(AVCodecID codecID, int width, int height, AVPixelFormat pixelFormat);
-		Stream& addAudioStream(AVCodecID codecID);
+		Stream& addAudioStream(AVCodecID codecID, int channels, int sampleRate);
 		Packet readPacket();
 		AVMediaType packetType(const Packet& packet);
 
diff --git a/main.cpp b/main.cpp
index 2e317bd..fc721bc 100644
--- a/main.cpp
+++ b/main.cpp
@@ -5,7 +5,6 @@
 
 constexpr int VIDEO_STREAM_INDEX = 0;
 constexpr int AUDIO_STREAM_INDEX = 1;
-constexpr int AUDIO_CHANNELS_COUNT = 6;
 
 namespace ff = ffcpp;
 
@@ -33,16 +32,23 @@ int main(int argc, char** argv) {
 	ff::Codec& vDecoder = vStream.codec();
 	ff::Codec& aDecoder = aStream.codec();
 
-	ff::Stream& outVStream = output.addVideoStream(AV_CODEC_ID_H264, vDecoder.width(), vDecoder.height(),
-	                                               AV_PIX_FMT_YUV420P);
-	outVStream.setTimeBase(vDecoder.timeBase());
+	ff::Stream& outVStream = output.addVideoStream(AV_CODEC_ID_H264, vDecoder.width(), vDecoder.height(), AV_PIX_FMT_YUV420P);
 	ff::Codec& vEncoder = outVStream.codec();
-	ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS);
+
+	ff::Stream& outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, aDecoder.channels(), aDecoder.sampleRate());
 	ff::Codec& aEncoder = outAStream.codec();
 
+	auto aEncTimeBase = aEncoder.timeBase(); // audio pts is later counted in samples, so this must be 1/sampleRate
+	if(aEncTimeBase.num == 0 || aEncTimeBase.den/aEncTimeBase.num != aEncoder.sampleRate()) { // num check prevents division by zero
+		std::cout << "audio encoder time base is not based on sample rate" << std::endl;
+		std::cout << "exiting" << std::endl;
+		return 1; // aborting is a failure; report a non-zero status to the caller
+	}
+
 	output.writeHeader();
 
-	ff::FifoQueue fifo(aEncoder.sampleFormat(), AUDIO_CHANNELS_COUNT, aEncoder.frameSize());
+	int64_t aPts = 0;
+	ff::FifoQueue fifo(aEncoder.sampleFormat(), aEncoder.channels(), aEncoder.frameSize());
 	while(auto packet = input.readPacket()) {
 		AVMediaType packetType = input.packetType(packet);
 		if(packetType == AVMEDIA_TYPE_AUDIO) {
@@ -53,10 +59,12 @@ int main(int argc, char** argv) {
 			while(fifo.enoughSamples()) {
 				auto frame = aEncoder.createAudioFrame();
 				fifo.readFrame(frame);
+				frame.setPts(aPts); // pts is the running count of samples handed to the encoder
+				aPts += frame.samplesCount();
 				auto encPacket = aEncoder.encode(frame);
 				if(!encPacket) continue;
 				encPacket.setStreamIndex(AUDIO_STREAM_INDEX);
-				encPacket.rescaleTimestamps(vStream.timeBase(), outVStream.timeBase());
+				encPacket.rescaleTimestamps(aEncoder.timeBase(), outAStream.timeBase()); // pts was assigned in encoder (sample) units, so rescale from the encoder time base
 				output.writePacket(encPacket);
 			}
 		} else if(packetType == AVMEDIA_TYPE_VIDEO) {
@@ -71,6 +79,7 @@ int main(int argc, char** argv) {
 	}
 
 	flushEncoder(output, vEncoder, vStream, outVStream, VIDEO_STREAM_INDEX);
+	flushEncoder(output, aEncoder, aStream, outAStream, AUDIO_STREAM_INDEX);
 	output.writeTrailer();
 
 	return 0;