Added some code to support playing audio

2017-11-06 17:05:51 +03:00 · 2017-11-06 17:05:51 +03:00 · 14fa5f923f
commit 14fa5f923f
parent 2036185cc1
15 changed files with 1745 additions and 43 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,8 +1,9 @@
 cmake_minimum_required(VERSION 3.5)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14  -ggdb -O2")
+set(CMAKE_CXX_FLAGS "-std=c++14 -g -O2 -pthread")
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -std=c++14 -ggdb -O0")
+set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -O0 -pthread")
 SET(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} -pthread")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/lib)
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -1,3 +1,13 @@
 option(BUILD_WITH_TSAN "Build with thread sanitizer" OFF)
 option(BUILD_WITH_ASAN "Build with address sanitizer" OFF)
 if(BUILD_WITH_TSAN)
    SET(THREAD_SANITIZER_FLAG "-fsanitize=thread")
    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${THREAD_SANITIZER_FLAG}")
    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${THREAD_SANITIZER_FLAG}")
    SET( CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${THREAD_SANITIZER_FLAG} -ltsan" )
 endif()
 project(ffConv)
 add_executable(ffConv ffConv.cpp)
 add_dependencies(ffConv ffcpp)
--- a/examples/ffConv.cpp
+++ b/examples/ffConv.cpp
@ -43,7 +43,7 @@ int main(int argc, char** argv) {
 	auto outVStream = output.addVideoStream(AV_CODEC_ID_H264, VIDEO_WIDTH, outHeight, vDecoder->timeBase(), AV_PIX_FMT_YUV420P);
 	auto vEncoder = outVStream->codec();
-	auto outAStream = output.addAudioStream(AV_CODEC_ID_VORBIS, 2, 44100, AV_SAMPLE_FMT_FLTP);
+	auto outAStream = output.addAudioStream(AV_CODEC_ID_AC3, 2, 44100, AV_SAMPLE_FMT_FLTP);
 	auto aEncoder = outAStream->codec();
 	output.writeHeader();
--- a/examples/ffPlayer.cpp
+++ b/examples/ffPlayer.cpp
@ -11,7 +11,7 @@ namespace ff = ffcpp;
 #define WINDOW_WIDTH    640
 #define WINDOW_HEIGHT   480
-class SDLWindow: public ff::IVideoSink {
+class SDLWindow: public ff::IVideoSink, public ff::IAudioSink {
 private:
    template<typename T> using SDLUniquePtr = std::unique_ptr<T, void(*)(T*)>;
    using SDLWindowPtr = SDLUniquePtr<SDL_Window>;
@ -26,9 +26,10 @@ private:
    SDL_AudioDeviceID _aDevId;
    std::packaged_task<void()> _renderTask;
    ff::IAudioSource* _audioSrc;
 public:
-    SDLWindow(): _wnd(nullptr, SDL_DestroyWindow), _renderer(nullptr, SDL_DestroyRenderer), _texture(nullptr, SDL_DestroyTexture) {
+    SDLWindow(): _wnd(nullptr, SDL_DestroyWindow), _renderer(nullptr, SDL_DestroyRenderer), _texture(nullptr, SDL_DestroyTexture), _audioSrc(nullptr) {
        int res = SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER);
        if(res < 0) throw std::runtime_error("Error initializing SDL");
@ -44,13 +45,16 @@ public:
        SDL_AudioSpec want;
        SDL_zero(want);
        want.freq = 44100;
-        want.format = AUDIO_S16;
+        want.format = AUDIO_F32;
        want.channels = 2;
-        want.samples = 4096;
+        want.samples = 8192;
        want.callback = SDLWindow::audioCallback;
        want.userdata = this;
        _aDevId = SDL_OpenAudioDevice(nullptr, 0, &want, &_audioSpec, SDL_AUDIO_ALLOW_ANY_CHANGE);
        if(_aDevId == 0) throw std::runtime_error("Error opening audio device");
        SDL_PauseAudioDevice(_aDevId, 0);
    }
    void handleEvents() {
@ -71,10 +75,26 @@ public:
 private:
    static void audioCallback(void* userdata, Uint8* stream, int len) {
        ff::IAudioSource* src = static_cast<SDLWindow*>(userdata)->_audioSrc;
        if(src) {
            //std::cout << "fill sample buffer" << std::endl;
            src->fillSampleBuffer(stream, len);
        }
    }
-public:
+    AVSampleFormat sdlToFFMpeg(SDL_AudioFormat format) {
        switch (format) {
            case AUDIO_S16: return AV_SAMPLE_FMT_S16;
            case AUDIO_S32: return AV_SAMPLE_FMT_S32;
            case AUDIO_F32: return AV_SAMPLE_FMT_FLT;
            default:
                throw std::runtime_error("unknown audio sample format: " + std::to_string(format));
        }
    }
    // IVideoSink implementation
 private:
    virtual AVPixelFormat getPixelFormat() const noexcept override {
        return AV_PIX_FMT_YUV420P;
    }
@ -102,17 +122,41 @@ public:
        future.get();
    }
    // IAudioSink implementation
 private:
    // Stores the (non-owning) audio source pointer and logs the call to stdout.
    // NOTE(review): the SDL audio callback reads _audioSrc, and this assignment
    // is not synchronized with it — confirm the source is attached before
    // playback starts or that the race is acceptable.
    void setAudioSource(ff::IAudioSource* audioSrc) override {
        std::cout << "set audio source" << std::endl;
        _audioSrc = audioSrc;
    }
    // Reports the sample format stored in _audioSpec, translated to the
    // FFmpeg enum; throws std::runtime_error (from sdlToFFMpeg) if the SDL
    // format has no mapping.
    AVSampleFormat getSampleFormat() override {
        return sdlToFFMpeg(_audioSpec.format);
    }
    // Reports the channel count stored in _audioSpec.
    int getChannelsCount() override {
        return _audioSpec.channels;
    }
    // Reports the sample rate (freq field) stored in _audioSpec.
    int getSampleRate() override {
        return _audioSpec.freq;
    }
 };
 int main(int argc, char** argv) {
    try {
        auto wnd = std::make_shared<SDLWindow>();
-    ff::Player player(wnd);
+        ff::Player player(wnd, wnd);
        player.setMedia(argv[1]);
        player.setVideoSize(WINDOW_WIDTH, WINDOW_HEIGHT);
        player.play();
        wnd->handleEvents();
    } catch (...) {
        std::cout << "exception" << std::endl;
        return 0;
    }
    return 0;
 }
--- a/include/ffcpp/Frame.h
+++ b/include/ffcpp/Frame.h
@ -35,6 +35,8 @@ namespace ffcpp {
 		void setPts(int pts);
 		bool isKeyFrame() const;
        int pts() const;
 		void guessChannelLayout();
        int size() const;
 	};
 }
--- a/include/ffcpp/Player.h
+++ b/include/ffcpp/Player.h
@ -4,11 +4,14 @@
 #include "ffcpp/MediaFile.h"
 #include "ffcpp/Scaler.h"
 #include "TSQueue.h"
 #include "Resampler.h"
 #include "readerwriterqueue.h"
 #include <memory>
 #include <thread>
 #include <condition_variable>
 #include <mutex>
 #include <cstdint>
 #include <cstdio>
 namespace ffcpp {
@ -19,9 +22,15 @@ namespace ffcpp {
                                        int uPitch, int vPitch) = 0;
    };
    /**
     * Pull-style provider of audio samples: a consumer calls
     * fillSampleBuffer() whenever it needs more data.
     */
    struct IAudioSource {
        // Polymorphic interface: a virtual destructor keeps destruction
        // through an IAudioSource pointer well-defined.
        virtual ~IAudioSource() = default;

        /**
         * Writes samples into the caller-provided buffer.
         * @param data   destination buffer
         * @param length size of the destination buffer as given by the caller
         */
        virtual void fillSampleBuffer(uint8_t *data, int length) = 0;
    };
    struct IAudioSink {
-        virtual void setPauseCallback(std::function<void(bool)> callback) = 0;
+        virtual void setAudioSource(IAudioSource* audioSrc) = 0;
-        virtual void setAudioDataCallback(std::function<void(uint8_t*,size_t)> callback) = 0;
+        virtual AVSampleFormat getSampleFormat() = 0;
        virtual int getChannelsCount() = 0;
        virtual int getSampleRate() = 0;
    };
    enum class PlayerState {
@ -31,23 +40,36 @@ namespace ffcpp {
        Paused
    };
-    class Player {
+    class Player: private IAudioSource {
    private:
        static constexpr size_t AUDIO_BUFFER_LENGTH = 16*1024;
    private:
        typedef moodycamel::ReaderWriterQueue<FramePtr> FrameQueue;
    private:
        std::shared_ptr<IVideoSink> _vSink;
        std::shared_ptr<IAudioSink> _aSink;
        std::unique_ptr<MediaFile> _curMedia;
        StreamPtr _aStream;
        StreamPtr _vStream;
        ScalerPtr _scaler;
        ResamplerPtr _resampler;
        PlayerState _state;
-        TSQueue<Frame> _decodedFrames;
+        std::unique_ptr<uint8_t[]> _aSamplesBuffer;
-        std::thread _decodeThread;
+        int _samplesInBuffer;
-        std::thread _vPlayThread;
+        FILE* _asFile;
        std::mutex _mutex;
        std::condition_variable _stateCond;
        FrameQueue _videoFrames;
        FrameQueue _audioFrames;
        std::thread _decodeThread;
        std::thread _vPlayThread;
    public:
-        Player(std::shared_ptr<IVideoSink> vSink);
+        Player(std::shared_ptr<IVideoSink> vSink, std::shared_ptr<IAudioSink> aSink);
        ~Player();
        void setMedia(std::string path);
@ -57,6 +79,9 @@ namespace ffcpp {
    private:
        void decode();
        void displayFrames();
    private:
        void fillSampleBuffer(uint8_t *data, int length) override;
    };
 }
--- a/include/ffcpp/Resampler.h
+++ b/include/ffcpp/Resampler.h
@ -10,16 +10,19 @@ extern "C" {
 namespace ffcpp {
 	typedef std::shared_ptr<class Resampler> ResamplerPtr;
 	class Resampler {
 	private:
 		SwrContext* _swrContext;
 		int _dstChannelCount;
 		int _dstChannelLayout;
 		AVSampleFormat _dstSampleFormat;
 		int _dstSampleRate;
 	public:
-		Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat,
+		Resampler(int inChannelCount, int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat,
-		          int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat);
+				  int outChannelCount, int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat);
 		Resampler(CodecPtr decoder, CodecPtr encoder);
 		~Resampler();
--- a/include/ffcpp/atomicops.h
+++ b/include/ffcpp/atomicops.h
@ -0,0 +1,665 @@
 // ©2013-2016 Cameron Desrochers.
 // Distributed under the simplified BSD license (see the license file that
 // should have come with this header).
 // Uses Jeff Preshing's semaphore implementation (under the terms of its
 // separate zlib license, embedded below).
 #pragma once
 // Provides portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11 compliant) implementation
 // of low-level memory barriers, plus a few semi-portable utility macros (for inlining and alignment).
 // Also has a basic atomic type (limited to hardware-supported atomics with no memory ordering guarantees).
 // Uses the AE_* prefix for macros (historical reasons), and the "moodycamel" namespace for symbols.
 #include <cassert>
 #include <type_traits>
 #include <cerrno>
 #include <cstdint>
 #include <ctime>
 // Platform detection
 #if defined(__INTEL_COMPILER)
 #define AE_ICC
 #elif defined(_MSC_VER)
 #define AE_VCPP
 #elif defined(__GNUC__)
 #define AE_GCC
 #endif
 #if defined(_M_IA64) || defined(__ia64__)
 #define AE_ARCH_IA64
 #elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || defined(__x86_64__)
 #define AE_ARCH_X64
 #elif defined(_M_IX86) || defined(__i386__)
 #define AE_ARCH_X86
 #elif defined(_M_PPC) || defined(__powerpc__)
 #define AE_ARCH_PPC
 #else
 #define AE_ARCH_UNKNOWN
 #endif
 // AE_UNUSED
 #define AE_UNUSED(x) ((void)x)
 // AE_FORCEINLINE
 #if defined(AE_VCPP) || defined(AE_ICC)
 #define AE_FORCEINLINE __forceinline
 #elif defined(AE_GCC)
 //#define AE_FORCEINLINE __attribute__((always_inline)) 
 #define AE_FORCEINLINE inline
 #else
 #define AE_FORCEINLINE inline
 #endif
 // AE_ALIGN
 #if defined(AE_VCPP) || defined(AE_ICC)
 #define AE_ALIGN(x) __declspec(align(x))
 #elif defined(AE_GCC)
 #define AE_ALIGN(x) __attribute__((aligned(x)))
 #else
 // Assume GCC compliant syntax...
 #define AE_ALIGN(x) __attribute__((aligned(x)))
 #endif
 // Portable atomic fences implemented below:
 namespace moodycamel {
 // Ordering constraints accepted by compiler_fence() and fence().
 enum memory_order {
 	memory_order_relaxed,
 	memory_order_acquire,
 	memory_order_release,
 	memory_order_acq_rel,
 	memory_order_seq_cst,
 	// memory_order_sync: Forces a full sync:
 	// #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
 	// (alias of memory_order_seq_cst, so both share the same switch case)
 	memory_order_sync = memory_order_seq_cst
 };
 }    // end namespace moodycamel
 #if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || defined(AE_ICC)
 // VS2010 and ICC13 don't support std::atomic_*_fence, implement our own fences
 #include <intrin.h>
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
 #define AeFullSync _mm_mfence
 #define AeLiteSync _mm_mfence
 #elif defined(AE_ARCH_IA64)
 #define AeFullSync __mf
 #define AeLiteSync __mf
 #elif defined(AE_ARCH_PPC)
 #include <ppcintrinsics.h>
 #define AeFullSync __sync
 #define AeLiteSync __lwsync
 #endif
 #ifdef AE_VCPP
 #pragma warning(push)
 #pragma warning(disable: 4365)		// Disable erroneous 'conversion from long to unsigned int, signed/unsigned mismatch' error when using `assert`
 #ifdef __cplusplus_cli
 #pragma managed(push, off)
 #endif
 #endif
 namespace moodycamel {
 // Compiler-only barrier (MSVC/ICC intrinsics): restricts compiler reordering
 // for the requested ordering; relaxed is a no-op and an unknown value asserts.
 AE_FORCEINLINE void compiler_fence(memory_order order)
 {
 	if (order == memory_order_relaxed) {
 		return;
 	}
 	if (order == memory_order_acquire) {
 		_ReadBarrier();
 		return;
 	}
 	if (order == memory_order_release) {
 		_WriteBarrier();
 		return;
 	}
 	if (order == memory_order_acq_rel || order == memory_order_seq_cst) {
 		_ReadWriteBarrier();
 		return;
 	}
 	assert(false);
 }
 // x86/x64 have a strong memory model -- all loads and stores have
 // acquire and release semantics automatically (so only need compiler
 // barriers for those).
 #if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
 // Memory fence for x86/x64 when std::atomic fences are unavailable.
 // Acquire/release/acq_rel only emit compiler barriers; seq_cst additionally
 // issues AeFullSync() between two compiler barriers.
 AE_FORCEINLINE void fence(memory_order order)
 {
 	switch (order) {
 		case memory_order_relaxed: break;
 		case memory_order_acquire: _ReadBarrier(); break;
 		case memory_order_release: _WriteBarrier(); break;
 		case memory_order_acq_rel: _ReadWriteBarrier(); break;
 		case memory_order_seq_cst:
 			// Compiler barriers on both sides keep the compiler from moving
 			// accesses across the hardware fence.
 			_ReadWriteBarrier();
 			AeFullSync();
 			_ReadWriteBarrier();
 			break;
 		default: assert(false);	// unknown ordering value
 	}
 }
 #else
 // Memory fence for non-x86 architectures when std::atomic fences are
 // unavailable. Every non-relaxed ordering issues a hardware sync
 // (AeLiteSync, or AeFullSync for seq_cst) wrapped in matching compiler barriers.
 AE_FORCEINLINE void fence(memory_order order)
 {
 	// Non-specialized arch, use heavier memory barriers everywhere just in case :-(
 	switch (order) {
 		case memory_order_relaxed:
 			break;
 		case memory_order_acquire:
 			_ReadBarrier();
 			AeLiteSync();
 			_ReadBarrier();
 			break;
 		case memory_order_release:
 			_WriteBarrier();
 			AeLiteSync();
 			_WriteBarrier();
 			break;
 		case memory_order_acq_rel:
 			_ReadWriteBarrier();
 			AeLiteSync();
 			_ReadWriteBarrier();
 			break;
 		case memory_order_seq_cst:
 			_ReadWriteBarrier();
 			AeFullSync();
 			_ReadWriteBarrier();
 			break;
 		default: assert(false);	// unknown ordering value
 	}
 }
 #endif
 }    // end namespace moodycamel
 #else
 // Use standard library of atomics
 #include <atomic>
 namespace moodycamel {
 // Compiler-only barrier built on std::atomic_signal_fence.
 // Relaxed is a no-op; an unknown ordering value trips the assert.
 AE_FORCEINLINE void compiler_fence(memory_order order)
 {
 	std::memory_order stdOrder;
 	switch (order) {
 		case memory_order_relaxed:
 			return;		// nothing to do
 		case memory_order_acquire:
 			stdOrder = std::memory_order_acquire;
 			break;
 		case memory_order_release:
 			stdOrder = std::memory_order_release;
 			break;
 		case memory_order_acq_rel:
 			stdOrder = std::memory_order_acq_rel;
 			break;
 		case memory_order_seq_cst:
 			stdOrder = std::memory_order_seq_cst;
 			break;
 		default:
 			assert(false);
 			return;
 	}
 	std::atomic_signal_fence(stdOrder);
 }
 // Thread fence built on std::atomic_thread_fence.
 // Relaxed is a no-op; an unknown ordering value trips the assert.
 AE_FORCEINLINE void fence(memory_order order)
 {
 	if (order == memory_order_relaxed) {
 		return;
 	}
 	if (order == memory_order_acquire) {
 		std::atomic_thread_fence(std::memory_order_acquire);
 	} else if (order == memory_order_release) {
 		std::atomic_thread_fence(std::memory_order_release);
 	} else if (order == memory_order_acq_rel) {
 		std::atomic_thread_fence(std::memory_order_acq_rel);
 	} else if (order == memory_order_seq_cst) {
 		std::atomic_thread_fence(std::memory_order_seq_cst);
 	} else {
 		assert(false);
 	}
 }
 }    // end namespace moodycamel
 #endif
 #if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
 #define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 #endif
 #ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 #include <atomic>
 #endif
 #include <utility>
 // WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
 // Provides basic support for atomic variables -- no memory ordering guarantees are provided.
 // The guarantee of atomicity is only made for types that already have atomic load and store guarantees
 // at the hardware level -- on most platforms this generally means aligned pointers and integers (only).
 namespace moodycamel {
 // Thin atomic wrapper: loads and plain stores are relaxed, and the only
 // ordered operations are fetch_add_acquire / fetch_add_release.
 // Backed by std::atomic<T> when AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC is defined,
 // otherwise by a volatile T plus MSVC interlocked intrinsics.
 template<typename T>
 class weak_atomic
 {
 public:
 	// Leaves the stored value default-initialized.
 	weak_atomic() { }
 #ifdef AE_VCPP
 #pragma warning(push)
 #pragma warning(disable: 4100)		// Get rid of (erroneous) 'unreferenced formal parameter' warning
 #endif
 	// Initializes the stored value from anything convertible to T.
 	template<typename U> weak_atomic(U&& x) : value(std::forward<U>(x)) {  }
 #ifdef __cplusplus_cli
 	// Work around bug with universal reference/nullptr combination that only appears when /clr is on
 	weak_atomic(nullptr_t) : value(nullptr) {  }
 #endif
 	// Copy/move read the source through load(): std::atomic is neither
 	// copyable nor movable, so initializing `value` directly from
 	// `other.value` does not compile in the std::atomic configuration.
 	weak_atomic(weak_atomic const& other) : value(other.load()) {  }
 	weak_atomic(weak_atomic&& other) : value(other.load()) {  }
 #ifdef AE_VCPP
 #pragma warning(pop)
 #endif
 	// Implicit read; equivalent to load().
 	AE_FORCEINLINE operator T() const { return load(); }
 #ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 	template<typename U> AE_FORCEINLINE weak_atomic const& operator=(U&& x) { value = std::forward<U>(x); return *this; }
 	AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) { value = other.value; return *this; }
 	AE_FORCEINLINE T load() const { return value; }
 	// Adds `increment` and returns the previous value (32/64-bit T only).
 	AE_FORCEINLINE T fetch_add_acquire(T increment)
 	{
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
 		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
 #if defined(_M_AMD64)
 		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
 #endif
 #else
 #error Unsupported platform
 #endif
 		assert(false && "T must be either a 32 or 64 bit type");
 		return value;
 	}
 	// Adds `increment` and returns the previous value (32/64-bit T only).
 	AE_FORCEINLINE T fetch_add_release(T increment)
 	{
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
 		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
 #if defined(_M_AMD64)
 		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
 #endif
 #else
 #error Unsupported platform
 #endif
 		assert(false && "T must be either a 32 or 64 bit type");
 		return value;
 	}
 #else
 	// Relaxed store of a new value.
 	template<typename U>
 	AE_FORCEINLINE weak_atomic const& operator=(U&& x)
 	{
 		value.store(std::forward<U>(x), std::memory_order_relaxed);
 		return *this;
 	}
 	// Relaxed load from `other` followed by a relaxed store into this object.
 	AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other)
 	{
 		value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
 		return *this;
 	}
 	// Relaxed load of the current value.
 	AE_FORCEINLINE T load() const { return value.load(std::memory_order_relaxed); }
 	// Adds `increment` with acquire ordering; returns the previous value.
 	AE_FORCEINLINE T fetch_add_acquire(T increment)
 	{
 		return value.fetch_add(increment, std::memory_order_acquire);
 	}
 	// Adds `increment` with release ordering; returns the previous value.
 	AE_FORCEINLINE T fetch_add_release(T increment)
 	{
 		return value.fetch_add(increment, std::memory_order_release);
 	}
 #endif
 private:
 #ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 	// No std::atomic support, but still need to circumvent compiler optimizations.
 	// `volatile` will make memory access slow, but is guaranteed to be reliable.
 	volatile T value;
 #else
 	std::atomic<T> value;
 #endif
 };
 }	// end namespace moodycamel
 // Portable single-producer, single-consumer semaphore below:
 #if defined(_WIN32)
 // Avoid including windows.h in a header; we only need a handful of
 // items, so we'll redeclare them here (this is relatively safe since
 // the API generally has to remain stable between Windows versions).
 // I know this is an ugly hack but it still beats polluting the global
 // namespace with thousands of generic names or adding a .cpp for nothing.
 extern "C" {
 	struct _SECURITY_ATTRIBUTES;
 	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
 	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
 	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
 	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
 }
 #elif defined(__MACH__)
 #include <mach/mach.h>
 #elif defined(__unix__)
 #include <semaphore.h>
 #endif
 namespace moodycamel
 {
 	// Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
 	// portable + lightweight semaphore implementations, originally from
 	// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
 	// LICENSE:
 	// Copyright (c) 2015 Jeff Preshing
 	//
 	// This software is provided 'as-is', without any express or implied
 	// warranty. In no event will the authors be held liable for any damages
 	// arising from the use of this software.
 	//
 	// Permission is granted to anyone to use this software for any purpose,
 	// including commercial applications, and to alter it and redistribute it
 	// freely, subject to the following restrictions:
 	//
 	// 1. The origin of this software must not be misrepresented; you must not
 	//    claim that you wrote the original software. If you use this software
 	//    in a product, an acknowledgement in the product documentation would be
 	//    appreciated but is not required.
 	// 2. Altered source versions must be plainly marked as such, and must not be
 	//    misrepresented as being the original software.
 	// 3. This notice may not be removed or altered from any source distribution.
 	namespace spsc_sema
 	{
 #if defined(_WIN32)
 		//---------------------------------------------------------
 		// Semaphore (Windows)
 		// Thin non-copyable wrapper around a Win32 semaphore handle.
 		//---------------------------------------------------------
 		class Semaphore
 		{
 		private:
 		    void* m_hSema;
 		    // Declared but not defined: copying is disallowed.
 		    Semaphore(const Semaphore& other);
 		    Semaphore& operator=(const Semaphore& other);
 		public:
 		    Semaphore(int initialCount = 0)
 		    {
 		        assert(initialCount >= 0);
 		        const long maxLong = 0x7fffffff;
 		        m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
 		        assert(m_hSema);	// creation failure would make every later call fail
 		    }
 		    ~Semaphore()
 		    {
 		        CloseHandle(m_hSema);
 		    }
 		    // Blocks until the semaphore can be decremented.
 		    void wait()
 		    {
 		    	const unsigned long infinite = 0xffffffff;
 		        WaitForSingleObject(m_hSema, infinite);
 		    }
 			// Returns true only if the semaphore was actually acquired.
 			// A failed wait (WAIT_FAILED) is reported as "not acquired"
 			// rather than being mistaken for success.
 			bool try_wait()
 			{
 				const unsigned long RC_WAIT_OBJECT_0 = 0x00000000;
 				return WaitForSingleObject(m_hSema, 0) == RC_WAIT_OBJECT_0;
 			}
 			// Waits up to `usecs` microseconds (millisecond resolution);
 			// returns true only if the semaphore was actually acquired.
 			bool timed_wait(std::uint64_t usecs)
 			{
 				const unsigned long RC_WAIT_OBJECT_0 = 0x00000000;
 				return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == RC_WAIT_OBJECT_0;
 			}
 		    // Increments the semaphore by `count`.
 		    void signal(int count = 1)
 		    {
 		        ReleaseSemaphore(m_hSema, count, nullptr);
 		    }
 		};
 #elif defined(__MACH__)
 		//---------------------------------------------------------
 		// Semaphore (Apple iOS and OSX)
 		// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
 		//---------------------------------------------------------
 		class Semaphore
 		{
 		private:
 		    semaphore_t m_sema;
 		    Semaphore(const Semaphore& other);
 		    Semaphore& operator=(const Semaphore& other);
 		public:
 		    Semaphore(int initialCount = 0)
 		    {
 		        assert(initialCount >= 0);
 		        semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
 		    }
 		    ~Semaphore()
 		    {
 		        semaphore_destroy(mach_task_self(), m_sema);
 		    }
 		    void wait()
 		    {
 		        semaphore_wait(m_sema);
 		    }
 			bool try_wait()
 			{
 				return timed_wait(0);
 			}
 			bool timed_wait(std::int64_t timeout_usecs)
 			{
 				mach_timespec_t ts;
 				ts.tv_sec = timeout_usecs / 1000000;
 				ts.tv_nsec = (timeout_usecs % 1000000) * 1000;
 				// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
 				kern_return_t rc = semaphore_timedwait(m_sema, ts);
 				return rc != KERN_OPERATION_TIMED_OUT;
 			}
 		    void signal()
 		    {
 		        semaphore_signal(m_sema);
 		    }
 		    void signal(int count)
 		    {
 		        while (count-- > 0)
 		        {
 		            semaphore_signal(m_sema);
 		        }
 		    }
 		};
 #elif defined(__unix__)
 		//---------------------------------------------------------
 		// Semaphore (POSIX, Linux)
 		//---------------------------------------------------------
 		// Thin non-copyable wrapper around an unnamed POSIX semaphore.
 		class Semaphore
 		{
 		private:
 		    sem_t m_sema;
 		    // Declared but not defined: copying is disallowed.
 		    Semaphore(const Semaphore& other);
 		    Semaphore& operator=(const Semaphore& other);
 		public:
 		    Semaphore(int initialCount = 0)
 		    {
 		        assert(initialCount >= 0);
 		        int rc = sem_init(&m_sema, 0, initialCount);
 		        assert(rc == 0);	// an uninitialized semaphore must not be used
 		        (void)rc;
 		    }
 		    ~Semaphore()
 		    {
 		        sem_destroy(&m_sema);
 		    }
 		    // Blocks until the semaphore can be decremented.
 		    void wait()
 		    {
 		        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
 		        int rc;
 		        do
 		        {
 		            rc = sem_wait(&m_sema);
 		        }
 		        while (rc == -1 && errno == EINTR);
 		    }
 			// Non-blocking attempt. Returns true only if the semaphore was
 			// actually decremented; every failure (not just EAGAIN) is
 			// reported as "not acquired".
 			bool try_wait()
 			{
 				int rc;
 				do {
 					rc = sem_trywait(&m_sema);
 				} while (rc == -1 && errno == EINTR);
 				return rc == 0;
 			}
 			// Waits up to `usecs` microseconds. Returns true only if the
 			// semaphore was actually decremented; every failure (not just
 			// ETIMEDOUT) is reported as "not acquired".
 			bool timed_wait(std::uint64_t usecs)
 			{
 				struct timespec ts;
 				const int usecs_in_1_sec = 1000000;
 				const int nsecs_in_1_sec = 1000000000;
 				clock_gettime(CLOCK_REALTIME, &ts);
 				// Explicit casts: the operands are 64-bit unsigned while the
 				// timespec fields are time_t / long.
 				ts.tv_sec += static_cast<time_t>(usecs / usecs_in_1_sec);
 				ts.tv_nsec += static_cast<long>(usecs % usecs_in_1_sec) * 1000;
 				// sem_timedwait bombs if you have more than 1e9 in tv_nsec
 				// so we have to clean things up before passing it in
 				if (ts.tv_nsec >= nsecs_in_1_sec) {
 					ts.tv_nsec -= nsecs_in_1_sec;
 					++ts.tv_sec;
 				}
 				int rc;
 				do {
 					rc = sem_timedwait(&m_sema, &ts);
 				} while (rc == -1 && errno == EINTR);
 				return rc == 0;
 			}
 		    // Increments the semaphore once.
 		    void signal()
 		    {
 		        sem_post(&m_sema);
 		    }
 		    // Increments the semaphore `count` times.
 		    void signal(int count)
 		    {
 		        while (count-- > 0)
 		        {
 		            sem_post(&m_sema);
 		        }
 		    }
 		};
 #else
 #error Unsupported platform! (No semaphore wrapper available)
 #endif
 		//---------------------------------------------------------
 		// LightweightSemaphore
 		//---------------------------------------------------------
 		// Semaphore that keeps its count in a weak_atomic and only falls back
 		// to the kernel-level Semaphore after spinning fails to find a
 		// positive count. A negative m_count means a waiter has registered
 		// itself and is (or is about to be) blocked on m_sema.
 		// NOTE(review): lives in namespace spsc_sema; the load-then-decrement
 		// in tryWait() presumably relies on there being a single waiting
 		// thread — confirm before using with several consumers.
 		class LightweightSemaphore
 		{
 		public:
 			typedef std::make_signed<std::size_t>::type ssize_t;
 		private:
 		    weak_atomic<ssize_t> m_count;
 		    Semaphore m_sema;
 		    // Slow path: spins for a bounded number of iterations, then blocks
 		    // on m_sema. A negative timeout_usecs waits indefinitely.
 		    // Returns true if the semaphore was acquired, false on timeout.
 		    bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
 		    {
 		        ssize_t oldCount;
 		        // Is there a better way to set the initial spin count?
 		        // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
 		        // as threads start hitting the kernel semaphore.
 		        int spin = 10000;
 		        while (--spin >= 0)
 		        {
 		            if (m_count.load() > 0)
 		            {
 		                m_count.fetch_add_acquire(-1);
 		                return true;
 		            }
 		            compiler_fence(memory_order_acquire);     // Prevent the compiler from collapsing the loop.
 		        }
 		        // Spinning failed: register as a waiter by decrementing the count.
 		        oldCount = m_count.fetch_add_acquire(-1);
 				if (oldCount > 0)
 					return true;
 		        if (timeout_usecs < 0)
 				{
 					m_sema.wait();
 					return true;
 				}
 				if (m_sema.timed_wait(timeout_usecs))
 					return true;
 				// At this point, we've timed out waiting for the semaphore, but the
 				// count is still decremented indicating we may still be waiting on
 				// it. So we have to re-adjust the count, but only if the semaphore
 				// wasn't signaled enough times for us too since then. If it was, we
 				// need to release the semaphore too.
 				while (true)
 				{
 					oldCount = m_count.fetch_add_release(1);
 					if (oldCount < 0)
 						return false;    // successfully restored things to the way they were
 					// Oh, the producer thread just signaled the semaphore after all. Try again:
 					oldCount = m_count.fetch_add_acquire(-1);
 					if (oldCount > 0 && m_sema.try_wait())
 						return true;
 				}
 		    }
 		public:
 		    // initialCount must be non-negative.
 		    LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
 		    {
 		        assert(initialCount >= 0);
 		    }
 		    // Non-blocking: decrements and returns true if the count is
 		    // currently positive, otherwise returns false without waiting.
 		    bool tryWait()
 		    {
 		        if (m_count.load() > 0)
 		        {
 		        	m_count.fetch_add_acquire(-1);
 		        	return true;
 		        }
 		        return false;
 		    }
 		    // Blocks until the semaphore is acquired.
 		    void wait()
 		    {
 		        if (!tryWait())
 		            waitWithPartialSpinning();
 		    }
 			// Waits up to timeout_usecs microseconds (negative waits
 			// indefinitely); returns true if acquired, false on timeout.
 			bool wait(std::int64_t timeout_usecs)
 			{
 				return tryWait() || waitWithPartialSpinning(timeout_usecs);
 			}
 		    // Adds `count` to the counter; if the previous count was negative
 		    // (a waiter had registered), wakes it through the kernel semaphore.
 		    void signal(ssize_t count = 1)
 		    {
 		    	assert(count >= 0);
 		        ssize_t oldCount = m_count.fetch_add_release(count);
 		        assert(oldCount >= -1);
 		        if (oldCount < 0)
 		        {
 		            m_sema.signal(1);
 		        }
 		    }
 		    // Snapshot of the count, clamped to zero when negative.
 		    ssize_t availableApprox() const
 		    {
 		    	ssize_t count = m_count.load();
 		    	return count > 0 ? count : 0;
 		    }
 		};
 	}	// end namespace spsc_sema
 }	// end namespace moodycamel
 #if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
 #pragma warning(pop)
 #ifdef __cplusplus_cli
 #pragma managed(pop)
 #endif
 #endif
--- a/include/ffcpp/readerwriterqueue.h
+++ b/include/ffcpp/readerwriterqueue.h
@ -0,0 +1,854 @@
 // ©2013-2016 Cameron Desrochers.
 // Distributed under the simplified BSD license (see the license file that
 // should have come with this header).
 #pragma once
 #include "atomicops.h"
 #include <type_traits>
 #include <utility>
 #include <cassert>
 #include <stdexcept>
 #include <new>
 #include <cstdint>
 #include <cstdlib>		// For malloc/free/abort & size_t
 #if __cplusplus > 199711L || _MSC_VER >= 1700 // C++11 or VS2012
 #include <chrono>
 #endif
 // A lock-free queue for a single-consumer, single-producer architecture.
 // The queue is also wait-free in the common path (except if more memory
 // needs to be allocated, in which case malloc is called).
 // Allocates memory sparingly (O(lg(n)) times, amortized), and only once if
 // the original maximum size estimate is never exceeded.
 // Tested on x86/x64 processors, but semantics should be correct for all
 // architectures (given the right implementations in atomicops.h), provided
 // that aligned integer and pointer accesses are naturally atomic.
 // Note that there should only be one consumer thread and producer thread;
 // Switching roles of the threads, or using multiple consecutive threads for
 // one role, is not safe unless properly synchronized.
 // Using the queue exclusively from one thread is fine, though a bit silly.
 #ifndef MOODYCAMEL_CACHE_LINE_SIZE
 #define MOODYCAMEL_CACHE_LINE_SIZE 64
 #endif
 #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
 #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
 #define MOODYCAMEL_EXCEPTIONS_ENABLED
 #endif
 #endif
 #ifdef AE_VCPP
 #pragma warning(push)
 #pragma warning(disable: 4324)	// structure was padded due to __declspec(align())
 #pragma warning(disable: 4820)	// padding was added
 #pragma warning(disable: 4127)	// conditional expression is constant
 #endif
 namespace moodycamel {
 template<typename T, size_t MAX_BLOCK_SIZE = 512>
 class ReaderWriterQueue
 {
 	// Design: Based on a queue-of-queues. The low-level queues are just
 	// circular buffers with front and tail indices indicating where the
 	// next element to dequeue is and where the next element can be enqueued,
 	// respectively. Each low-level queue is called a "block". Each block
 	// wastes exactly one element's worth of space to keep the design simple
 	// (if front == tail then the queue is empty, and can't be full).
 	// The high-level queue is a circular linked list of blocks; again there
 	// is a front and tail, but this time they are pointers to the blocks.
 	// The front block is where the next element to be dequeued is, provided
 	// the block is not empty. The back block is where elements are to be
 	// enqueued, provided the block is not full.
 	// The producer thread owns all the tail indices/pointers. The consumer
 	// thread owns all the front indices/pointers. Both threads read each
 	// other's variables, but only the owning thread updates them. E.g. After
 	// the consumer reads the producer's tail, the tail may change before the
 	// consumer is done dequeuing an object, but the consumer knows the tail
 	// will never go backwards, only forwards.
 	// If there is no room to enqueue an object, an additional block (of
 	// equal size to the last block) is added. Blocks are never removed.
 public:
 	// Constructs a queue that can hold maxSize elements without further
 	// allocations. If more than MAX_BLOCK_SIZE elements are requested,
 	// then several blocks of MAX_BLOCK_SIZE each are reserved (including
 	// at least one extra buffer block).
 	// maxSize must be greater than zero (asserted). If a block cannot be
 	// allocated, std::bad_alloc is thrown when MOODYCAMEL_EXCEPTIONS_ENABLED
 	// is defined; otherwise abort() is called.
 	explicit ReaderWriterQueue(size_t maxSize = 15)
 #ifndef NDEBUG
 		: enqueuing(false)
 		,dequeuing(false)
 #endif
 	{
 		assert(maxSize > 0);
 		assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2");
 		assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");
 		Block* firstBlock = nullptr;
 		largestBlockSize = ceilToPow2(maxSize + 1);		// We need a spare slot to fit maxSize elements in the block
 		if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
 			// We need a spare block in case the producer is writing to a different block than the one the consumer is
 			// reading from, and wants to enqueue the maximum number of elements. We also need a spare element in each
 			// block to avoid the ambiguity between front == tail meaning "empty" and "full".
 			// So the effective number of slots that are guaranteed to be usable at any time is the block size - 1 times the
 			// number of blocks - 1. Solving for maxSize and applying a ceiling to the division gives us (after simplifying):
 			size_t initialBlockCount = (maxSize + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
 			largestBlockSize = MAX_BLOCK_SIZE;
 			Block* lastBlock = nullptr;
 			for (size_t i = 0; i != initialBlockCount; ++i) {
 				auto block = make_block(largestBlockSize);
 				if (block == nullptr) {
 					// NOTE(review): blocks allocated by earlier iterations are not freed on this path
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
 					throw std::bad_alloc();
 #else
 					abort();
 #endif
 				}
 				if (firstBlock == nullptr) {
 					firstBlock = block;
 				}
 				else {
 					lastBlock->next = block;
 				}
 				lastBlock = block;
 				// Keep the list circular after every insertion: the newest block points back to the first
 				block->next = firstBlock;
 			}
 		}
 		else {
 			// A single block is enough; it forms a ring of one
 			firstBlock = make_block(largestBlockSize);
 			if (firstBlock == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
 				throw std::bad_alloc();
 #else
 				abort();
 #endif
 			}
 			firstBlock->next = firstBlock;
 		}
 		// The queue starts empty: front and tail both refer to the first block
 		frontBlock = firstBlock;
 		tailBlock = firstBlock;
 		// Make sure the reader/writer threads will have the initialized memory setup above:
 		fence(memory_order_sync);
 	}
 	// Move constructor: takes over other's block ring and leaves other
 	// with a freshly allocated single block of 32 slots.
 	// If that allocation fails, std::bad_alloc is thrown when
 	// MOODYCAMEL_EXCEPTIONS_ENABLED is defined; otherwise abort() is called.
 	// Note: The queue should not be accessed concurrently while it's
 	// being moved. It's up to the user to synchronize this.
 	ReaderWriterQueue(ReaderWriterQueue&& other)
 		: frontBlock(other.frontBlock.load()),
 		tailBlock(other.tailBlock.load()),
 		largestBlockSize(other.largestBlockSize)
 #ifndef NDEBUG
 		,enqueuing(false)
 		,dequeuing(false)
 #endif
 	{
 		// Give the moved-from queue a new block of its own to point at
 		other.largestBlockSize = 32;
 		Block* b = other.make_block(other.largestBlockSize);
 		if (b == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
 			throw std::bad_alloc();
 #else
 			abort();
 #endif
 		}
 		// Single-block ring: the block is its own successor
 		b->next = b;
 		other.frontBlock = b;
 		other.tailBlock = b;
 	}
 	// Move assignment, implemented as an exchange: this queue and `other`
 	// trade their front block, tail block and largest block size.
 	// Note: The queue should not be accessed concurrently while it's
 	// being moved. It's up to the user to synchronize this.
 	ReaderWriterQueue& operator=(ReaderWriterQueue&& other)
 	{
 		// Exchange the front block pointers
 		Block* const previousFront = frontBlock.load();
 		frontBlock = other.frontBlock.load();
 		other.frontBlock = previousFront;

 		// Exchange the tail block pointers
 		Block* const previousTail = tailBlock.load();
 		tailBlock = other.tailBlock.load();
 		other.tailBlock = previousTail;

 		std::swap(largestBlockSize, other.largestBlockSize);
 		return *this;
 	}
 	// Destroys every element still stored in the queue and frees every block.
 	// Note: The queue should not be accessed concurrently while it's
 	// being deleted. It's up to the user to synchronize this.
 	~ReaderWriterQueue()
 	{
 		// Make sure we get the latest version of all variables from other CPUs:
 		fence(memory_order_sync);
 		// Destroy any remaining objects in queue and free memory
 		Block* frontBlock_ = frontBlock;
 		Block* block = frontBlock_;
 		do {
 			// Read the successor now; the block's memory is released below
 			Block* nextBlock = block->next;
 			size_t blockFront = block->front;
 			size_t blockTail = block->tail;
 			// Walk the occupied slots from front up to (not including) tail, wrapping with sizeMask
 			for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) {
 				auto element = reinterpret_cast<T*>(block->data + i * sizeof(T));
 				element->~T();
 				(void)element;
 			}
 			// The block was placement-constructed inside a malloc'd buffer whose
 			// start is recorded in rawThis, so destroy it in place and free that buffer
 			auto rawBlock = block->rawThis;
 			block->~Block();
 			std::free(rawBlock);
 			block = nextBlock;
 		} while (block != frontBlock_);		// The list is circular: stop once we are back at the start
 	}
 	// Enqueues a copy of element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory (delegates to inner_enqueue in CannotAlloc mode).
 	AE_FORCEINLINE bool try_enqueue(T const& element)
 	{
 		return inner_enqueue<CannotAlloc>(element);
 	}
 	// Move-enqueues element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory (delegates to inner_enqueue in CannotAlloc mode).
 	AE_FORCEINLINE bool try_enqueue(T&& element)
 	{
 		// T is the class template parameter, so T&& is a plain rvalue reference
 		// (not a forwarding reference); std::move is the right cast here.
 		return inner_enqueue<CannotAlloc>(std::move(element));
 	}
 	// Enqueues a copy of element on the queue.
 	// Allocates an additional block of memory if needed
 	// (delegates to inner_enqueue in CanAlloc mode).
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T const& element)
 	{
 		return inner_enqueue<CanAlloc>(element);
 	}
 	// Move-enqueues element on the queue.
 	// Allocates an additional block of memory if needed
 	// (delegates to inner_enqueue in CanAlloc mode).
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T&& element)
 	{
 		// T is the class template parameter, so T&& is a plain rvalue reference
 		// (not a forwarding reference); std::move is the right cast here.
 		return inner_enqueue<CanAlloc>(std::move(element));
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// returns false instead. If the queue has at least one element,
 	// moves front to result using operator=, then returns true.
 	// The dequeued slot's element is destroyed after the move-assignment.
 	// This method updates the front indices/pointers, which are owned by
 	// the consumer thread.
 	template<typename U>
 	bool try_dequeue(U& result)
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->dequeuing);
 #endif
 		// High-level pseudocode:
 		// Remember where the tail block is
 		// If the front block has an element in it, dequeue it
 		// Else
 		//     If front block was the tail block when we entered the function, return false
 		//     Else advance to next block and dequeue the item there
 		// Note that we have to use the value of the tail block from before we check if the front
 		// block is empty or not, in case the front block is empty and then, before we check if the
 		// tail block is at the front block or not, the producer fills up the front block *and
 		// moves on*, which would make us skip a filled block. Seems unlikely, but was consistently
 		// reproducible in practice.
 		// In order to avoid overhead in the common case, though, we do a double-checked pattern
 		// where we have the fast path if the front block is not empty, then read the tail block,
 		// then re-read the front block and check if it's not empty again, then check if the tail
 		// block has advanced.
 		Block* frontBlock_ = frontBlock.load();
 		size_t blockTail = frontBlock_->localTail;
 		size_t blockFront = frontBlock_->front.load();
 		// First compare against the cached tail (localTail); only if that says "empty" is the
 		// shared tail re-read (and the cache refreshed) before concluding the block is empty
 		if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
 			fence(memory_order_acquire);
 		non_empty_front_block:
 			// Front block not empty, dequeue from here
 			auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
 			result = std::move(*element);
 			element->~T();
 			blockFront = (blockFront + 1) & frontBlock_->sizeMask;
 			// Publish the new front only after the slot has been moved-from and destroyed
 			fence(memory_order_release);
 			frontBlock_->front = blockFront;
 		}
 		else if (frontBlock_ != tailBlock.load()) {
 			fence(memory_order_acquire);
 			// Re-read the front block's indices now that tailBlock has been read (see note above)
 			frontBlock_ = frontBlock.load();
 			blockTail = frontBlock_->localTail = frontBlock_->tail.load();
 			blockFront = frontBlock_->front.load();
 			fence(memory_order_acquire);
 			if (blockFront != blockTail) {
 				// Oh look, the front block isn't empty after all
 				goto non_empty_front_block;
 			}
 			// Front block is empty but there's another block ahead, advance to it
 			Block* nextBlock = frontBlock_->next;
 			// Don't need an acquire fence here since next can only ever be set on the tailBlock,
 			// and we're not the tailBlock, and we did an acquire earlier after reading tailBlock which
 			// ensures next is up-to-date on this CPU in case we recently were at tailBlock.
 			size_t nextBlockFront = nextBlock->front.load();
 			size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
 			fence(memory_order_acquire);
 			// Since the tailBlock is only ever advanced after being written to,
 			// we know there's for sure an element to dequeue on it
 			assert(nextBlockFront != nextBlockTail);
 			AE_UNUSED(nextBlockTail);
 			// We're done with this block, let the producer use it if it needs
 			fence(memory_order_release);		// Expose possibly pending changes to frontBlock->front from last dequeue
 			frontBlock = frontBlock_ = nextBlock;
 			compiler_fence(memory_order_release);	// Not strictly needed
 			auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
 			result = std::move(*element);
 			element->~T();
 			nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
 			// Publish the new front only after the slot has been moved-from and destroyed
 			fence(memory_order_release);
 			frontBlock_->front = nextBlockFront;
 		}
 		else {
 			// No elements in current block and no other block to advance to
 			return false;
 		}
 		return true;
 	}
 	// Returns a pointer to the front element in the queue (the one that
 	// would be removed next by a call to `try_dequeue` or `pop`). If the
 	// queue appears empty at the time the method is called, nullptr is
 	// returned instead.
 	// The element is left in place: no index or block pointer is advanced.
 	// Must be called only from the consumer thread.
 	T* peek()
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->dequeuing);
 #endif
 		// See try_dequeue() for reasoning
 		Block* frontBlock_ = frontBlock.load();
 		size_t blockTail = frontBlock_->localTail;
 		size_t blockFront = frontBlock_->front.load();
 		// Check the cached tail first; re-read the shared tail only if the cache says "empty"
 		if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
 			fence(memory_order_acquire);
 		non_empty_front_block:
 			// Front block has an element: hand out its address
 			return reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
 		}
 		else if (frontBlock_ != tailBlock.load()) {
 			fence(memory_order_acquire);
 			// Re-read the front block's indices after having read tailBlock
 			frontBlock_ = frontBlock.load();
 			blockTail = frontBlock_->localTail = frontBlock_->tail.load();
 			blockFront = frontBlock_->front.load();
 			fence(memory_order_acquire);
 			if (blockFront != blockTail) {
 				goto non_empty_front_block;
 			}
 			// Front block is empty but there is a block ahead; its front element is the
 			// queue's front. Unlike try_dequeue(), frontBlock itself is not advanced here.
 			Block* nextBlock = frontBlock_->next;
 			size_t nextBlockFront = nextBlock->front.load();
 			fence(memory_order_acquire);
 			assert(nextBlockFront != nextBlock->tail.load());
 			return reinterpret_cast<T*>(nextBlock->data + nextBlockFront * sizeof(T));
 		}
 		// No elements in current block and no other block to look at
 		return nullptr;
 	}
 	// Removes the front element from the queue, if any, without returning it.
 	// The removed element's destructor is run.
 	// Returns true on success, or false if the queue appeared empty at the time
 	// `pop` was called.
 	// This method updates the front indices/pointers, which are owned by
 	// the consumer thread.
 	bool pop()
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->dequeuing);
 #endif
 		// See try_dequeue() for reasoning
 		Block* frontBlock_ = frontBlock.load();
 		size_t blockTail = frontBlock_->localTail;
 		size_t blockFront = frontBlock_->front.load();
 		// Check the cached tail first; re-read the shared tail only if the cache says "empty"
 		if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
 			fence(memory_order_acquire);
 		non_empty_front_block:
 			// Front block not empty: destroy its front element and advance front
 			auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
 			element->~T();
 			blockFront = (blockFront + 1) & frontBlock_->sizeMask;
 			fence(memory_order_release);
 			frontBlock_->front = blockFront;
 		}
 		else if (frontBlock_ != tailBlock.load()) {
 			fence(memory_order_acquire);
 			// Re-read the front block's indices after having read tailBlock
 			frontBlock_ = frontBlock.load();
 			blockTail = frontBlock_->localTail = frontBlock_->tail.load();
 			blockFront = frontBlock_->front.load();
 			fence(memory_order_acquire);
 			if (blockFront != blockTail) {
 				goto non_empty_front_block;
 			}
 			// Front block is empty but there's another block ahead, advance to it
 			Block* nextBlock = frontBlock_->next;
 			size_t nextBlockFront = nextBlock->front.load();
 			size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
 			fence(memory_order_acquire);
 			assert(nextBlockFront != nextBlockTail);
 			AE_UNUSED(nextBlockTail);
 			fence(memory_order_release);
 			frontBlock = frontBlock_ = nextBlock;
 			compiler_fence(memory_order_release);
 			// Destroy the first element of the block we just advanced to
 			auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
 			element->~T();
 			nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
 			fence(memory_order_release);
 			frontBlock_->front = nextBlockFront;
 		}
 		else {
 			// No elements in current block and no other block to advance to
 			return false;
 		}
 		return true;
 	}
 	// Returns the approximate number of items currently in the queue.
 	// Safe to call from both the producer and consumer threads.
 	inline size_t size_approx() const
 	{
 		size_t result = 0;
 		Block* frontBlock_ = frontBlock.load();
 		Block* block = frontBlock_;
 		do {
 			fence(memory_order_acquire);
 			size_t blockFront = block->front.load();
 			size_t blockTail = block->tail.load();
 			result += (blockTail - blockFront) & block->sizeMask;
 			block = block->next.load();
 		} while (block != frontBlock_);
 		return result;
 	}
 private:
 	// Selects whether inner_enqueue may allocate a new block when the queue is full
 	enum AllocationMode { CanAlloc, CannotAlloc };
 	// Shared implementation of enqueue()/try_enqueue().
 	// Constructs a T from the forwarded element in the next free slot.
 	// Returns true on success. Returns false if no slot is free and either
 	// canAlloc is CannotAlloc or allocating a new block failed.
 	// This method updates the tail indices/pointers, which are owned by
 	// the producer thread.
 	template<AllocationMode canAlloc, typename U>
 	bool inner_enqueue(U&& element)
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->enqueuing);
 #endif
 		// High-level pseudocode (assuming we're allowed to alloc a new block):
 		// If room in tail block, add to tail
 		// Else check next block
 		//     If next block is not the head block, enqueue on next block
 		//     Else create a new block and enqueue there
 		//     Advance tail to the block we just enqueued to
 		Block* tailBlock_ = tailBlock.load();
 		size_t blockFront = tailBlock_->localFront;
 		size_t blockTail = tailBlock_->tail.load();
 		size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
 		// Check against the cached front (localFront) first; only if that says "full" is the
 		// shared front re-read (and the cache refreshed) before concluding the block is full
 		if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
 			fence(memory_order_acquire);
 			// This block has room for at least one more element
 			char* location = tailBlock_->data + blockTail * sizeof(T);
 			new (location) T(std::forward<U>(element));
 			// Publish the new tail only after the element has been constructed
 			fence(memory_order_release);
 			tailBlock_->tail = nextBlockTail;
 		}
 		else {
 			fence(memory_order_acquire);
 			if (tailBlock_->next.load() != frontBlock) {
 				// Note that the reason we can't advance to the frontBlock and start adding new entries there
 				// is because if we did, then dequeue would stay in that block, eventually reading the new values,
 				// instead of advancing to the next full block (whose values were enqueued first and so should be
 				// consumed first).
 				fence(memory_order_acquire);		// Ensure we get latest writes if we got the latest frontBlock
 				// tailBlock is full, but there's a free block ahead, use it
 				Block* tailBlockNext = tailBlock_->next.load();
 				size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
 				nextBlockTail = tailBlockNext->tail.load();
 				fence(memory_order_acquire);
 				// This block must be empty since it's not the head block and we
 				// go through the blocks in a circle
 				assert(nextBlockFront == nextBlockTail);
 				tailBlockNext->localFront = nextBlockFront;
 				char* location = tailBlockNext->data + nextBlockTail * sizeof(T);
 				new (location) T(std::forward<U>(element));
 				tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
 				// Make the element and the block's new tail visible before advancing tailBlock
 				fence(memory_order_release);
 				tailBlock = tailBlockNext;
 			}
 			else if (canAlloc == CanAlloc) {
 				// tailBlock is full and there's no free block ahead; create a new block
 				// Block size doubles until it reaches MAX_BLOCK_SIZE, then stays constant
 				auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
 				auto newBlock = make_block(newBlockSize);
 				if (newBlock == nullptr) {
 					// Could not allocate a block!
 					return false;
 				}
 				largestBlockSize = newBlockSize;
 				// The new block is empty, so the element goes into slot 0
 				new (newBlock->data) T(std::forward<U>(element));
 				assert(newBlock->front == 0);
 				newBlock->tail = newBlock->localTail = 1;
 				// Splice the new block into the ring right after the current tail block
 				newBlock->next = tailBlock_->next.load();
 				tailBlock_->next = newBlock;
 				// Might be possible for the dequeue thread to see the new tailBlock->next
 				// *without* seeing the new tailBlock value, but this is OK since it can't
 				// advance to the next block until tailBlock is set anyway (because the only
 				// case where it could try to read the next is if it's already at the tailBlock,
 				// and it won't advance past tailBlock in any circumstance).
 				fence(memory_order_release);
 				tailBlock = newBlock;
 			}
 			else if (canAlloc == CannotAlloc) {
 				// Would have had to allocate a new block to enqueue, but not allowed
 				return false;
 			}
 			else {
 				assert(false && "Should be unreachable code");
 				return false;
 			}
 		}
 		return true;
 	}
 	// Disable copying: the copy constructor is private so that code outside
 	// the class cannot copy a queue.
 	ReaderWriterQueue(ReaderWriterQueue const&) {  }
 	// Disable assignment: private for the same reason. The body returns *this
 	// so that the function never flows off the end of a value-returning
 	// function (which would be undefined behaviour if it were ever called).
 	ReaderWriterQueue& operator=(ReaderWriterQueue const&) { return *this; }
 	// Rounds x up to the nearest power of two (a power of two maps to itself).
 	// From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
 	AE_FORCEINLINE static size_t ceilToPow2(size_t x)
 	{
 		size_t v = x - 1;
 		// Smear the highest set bit into every lower position using shifts of
 		// 1, 2, 4, ... up to half the width of size_t (8 bits per byte assumed).
 		const size_t totalBits = sizeof(size_t) << 3;
 		for (size_t shift = 1; shift < totalBits; shift <<= 1) {
 			v |= v >> shift;
 		}
 		return v + 1;
 	}
 	// Returns the first address at or after ptr that satisfies U's alignment.
 	template<typename U>
 	static AE_FORCEINLINE char* align_for(char* ptr)
 	{
 		const std::size_t boundary = std::alignment_of<U>::value;
 		const std::size_t misalignment = reinterpret_cast<std::uintptr_t>(ptr) % boundary;
 		if (misalignment == 0) {
 			// Already aligned: nothing to skip
 			return ptr;
 		}
 		return ptr + (boundary - misalignment);
 	}
 private:
 #ifndef NDEBUG
 	// Scope guard around a bool flag: asserts the flag is clear on entry,
 	// sets it for the guard's lifetime, and clears it on destruction.
 	struct ReentrantGuard
 	{
 		ReentrantGuard(bool& flag)
 			: inSection(flag)
 		{
 			assert(!inSection && "ReaderWriterQueue does not support enqueuing or dequeuing elements from other elements' ctors and dtors");
 			inSection = true;
 		}
 		~ReentrantGuard()
 		{
 			inSection = false;
 		}
 	private:
 		// Declared but not defined: the guard is not meant to be assigned
 		ReentrantGuard& operator=(ReentrantGuard const&);
 		bool& inSection;
 	};
 #endif
 	// One circular buffer ("block") in the ring of blocks. The Block header is
 	// placement-constructed by make_block() inside a malloc'd buffer that also
 	// holds the element storage pointed to by `data`.
 	struct Block
 	{
 		// Avoid false-sharing by putting highly contended variables on their own cache lines
 		weak_atomic<size_t> front;	// (Atomic) Elements are read from here
 		size_t localTail;			// An uncontended shadow copy of tail, owned by the consumer
 		char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];
 		weak_atomic<size_t> tail;	// (Atomic) Elements are enqueued here
 		size_t localFront;			// An uncontended shadow copy of front, owned by the producer
 		char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];	// next isn't very contended, but we don't want it on the same cache line as tail (which is)
 		weak_atomic<Block*> next;	// (Atomic) Next block in the circular list
 		char* data;		// Contents (on heap) are aligned to T's alignment
 		const size_t sizeMask;	// Block size minus one; used to wrap indices with a bitwise AND
 		// size must be a power of two (and greater than 0)
 		// _rawThis is the start of the malloc'd buffer containing this block;
 		// _data is the (aligned) start of the element storage.
 		Block(size_t const& _size, char* _rawThis, char* _data)
 			: front(0), localTail(0), tail(0), localFront(0), next(nullptr), data(_data), sizeMask(_size - 1), rawThis(_rawThis)
 		{
 		}
 	private:
 		// C4512 - Assignment operator could not be generated
 		Block& operator=(Block const&);
 	public:
 		char* rawThis;	// Pointer originally returned by malloc; passed to std::free when the block is destroyed
 	};
 	// Allocates one buffer holding a Block header followed by storage for
 	// `capacity` elements, and constructs the Block in it.
 	// Returns nullptr if the allocation fails.
 	static Block* make_block(size_t capacity)
 	{
 		// Room for the header and for the elements, each with worst-case alignment slack
 		const size_t headerBytes = sizeof(Block) + std::alignment_of<Block>::value - 1;
 		const size_t payloadBytes = sizeof(T) * capacity + std::alignment_of<T>::value - 1;
 		char* const raw = static_cast<char*>(std::malloc(headerBytes + payloadBytes));
 		if (raw == nullptr) {
 			return nullptr;
 		}
 		// The header goes at the first suitably aligned address; the elements
 		// start at the first T-aligned address after the header.
 		char* const blockAddr = align_for<Block>(raw);
 		char* const dataAddr = align_for<T>(blockAddr + sizeof(Block));
 		return new (blockAddr) Block(capacity, raw, dataAddr);
 	}
 private:
 	weak_atomic<Block*> frontBlock;		// (Atomic) Elements are dequeued from this block
 	char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];	// Padding between frontBlock and tailBlock (presumably to keep them on separate cache lines)
 	weak_atomic<Block*> tailBlock;		// (Atomic) Elements are enqueued to this block
 	size_t largestBlockSize;		// Size of the largest block allocated so far; used to size newly allocated blocks
 #ifndef NDEBUG
 	bool enqueuing;
 	bool dequeuing;
 #endif
 };
 // Like ReaderWriterQueue, but also provides blocking operations
 // Wraps a ReaderWriterQueue together with a semaphore. The semaphore is
 // signaled once per successful enqueue and waited on before every dequeue,
 // which is what lets the consumer block until an element is available.
 template<typename T, size_t MAX_BLOCK_SIZE = 512>
 class BlockingReaderWriterQueue
 {
 private:
 	typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
 public:
 	// Constructs the inner queue with the given maxSize.
 	explicit BlockingReaderWriterQueue(size_t maxSize = 15)
 		: inner(maxSize)
 	{ }
 	// Enqueues a copy of element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory.
 	AE_FORCEINLINE bool try_enqueue(T const& element)
 	{
 		if (inner.try_enqueue(element)) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Move-enqueues element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory.
 	AE_FORCEINLINE bool try_enqueue(T&& element)
 	{
 		// T&& is a plain rvalue reference here (T is the class template
 		// parameter), so std::move is the appropriate cast.
 		if (inner.try_enqueue(std::move(element))) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Enqueues a copy of element on the queue.
 	// Allocates an additional block of memory if needed.
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T const& element)
 	{
 		if (inner.enqueue(element)) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Move-enqueues element on the queue.
 	// Allocates an additional block of memory if needed.
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T&& element)
 	{
 		if (inner.enqueue(std::move(element))) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// returns false instead. If the queue has at least one element,
 	// moves front to result using operator=, then returns true.
 	template<typename U>
 	bool try_dequeue(U& result)
 	{
 		if (sema.tryWait()) {
 			// The semaphore was signaled after an enqueue, so an element must be present
 			bool success = inner.try_dequeue(result);
 			assert(success);
 			AE_UNUSED(success);
 			return true;
 		}
 		return false;
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// waits until an element is available, then dequeues it.
 	template<typename U>
 	void wait_dequeue(U& result)
 	{
 		sema.wait();
 		bool success = inner.try_dequeue(result);
 		AE_UNUSED(result);
 		assert(success);
 		AE_UNUSED(success);
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// waits until an element is available up to the specified timeout,
 	// then dequeues it and returns true, or returns false if the timeout
 	// expires before an element can be dequeued.
 	// Using a negative timeout indicates an indefinite timeout,
 	// and is thus functionally equivalent to calling wait_dequeue.
 	template<typename U>
 	bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs)
 	{
 		if (!sema.wait(timeout_usecs)) {
 			return false;
 		}
 		bool success = inner.try_dequeue(result);
 		AE_UNUSED(result);
 		assert(success);
 		AE_UNUSED(success);
 		return true;
 	}
 #if __cplusplus > 199711L || _MSC_VER >= 1700
 	// Attempts to dequeue an element; if the queue is empty,
 	// waits until an element is available up to the specified timeout,
 	// then dequeues it and returns true, or returns false if the timeout
 	// expires before an element can be dequeued.
 	// Using a negative timeout indicates an indefinite timeout,
 	// and is thus functionally equivalent to calling wait_dequeue.
 	// The timeout is converted to microseconds and forwarded to the
 	// std::int64_t overload.
 	template<typename U, typename Rep, typename Period>
 	inline bool wait_dequeue_timed(U& result, std::chrono::duration<Rep, Period> const& timeout)
 	{
 		return wait_dequeue_timed(result, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
 	}
 #endif
 	// Returns a pointer to the front element in the queue (the one that
 	// would be removed next by a call to `try_dequeue` or `pop`). If the
 	// queue appears empty at the time the method is called, nullptr is
 	// returned instead.
 	// Must be called only from the consumer thread.
 	AE_FORCEINLINE T* peek()
 	{
 		return inner.peek();
 	}
 	// Removes the front element from the queue, if any, without returning it.
 	// Returns true on success, or false if the queue appeared empty at the time
 	// `pop` was called.
 	AE_FORCEINLINE bool pop()
 	{
 		if (sema.tryWait()) {
 			bool result = inner.pop();
 			assert(result);
 			AE_UNUSED(result);
 			return true;
 		}
 		return false;
 	}
 	// Returns the approximate number of items currently in the queue
 	// (taken from the semaphore's count rather than by walking the blocks).
 	// Safe to call from both the producer and consumer threads.
 	AE_FORCEINLINE size_t size_approx() const
 	{
 		return sema.availableApprox();
 	}
 private:
 	// Disable copying & assignment. These must take the wrapper's own type:
 	// a parameter of the inner queue type would declare an unrelated
 	// converting constructor/assignment instead of the copy operations.
 	BlockingReaderWriterQueue(BlockingReaderWriterQueue const&) {  }
 	BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue const&) { return *this; }
 private:
 	ReaderWriterQueue inner;
 	spsc_sema::LightweightSemaphore sema;
 };
 }    // end namespace moodycamel
 #ifdef AE_VCPP
 #pragma warning(pop)
 #endif
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -30,7 +30,9 @@ set(SOURCE_FILES    MediaFile.cpp
                    ../include/ffcpp/Resampler.h
                    Player.cpp
                    ../include/ffcpp/Player.h
-                    ../include/ffcpp/TSQueue.h)
+                    ../include/ffcpp/TSQueue.h
                    ../include/ffcpp/atomicops.h
                    ../include/ffcpp/readerwriterqueue.h)
 add_library(ffcpp ${SOURCE_FILES})
 target_link_libraries(ffcpp ${FFMPEG_LIBRARIES})
--- a/src/Codec.cpp
+++ b/src/Codec.cpp
@ -115,6 +115,8 @@ namespace ffcpp {
 		if(_codecCtx->codec_type == AVMEDIA_TYPE_VIDEO) {
 			frame->guessPts();
 		} else if(_codecCtx->codec_type == AVMEDIA_TYPE_AUDIO) {
 			frame->guessChannelLayout();
 		}
 		return frame;
@ -125,7 +127,7 @@ namespace ffcpp {
 		int gotPacket = 0;
 		auto encFunc = (_codecCtx->codec_type == AVMEDIA_TYPE_VIDEO ? avcodec_encode_video2 : avcodec_encode_audio2);
-        int res = encFunc(_codecCtx, packet, frame->nativePtr(), &gotPacket);
+        int res = encFunc(_codecCtx, packet, frame ? frame->nativePtr() : nullptr, &gotPacket);
 		if(res < 0) throw std::runtime_error("cannot encode frame");
 		return packet;
--- a/src/Frame.cpp
+++ b/src/Frame.cpp
@ -86,4 +86,14 @@ namespace ffcpp {
 		return _frame->pts;
 	}
 	// Fills in the frame's channel layout from its channel count when no
 	// layout is set (channel_layout == 0); an existing layout is kept.
 	void Frame::guessChannelLayout() {
 		if(_frame->channel_layout != 0) {
 			return;
 		}
 		_frame->channel_layout = static_cast<uint64_t>(av_get_default_channel_layout(_frame->channels));
 	}
    // Returns the frame's pkt_size when it is non-negative, otherwise
    // falls back to linesize[0].
    int Frame::size() const {
        if(_frame->pkt_size >= 0) {
            return _frame->pkt_size;
        }
        return _frame->linesize[0];
    }
 }
--- a/src/MediaFile.cpp
+++ b/src/MediaFile.cpp
@ -16,6 +16,10 @@ namespace ffcpp {
 			_streams.reserve(_formatCtx->nb_streams);
 			for(size_t i = 0; i < _formatCtx->nb_streams; ++i) {
 				auto codecType = _formatCtx->streams[i]->codec->codec_type;
 				if(codecType != AVMEDIA_TYPE_VIDEO && codecType != AVMEDIA_TYPE_AUDIO)
 					continue;
 				auto stream = std::make_shared<Stream>(_formatCtx->streams[i]);
 				_streams.emplace_back(stream);
 			}
--- a/src/Player.cpp
+++ b/src/Player.cpp
@ -1,24 +1,33 @@
 #include "ffcpp/Player.h"
 #include "ffcpp/Stream.h"
 #include "ffcpp/Scaler.h"
 #include "ffcpp/Resampler.h"
 #include <iostream>
 #include <chrono>
 #include <ctime>
 namespace ffcpp {
-    Player::Player(std::shared_ptr<IVideoSink> vSink): _vSink(vSink),
+    Player::Player(std::shared_ptr<IVideoSink> vSink,
                   std::shared_ptr<IAudioSink> aSink): _vSink(vSink),
                                                       _aSink(aSink),
                                                       _curMedia(nullptr),
                                                       _aStream(nullptr),
                                                       _vStream(nullptr),
                                                       _state(PlayerState::Stopped),
                                                       _aSamplesBuffer(new uint8_t[AUDIO_BUFFER_LENGTH]),
                                                       _samplesInBuffer(0),
                                                       _decodeThread(&Player::decode, this),
                                                       _vPlayThread(&Player::displayFrames, this),
-                                                       _decodedFrames(10)
+                                                       _videoFrames(100),
                                                       _audioFrames(100)
    {
        init();
        _aSink->setAudioSource(this);
    }
    Player::~Player() {
        std::cout << "Player destructor" << std::endl;
 //        _state = PlayerState::Shutdown;
 //        std::cout << "destructor" << std::endl;
 //        _stateCond.notify_all();
@ -37,11 +46,24 @@ namespace ffcpp {
        _curMedia = std::make_unique<MediaFile>(path, Mode::Read);
        _vStream = _curMedia->videoStream();
        _aStream = _curMedia->audioStream();
        auto codec = _aStream->codec().get();
        _resampler = std::make_shared<Resampler>(_aStream->codec()->channels(),
                                                 _aStream->codec()->channelLayout(),
                                                 _aStream->codec()->sampleRate(),
                                                 _aStream->codec()->sampleFormat(),
                                                 _aSink->getChannelsCount(),
                                                 av_get_default_channel_layout(_aSink->getChannelsCount()),
                                                 _aSink->getSampleRate(),
                                                 _aSink->getSampleFormat());
    }
    // Replaces _scaler with a new Scaler that converts from the video
    // stream codec's width, height and pixel format to the requested
    // width/height in the pixel format reported by the video sink.
    // Holds _mutex for the whole call.
    // NOTE(review): _vStream is dereferenced without a null check, so this
    // presumably must only be called after a media file has been opened.
    void Player::setVideoSize(size_t width, size_t height) {
        std::lock_guard<std::mutex> lock(_mutex);
-        _scaler = std::make_shared<Scaler>(_vStream->codec()->width(), _vStream->codec()->height(), _vStream->codec()->pixelFormat(),
+        _scaler = std::make_shared<Scaler>(_vStream->codec()->width(),
                                           _vStream->codec()->height(),
                                           _vStream->codec()->pixelFormat(),
                                           width, height, _vSink->getPixelFormat());
    }
@ -74,7 +96,18 @@ namespace ffcpp {
                auto frame = _vStream->codec()->decode(packet);
                frame = _scaler->scale(frame);
                lock.unlock();
-                _decodedFrames.pushOrWait(frame);
+                while(!_videoFrames.try_enqueue(frame)) {
                    std::cout << "waiting for enqueue video frame" << std::endl;
                    std::this_thread::sleep_for(std::chrono::milliseconds(10));
                }
            } else if(packetType == AVMEDIA_TYPE_AUDIO) {
                auto frame = _aStream->codec()->decode(packet);
                frame = _resampler->resample(frame);
                lock.unlock();
                while(!_audioFrames.try_enqueue(frame)) {
                    std::cout << "waiting for enqueue audio frame" << std::endl;
                    std::this_thread::sleep_for(std::chrono::milliseconds(10));
                }
            }
        }
    }
@ -91,15 +124,58 @@ namespace ffcpp {
            }
            lock.unlock();
-            auto frame = _decodedFrames.popOrWait();
+
            int fps = _vStream->fps();
            FramePtr frame;
            if(_videoFrames.try_dequeue(frame)) {
                lock.lock();
                AVFrame* f = frame->nativePtr();
                _vSink->drawPlanarYUVFrame(f->data[0], f->data[1], f->data[2],
                                           f->linesize[0], f->linesize[1], f->linesize[2]);
            int fps = _vStream->fps();
                lock.unlock();
            } else {
                std::cout << "=============== skip video frame" << std::endl;
            }
            std::this_thread::sleep_for(std::chrono::milliseconds(1000/fps));
        }
    }
    uint64_t time = 0;
    /**
     * Fills a caller-provided buffer with audio bytes taken from the queue
     * of decoded (and resampled) audio frames.
     *
     * Bytes left over from a previous call (kept in _aSamplesBuffer) are
     * delivered first. After that, frames are pulled from _audioFrames
     * until exactly `length` bytes have been written. The part of the last
     * frame that does not fit is stored in _aSamplesBuffer for the next call.
     *
     * While the audio queue is empty this function polls it every 10 ms,
     * so it does not return until enough data has been produced.
     *
     * Only plane 0 (AVFrame::data[0]) of each frame is read.
     * NOTE(review): this assumes the sink format is packed/interleaved and
     * that Frame::size() is the byte size of that plane — confirm.
     * NOTE(review): the tail of a frame is assumed to fit into
     * _aSamplesBuffer (AUDIO_BUFFER_LENGTH bytes) — confirm against the
     * largest frame the resampler can produce.
     *
     * @param data   destination buffer; must hold at least `length` bytes
     * @param length number of bytes to write; nothing is done when it is
     *               not positive or when `data` is null
     */
    void Player::fillSampleBuffer(uint8_t *data, int length) {
        if(data == nullptr || length <= 0)
            return;

        int copied = 0;

        // Deliver the bytes carried over from the previous call first. Never
        // write more than `length` bytes: whatever still does not fit is
        // shifted to the front of the carry-over buffer for the next call.
        if(_samplesInBuffer > 0) {
            uint8_t* carry = _aSamplesBuffer.get();
            const int pending = static_cast<int>(_samplesInBuffer);
            const int take = pending < length ? pending : length;
            memcpy(data, carry, take);
            if(take < pending)
                memmove(carry, carry + take, pending - take);
            _samplesInBuffer = pending - take;
            copied = take;
        }

        while(copied < length) {
            FramePtr frame;
            while(!_audioFrames.try_dequeue(frame)) {
                std::cout << "waiting for audio frame" << std::endl;
                std::this_thread::sleep_for(std::chrono::milliseconds(10));
            }

//            uint64_t curTime = std::chrono::system_clock::now().time_since_epoch().count();
//            std::cout << "fill samples buffer: " << length << ", " << (curTime - time) << std::endl;
//            time = curTime;

            const uint8_t* samples = frame->nativePtr()->data[0];
            const int frameSize = frame->size();
            if(samples == nullptr || frameSize <= 0)
                continue; // nothing usable in this frame

            const int room = length - copied;
            if(frameSize > room) {
                // The frame is larger than the remaining space: fill the
                // output and keep the tail of the sample data (not of the
                // plane-pointer array) for the next call.
                memcpy(data + copied, samples, room);
                memcpy(_aSamplesBuffer.get(), samples + room, frameSize - room);
                _samplesInBuffer = frameSize - room;
                copied = length;
            } else {
                memcpy(data + copied, samples, frameSize);
                copied += frameSize;
            }
        }
    }
 }
--- a/src/Resampler.cpp
+++ b/src/Resampler.cpp
@ -9,8 +9,9 @@ extern "C" {
 namespace ffcpp {
-	Resampler::Resampler(int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat, int outChannelLayout,
+	Resampler::Resampler(int inChannelCount, int inChannelLayout, int inSampleRate, AVSampleFormat inSampleFormat,
-	                     int outSampleRate, AVSampleFormat outSampleFormat) {
+                         int outChannelCount, int outChannelLayout, int outSampleRate, AVSampleFormat outSampleFormat) {
 		_dstChannelCount = outChannelCount;
 		_dstChannelLayout = outChannelLayout;
 		_dstSampleFormat = outSampleFormat;
 		_dstSampleRate = outSampleRate;
@ -20,10 +21,12 @@ namespace ffcpp {
 			throw new std::runtime_error("cannot create resampler");
 		}
        av_opt_set_int(_swrContext, "in_channel_count",     inChannelCount, 0);
 		av_opt_set_int(_swrContext, "in_channel_layout",    inChannelLayout, 0);
 		av_opt_set_int(_swrContext, "in_sample_rate",       inSampleRate, 0);
 		av_opt_set_sample_fmt(_swrContext, "in_sample_fmt", inSampleFormat, 0);
        av_opt_set_int(_swrContext, "out_channel_count",     outChannelCount, 0);
 		av_opt_set_int(_swrContext, "out_channel_layout",    outChannelLayout, 0);
 		av_opt_set_int(_swrContext, "out_sample_rate",       outSampleRate, 0);
 		av_opt_set_sample_fmt(_swrContext, "out_sample_fmt", outSampleFormat, 0);
@ -33,8 +36,8 @@ namespace ffcpp {
 	}
 	// Convenience constructor: delegates to the explicit-parameter constructor,
 	// taking the input side (channel count, channel layout, sample rate, sample
 	// format) from `decoder` and the output side from `encoder`.
 	// Both codec pointers are dereferenced unconditionally.
 	Resampler::Resampler(CodecPtr decoder, CodecPtr encoder)
-			: Resampler(decoder->channelLayout(), decoder->sampleRate(), decoder->sampleFormat(),
+			: Resampler(decoder->channels(), decoder->channelLayout(), decoder->sampleRate(), decoder->sampleFormat(),
-			            encoder->channelLayout(), encoder->sampleRate(), encoder->sampleFormat()) {
+			            encoder->channels(), encoder->channelLayout(), encoder->sampleRate(), encoder->sampleFormat()) {
 	}
 	Resampler::~Resampler() {
@ -44,11 +47,12 @@ namespace ffcpp {
 	}
    FramePtr Resampler::resample(FramePtr inFrame) {
 		int channelsCount = av_get_channel_layout_nb_channels(_dstChannelLayout);
        int outSamples = swr_get_out_samples(_swrContext, inFrame->samplesCount());
-        FramePtr outFrame = std::make_shared<Frame>(outSamples, channelsCount, _dstSampleFormat, _dstSampleRate);
+        FramePtr outFrame = std::make_shared<Frame>(outSamples, _dstChannelCount, _dstSampleFormat, _dstSampleRate);
-        int res = swr_convert_frame(_swrContext, outFrame->nativePtr(), inFrame->nativePtr());
+		AVFrame *out = outFrame->nativePtr(), *in = inFrame->nativePtr();
        int res = swr_convert_frame(_swrContext, out, in);
 		throwIfError(res, "cannot convert audio frame");
 		return outFrame;