// audio_demo/aecm_receiver.cpp
#include <iostream>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <stdexcept>
#include <thread>
#include <mutex>
#include <vector>
#include <unistd.h>
// FFmpeg C API headers (these may also be pulled in indirectly via common.h).
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>
#include <libavutil/channel_layout.h>
#include <libavutil/time.h>
}
#include "timing.h"
#include "log/logger.h"
#include "common.h"
#include <modules/audio_processing/include/audio_processing.h>
#include <modules/audio_processing/include/config.h>
#include "alsa_dev.h"
using namespace std;
using namespace toolkit;
#define MIX_INPUT_CHANNELS 2
#define MIX_INPUT_SAMPLE_RATE 44100
// Samples per channel in one 10 ms frame (441 at 44.1 kHz).
#define MIX_INPUT_SAMPLES (10 * MIX_INPUT_SAMPLE_RATE / 1000)
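// Note (added sketch): webrtc::AudioProcessing consumes exactly 10 ms of audio per
// ProcessStream() call, i.e. sample_rate / 100 samples per channel, which is what
// MIX_INPUT_SAMPLES evaluates to. The helper below only illustrates that relationship
// via the StreamConfig API; it is not used by the original code path.
static inline size_t framesPer10Ms(int sampleRateHz)
{
    webrtc::StreamConfig cfg(sampleRateHz, MIX_INPUT_CHANNELS);
    return cfg.num_frames(); // 441 for 44100 Hz == MIX_INPUT_SAMPLES
}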
struct audio_buf_t
{
    uint8_t *data;
    int index;
    int size;
};
struct RtmpConfig {
    char url[1024];
    AVFormatContext *formatCtx;
    AVStream *stream;
    AVCodecContext *codecCtx;
    SwrContext *swrCtx;
    std::thread *thread;
    std::mutex *mutex;
    bool quit;
};
static SampleInfo kPcmSampleInfo;
//----------------------------------------------
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
// Minimum/maximum volume in dB
#define MIN_DB (-10)
#define MAX_DB (20)
// Volume range: 0 = mute, 100 = maximum volume
#define MUTE_VOLUME (0)
#define MAX_VOLUME (100)
static int vol_scaler_init(int *scaler, int mindb, int maxdb);
typedef struct VolumeCtlUnit
{
    int scaler[MAX_VOLUME + 1]; // volume table (Q14 gain per volume step)
    int zeroDb;                 // index of 0 dB (unity gain) in scaler
    // user-adjustable volume for the capture/mic path
    int micVolume;
    VolumeCtlUnit() {
        // initialize the volume scaler table
        zeroDb = vol_scaler_init(scaler, MIN_DB, MAX_DB);
        micVolume = 100;
    }
} volume_ctl_unit_t;
static volume_ctl_unit_t kVolCtrlUnit;
static int vol_scaler_init(int *scaler, int mindb, int maxdb)
{
    double tabdb[MAX_VOLUME + 1];
    double tabf [MAX_VOLUME + 1];
    int z, i;
    for (i = 0; i < (MAX_VOLUME + 1); i++) {
        // Split (mindb, maxdb) evenly into (MAX_VOLUME + 1) steps
        // (divide in floating point so the steps are not truncated to whole dB).
        tabdb[i] = mindb + (maxdb - mindb) * i / (double)(MAX_VOLUME + 1);
        // dB = 20 * log10(A1 / A2); when A1 == A2 the gain is 0 dB.
        // Using (1 << 14) as the reference amplitude, store the scaled amplitude (A1)
        // for each step in the volume table.
        tabf [i] = pow(10.0, tabdb[i] / 20.0);
        scaler[i] = (int)((1 << 14) * tabf[i]); // Q14 fixed point
    }
    z = -mindb * (MAX_VOLUME + 1) / (maxdb - mindb);
    z = MAX(z, 0);
    z = MIN(z, MAX_VOLUME);
    scaler[0] = 0;          // index 0 means mute
    scaler[z] = (1 << 14);  // index z is 0 dB, i.e. unity gain (no scaling)
    return z;
}
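// Worked example (added) with the constants above (MIN_DB = -10, MAX_DB = 20):
//   z = 10 * 101 / 30 = 33                     -> scaler[33]  = 1 << 14 (unity gain)
//   tabdb[100] = -10 + 30*100/101 ≈ +19.7 dB   -> scaler[100] ≈ 16384 * 9.66 ≈ 158000
//   tabdb[1]   = -10 + 30/101     ≈  -9.7 dB   -> scaler[1]   ≈ 16384 * 0.33 ≈ 5360
// so each volume step spans roughly 0.3 dB across the (-10, +20) dB range.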
static void vol_scaler_run(int16_t *buf, int n, int volume)
{
    /* Naive floating-point version, for reference:
    while (n--) {
        *buf = (*buf) * multiplier / 100.0;
        *buf = std::max((int)*buf, -0x7fff);
        *buf = std::min((int)*buf, 0x7fff);
        buf++;
    }
    */
    int multiplier = kVolCtrlUnit.scaler[volume];
    if (multiplier > (1 << 14)) {
        // Amplification: widen to 32 bits and clamp to the int16 range.
        int32_t v;
        while (n--) {
            v = ((int32_t)*buf * multiplier) >> 14;
            v = MAX(v, -0x7fff);
            v = MIN(v, 0x7fff);
            *buf++ = (int16_t)v;
        }
    } else if (multiplier < (1 << 14)) {
        // Attenuation: no clamping needed.
        while (n--) {
            *buf = ((int32_t)*buf * multiplier) >> 14;
            buf++;
        }
    }
    // multiplier == (1 << 14) is unity gain: leave the buffer untouched.
}
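// Usage sketch (added, not wired into this demo): the Q14 scaler above would
// typically be applied to the interleaved S16 buffer right before it is written
// to the sound card, e.g. outBuffer in playbackLoop(). applyMicVolume() is an
// illustrative helper name, not part of the original code.
static inline void applyMicVolume(int16_t *pcm, int totalSamples)
{
    // kVolCtrlUnit.micVolume: 0 = mute, 100 = maximum (see MUTE_VOLUME/MAX_VOLUME).
    vol_scaler_run(pcm, totalSamples, kVolCtrlUnit.micVolume);
}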
//----------------------------------------------
webrtc::AudioProcessing::Config webtcConfigInit()
{
    webrtc::AudioProcessing::Config apmConfig;
    apmConfig.pipeline.maximum_internal_processing_rate = MIX_INPUT_SAMPLE_RATE;
    apmConfig.pipeline.multi_channel_capture = true;
    apmConfig.pipeline.multi_channel_render = true;
    // PreAmplifier
    apmConfig.pre_amplifier.enabled = false;
    apmConfig.pre_amplifier.fixed_gain_factor = 0.7f;
    // HighPassFilter
    apmConfig.high_pass_filter.enabled = false;
    apmConfig.high_pass_filter.apply_in_full_band = false;
    // EchoCanceller
    apmConfig.echo_canceller.enabled = false;
    apmConfig.echo_canceller.mobile_mode = false;
    apmConfig.echo_canceller.export_linear_aec_output = false;
    apmConfig.echo_canceller.enforce_high_pass_filtering = true;
    // NoiseSuppression
    apmConfig.noise_suppression.enabled = true;
    apmConfig.noise_suppression.level = webrtc::AudioProcessing::Config::NoiseSuppression::kHigh;
    apmConfig.noise_suppression.analyze_linear_aec_output_when_available = false;
    // TransientSuppression
    apmConfig.transient_suppression.enabled = false;
    // VoiceDetection
    apmConfig.voice_detection.enabled = true;
    // GainController1
    apmConfig.gain_controller1.enabled = true;
    apmConfig.gain_controller1.mode = webrtc::AudioProcessing::Config::GainController1::kAdaptiveAnalog;
    apmConfig.gain_controller1.target_level_dbfs = 3;
    apmConfig.gain_controller1.compression_gain_db = 12;
    apmConfig.gain_controller1.enable_limiter = true;
    apmConfig.gain_controller1.analog_level_minimum = 0;
    apmConfig.gain_controller1.analog_level_maximum = 496;
    apmConfig.gain_controller1.analog_gain_controller.enabled = true;
    apmConfig.gain_controller1.analog_gain_controller.startup_min_volume = webrtc::kAgcStartupMinVolume;
    apmConfig.gain_controller1.analog_gain_controller.clipped_level_min = webrtc::kClippedLevelMin;
    apmConfig.gain_controller1.analog_gain_controller.enable_agc2_level_estimator = false;
    apmConfig.gain_controller1.analog_gain_controller.enable_digital_adaptive = true;
    // GainController2
    apmConfig.gain_controller2.enabled = false;
    apmConfig.gain_controller2.fixed_digital.gain_db = 0.f;
    apmConfig.gain_controller2.adaptive_digital.enabled = false;
    apmConfig.gain_controller2.adaptive_digital.vad_probability_attack = 1.f;
    apmConfig.gain_controller2.adaptive_digital.level_estimator = webrtc::AudioProcessing::Config::GainController2::kRms;
    apmConfig.gain_controller2.adaptive_digital.level_estimator_adjacent_speech_frames_threshold = 1;
    apmConfig.gain_controller2.adaptive_digital.use_saturation_protector = true;
    apmConfig.gain_controller2.adaptive_digital.initial_saturation_margin_db = 20.f;
    apmConfig.gain_controller2.adaptive_digital.extra_saturation_margin_db = 2.f;
    apmConfig.gain_controller2.adaptive_digital.gain_applier_adjacent_speech_frames_threshold = 1;
    apmConfig.gain_controller2.adaptive_digital.max_gain_change_db_per_second = 3.f;
    apmConfig.gain_controller2.adaptive_digital.max_output_noise_level_dbfs = -50.f;
    // ResidualEchoDetector
    apmConfig.residual_echo_detector.enabled = false;
    // LevelEstimation
    apmConfig.level_estimation.enabled = false;
    return apmConfig;
}
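// Note (added sketch): with gain_controller1 in kAdaptiveAnalog mode, the caller is
// expected to report the current analog capture level before each ProcessStream()
// call and read back the recommended level afterwards, roughly:
//
//   apm->set_stream_analog_level(currentLevel);             // before ProcessStream()
//   apm->ProcessStream(...);
//   currentLevel = apm->recommended_stream_analog_level();  // apply to the mic/ALSA mixer
//
// This demo only sets an initial level (408) once in main(); the feedback loop is left out.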
void pullDestory(RtmpConfig *config)
{
    if (config->formatCtx)
        avformat_close_input(&config->formatCtx);
    if (config->codecCtx) {
        avcodec_close(config->codecCtx);
        avcodec_free_context(&config->codecCtx);
    }
    if (config->swrCtx) {
        swr_close(config->swrCtx);
        swr_free(&config->swrCtx);
    }
}
int pullInit(RtmpConfig *config, int channels, AVSampleFormat format, int sample_rate)
{
    if (nullptr == strstr(config->url, "rtmp://")) {
        LogE("url error, url: %s\n", config->url);
        return -1;
    }
    int ret = 0;
    int scan_all_pmts_set = 0;
    int st_index = -1;
    AVDictionary *format_opts = nullptr;
    AVFormatContext *ic = nullptr;
    AVCodecParameters *codecPar = nullptr;
    AVCodec *codec = nullptr;
    AVCodecContext *codecCtx = nullptr;
    SwrContext *swrCtx = nullptr;
    ic = avformat_alloc_context();
    if (!ic) {
        throw(std::runtime_error("avformat_alloc_context failed."));
    }
    if (!av_dict_get(format_opts, "scan_all_pmts", NULL, AV_DICT_MATCH_CASE)) {
        av_dict_set(&format_opts, "scan_all_pmts", "1", AV_DICT_DONT_OVERWRITE);
        scan_all_pmts_set = 1;
    }
    // Disable input buffering.
    av_dict_set(&format_opts, "fflags", "nobuffer", AV_DICT_MATCH_CASE);
    // Limit the number of bytes used to probe the stream.
    av_dict_set(&format_opts, "probesize", "10000", AV_DICT_MATCH_CASE);
retry:
    // Open the input stream.
    ret = avformat_open_input(&ic, config->url, nullptr, &format_opts);
    if (ret < 0) {
        LogE("avformat_open_input failed.\n");
        goto fail;
    }
    if (scan_all_pmts_set)
        av_dict_set(&format_opts, "scan_all_pmts", nullptr, AV_DICT_MATCH_CASE);
    av_format_inject_global_side_data(ic);
    ret = avformat_find_stream_info(ic, nullptr);
    if (ret < 0) {
        // LOG(ERROR) << url << ": could not find codec parameters";
        LogE("{} : could not find codec parameters\n", config->url);
        goto fail;
    }
    if (ic->pb)
        ic->pb->eof_reached = 0;
    // Dump the input stream parameters.
    av_dump_format(ic, 0, config->url, 0);
    st_index = av_find_best_stream(ic, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
    if (st_index >= 0) {
        config->stream = ic->streams[st_index];
    }
    else {
        LogW("find audio stream failed, try again.\n");
        avformat_close_input(&ic);
        goto retry;
    }
    // Initialize the decoder.
    codecPar = config->stream->codecpar;
    codec = avcodec_find_decoder(codecPar->codec_id);
    if (!codec) {
        LogE("find codec failed.\n");
        goto fail;
    }
    codecCtx = avcodec_alloc_context3(codec);
    if (!codecCtx) {
        LogE("avcodec_alloc_context3 failed.\n");
        goto fail;
    }
    ret = avcodec_parameters_to_context(codecCtx, codecPar);
    if (ret < 0) {
        LogE("avcodec_parameters_to_context\n");
        goto fail;
    }
    codecCtx->time_base = config->stream->time_base;
    // Open the decoder.
    if (avcodec_open2(codecCtx, codec, nullptr) < 0) {
        LogE("avcodec_open2 failed\n");
        goto fail;
    }
    // Some decoders leave channel_layout unset; fall back to the default layout
    // so the resampler gets a valid input description.
    if (codecCtx->channel_layout == 0)
        codecCtx->channel_layout = av_get_default_channel_layout(codecCtx->channels);
    // Initialize the resampler.
    swrCtx = swr_alloc_set_opts(nullptr,
                                av_get_default_channel_layout(channels),
                                format,
                                sample_rate,
                                codecCtx->channel_layout,
                                codecCtx->sample_fmt,
                                codecCtx->sample_rate,
                                0, nullptr);
    if (!swrCtx) {
        LogE("swr_alloc_set_opts failed.\n");
        goto fail;
    }
    if (swr_init(swrCtx) < 0) {
        LogE("swr_init failed.\n");
        goto fail;
    }
    config->formatCtx = ic;
    config->codecCtx = codecCtx;
    config->swrCtx = swrCtx;
    config->stream->discard = AVDISCARD_DEFAULT;
    av_dict_free(&format_opts);
    return 0;
fail:
    if (format_opts)
        av_dict_free(&format_opts);
    if (ic)
        avformat_close_input(&ic);
    if (codecCtx) {
        avcodec_close(codecCtx);
        avcodec_free_context(&codecCtx);
    }
    if (swrCtx) {
        swr_close(swrCtx);
        swr_free(&swrCtx);
    }
    return -1;
}
void playbackLoop(RtmpConfig *rtmp, std::vector<audio_buf_t> *list,
                  webrtc::AudioProcessing *apm, alsa::AlsaDev *play);
int main(int argc, char *argv[])
{
    if (argc < 3) {
        fprintf(stderr, "usage %s card_num url\n", argv[0]);
        return -1;
    }
    // Initialize the logging system.
    Logger::Instance().add(std::make_shared<ConsoleChannel>());
    Logger::Instance().add(std::make_shared<FileChannel>());
    Logger::Instance().setWriter(std::make_shared<AsyncLogWriter>());
    // Initialize the ALSA playback device.
    int card = atoi(argv[1]);
    alsa::Config alsaConfig;
    alsaConfig.period_time = 10000;
    alsaConfig.buffer_time = 50000;
    alsaConfig.channels = MIX_INPUT_CHANNELS;
    alsaConfig.format = SND_PCM_FORMAT_S16_LE;
    alsaConfig.rate = MIX_INPUT_SAMPLE_RATE;
    if (card < 0)
        sprintf(alsaConfig.device, "default");
    else
        sprintf(alsaConfig.device, "plughw:%d", card);
    alsa::AlsaDev usbPlaybackDev;
    if (usbPlaybackDev.applyConfig(alsaConfig) < 0) {
        PrintE("alsa config failed.\n");
        return -1;
    }
    // PrintI("alsa before init: %s\n", usbPlaybackDev.configToString());
    if (usbPlaybackDev.init(SND_PCM_STREAM_PLAYBACK) < 0) {
        PrintE("alsa init failed.\n");
        return -1;
    }
    PrintI("alsa init: %s\n", usbPlaybackDev.configToString());
    // Initialize WebRTC audio processing.
    webrtc::AudioProcessing *apm = webrtc::AudioProcessingBuilder().Create();
    if (!apm) {
        LogE("create apm failed.\n");
        return -1;
    }
    webrtc::AudioProcessing::Config apmConfig = webtcConfigInit();
    apm->ApplyConfig(apmConfig);
    apm->Initialize();
    apm->set_stream_analog_level(408);
    LogI("webrtc params: {\n%s\n}\n", apmConfig.ToString().c_str());
    // Initialize the RTMP pull stream.
    RtmpConfig rtmp;
    memset(&rtmp, 0, sizeof(rtmp));
    strcpy(rtmp.url, argv[2]);
    if (pullInit(&rtmp, MIX_INPUT_CHANNELS, AV_SAMPLE_FMT_S16, MIX_INPUT_SAMPLE_RATE) < 0) {
        return -1;
    }
    AVPacket *pkt = av_packet_alloc();
    AVFrame *outputFrame = av_frame_alloc();
    int maxBuffSize = 1024 * 4 * 2;
    uint8_t *swrBuffer = (uint8_t *)calloc(maxBuffSize, sizeof(uint8_t));
    int ret;
    std::vector<audio_buf_t> swr_list;
    // Describe the PCM format handed to the playback thread (used for the APM StreamConfig).
    kPcmSampleInfo.channels = MIX_INPUT_CHANNELS;
    kPcmSampleInfo.sample_rate = MIX_INPUT_SAMPLE_RATE;
    rtmp.mutex = new std::mutex;
    rtmp.quit = false;
    rtmp.thread = new std::thread(playbackLoop, &rtmp, &swr_list, apm, &usbPlaybackDev);
    while (true)
    {
        if (av_read_frame(rtmp.formatCtx, pkt) >= 0 &&
            pkt->stream_index == rtmp.stream->index) {
            ret = avcodec_send_packet(rtmp.codecCtx, pkt);
            if (ret == AVERROR(EAGAIN)) {
                LogW("send packet again.\n");
                av_packet_unref(pkt);
                av_usleep(10 * 1000);
                continue;
            }
            else if (ret < 0) {
                LogE("send packet error ret={}\n", ret);
                break;
            }
            while (avcodec_receive_frame(rtmp.codecCtx, outputFrame) >= 0) {
                int outSamples = swr_convert(rtmp.swrCtx, &swrBuffer, maxBuffSize / (sizeof(int16_t) * MIX_INPUT_CHANNELS),
                                             (uint8_t const **)(outputFrame->data), outputFrame->nb_samples);
                int size = outSamples * MIX_INPUT_CHANNELS * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
                {
                    uint8_t *buffer = (uint8_t *)calloc(size, sizeof(uint8_t));
                    memcpy(buffer, swrBuffer, size);
                    std::unique_lock<std::mutex> lck(*rtmp.mutex);
                    audio_buf_t out;
                    out.data = buffer;
                    out.index = 0;
                    out.size = size;
                    swr_list.emplace_back(out);
                    // if (out_fp) fwrite(buffer, 1, size, out_fp);
                }
            }
            av_frame_unref(outputFrame);
        }
        av_packet_unref(pkt);
    }
    // Shut down the playback thread and release resources.
    rtmp.quit = true;
    if (rtmp.thread) {
        rtmp.thread->join();
        delete rtmp.thread;
    }
    delete rtmp.mutex;
    for (auto &buf : swr_list)
        free(buf.data);
    av_packet_free(&pkt);
    av_frame_free(&outputFrame);
    free(swrBuffer);
    if (apm) {
        delete apm;
        apm = nullptr;
    }
    pullDestory(&rtmp);
    return 0;
}
void playbackLoop(RtmpConfig *rtmp, std::vector<audio_buf_t> *list, webrtc::AudioProcessing *apm, alsa::AlsaDev *play)
{
    // Stream configuration for ProcessStream(); kPcmSampleInfo is filled in main().
    webrtc::StreamConfig playConfig;
    playConfig.set_has_keyboard(false);
    playConfig.set_num_channels(kPcmSampleInfo.channels);
    playConfig.set_sample_rate_hz(kPcmSampleInfo.sample_rate);
    int sampleSize = 0;
    int outSize = MIX_INPUT_SAMPLES * MIX_INPUT_CHANNELS * sizeof(int16_t);
    uint8_t *outBuffer = (uint8_t *)calloc(outSize, sizeof(uint8_t));
    // FILE *out_fp = fopen("/root/swr_out.pcm", "wb");
    while (!rtmp->quit) {
        // Fetch MIX_INPUT_SAMPLES worth of decoded audio and fill it into outBuffer.
        sampleSize = outSize;
        while (sampleSize > 0 && !rtmp->quit)
        {
            // Take the lock before touching the shared list to avoid racing the decoder thread.
            std::unique_lock<std::mutex> lck(*rtmp->mutex);
            if (list->empty()) {
                lck.unlock();
                av_usleep(1000);
                continue;
            }
            auto data = list->begin();
            int readSize = sampleSize < (data->size - data->index) ? sampleSize : (data->size - data->index);
            memcpy(outBuffer + outSize - sampleSize, data->data + data->index, readSize);
            sampleSize -= readSize;
            data->index += readSize;
            if (data->index >= data->size) {
                free(data->data);
                list->erase(list->begin());
            }
        }
        // if (out_fp) fwrite(outBuffer, 1, outSize, out_fp);
        // Audio processing
        {
            apm->ProcessStream((int16_t *)outBuffer, playConfig, playConfig, (int16_t *)outBuffer);
        }
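        // Note (added sketch): ProcessStream() returns webrtc::AudioProcessing::kNoError (0)
        // on success; a minimal error check could look like:
        //
        //   if (apm->ProcessStream((int16_t *)outBuffer, playConfig, playConfig,
        //                          (int16_t *)outBuffer) != webrtc::AudioProcessing::kNoError)
        //       LogW("ProcessStream failed\n");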
        play->write(outBuffer, outSize);
    }
    free(outBuffer);
}