// audio_demo/aecm_sender.cpp

#include <iostream>
#include <unistd.h>
#include <cmath>
#include "timing.h"
#include "log/logger.h"
#include "common.h"

#include <modules/audio_processing/include/audio_processing.h>
#include <modules/audio_processing/include/config.h>
#include "alsa_dev.h"

using namespace std;
using namespace toolkit;

// Capture format used for the ALSA device, the WebRTC stream config and the
// RTMP encoder/resampler setup in main().
// Number of interleaved channels captured from the sound card.
#define MIX_INPUT_CHANNELS 2
// Samples per channel in a 10 ms chunk (10 * rate / 1000). The macro refers to
// MIX_INPUT_SAMPLE_RATE, which is defined just below; that is fine because
// macros are only expanded at the point of use.
#define MIX_INPUT_SAMPLES  (10 * MIX_INPUT_SAMPLE_RATE/1000)
// #define MIX_INPUT_SAMPLES 1024
// Capture sample rate in Hz.
#define MIX_INPUT_SAMPLE_RATE 44100

// One queued chunk of processed PCM waiting to be copied into the encoder
// input frame. main() allocates `data` with malloc() and releases it with
// free() once the chunk has been fully consumed.
struct audio_buf_t
{
    uint8_t* data;  // start of the heap buffer holding the PCM bytes
    int index;      // byte offset of the first byte not yet consumed
    int size;       // total number of bytes stored in `data`
};

// State of one RTMP push session. The caller fills in `url`; pushInit() sets
// the remaining pointers on success and pushDestory() releases them.
// main() zero-fills this struct with memset(), so it has to stay a plain
// aggregate without constructors or non-trivial members.
struct RtmpConfig {
    char url[1024];             // destination URL; pushInit() requires it to contain "rtmp://"
    AVFormatContext *formatCtx; // output ("flv") muxer context
    AVStream *stream;           // audio stream created inside formatCtx
    AVCodecContext *codecCtx;   // opened AAC encoder context
    SwrContext *swrCtx;         // converter from the capture sample format to the encoder format
};

// File-local sample description (SampleInfo is declared in a project header).
// NOTE(review): within the visible code it is only referenced from a
// commented-out vol_scaler_run() call in main() - confirm whether it is still needed.
static SampleInfo kPcmSampleInfo;

//----------------------------------------------
// Function-like min/max helpers. Each argument may be evaluated twice, so do
// not pass expressions with side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
// Lowest / highest gain covered by the volume table, in dB.
#define MIN_DB (-10)
#define MAX_DB (60)
// Lowest / highest volume level: 0 = mute, 100 = maximum volume.
#define MUTE_VOLUME (0)
#define MAX_VOLUME (100)

// Defined further down; declared here because the constructor below calls it.
static int vol_scaler_init(int *scaler, int mindb, int maxdb);

// Software volume controller state: a gain lookup table indexed by volume
// level (0..MAX_VOLUME) together with the currently selected microphone volume.
typedef struct VolumeCtlUnit
{
    int scaler[MAX_VOLUME + 1];     // gain table, one Q14 multiplier per volume level
    int zeroDb;                     // index inside `scaler` of the 0 dB (unity gain) entry
    int micVolume;                  // user-selected volume level to apply

    // Fill the gain table for the [MIN_DB, MAX_DB] range and start at volume 100.
    // `scaler` is declared before `zeroDb`, so the table storage already exists
    // when the initializer below hands it to vol_scaler_init().
    VolumeCtlUnit()
        : zeroDb(vol_scaler_init(scaler, MIN_DB, MAX_DB)),
          micVolume(100)
    {
    }
} volume_ctl_unit_t;

// Single file-local controller instance; its table is built during static initialization.
static volume_ctl_unit_t kVolCtrlUnit;

/**
 * Build the Q14 fixed-point gain table used by the software volume control.
 *
 * The dB range [mindb, maxdb) is divided evenly into (MAX_VOLUME + 1) steps and
 * each step is converted to a linear amplitude factor with
 * gain = 10^(dB / 20), stored as gain * (1 << 14).
 *
 * After the ramp is generated two entries are overwritten, in this order:
 *   - scaler[0] becomes 0 (mute);
 *   - scaler[z] becomes exactly (1 << 14) (unity gain), where z is the index
 *     at which the ramp crosses 0 dB, clamped to [0, MAX_VOLUME].
 * If z is 0 (mindb >= 0, or an empty range) the unity entry therefore replaces
 * the mute entry, as it did before.
 *
 * @param scaler  output table with room for MAX_VOLUME + 1 ints; nothing is
 *                written when it is null
 * @param mindb   gain in dB mapped to the lowest table index
 * @param maxdb   gain in dB approached at the top of the table
 * @return index of the unity-gain entry (0 when scaler is null)
 *
 * NOTE: gains are stored in an int, so ranges reaching far beyond ~100 dB
 * would not fit; the ranges used in this file are well below that.
 */
static int vol_scaler_init(int *scaler, int mindb, int maxdb)
{
    const int steps = MAX_VOLUME + 1;
    const int range = maxdb - mindb;

    if (!scaler) {
        return 0;
    }

    for (int i = 0; i < steps; i++) {
        // The division must be done in floating point: in integer arithmetic
        // the ramp is truncated to whole dB, so neighbouring volume levels end
        // up sharing the same gain.
        const double db   = mindb + static_cast<double>(range) * i / steps;
        const double gain = pow(10.0, db / 20.0);
        scaler[i] = (int)((1 << 14) * gain); // Q14 fixed point
    }

    // Index where the ramp crosses 0 dB. An empty range has no crossing (and
    // would divide by zero), so fall back to index 0.
    int z = (range != 0) ? (-mindb * steps / range) : 0;
    z = MAX(z, 0);
    z = MIN(z, MAX_VOLUME);
    scaler[0] = 0;          // volume 0 means mute
    scaler[z] = (1 << 14);  // exact unity gain: samples pass through unchanged

    return z;
}

static void vol_scaler_run(int16_t *buf, int n, int volume)
{
    /* 简易版
    while (n--) {
        *buf = (*buf) * multiplier / 100.0;
        *buf = std::max((int)*buf, -0x7fff);
        *buf = std::min((int)*buf, 0x7fff);
        buf++;
    }
    */
    int multiplier = kVolCtrlUnit.scaler[volume];
    if (multiplier > (1 << 14)) {
        int32_t v;
        while (n--) {
            v = ((int32_t)*buf * multiplier) >> 14;
            v = MAX(v,-0x7fff);
            v = MIN(v, 0x7fff);
            *buf++ = (int16_t)v;
        }
    } else if (multiplier < (1 << 14)) {
        while (n--) {
            *buf = ((int32_t)*buf * multiplier) >> 14;
            buf++;
        }
    }
}
//----------------------------------------------

// Build the WebRTC audio-processing configuration used by this demo.
// Enabled here: noise suppression (kHigh), voice detection and gain
// controller 1 in adaptive-digital mode. Every other stage is switched off,
// but its parameters are still written out explicitly.
webrtc::AudioProcessing::Config webtcConfigInit()
{
    using ApmConfig = webrtc::AudioProcessing::Config;
    ApmConfig apmConfig;

    // Pipeline
    auto &pipeline = apmConfig.pipeline;
    pipeline.maximum_internal_processing_rate = MIX_INPUT_SAMPLE_RATE;
    pipeline.multi_channel_capture = true;
    pipeline.multi_channel_render = true;

    // PreAmplifier
    auto &preAmp = apmConfig.pre_amplifier;
    preAmp.enabled = false;
    preAmp.fixed_gain_factor = 0.7f;

    // HighPassFilter
    auto &highPass = apmConfig.high_pass_filter;
    highPass.enabled = false;
    highPass.apply_in_full_band = false;

    // EchoCanceller
    auto &aec = apmConfig.echo_canceller;
    aec.enabled = false;
    aec.mobile_mode = false;
    aec.export_linear_aec_output = false;
    aec.enforce_high_pass_filtering = true;

    // NoiseSuppression
    auto &ns = apmConfig.noise_suppression;
    ns.enabled = true;
    ns.level = ApmConfig::NoiseSuppression::kHigh;
    ns.analyze_linear_aec_output_when_available = false;

    // TransientSuppression
    apmConfig.transient_suppression.enabled = false;

    // VoiceDetection
    apmConfig.voice_detection.enabled = true;

    // GainController1
    // Available modes:
    //   kAdaptiveAnalog  - adaptive analog mode
    //   kAdaptiveDigital - adaptive digital gain mode
    //   kFixedDigital    - fixed digital gain mode
    auto &agc1 = apmConfig.gain_controller1;
    agc1.enabled = true;
    agc1.mode = ApmConfig::GainController1::kAdaptiveDigital;
    agc1.target_level_dbfs = 3;     // target level
    agc1.compression_gain_db = 12;  // gain capability
    agc1.enable_limiter = true;     // limiter on/off
    agc1.analog_level_minimum = 0;
    agc1.analog_level_maximum = 496;

    auto &analogAgc = agc1.analog_gain_controller;
    analogAgc.enabled = true;
    analogAgc.startup_min_volume = webrtc::kAgcStartupMinVolume;
    analogAgc.clipped_level_min = webrtc::kClippedLevelMin;
    analogAgc.enable_agc2_level_estimator = false;
    analogAgc.enable_digital_adaptive = true;

    // GainController2
    auto &agc2 = apmConfig.gain_controller2;
    agc2.enabled = false;
    agc2.fixed_digital.gain_db = 0.f;

    auto &adaptive = agc2.adaptive_digital;
    adaptive.enabled = false;
    adaptive.vad_probability_attack = 1.f;
    adaptive.level_estimator = ApmConfig::GainController2::kRms;
    adaptive.level_estimator_adjacent_speech_frames_threshold = 1;
    adaptive.use_saturation_protector = true;
    adaptive.initial_saturation_margin_db = 20.f;
    adaptive.extra_saturation_margin_db = 2.f;
    adaptive.gain_applier_adjacent_speech_frames_threshold = 1;
    adaptive.max_gain_change_db_per_second = 3.f;
    adaptive.max_output_noise_level_dbfs = -50.f;

    // ResidualEchoDetector
    apmConfig.residual_echo_detector.enabled = false;

    // LevelEstimation
    apmConfig.level_estimation.enabled = false;

    return apmConfig;
}

/**
 * Release everything pushInit() stored in an RTMP session.
 *
 * Every pointer is reset to null once its object has been released, so the
 * function can be called again on the same struct (or on a zero-filled one)
 * without double-freeing. `url` is left untouched.
 *
 * @param config session to tear down; a null pointer is ignored
 */
void pushDestory(RtmpConfig *config) {
    if (!config) {
        return;
    }
    if (config->formatCtx) {
        if (config->formatCtx->pb)
            avio_close(config->formatCtx->pb);
        avformat_free_context(config->formatCtx);
        // The stream belongs to the format context and was freed with it;
        // clear both so no dangling pointer is left behind.
        config->formatCtx = nullptr;
        config->stream = nullptr;
    }
    if (config->codecCtx) {
        avcodec_close(config->codecCtx);
        avcodec_free_context(&config->codecCtx);   // also resets the pointer to null
    }
    if (config->swrCtx) {
        swr_close(config->swrCtx);
        swr_free(&config->swrCtx);                 // also resets the pointer to null
    }
}

/**
 * Set up an RTMP (FLV) audio push session: output context, AAC encoder and a
 * resampler converting the capture format into the encoder's sample format.
 *
 * On success the created objects are stored in `config` (formatCtx, stream,
 * codecCtx, swrCtx) and must later be released with pushDestory(). On failure
 * everything allocated here is released again and `config` is not modified.
 * The stream header is NOT written here; the caller does that.
 *
 * @param config       session to initialise; `config->url` must already hold
 *                     the destination and contain "rtmp://"
 * @param channels     number of channels of the PCM that will be fed in
 * @param format       sample format of the PCM that will be fed in
 * @param sample_rate  sample rate (Hz) of the input PCM and of the encoded stream
 * @return 0 on success, -1 on any failure
 */
int pushInit(RtmpConfig *config, int channels, AVSampleFormat format, int sample_rate)
{
    if (nullptr == config) {
        PrintE("rtmp config is null.\n");
        return -1;
    }
    if (nullptr == strstr(config->url, "rtmp://")) {
        PrintE("url error, url: %s\n", config->url);
        return -1;
    }
    // All locals are declared up front so that `goto fail` never jumps over an
    // initialization.
    AVCodec *codec = nullptr;
    AVCodecContext *codecCtx = nullptr;
    AVFormatContext *afctx = nullptr;
    AVCodecParameters *codecPar = nullptr;
    SwrContext *swrCtx = nullptr;
    AVStream *audio_st = nullptr;
    AVDictionary *opts = nullptr;
    int ret;

    // Open the output stream
    ret = avformat_alloc_output_context2(&afctx, nullptr, "flv", config->url);
    if (ret < 0) {
        PrintE("open output failed.\n");
        goto fail;
    }
    if ( !(afctx->oformat->flags & AVFMT_NOFILE) ) {
        ret = avio_open(&afctx->pb, config->url, AVIO_FLAG_WRITE);
        if (ret < 0) {
            PrintE("avio_open failed.\n");
            goto fail;
        }
    }

    // Create the audio stream (codec is still null at this point)
    audio_st = avformat_new_stream(afctx, codec);
    if (!audio_st) {
        PrintE("alloc new audio stream failed.\n");
        goto fail;
    }
    // Describe the encoded stream
    codecPar = afctx->streams[audio_st->index]->codecpar;
    codecPar->codec_id          = AV_CODEC_ID_AAC;
    codecPar->codec_type        = AVMEDIA_TYPE_AUDIO;
    codecPar->codec_tag         = 0;
    codecPar->bit_rate          = 128 * 1024;
    codecPar->sample_rate       = sample_rate;
    codecPar->channel_layout    = av_get_default_channel_layout(channels);
    codecPar->channels          = av_get_channel_layout_nb_channels(codecPar->channel_layout);
    codecPar->format            = AV_SAMPLE_FMT_FLTP;

    // Encoder setup
    codec = avcodec_find_encoder(codecPar->codec_id);
    if (!codec) {
        PrintE("find codec aac failed.\n");
        // Must go through the cleanup path: the output context (and possibly
        // its AVIO handle) is already open at this point.
        goto fail;
    }

    codecCtx = avcodec_alloc_context3(codec);
    if (!codecCtx) {
        PrintE("alloc codec context failed.\n");
        goto fail;
    }

    ret = avcodec_parameters_to_context(codecCtx, codecPar);
    if (ret < 0) {
        PrintE("copt codec params failed.\n");
        goto fail;
    }
    // Disable buffering
    av_dict_set(&opts, "fflags", "nobuffer", AV_DICT_MATCH_CASE);
//    av_dict_set(&opts, "rtmp_live", "1", AV_DICT_MATCH_CASE);
    // Open the encoder
    ret = avcodec_open2(codecCtx, codec, &opts);
    if (ret < 0) {
        // PrintE is used printf-style throughout this file, so the codec id
        // needs a %d conversion rather than a "{}" placeholder.
        PrintE("open codec %d failed.\n", static_cast<int>(codec->id));
        goto fail;
    }
    audio_st->codecpar->codec_tag = 0;
    // The option dictionary is no longer needed
    av_dict_free(&opts);
    // NOTE(review): the stream's codecpar is filled in by hand above and is
    // never refreshed from the opened encoder (e.g. with
    // avcodec_parameters_from_context), so encoder-generated extradata does not
    // reach the muxer - confirm whether the FLV muxer needs it here.

    // Dump the output stream description
    av_dump_format(afctx, 0, config->url, 1);

    // Resampler: capture format -> encoder format
    swrCtx = swr_alloc_set_opts(nullptr,
                                codecCtx->channel_layout,
                                codecCtx->sample_fmt,
                                codecCtx->sample_rate,
                                av_get_default_channel_layout(channels),
                                format,
                                sample_rate,
                                0, nullptr);
    if (!swrCtx) {
        PrintE("swr_alloc_set_opts failed.\n");
        goto fail;
    }
    ret = swr_init(swrCtx);
    if (ret < 0) {
        // An uninitialised resampler would only fail later inside swr_convert().
        PrintE("swr_init failed.\n");
        goto fail;
    }

    config->codecCtx = codecCtx;
    config->formatCtx = afctx;
    config->stream = audio_st;
    config->swrCtx = swrCtx;
    PrintI("rtmp push init ok.\n");
    return 0;
fail:
    // Safe on null; releases the options when avcodec_open2() failed.
    av_dict_free(&opts);
    if (afctx) {
        if (afctx->pb)
            avio_close(afctx->pb);
        avformat_free_context(afctx);
    }
    if (codecCtx) {
        avcodec_close(codecCtx);
        avcodec_free_context(&codecCtx);
    }
    if (swrCtx) {
        swr_close(swrCtx);
        swr_free(&swrCtx);
    }
    return -1;
}

int main(int argc, char *argv[])
{
    if (argc < 4) {
        fprintf(stderr, "usage %s card_num left(0) or right(1) url \n", argv[0]);
        return -1;
    }
    //初始化日志系统
    Logger::Instance().add(std::make_shared<ConsoleChannel> ());
    Logger::Instance().add(std::make_shared<FileChannel>());
    Logger::Instance().setWriter(std::make_shared<AsyncLogWriter>());

    // 初始化声卡设备
    int card = atoi(argv[1]);
    int type = atoi(argv[2]);
    alsa::Config alsaConfig;
    alsaConfig.period_time = MIX_INPUT_SAMPLES * 1000000 / MIX_INPUT_SAMPLE_RATE;
    alsaConfig.buffer_time = 5 * alsaConfig.period_time;
    alsaConfig.channels    = MIX_INPUT_CHANNELS;
    alsaConfig.format      = SND_PCM_FORMAT_S16_LE;
    alsaConfig.rate        = MIX_INPUT_SAMPLE_RATE;
    if (card < 0)
        sprintf(alsaConfig.device, "default");
    else
        sprintf(alsaConfig.device, "plughw:%d", card);
    alsa::AlsaDev usbCaptureDev;
    if (usbCaptureDev.applyConfig(alsaConfig) < 0) {
        PrintE("alsa config failed.\n");
        return -1;
    }
    PrintI("alsa before init: %s\n", usbCaptureDev.configToString());
    if (usbCaptureDev.init(SND_PCM_STREAM_CAPTURE) < 0) {
        PrintE("alsa init failed.\n");
        return -1;
    }
    PrintI("alsa init: %s\n", usbCaptureDev.configToString());

    // webrtc初始化
    webrtc::AudioProcessing *apm = webrtc::AudioProcessingBuilder().Create();
    if (!apm) {
        PrintI("create apm failed.\n");
        return -1;
    }
    webrtc::AudioProcessing::Config config = webtcConfigInit();
    apm->ApplyConfig(config);
    apm->Initialize();
    apm->set_stream_analog_level(408);
    webrtc::StreamConfig capConfig;
    capConfig.set_has_keyboard(false);
    capConfig.set_num_channels(alsaConfig.channels);
    capConfig.set_sample_rate_hz(alsaConfig.rate);
    PrintI("webrtc params: {\n%s\n}\n", config.ToString().c_str());

    int ret = 0;
    uint8_t *capData = nullptr;
    int buffer_size = usbCaptureDev.getFrames() * usbCaptureDev.getFrameSize();
    capData = (uint8_t *)malloc(buffer_size);
    assert(capData);
    uint8_t *listenBuffer = nullptr;
    size_t listenSize = buffer_size;
    listenBuffer = (uint8_t*)malloc(listenSize);
    assert(listenBuffer);
    

    // 推流初始化
    RtmpConfig rtmp;
    AVRational av;
    int64_t pts = 0;
    AVPacket *pkt = av_packet_alloc();
    memset(&rtmp, 0, sizeof(rtmp));
    strcpy(rtmp.url, argv[3]);
    if (pushInit(&rtmp, MIX_INPUT_CHANNELS, AV_SAMPLE_FMT_S16, MIX_INPUT_SAMPLE_RATE) < 0) {
        return -1;
    }
    av.den = rtmp.codecCtx->sample_rate;
    av.num = 1;
    AVFrame *inputFrame = av_frame_alloc();
    {
        inputFrame->sample_rate      = rtmp.codecCtx->sample_rate;
        inputFrame->format           = rtmp.codecCtx->sample_fmt;
        inputFrame->channels         = rtmp.codecCtx->channels;
        inputFrame->nb_samples       = 1024;
        inputFrame->channel_layout   = rtmp.codecCtx->channel_layout;

        int size = av_samples_get_buffer_size(nullptr,
                                          rtmp.codecCtx->channels, inputFrame->nb_samples, rtmp.codecCtx->sample_fmt, 1);
        uint8_t *buffer = (uint8_t *)av_malloc(size);
        avcodec_fill_audio_frame(inputFrame, inputFrame->channels, rtmp.codecCtx->sample_fmt,
                                (const uint8_t*)buffer, size, 1);
    }
    AVFrame *outputFrame = av_frame_alloc();
    {
        outputFrame->format = rtmp.codecCtx->sample_fmt;
        outputFrame->channel_layout = rtmp.codecCtx->channel_layout;
        outputFrame->sample_rate = rtmp.codecCtx->sample_rate;
        outputFrame->nb_samples = rtmp.codecCtx->frame_size;

        int output_bz = av_samples_get_buffer_size(NULL, rtmp.codecCtx->channels, rtmp.codecCtx->frame_size, rtmp.codecCtx->sample_fmt, 0);
        uint8_t *samples_data = (uint8_t *)av_malloc(output_bz);
        avcodec_fill_audio_frame(outputFrame, rtmp.codecCtx->channels, rtmp.codecCtx->sample_fmt, samples_data, output_bz, 0);
    }

    // 写入帧头
    ret = avformat_write_header(rtmp.formatCtx, nullptr);
    if (ret < 0) {
        PrintE("avformat_write_header failed.\n");
        return ret;
    }

    FILE *ns_fp = fopen("/root/ns_out.pcm", "wb");
    FILE *input_fp = fopen("/root/input_out.pcm", "wb");
    std::vector<audio_buf_t> ns_out_list;
    int frames = 0;
    PrintI("input frame samples=%d, output frame samples=%d\n", inputFrame->nb_samples, outputFrame->nb_samples);
    PrintI("------------------ start ------------------\n");
    while (true)
    {
        // 采集
        size_t read_size = usbCaptureDev.read(capData, buffer_size);
        // PrintI("alsa read %d\n", read_size);
        if (read_size <= 0) {
            msleep(1);
            continue;
        }

        // 分流
        if (type >= 0) {
            uint8_t *ptr  = capData;
            for (int i = 0; i < usbCaptureDev.getFrames(); ++i) {
                // int size = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
                int size = usbCaptureDev.getSampleSize();
                if (type == 0) {
                    // 左声道 (2.4G)
                    memcpy(listenBuffer + i * 2 * size, ptr + i * 2 * size, size);
                    memcpy(listenBuffer + (i * 2 + 1) * size, ptr + i * 2 * size, size);
                } else {
                    // 右声道 (LINE IN)
                    memcpy(listenBuffer + i * 2 * size, ptr + (i * 2 + 1) * size, size);
                    memcpy(listenBuffer + (i * 2 + 1) * size, ptr + (i * 2 + 1) * size, size);
                }
            }
        } else {
            memcpy(listenBuffer, capData, read_size * usbCaptureDev.getFrameSize());
        }
        

        // vol_scaler_run((int16_t *)listenBuffer, capData->GetSamples() * kPcmSampleInfo.channels, kVolCtrlUnit.micVolume);
        
        // 降噪
        apm->ProcessStream((int16_t *)listenBuffer, capConfig, capConfig, (int16_t *)listenBuffer);
        {
            uint8_t *buffer = (uint8_t *)malloc(listenSize);
            memcpy(buffer, listenBuffer, listenSize);
            audio_buf_t out;
            out.data = buffer;
            out.index = 0;
            out.size = listenSize;
            ns_out_list.emplace_back(out);
        }
        
        // 填充音频
        if (frames <= 0) frames = inputFrame->nb_samples;
        while (frames > 0 && ns_out_list.size() > 0)
        {
            auto nsData = ns_out_list.begin();


            int needSize = frames * sizeof(int16_t) * inputFrame->channels;
            int readSize = (nsData->size - nsData->index) >= needSize ? needSize : (nsData->size - nsData->index);

            memcpy(inputFrame->data[0] + (inputFrame->nb_samples - frames)*sizeof(int16_t)*inputFrame->channels, nsData->data + nsData->index, readSize);

            frames -= readSize/(sizeof(int16_t) * inputFrame->channels);
            nsData->index += readSize;

            if (nsData->index >= nsData->size) {
                free(nsData->data);
                ns_out_list.erase(ns_out_list.begin());
            }
        }
        if (frames > 0) continue;
        // if (input_fp) fwrite(inputFrame->data[0], 1, inputFrame->nb_samples * inputFrame->channels * sizeof(int16_t), input_fp);
        // 重采样
        {
            const uint8_t** in = (const uint8_t**)inputFrame->data;
            uint8_t **out = outputFrame->data;

            int len2, out_data_size;

            len2 = swr_convert(rtmp.swrCtx, out, outputFrame->nb_samples, in, inputFrame->nb_samples);
            if (len2 < 0) {
                printf("swr_convert failed. \n");
                break;
            }

            out_data_size = len2 * rtmp.codecCtx->channels * av_get_bytes_per_sample(rtmp.codecCtx->sample_fmt);
            // if (ns_fp) fwrite(outputFrame->data[0], 1, out_data_size, ns_fp);
        }
        

        // 推流到远端
        if (pts > INT64_MAX) pts = 0;
        outputFrame->pts = pts;
        pts += av_rescale_q(outputFrame->nb_samples, av, rtmp.codecCtx->time_base);

        ret = avcodec_send_frame(rtmp.codecCtx, outputFrame);
        if (ret < 0) {
            PrintE("avcodec_send_frame failed: %d\n", ret);
            break;
        }

        while (ret >= 0) {
            ret = avcodec_receive_packet(rtmp.codecCtx, pkt);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                break;
            } else if (ret < 0) {
                fprintf(stderr, "Error during encoding\n");
                break;
            }

            // 将数据包时间戳从编码器时间基转换到流时间基
            pkt->stream_index = rtmp.stream->index;
            av_packet_rescale_ts(pkt, rtmp.codecCtx->time_base, rtmp.stream->time_base);
            pkt->duration = av_rescale_q(pkt->duration, rtmp.codecCtx->time_base, rtmp.stream->time_base);

            // 写入数据包到输出媒体文件
            ret = av_interleaved_write_frame(rtmp.formatCtx, pkt);
            if (ret < 0) {
                fprintf(stderr, "Error while writing audio frame\n");
                break;
            }

            // 释放数据包
            av_packet_unref(pkt);
        }
    }
    // 写入帧尾
    av_write_trailer(rtmp.formatCtx);

    if (apm) {
        delete apm;
        apm = nullptr;
    }
    pushDestory(&rtmp);
    av_packet_free(&pkt);
    av_frame_free(&inputFrame);
    av_frame_free(&outputFrame);
    av_free(listenBuffer);
    for (auto buffer: ns_out_list) {
        free(buffer.data);
    }
    return 0;
}