audio_demo/aecm_sender.cpp
2025-02-14 08:58:27 +08:00

561 lines
20 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <iostream>
#include <unistd.h>
#include <cmath>
#include "timing.h"
#include "log/logger.h"
#include "common.h"
#include <modules/audio_processing/include/audio_processing.h>
#include <modules/audio_processing/include/config.h>
#include "alsa_dev.h"
using namespace std;
using namespace toolkit;
// Capture format used throughout this file: sample rate in Hz and channel count.
#define MIX_INPUT_SAMPLE_RATE 44100
#define MIX_INPUT_CHANNELS 2
// Samples per channel in one 10 ms chunk (441 at 44100 Hz).
// A fixed value of 1024 was tried as an alternative and is left disabled.
#define MIX_INPUT_SAMPLES (10 * MIX_INPUT_SAMPLE_RATE / 1000)
// A chunk of raw audio bytes together with a read cursor.
struct audio_buf_t {
    uint8_t* data; // first byte of the chunk
    int index;     // byte offset of the next unread byte inside data
    int size;      // total number of bytes stored in data
};
// State of one RTMP push session: the destination URL plus the FFmpeg
// objects that pushInit() creates and pushDestory() releases.
struct RtmpConfig
{
    char url[1024];             // destination URL, NUL-terminated
    AVFormatContext *formatCtx; // output (muxer) context
    AVStream *stream;           // audio stream created inside formatCtx
    AVCodecContext *codecCtx;   // encoder context
    SwrContext *swrCtx;         // resampler that feeds the encoder
};
// NOTE(review): presumably describes the PCM sample layout (SampleInfo comes from a
// project header). In the visible code it is referenced only by a commented-out call in main().
static SampleInfo kPcmSampleInfo;
//----------------------------------------------
// Function-like min/max helpers. Each argument can be evaluated twice, so do
// not pass expressions that have side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
// Lower and upper end of the gain range covered by the volume table, in dB.
#define MIN_DB (-10)
#define MAX_DB (60)
// Volume scale: 0 means mute, 100 is the loudest setting.
#define MUTE_VOLUME (0)
#define MAX_VOLUME (100)
// Defined further down; declared here because the constructor below calls it.
static int vol_scaler_init(int *scaler, int mindb, int maxdb);

// Volume-control state: a gain lookup table plus the currently selected volume.
struct VolumeCtlUnit
{
    int scaler[MAX_VOLUME + 1]; // gain table, one entry per volume step
    int zeroDb;                 // index inside scaler that stands for 0 dB
    int micVolume;              // user-adjustable microphone volume

    // Builds the gain table for [MIN_DB, MAX_DB] and selects volume 100.
    VolumeCtlUnit()
        : zeroDb(vol_scaler_init(scaler, MIN_DB, MAX_DB)),
          micVolume(100)
    {
    }
};
typedef VolumeCtlUnit volume_ctl_unit_t;

// File-local instance; its constructor runs during static initialization.
static volume_ctl_unit_t kVolCtrlUnit;
/**
 * Fill a volume lookup table with Q14 fixed-point gain factors.
 *
 * Entry i is assigned the gain  mindb + (maxdb - mindb) * i / (MAX_VOLUME + 1)  in dB,
 * converted to a linear amplitude ratio through  dB = 20 * log10(A1 / A2)  with
 * (1 << 14) standing for unity (A1 == A2, i.e. 0 dB).
 * Afterwards entry 0 is forced to 0 (mute) and the entry closest to 0 dB is
 * forced to exactly (1 << 14) so that it applies no gain at all.
 *
 * @param scaler  output table; must hold MAX_VOLUME + 1 ints
 * @param mindb   gain in dB at the low end of the table
 * @param maxdb   gain in dB at the high end of the table
 * @return index of the entry that was forced to unity gain, clamped to [0, MAX_VOLUME]
 */
static int vol_scaler_init(int *scaler, int mindb, int maxdb)
{
    const int steps = MAX_VOLUME + 1;
    const int range = maxdb - mindb;
    int z, i;
    for (i = 0; i < steps; i++) {
        // Divide in floating point. With integer division every step was
        // truncated to a whole dB, which made neighbouring volume settings
        // share the same gain.
        const double db = mindb + (double)range * i / steps;
        const double gain = pow(10.0, db / 20.0);
        scaler[i] = (int)((1 << 14) * gain); // Q14 fixed point
    }
    // Index whose dB value is (approximately) zero. An empty range would
    // divide by zero, so fall back to index 0 in that case.
    z = (range != 0) ? (-mindb * steps / range) : 0;
    z = MAX(z, 0);
    z = MIN(z, MAX_VOLUME);
    scaler[0] = 0;         // entry 0 means mute
    scaler[z] = (1 << 14); // exact unity gain at the 0 dB position
    return z;
}
static void vol_scaler_run(int16_t *buf, int n, int volume)
{
/* 简易版
while (n--) {
*buf = (*buf) * multiplier / 100.0;
*buf = std::max((int)*buf, -0x7fff);
*buf = std::min((int)*buf, 0x7fff);
buf++;
}
*/
int multiplier = kVolCtrlUnit.scaler[volume];
if (multiplier > (1 << 14)) {
int32_t v;
while (n--) {
v = ((int32_t)*buf * multiplier) >> 14;
v = MAX(v,-0x7fff);
v = MIN(v, 0x7fff);
*buf++ = (int16_t)v;
}
} else if (multiplier < (1 << 14)) {
while (n--) {
*buf = ((int32_t)*buf * multiplier) >> 14;
buf++;
}
}
}
//----------------------------------------------
// Builds the WebRTC audio-processing configuration used by this program.
// Enabled here: noise suppression (high), voice detection and gain
// controller 1 in adaptive-digital mode. Every other stage is switched off,
// but its parameters are still written out explicitly.
webrtc::AudioProcessing::Config webtcConfigInit()
{
    using ApmConfig = webrtc::AudioProcessing::Config;
    ApmConfig cfg;

    auto &pipeline = cfg.pipeline;
    pipeline.maximum_internal_processing_rate = MIX_INPUT_SAMPLE_RATE;
    pipeline.multi_channel_capture = true;
    pipeline.multi_channel_render = true;

    // Pre-amplifier
    auto &preAmp = cfg.pre_amplifier;
    preAmp.enabled = false;
    preAmp.fixed_gain_factor = 0.7f;

    // High-pass filter
    auto &hpf = cfg.high_pass_filter;
    hpf.enabled = false;
    hpf.apply_in_full_band = false;

    // Echo canceller
    auto &aec = cfg.echo_canceller;
    aec.enabled = false;
    aec.mobile_mode = false;
    aec.export_linear_aec_output = false;
    aec.enforce_high_pass_filtering = true;

    // Noise suppression
    auto &ns = cfg.noise_suppression;
    ns.enabled = true;
    ns.level = ApmConfig::NoiseSuppression::kHigh;
    ns.analyze_linear_aec_output_when_available = false;

    // Transient suppression
    cfg.transient_suppression.enabled = false;

    // Voice detection
    cfg.voice_detection.enabled = true;

    // Gain controller 1. Available modes:
    //   kAdaptiveAnalog   adaptive analog mode
    //   kAdaptiveDigital  adaptive digital gain mode
    //   kFixedDigital     fixed digital gain mode
    auto &agc1 = cfg.gain_controller1;
    agc1.enabled = true;
    agc1.mode = ApmConfig::GainController1::kAdaptiveDigital;
    agc1.target_level_dbfs = 3;    // target level
    agc1.compression_gain_db = 12; // gain capability
    agc1.enable_limiter = true;    // limiter switch
    agc1.analog_level_minimum = 0;
    agc1.analog_level_maximum = 496;

    auto &analog = agc1.analog_gain_controller;
    analog.enabled = true;
    analog.startup_min_volume = webrtc::kAgcStartupMinVolume;
    analog.clipped_level_min = webrtc::kClippedLevelMin;
    analog.enable_agc2_level_estimator = false;
    analog.enable_digital_adaptive = true;

    // Gain controller 2
    auto &agc2 = cfg.gain_controller2;
    agc2.enabled = false;
    agc2.fixed_digital.gain_db = 0.f;

    auto &adaptive = agc2.adaptive_digital;
    adaptive.enabled = false;
    adaptive.vad_probability_attack = 1.f;
    adaptive.level_estimator = ApmConfig::GainController2::kRms;
    adaptive.level_estimator_adjacent_speech_frames_threshold = 1;
    adaptive.use_saturation_protector = true;
    adaptive.initial_saturation_margin_db = 20.f;
    adaptive.extra_saturation_margin_db = 2.f;
    adaptive.gain_applier_adjacent_speech_frames_threshold = 1;
    adaptive.max_gain_change_db_per_second = 3.f;
    adaptive.max_output_noise_level_dbfs = -50.f;

    // Residual echo detector
    cfg.residual_echo_detector.enabled = false;

    // Level estimation
    cfg.level_estimation.enabled = false;

    return cfg;
}
/**
 * Release every FFmpeg object referenced by an RtmpConfig.
 *
 * All released members are reset to nullptr, so calling this function a
 * second time on the same config is harmless.
 *
 * @param config  session to tear down; a null pointer is ignored
 */
void pushDestory(RtmpConfig *config) {
    if (!config) {
        return;
    }
    if (config->formatCtx) {
        if (config->formatCtx->pb)
            avio_close(config->formatCtx->pb);
        avformat_free_context(config->formatCtx);
        // avformat_free_context() takes the pointer by value and cannot
        // clear it; reset it here so it does not dangle. The stream was
        // created inside the format context, so it is gone as well.
        config->formatCtx = nullptr;
        config->stream = nullptr;
    }
    if (config->codecCtx) {
        avcodec_close(config->codecCtx);
        avcodec_free_context(&config->codecCtx);
    }
    if (config->swrCtx) {
        swr_close(config->swrCtx);
        swr_free(&config->swrCtx);
    }
}
/**
 * Create the FLV/RTMP output, the AAC encoder and the resampler for a push session.
 *
 * On success the created objects are stored in config->formatCtx,
 * config->stream, config->codecCtx and config->swrCtx. On failure everything
 * created so far is released and config is left unchanged.
 *
 * @param config       session description; config->url must contain "rtmp://"
 * @param channels     number of channels of the audio that will be fed in
 * @param format       sample format of the audio that will be fed in
 * @param sample_rate  sample rate in Hz, used for both the input and the encoder
 * @return 0 on success, -1 on any failure
 */
int pushInit(RtmpConfig *config, int channels, AVSampleFormat format, int sample_rate)
{
    if (!config) {
        PrintE("rtmp config is null.\n");
        return -1;
    }
    if (nullptr == strstr(config->url, "rtmp://")) {
        PrintE("url error, url: %s\n", config->url);
        return -1;
    }
    // Every local is declared before the first `goto fail` so that no jump
    // crosses an initialization.
    AVCodec *codec = nullptr;
    AVCodecContext *codecCtx = nullptr;
    AVFormatContext *afctx = nullptr;
    AVCodecParameters *codecPar = nullptr;
    SwrContext *swrCtx = nullptr;
    AVStream *audio_st = nullptr;
    AVDictionary *opts = nullptr;
    int ret;

    // Open the output.
    ret = avformat_alloc_output_context2(&afctx, nullptr, "flv", config->url);
    if (ret < 0) {
        PrintE("open output failed.\n");
        goto fail;
    }
    if ( !(afctx->oformat->flags & AVFMT_NOFILE) ) {
        ret = avio_open(&afctx->pb, config->url, AVIO_FLAG_WRITE);
        if (ret < 0) {
            PrintE("avio_open failed.\n");
            goto fail;
        }
    }

    // Create the audio stream.
    audio_st = avformat_new_stream(afctx, codec);
    if (!audio_st) {
        PrintE("alloc new audio stream failed.\n");
        goto fail;
    }

    // Describe the encoded stream.
    codecPar = afctx->streams[audio_st->index]->codecpar;
    codecPar->codec_id = AV_CODEC_ID_AAC;
    codecPar->codec_type = AVMEDIA_TYPE_AUDIO;
    codecPar->codec_tag = 0;
    codecPar->bit_rate = 128 * 1024;
    codecPar->sample_rate = sample_rate;
    codecPar->channel_layout = av_get_default_channel_layout(channels);
    codecPar->channels = av_get_channel_layout_nb_channels(codecPar->channel_layout);
    codecPar->format = AV_SAMPLE_FMT_FLTP;

    // Set up the encoder.
    codec = avcodec_find_encoder(codecPar->codec_id);
    if (!codec) {
        PrintE("find codec aac failed.\n");
        // Go through the common cleanup: the output context (and possibly
        // an open connection) already exists at this point.
        goto fail;
    }
    codecCtx = avcodec_alloc_context3(codec);
    if (!codecCtx) {
        PrintE("alloc codec context failed.\n");
        goto fail;
    }
    ret = avcodec_parameters_to_context(codecCtx, codecPar);
    if (ret < 0) {
        PrintE("copt codec params failed.\n");
        goto fail;
    }

    // Disable buffering.
    // NOTE(review): "fflags" looks like a format-level option; confirm that
    // passing it to avcodec_open2() has any effect.
    av_dict_set(&opts, "fflags", "nobuffer", AV_DICT_MATCH_CASE);
    // av_dict_set(&opts, "rtmp_live", "1", AV_DICT_MATCH_CASE);

    // Open the encoder.
    ret = avcodec_open2(codecCtx, codec, &opts);
    if (ret < 0) {
        PrintE("open codec %d failed.\n", (int)codec->id);
        goto fail;
    }
    // NOTE(review): the stream parameters are not refreshed from the opened
    // encoder (e.g. extradata); confirm that the muxer does not need them.
    audio_st->codecpar->codec_tag = 0;

    // The option dictionary is no longer needed.
    av_dict_free(&opts);

    // Print the output stream description.
    av_dump_format(afctx, 0, config->url, 1);

    // Set up the resampler: caller's format -> encoder's format.
    swrCtx = swr_alloc_set_opts(nullptr,
                                codecCtx->channel_layout,
                                codecCtx->sample_fmt,
                                codecCtx->sample_rate,
                                av_get_default_channel_layout(channels),
                                format,
                                sample_rate,
                                0, nullptr);
    if (!swrCtx) {
        PrintE("swr_alloc_set_opts failed.\n");
        goto fail;
    }
    ret = swr_init(swrCtx);
    if (ret < 0) {
        PrintE("swr_init failed.\n");
        goto fail;
    }

    config->codecCtx = codecCtx;
    config->formatCtx = afctx;
    config->stream = audio_st;
    config->swrCtx = swrCtx;
    PrintI("rtmp push init ok.\n");
    return 0;

fail:
    // opts is still allocated when avcodec_open2() failed.
    av_dict_free(&opts);
    if (afctx) {
        if (afctx->pb)
            avio_close(afctx->pb);
        avformat_free_context(afctx);
    }
    if (codecCtx) {
        avcodec_close(codecCtx);
        avcodec_free_context(&codecCtx);
    }
    if (swrCtx) {
        swr_close(swrCtx);
        swr_free(&swrCtx);
    }
    return -1;
}
int main(int argc, char *argv[])
{
if (argc < 4) {
fprintf(stderr, "usage %s card_num left(0) or right(1) url \n", argv[0]);
return -1;
}
//初始化日志系统
Logger::Instance().add(std::make_shared<ConsoleChannel> ());
Logger::Instance().add(std::make_shared<FileChannel>());
Logger::Instance().setWriter(std::make_shared<AsyncLogWriter>());
// 初始化声卡设备
int card = atoi(argv[1]);
int type = atoi(argv[2]);
alsa::Config alsaConfig;
alsaConfig.period_time = MIX_INPUT_SAMPLES * 1000000 / MIX_INPUT_SAMPLE_RATE;
alsaConfig.buffer_time = 5 * alsaConfig.period_time;
alsaConfig.channels = MIX_INPUT_CHANNELS;
alsaConfig.format = SND_PCM_FORMAT_S16_LE;
alsaConfig.rate = MIX_INPUT_SAMPLE_RATE;
if (card < 0)
sprintf(alsaConfig.device, "default");
else
sprintf(alsaConfig.device, "plughw:%d", card);
alsa::AlsaDev usbCaptureDev;
if (usbCaptureDev.applyConfig(alsaConfig) < 0) {
PrintE("alsa config failed.\n");
return -1;
}
PrintI("alsa before init: %s\n", usbCaptureDev.configToString());
if (usbCaptureDev.init(SND_PCM_STREAM_CAPTURE) < 0) {
PrintE("alsa init failed.\n");
return -1;
}
PrintI("alsa init: %s\n", usbCaptureDev.configToString());
// webrtc初始化
webrtc::AudioProcessing *apm = webrtc::AudioProcessingBuilder().Create();
if (!apm) {
PrintI("create apm failed.\n");
return -1;
}
webrtc::AudioProcessing::Config config = webtcConfigInit();
apm->ApplyConfig(config);
apm->Initialize();
apm->set_stream_analog_level(408);
webrtc::StreamConfig capConfig;
capConfig.set_has_keyboard(false);
capConfig.set_num_channels(alsaConfig.channels);
capConfig.set_sample_rate_hz(alsaConfig.rate);
PrintI("webrtc params: {\n%s\n}\n", config.ToString().c_str());
int ret = 0;
uint8_t *capData = nullptr;
int buffer_size = usbCaptureDev.getFrames() * usbCaptureDev.getFrameSize();
capData = (uint8_t *)malloc(buffer_size);
assert(capData);
uint8_t *listenBuffer = nullptr;
size_t listenSize = buffer_size;
listenBuffer = (uint8_t*)malloc(listenSize);
assert(listenBuffer);
// 推流初始化
RtmpConfig rtmp;
AVRational av;
int64_t pts = 0;
AVPacket *pkt = av_packet_alloc();
memset(&rtmp, 0, sizeof(rtmp));
strcpy(rtmp.url, argv[3]);
if (pushInit(&rtmp, MIX_INPUT_CHANNELS, AV_SAMPLE_FMT_S16, MIX_INPUT_SAMPLE_RATE) < 0) {
return -1;
}
av.den = rtmp.codecCtx->sample_rate;
av.num = 1;
AVFrame *inputFrame = av_frame_alloc();
{
inputFrame->sample_rate = rtmp.codecCtx->sample_rate;
inputFrame->format = rtmp.codecCtx->sample_fmt;
inputFrame->channels = rtmp.codecCtx->channels;
inputFrame->nb_samples = 1024;
inputFrame->channel_layout = rtmp.codecCtx->channel_layout;
int size = av_samples_get_buffer_size(nullptr,
rtmp.codecCtx->channels, inputFrame->nb_samples, rtmp.codecCtx->sample_fmt, 1);
uint8_t *buffer = (uint8_t *)av_malloc(size);
avcodec_fill_audio_frame(inputFrame, inputFrame->channels, rtmp.codecCtx->sample_fmt,
(const uint8_t*)buffer, size, 1);
}
AVFrame *outputFrame = av_frame_alloc();
{
outputFrame->format = rtmp.codecCtx->sample_fmt;
outputFrame->channel_layout = rtmp.codecCtx->channel_layout;
outputFrame->sample_rate = rtmp.codecCtx->sample_rate;
outputFrame->nb_samples = rtmp.codecCtx->frame_size;
int output_bz = av_samples_get_buffer_size(NULL, rtmp.codecCtx->channels, rtmp.codecCtx->frame_size, rtmp.codecCtx->sample_fmt, 0);
uint8_t *samples_data = (uint8_t *)av_malloc(output_bz);
avcodec_fill_audio_frame(outputFrame, rtmp.codecCtx->channels, rtmp.codecCtx->sample_fmt, samples_data, output_bz, 0);
}
// 写入帧头
ret = avformat_write_header(rtmp.formatCtx, nullptr);
if (ret < 0) {
PrintE("avformat_write_header failed.\n");
return ret;
}
FILE *ns_fp = fopen("/root/ns_out.pcm", "wb");
FILE *input_fp = fopen("/root/input_out.pcm", "wb");
std::vector<audio_buf_t> ns_out_list;
int frames = 0;
PrintI("input frame samples=%d, output frame samples=%d\n", inputFrame->nb_samples, outputFrame->nb_samples);
PrintI("------------------ start ------------------\n");
while (true)
{
// 采集
size_t read_size = usbCaptureDev.read(capData, buffer_size);
// PrintI("alsa read %d\n", read_size);
if (read_size <= 0) {
msleep(1);
continue;
}
// 分流
if (type >= 0) {
uint8_t *ptr = capData;
for (int i = 0; i < usbCaptureDev.getFrames(); ++i) {
// int size = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
int size = usbCaptureDev.getSampleSize();
if (type == 0) {
// 左声道 (2.4G)
memcpy(listenBuffer + i * 2 * size, ptr + i * 2 * size, size);
memcpy(listenBuffer + (i * 2 + 1) * size, ptr + i * 2 * size, size);
} else {
// 右声道 (LINE IN)
memcpy(listenBuffer + i * 2 * size, ptr + (i * 2 + 1) * size, size);
memcpy(listenBuffer + (i * 2 + 1) * size, ptr + (i * 2 + 1) * size, size);
}
}
} else {
memcpy(listenBuffer, capData, read_size * usbCaptureDev.getFrameSize());
}
// vol_scaler_run((int16_t *)listenBuffer, capData->GetSamples() * kPcmSampleInfo.channels, kVolCtrlUnit.micVolume);
// 降噪
apm->ProcessStream((int16_t *)listenBuffer, capConfig, capConfig, (int16_t *)listenBuffer);
{
uint8_t *buffer = (uint8_t *)malloc(listenSize);
memcpy(buffer, listenBuffer, listenSize);
audio_buf_t out;
out.data = buffer;
out.index = 0;
out.size = listenSize;
ns_out_list.emplace_back(out);
}
// 填充音频
if (frames <= 0) frames = inputFrame->nb_samples;
while (frames > 0 && ns_out_list.size() > 0)
{
auto nsData = ns_out_list.begin();
int needSize = frames * sizeof(int16_t) * inputFrame->channels;
int readSize = (nsData->size - nsData->index) >= needSize ? needSize : (nsData->size - nsData->index);
memcpy(inputFrame->data[0] + (inputFrame->nb_samples - frames)*sizeof(int16_t)*inputFrame->channels, nsData->data + nsData->index, readSize);
frames -= readSize/(sizeof(int16_t) * inputFrame->channels);
nsData->index += readSize;
if (nsData->index >= nsData->size) {
free(nsData->data);
ns_out_list.erase(ns_out_list.begin());
}
}
if (frames > 0) continue;
// if (input_fp) fwrite(inputFrame->data[0], 1, inputFrame->nb_samples * inputFrame->channels * sizeof(int16_t), input_fp);
// 重采样
{
const uint8_t** in = (const uint8_t**)inputFrame->data;
uint8_t **out = outputFrame->data;
int len2, out_data_size;
len2 = swr_convert(rtmp.swrCtx, out, outputFrame->nb_samples, in, inputFrame->nb_samples);
if (len2 < 0) {
printf("swr_convert failed. \n");
break;
}
out_data_size = len2 * rtmp.codecCtx->channels * av_get_bytes_per_sample(rtmp.codecCtx->sample_fmt);
// if (ns_fp) fwrite(outputFrame->data[0], 1, out_data_size, ns_fp);
}
// 推流到远端
if (pts > INT64_MAX) pts = 0;
outputFrame->pts = pts;
pts += av_rescale_q(outputFrame->nb_samples, av, rtmp.codecCtx->time_base);
ret = avcodec_send_frame(rtmp.codecCtx, outputFrame);
if (ret < 0) {
PrintE("avcodec_send_frame failed: %d\n", ret);
break;
}
while (ret >= 0) {
ret = avcodec_receive_packet(rtmp.codecCtx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
fprintf(stderr, "Error during encoding\n");
break;
}
// 将数据包时间戳从编码器时间基转换到流时间基
pkt->stream_index = rtmp.stream->index;
av_packet_rescale_ts(pkt, rtmp.codecCtx->time_base, rtmp.stream->time_base);
pkt->duration = av_rescale_q(pkt->duration, rtmp.codecCtx->time_base, rtmp.stream->time_base);
// 写入数据包到输出媒体文件
ret = av_interleaved_write_frame(rtmp.formatCtx, pkt);
if (ret < 0) {
fprintf(stderr, "Error while writing audio frame\n");
break;
}
// 释放数据包
av_packet_unref(pkt);
}
}
// 写入帧尾
av_write_trailer(rtmp.formatCtx);
if (apm) {
delete apm;
apm = nullptr;
}
pushDestory(&rtmp);
av_packet_free(&pkt);
av_frame_free(&inputFrame);
av_frame_free(&outputFrame);
av_free(listenBuffer);
for (auto buffer: ns_out_list) {
free(buffer.data);
}
return 0;
}