// audio_demo/aecm_receiver.cpp
#include <iostream>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <stdexcept>
#include <thread>
#include <mutex>
#include <vector>
#include <unistd.h>
// FFmpeg C API headers (these may also be pulled in indirectly via common.h).
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>
#include <libavutil/channel_layout.h>
#include <libavutil/time.h>
}
#include "timing.h"
#include "log/logger.h"
#include "common.h"
#include <modules/audio_processing/include/audio_processing.h>
#include <modules/audio_processing/include/config.h>
#include "alsa_dev.h"
using namespace std;
using namespace toolkit;
#define MIX_INPUT_CHANNELS 2
#define MIX_INPUT_SAMPLE_RATE 44100
// Samples per channel in one 10 ms frame (441 at 44.1 kHz).
#define MIX_INPUT_SAMPLES (10 * MIX_INPUT_SAMPLE_RATE / 1000)
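// Note (added sketch): webrtc::AudioProcessing consumes exactly 10 ms of audio per
// ProcessStream() call, i.e. sample_rate / 100 samples per channel, which is what
// MIX_INPUT_SAMPLES evaluates to. The helper below only illustrates that relationship
// via the StreamConfig API; it is not used by the original code path.
static inline size_t framesPer10Ms(int sampleRateHz)
{
    webrtc::StreamConfig cfg(sampleRateHz, MIX_INPUT_CHANNELS);
    return cfg.num_frames(); // 441 for 44100 Hz == MIX_INPUT_SAMPLES
}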
struct audio_buf_t
{
    uint8_t *data;
    int index;
    int size;
};
struct RtmpConfig {
    char url[1024];
    AVFormatContext *formatCtx;
    AVStream *stream;
    AVCodecContext *codecCtx;
    SwrContext *swrCtx;
    std::thread *thread;
    std::mutex *mutex;
    bool quit;
};
static SampleInfo kPcmSampleInfo;
//----------------------------------------------
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
// Minimum/maximum volume in dB
#define MIN_DB (-10)
#define MAX_DB (20)
// Volume range: 0 = mute, 100 = maximum volume
#define MUTE_VOLUME (0)
#define MAX_VOLUME (100)
static int vol_scaler_init(int *scaler, int mindb, int maxdb);
typedef struct VolumeCtlUnit
{
    int scaler[MAX_VOLUME + 1]; // volume table (Q14 gain per volume step)
    int zeroDb;                 // index of 0 dB (unity gain) in scaler
    // user-adjustable volume for the capture/mic path
    int micVolume;
    VolumeCtlUnit() {
        // initialize the volume scaler table
        zeroDb = vol_scaler_init(scaler, MIN_DB, MAX_DB);
        micVolume = 100;
    }
} volume_ctl_unit_t;
static volume_ctl_unit_t kVolCtrlUnit;
static int vol_scaler_init(int *scaler, int mindb, int maxdb)
{
    double tabdb[MAX_VOLUME + 1];
    double tabf [MAX_VOLUME + 1];
    int z, i;
    for (i = 0; i < (MAX_VOLUME + 1); i++) {
        // Split (mindb, maxdb) evenly into (MAX_VOLUME + 1) steps
        // (divide in floating point so the steps are not truncated to whole dB).
        tabdb[i] = mindb + (maxdb - mindb) * i / (double)(MAX_VOLUME + 1);
        // dB = 20 * log10(A1 / A2); when A1 == A2 the gain is 0 dB.
        // Using (1 << 14) as the reference amplitude, store the scaled amplitude (A1)
        // for each step in the volume table.
        tabf [i] = pow(10.0, tabdb[i] / 20.0);
        scaler[i] = (int)((1 << 14) * tabf[i]); // Q14 fixed point
    }
    z = -mindb * (MAX_VOLUME + 1) / (maxdb - mindb);
    z = MAX(z, 0);
    z = MIN(z, MAX_VOLUME);
    scaler[0] = 0;          // index 0 means mute
    scaler[z] = (1 << 14);  // index z is 0 dB, i.e. unity gain (no scaling)
    return z;
}
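// Worked example (added) with the constants above (MIN_DB = -10, MAX_DB = 20):
//   z = 10 * 101 / 30 = 33                     -> scaler[33]  = 1 << 14 (unity gain)
//   tabdb[100] = -10 + 30*100/101 ≈ +19.7 dB   -> scaler[100] ≈ 16384 * 9.66 ≈ 158000
//   tabdb[1]   = -10 + 30/101     ≈  -9.7 dB   -> scaler[1]   ≈ 16384 * 0.33 ≈ 5360
// so each volume step spans roughly 0.3 dB across the (-10, +20) dB range.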
static void vol_scaler_run(int16_t *buf, int n, int volume)
{
    /* Naive floating-point version, for reference:
    while (n--) {
        *buf = (*buf) * multiplier / 100.0;
        *buf = std::max((int)*buf, -0x7fff);
        *buf = std::min((int)*buf, 0x7fff);
        buf++;
    }
    */
    int multiplier = kVolCtrlUnit.scaler[volume];
    if (multiplier > (1 << 14)) {
        // Amplification: widen to 32 bits and clamp to the int16 range.
        int32_t v;
        while (n--) {
            v = ((int32_t)*buf * multiplier) >> 14;
            v = MAX(v, -0x7fff);
            v = MIN(v, 0x7fff);
            *buf++ = (int16_t)v;
        }
    } else if (multiplier < (1 << 14)) {
        // Attenuation: no clamping needed.
        while (n--) {
            *buf = ((int32_t)*buf * multiplier) >> 14;
            buf++;
        }
    }
    // multiplier == (1 << 14) is unity gain: leave the buffer untouched.
}
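// Usage sketch (added, not wired into this demo): the Q14 scaler above would
// typically be applied to the interleaved S16 buffer right before it is written
// to the sound card, e.g. outBuffer in playbackLoop(). applyMicVolume() is an
// illustrative helper name, not part of the original code.
static inline void applyMicVolume(int16_t *pcm, int totalSamples)
{
    // kVolCtrlUnit.micVolume: 0 = mute, 100 = maximum (see MUTE_VOLUME/MAX_VOLUME).
    vol_scaler_run(pcm, totalSamples, kVolCtrlUnit.micVolume);
}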
//----------------------------------------------
webrtc::AudioProcessing::Config webtcConfigInit()
{
    webrtc::AudioProcessing::Config apmConfig;
    apmConfig.pipeline.maximum_internal_processing_rate = MIX_INPUT_SAMPLE_RATE;
    apmConfig.pipeline.multi_channel_capture = true;
    apmConfig.pipeline.multi_channel_render = true;
    // PreAmplifier
    apmConfig.pre_amplifier.enabled = false;
    apmConfig.pre_amplifier.fixed_gain_factor = 0.7f;
    // HighPassFilter
    apmConfig.high_pass_filter.enabled = false;
    apmConfig.high_pass_filter.apply_in_full_band = false;
    // EchoCanceller
    apmConfig.echo_canceller.enabled = false;
    apmConfig.echo_canceller.mobile_mode = false;
    apmConfig.echo_canceller.export_linear_aec_output = false;
    apmConfig.echo_canceller.enforce_high_pass_filtering = true;
    // NoiseSuppression
    apmConfig.noise_suppression.enabled = true;
    apmConfig.noise_suppression.level = webrtc::AudioProcessing::Config::NoiseSuppression::kHigh;
    apmConfig.noise_suppression.analyze_linear_aec_output_when_available = false;
    // TransientSuppression
    apmConfig.transient_suppression.enabled = false;
    // VoiceDetection
    apmConfig.voice_detection.enabled = true;
    // GainController1
    apmConfig.gain_controller1.enabled = true;
    apmConfig.gain_controller1.mode = webrtc::AudioProcessing::Config::GainController1::kAdaptiveAnalog;
    apmConfig.gain_controller1.target_level_dbfs = 3;
    apmConfig.gain_controller1.compression_gain_db = 12;
    apmConfig.gain_controller1.enable_limiter = true;
    apmConfig.gain_controller1.analog_level_minimum = 0;
    apmConfig.gain_controller1.analog_level_maximum = 496;
    apmConfig.gain_controller1.analog_gain_controller.enabled = true;
    apmConfig.gain_controller1.analog_gain_controller.startup_min_volume = webrtc::kAgcStartupMinVolume;
    apmConfig.gain_controller1.analog_gain_controller.clipped_level_min = webrtc::kClippedLevelMin;
    apmConfig.gain_controller1.analog_gain_controller.enable_agc2_level_estimator = false;
    apmConfig.gain_controller1.analog_gain_controller.enable_digital_adaptive = true;
    // GainController2
    apmConfig.gain_controller2.enabled = false;
    apmConfig.gain_controller2.fixed_digital.gain_db = 0.f;
    apmConfig.gain_controller2.adaptive_digital.enabled = false;
    apmConfig.gain_controller2.adaptive_digital.vad_probability_attack = 1.f;
    apmConfig.gain_controller2.adaptive_digital.level_estimator = webrtc::AudioProcessing::Config::GainController2::kRms;
    apmConfig.gain_controller2.adaptive_digital.level_estimator_adjacent_speech_frames_threshold = 1;
    apmConfig.gain_controller2.adaptive_digital.use_saturation_protector = true;
    apmConfig.gain_controller2.adaptive_digital.initial_saturation_margin_db = 20.f;
    apmConfig.gain_controller2.adaptive_digital.extra_saturation_margin_db = 2.f;
    apmConfig.gain_controller2.adaptive_digital.gain_applier_adjacent_speech_frames_threshold = 1;
    apmConfig.gain_controller2.adaptive_digital.max_gain_change_db_per_second = 3.f;
    apmConfig.gain_controller2.adaptive_digital.max_output_noise_level_dbfs = -50.f;
    // ResidualEchoDetector
    apmConfig.residual_echo_detector.enabled = false;
    // LevelEstimation
    apmConfig.level_estimation.enabled = false;
    return apmConfig;
}
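// Note (added sketch): with gain_controller1 in kAdaptiveAnalog mode, the caller is
// expected to report the current analog capture level before each ProcessStream()
// call and read back the recommended level afterwards, roughly:
//
//   apm->set_stream_analog_level(currentLevel);             // before ProcessStream()
//   apm->ProcessStream(...);
//   currentLevel = apm->recommended_stream_analog_level();  // apply to the mic/ALSA mixer
//
// This demo only sets an initial level (408) once in main(); the feedback loop is left out.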
void pullDestory(RtmpConfig *config)
{
    if (config->formatCtx)
        avformat_close_input(&config->formatCtx);
    if (config->codecCtx) {
        avcodec_close(config->codecCtx);
        avcodec_free_context(&config->codecCtx);
    }
    if (config->swrCtx) {
        swr_close(config->swrCtx);
        swr_free(&config->swrCtx);
    }
}
int pullInit(RtmpConfig *config, int channels, AVSampleFormat format, int sample_rate)
{
    if (nullptr == strstr(config->url, "rtmp://")) {
        LogE("url error, url: %s\n", config->url);
        return -1;
    }
    int ret = 0;
    int scan_all_pmts_set = 0;
    int st_index = -1;
    AVDictionary *format_opts = nullptr;
    AVFormatContext *ic = nullptr;
    AVCodecParameters *codecPar = nullptr;
    AVCodec *codec = nullptr;
    AVCodecContext *codecCtx = nullptr;
    SwrContext *swrCtx = nullptr;
    ic = avformat_alloc_context();
    if (!ic) {
        throw(std::runtime_error("avformat_alloc_context failed."));
    }
    if (!av_dict_get(format_opts, "scan_all_pmts", NULL, AV_DICT_MATCH_CASE)) {
        av_dict_set(&format_opts, "scan_all_pmts", "1", AV_DICT_DONT_OVERWRITE);
        scan_all_pmts_set = 1;
    }
    // Disable input buffering.
    av_dict_set(&format_opts, "fflags", "nobuffer", AV_DICT_MATCH_CASE);
    // Limit the number of bytes used to probe the stream.
    av_dict_set(&format_opts, "probesize", "10000", AV_DICT_MATCH_CASE);
retry:
    // Open the input stream.
    ret = avformat_open_input(&ic, config->url, nullptr, &format_opts);
    if (ret < 0) {
        LogE("avformat_open_input failed.\n");
        goto fail;
    }
    if (scan_all_pmts_set)
        av_dict_set(&format_opts, "scan_all_pmts", nullptr, AV_DICT_MATCH_CASE);
    av_format_inject_global_side_data(ic);
    ret = avformat_find_stream_info(ic, nullptr);
    if (ret < 0) {
        // LOG(ERROR) << url << ": could not find codec parameters";
        LogE("{} : could not find codec parameters\n", config->url);
        goto fail;
    }
    if (ic->pb)
        ic->pb->eof_reached = 0;
    // Dump the input stream parameters.
    av_dump_format(ic, 0, config->url, 0);
    st_index = av_find_best_stream(ic, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
    if (st_index >= 0) {
        config->stream = ic->streams[st_index];
    }
    else {
        LogW("find audio stream failed, try again.\n");
        avformat_close_input(&ic);
        goto retry;
    }
    // Initialize the decoder.
    codecPar = config->stream->codecpar;
    codec = avcodec_find_decoder(codecPar->codec_id);
    if (!codec) {
        LogE("find codec failed.\n");
        goto fail;
    }
    codecCtx = avcodec_alloc_context3(codec);
    if (!codecCtx) {
        LogE("avcodec_alloc_context3 failed.\n");
        goto fail;
    }
    ret = avcodec_parameters_to_context(codecCtx, codecPar);
    if (ret < 0) {
        LogE("avcodec_parameters_to_context\n");
        goto fail;
    }
    codecCtx->time_base = config->stream->time_base;
    // Open the decoder.
    if (avcodec_open2(codecCtx, codec, nullptr) < 0) {
        LogE("avcodec_open2 failed\n");
        goto fail;
    }
    // Some decoders leave channel_layout unset; fall back to the default layout
    // so the resampler gets a valid input description.
    if (codecCtx->channel_layout == 0)
        codecCtx->channel_layout = av_get_default_channel_layout(codecCtx->channels);
    // Initialize the resampler.
    swrCtx = swr_alloc_set_opts(nullptr,
                                av_get_default_channel_layout(channels),
                                format,
                                sample_rate,
                                codecCtx->channel_layout,
                                codecCtx->sample_fmt,
                                codecCtx->sample_rate,
                                0, nullptr);
    if (!swrCtx) {
        LogE("swr_alloc_set_opts failed.\n");
        goto fail;
    }
    if (swr_init(swrCtx) < 0) {
        LogE("swr_init failed.\n");
        goto fail;
    }
    config->formatCtx = ic;
    config->codecCtx = codecCtx;
    config->swrCtx = swrCtx;
    config->stream->discard = AVDISCARD_DEFAULT;
    av_dict_free(&format_opts);
    return 0;
fail:
    if (format_opts)
        av_dict_free(&format_opts);
    if (ic)
        avformat_close_input(&ic);
    if (codecCtx) {
        avcodec_close(codecCtx);
        avcodec_free_context(&codecCtx);
    }
    if (swrCtx) {
        swr_close(swrCtx);
        swr_free(&swrCtx);
    }
    return -1;
}
void playbackLoop(RtmpConfig *rtmp, std::vector<audio_buf_t> *list,
                  webrtc::AudioProcessing *apm, alsa::AlsaDev *play);
int main(int argc, char *argv[])
{
    if (argc < 3) {
        fprintf(stderr, "usage %s card_num url\n", argv[0]);
        return -1;
    }
    // Initialize the logging system.
    Logger::Instance().add(std::make_shared<ConsoleChannel>());
    Logger::Instance().add(std::make_shared<FileChannel>());
    Logger::Instance().setWriter(std::make_shared<AsyncLogWriter>());
    // Initialize the ALSA playback device.
    int card = atoi(argv[1]);
    alsa::Config alsaConfig;
    alsaConfig.period_time = 10000;
    alsaConfig.buffer_time = 50000;
    alsaConfig.channels = MIX_INPUT_CHANNELS;
    alsaConfig.format = SND_PCM_FORMAT_S16_LE;
    alsaConfig.rate = MIX_INPUT_SAMPLE_RATE;
    if (card < 0)
        sprintf(alsaConfig.device, "default");
    else
        sprintf(alsaConfig.device, "plughw:%d", card);
    alsa::AlsaDev usbPlaybackDev;
    if (usbPlaybackDev.applyConfig(alsaConfig) < 0) {
        PrintE("alsa config failed.\n");
        return -1;
    }
    // PrintI("alsa before init: %s\n", usbPlaybackDev.configToString());
    if (usbPlaybackDev.init(SND_PCM_STREAM_PLAYBACK) < 0) {
        PrintE("alsa init failed.\n");
        return -1;
    }
    PrintI("alsa init: %s\n", usbPlaybackDev.configToString());
    // Initialize WebRTC audio processing.
    webrtc::AudioProcessing *apm = webrtc::AudioProcessingBuilder().Create();
    if (!apm) {
        LogE("create apm failed.\n");
        return -1;
    }
    webrtc::AudioProcessing::Config apmConfig = webtcConfigInit();
    apm->ApplyConfig(apmConfig);
    apm->Initialize();
    apm->set_stream_analog_level(408);
    LogI("webrtc params: {\n%s\n}\n", apmConfig.ToString().c_str());
    // Initialize the RTMP pull stream.
    RtmpConfig rtmp;
    memset(&rtmp, 0, sizeof(rtmp));
    strcpy(rtmp.url, argv[2]);
    if (pullInit(&rtmp, MIX_INPUT_CHANNELS, AV_SAMPLE_FMT_S16, MIX_INPUT_SAMPLE_RATE) < 0) {
        return -1;
    }
    AVPacket *pkt = av_packet_alloc();
    AVFrame *outputFrame = av_frame_alloc();
    int maxBuffSize = 1024 * 4 * 2;
    uint8_t *swrBuffer = (uint8_t *)calloc(maxBuffSize, sizeof(uint8_t));
    int ret;
    std::vector<audio_buf_t> swr_list;
    // Describe the PCM format handed to the playback thread (used for the APM StreamConfig).
    kPcmSampleInfo.channels = MIX_INPUT_CHANNELS;
    kPcmSampleInfo.sample_rate = MIX_INPUT_SAMPLE_RATE;
    rtmp.mutex = new std::mutex;
    rtmp.quit = false;
    rtmp.thread = new std::thread(playbackLoop, &rtmp, &swr_list, apm, &usbPlaybackDev);
    while (true)
    {
        if (av_read_frame(rtmp.formatCtx, pkt) >= 0 &&
            pkt->stream_index == rtmp.stream->index) {
            ret = avcodec_send_packet(rtmp.codecCtx, pkt);
            if (ret == AVERROR(EAGAIN)) {
                LogW("send packet again.\n");
                av_packet_unref(pkt);
                av_usleep(10 * 1000);
                continue;
            }
            else if (ret < 0) {
                LogE("send packet error ret={}\n", ret);
                break;
            }
            while (avcodec_receive_frame(rtmp.codecCtx, outputFrame) >= 0) {
                int outSamples = swr_convert(rtmp.swrCtx, &swrBuffer, maxBuffSize / (sizeof(int16_t) * MIX_INPUT_CHANNELS),
                                             (uint8_t const **)(outputFrame->data), outputFrame->nb_samples);
                int size = outSamples * MIX_INPUT_CHANNELS * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
                {
                    uint8_t *buffer = (uint8_t *)calloc(size, sizeof(uint8_t));
                    memcpy(buffer, swrBuffer, size);
                    std::unique_lock<std::mutex> lck(*rtmp.mutex);
                    audio_buf_t out;
                    out.data = buffer;
                    out.index = 0;
                    out.size = size;
                    swr_list.emplace_back(out);
                    // if (out_fp) fwrite(buffer, 1, size, out_fp);
                }
            }
            av_frame_unref(outputFrame);
        }
        av_packet_unref(pkt);
    }
    // Shut down the playback thread and release resources.
    rtmp.quit = true;
    if (rtmp.thread) {
        rtmp.thread->join();
        delete rtmp.thread;
    }
    delete rtmp.mutex;
    for (auto &buf : swr_list)
        free(buf.data);
    av_packet_free(&pkt);
    av_frame_free(&outputFrame);
    free(swrBuffer);
    if (apm) {
        delete apm;
        apm = nullptr;
    }
    pullDestory(&rtmp);
    return 0;
}
void playbackLoop(RtmpConfig *rtmp, std::vector<audio_buf_t> *list, webrtc::AudioProcessing *apm, alsa::AlsaDev *play)
{
    // Stream configuration for ProcessStream(); kPcmSampleInfo is filled in main().
    webrtc::StreamConfig playConfig;
    playConfig.set_has_keyboard(false);
    playConfig.set_num_channels(kPcmSampleInfo.channels);
    playConfig.set_sample_rate_hz(kPcmSampleInfo.sample_rate);
    int sampleSize = 0;
    int outSize = MIX_INPUT_SAMPLES * MIX_INPUT_CHANNELS * sizeof(int16_t);
    uint8_t *outBuffer = (uint8_t *)calloc(outSize, sizeof(uint8_t));
    // FILE *out_fp = fopen("/root/swr_out.pcm", "wb");
    while (!rtmp->quit) {
        // Fetch MIX_INPUT_SAMPLES worth of decoded audio and fill it into outBuffer.
        sampleSize = outSize;
        while (sampleSize > 0 && !rtmp->quit)
        {
            // Take the lock before touching the shared list to avoid racing the decoder thread.
            std::unique_lock<std::mutex> lck(*rtmp->mutex);
            if (list->empty()) {
                lck.unlock();
                av_usleep(1000);
                continue;
            }
            auto data = list->begin();
            int readSize = sampleSize < (data->size - data->index) ? sampleSize : (data->size - data->index);
            memcpy(outBuffer + outSize - sampleSize, data->data + data->index, readSize);
            sampleSize -= readSize;
            data->index += readSize;
            if (data->index >= data->size) {
                free(data->data);
                list->erase(list->begin());
            }
        }
        // if (out_fp) fwrite(outBuffer, 1, outSize, out_fp);
        // Audio processing
        {
            apm->ProcessStream((int16_t *)outBuffer, playConfig, playConfig, (int16_t *)outBuffer);
        }
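        // Note (added sketch): ProcessStream() returns webrtc::AudioProcessing::kNoError (0)
        // on success; a minimal error check could look like:
        //
        //   if (apm->ProcessStream((int16_t *)outBuffer, playConfig, playConfig,
        //                          (int16_t *)outBuffer) != webrtc::AudioProcessing::kNoError)
        //       LogW("ProcessStream failed\n");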
        play->write(outBuffer, outSize);
    }
    free(outBuffer);
}