audio_demo/aecm_receiver.cpp
2025-02-14 08:58:27 +08:00

484 lines
16 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <iostream>
#include <unistd.h>
#include <cmath>
#include "timing.h"
#include "log/logger.h"
#include "common.h"
#include <modules/audio_processing/include/audio_processing.h>
#include <modules/audio_processing/include/config.h>
#include <thread>
#include <mutex>
#include "alsa_dev.h"
using namespace std;
using namespace toolkit;
// Channel count used for the ALSA playback device and for the resampler output.
#define MIX_INPUT_CHANNELS 2
// Samples per channel in one 10 ms chunk (10 * rate / 1000); playbackLoop() sizes its buffer from this.
// The macro below is only expanded at the point of use, so referring to it before its #define is fine.
#define MIX_INPUT_SAMPLES (10 * MIX_INPUT_SAMPLE_RATE/1000)
// Sample rate in Hz used for the ALSA playback device and for the resampler output.
#define MIX_INPUT_SAMPLE_RATE 44100
// One chunk of decoded, resampled PCM handed from main() to playbackLoop() through the shared queue.
struct audio_buf_t
{
uint8_t* data; // heap buffer with the PCM bytes; playbackLoop() frees it once fully consumed
int index; // byte offset of the next unread byte in data
int size; // total number of bytes stored in data
};
// State of one RTMP pull session, shared between main() and the playback thread.
// main() zero-fills this struct with memset, so every member must be valid as all-zero bytes.
struct RtmpConfig {
char url[1024]; // stream URL copied from the command line
AVFormatContext *formatCtx; // demuxer context, set by pullInit()
AVStream *stream; // selected audio stream; points into formatCtx
AVCodecContext *codecCtx; // decoder context for the audio stream
SwrContext *swrCtx; // resampler converting decoded frames to the playback format
std::thread *thread; // thread running playbackLoop()
std::mutex *mutex; // guards the decoded-audio queue shared with playbackLoop()
bool quit; // polled by playbackLoop() as its exit condition
};
// PCM format descriptor read by playbackLoop() to fill the WebRTC StreamConfig.
// NOTE(review): no assignment to this object is visible in this file — confirm that SampleInfo's
// defaults match MIX_INPUT_CHANNELS / MIX_INPUT_SAMPLE_RATE, otherwise ProcessStream() is misconfigured.
static SampleInfo kPcmSampleInfo;
//----------------------------------------------
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
// Gain range covered by the volume table, in dB.
#define MIN_DB (-10)
#define MAX_DB (20)
// Volume index range: 0 is mute, 100 is the loudest setting.
#define MUTE_VOLUME (0)
#define MAX_VOLUME (100)

static int vol_scaler_init(int *scaler, int mindb, int maxdb);

// Software volume control: a lookup table of Q14 gain multipliers indexed by volume (0..MAX_VOLUME).
typedef struct VolumeCtlUnit
{
    int scaler[MAX_VOLUME + 1]; // Q14 gain multiplier for each volume index
    int zeroDb;                 // index in scaler that holds unity gain (0 dB)
    int micVolume;              // user-selected volume index
    VolumeCtlUnit() {
        // Build the gain table once, when the controller is constructed.
        zeroDb = vol_scaler_init(scaler, MIN_DB, MAX_DB);
        micVolume = 100;
    }
} volume_ctl_unit_t;

static volume_ctl_unit_t kVolCtrlUnit;

/**
 * Fill a volume table with Q14 fixed-point gain multipliers.
 *
 * The range [mindb, maxdb) is divided evenly into MAX_VOLUME + 1 steps; entry i holds
 * (1 << 14) * 10^(dB_i / 20), following dB = 20 * log10(A1 / A2).
 * Entry 0 is then forced to 0 (mute) and the entry nearest 0 dB is forced to exactly
 * (1 << 14) so that this volume leaves samples untouched.
 *
 * @param scaler  output array with at least MAX_VOLUME + 1 elements
 * @param mindb   gain in dB of the lowest table entry
 * @param maxdb   upper bound of the gain range in dB; must be greater than mindb
 * @return index of the unity-gain (0 dB) entry
 */
static int vol_scaler_init(int *scaler, int mindb, int maxdb)
{
    const int steps = MAX_VOLUME + 1;
    for (int i = 0; i < steps; i++) {
        // The ramp must be computed in floating point: with integer division the dB value
        // is truncated to whole numbers and the table degenerates into a 1 dB staircase.
        const double db = mindb + static_cast<double>(maxdb - mindb) * i / steps;
        const double gain = std::pow(10.0, db / 20.0);
        scaler[i] = static_cast<int>((1 << 14) * gain); // Q14 fixed point
    }
    int z = -mindb * steps / (maxdb - mindb);
    z = MAX(z, 0);
    z = MIN(z, MAX_VOLUME);
    scaler[0] = 0;         // index 0 means mute
    scaler[z] = (1 << 14); // exact unity gain at the 0 dB index
    return z;
}

/**
 * Apply the gain for the given volume index to a buffer of 16-bit samples, in place.
 *
 * @param buf     samples to scale; nothing is done when null
 * @param n       number of samples in buf; nothing is done when n <= 0
 * @param volume  volume index; values outside [MUTE_VOLUME, MAX_VOLUME] are clamped
 */
static void vol_scaler_run(int16_t *buf, int n, int volume)
{
    if (!buf || n <= 0)
        return;
    // Clamp so an out-of-range volume can never index outside the table.
    volume = MAX(volume, MUTE_VOLUME);
    volume = MIN(volume, MAX_VOLUME);
    const int multiplier = kVolCtrlUnit.scaler[volume];
    if (multiplier == (1 << 14))
        return; // unity gain: samples stay as they are
    for (int i = 0; i < n; i++) {
        // 64-bit product: near +20 dB the multiplier is about 10 * (1 << 14), and
        // multiplying that by a full-scale sample does not fit in 32 bits.
        int64_t v = (static_cast<int64_t>(buf[i]) * multiplier) >> 14;
        v = MAX(v, -0x7fff);
        v = MIN(v, 0x7fff);
        buf[i] = static_cast<int16_t>(v);
    }
}
//----------------------------------------------
// Build the WebRTC audio-processing configuration used by this demo.
// Only noise suppression, voice detection and gain controller 1 are switched on;
// every other module is explicitly disabled but still given its parameter values.
webrtc::AudioProcessing::Config webtcConfigInit()
{
    using ApmConfig = webrtc::AudioProcessing::Config;
    ApmConfig cfg;

    // Pipeline
    // NOTE(review): the WebRTC header documents this field as accepting 32000 or 48000 only —
    // confirm that passing MIX_INPUT_SAMPLE_RATE here is intended.
    auto &pipeline = cfg.pipeline;
    pipeline.maximum_internal_processing_rate = MIX_INPUT_SAMPLE_RATE;
    pipeline.multi_channel_capture = true;
    pipeline.multi_channel_render = true;

    // Pre-amplifier
    auto &preAmp = cfg.pre_amplifier;
    preAmp.enabled = false;
    preAmp.fixed_gain_factor = 0.7f;

    // High-pass filter
    auto &hpf = cfg.high_pass_filter;
    hpf.enabled = false;
    hpf.apply_in_full_band = false;

    // Echo canceller
    auto &aec = cfg.echo_canceller;
    aec.enabled = false;
    aec.mobile_mode = false;
    aec.export_linear_aec_output = false;
    aec.enforce_high_pass_filtering = true;

    // Noise suppression
    auto &ns = cfg.noise_suppression;
    ns.enabled = true;
    ns.level = ApmConfig::NoiseSuppression::kHigh;
    ns.analyze_linear_aec_output_when_available = false;

    // Transient suppression
    cfg.transient_suppression.enabled = false;

    // Voice detection
    cfg.voice_detection.enabled = true;

    // Gain controller 1
    auto &agc1 = cfg.gain_controller1;
    agc1.enabled = true;
    agc1.mode = ApmConfig::GainController1::kAdaptiveAnalog;
    agc1.target_level_dbfs = 3;
    agc1.compression_gain_db = 12;
    agc1.enable_limiter = true;
    agc1.analog_level_minimum = 0;
    agc1.analog_level_maximum = 496;

    auto &analog = agc1.analog_gain_controller;
    analog.enabled = true;
    analog.startup_min_volume = webrtc::kAgcStartupMinVolume;
    analog.clipped_level_min = webrtc::kClippedLevelMin;
    analog.enable_agc2_level_estimator = false;
    analog.enable_digital_adaptive = true;

    // Gain controller 2
    auto &agc2 = cfg.gain_controller2;
    agc2.enabled = false;
    agc2.fixed_digital.gain_db = 0.f;

    auto &adaptive = agc2.adaptive_digital;
    adaptive.enabled = false;
    adaptive.vad_probability_attack = 1.f;
    adaptive.level_estimator = ApmConfig::GainController2::kRms;
    adaptive.level_estimator_adjacent_speech_frames_threshold = 1;
    adaptive.use_saturation_protector = true;
    adaptive.initial_saturation_margin_db = 20.f;
    adaptive.extra_saturation_margin_db = 2.f;
    adaptive.gain_applier_adjacent_speech_frames_threshold = 1;
    adaptive.max_gain_change_db_per_second = 3.f;
    adaptive.max_output_noise_level_dbfs = -50.f;

    // Residual echo detector
    cfg.residual_echo_detector.enabled = false;

    // Level estimation
    cfg.level_estimation.enabled = false;

    return cfg;
}
// Release everything pullInit() stored in config: resampler, decoder and demuxer.
// Safe to call with a null config or with members that were never set; every
// released member is reset to null so a second call is a no-op.
void pullDestory(RtmpConfig *config)
{
    if (!config)
        return;
    // swr_free() and avcodec_free_context() release the whole object and null the
    // pointer, so no separate close call is needed first.
    if (config->swrCtx)
        swr_free(&config->swrCtx);
    if (config->codecCtx)
        avcodec_free_context(&config->codecCtx);
    // The stream is owned by the format context; drop the pointer before the owner
    // goes away so nothing is left dangling.
    config->stream = nullptr;
    if (config->formatCtx)
        avformat_close_input(&config->formatCtx);
}
/**
 * Open the RTMP stream named by config->url and prepare decoding and resampling.
 *
 * Opens the input, waits until an audio stream is present (re-opening the input
 * until one shows up), opens a decoder for it and creates a resampler that converts
 * decoded audio to the requested output format.
 *
 * @param config       session state; url must already be filled in. formatCtx, stream,
 *                     codecCtx and swrCtx are written only on success.
 * @param channels     number of output channels for the resampler
 * @param format       output sample format for the resampler
 * @param sample_rate  output sample rate in Hz for the resampler
 * @return 0 on success, -1 on failure (config is left untouched in that case)
 */
int pullInit(RtmpConfig *config, int channels, AVSampleFormat format, int sample_rate)
{
    if (nullptr == strstr(config->url, "rtmp://")) {
        LogE("url error, url: %s\n", config->url);
        return -1;
    }

    AVFormatContext *ic = nullptr;
    AVCodecContext *codecCtx = nullptr;
    SwrContext *swrCtx = nullptr;

    // Single cleanup path for every failure below; frees whatever has been created so far.
    auto fail = [&]() -> int {
        if (swrCtx)
            swr_free(&swrCtx);
        if (codecCtx)
            avcodec_free_context(&codecCtx);
        if (ic)
            avformat_close_input(&ic);
        return -1;
    };

    int st_index = -1;
    for (;;) {
        // avformat_open_input() consumes the options it recognises, so the dictionary
        // has to be rebuilt for every attempt or a retry would run without them.
        AVDictionary *format_opts = nullptr;
        av_dict_set(&format_opts, "scan_all_pmts", "1", AV_DICT_DONT_OVERWRITE);
        // Disable input buffering.
        av_dict_set(&format_opts, "fflags", "nobuffer", AV_DICT_MATCH_CASE);
        // Limit the number of bytes analysed while probing the stream.
        av_dict_set(&format_opts, "probesize", "10000", AV_DICT_MATCH_CASE);

        // Open the input; with a null context the function allocates one itself and
        // reports allocation failure through its return code.
        int ret = avformat_open_input(&ic, config->url, nullptr, &format_opts);
        av_dict_free(&format_opts); // drop whatever options were left unconsumed
        if (ret < 0) {
            LogE("avformat_open_input failed.\n");
            return fail();
        }
        av_format_inject_global_side_data(ic);
        ret = avformat_find_stream_info(ic, nullptr);
        if (ret < 0) {
            LogE("{} : could not find codec parameters\n", config->url);
            return fail();
        }
        if (ic->pb)
            ic->pb->eof_reached = 0;
        // Print the input stream parameters.
        av_dump_format(ic, 0, config->url, 0);

        st_index = av_find_best_stream(ic, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
        if (st_index >= 0)
            break;

        // No audio stream yet: close and try again, pausing briefly so the server
        // is not hit with back-to-back reconnects.
        LogW("find audio stream failed, try again.\n");
        avformat_close_input(&ic);
        av_usleep(100 * 1000);
    }
    AVStream *stream = ic->streams[st_index];

    // Set up the decoder.
    AVCodecParameters *codecPar = stream->codecpar;
    AVCodec *codec = avcodec_find_decoder(codecPar->codec_id);
    if (!codec) {
        LogE("find codec failed.\n");
        return fail();
    }
    codecCtx = avcodec_alloc_context3(codec);
    if (!codecCtx) {
        LogE("avcodec_alloc_context3 failed.\n");
        return fail();
    }
    if (avcodec_parameters_to_context(codecCtx, codecPar) < 0) {
        LogE("avcodec_parameters_to_context\n");
        return fail();
    }
    codecCtx->time_base = stream->time_base;
    // Open the decoder.
    if (avcodec_open2(codecCtx, codec, nullptr) < 0) {
        LogE("avcodec_open2 failed\n");
        return fail();
    }

    // Set up the resampler. Some streams do not carry a channel layout; fall back to
    // the default layout for the decoder's channel count so swr_init() can succeed.
    int64_t inLayout = static_cast<int64_t>(codecCtx->channel_layout);
    if (inLayout == 0)
        inLayout = av_get_default_channel_layout(codecCtx->channels);
    swrCtx = swr_alloc_set_opts(nullptr,
                                av_get_default_channel_layout(channels),
                                format,
                                sample_rate,
                                inLayout,
                                codecCtx->sample_fmt,
                                codecCtx->sample_rate,
                                0, nullptr);
    if (!swrCtx) {
        LogE("swr_alloc_set_opts failed.\n");
        return fail();
    }
    if (swr_init(swrCtx) < 0) {
        LogE("swr_init failed.\n");
        return fail();
    }

    // Everything succeeded: hand ownership over to the caller's config.
    stream->discard = AVDISCARD_DEFAULT;
    config->formatCtx = ic;
    config->stream = stream;
    config->codecCtx = codecCtx;
    config->swrCtx = swrCtx;
    return 0;
}
// Playback thread entry point, defined after main(): takes decoded PCM from `list`,
// passes it through `apm` and writes it to the ALSA device `play`.
void playbackLoop(RtmpConfig *rtmp, std::vector<audio_buf_t> *list,
webrtc::AudioProcessing *apm, alsa::AlsaDev* play);
int main(int argc, char *argv[])
{
if (argc < 3) {
fprintf(stderr, "usage %s card_num url\n", argv[0]);
return -1;
}
//初始化日志系统
Logger::Instance().add(std::make_shared<ConsoleChannel> ());
Logger::Instance().add(std::make_shared<FileChannel>());
Logger::Instance().setWriter(std::make_shared<AsyncLogWriter>());
// 初始化声卡设备
int card = atoi(argv[1]);
alsa::Config alsaConfig;
alsaConfig.period_time = 10000;
alsaConfig.buffer_time = 50000;
alsaConfig.channels = MIX_INPUT_CHANNELS;
alsaConfig.format = SND_PCM_FORMAT_S16_LE;
alsaConfig.rate = MIX_INPUT_SAMPLE_RATE;
if (card < 0)
sprintf(alsaConfig.device, "default");
else
sprintf(alsaConfig.device, "plughw:%d", card);
alsa::AlsaDev usbPlaybackDev;
if (usbPlaybackDev.applyConfig(alsaConfig) < 0) {
PrintE("alsa config failed.\n");
return -1;
}
// PrintI("alsa before init: %s\n", usbPlaybackDev.configToString());
if (usbPlaybackDev.init(SND_PCM_STREAM_PLAYBACK) < 0) {
PrintE("alsa init failed.\n");
return -1;
}
PrintI("alsa init: %s\n", usbPlaybackDev.configToString());
// webrtc初始化
webrtc::AudioProcessing *apm = webrtc::AudioProcessingBuilder().Create();
if (!apm) {
LogI("create apm failed.\n");
return -1;
}
webrtc::AudioProcessing::Config apmConfig = webtcConfigInit();
apm->ApplyConfig(apmConfig);
apm->Initialize();
apm->set_stream_analog_level(408);
LogI("webrtc params: {\n%s\n}\n", apmConfig.ToString().c_str());
// 拉流初始化
RtmpConfig rtmp;
memset(&rtmp, 0, sizeof(rtmp));
strcpy(rtmp.url, argv[2]);
if (pullInit(&rtmp, MIX_INPUT_CHANNELS, AV_SAMPLE_FMT_S16, MIX_INPUT_SAMPLE_RATE) < 0) {
return -1;
}
AVPacket *pkt = av_packet_alloc();
AVFrame *outputFrame = av_frame_alloc();
int maxBuffSize = 1024 * 4 * 2;
uint8_t *swrBuffer = (uint8_t *)calloc(maxBuffSize, sizeof(uint8_t));
int ret;
std::vector<audio_buf_t> swr_list;
rtmp.mutex = new std::mutex;
rtmp.thread = new std::thread(playbackLoop, &rtmp, &swr_list, apm, &usbPlaybackDev);
rtmp.quit = false;
while (true)
{
if (av_read_frame(rtmp.formatCtx, pkt) >= 0 &&
pkt->stream_index == rtmp.stream->index) {
ret = avcodec_send_packet(rtmp.codecCtx, pkt);
if (ret == AVERROR(EAGAIN)) {
LogW("send packet again.\n");
av_usleep(10*1000);
continue;
}
else if (ret < 0) {
LogE("send packet error ret={}\n", ret);
break;
}
while ( avcodec_receive_frame(rtmp.codecCtx, outputFrame) >= 0 ) {
int outSamples = swr_convert(rtmp.swrCtx, &swrBuffer, maxBuffSize/(sizeof(int16_t) * MIX_INPUT_CHANNELS),
(uint8_t const **) (outputFrame->data), outputFrame->nb_samples);
int size = outSamples * MIX_INPUT_CHANNELS * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
{
int size = outSamples * MIX_INPUT_CHANNELS * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
uint8_t *buffer = (uint8_t *)calloc(size, sizeof(uint8_t));
memcpy(buffer, swrBuffer, size);
std::unique_lock<std::mutex> lck(*rtmp.mutex);
audio_buf_t out;
out.data = buffer;
out.index = 0;
out.size = size;
swr_list.emplace_back(out);
// if (out_fp) fwrite(buffer, 1, size, out_fp);
}
}
av_frame_unref(outputFrame);
}
av_packet_unref(pkt);
}
if (apm) {
delete apm;
apm = nullptr;
}
pullDestory(&rtmp);
return 0;
}
/**
 * Playback thread: assemble fixed-size PCM chunks from the decoded-audio queue,
 * run them through WebRTC audio processing and write them to the ALSA device.
 *
 * Each chunk holds MIX_INPUT_SAMPLES frames of MIX_INPUT_CHANNELS 16-bit samples.
 * The thread returns once rtmp->quit is set and the queue has been drained; a
 * partially assembled chunk is dropped at that point.
 *
 * @param rtmp  session state; rtmp->mutex guards `list` and rtmp->quit requests exit
 * @param list  queue of decoded buffers; fully consumed buffers are freed and removed here
 * @param apm   audio processor applied in place to every chunk
 * @param play  ALSA device the processed chunk is written to
 */
void playbackLoop(RtmpConfig *rtmp, std::vector<audio_buf_t> *list, webrtc::AudioProcessing *apm, alsa::AlsaDev* play)
{
    // NOTE(review): the chunk buffer below is sized from the MIX_INPUT_* constants while this
    // config comes from kPcmSampleInfo — confirm that both describe the same format.
    webrtc::StreamConfig playConfig;
    playConfig.set_has_keyboard(false);
    playConfig.set_num_channels(kPcmSampleInfo.channels);
    playConfig.set_sample_rate_hz(kPcmSampleInfo.sample_rate);

    const int outSize = MIX_INPUT_SAMPLES * MIX_INPUT_CHANNELS * sizeof(int16_t);
    // Owned by the vector so it is released on every return path.
    std::vector<uint8_t> outBuffer(outSize, 0);
    bool apmErrorLogged = false;

    while (true) {
        // Collect one full chunk of decoded audio into outBuffer.
        int remaining = outSize;
        while (remaining > 0)
        {
            // The queue is shared with the decoding thread, so even the emptiness
            // check has to happen under the mutex.
            std::unique_lock<std::mutex> lck(*rtmp->mutex);
            if (list->empty()) {
                if (rtmp->quit)
                    return; // asked to stop and nothing left to play
                lck.unlock();
                av_usleep(1000);
                continue;
            }
            audio_buf_t &front = list->front();
            const int available = front.size - front.index;
            const int readSize = remaining < available ? remaining : available;
            memcpy(outBuffer.data() + (outSize - remaining), front.data + front.index, readSize);
            remaining -= readSize;
            front.index += readSize;
            if (front.index >= front.size) {
                free(front.data);
                list->erase(list->begin());
            }
        }

        // Audio processing, in place on the chunk.
        int16_t *samples = reinterpret_cast<int16_t *>(outBuffer.data());
        const int err = apm->ProcessStream(samples, playConfig, playConfig, samples);
        if (err != webrtc::AudioProcessing::kNoError && !apmErrorLogged) {
            // Report the first failure only; this runs once per chunk.
            LogW("apm ProcessStream failed ret={}\n", err);
            apmErrorLogged = true;
        }

        play->write(outBuffer.data(), outSize);
    }
}