audio_demo/main.cpp

430 lines
14 KiB
C++
Raw Permalink Normal View History

2025-02-14 08:58:27 +08:00
#include <iostream>
#include "ns/noise_suppressor.h"
#include "common.h"
#include <cmath>
#include "agcm.h"
#define MIX_INPUT_CHANNELS 2
// #define MIX_INPUT_SAMPLES (10 * MIX_INPUT_SAMPLE_RATE/1000)
#define MIX_INPUT_SAMPLES 1440
// #define MIX_INPUT_SAMPLES 1024
#define MIX_INPUT_SAMPLE_RATE 48000
//----------------------------------------------
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
// 最大/小音量db
#define MIN_DB (-10)
#define MAX_DB (60)
// 最大/小音量: 0: 静音; 100:最大音量
#define MUTE_VOLUME (0)
#define MAX_VOLUME (100)
static int vol_scaler_init(int *scaler, int mindb, int maxdb);
// Per-process volume-control state: a Q14 fixed-point gain lookup table
// plus the currently selected microphone volume (0 = mute, 100 = max).
typedef struct VolumeCtlUnit
{
int scaler[MAX_VOLUME + 1]; // gain table, Q14 fixed point, indexed by volume 0..100
int zeroDb; // index into scaler[] whose entry is exactly unity (0 dB) gain
// user-adjustable microphone volume, 0..100
int micVolume;
VolumeCtlUnit() {
// Build the gain table once, at static-initialization time.
zeroDb = vol_scaler_init(scaler, MIN_DB, MAX_DB);
micVolume = 100;
}
} volume_ctl_unit_t;
static volume_ctl_unit_t kVolCtrlUnit;
/**
 * Build a Q14 fixed-point gain lookup table spanning [mindb, maxdb] dB,
 * evenly spread over the MAX_VOLUME + 1 volume steps.
 *
 * scaler[0] is forced to 0 (hard mute) and the slot closest to 0 dB is
 * forced to exactly 1 << 14 (unity gain, i.e. no processing).
 *
 * @param scaler  out: table of MAX_VOLUME + 1 Q14 gain factors
 * @param mindb   lowest gain in dB (attenuation if negative)
 * @param maxdb   highest gain in dB
 * @return index of the unity-gain (0 dB) entry in scaler[]
 */
static int vol_scaler_init(int *scaler, int mindb, int maxdb)
{
    double db;
    int z, i;
    for (i = 0; i < (MAX_VOLUME + 1); i++) {
        /* Spread (mindb, maxdb) evenly across the table. The division must
         * be done in floating point: the original all-integer expression
         * truncated before the double assignment, quantizing the dB curve
         * into integer steps instead of a smooth ramp. */
        db = mindb + (maxdb - mindb) * (double)i / (MAX_VOLUME + 1);
        /* dB = 20 * log10(A1 / A2): convert dB to a linear amplitude ratio
         * relative to a (1 << 14) reference and store it as Q14. */
        scaler[i] = (int)((1 << 14) * pow(10.0, db / 20.0));
    }
    /* Locate the table slot corresponding to 0 dB, clamped to valid range. */
    z = -mindb * (MAX_VOLUME + 1) / (maxdb - mindb);
    z = MAX(z, 0);
    z = MIN(z, MAX_VOLUME);
    scaler[0] = 0;         /* volume 0 means hard mute */
    scaler[z] = (1 << 14); /* force exact unity gain at the 0 dB slot */
    return z;
}
/**
 * Apply the Q14 gain selected by `volume` in place to n S16 samples.
 * Amplification saturates to +/-0x7fff; attenuation cannot overflow and
 * is applied without clamping; unity gain is a no-op.
 */
static void vol_scaler_run(int16_t *buf, int n, int volume)
{
    const int gain = kVolCtrlUnit.scaler[volume];
    if (gain == (1 << 14))
        return; /* exactly unity: leave samples untouched */
    if (gain > (1 << 14)) {
        /* Boosting can exceed 16 bits, so clamp each sample. */
        for (int i = 0; i < n; i++) {
            int32_t v = ((int32_t)buf[i] * gain) >> 14;
            if (v > 0x7fff)
                v = 0x7fff;
            else if (v < -0x7fff)
                v = -0x7fff;
            buf[i] = (int16_t)v;
        }
    } else {
        /* Attenuation (including mute) stays in range; no clamp needed. */
        for (int i = 0; i < n; i++)
            buf[i] = (int16_t)(((int32_t)buf[i] * gain) >> 14);
    }
}
//----------------------------------------------
// usb声卡设备句柄
static rkStreamPtr usbCaptureDev = nullptr;
static SampleInfo kPcmSampleInfo;
// 降噪
using namespace webrtc;
// A heap-allocated audio chunk with a read cursor, used to queue capture
// data through the noise-suppression pipeline. The consumer frees `data`
// once `index` reaches `size`.
struct audio_buf_t
{
uint8_t* data; // malloc'd sample bytes; owned by whoever drains the queue
int index; // current read offset into data, in bytes
int size; // total size of data, in bytes
};
/**
 * Audio demo entry point: capture stereo S16 PCM from a USB sound card,
 * duplicate the LINE-IN (right) channel onto both channels, apply software
 * volume, run WebRTC noise suppression in fixed-size frames, resample to
 * the AAC encoder's format, encode, and publish over RTMP (FLV muxing).
 *
 * Usage: audio_demo <rtmp_url>
 */
int main(int argc, char *argv[])
{
    /* argv[1] is required. The original `argc < 1` check could never fire,
     * so running without arguments dereferenced argv[1] == NULL when
     * constructing the std::string below (undefined behavior). */
    if (argc < 2) {
        printf("usage: %s rtmp_url \n", argv[0]);
        return 0;
    }
    std::string url = argv[1];
    /* PCM capture parameters */
    kPcmSampleInfo.channels = MIX_INPUT_CHANNELS;
    kPcmSampleInfo.fmt = SAMPLE_FMT_S16;
    kPcmSampleInfo.sample_rate = MIX_INPUT_SAMPLE_RATE;
    kPcmSampleInfo.nb_samples = MIX_INPUT_SAMPLES;
    /* Open the USB sound-card capture stream */
    RkStreamInit(2, capture, kPcmSampleInfo, usbCaptureDev);
    /* Capture buffer */
    int ret = 0;
    std::shared_ptr<easymedia::SampleBuffer> capData = nullptr;
    int buffer_size = GetSampleSize(kPcmSampleInfo) * kPcmSampleInfo.nb_samples;
    void *ptr = malloc(buffer_size);
    capData = std::make_shared<easymedia::SampleBuffer>(easymedia::MediaBuffer(ptr, buffer_size, -1, ptr, free_memory),
                                                        kPcmSampleInfo);
    assert(capData);
    uint8_t *lineBuffer = nullptr;
    size_t lineSize = buffer_size;
    lineBuffer = (uint8_t*)malloc(lineSize);
    assert(lineBuffer);
    /* FFmpeg muxer / encoder setup */
    AVFormatContext *oc = NULL;
    AVOutputFormat *fmt = NULL;
    AVStream *audio_st = NULL;
    AVCodecContext *c = NULL;
    AVCodecParameters *codec_par = nullptr;
    AVCodec *codec = NULL;
    avformat_alloc_output_context2(&oc, NULL, "flv", url.c_str()); /* RTMP uses FLV muxing */
    if (!oc) {
        fprintf(stderr, "Could not create output context\n");
        return -1;
    }
    fmt = oc->oformat;
    /* Create the audio stream */
    audio_st = avformat_new_stream(oc, NULL);
    if (!audio_st) {
        fprintf(stderr, "Could not create audio stream\n");
        return -1;
    }
    codec_par = audio_st->codecpar;
    codec_par->codec_id = AV_CODEC_ID_AAC;
    codec_par->codec_type = AVMEDIA_TYPE_AUDIO;
    codec_par->codec_tag = 0;
    codec_par->bit_rate = 128 * 1024;
    codec_par->sample_rate = MIX_INPUT_SAMPLE_RATE;
    codec_par->channel_layout = av_get_default_channel_layout(MIX_INPUT_CHANNELS);
    codec_par->channels = MIX_INPUT_CHANNELS;
    codec_par->format = AV_SAMPLE_FMT_FLTP;
    codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!codec) {
        fprintf(stderr, "Could not find encoder\n");
        return -1;
    }
    c = avcodec_alloc_context3(codec);
    if (avcodec_parameters_to_context(c, codec_par) < 0) {
        fprintf(stderr, "avcodec_parameters_to_context failed.\n");
        return -1;
    }
    /* Open the encoder */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open encoder\n");
        return -1;
    }
    /* Open the output and write the stream header */
    av_dump_format(oc, 0, url.c_str(), 1);
    if (!(fmt->flags & AVFMT_NOFILE)) {
        if (avio_open(&oc->pb, url.c_str(), AVIO_FLAG_WRITE) < 0) {
            fprintf(stderr, "Could not open '%s'\n", url.c_str());
            return -1;
        }
    }
    if (avformat_write_header(oc, NULL) < 0) {
        fprintf(stderr, "Error occurred when opening output URL\n");
        return -1;
    }
    AVFrame *frame = av_frame_alloc();
    if (!frame) { /* allocation was previously unchecked */
        fprintf(stderr, "Could not allocate frame\n");
        return -1;
    }
    frame->format = c->sample_fmt;
    frame->channel_layout = c->channel_layout;
    frame->sample_rate = c->sample_rate;
    frame->nb_samples = c->frame_size;
    /* Allocate the encoder-side sample buffer and bind it to the frame */
    int output_bz = av_samples_get_buffer_size(NULL, c->channels, c->frame_size, c->sample_fmt, 0);
    uint8_t *samples_data = (uint8_t *)av_malloc(output_bz);
    avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt, samples_data, output_bz, 0);
    printf("cap_bz = %d, frame_bz=%d, chn=%d, frame_size=%d, fmt=%d\n", buffer_size, output_bz, c->channels, c->frame_size, c->sample_fmt);
    /* Resampler: capture S16 interleaved -> encoder sample format */
    uint64_t output_channel_layout = av_get_default_channel_layout(c->channels);
    SwrContext *swrCtx = swr_alloc_set_opts(nullptr,
                                            output_channel_layout,
                                            c->sample_fmt,
                                            c->sample_rate,
                                            av_get_default_channel_layout(MIX_INPUT_CHANNELS),
                                            AV_SAMPLE_FMT_S16,
                                            MIX_INPUT_SAMPLE_RATE,
                                            0, nullptr);
    if (!swrCtx) {
        printf("swr_alloc_set_opts failed.\n");
        return 0;
    }
    if (swr_init(swrCtx) < 0) {
        printf("swr_init failed.\n");
        return 0;
    }
    /* Noise-suppression setup */
    AudioBuffer audio(MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS,
                      MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS,
                      MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS);
    StreamConfig stream_config(MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS);
    NsConfig cfg;
    cfg.target_level = NsConfig::SuppressionLevel::k12dB;
    NoiseSuppressor ns(cfg, MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS);
    bool split_bands = MIX_INPUT_SAMPLE_RATE > 16000;
    std::vector<audio_buf_t> cap_in_list;
    std::vector<audio_buf_t> ns_out_list;
    printf("ns config: %d\n", stream_config.num_samples());
    /* Streaming loop */
    uint64_t pts = 0;
    // FILE *swr_fp = fopen("/root/swr_out.pcm", "wb");
    FILE *input_fp = fopen("/root/input_out.pcm", "wb");
    while(true)
    {
        /* Capture one period of PCM */
        size_t read_size = usbCaptureDev->Read(capData->GetPtr(), capData->GetSampleSize(), kPcmSampleInfo.nb_samples);
        if (!read_size && errno != EAGAIN) {
            printf("capture error: %s\n", strerror(errno));
            msleep(10);
            continue ;
        }
        capData->SetSamples(read_size);
        /* Channel split: duplicate the right (LINE IN) channel into both
         * output channels of lineBuffer. */
        uint8_t *ptr = (uint8_t*)capData->GetPtr();
        for (int i = 0; i < capData->GetSamples(); i++) {
            int size = capData->GetSampleSize() / capData->GetSampleInfo().channels;
            // Left channel (2.4G wireless) — currently unused:
            // memcpy(wirelessBuffer + i * 2 * size, ptr + i * 2 * size, size);
            // memcpy(wirelessBuffer + (i * 2 + 1) * size, ptr + i * 2 * size, size);
            memcpy(lineBuffer + i * 2 * size, ptr + (i * 2 + 1) * size, size);
            memcpy(lineBuffer + (i * 2 + 1) * size, ptr + (i * 2 + 1) * size, size);
        }
        vol_scaler_run((int16_t *)lineBuffer, capData->GetSamples() * kPcmSampleInfo.channels, kVolCtrlUnit.micVolume);
        /* Noise suppression: queue the captured chunk, process it in
         * stream_config-sized frames, and queue the denoised output. */
#if 1
        uint8_t *cap_in = (uint8_t *)malloc(lineSize);
        memcpy(cap_in, lineBuffer, lineSize);
        audio_buf_t cap_in_buf;
        cap_in_buf.data = cap_in;
        cap_in_buf.index = 0;
        cap_in_buf.size = lineSize;
        cap_in_list.emplace_back(cap_in_buf);
        int frames = capData->GetSamples();
        int frameSize = stream_config.num_samples() * sizeof(int16_t);
        while (frames > 0)
        {
            auto capBuf = cap_in_list.begin();
            int readSize = (capBuf->size - capBuf->index) >= frameSize ? frameSize : (capBuf->size - capBuf->index);
            uint8_t *data = (uint8_t *)malloc(readSize);
            memcpy(data, capBuf->data + capBuf->index, readSize);
            /* Run one NS frame in place on `data`. */
            {
                short *buffer = (short *)data;
                audio.CopyFrom(buffer, stream_config);
                if (split_bands)
                    audio.SplitIntoFrequencyBands();
                ns.Analyze(audio);
                ns.Process(&audio);
                if (split_bands)
                    audio.MergeFrequencyBands();
                audio.CopyTo(stream_config, buffer);
            }
            /* Queue the denoised frame */
            audio_buf_t out;
            out.data = data;
            out.index = 0;
            out.size = readSize;
            ns_out_list.emplace_back(out);
            /* Advance the read cursor and release fully-consumed chunks */
            frames -= stream_config.num_samples();
            capBuf->index += readSize;
            if (capBuf->index >= capBuf->size) {
                free(capBuf->data);
                cap_in_list.erase(cap_in_list.begin());
            }
        }
        /* Accumulate a little denoised audio before feeding the encoder */
        if (ns_out_list.size() <= 2) {
            continue;
        }
        frames = c->frame_size;
        while (frames > 0) {
            auto nsData = ns_out_list.begin();
            int needSize = frames * sizeof(int16_t);
            /* NOTE(review): needSize ignores the channel count — this treats
             * c->frame_size as total int16 samples, not per-channel. Confirm
             * against the interleaved layout produced above. */
            int readSize = (nsData->size - nsData->index) >= needSize ? needSize : (nsData->size - nsData->index);
            memcpy(lineBuffer + (c->frame_size - frames)*sizeof(int16_t), nsData->data + nsData->index, readSize);
            frames -= readSize/sizeof(int16_t);
            nsData->index += readSize;
            if (nsData->index >= nsData->size) {
                free(nsData->data);
                ns_out_list.erase(ns_out_list.begin());
            }
        }
#endif
        if (input_fp) fwrite(lineBuffer, 1, lineSize, input_fp);
        /* Resample into the encoder frame */
        {
            const uint8_t** in = (const uint8_t**)&lineBuffer;
            uint8_t **out = frame->data;
            int len2, out_data_size;
            /* NOTE(review): the input count is capData->GetSamples(), but
             * lineBuffer was just refilled with c->frame_size worth of NS
             * output — verify these sample counts actually agree. */
            len2 = swr_convert(swrCtx, out, frame->nb_samples, in, capData->GetSamples());
            if (len2 < 0) {
                printf("swr_convert failed. \n");
                break;
            }
            out_data_size = len2 * c->channels * av_get_bytes_per_sample(c->sample_fmt);
            // if (swr_fp) fwrite(frame->data[0], 1, out_data_size, swr_fp);
        }
        /* Frame timestamp.
         * NOTE(review): this advances pts in milliseconds, which assumes the
         * encoder time_base is {1, 1000}; AAC encoders typically use
         * {1, sample_rate}, in which case pts should advance by frame_size.
         * Confirm against c->time_base after avcodec_open2. */
        pts += c->frame_size * 1000 / MIX_INPUT_SAMPLE_RATE;
        frame->pts = pts;
        /* Send the frame to the encoder */
        ret = avcodec_send_frame(c, frame);
        if (ret < 0) {
            fprintf(stderr, "Error sending a frame for encoding\n");
            break;
        }
        /* Drain encoded packets and mux them */
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;
        while (ret >= 0) {
            ret = avcodec_receive_packet(c, &pkt);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                break;
            } else if (ret < 0) {
                fprintf(stderr, "Error during encoding\n");
                break;
            }
            /* Rescale packet timestamps from encoder to stream time base */
            pkt.stream_index = audio_st->index;
            av_packet_rescale_ts(&pkt, c->time_base, audio_st->time_base);
            pkt.duration = av_rescale_q(pkt.duration, c->time_base, audio_st->time_base);
            ret = av_interleaved_write_frame(oc, &pkt);
            if (ret < 0) {
                fprintf(stderr, "Error while writing audio frame\n");
                break;
            }
            av_packet_unref(&pkt);
        }
    }
    /* Teardown: flush the muxer and release every resource.
     * The original leaked input_fp, samples_data and the codec context,
     * never wrote the FLV trailer, and freed the malloc'd lineBuffer with
     * av_free (allocator mismatch). */
    if (oc)
        av_write_trailer(oc);
    if (input_fp)
        fclose(input_fp);
    if (swrCtx)
        swr_free(&swrCtx);
    if (c)
        avcodec_free_context(&c); /* closes and frees; avcodec_close alone leaked the context */
    if (oc) {
        avio_close(oc->pb);
        avformat_free_context(oc);
    }
    if (frame) {
        av_frame_free(&frame);
    }
    if (samples_data)
        av_free(samples_data); /* paired with av_malloc above */
    if (lineBuffer)
        free(lineBuffer); /* paired with malloc; av_free would mismatch allocators */
    return 0;
}