|
|
|
@ -461,6 +461,134 @@ def check_params(req: dict):
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 分贝调整策略
|
|
|
|
|
|
|
|
|
|
def _to_float32_mono(x: np.ndarray) -> np.ndarray:
    """Convert PCM samples to float32, scaling integer PCM to [-1.0, 1.0).

    Signed-integer input (int8 / int16 / int32 / int64) is divided by the
    magnitude of its most negative value (32768 for int16), so every integer
    width lands in the same nominal range. Any other dtype is cast to float32
    without scaling.

    Note: despite the name, no channel down-mixing is performed; the array is
    returned with the same shape it came in with.

    Args:
        x: Input samples.

    Returns:
        A float32 array with the same shape as ``x``.
    """
    if np.issubdtype(x.dtype, np.signedinteger):
        # iinfo(...).min is e.g. -32768 for int16; its magnitude is full scale.
        full_scale = float(-np.iinfo(x.dtype).min)
        return x.astype(np.float32) / full_scale
    return x.astype(np.float32)
|
|
|
|
|
|
|
|
|
|
def _peak_dbfs(xf: np.ndarray) -> float:
    """Return the peak level of ``xf`` in dBFS.

    The peak is the largest absolute sample value, converted with
    ``20 * log10(peak)``. An empty array, or a peak at or below 1e-12, is
    reported as negative infinity.
    """
    silence = -float("inf")
    if xf.size == 0:
        return silence
    magnitude = float(np.abs(xf).max())
    if magnitude <= 1e-12:
        return silence
    return 20.0 * np.log10(magnitude)
|
|
|
|
|
|
|
|
|
|
def _rms_dbfs(xf: np.ndarray) -> float:
    """Return the RMS level of ``xf`` in dBFS.

    The RMS is the square root of the mean squared sample value, converted
    with ``20 * log10(rms)``. An empty array, or an RMS at or below 1e-12, is
    reported as negative infinity.
    """
    silence = -float("inf")
    if xf.size == 0:
        return silence
    level = float(np.sqrt(np.mean(np.square(xf))))
    if level <= 1e-12:
        return silence
    return 20.0 * np.log10(level)
|
|
|
|
|
|
|
|
|
|
def _apply_gain_linear(xf: np.ndarray, gain_db: float) -> np.ndarray:
    """Scale ``xf`` by a gain given in decibels.

    The dB value is converted to a linear amplitude factor with
    ``10 ** (gain_db / 20)`` and multiplied into every sample.
    """
    factor = 10.0 ** (gain_db / 20.0)
    scaled = xf * factor
    return scaled
|
|
|
|
|
|
|
|
|
|
def _limiter_peak(xf: np.ndarray, thresh_db: float = -1.0, soft: bool = True):
    """Simple peak limiter.

    Args:
        xf: Waveform to limit.
        thresh_db: Ceiling in dBFS; defaults to -1 dBFS.
        soft: When True, use a tanh soft clipper (smoother sounding); when
            False, rescale the whole chunk so its peak sits on the ceiling.

    Returns:
        A tuple ``(waveform, limited)``. If the input is empty, or its peak is
        already at or below the ceiling, the input array is returned as-is
        with ``limited=False``. Otherwise a float32 array is returned with
        ``limited=True``.
    """
    if xf.size == 0:
        return xf, False
    thresh_lin = 10.0 ** (thresh_db / 20.0)
    peak = float(np.max(np.abs(xf)))
    if peak <= thresh_lin or peak <= 1e-12:
        return xf, False
    if soft:
        # Normalise by the ceiling rather than by the chunk peak. Because
        # |tanh(u)| <= |u|, no sample is ever made louder, low-level samples
        # pass at (almost) unity gain, and the output stays below the ceiling.
        # Normalising by the peak with a pre-gain > 1 would instead amplify
        # quiet samples whenever the peak only slightly exceeds the ceiling.
        out = np.tanh(xf / thresh_lin) * thresh_lin
        return out.astype(np.float32), True
    # Hard mode: one linear scale factor that brings the peak to the ceiling.
    out = xf * (thresh_lin / peak)
    return out.astype(np.float32), True
|
|
|
|
|
|
|
|
|
|
class DynamicGainState:
    """Chunk-by-chunk automatic gain control followed by a peak limiter.

    Each call to :meth:`compute_chunk` measures the chunk, derives the gain
    that would move its peak to ``target_peak_db``, smooths that gain against
    the gain used for the previous chunk, applies it, and finally runs the
    peak limiter. The smoothed gain is kept in ``prev_gain_db`` between calls.

    Args:
        target_peak_db: Desired peak level in dBFS.
        max_boost_db: Largest positive gain (dB) that may be requested.
        max_cut_db: Largest attenuation (dB, given as a positive number) that
            may be requested.
        min_rms_gate_db: Chunks whose RMS (dBFS) is below this value are
            treated as quiet.
        quiet_boost_cap_db: Upper bound (dB) on the requested gain for quiet
            chunks.
        attack_fast: Smoothing coefficient used when the requested gain is
            above the previous gain.
        release_slow: Smoothing coefficient used otherwise.
        limiter_thresh_db: Limiter ceiling in dBFS.
        limiter_soft: Whether the limiter runs in its soft mode.
    """

    def __init__(
        self,
        target_peak_db: float = -1.0,
        max_boost_db: float = 18.0,
        max_cut_db: float = 24.0,
        min_rms_gate_db: float = -45.0,
        quiet_boost_cap_db: float = 6.0,
        attack_fast: float = 0.25,
        release_slow: float = 0.08,
        limiter_thresh_db: float = -1.0,
        limiter_soft: bool = True,
    ):
        self.target_peak_db = target_peak_db
        self.max_boost_db = max_boost_db
        self.max_cut_db = max_cut_db
        self.min_rms_gate_db = min_rms_gate_db
        self.quiet_boost_cap_db = quiet_boost_cap_db
        self.attack_fast = attack_fast
        self.release_slow = release_slow
        self.limiter_thresh_db = limiter_thresh_db
        self.limiter_soft = limiter_soft
        # Smoothed gain (dB) applied to the most recent chunk; starts at unity.
        self.prev_gain_db = 0.0

    def compute_chunk(self, x: np.ndarray):
        """Apply smoothed gain and limiting to one chunk.

        Args:
            x: Input samples for this chunk.

        Returns:
            A tuple ``(y, info)`` where ``y`` is the processed float32
            waveform and ``info`` is a dict with the keys ``peak_db``,
            ``rms_db``, ``ideal_gain_db``, ``applied_gain_db``, ``limited``,
            ``post_peak_db`` and ``post_rms_db``.
        """
        silence = -float("inf")
        xf = _to_float32_mono(x)

        peak_db = _peak_dbfs(xf)
        rms_db = _rms_dbfs(xf)

        if peak_db == silence:
            # A silent (or empty) chunk carries no level information. Hold the
            # current gain instead of requesting the maximum boost; otherwise
            # the gain ramps up during pauses and the next voiced chunk is
            # over-amplified.
            ideal_gain_db = self.prev_gain_db
        else:
            ideal_gain_db = self.target_peak_db - peak_db

        # Very quiet (but not silent) chunks get a reduced boost ceiling.
        if rms_db != silence and rms_db < self.min_rms_gate_db:
            ideal_gain_db = min(ideal_gain_db, self.quiet_boost_cap_db)

        # Clamp the request to [-max_cut_db, +max_boost_db].
        ideal_gain_db = max(-self.max_cut_db, min(self.max_boost_db, ideal_gain_db))

        # One-pole smoothing towards the requested gain.
        # NOTE(review): attack_fast is used when the gain *rises* and
        # release_slow when it *falls*. Compressor terminology usually has it
        # the other way round (fast gain reduction, slow recovery). Confirm
        # which direction is intended before changing it.
        alpha = self.attack_fast if ideal_gain_db > self.prev_gain_db else self.release_slow
        gain_db = self.prev_gain_db + alpha * (ideal_gain_db - self.prev_gain_db)
        self.prev_gain_db = gain_db

        y = _apply_gain_linear(xf, gain_db)
        y, limited = _limiter_peak(y, self.limiter_thresh_db, soft=self.limiter_soft)

        info = {
            "peak_db": peak_db,
            "rms_db": rms_db,
            "ideal_gain_db": ideal_gain_db,
            "applied_gain_db": gain_db,
            "limited": limited,
            "post_peak_db": _peak_dbfs(y),
            "post_rms_db": _rms_dbfs(y),
        }
        return y.astype(np.float32), info
|
|
|
|
|
|
|
|
|
|
# Single module-level gain state. Its smoothed gain (prev_gain_db) is carried
# over between successive compute_chunk() calls made through this instance.
dyn_state = DynamicGainState(
    # Limiter stage.
    limiter_soft=True,
    limiter_thresh_db=-1.0,
    # Gain smoothing coefficients.
    release_slow=0.08,
    attack_fast=0.25,
    # Handling of quiet chunks.
    quiet_boost_cap_db=6.0,
    min_rms_gate_db=-45.0,
    # Target level and gain bounds.
    max_cut_db=24.0,
    max_boost_db=18.0,
    target_peak_db=-1.0,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def tts_handle(req: dict):
|
|
|
|
|
"""
|
|
|
|
|
Text to speech handler.
|
|
|
|
@ -514,11 +642,20 @@ async def tts_handle(req: dict):
|
|
|
|
|
def streaming_generator(tts_generator: Generator, media_type: str):
    """Yield packed audio bytes for every chunk produced by ``tts_generator``.

    Each ``(sample_rate, chunk)`` pair is run through ``dyn_state`` for
    loudness processing, the measurements are printed, and the processed
    chunk is packed with ``pack_audio``. For ``media_type == "wav"`` a WAV
    header is yielded once before the first chunk, after which chunks are
    packed as ``"raw"``.

    Only the processed chunk is yielded. Yielding the unprocessed chunk as
    well would send every piece of audio twice.

    NOTE(review): ``compute_chunk`` returns float32 samples. Confirm that
    ``pack_audio`` and the header from ``wave_header_chunk`` accept that
    sample format.
    """
    is_first_chunk = True
    for sr, chunk in tts_generator:
        processed, info = dyn_state.compute_chunk(chunk)

        print(
            f"[响度] 原峰值 {info['peak_db']:.2f} dBFS | 原RMS {info['rms_db']:.2f} dBFS | "
            f"理想增益 {info['ideal_gain_db']:.2f} dB | 实际增益 {info['applied_gain_db']:.2f} dB | "
            f"限幅 {info['limited']} | 处理后峰值 {info['post_peak_db']:.2f} dBFS | 处理后RMS {info['post_rms_db']:.2f} dBFS"
        )

        if is_first_chunk and media_type == "wav":
            # Emit the header once, then stream the remaining data as raw PCM.
            yield wave_header_chunk(sample_rate=sr)
            media_type = "raw"
            is_first_chunk = False

        yield pack_audio(BytesIO(), processed, sr, media_type).getvalue()
|
|
|
|
|
|
|
|
|
|
# _media_type = f"audio/{media_type}" if not (streaming_mode and media_type in ["wav", "raw"]) else f"audio/x-{media_type}"
|
|
|
|
|
return StreamingResponse(
|
|
|
|
@ -531,6 +668,13 @@ async def tts_handle(req: dict):
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
sr, audio_data = next(tts_generator)
|
|
|
|
|
processed, info = dyn_state.compute_chunk(audio_data)
|
|
|
|
|
|
|
|
|
|
print(
|
|
|
|
|
f"[响度] 原峰值 {info['peak_db']:.2f} dBFS | 原RMS {info['rms_db']:.2f} dBFS | "
|
|
|
|
|
f"理想增益 {info['ideal_gain_db']:.2f} dB | 实际增益 {info['applied_gain_db']:.2f} dB | "
|
|
|
|
|
f"限幅 {info['limited']} | 处理后峰值 {info['post_peak_db']:.2f} dBFS | 处理后RMS {info['post_rms_db']:.2f} dBFS"
|
|
|
|
|
)
|
|
|
|
|
audio_data = pack_audio(BytesIO(), audio_data, sr, media_type).getvalue()
|
|
|
|
|
return Response(audio_data, media_type=f"audio/{media_type}")
|
|
|
|
|
except Exception as e:
|
|
|
|
|