mirror of
https://github.com/Electric-Special/ha-core.git
synced 2026-03-21 02:03:27 +01:00
Revert back to microVAD (#160821)
This commit is contained in:
@@ -3,9 +3,8 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
import math
|
||||
|
||||
from pysilero_vad import SileroVoiceActivityDetector
|
||||
from pymicro_vad import MicroVad
|
||||
from pyspeex_noise import AudioProcessor
|
||||
|
||||
from .const import BYTES_PER_CHUNK
|
||||
@@ -43,8 +42,8 @@ class AudioEnhancer(ABC):
|
||||
"""Enhance chunk of PCM audio @ 16Khz with 16-bit mono samples."""
|
||||
|
||||
|
||||
class SileroVadSpeexEnhancer(AudioEnhancer):
|
||||
"""Audio enhancer that runs Silero VAD and speex."""
|
||||
class MicroVadSpeexEnhancer(AudioEnhancer):
|
||||
"""Audio enhancer that runs microVAD and speex."""
|
||||
|
||||
def __init__(
|
||||
self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
|
||||
@@ -70,49 +69,21 @@ class SileroVadSpeexEnhancer(AudioEnhancer):
|
||||
self.noise_suppression,
|
||||
)
|
||||
|
||||
self.vad: SileroVoiceActivityDetector | None = None
|
||||
|
||||
# We get 10ms chunks but Silero works on 32ms chunks, so we have to
|
||||
# buffer audio. The previous speech probability is used until enough
|
||||
# audio has been buffered.
|
||||
self._vad_buffer: bytearray | None = None
|
||||
self._vad_buffer_chunks = 0
|
||||
self._vad_buffer_chunk_idx = 0
|
||||
self._last_speech_probability: float | None = None
|
||||
self.vad: MicroVad | None = None
|
||||
|
||||
if self.is_vad_enabled:
|
||||
self.vad = SileroVoiceActivityDetector()
|
||||
|
||||
# VAD buffer is a multiple of 10ms, but Silero VAD needs 32ms.
|
||||
self._vad_buffer_chunks = int(
|
||||
math.ceil(self.vad.chunk_bytes() / BYTES_PER_CHUNK)
|
||||
)
|
||||
self._vad_leftover_bytes = self.vad.chunk_bytes() - BYTES_PER_CHUNK
|
||||
self._vad_buffer = bytearray(self.vad.chunk_bytes())
|
||||
_LOGGER.debug("Initialized Silero VAD")
|
||||
self.vad = MicroVad()
|
||||
_LOGGER.debug("Initialized microVAD")
|
||||
|
||||
def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
|
||||
"""Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples."""
|
||||
speech_probability: float | None = None
|
||||
|
||||
assert len(audio) == BYTES_PER_CHUNK
|
||||
|
||||
if self.vad is not None:
|
||||
# Run VAD
|
||||
assert self._vad_buffer is not None
|
||||
start_idx = self._vad_buffer_chunk_idx * BYTES_PER_CHUNK
|
||||
self._vad_buffer[start_idx : start_idx + BYTES_PER_CHUNK] = audio
|
||||
|
||||
self._vad_buffer_chunk_idx += 1
|
||||
if self._vad_buffer_chunk_idx >= self._vad_buffer_chunks:
|
||||
# We have enough data to run Silero VAD (32 ms)
|
||||
self._last_speech_probability = self.vad.process_chunk(
|
||||
self._vad_buffer[: self.vad.chunk_bytes()]
|
||||
)
|
||||
|
||||
# Copy leftover audio that wasn't processed to start
|
||||
self._vad_buffer[: self._vad_leftover_bytes] = self._vad_buffer[
|
||||
-self._vad_leftover_bytes :
|
||||
]
|
||||
self._vad_buffer_chunk_idx = 0
|
||||
speech_probability = self.vad.Process10ms(audio)
|
||||
|
||||
if self.audio_processor is not None:
|
||||
# Run noise suppression and auto gain
|
||||
@@ -121,5 +92,5 @@ class SileroVadSpeexEnhancer(AudioEnhancer):
|
||||
return EnhancedAudioChunk(
|
||||
audio=audio,
|
||||
timestamp_ms=timestamp_ms,
|
||||
speech_probability=self._last_speech_probability,
|
||||
speech_probability=speech_probability,
|
||||
)
|
||||
|
||||
@@ -8,5 +8,5 @@
|
||||
"integration_type": "system",
|
||||
"iot_class": "local_push",
|
||||
"quality_scale": "internal",
|
||||
"requirements": ["pysilero-vad==3.2.0", "pyspeex-noise==1.0.2"]
|
||||
"requirements": ["pymicro-vad==1.0.1", "pyspeex-noise==1.0.2"]
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ from homeassistant.util import (
|
||||
from homeassistant.util.hass_dict import HassKey
|
||||
from homeassistant.util.limited_size_dict import LimitedSizeDict
|
||||
|
||||
from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, SileroVadSpeexEnhancer
|
||||
from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, MicroVadSpeexEnhancer
|
||||
from .const import (
|
||||
ACKNOWLEDGE_PATH,
|
||||
BYTES_PER_CHUNK,
|
||||
@@ -633,7 +633,7 @@ class PipelineRun:
|
||||
# Initialize with audio settings
|
||||
if self.audio_settings.needs_processor and (self.audio_enhancer is None):
|
||||
# Default audio enhancer
|
||||
self.audio_enhancer = SileroVadSpeexEnhancer(
|
||||
self.audio_enhancer = MicroVadSpeexEnhancer(
|
||||
self.audio_settings.auto_gain_dbfs,
|
||||
self.audio_settings.noise_suppression_level,
|
||||
self.audio_settings.is_vad_enabled,
|
||||
|
||||
@@ -53,10 +53,10 @@ Pillow==12.0.0
|
||||
propcache==0.4.1
|
||||
psutil-home-assistant==0.0.1
|
||||
PyJWT==2.10.1
|
||||
pymicro-vad==1.0.1
|
||||
PyNaCl==1.6.0
|
||||
pyOpenSSL==25.3.0
|
||||
pyserial==3.5
|
||||
pysilero-vad==3.2.0
|
||||
pyspeex-noise==1.0.2
|
||||
python-slugify==8.0.4
|
||||
PyTurboJPEG==1.8.0
|
||||
|
||||
2
requirements.txt
generated
2
requirements.txt
generated
@@ -39,8 +39,8 @@ Pillow==12.0.0
|
||||
propcache==0.4.1
|
||||
psutil-home-assistant==0.0.1
|
||||
PyJWT==2.10.1
|
||||
pymicro-vad==1.0.1
|
||||
pyOpenSSL==25.3.0
|
||||
pysilero-vad==3.2.0
|
||||
pyspeex-noise==1.0.2
|
||||
python-slugify==8.0.4
|
||||
PyTurboJPEG==1.8.0
|
||||
|
||||
6
requirements_all.txt
generated
6
requirements_all.txt
generated
@@ -2201,6 +2201,9 @@ pymediaroom==0.6.5.4
|
||||
# homeassistant.components.meteoclimatic
|
||||
pymeteoclimatic==0.1.0
|
||||
|
||||
# homeassistant.components.assist_pipeline
|
||||
pymicro-vad==1.0.1
|
||||
|
||||
# homeassistant.components.miele
|
||||
pymiele==0.6.1
|
||||
|
||||
@@ -2408,9 +2411,6 @@ pysiaalarm==3.1.1
|
||||
# homeassistant.components.signal_messenger
|
||||
pysignalclirestapi==0.3.24
|
||||
|
||||
# homeassistant.components.assist_pipeline
|
||||
pysilero-vad==3.2.0
|
||||
|
||||
# homeassistant.components.sky_hub
|
||||
pyskyqhub==0.1.4
|
||||
|
||||
|
||||
6
requirements_test_all.txt
generated
6
requirements_test_all.txt
generated
@@ -1863,6 +1863,9 @@ pymata-express==1.19
|
||||
# homeassistant.components.meteoclimatic
|
||||
pymeteoclimatic==0.1.0
|
||||
|
||||
# homeassistant.components.assist_pipeline
|
||||
pymicro-vad==1.0.1
|
||||
|
||||
# homeassistant.components.miele
|
||||
pymiele==0.6.1
|
||||
|
||||
@@ -2034,9 +2037,6 @@ pysiaalarm==3.1.1
|
||||
# homeassistant.components.signal_messenger
|
||||
pysignalclirestapi==0.3.24
|
||||
|
||||
# homeassistant.components.assist_pipeline
|
||||
pysilero-vad==3.2.0
|
||||
|
||||
# homeassistant.components.sma
|
||||
pysma==1.1.0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user