diff --git a/homeassistant/components/assist_pipeline/audio_enhancer.py b/homeassistant/components/assist_pipeline/audio_enhancer.py index 18f00d58d8a..1fabc7790e7 100644 --- a/homeassistant/components/assist_pipeline/audio_enhancer.py +++ b/homeassistant/components/assist_pipeline/audio_enhancer.py @@ -3,9 +3,8 @@ from abc import ABC, abstractmethod from dataclasses import dataclass import logging -import math -from pysilero_vad import SileroVoiceActivityDetector +from pymicro_vad import MicroVad from pyspeex_noise import AudioProcessor from .const import BYTES_PER_CHUNK @@ -43,8 +42,8 @@ class AudioEnhancer(ABC): """Enhance chunk of PCM audio @ 16Khz with 16-bit mono samples.""" -class SileroVadSpeexEnhancer(AudioEnhancer): - """Audio enhancer that runs Silero VAD and speex.""" +class MicroVadSpeexEnhancer(AudioEnhancer): + """Audio enhancer that runs microVAD and speex.""" def __init__( self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool @@ -70,49 +69,21 @@ class SileroVadSpeexEnhancer(AudioEnhancer): self.noise_suppression, ) - self.vad: SileroVoiceActivityDetector | None = None - - # We get 10ms chunks but Silero works on 32ms chunks, so we have to - # buffer audio. The previous speech probability is used until enough - # audio has been buffered. - self._vad_buffer: bytearray | None = None - self._vad_buffer_chunks = 0 - self._vad_buffer_chunk_idx = 0 - self._last_speech_probability: float | None = None + self.vad: MicroVad | None = None if self.is_vad_enabled: - self.vad = SileroVoiceActivityDetector() - - # VAD buffer is a multiple of 10ms, but Silero VAD needs 32ms. - self._vad_buffer_chunks = int( - math.ceil(self.vad.chunk_bytes() / BYTES_PER_CHUNK) - ) - self._vad_leftover_bytes = self.vad.chunk_bytes() - BYTES_PER_CHUNK - self._vad_buffer = bytearray(self.vad.chunk_bytes()) - _LOGGER.debug("Initialized Silero VAD") + self.vad = MicroVad() + _LOGGER.debug("Initialized microVAD") def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk: """Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples.""" + speech_probability: float | None = None + assert len(audio) == BYTES_PER_CHUNK if self.vad is not None: # Run VAD - assert self._vad_buffer is not None - start_idx = self._vad_buffer_chunk_idx * BYTES_PER_CHUNK - self._vad_buffer[start_idx : start_idx + BYTES_PER_CHUNK] = audio - - self._vad_buffer_chunk_idx += 1 - if self._vad_buffer_chunk_idx >= self._vad_buffer_chunks: - # We have enough data to run Silero VAD (32 ms) - self._last_speech_probability = self.vad.process_chunk( - self._vad_buffer[: self.vad.chunk_bytes()] - ) - - # Copy leftover audio that wasn't processed to start - self._vad_buffer[: self._vad_leftover_bytes] = self._vad_buffer[ - -self._vad_leftover_bytes : - ] - self._vad_buffer_chunk_idx = 0 + speech_probability = self.vad.Process10ms(audio) if self.audio_processor is not None: # Run noise suppression and auto gain @@ -121,5 +92,5 @@ class SileroVadSpeexEnhancer(AudioEnhancer): return EnhancedAudioChunk( audio=audio, timestamp_ms=timestamp_ms, - speech_probability=self._last_speech_probability, + speech_probability=speech_probability, ) diff --git a/homeassistant/components/assist_pipeline/manifest.json b/homeassistant/components/assist_pipeline/manifest.json index 04b6acd8885..d88e4352130 100644 --- a/homeassistant/components/assist_pipeline/manifest.json +++ b/homeassistant/components/assist_pipeline/manifest.json @@ -8,5 +8,5 @@ "integration_type": "system", "iot_class": "local_push", "quality_scale": "internal", - "requirements": ["pysilero-vad==3.2.0", "pyspeex-noise==1.0.2"] + "requirements": ["pymicro-vad==1.0.1", "pyspeex-noise==1.0.2"] } diff --git a/homeassistant/components/assist_pipeline/pipeline.py b/homeassistant/components/assist_pipeline/pipeline.py index abfc4e72782..0948413d4cc 100644 --- a/homeassistant/components/assist_pipeline/pipeline.py +++ b/homeassistant/components/assist_pipeline/pipeline.py @@ -55,7 +55,7 @@ from homeassistant.util import ( from homeassistant.util.hass_dict import HassKey from homeassistant.util.limited_size_dict import LimitedSizeDict -from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, SileroVadSpeexEnhancer +from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, MicroVadSpeexEnhancer from .const import ( ACKNOWLEDGE_PATH, BYTES_PER_CHUNK, @@ -633,7 +633,7 @@ class PipelineRun: # Initialize with audio settings if self.audio_settings.needs_processor and (self.audio_enhancer is None): # Default audio enhancer - self.audio_enhancer = SileroVadSpeexEnhancer( + self.audio_enhancer = MicroVadSpeexEnhancer( self.audio_settings.auto_gain_dbfs, self.audio_settings.noise_suppression_level, self.audio_settings.is_vad_enabled, diff --git a/homeassistant/package_constraints.txt b/homeassistant/package_constraints.txt index 386981e6d2b..bc32bc514d7 100644 --- a/homeassistant/package_constraints.txt +++ b/homeassistant/package_constraints.txt @@ -53,10 +53,10 @@ Pillow==12.0.0 propcache==0.4.1 psutil-home-assistant==0.0.1 PyJWT==2.10.1 +pymicro-vad==1.0.1 PyNaCl==1.6.0 pyOpenSSL==25.3.0 pyserial==3.5 -pysilero-vad==3.2.0 pyspeex-noise==1.0.2 python-slugify==8.0.4 PyTurboJPEG==1.8.0 diff --git a/requirements.txt b/requirements.txt index b58bbd9079e..1d0d75c6f6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,8 +39,8 @@ Pillow==12.0.0 propcache==0.4.1 psutil-home-assistant==0.0.1 PyJWT==2.10.1 +pymicro-vad==1.0.1 pyOpenSSL==25.3.0 -pysilero-vad==3.2.0 pyspeex-noise==1.0.2 python-slugify==8.0.4 PyTurboJPEG==1.8.0 diff --git a/requirements_all.txt b/requirements_all.txt index 90db2d89388..2d1193928d4 100644 --- a/requirements_all.txt +++ b/requirements_all.txt @@ -2201,6 +2201,9 @@ pymediaroom==0.6.5.4 # homeassistant.components.meteoclimatic pymeteoclimatic==0.1.0 +# homeassistant.components.assist_pipeline +pymicro-vad==1.0.1 + # homeassistant.components.miele pymiele==0.6.1 @@ -2408,9 +2411,6 @@ pysiaalarm==3.1.1 # homeassistant.components.signal_messenger pysignalclirestapi==0.3.24 -# homeassistant.components.assist_pipeline -pysilero-vad==3.2.0 - # homeassistant.components.sky_hub pyskyqhub==0.1.4 diff --git a/requirements_test_all.txt b/requirements_test_all.txt index 5a56f73da76..aee3b8b69ee 100644 --- a/requirements_test_all.txt +++ b/requirements_test_all.txt @@ -1863,6 +1863,9 @@ pymata-express==1.19 # homeassistant.components.meteoclimatic pymeteoclimatic==0.1.0 +# homeassistant.components.assist_pipeline +pymicro-vad==1.0.1 + # homeassistant.components.miele pymiele==0.6.1 @@ -2034,9 +2037,6 @@ pysiaalarm==3.1.1 # homeassistant.components.signal_messenger pysignalclirestapi==0.3.24 -# homeassistant.components.assist_pipeline -pysilero-vad==3.2.0 - # homeassistant.components.sma pysma==1.1.0