From 5f20b38088ea9b4903482b554c046a37fb10f578 Mon Sep 17 00:00:00 2001 From: Kai Date: Fri, 13 Feb 2026 17:09:54 +0100 Subject: [PATCH] Simplify client modes and add VAD retention policy --- bridge/.env.example | 3 ++- bridge/Dockerfile | 3 ++- bridge/README.md | 6 +++-- bridge/app.py | 38 ++++++++++++++++++++-------- src/main.cpp | 60 ++++++++++++++++----------------------------- 5 files changed, 57 insertions(+), 53 deletions(-) diff --git a/bridge/.env.example b/bridge/.env.example index 7bc4534..fd373d1 100644 --- a/bridge/.env.example +++ b/bridge/.env.example @@ -26,7 +26,8 @@ WAV_KEEP_FILES=10 VAD_ENABLED=true VAD_DIR=/data/vad -VAD_KEEP_FILES=100 +VAD_KEEP_FILES=200 +VAD_MAX_AGE_DAYS=7 VAD_PREROLL_MS=1000 VAD_POSTROLL_MS=1000 VAD_START_THRESHOLD=900 diff --git a/bridge/Dockerfile b/bridge/Dockerfile index ca1dac6..812cf15 100644 --- a/bridge/Dockerfile +++ b/bridge/Dockerfile @@ -30,7 +30,8 @@ ENV WS_HOST=0.0.0.0 \ WAV_KEEP_FILES=10 \ VAD_ENABLED=true \ VAD_DIR=/data/vad \ - VAD_KEEP_FILES=100 \ + VAD_KEEP_FILES=200 \ + VAD_MAX_AGE_DAYS=7 \ VAD_PREROLL_MS=1000 \ VAD_POSTROLL_MS=1000 \ VAD_START_THRESHOLD=900 \ diff --git a/bridge/README.md b/bridge/README.md index b469d7f..598a146 100644 --- a/bridge/README.md +++ b/bridge/README.md @@ -96,7 +96,8 @@ You can build automations on these events (for STT/TTS pipelines or Node-RED han - `VAD_POSTROLL_MS=1000` keeps 1s after speech end - `VAD_START_THRESHOLD` / `VAD_STOP_THRESHOLD` tune sensitivity - `VAD_DIR` stores per-utterance WAV files - - `VAD_KEEP_FILES` limits stored VAD WAV files + - `VAD_KEEP_FILES=200` limits number of stored VAD WAV files + - `VAD_MAX_AGE_DAYS=7` deletes VAD WAV files older than 7 days - MQTT is recommended for control/events, WebSocket for streaming audio ## 7) Build and push to Gitea registry @@ -148,7 +149,8 @@ services: WAV_KEEP_FILES: "10" VAD_ENABLED: "true" VAD_DIR: "/data/vad" - VAD_KEEP_FILES: "100" + VAD_KEEP_FILES: "200" + VAD_MAX_AGE_DAYS: "7" VAD_PREROLL_MS: "1000" VAD_POSTROLL_MS: "1000" VAD_START_THRESHOLD: "900" diff --git a/bridge/app.py b/bridge/app.py index bc5bc01..f3be889 100644 --- a/bridge/app.py +++ b/bridge/app.py @@ -53,7 +53,8 @@ WAV_HEADER_BYTES = 44 VAD_ENABLED = getenv_bool("VAD_ENABLED", True) VAD_DIR = Path(os.getenv("VAD_DIR", "/data/vad")) -VAD_KEEP_FILES = int(os.getenv("VAD_KEEP_FILES", "100")) +VAD_KEEP_FILES = int(os.getenv("VAD_KEEP_FILES", "200")) +VAD_MAX_AGE_DAYS = int(os.getenv("VAD_MAX_AGE_DAYS", "7")) VAD_PREROLL_MS = int(os.getenv("VAD_PREROLL_MS", "1000")) VAD_POSTROLL_MS = int(os.getenv("VAD_POSTROLL_MS", "1000")) VAD_START_THRESHOLD = int(os.getenv("VAD_START_THRESHOLD", "900")) @@ -136,17 +137,34 @@ async def call_ha_webhook(event: str, payload: dict) -> None: log.exception("ha webhook call failed") -def enforce_wav_retention(directory: Path, keep_files: int) -> None: - if keep_files <= 0: +def enforce_wav_retention(directory: Path, keep_files: int, max_age_days: int = 0) -> None: + if keep_files <= 0 and max_age_days <= 0: return try: directory.mkdir(parents=True, exist_ok=True) - wavs = sorted( - [p for p in directory.glob("*.wav") if p.is_file()], - key=lambda p: p.stat().st_mtime, - ) - while len(wavs) > keep_files: - oldest = wavs.pop(0) + wavs = [] + now = time.time() + max_age_seconds = max_age_days * 86400 + for p in directory.glob("*.wav"): + if not p.is_file(): + continue + try: + st = p.stat() + except Exception: + continue + if max_age_seconds > 0 and (now - st.st_mtime) > max_age_seconds: + try: + p.unlink() + log.info("deleted old wav by age: %s", p) + except Exception: + log.exception("failed to delete old wav by age: %s", p) + continue + wavs.append((p, st.st_mtime)) + + wavs.sort(key=lambda x: x[1]) + files = [p for (p, _) in wavs] + while keep_files > 0 and len(files) > keep_files: + oldest = files.pop(0) try: oldest.unlink() log.info("deleted old wav: %s", oldest) @@ -198,7 +216,7 @@ def write_vad_wav_segment(session: DeviceSession, pcm: bytes) -> Optional[str]: wf.writeframes(pcm) wf.close() session.vad_segment_index += 1 - enforce_wav_retention(VAD_DIR, VAD_KEEP_FILES) + enforce_wav_retention(VAD_DIR, VAD_KEEP_FILES, VAD_MAX_AGE_DAYS) return str(path) except Exception: log.exception("failed to write vad wav segment for %s", session.device_id) diff --git a/src/main.cpp b/src/main.cpp index 92455b0..021431c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,7 +6,6 @@ #include "secrets.h" using namespace websockets; -static constexpr bool kDefaultStreamMode = EVS_DEFAULT_STREAM_MODE; static constexpr bool kSerialCommandEcho = EVS_SERIAL_COMMAND_ECHO; static constexpr bool kMicUseRightChannel = EVS_MIC_USE_RIGHT_CHANNEL; static constexpr int kMicS24ToS16Shift = EVS_MIC_S24_TO_S16_SHIFT; @@ -38,8 +37,7 @@ static constexpr float PI_F = 3.14159265358979323846f; enum class DeviceMode : uint8_t { Idle, - StreamToServer, // Placeholder: ship PCM to remote STT/LLM/TTS service - LocalLoopback, // Debug mode: mic directly to speaker + StreamToServer, // Ship PCM to remote STT/LLM/TTS service }; static DeviceMode g_mode = DeviceMode::Idle; @@ -57,6 +55,8 @@ static size_t g_rxHead = 0; static size_t g_rxTail = 0; static size_t g_rxCount = 0; +static void setMode(DeviceMode mode); + static bool initMicI2s() { const i2s_config_t i2sConfig = { .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), @@ -207,18 +207,18 @@ static void onWsMessageCallback(WebsocketsMessage message) { static void onWsEventCallback(WebsocketsEvent event, String) { if (event == WebsocketsEvent::ConnectionOpened) { g_wsConnected = true; - if (g_mode == DeviceMode::StreamToServer && !g_streamingActive) { - g_ws.send("{\"type\":\"start\"}"); - g_streamingActive = true; - playStartTone(); - } + // Connection-driven mode: always stream on connect. + setMode(DeviceMode::StreamToServer); Serial.println("WS connected"); } else if (event == WebsocketsEvent::ConnectionClosed) { - if (g_streamingActive) { - playStopTone(); - } + const bool wasStreaming = g_streamingActive; g_wsConnected = false; g_streamingActive = false; + // Connection-driven mode: always idle on disconnect. + setMode(DeviceMode::Idle); + if (wasStreaming) { + playStopTone(); + } Serial.println("WS disconnected"); } } @@ -352,13 +352,11 @@ static void serviceSpeaker() { static void printHelp() { Serial.println(); Serial.println("Commands:"); - Serial.println(" i = idle"); - Serial.println(" s = stream mode"); - Serial.println(" l = local loopback mode"); Serial.println(" p = print network status"); Serial.println(" h = help"); - Serial.print("Default on boot: "); - Serial.println(kDefaultStreamMode ? "StreamToServer" : "LocalLoopback"); + Serial.println("Connection policy:"); + Serial.println(" connect -> StreamToServer (start)"); + Serial.println(" disconnect -> Idle"); } static void handleSerialCommands() { @@ -368,22 +366,15 @@ static void handleSerialCommands() { Serial.print("RX cmd: "); Serial.println(c); } - if (c == 'i') { - setMode(DeviceMode::Idle); - Serial.println("Mode -> Idle"); - } else if (c == 's') { - setMode(DeviceMode::StreamToServer); - Serial.println("Mode -> StreamToServer"); - } else if (c == 'l') { - setMode(DeviceMode::LocalLoopback); - Serial.println("Mode -> LocalLoopback"); - } else if (c == 'p') { + if (c == 'p') { Serial.print("WiFi: "); Serial.print((WiFi.status() == WL_CONNECTED) ? "connected " : "disconnected "); Serial.print("IP="); Serial.println(WiFi.localIP()); Serial.print("WS: "); Serial.println(g_wsConnected ? "connected" : "disconnected"); + Serial.print("Mode: "); + Serial.println((g_mode == DeviceMode::StreamToServer) ? "StreamToServer" : "Idle"); } else if (c == 'h') { printHelp(); } else if (c != '\r' && c != '\n') { @@ -410,11 +401,8 @@ void setup() { g_ws.onEvent(onWsEventCallback); g_nextOutUs = micros(); - if (kDefaultStreamMode) { - setMode(DeviceMode::StreamToServer); - } else { - setMode(DeviceMode::LocalLoopback); - } + // Wait in idle until WS connect event switches to StreamToServer. + setMode(DeviceMode::Idle); Serial.println("Audio init ok"); Serial.println("Set local environment values in include/secrets.h"); printHelp(); @@ -422,10 +410,8 @@ void setup() { void loop() { handleSerialCommands(); - if (g_mode != DeviceMode::LocalLoopback) { - ensureConnectivity(); - g_ws.poll(); - } + ensureConnectivity(); + g_ws.poll(); serviceSpeaker(); size_t bytesRead = 0; @@ -444,10 +430,6 @@ void loop() { if (g_mode == DeviceMode::StreamToServer) { handleFrameForServer(pcm16, sampleCount); publishMicTelemetryIfDue(pcm16, sampleCount); - } else if (g_mode == DeviceMode::LocalLoopback) { - for (size_t i = 0; i < sampleCount; ++i) { - enqueuePcmSample(pcm16[i]); - } } else { // idle }