Simplify client modes and add VAD retention policy
Some checks failed
Build and Push EVS Bridge Image / docker (push) Has been cancelled

This commit is contained in:
Kai
2026-02-13 17:09:54 +01:00
parent d4d4c7224b
commit 5f20b38088
5 changed files with 57 additions and 53 deletions

View File

@@ -26,7 +26,8 @@ WAV_KEEP_FILES=10
VAD_ENABLED=true VAD_ENABLED=true
VAD_DIR=/data/vad VAD_DIR=/data/vad
VAD_KEEP_FILES=100 VAD_KEEP_FILES=200
VAD_MAX_AGE_DAYS=7
VAD_PREROLL_MS=1000 VAD_PREROLL_MS=1000
VAD_POSTROLL_MS=1000 VAD_POSTROLL_MS=1000
VAD_START_THRESHOLD=900 VAD_START_THRESHOLD=900

View File

@@ -30,7 +30,8 @@ ENV WS_HOST=0.0.0.0 \
WAV_KEEP_FILES=10 \ WAV_KEEP_FILES=10 \
VAD_ENABLED=true \ VAD_ENABLED=true \
VAD_DIR=/data/vad \ VAD_DIR=/data/vad \
VAD_KEEP_FILES=100 \ VAD_KEEP_FILES=200 \
VAD_MAX_AGE_DAYS=7 \
VAD_PREROLL_MS=1000 \ VAD_PREROLL_MS=1000 \
VAD_POSTROLL_MS=1000 \ VAD_POSTROLL_MS=1000 \
VAD_START_THRESHOLD=900 \ VAD_START_THRESHOLD=900 \

View File

@@ -96,7 +96,8 @@ You can build automations on these events (for STT/TTS pipelines or Node-RED han
- `VAD_POSTROLL_MS=1000` keeps 1s after speech end - `VAD_POSTROLL_MS=1000` keeps 1s after speech end
- `VAD_START_THRESHOLD` / `VAD_STOP_THRESHOLD` tune sensitivity - `VAD_START_THRESHOLD` / `VAD_STOP_THRESHOLD` tune sensitivity
- `VAD_DIR` stores per-utterance WAV files - `VAD_DIR` stores per-utterance WAV files
- `VAD_KEEP_FILES` limits stored VAD WAV files - `VAD_KEEP_FILES=200` limits number of stored VAD WAV files
- `VAD_MAX_AGE_DAYS=7` deletes VAD WAV files older than 7 days
- MQTT is recommended for control/events, WebSocket for streaming audio - MQTT is recommended for control/events, WebSocket for streaming audio
## 7) Build and push to Gitea registry ## 7) Build and push to Gitea registry
@@ -148,7 +149,8 @@ services:
WAV_KEEP_FILES: "10" WAV_KEEP_FILES: "10"
VAD_ENABLED: "true" VAD_ENABLED: "true"
VAD_DIR: "/data/vad" VAD_DIR: "/data/vad"
VAD_KEEP_FILES: "100" VAD_KEEP_FILES: "200"
VAD_MAX_AGE_DAYS: "7"
VAD_PREROLL_MS: "1000" VAD_PREROLL_MS: "1000"
VAD_POSTROLL_MS: "1000" VAD_POSTROLL_MS: "1000"
VAD_START_THRESHOLD: "900" VAD_START_THRESHOLD: "900"

View File

@@ -53,7 +53,8 @@ WAV_HEADER_BYTES = 44
VAD_ENABLED = getenv_bool("VAD_ENABLED", True) VAD_ENABLED = getenv_bool("VAD_ENABLED", True)
VAD_DIR = Path(os.getenv("VAD_DIR", "/data/vad")) VAD_DIR = Path(os.getenv("VAD_DIR", "/data/vad"))
VAD_KEEP_FILES = int(os.getenv("VAD_KEEP_FILES", "100")) VAD_KEEP_FILES = int(os.getenv("VAD_KEEP_FILES", "200"))
VAD_MAX_AGE_DAYS = int(os.getenv("VAD_MAX_AGE_DAYS", "7"))
VAD_PREROLL_MS = int(os.getenv("VAD_PREROLL_MS", "1000")) VAD_PREROLL_MS = int(os.getenv("VAD_PREROLL_MS", "1000"))
VAD_POSTROLL_MS = int(os.getenv("VAD_POSTROLL_MS", "1000")) VAD_POSTROLL_MS = int(os.getenv("VAD_POSTROLL_MS", "1000"))
VAD_START_THRESHOLD = int(os.getenv("VAD_START_THRESHOLD", "900")) VAD_START_THRESHOLD = int(os.getenv("VAD_START_THRESHOLD", "900"))
@@ -136,17 +137,34 @@ async def call_ha_webhook(event: str, payload: dict) -> None:
log.exception("ha webhook call failed") log.exception("ha webhook call failed")
def enforce_wav_retention(directory: Path, keep_files: int) -> None: def enforce_wav_retention(directory: Path, keep_files: int, max_age_days: int = 0) -> None:
if keep_files <= 0: if keep_files <= 0 and max_age_days <= 0:
return return
try: try:
directory.mkdir(parents=True, exist_ok=True) directory.mkdir(parents=True, exist_ok=True)
wavs = sorted( wavs = []
[p for p in directory.glob("*.wav") if p.is_file()], now = time.time()
key=lambda p: p.stat().st_mtime, max_age_seconds = max_age_days * 86400
) for p in directory.glob("*.wav"):
while len(wavs) > keep_files: if not p.is_file():
oldest = wavs.pop(0) continue
try:
st = p.stat()
except Exception:
continue
if max_age_seconds > 0 and (now - st.st_mtime) > max_age_seconds:
try:
p.unlink()
log.info("deleted old wav by age: %s", p)
except Exception:
log.exception("failed to delete old wav by age: %s", p)
continue
wavs.append((p, st.st_mtime))
wavs.sort(key=lambda x: x[1])
files = [p for (p, _) in wavs]
while keep_files > 0 and len(files) > keep_files:
oldest = files.pop(0)
try: try:
oldest.unlink() oldest.unlink()
log.info("deleted old wav: %s", oldest) log.info("deleted old wav: %s", oldest)
@@ -198,7 +216,7 @@ def write_vad_wav_segment(session: DeviceSession, pcm: bytes) -> Optional[str]:
wf.writeframes(pcm) wf.writeframes(pcm)
wf.close() wf.close()
session.vad_segment_index += 1 session.vad_segment_index += 1
enforce_wav_retention(VAD_DIR, VAD_KEEP_FILES) enforce_wav_retention(VAD_DIR, VAD_KEEP_FILES, VAD_MAX_AGE_DAYS)
return str(path) return str(path)
except Exception: except Exception:
log.exception("failed to write vad wav segment for %s", session.device_id) log.exception("failed to write vad wav segment for %s", session.device_id)

View File

@@ -6,7 +6,6 @@
#include "secrets.h" #include "secrets.h"
using namespace websockets; using namespace websockets;
static constexpr bool kDefaultStreamMode = EVS_DEFAULT_STREAM_MODE;
static constexpr bool kSerialCommandEcho = EVS_SERIAL_COMMAND_ECHO; static constexpr bool kSerialCommandEcho = EVS_SERIAL_COMMAND_ECHO;
static constexpr bool kMicUseRightChannel = EVS_MIC_USE_RIGHT_CHANNEL; static constexpr bool kMicUseRightChannel = EVS_MIC_USE_RIGHT_CHANNEL;
static constexpr int kMicS24ToS16Shift = EVS_MIC_S24_TO_S16_SHIFT; static constexpr int kMicS24ToS16Shift = EVS_MIC_S24_TO_S16_SHIFT;
@@ -38,8 +37,7 @@ static constexpr float PI_F = 3.14159265358979323846f;
enum class DeviceMode : uint8_t { enum class DeviceMode : uint8_t {
Idle, Idle,
StreamToServer, // Placeholder: ship PCM to remote STT/LLM/TTS service StreamToServer, // Ship PCM to remote STT/LLM/TTS service
LocalLoopback, // Debug mode: mic directly to speaker
}; };
static DeviceMode g_mode = DeviceMode::Idle; static DeviceMode g_mode = DeviceMode::Idle;
@@ -57,6 +55,8 @@ static size_t g_rxHead = 0;
static size_t g_rxTail = 0; static size_t g_rxTail = 0;
static size_t g_rxCount = 0; static size_t g_rxCount = 0;
static void setMode(DeviceMode mode);
static bool initMicI2s() { static bool initMicI2s() {
const i2s_config_t i2sConfig = { const i2s_config_t i2sConfig = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
@@ -207,18 +207,18 @@ static void onWsMessageCallback(WebsocketsMessage message) {
static void onWsEventCallback(WebsocketsEvent event, String) { static void onWsEventCallback(WebsocketsEvent event, String) {
if (event == WebsocketsEvent::ConnectionOpened) { if (event == WebsocketsEvent::ConnectionOpened) {
g_wsConnected = true; g_wsConnected = true;
if (g_mode == DeviceMode::StreamToServer && !g_streamingActive) { // Connection-driven mode: always stream on connect.
g_ws.send("{\"type\":\"start\"}"); setMode(DeviceMode::StreamToServer);
g_streamingActive = true;
playStartTone();
}
Serial.println("WS connected"); Serial.println("WS connected");
} else if (event == WebsocketsEvent::ConnectionClosed) { } else if (event == WebsocketsEvent::ConnectionClosed) {
if (g_streamingActive) { const bool wasStreaming = g_streamingActive;
playStopTone();
}
g_wsConnected = false; g_wsConnected = false;
g_streamingActive = false; g_streamingActive = false;
// Connection-driven mode: always idle on disconnect.
setMode(DeviceMode::Idle);
if (wasStreaming) {
playStopTone();
}
Serial.println("WS disconnected"); Serial.println("WS disconnected");
} }
} }
@@ -352,13 +352,11 @@ static void serviceSpeaker() {
static void printHelp() { static void printHelp() {
Serial.println(); Serial.println();
Serial.println("Commands:"); Serial.println("Commands:");
Serial.println(" i = idle");
Serial.println(" s = stream mode");
Serial.println(" l = local loopback mode");
Serial.println(" p = print network status"); Serial.println(" p = print network status");
Serial.println(" h = help"); Serial.println(" h = help");
Serial.print("Default on boot: "); Serial.println("Connection policy:");
Serial.println(kDefaultStreamMode ? "StreamToServer" : "LocalLoopback"); Serial.println(" connect -> StreamToServer (start)");
Serial.println(" disconnect -> Idle");
} }
static void handleSerialCommands() { static void handleSerialCommands() {
@@ -368,22 +366,15 @@ static void handleSerialCommands() {
Serial.print("RX cmd: "); Serial.print("RX cmd: ");
Serial.println(c); Serial.println(c);
} }
if (c == 'i') { if (c == 'p') {
setMode(DeviceMode::Idle);
Serial.println("Mode -> Idle");
} else if (c == 's') {
setMode(DeviceMode::StreamToServer);
Serial.println("Mode -> StreamToServer");
} else if (c == 'l') {
setMode(DeviceMode::LocalLoopback);
Serial.println("Mode -> LocalLoopback");
} else if (c == 'p') {
Serial.print("WiFi: "); Serial.print("WiFi: ");
Serial.print((WiFi.status() == WL_CONNECTED) ? "connected " : "disconnected "); Serial.print((WiFi.status() == WL_CONNECTED) ? "connected " : "disconnected ");
Serial.print("IP="); Serial.print("IP=");
Serial.println(WiFi.localIP()); Serial.println(WiFi.localIP());
Serial.print("WS: "); Serial.print("WS: ");
Serial.println(g_wsConnected ? "connected" : "disconnected"); Serial.println(g_wsConnected ? "connected" : "disconnected");
Serial.print("Mode: ");
Serial.println((g_mode == DeviceMode::StreamToServer) ? "StreamToServer" : "Idle");
} else if (c == 'h') { } else if (c == 'h') {
printHelp(); printHelp();
} else if (c != '\r' && c != '\n') { } else if (c != '\r' && c != '\n') {
@@ -410,11 +401,8 @@ void setup() {
g_ws.onEvent(onWsEventCallback); g_ws.onEvent(onWsEventCallback);
g_nextOutUs = micros(); g_nextOutUs = micros();
if (kDefaultStreamMode) { // Wait in idle until WS connect event switches to StreamToServer.
setMode(DeviceMode::StreamToServer); setMode(DeviceMode::Idle);
} else {
setMode(DeviceMode::LocalLoopback);
}
Serial.println("Audio init ok"); Serial.println("Audio init ok");
Serial.println("Set local environment values in include/secrets.h"); Serial.println("Set local environment values in include/secrets.h");
printHelp(); printHelp();
@@ -422,10 +410,8 @@ void setup() {
void loop() { void loop() {
handleSerialCommands(); handleSerialCommands();
if (g_mode != DeviceMode::LocalLoopback) { ensureConnectivity();
ensureConnectivity(); g_ws.poll();
g_ws.poll();
}
serviceSpeaker(); serviceSpeaker();
size_t bytesRead = 0; size_t bytesRead = 0;
@@ -444,10 +430,6 @@ void loop() {
if (g_mode == DeviceMode::StreamToServer) { if (g_mode == DeviceMode::StreamToServer) {
handleFrameForServer(pcm16, sampleCount); handleFrameForServer(pcm16, sampleCount);
publishMicTelemetryIfDue(pcm16, sampleCount); publishMicTelemetryIfDue(pcm16, sampleCount);
} else if (g_mode == DeviceMode::LocalLoopback) {
for (size_t i = 0; i < sampleCount; ++i) {
enqueuePcmSample(pcm16[i]);
}
} else { } else {
// idle // idle
} }