Retain transcript messages and add MQTT publish diagnostics
Some checks failed
Build and Push EVS Bridge Image / docker (push) Has been cancelled

This commit is contained in:
Kai
2026-02-13 18:23:36 +01:00
parent 9153e06ce5
commit e4170d9f42
6 changed files with 28 additions and 5 deletions

View File

@@ -38,6 +38,8 @@ VAD_MIN_SPEECH_MS=300
MQTT_VAD_TOPIC=evs/+/vad_segment MQTT_VAD_TOPIC=evs/+/vad_segment
MQTT_TRANSCRIPT_TOPIC_TEMPLATE=evs/{device_id}/transcript MQTT_TRANSCRIPT_TOPIC_TEMPLATE=evs/{device_id}/transcript
MQTT_STT_ERROR_TOPIC_TEMPLATE=evs/{device_id}/stt_error MQTT_STT_ERROR_TOPIC_TEMPLATE=evs/{device_id}/stt_error
STT_TRANSCRIPT_RETAIN=true
STT_ERROR_RETAIN=false
STT_MODEL=small STT_MODEL=small
STT_DEVICE=cpu STT_DEVICE=cpu
STT_COMPUTE_TYPE=int8 STT_COMPUTE_TYPE=int8

View File

@@ -108,6 +108,7 @@ You can build automations on these events (for STT/TTS pipelines or Node-RED han
- reads `wav_path` from event JSON - reads `wav_path` from event JSON
- transcribes with `faster-whisper` - transcribes with `faster-whisper`
- publishes transcript to `evs/<device_id>/transcript` - publishes transcript to `evs/<device_id>/transcript`
- `STT_TRANSCRIPT_RETAIN=true` publishes the transcript as a retained MQTT message, so the latest transcript stays visible in MQTT UIs
## 6.1) STT Worker Config ## 6.1) STT Worker Config

View File

@@ -24,6 +24,8 @@ services:
MQTT_VAD_TOPIC: "${MQTT_VAD_TOPIC:-evs/+/vad_segment}" MQTT_VAD_TOPIC: "${MQTT_VAD_TOPIC:-evs/+/vad_segment}"
MQTT_TRANSCRIPT_TOPIC_TEMPLATE: "${MQTT_TRANSCRIPT_TOPIC_TEMPLATE:-evs/{device_id}/transcript}" MQTT_TRANSCRIPT_TOPIC_TEMPLATE: "${MQTT_TRANSCRIPT_TOPIC_TEMPLATE:-evs/{device_id}/transcript}"
MQTT_STT_ERROR_TOPIC_TEMPLATE: "${MQTT_STT_ERROR_TOPIC_TEMPLATE:-evs/{device_id}/stt_error}" MQTT_STT_ERROR_TOPIC_TEMPLATE: "${MQTT_STT_ERROR_TOPIC_TEMPLATE:-evs/{device_id}/stt_error}"
STT_TRANSCRIPT_RETAIN: "${STT_TRANSCRIPT_RETAIN:-true}"
STT_ERROR_RETAIN: "${STT_ERROR_RETAIN:-false}"
STT_MODEL: "${STT_MODEL:-small}" STT_MODEL: "${STT_MODEL:-small}"
STT_DEVICE: "${STT_DEVICE:-cpu}" STT_DEVICE: "${STT_DEVICE:-cpu}"
STT_COMPUTE_TYPE: "${STT_COMPUTE_TYPE:-int8}" STT_COMPUTE_TYPE: "${STT_COMPUTE_TYPE:-int8}"

View File

@@ -16,6 +16,8 @@ ENV LOG_LEVEL=INFO \
MQTT_VAD_TOPIC=evs/+/vad_segment \ MQTT_VAD_TOPIC=evs/+/vad_segment \
MQTT_TRANSCRIPT_TOPIC_TEMPLATE=evs/{device_id}/transcript \ MQTT_TRANSCRIPT_TOPIC_TEMPLATE=evs/{device_id}/transcript \
MQTT_STT_ERROR_TOPIC_TEMPLATE=evs/{device_id}/stt_error \ MQTT_STT_ERROR_TOPIC_TEMPLATE=evs/{device_id}/stt_error \
STT_TRANSCRIPT_RETAIN=true \
STT_ERROR_RETAIN=false \
STT_MODEL=small \ STT_MODEL=small \
STT_DEVICE=cpu \ STT_DEVICE=cpu \
STT_COMPUTE_TYPE=int8 \ STT_COMPUTE_TYPE=int8 \

View File

@@ -7,6 +7,9 @@ Flow:
- reads: `wav_path` from JSON payload - reads: `wav_path` from JSON payload
- output topic: `evs/<device_id>/transcript` - output topic: `evs/<device_id>/transcript`
- error topic: `evs/<device_id>/stt_error` - error topic: `evs/<device_id>/stt_error`
- default retain:
  - transcript retained (`STT_TRANSCRIPT_RETAIN=true`)
  - error non-retained (`STT_ERROR_RETAIN=false`)
Default model: Default model:
- `STT_MODEL=small` - `STT_MODEL=small`

View File

@@ -35,6 +35,8 @@ MQTT_TRANSCRIPT_TOPIC_TEMPLATE = os.getenv(
MQTT_STT_ERROR_TOPIC_TEMPLATE = os.getenv( MQTT_STT_ERROR_TOPIC_TEMPLATE = os.getenv(
"MQTT_STT_ERROR_TOPIC_TEMPLATE", f"{MQTT_BASE_TOPIC}" + "/{device_id}/stt_error" "MQTT_STT_ERROR_TOPIC_TEMPLATE", f"{MQTT_BASE_TOPIC}" + "/{device_id}/stt_error"
) )
STT_TRANSCRIPT_RETAIN = getenv_bool("STT_TRANSCRIPT_RETAIN", True)
STT_ERROR_RETAIN = getenv_bool("STT_ERROR_RETAIN", False)
STT_MODEL = os.getenv("STT_MODEL", "small") STT_MODEL = os.getenv("STT_MODEL", "small")
STT_DEVICE = os.getenv("STT_DEVICE", "cpu") STT_DEVICE = os.getenv("STT_DEVICE", "cpu")
@@ -66,11 +68,22 @@ class WorkerState:
state = WorkerState() state = WorkerState()
def publish_json(topic: str, payload: dict) -> None: def publish_json(topic: str, payload: dict, retain: bool = False) -> None:
if not state.client: if not state.client:
return return
try: try:
state.client.publish(topic, json.dumps(payload), qos=0, retain=False) body = json.dumps(payload)
info = state.client.publish(topic, body, qos=0, retain=retain)
rc = getattr(info, "rc", "n/a")
mid = getattr(info, "mid", "n/a")
log.info(
"mqtt publish: topic=%s retain=%s rc=%s mid=%s bytes=%s",
topic,
retain,
rc,
mid,
len(body),
)
except Exception: except Exception:
log.exception("mqtt publish failed: topic=%s", topic) log.exception("mqtt publish failed: topic=%s", topic)
@@ -93,7 +106,7 @@ def transcribe_wav(device_id: str, wav_path: str) -> None:
"wav_path": wav_path, "wav_path": wav_path,
"error": "wav_not_found", "error": "wav_not_found",
} }
publish_json(topic_for_error(device_id), payload) publish_json(topic_for_error(device_id), payload, retain=STT_ERROR_RETAIN)
log.warning("wav not found: device=%s path=%s", device_id, wav_path) log.warning("wav not found: device=%s path=%s", device_id, wav_path)
return return
@@ -122,7 +135,7 @@ def transcribe_wav(device_id: str, wav_path: str) -> None:
"language_probability": getattr(info, "language_probability", 0.0), "language_probability": getattr(info, "language_probability", 0.0),
"model": STT_MODEL, "model": STT_MODEL,
} }
publish_json(topic_for_transcript(device_id), payload) publish_json(topic_for_transcript(device_id), payload, retain=STT_TRANSCRIPT_RETAIN)
log.info("transcript: device=%s chars=%s wav=%s", device_id, len(text), wav_path) log.info("transcript: device=%s chars=%s wav=%s", device_id, len(text), wav_path)
except Exception as exc: except Exception as exc:
payload = { payload = {
@@ -132,7 +145,7 @@ def transcribe_wav(device_id: str, wav_path: str) -> None:
"wav_path": wav_path, "wav_path": wav_path,
"error": str(exc), "error": str(exc),
} }
publish_json(topic_for_error(device_id), payload) publish_json(topic_for_error(device_id), payload, retain=STT_ERROR_RETAIN)
log.exception("transcription failed: device=%s wav=%s", device_id, wav_path) log.exception("transcription failed: device=%s wav=%s", device_id, wav_path)