From 04c59c3b258b70b800dec18ba082b5670732210a Mon Sep 17 00:00:00 2001 From: Kai Date: Sun, 15 Feb 2026 13:16:41 +0100 Subject: [PATCH] Add EVS control portal, io_mode switching, and DAC-only speaker path --- README.md | 111 +++--- bridge/README.md | 7 + bridge/app.py | 25 +- control-portal/Dockerfile | 16 + control-portal/README.md | 59 +++ control-portal/app.py | 60 +++ control-portal/requirements.txt | 2 + control-portal/templates/index.html | 214 +++++++++++ include/secrets.example.h | 17 + mumble-bridge/Dockerfile | 2 +- mumble-bridge/README.md | 30 ++ mumble-bridge/app.py | 241 +++++++++++- src/main.cpp | 562 ++++++++++++++++++++++++++-- 13 files changed, 1257 insertions(+), 89 deletions(-) create mode 100644 control-portal/Dockerfile create mode 100644 control-portal/README.md create mode 100644 control-portal/app.py create mode 100644 control-portal/requirements.txt create mode 100644 control-portal/templates/index.html diff --git a/README.md b/README.md index 972ddff..ae74169 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,89 @@ # EVS - Embedded Voice System -ESP32-basierter Voice-Client mit: -- `INMP441` als Mikrofon (I2S) -- `PAM8403` als Verstärker (PWM-Audio vom ESP32) -- WebSocket-Audio-Streaming zur zentralen Bridge -- optionaler Home-Assistant/MQTT-Integration +ESP32-based voice client with: +- `INMP441` microphone (I2S) +- `PAM8403` amplifier (analog input from ESP32 DAC) +- WebSocket audio streaming to central bridge +- MQTT remote control +- optional UDP microphone stream (remote switchable) -## Ziel +## Project Structure -Dieses Projekt bildet die Basis für eine lokale "Echo-Alternative": -- Audioaufnahme am ESP32 -- Verarbeitung zentral (z. B. Home Assistant / Container) -- Audioantwort zurück an den ESP32-Lautsprecher +- `src/main.cpp`: Firmware (audio, WiFi, WebSocket, MQTT commands, UDP stream) +- `include/secrets.h`: local environment values (not versioned) +- `include/secrets.example.h`: template for `secrets.h` +- `bridge/`: EVS bridge (WebSocket + MQTT + VAD + STT integration) +- `stt-worker/`: MQTT-based STT worker (`vad_segment` -> `transcript`) -## Projektstruktur +## Firmware Behavior -- `src/main.cpp`: Firmware (Audio, WiFi, WebSocket, Modi) -- `include/secrets.h`: lokale Umgebungswerte (nicht versioniert) -- `include/secrets.example.h`: Vorlage für `secrets.h` -- `bridge/`: Dockerisierte EVS-Bridge (WebSocket + MQTT + HA-Hooks) +- On WebSocket connect: mode switches automatically to `StreamToServer` +- On WebSocket disconnect: mode switches automatically to `Idle` +- Serial commands (`115200 baud`): + - `p` = print network and mode status + - `h` = help -## Firmware-Modi +## MQTT Remote Control -Serielle Kommandos (`115200 Baud`): -- `l` = Local Loopback (Mic direkt auf Speaker) -- `s` = Stream to Server (Audio zur Bridge) -- `i` = Idle -- `p` = Netzwerkstatus -- `h` = Hilfe +Topics: +- command: `evs//command` +- status: `evs//status` -## Setup (ESP32) +Example commands: +- `{"cmd":"status"}` +- `{"cmd":"mode","value":"idle"}` +- `{"cmd":"mode","value":"stream"}` +- `{"cmd":"io_mode","value":"mic"}` +- `{"cmd":"io_mode","value":"spk"}` +- `{"cmd":"udp_stream","enabled":true,"target_host":"10.100.3.247","target_port":5004}` +- `{"cmd":"udp_stream","enabled":false}` -1. `include/secrets.example.h` nach `include/secrets.h` kopieren. -2. In `include/secrets.h` setzen: -- WiFi-Zugang +Status payload includes: +- current mode +- current `io_mode` (`mic` or `spk`) +- WS/MQTT connectivity +- UDP stream state and target (only relevant in `mic` mode) + +## ESP32 Setup + +1. Copy `include/secrets.example.h` to `include/secrets.h`. +2. Set values in `include/secrets.h`: +- WiFi credentials - `EVS_BRIDGE_HOST` - `EVS_WS_PORT`, `EVS_WS_PATH` - `EVS_DEVICE_ID` -3. Firmware bauen/flashen: +- role config: + - `EVS_ENABLE_MIC` + - `EVS_ENABLE_SPEAKER` + - `EVS_DEFAULT_IO_MODE` (`"mic"` or `"spk"`) + - `EVS_SPK_DAC_PIN` (`25` or `26`) +- optional MQTT overrides: + - `EVS_MQTT_HOST`, `EVS_MQTT_PORT` + - `EVS_MQTT_USER`, `EVS_MQTT_PASSWORD` + - `EVS_MQTT_BASE_TOPIC` +3. Build/flash: ```bash pio run -e esp32dev_core2 -t upload pio device monitor -b 115200 ``` -## Setup (Bridge) - -1. In `bridge/` wechseln. -2. `.env.example` nach `.env` kopieren und Werte setzen. -3. Bridge starten: -```bash -docker compose up -d --build -``` - -Standard-WebSocket-Endpunkt: -- `ws://:8765/audio` - -## Audioformat +## Audio Format - PCM16LE -- Mono +- mono - 16 kHz -## Hinweise +## Notes -- `include/secrets.h` ist absichtlich in `.gitignore`. -- Für den Core-3-Test ist in `platformio.ini` eine eigene Environment vorbereitet. -- Details zur Bridge findest du in `bridge/README.md`. +- `include/secrets.h` is intentionally ignored in git. +- Speaker path is DAC-only (PWM removed): + - `GPIO25` (DAC1) or `GPIO26` (DAC2) -> amplifier input (`L+` or `R+`) + - common `GND` between ESP32 and amplifier is mandatory +- INMP441 wiring (default in firmware): + - `WS` -> `GPIO25` + - `SCK` -> `GPIO26` + - `SD` -> `GPIO33` + - `VDD` -> `3V3` + - `GND` -> `GND` +- For newer Arduino core testing, `esp32dev_core3` is available in `platformio.ini`. +- Bridge and STT worker details are in `bridge/README.md` and `stt-worker/README.md`. diff --git a/bridge/README.md b/bridge/README.md index 6b39027..1d74c2d 100644 --- a/bridge/README.md +++ b/bridge/README.md @@ -9,6 +9,7 @@ It provides: - Optional Home Assistant webhook callbacks (`connected`, `start`, `stop`, `disconnected`) - VAD auto-segmentation (`vad_segment`) with pre-roll/post-roll - Optional STT worker (`vad_segment` -> `transcript`) via MQTT +- Optional 1:1 device pairing (`mic_device -> speaker_device`) for echo routing ## 1) Start the bridge @@ -47,6 +48,11 @@ In `include/secrets.h`: - set bridge host - set WS port/path - set unique `EVS_DEVICE_ID` +- set runtime IO mode: + - `EVS_DEFAULT_IO_MODE "mic"` for microphone device + - `EVS_DEFAULT_IO_MODE "spk"` for speaker device +- set DAC output pin on speaker device: + - `EVS_SPK_DAC_PIN 25` or `26` Then upload firmware. @@ -160,6 +166,7 @@ services: MQTT_BASE_TOPIC: "evs" MQTT_TTS_TOPIC: "evs/+/play_pcm16le" MQTT_STATUS_RETAIN: "true" + DEVICE_PAIR_MAP: '{"esp32-evs-1-mic":"esp32-evs-1-spk"}' HA_WEBHOOK_URL: "" SAVE_SESSIONS: "true" SESSIONS_DIR: "/data/sessions" diff --git a/bridge/app.py b/bridge/app.py index 67fdf6d..564d413 100644 --- a/bridge/app.py +++ b/bridge/app.py @@ -41,6 +41,7 @@ MQTT_PASSWORD = os.getenv("MQTT_PASSWORD", "") MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "evs") MQTT_TTS_TOPIC = os.getenv("MQTT_TTS_TOPIC", f"{MQTT_BASE_TOPIC}/+/play_pcm16le") MQTT_STATUS_RETAIN = getenv_bool("MQTT_STATUS_RETAIN", True) +DEVICE_PAIR_MAP_JSON = os.getenv("DEVICE_PAIR_MAP", "").strip() HA_WEBHOOK_URL = os.getenv("HA_WEBHOOK_URL", "").strip() SAVE_SESSIONS = getenv_bool("SAVE_SESSIONS", True) @@ -125,6 +126,21 @@ class BridgeState: state = BridgeState() +DEVICE_PAIR_MAP: Dict[str, str] = {} +if DEVICE_PAIR_MAP_JSON: + try: + raw = json.loads(DEVICE_PAIR_MAP_JSON) + if isinstance(raw, dict): + DEVICE_PAIR_MAP = {str(k): str(v) for k, v in raw.items() if str(k) and str(v)} + log.info("device pair map loaded: %s", DEVICE_PAIR_MAP) + else: + log.warning("DEVICE_PAIR_MAP must be a JSON object") + except Exception: + log.exception("failed to parse DEVICE_PAIR_MAP") + + +def paired_output_device(device_id: str) -> str: + return DEVICE_PAIR_MAP.get(device_id, device_id) def build_metrics(device_id: str, session: DeviceSession) -> dict: @@ -408,14 +424,16 @@ async def handle_text_message(device_id: str, session: DeviceSession, raw: str) "peak": msg.get("peak", 0), "avg_abs": msg.get("avg_abs", 0), "samples": msg.get("samples", 0), + "mic_gain": msg.get("mic_gain", 0), } state.publish_status(device_id, payload) log.info( - "mic_level: device=%s peak=%s avg_abs=%s samples=%s", + "mic_level: device=%s peak=%s avg_abs=%s samples=%s mic_gain=%s", device_id, payload["peak"], payload["avg_abs"], payload["samples"], + payload["mic_gain"], ) return @@ -437,7 +455,10 @@ async def handle_binary_message(device_id: str, session: DeviceSession, data: by drop = len(session.pcm_bytes) - MAX_SESSION_BYTES del session.pcm_bytes[:drop] if ECHO_ENABLED: - await session.ws.send(data) + target_device = paired_output_device(device_id) + ok = await state.send_binary_to_device(target_device, data) + if not ok and target_device != device_id: + log.debug("paired output device not connected: src=%s target=%s", device_id, target_device) def parse_device_id(path: str) -> str: diff --git a/control-portal/Dockerfile b/control-portal/Dockerfile new file mode 100644 index 0000000..b05b657 --- /dev/null +++ b/control-portal/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY app.py . +COPY templates ./templates + +ENV PORTAL_BIND_HOST=0.0.0.0 +ENV PORTAL_BIND_PORT=8088 + +EXPOSE 8088 + +CMD ["python", "app.py"] diff --git a/control-portal/README.md b/control-portal/README.md new file mode 100644 index 0000000..0023c0b --- /dev/null +++ b/control-portal/README.md @@ -0,0 +1,59 @@ +# EVS Control Portal + +Web UI to publish EVS MQTT command payloads without manually crafting JSON. + +## Features + +- Device picker (`evs//command`) +- Buttons/forms for: + - `status` + - `mode` (`idle` / `stream`) + - `io_mode` (`mic` / `spk`) + - `udp_stream` start/stop + - `mic_gain` set/up/down +- Raw JSON publish for advanced commands + +## Environment + +- `MQTT_HOST` (default `127.0.0.1`) +- `MQTT_PORT` (default `1883`) +- `MQTT_USER` (optional) +- `MQTT_PASSWORD` (optional) +- `MQTT_BASE_TOPIC` (default `evs`) +- `PORTAL_BIND_HOST` (default `0.0.0.0`) +- `PORTAL_BIND_PORT` (default `8088`) + +## Build + Run + +```bash +docker build -f control-portal/Dockerfile -t evs-control-portal:latest control-portal +docker run --rm -p 8088:8088 \ + -e MQTT_HOST=10.100.3.247 \ + -e MQTT_PORT=1883 \ + -e MQTT_BASE_TOPIC=evs \ + evs-control-portal:latest +``` + +Then open: + +`http://:8088` + +## Portainer Service Example + +```yaml +services: + evs-control-portal: + image: git.khnm-zimmerling.de/kai/evs-control-portal:latest + container_name: evs-control-portal + restart: unless-stopped + ports: + - "8088:8088" + environment: + MQTT_HOST: "10.100.3.247" + MQTT_PORT: "1883" + MQTT_USER: "" + MQTT_PASSWORD: "" + MQTT_BASE_TOPIC: "evs" + PORTAL_BIND_HOST: "0.0.0.0" + PORTAL_BIND_PORT: "8088" +``` diff --git a/control-portal/app.py b/control-portal/app.py new file mode 100644 index 0000000..e46ffd0 --- /dev/null +++ b/control-portal/app.py @@ -0,0 +1,60 @@ +import json +import os +from typing import Any, Dict + +from flask import Flask, jsonify, render_template, request +import paho.mqtt.client as mqtt + + +MQTT_HOST = os.getenv("MQTT_HOST", "127.0.0.1") +MQTT_PORT = int(os.getenv("MQTT_PORT", "1883")) +MQTT_USER = os.getenv("MQTT_USER", "") +MQTT_PASSWORD = os.getenv("MQTT_PASSWORD", "") +MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "evs") +PORTAL_BIND_HOST = os.getenv("PORTAL_BIND_HOST", "0.0.0.0") +PORTAL_BIND_PORT = int(os.getenv("PORTAL_BIND_PORT", "8088")) + + +app = Flask(__name__) +mqtt_client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="evs-control-portal") +if MQTT_USER: + mqtt_client.username_pw_set(MQTT_USER, MQTT_PASSWORD) +mqtt_client.connect(MQTT_HOST, MQTT_PORT, keepalive=30) +mqtt_client.loop_start() + + +def _topic_for_device(device_id: str) -> str: + return f"{MQTT_BASE_TOPIC}/{device_id}/command" + + +def _publish(device_id: str, payload: Dict[str, Any]) -> Dict[str, Any]: + topic = _topic_for_device(device_id) + encoded = json.dumps(payload, separators=(",", ":")) + info = mqtt_client.publish(topic, encoded, qos=0, retain=False) + return { + "ok": info.rc == mqtt.MQTT_ERR_SUCCESS, + "topic": topic, + "payload": payload, + "mqtt_rc": info.rc, + } + + +@app.get("/") +def index(): + return render_template("index.html", mqtt_host=MQTT_HOST, mqtt_port=MQTT_PORT, base_topic=MQTT_BASE_TOPIC) + + +@app.post("/api/publish") +def api_publish(): + body = request.get_json(force=True, silent=True) or {} + device_id = str(body.get("device_id", "")).strip() + payload = body.get("payload") + if not device_id: + return jsonify({"ok": False, "error": "device_id is required"}), 400 + if not isinstance(payload, dict): + return jsonify({"ok": False, "error": "payload must be a JSON object"}), 400 + return jsonify(_publish(device_id, payload)) + + +if __name__ == "__main__": + app.run(host=PORTAL_BIND_HOST, port=PORTAL_BIND_PORT, debug=False) diff --git a/control-portal/requirements.txt b/control-portal/requirements.txt new file mode 100644 index 0000000..b9865a8 --- /dev/null +++ b/control-portal/requirements.txt @@ -0,0 +1,2 @@ +flask==3.0.3 +paho-mqtt==2.1.0 diff --git a/control-portal/templates/index.html b/control-portal/templates/index.html new file mode 100644 index 0000000..dbc0685 --- /dev/null +++ b/control-portal/templates/index.html @@ -0,0 +1,214 @@ + + + + + + EVS Control Portal + + + +
+
+

EVS Control Portal

+
MQTT: {{ mqtt_host }}:{{ mqtt_port }} | Base Topic: {{ base_topic }}/<device_id>/command
+ + +
+ +
+
+

Status / Mode

+ +
+ + +
+
+ +
+

IO Mode

+
+ + +
+
+ +
+

UDP Stream

+ + + + +
+ + +
+
+ +
+

Mic Gain

+ + + +
+ + +
+
+ +
+

Raw JSON

+ + + +
+
+ +
+
+ + + + diff --git a/include/secrets.example.h b/include/secrets.example.h index f129e1a..da23c32 100644 --- a/include/secrets.example.h +++ b/include/secrets.example.h @@ -11,6 +11,11 @@ static const char* WIFI_PASSWORD = "REPLACE_WIFI_PASSWORD"; static const char* EVS_BRIDGE_HOST = "REPLACE_BRIDGE_IP_OR_HOST"; static constexpr uint16_t EVS_WS_PORT = 8765; static const char* EVS_WS_PATH = "/audio"; +static const char* EVS_MQTT_HOST = "REPLACE_MQTT_IP_OR_HOST"; +static constexpr uint16_t EVS_MQTT_PORT = 1883; +static const char* EVS_MQTT_USER = ""; +static const char* EVS_MQTT_PASSWORD = ""; +static const char* EVS_MQTT_BASE_TOPIC = "evs"; // Unique device name per ESP32 static const char* EVS_DEVICE_ID = "esp32-room-name"; @@ -20,6 +25,18 @@ static constexpr uint32_t EVS_RECONNECT_MS = 5000; static constexpr bool EVS_DEFAULT_STREAM_MODE = true; static constexpr bool EVS_SERIAL_COMMAND_ECHO = true; +// Device role flags: +// - mic-only client: EVS_ENABLE_MIC=1, EVS_ENABLE_SPEAKER=0 +// - speaker-only client: EVS_ENABLE_MIC=0, EVS_ENABLE_SPEAKER=1 +// - combined client: EVS_ENABLE_MIC=1, EVS_ENABLE_SPEAKER=1 +#define EVS_ENABLE_MIC 1 +#define EVS_ENABLE_SPEAKER 1 +// Runtime IO mode at boot ("mic" or "spk"), can be changed later via MQTT command. +#define EVS_DEFAULT_IO_MODE "mic" +// Speaker output uses ESP32 internal DAC only. +// Valid pins: GPIO25 or GPIO26. +#define EVS_SPK_DAC_PIN 25 + // INMP441 tuning // L/R pin on mic: // - GND usually = left channel diff --git a/mumble-bridge/Dockerfile b/mumble-bridge/Dockerfile index bebd24e..ae36f04 100644 --- a/mumble-bridge/Dockerfile +++ b/mumble-bridge/Dockerfile @@ -3,7 +3,7 @@ FROM python:3.11-slim WORKDIR /app RUN apt-get update \ - && apt-get install -y --no-install-recommends libopus0 \ + && apt-get install -y --no-install-recommends libopus0 openssl \ && rm -rf /var/lib/apt/lists/* COPY requirements.txt . diff --git a/mumble-bridge/README.md b/mumble-bridge/README.md index 034cdb0..b64512d 100644 --- a/mumble-bridge/README.md +++ b/mumble-bridge/README.md @@ -15,6 +15,14 @@ Deploy one container per EVS client so every device appears as its own Mumble us - `MUMBLE_USERNAME`: username for this EVS client (example: `EVS-esp32-evs-1`) - `MUMBLE_PASSWORD`: optional - `UDP_LISTEN_PORT`: UDP port this client will stream to +- `MUMBLE_CERTFILE`: optional path to client certificate file for registration/auth +- `MUMBLE_KEYFILE`: optional path to private key file (if not bundled in cert) +- `MUMBLE_AUTO_CERT`: auto-generate self-signed cert/key when missing (default `true`) +- `MUMBLE_CERT_DIR`: target dir for auto-generated certs (default `/data/certs`) +- `MUMBLE_CERT_DAYS`: validity days for auto-generated cert (default `3650`) +- `MUMBLE_CERT_SUBJECT`: optional openssl subject, default `/CN=` +- `MUMBLE_CERT_AUTO_RENEW`: renew cert automatically at startup when close to expiry (default `false`) +- `MUMBLE_CERT_RENEW_BEFORE_DAYS`: renewal threshold in days (default `30`) ## Optional ENV @@ -30,6 +38,13 @@ Deploy one container per EVS client so every device appears as its own Mumble us - `MUMBLE_SAMPLE_RATE`: default `48000` - `MUMBLE_FRAME_MS`: default `20` - `MUMBLE_AUDIO_GAIN`: default `1.0` +- `BRIDGE_STATS_INTERVAL_SEC`: periodic runtime stats in logs, default `5` (`0` disables) +- `VAD_ENABLED`: enable speech gating before sending to Mumble (`true|false`, default `false`) +- `VAD_RMS_THRESHOLD`: RMS gate threshold on incoming PCM16 (default `700`) +- `VAD_OPEN_FRAMES`: consecutive voice frames to open gate (default `2`) +- `VAD_CLOSE_FRAMES`: consecutive silence frames to close gate (default `20`) +- `HPF_ENABLED`: enable high-pass/DC blocker to reduce low-frequency rumble (default `true`) +- `HPF_CUTOFF_HZ`: high-pass cutoff frequency (default `120.0`) ## Example docker compose service @@ -50,5 +65,20 @@ services: MUMBLE_PORT: "64738" MUMBLE_USERNAME: "EVS-esp32-evs-1" MUMBLE_PASSWORD: "" + MUMBLE_CERTFILE: "/run/secrets/mumble_client.crt" + MUMBLE_KEYFILE: "/run/secrets/mumble_client.key" + MUMBLE_AUTO_CERT: "false" MUMBLE_CHANNEL: "Bots" ``` + +For automatic self-signed cert generation when files are missing: + +```yaml + MUMBLE_AUTO_CERT: "true" + MUMBLE_CERT_DIR: "/data/certs" + MUMBLE_CERT_DAYS: "3650" + MUMBLE_CERT_AUTO_RENEW: "true" + MUMBLE_CERT_RENEW_BEFORE_DAYS: "30" + # optional: + # MUMBLE_CERT_SUBJECT: "/CN=EVS-esp32-evs-1" +``` diff --git a/mumble-bridge/app.py b/mumble-bridge/app.py index 7807a3c..7eeae0d 100644 --- a/mumble-bridge/app.py +++ b/mumble-bridge/app.py @@ -1,7 +1,9 @@ import audioop import logging +import math import os import socket +import subprocess import time from typing import Optional, Tuple @@ -35,12 +37,27 @@ MUMBLE_HOST = os.getenv("MUMBLE_HOST", "") MUMBLE_PORT = int(os.getenv("MUMBLE_PORT", "64738")) MUMBLE_USERNAME = os.getenv("MUMBLE_USERNAME", f"EVS-{DEVICE_ID}") MUMBLE_PASSWORD = os.getenv("MUMBLE_PASSWORD", "") +MUMBLE_CERTFILE = os.getenv("MUMBLE_CERTFILE", "").strip() +MUMBLE_KEYFILE = os.getenv("MUMBLE_KEYFILE", "").strip() +MUMBLE_AUTO_CERT = getenv_bool("MUMBLE_AUTO_CERT", True) +MUMBLE_CERT_DIR = os.getenv("MUMBLE_CERT_DIR", "/data/certs").strip() +MUMBLE_CERT_DAYS = int(os.getenv("MUMBLE_CERT_DAYS", "3650")) +MUMBLE_CERT_SUBJECT = os.getenv("MUMBLE_CERT_SUBJECT", "").strip() +MUMBLE_CERT_AUTO_RENEW = getenv_bool("MUMBLE_CERT_AUTO_RENEW", False) +MUMBLE_CERT_RENEW_BEFORE_DAYS = int(os.getenv("MUMBLE_CERT_RENEW_BEFORE_DAYS", "30")) MUMBLE_CHANNEL = os.getenv("MUMBLE_CHANNEL", "").strip() MUMBLE_CHANNEL_ID = int(os.getenv("MUMBLE_CHANNEL_ID", "0")) MUMBLE_RECONNECT_SEC = int(os.getenv("MUMBLE_RECONNECT_SEC", "5")) MUMBLE_VERBOSE = getenv_bool("MUMBLE_VERBOSE", False) MUMBLE_CONNECT_TIMEOUT_SEC = int(os.getenv("MUMBLE_CONNECT_TIMEOUT_SEC", "30")) MUMBLE_CONNECT_STRICT = getenv_bool("MUMBLE_CONNECT_STRICT", False) +BRIDGE_STATS_INTERVAL_SEC = int(os.getenv("BRIDGE_STATS_INTERVAL_SEC", "5")) +VAD_ENABLED = getenv_bool("VAD_ENABLED", False) +VAD_RMS_THRESHOLD = int(os.getenv("VAD_RMS_THRESHOLD", "700")) +VAD_OPEN_FRAMES = int(os.getenv("VAD_OPEN_FRAMES", "2")) +VAD_CLOSE_FRAMES = int(os.getenv("VAD_CLOSE_FRAMES", "20")) +HPF_ENABLED = getenv_bool("HPF_ENABLED", True) +HPF_CUTOFF_HZ = float(os.getenv("HPF_CUTOFF_HZ", "120.0")) def _channel_name(ch) -> str: @@ -179,18 +196,26 @@ def connect_mumble() -> pymumble.Mumble: raise RuntimeError("MUMBLE_HOST is required") log.info( - "connecting mumble: host=%s port=%s user=%s channel=%s channel_id=%s", + "connecting mumble: host=%s port=%s user=%s channel=%s channel_id=%s cert=%s key=%s", MUMBLE_HOST, MUMBLE_PORT, MUMBLE_USERNAME, MUMBLE_CHANNEL or "", MUMBLE_CHANNEL_ID, + MUMBLE_CERTFILE or "", + MUMBLE_KEYFILE or "", ) + + certfile, keyfile = resolve_cert_paths() + ensure_cert_material(certfile, keyfile) + mumble = pymumble.Mumble( MUMBLE_HOST, MUMBLE_USERNAME, password=MUMBLE_PASSWORD or None, port=MUMBLE_PORT, + certfile=certfile, + keyfile=keyfile, reconnect=True, debug=MUMBLE_VERBOSE, ) @@ -266,6 +291,101 @@ def connect_mumble() -> pymumble.Mumble: return mumble +def resolve_cert_paths() -> Tuple[Optional[str], Optional[str]]: + certfile = MUMBLE_CERTFILE or None + keyfile = MUMBLE_KEYFILE or None + + if certfile is None and MUMBLE_AUTO_CERT: + certfile = os.path.join(MUMBLE_CERT_DIR, f"{DEVICE_ID}.crt") + if keyfile is None and MUMBLE_AUTO_CERT: + keyfile = os.path.join(MUMBLE_CERT_DIR, f"{DEVICE_ID}.key") + + return certfile, keyfile + + +def ensure_cert_material(certfile: Optional[str], keyfile: Optional[str]) -> None: + if certfile is None and keyfile is None: + return + + cert_exists = bool(certfile and os.path.exists(certfile)) + key_exists = bool(keyfile and os.path.exists(keyfile)) + + should_renew = False + if cert_exists and key_exists and MUMBLE_CERT_AUTO_RENEW: + should_renew = cert_needs_renewal(certfile) + if should_renew: + log.warning( + "cert renewal required: cert=%s renew_before_days=%d", + certfile, + MUMBLE_CERT_RENEW_BEFORE_DAYS, + ) + + if cert_exists and key_exists and not should_renew: + return + + if not MUMBLE_AUTO_CERT: + if certfile and not cert_exists: + raise RuntimeError(f"MUMBLE_CERTFILE not found: {certfile}") + if keyfile and not key_exists: + raise RuntimeError(f"MUMBLE_KEYFILE not found: {keyfile}") + return + + if not certfile or not keyfile: + raise RuntimeError("auto cert generation requires both certfile and keyfile paths") + + os.makedirs(os.path.dirname(certfile), exist_ok=True) + os.makedirs(os.path.dirname(keyfile), exist_ok=True) + + subject = MUMBLE_CERT_SUBJECT or f"/CN={MUMBLE_USERNAME}" + cmd = [ + "openssl", + "req", + "-x509", + "-newkey", + "rsa:2048", + "-nodes", + "-keyout", + keyfile, + "-out", + certfile, + "-days", + str(MUMBLE_CERT_DAYS), + "-subj", + subject, + ] + log.info("creating self-signed mumble cert: cert=%s key=%s subject=%s", certfile, keyfile, subject) + try: + subprocess.run(cmd, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as exc: + raise RuntimeError( + f"openssl cert generation failed (rc={exc.returncode}): {exc.stderr.strip()}" + ) from exc + + +def cert_needs_renewal(certfile: str) -> bool: + if MUMBLE_CERT_RENEW_BEFORE_DAYS <= 0: + return False + check_seconds = MUMBLE_CERT_RENEW_BEFORE_DAYS * 86400 + cmd = [ + "openssl", + "x509", + "-in", + certfile, + "-checkend", + str(check_seconds), + "-noout", + ] + try: + # openssl x509 -checkend returns: + # 0 -> certificate valid longer than check_seconds + # 1 -> certificate will expire within check_seconds + result = subprocess.run(cmd, capture_output=True, text=True, check=False) + return result.returncode != 0 + except Exception: + log.exception("failed to check cert expiration, forcing renewal") + return True + + def open_udp_socket() -> socket.socket: sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) sock.bind((UDP_LISTEN_HOST, UDP_LISTEN_PORT)) @@ -274,7 +394,43 @@ def open_udp_socket() -> socket.socket: return sock -def process_audio_chunk(raw_pcm: bytes, rate_state: Optional[Tuple]) -> Tuple[bytes, Optional[Tuple]]: +def apply_highpass_pcm16(data: bytes, prev_x: float, prev_y: float, sample_rate: int) -> Tuple[bytes, float, float]: + if len(data) < 2 or sample_rate <= 0: + return data, prev_x, prev_y + if HPF_CUTOFF_HZ <= 0.0: + return data, prev_x, prev_y + + # 1st-order DC-block / high-pass: + # y[n] = alpha * (y[n-1] + x[n] - x[n-1]) + rc = 1.0 / (2.0 * math.pi * HPF_CUTOFF_HZ) + dt = 1.0 / float(sample_rate) + alpha = rc / (rc + dt) + + samples = memoryview(data).cast("h") + out = bytearray(len(data)) + out_mv = memoryview(out).cast("h") + + x_prev = prev_x + y_prev = prev_y + for i, x in enumerate(samples): + y = alpha * (y_prev + float(x) - x_prev) + if y > 32767.0: + y = 32767.0 + elif y < -32768.0: + y = -32768.0 + out_mv[i] = int(y) + x_prev = float(x) + y_prev = y + + return bytes(out), x_prev, y_prev + + +def process_audio_chunk( + raw_pcm: bytes, + rate_state: Optional[Tuple], + hp_prev_x: float, + hp_prev_y: float, +) -> Tuple[bytes, Optional[Tuple], float, float]: data = raw_pcm if INPUT_SAMPLE_RATE != MUMBLE_SAMPLE_RATE: data, rate_state = audioop.ratecv( @@ -287,47 +443,120 @@ def process_audio_chunk(raw_pcm: bytes, rate_state: Optional[Tuple]) -> Tuple[by ) if MUMBLE_AUDIO_GAIN != 1.0: data = audioop.mul(data, 2, MUMBLE_AUDIO_GAIN) - return data, rate_state + if HPF_ENABLED: + data, hp_prev_x, hp_prev_y = apply_highpass_pcm16(data, hp_prev_x, hp_prev_y, MUMBLE_SAMPLE_RATE) + return data, rate_state, hp_prev_x, hp_prev_y def run() -> None: udp = open_udp_socket() mumble = None rate_state = None + hp_prev_x = 0.0 + hp_prev_y = 0.0 out_buffer = bytearray() frame_bytes = int((MUMBLE_SAMPLE_RATE * 2 * FRAME_MS) / 1000) # mono, 16-bit + udp_packets = 0 + udp_bytes = 0 + frames_sent = 0 + send_errors = 0 + vad_dropped_packets = 0 + vad_open = not VAD_ENABLED + vad_voice_frames = 0 + vad_silence_frames = 0 + stats_t0 = time.time() + last_udp_from = None while True: if mumble is None: try: mumble = connect_mumble() log.info("mumble ready") + # Reset ratecv state when reconnecting so timing is clean. + rate_state = None + hp_prev_x = 0.0 + hp_prev_y = 0.0 except Exception: log.exception("mumble connect failed, retrying in %ss", MUMBLE_RECONNECT_SEC) time.sleep(MUMBLE_RECONNECT_SEC) continue try: - packet, _addr = udp.recvfrom(8192) + packet, addr = udp.recvfrom(8192) except socket.timeout: # No UDP data right now; keep loop alive. - continue + packet = None except Exception: log.exception("udp receive failed") continue + now = time.time() + if BRIDGE_STATS_INTERVAL_SEC > 0 and (now - stats_t0) >= BRIDGE_STATS_INTERVAL_SEC: + dt = max(0.001, now - stats_t0) + log.info( + "bridge stats: udp_packets=%d udp_bytes=%d udp_kbps=%.1f frames_sent=%d send_errors=%d buffer_bytes=%d vad_open=%s vad_dropped=%d last_from=%s", + udp_packets, + udp_bytes, + (udp_bytes * 8.0 / 1000.0) / dt, + frames_sent, + send_errors, + len(out_buffer), + vad_open, + vad_dropped_packets, + last_udp_from or "-", + ) + udp_packets = 0 + udp_bytes = 0 + frames_sent = 0 + send_errors = 0 + vad_dropped_packets = 0 + stats_t0 = now + if not packet: continue try: - processed, rate_state = process_audio_chunk(packet, rate_state) + udp_packets += 1 + udp_bytes += len(packet) + if last_udp_from != str(addr): + last_udp_from = str(addr) + log.info("udp source: %s", last_udp_from) + + if VAD_ENABLED: + rms = audioop.rms(packet, 2) + if rms >= VAD_RMS_THRESHOLD: + vad_voice_frames += 1 + vad_silence_frames = 0 + if not vad_open and vad_voice_frames >= VAD_OPEN_FRAMES: + vad_open = True + log.info("vad open: rms=%d threshold=%d", rms, VAD_RMS_THRESHOLD) + else: + vad_silence_frames += 1 + vad_voice_frames = 0 + if vad_open and vad_silence_frames >= VAD_CLOSE_FRAMES: + vad_open = False + out_buffer.clear() + log.info("vad close: rms=%d threshold=%d", rms, VAD_RMS_THRESHOLD) + + if not vad_open: + vad_dropped_packets += 1 + continue + + processed, rate_state, hp_prev_x, hp_prev_y = process_audio_chunk( + packet, + rate_state, + hp_prev_x, + hp_prev_y, + ) out_buffer.extend(processed) while len(out_buffer) >= frame_bytes: frame = bytes(out_buffer[:frame_bytes]) del out_buffer[:frame_bytes] mumble.sound_output.add_sound(frame) + frames_sent += 1 except Exception: + send_errors += 1 log.exception("audio processing/send failed") mumble = None time.sleep(MUMBLE_RECONNECT_SEC) diff --git a/src/main.cpp b/src/main.cpp index 021431c..88c258b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,15 +1,36 @@ #include #include +#include #include +#include #include #include "driver/i2s.h" #include "secrets.h" using namespace websockets; +#ifndef EVS_ENABLE_MIC +#define EVS_ENABLE_MIC 1 +#endif + +#ifndef EVS_ENABLE_SPEAKER +#define EVS_ENABLE_SPEAKER 1 +#endif + +#ifndef EVS_DEFAULT_IO_MODE +#define EVS_DEFAULT_IO_MODE "mic" +#endif + +#ifndef EVS_SPK_DAC_PIN +#define EVS_SPK_DAC_PIN 25 +#endif + static constexpr bool kSerialCommandEcho = EVS_SERIAL_COMMAND_ECHO; static constexpr bool kMicUseRightChannel = EVS_MIC_USE_RIGHT_CHANNEL; static constexpr int kMicS24ToS16Shift = EVS_MIC_S24_TO_S16_SHIFT; -static constexpr float kMicGain = EVS_MIC_GAIN; +static constexpr bool kMicHardwareAvailable = (EVS_ENABLE_MIC != 0); +static constexpr bool kSpeakerHardwareAvailable = (EVS_ENABLE_SPEAKER != 0); +static constexpr float MIC_GAIN_MIN = 0.1f; +static constexpr float MIC_GAIN_MAX = 8.0f; // --------------------------- // Project config @@ -24,14 +45,14 @@ static constexpr int PIN_I2S_WS = 25; static constexpr int PIN_I2S_SCK = 26; static constexpr int PIN_I2S_SD = 33; -// ESP32 PWM -> PAM8403 IN (L+ or R+) -static constexpr int PIN_AUDIO_OUT = 27; -static constexpr int PWM_CHANNEL = 0; -static constexpr uint32_t PWM_FREQ = 22050; -static constexpr uint8_t PWM_RES_BITS = 8; +// ESP32 DAC -> amplifier IN (L+ or R+) static constexpr uint32_t SPEAKER_SAMPLE_RATE = 16000; static constexpr uint32_t MIC_TELEMETRY_INTERVAL_MS = 1000; static constexpr float PI_F = 3.14159265358979323846f; +static constexpr uint32_t MQTT_RECONNECT_MS = 5000; +static constexpr uint16_t MQTT_BUFFER_SIZE = 512; +static constexpr uint16_t UDP_MAX_PACKET_BYTES = MIC_FRAME_SAMPLES * sizeof(int16_t); +static constexpr uint32_t UDP_STATUS_INTERVAL_MS = 2000; // WiFi / WebSocket @@ -40,7 +61,13 @@ enum class DeviceMode : uint8_t { StreamToServer, // Ship PCM to remote STT/LLM/TTS service }; +enum class IoMode : uint8_t { + Mic, + Speaker, +}; + static DeviceMode g_mode = DeviceMode::Idle; +static IoMode g_ioMode = IoMode::Mic; static int32_t g_micBuffer[MIC_FRAME_SAMPLES]; static WebsocketsClient g_ws; static bool g_wsConnected = false; @@ -48,6 +75,22 @@ static uint32_t g_lastConnectTryMs = 0; static uint32_t g_nextOutUs = 0; static bool g_streamingActive = false; static uint32_t g_lastMicTelemetryMs = 0; +static WiFiClient g_mqttNet; +static PubSubClient g_mqtt(g_mqttNet); +static bool g_mqttConnected = false; +static uint32_t g_lastMqttConnectTryMs = 0; +static WiFiUDP g_udp; +static bool g_udpEnabled = false; +static String g_udpHost; +static uint16_t g_udpPort = 0; +static IPAddress g_udpIp; +static bool g_udpIpValid = false; +static uint32_t g_udpPacketsSent = 0; +static float g_micGain = EVS_MIC_GAIN; +static String g_mqttCmdTopic; +static String g_mqttStatusTopic; +static String g_mqttUdpStatusTopic; +static uint32_t g_lastUdpStatusMs = 0; static constexpr size_t RX_SAMPLES_CAP = 16000; static int16_t g_rxSamples[RX_SAMPLES_CAP]; @@ -56,8 +99,22 @@ static size_t g_rxTail = 0; static size_t g_rxCount = 0; static void setMode(DeviceMode mode); +static void setIoMode(IoMode mode); +static void publishClientStatus(); +static void publishUdpStatus(bool force = false); +static void handleUdpCommand(const String& msg); +static bool parseJsonStringField(const String& msg, const char* key, String& out); +static bool parseJsonIntField(const String& msg, const char* key, int& out); +static bool parseJsonBoolField(const String& msg, const char* key, bool& out); +static bool parseJsonFloatField(const String& msg, const char* key, float& out); +static bool isMicModeActive() { return kMicHardwareAvailable && g_ioMode == IoMode::Mic; } +static bool isSpeakerModeActive() { return kSpeakerHardwareAvailable && g_ioMode == IoMode::Speaker; } +static bool isDacPinValid(int pin) { return pin == 25 || pin == 26; } static bool initMicI2s() { + if (!kMicHardwareAvailable) { + return true; + } const i2s_config_t i2sConfig = { .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), .sample_rate = MIC_SAMPLE_RATE, @@ -88,26 +145,25 @@ static bool initMicI2s() { return true; } -static bool initPwmOut() { -#if ESP_ARDUINO_VERSION_MAJOR >= 3 - return ledcAttach(PIN_AUDIO_OUT, PWM_FREQ, PWM_RES_BITS); -#else - ledcSetup(PWM_CHANNEL, PWM_FREQ, PWM_RES_BITS); - ledcAttachPin(PIN_AUDIO_OUT, PWM_CHANNEL); - return true; -#endif +static bool initSpeakerOut() { + if (!kSpeakerHardwareAvailable) { + return true; + } + return isDacPinValid(EVS_SPK_DAC_PIN); } -static inline void pwmWrite(uint8_t value) { -#if ESP_ARDUINO_VERSION_MAJOR >= 3 - ledcWrite(PIN_AUDIO_OUT, value); -#else - ledcWrite(PWM_CHANNEL, value); -#endif +static inline void speakerWriteU8(uint8_t value) { + if (!kSpeakerHardwareAvailable) { + return; + } + if (!isDacPinValid(EVS_SPK_DAC_PIN)) { + return; + } + dacWrite(EVS_SPK_DAC_PIN, value); } -// Convert signed 16-bit PCM to unsigned 8-bit PWM domain. -static inline uint8_t pcm16ToPwm8(int16_t s) { +// Convert signed 16-bit PCM to unsigned 8-bit DAC domain. +static inline uint8_t pcm16ToU8(int16_t s) { return (uint16_t)(s + 32768) >> 8; } @@ -132,6 +188,9 @@ static bool dequeuePcmSample(int16_t& out) { } static void enqueuePcmFrame(const int16_t* frame, size_t count) { + if (!kSpeakerHardwareAvailable) { + return; + } for (size_t i = 0; i < count; ++i) { if (!enqueuePcmSample(frame[i])) { break; @@ -140,6 +199,9 @@ static void enqueuePcmFrame(const int16_t* frame, size_t count) { } static void enqueueTone(uint16_t freqHz, uint16_t durationMs, int16_t amplitude) { + if (!kSpeakerHardwareAvailable) { + return; + } if (freqHz == 0 || durationMs == 0) { return; } @@ -160,6 +222,9 @@ static void enqueueTone(uint16_t freqHz, uint16_t durationMs, int16_t amplitude) } static void enqueueSilenceMs(uint16_t durationMs) { + if (!kSpeakerHardwareAvailable) { + return; + } const uint32_t sampleCount = (uint32_t)SPEAKER_SAMPLE_RATE * durationMs / 1000U; for (uint32_t i = 0; i < sampleCount; ++i) { if (!enqueuePcmSample(0)) { @@ -183,6 +248,9 @@ static void playStopTone() { } static void onWsMessageCallback(WebsocketsMessage message) { + if (!isSpeakerModeActive()) { + return; + } if (!message.isBinary()) { return; } @@ -207,9 +275,14 @@ static void onWsMessageCallback(WebsocketsMessage message) { static void onWsEventCallback(WebsocketsEvent event, String) { if (event == WebsocketsEvent::ConnectionOpened) { g_wsConnected = true; - // Connection-driven mode: always stream on connect. - setMode(DeviceMode::StreamToServer); + // Mic-mode clients stream immediately on connect. + if (isMicModeActive()) { + setMode(DeviceMode::StreamToServer); + } else { + setMode(DeviceMode::Idle); + } Serial.println("WS connected"); + publishClientStatus(); } else if (event == WebsocketsEvent::ConnectionClosed) { const bool wasStreaming = g_streamingActive; g_wsConnected = false; @@ -220,6 +293,7 @@ static void onWsEventCallback(WebsocketsEvent event, String) { playStopTone(); } Serial.println("WS disconnected"); + publishClientStatus(); } } @@ -256,20 +330,382 @@ static void ensureConnectivity() { } } +static bool resolveUdpHost() { + if (g_udpHost.length() == 0 || g_udpPort == 0) { + g_udpIpValid = false; + return false; + } + // Fast path for numeric IPv4 literals to avoid DNS dependency. + if (g_udpIp.fromString(g_udpHost)) { + g_udpIpValid = true; + return true; + } + if (WiFi.hostByName(g_udpHost.c_str(), g_udpIp)) { + g_udpIpValid = true; + return true; + } + g_udpIpValid = false; + return false; +} + +static String mqttClientId() { + String id = "evs-client-"; + id += EVS_DEVICE_ID; + return id; +} + +static void mqttMessageCallback(char* topic, uint8_t* payload, unsigned int length) { + String msg; + msg.reserve(length); + for (unsigned int i = 0; i < length; ++i) msg += (char)payload[i]; + Serial.print("MQTT cmd ["); + Serial.print(topic); + Serial.print("]: "); + Serial.println(msg); + + String cmd; + if (!parseJsonStringField(msg, "cmd", cmd)) { + return; + } + cmd.toLowerCase(); + if (cmd == "status") { + publishClientStatus(); + return; + } + if (cmd == "mode") { + String value; + if (parseJsonStringField(msg, "value", value)) { + value.toLowerCase(); + if (value == "idle") { + setMode(DeviceMode::Idle); + } else if (value == "stream" || value == "streamtoserver") { + setMode(DeviceMode::StreamToServer); + } + publishClientStatus(); + } + return; + } + if (cmd == "io_mode") { + String value; + if (parseJsonStringField(msg, "value", value)) { + value.toLowerCase(); + if (value == "mic") { + setIoMode(IoMode::Mic); + } else if (value == "spk" || value == "speaker") { + setIoMode(IoMode::Speaker); + } + publishClientStatus(); + publishUdpStatus(true); + } + return; + } + if (cmd == "udp_stream") { + if (!isMicModeActive()) { + g_udpEnabled = false; + g_udpIpValid = false; + Serial.println("UDP stream ignored: io_mode is not mic"); + publishUdpStatus(true); + publishClientStatus(); + return; + } + handleUdpCommand(msg); + publishClientStatus(); + return; + } + if (cmd == "mic_gain") { + if (!isMicModeActive()) { + Serial.println("MIC gain command ignored: io_mode is not mic"); + return; + } + float value = 0.0f; + bool changed = false; + if (parseJsonFloatField(msg, "value", value)) { + g_micGain = value; + changed = true; + } + + float delta = 0.0f; + if (parseJsonFloatField(msg, "delta", delta)) { + g_micGain += delta; + changed = true; + } + + String action; + if (parseJsonStringField(msg, "action", action)) { + action.toLowerCase(); + float step = 0.1f; + parseJsonFloatField(msg, "step", step); + if (step < 0.0f) step = -step; + if (action == "up" || action == "inc" || action == "increase") { + g_micGain += step; + changed = true; + } else if (action == "down" || action == "dec" || action == "decrease") { + g_micGain -= step; + changed = true; + } + } + + if (g_micGain < MIC_GAIN_MIN) g_micGain = MIC_GAIN_MIN; + if (g_micGain > MIC_GAIN_MAX) g_micGain = MIC_GAIN_MAX; + if (changed) { + Serial.print("MIC gain set to "); + Serial.println(g_micGain, 3); + } + publishClientStatus(); + return; + } +} + +static void ensureMqttConnectivity() { + if (WiFi.status() != WL_CONNECTED) return; + if (g_mqttConnected && g_mqtt.connected()) return; + + const uint32_t now = millis(); + if ((now - g_lastMqttConnectTryMs) < MQTT_RECONNECT_MS) return; + g_lastMqttConnectTryMs = now; + + g_mqtt.setServer(EVS_MQTT_HOST, EVS_MQTT_PORT); + g_mqtt.setCallback(mqttMessageCallback); + + String clientId = mqttClientId(); + bool ok = false; + if (strlen(EVS_MQTT_USER) > 0) { + ok = g_mqtt.connect(clientId.c_str(), EVS_MQTT_USER, EVS_MQTT_PASSWORD); + } else { + ok = g_mqtt.connect(clientId.c_str()); + } + g_mqttConnected = ok; + if (!ok) { + Serial.print("MQTT connect failed, state="); + Serial.println(g_mqtt.state()); + return; + } + Serial.println("MQTT connected"); + g_mqtt.subscribe(g_mqttCmdTopic.c_str()); + publishClientStatus(); +} + static void setMode(DeviceMode mode) { if (g_mode == mode) { return; } - if (g_mode == DeviceMode::StreamToServer && g_wsConnected && g_streamingActive) { + if (isMicModeActive() && g_mode == DeviceMode::StreamToServer && g_wsConnected && g_streamingActive) { g_ws.send("{\"type\":\"stop\"}"); g_streamingActive = false; - playStopTone(); + if (isSpeakerModeActive()) playStopTone(); } g_mode = mode; - if (g_mode == DeviceMode::StreamToServer && g_wsConnected && !g_streamingActive) { + if (isMicModeActive() && g_mode == DeviceMode::StreamToServer && g_wsConnected && !g_streamingActive) { g_ws.send("{\"type\":\"start\"}"); g_streamingActive = true; - playStartTone(); + if (isSpeakerModeActive()) playStartTone(); + } +} + +static void setIoMode(IoMode mode) { + if (mode == g_ioMode) { + return; + } + if (mode == IoMode::Mic && !kMicHardwareAvailable) { + Serial.println("io_mode mic rejected: mic hardware disabled"); + return; + } + if (mode == IoMode::Speaker && !kSpeakerHardwareAvailable) { + Serial.println("io_mode spk rejected: speaker hardware disabled"); + return; + } + + if (g_streamingActive && g_wsConnected) { + g_ws.send("{\"type\":\"stop\"}"); + g_streamingActive = false; + } + g_udpEnabled = false; + g_udpIpValid = false; + g_udpPacketsSent = 0; + + g_ioMode = mode; + g_rxHead = g_rxTail = g_rxCount = 0; + + if (isMicModeActive() && g_wsConnected) { + setMode(DeviceMode::StreamToServer); + } else { + setMode(DeviceMode::Idle); + } + + Serial.print("io_mode set to "); + Serial.println(g_ioMode == IoMode::Mic ? "mic" : "spk"); +} + +static bool parseJsonStringField(const String& msg, const char* key, String& out) { + String pattern = "\""; + pattern += key; + pattern += "\""; + int k = msg.indexOf(pattern); + if (k < 0) return false; + int colon = msg.indexOf(':', k + pattern.length()); + if (colon < 0) return false; + int q1 = msg.indexOf('"', colon + 1); + if (q1 < 0) return false; + int q2 = msg.indexOf('"', q1 + 1); + if (q2 < 0) return false; + out = msg.substring(q1 + 1, q2); + return true; +} + +static bool parseJsonIntField(const String& msg, const char* key, int& out) { + String pattern = "\""; + pattern += key; + pattern += "\""; + int k = msg.indexOf(pattern); + if (k < 0) return false; + int colon = msg.indexOf(':', k + pattern.length()); + if (colon < 0) return false; + int start = colon + 1; + while (start < (int)msg.length() && (msg[start] == ' ' || msg[start] == '\t')) start++; + int end = start; + while (end < (int)msg.length() && isDigit(msg[end])) end++; + if (end <= start) return false; + out = msg.substring(start, end).toInt(); + return true; +} + +static bool parseJsonBoolField(const String& msg, const char* key, bool& out) { + String pattern = "\""; + pattern += key; + pattern += "\""; + int k = msg.indexOf(pattern); + if (k < 0) return false; + int colon = msg.indexOf(':', k + pattern.length()); + if (colon < 0) return false; + int start = colon + 1; + while (start < (int)msg.length() && (msg[start] == ' ' || msg[start] == '\t')) start++; + if (msg.startsWith("true", start)) { + out = true; + return true; + } + if (msg.startsWith("false", start)) { + out = false; + return true; + } + return false; +} + +static bool parseJsonFloatField(const String& msg, const char* key, float& out) { + String pattern = "\""; + pattern += key; + pattern += "\""; + int k = msg.indexOf(pattern); + if (k < 0) return false; + int colon = msg.indexOf(':', k + pattern.length()); + if (colon < 0) return false; + int start = colon + 1; + while (start < (int)msg.length() && (msg[start] == ' ' || msg[start] == '\t')) start++; + int end = start; + while (end < (int)msg.length()) { + const char c = msg[end]; + if ((c >= '0' && c <= '9') || c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E') { + end++; + } else { + break; + } + } + if (end <= start) return false; + out = msg.substring(start, end).toFloat(); + return true; +} + +static void handleUdpCommand(const String& msg) { + bool enabled = g_udpEnabled; + const bool hasEnabled = parseJsonBoolField(msg, "enabled", enabled); + + String host; + const bool hasHost = parseJsonStringField(msg, "target_host", host); + if (hasHost) { + g_udpHost = host; + } + int p = 0; + const bool hasPort = parseJsonIntField(msg, "target_port", p); + if (hasPort && p > 0 && p <= 65535) { + g_udpPort = (uint16_t)p; + } + + Serial.print("UDP cmd parsed enabled="); + Serial.print(hasEnabled ? (enabled ? "true" : "false") : ""); + Serial.print(" host="); + Serial.print(hasHost ? host : ""); + Serial.print(" port="); + Serial.println(hasPort ? String(p) : ""); + + if (enabled) { + if (!resolveUdpHost()) { + Serial.println("UDP target resolve failed"); + g_udpEnabled = false; + return; + } + g_udpEnabled = true; + Serial.print("UDP target resolved: "); + Serial.print(g_udpIp); + Serial.print(":"); + Serial.println(g_udpPort); + } else { + g_udpEnabled = false; + Serial.println("UDP stream disabled"); + } + publishUdpStatus(true); +} + +static void publishClientStatus() { + if (!g_mqttConnected || !g_mqtt.connected()) return; + String msg = "{\"type\":\"client_status\",\"device_id\":\""; + msg += EVS_DEVICE_ID; + msg += "\",\"mic_enabled\":"; + msg += isMicModeActive() ? "true" : "false"; + msg += ",\"speaker_enabled\":"; + msg += isSpeakerModeActive() ? "true" : "false"; + msg += ",\"io_mode\":\""; + msg += (g_ioMode == IoMode::Mic) ? "mic" : "spk"; + msg += "\",\"mode\":\""; + msg += (g_mode == DeviceMode::StreamToServer) ? "stream" : "idle"; + msg += "\",\"ws_connected\":"; + msg += g_wsConnected ? "true" : "false"; + msg += ",\"mqtt_connected\":"; + msg += (g_mqttConnected && g_mqtt.connected()) ? "true" : "false"; + msg += "}"; + const bool ok = g_mqtt.publish(g_mqttStatusTopic.c_str(), msg.c_str(), true); + if (!ok) { + Serial.print("MQTT status publish failed, len="); + Serial.print(msg.length()); + Serial.print(" state="); + Serial.println(g_mqtt.state()); + } +} + +static void publishUdpStatus(bool force) { + if (!g_mqttConnected || !g_mqtt.connected()) return; + const uint32_t now = millis(); + if (!force && (now - g_lastUdpStatusMs) < UDP_STATUS_INTERVAL_MS) return; + g_lastUdpStatusMs = now; + + String msg = "{\"type\":\"udp_status\",\"device_id\":\""; + msg += EVS_DEVICE_ID; + msg += "\",\"enabled\":"; + msg += g_udpEnabled ? "true" : "false"; + msg += ",\"target_resolved\":"; + msg += g_udpIpValid ? "true" : "false"; + msg += ",\"target_host\":\""; + msg += g_udpHost; + msg += "\",\"target_port\":"; + msg += String(g_udpPort); + msg += ",\"packets_sent\":"; + msg += String(g_udpPacketsSent); + msg += "}"; + + const bool ok = g_mqtt.publish(g_mqttUdpStatusTopic.c_str(), msg.c_str(), true); + if (!ok) { + Serial.print("MQTT udp status publish failed, len="); + Serial.print(msg.length()); + Serial.print(" state="); + Serial.println(g_mqtt.state()); } } @@ -282,6 +718,9 @@ static void handleFrameForServer(const int16_t* frame, size_t count) { } static void publishMicTelemetryIfDue(const int16_t* frame, size_t count) { + if (!isMicModeActive()) { + return; + } if (count == 0) { return; } @@ -306,6 +745,8 @@ static void publishMicTelemetryIfDue(const int16_t* frame, size_t count) { Serial.print(peak); Serial.print(" avg_abs="); Serial.print(avg_abs); + Serial.print(" gain="); + Serial.print(g_micGain, 3); Serial.print(" ws="); Serial.println(g_wsConnected ? "connected" : "disconnected"); @@ -316,22 +757,30 @@ static void publishMicTelemetryIfDue(const int16_t* frame, size_t count) { msg += String(avg_abs); msg += ",\"samples\":"; msg += String((unsigned)count); + msg += ",\"mic_gain\":"; + msg += String(g_micGain, 3); msg += "}"; g_ws.send(msg); } } static inline int16_t convertMicSampleToPcm16(int32_t raw32) { + if (!isMicModeActive()) { + return 0; + } // INMP441 uses 24-bit signed samples packed into 32-bit I2S slots. int32_t s24 = raw32 >> 8; int32_t s16 = s24 >> kMicS24ToS16Shift; - float scaled = (float)s16 * kMicGain; + float scaled = (float)s16 * g_micGain; if (scaled > 32767.0f) scaled = 32767.0f; if (scaled < -32768.0f) scaled = -32768.0f; return (int16_t)scaled; } static void serviceSpeaker() { + if (!isSpeakerModeActive()) { + return; + } const uint32_t periodUs = 1000000UL / SPEAKER_SAMPLE_RATE; uint32_t now = micros(); int processed = 0; @@ -340,9 +789,9 @@ static void serviceSpeaker() { g_nextOutUs += periodUs; int16_t s = 0; if (dequeuePcmSample(s)) { - pwmWrite(pcm16ToPwm8(s)); + speakerWriteU8(pcm16ToU8(s)); } else { - pwmWrite(128); + speakerWriteU8(128); } now = micros(); ++processed; @@ -354,6 +803,8 @@ static void printHelp() { Serial.println("Commands:"); Serial.println(" p = print network status"); Serial.println(" h = help"); + Serial.println("MQTT cmd:"); + Serial.println(" {\"cmd\":\"io_mode\",\"value\":\"mic|spk\"}"); Serial.println("Connection policy:"); Serial.println(" connect -> StreamToServer (start)"); Serial.println(" disconnect -> Idle"); @@ -392,18 +843,38 @@ void setup() { Serial.println("ERROR: I2S init failed"); while (true) delay(1000); } - if (!initPwmOut()) { - Serial.println("ERROR: PWM init failed"); + if (!initSpeakerOut()) { + Serial.println("ERROR: speaker DAC init failed (EVS_SPK_DAC_PIN must be 25 or 26)"); while (true) delay(1000); } g_ws.onMessage(onWsMessageCallback); g_ws.onEvent(onWsEventCallback); + g_mqtt.setBufferSize(MQTT_BUFFER_SIZE); + g_mqttCmdTopic = String(EVS_MQTT_BASE_TOPIC) + "/" + EVS_DEVICE_ID + "/command"; + g_mqttStatusTopic = String(EVS_MQTT_BASE_TOPIC) + "/" + EVS_DEVICE_ID + "/status"; + g_mqttUdpStatusTopic = String(EVS_MQTT_BASE_TOPIC) + "/" + EVS_DEVICE_ID + "/udp_status"; g_nextOutUs = micros(); + String defaultMode = EVS_DEFAULT_IO_MODE; + defaultMode.toLowerCase(); + if (defaultMode == "spk" || defaultMode == "speaker") { + g_ioMode = IoMode::Speaker; + } else { + g_ioMode = IoMode::Mic; + } + // Wait in idle until WS connect event switches to StreamToServer. setMode(DeviceMode::Idle); Serial.println("Audio init ok"); + Serial.print("Hardware: mic="); + Serial.print(kMicHardwareAvailable ? "yes" : "no"); + Serial.print(" speaker="); + Serial.println(kSpeakerHardwareAvailable ? "yes" : "no"); + Serial.print("Active io_mode: "); + Serial.println(g_ioMode == IoMode::Mic ? "mic" : "spk"); + Serial.print("Speaker DAC pin: "); + Serial.println(EVS_SPK_DAC_PIN); Serial.println("Set local environment values in include/secrets.h"); printHelp(); } @@ -411,9 +882,22 @@ void setup() { void loop() { handleSerialCommands(); ensureConnectivity(); + ensureMqttConnectivity(); g_ws.poll(); + if (g_mqtt.connected()) { + g_mqtt.loop(); + g_mqttConnected = true; + } else { + g_mqttConnected = false; + } + publishUdpStatus(false); serviceSpeaker(); + if (!isMicModeActive()) { + delay(5); + return; + } + size_t bytesRead = 0; const esp_err_t res = i2s_read( MIC_I2S_PORT, g_micBuffer, sizeof(g_micBuffer), &bytesRead, 0); @@ -430,7 +914,15 @@ void loop() { if (g_mode == DeviceMode::StreamToServer) { handleFrameForServer(pcm16, sampleCount); publishMicTelemetryIfDue(pcm16, sampleCount); - } else { - // idle + } + + if (g_udpEnabled && g_udpIpValid && g_udpPort > 0) { + const size_t nBytes = sampleCount * sizeof(int16_t); + if (nBytes <= UDP_MAX_PACKET_BYTES) { + g_udp.beginPacket(g_udpIp, g_udpPort); + g_udp.write(reinterpret_cast(pcm16), nBytes); + g_udp.endPacket(); + ++g_udpPacketsSent; + } } }