From 30863b96f75b0d50549a3ef2dc6953d077a16241 Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Tue, 30 Jun 2026 19:39:00 +0100 Subject: [PATCH 01/10] feat(conversation_kit): add product-agnostic language utilities for voice agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New leaf subpackage signalwire.conversation_kit — the deterministic pieces a voice agent needs to understand input, compute values, and speak output correctly, none tied to any product: - dates: compute_date (spoken day -> ISO calendar math), WEEKDAYS, and RESOLVE_DATE_PARAMS (a resolve_date tool's JSON-schema fragment). - inputs: validate_input + is_valid_email/phone/number, and input_request_payload for the typed-input (on-screen keypad) channel. - verbalizer: TTS-ready per-language output (number/unit/date/email/spell/ measure_text) plus prompt guidance(), behind a small plugin registry. English and Polish ship; get(lang) falls back to English. Zero dependencies. 20 unit tests under tests/unit/conversation_kit. --- .../signalwire/conversation_kit/README.md | 120 ++++++ .../signalwire/conversation_kit/__init__.py | 54 +++ .../signalwire/conversation_kit/dates.py | 121 ++++++ .../signalwire/conversation_kit/inputs.py | 81 ++++ .../conversation_kit/verbalizer/__init__.py | 39 ++ .../conversation_kit/verbalizer/base.py | 165 ++++++++ .../verbalizer/languages/__init__.py | 9 + .../verbalizer/languages/en.py | 30 ++ .../verbalizer/languages/pl.py | 367 ++++++++++++++++++ .../conversation_kit/verbalizer/registry.py | 42 ++ tests/unit/conversation_kit/test_dates.py | 56 +++ tests/unit/conversation_kit/test_inputs.py | 57 +++ .../conversation_kit/test_verbalizer_pl.py | 160 ++++++++ 13 files changed, 1301 insertions(+) create mode 100644 signalwire/signalwire/conversation_kit/README.md create mode 100644 signalwire/signalwire/conversation_kit/__init__.py create mode 100644 signalwire/signalwire/conversation_kit/dates.py create mode 100644 signalwire/signalwire/conversation_kit/inputs.py create mode 100644 signalwire/signalwire/conversation_kit/verbalizer/__init__.py create mode 100644 signalwire/signalwire/conversation_kit/verbalizer/base.py create mode 100644 signalwire/signalwire/conversation_kit/verbalizer/languages/__init__.py create mode 100644 signalwire/signalwire/conversation_kit/verbalizer/languages/en.py create mode 100644 signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py create mode 100644 signalwire/signalwire/conversation_kit/verbalizer/registry.py create mode 100644 tests/unit/conversation_kit/test_dates.py create mode 100644 tests/unit/conversation_kit/test_inputs.py create mode 100644 tests/unit/conversation_kit/test_verbalizer_pl.py diff --git a/signalwire/signalwire/conversation_kit/README.md b/signalwire/signalwire/conversation_kit/README.md new file mode 100644 index 00000000..52e5381f --- /dev/null +++ b/signalwire/signalwire/conversation_kit/README.md @@ -0,0 +1,120 @@ +# conversation_kit + +Product-agnostic language utilities for voice agents — the deterministic +pieces an agent needs to *understand what the caller said*, *compute the right value*, and +*say things back correctly*, without baking in any one product's wording. + +``` +caller speech ──▶ understand (dates, inputs) ──▶ compute ──▶ speak back (verbalizer) ──▶ TTS +``` + +It ships with the SignalWire SDK; an agent calls these helpers and wraps the results into +its SWAIG responses and prompts. + +## Why it exists + +Models are excellent at understanding intent and unreliable at two things that must be +exact on a live call: + +1. **Calendar math** — "next Saturday" must resolve to one specific date, every time. +2. **Speaking values aloud** — "0.156 mm/s", "a.b@gmail.com", or "2026-07-04" must come out + as natural, correct speech in the caller's language, not as digits a TTS engine mangles. + +`conversation_kit` does both deterministically, so the model never has to. + +## The three layers + +```python +from signalwire.conversation_kit import compute_date, validate_input, verbalizer +``` + +### `dates` — spoken-date math + +The model passes the *semantic parts* it heard; the arithmetic happens here, so a wrong date +can never be spoken. + +```python +from datetime import date +from signalwire.conversation_kit import compute_date, RESOLVE_DATE_PARAMS, WEEKDAYS + +today = date(2026, 6, 30) # a Tuesday +compute_date({"weekday": "saturday", "which": "next"}, today) # -> date(2026, 7, 11) +compute_date({"relative": "tomorrow"}, today) # -> date(2026, 7, 1) +compute_date({"day": 15, "month": 7}, today) # -> date(2026, 7, 15) +``` + +`next ` = that weekday in the following calendar week; a bare/"this"/"coming" weekday += the soonest upcoming one (a same-day weekday rolls forward, so the agent never silently books +"today"). Returns `None` when nothing resolvable was supplied. `RESOLVE_DATE_PARAMS` is a +ready-made JSON-schema fragment for a `resolve_date` tool's parameters; `WEEKDAYS` is the +canonical lowercase list. + +### `inputs` — validation + typed-input channel + +```python +from signalwire.conversation_kit import validate_input, input_request_payload + +validate_input("a.b@gmail.com", "email") # True (also "phone", "number") +input_request_payload("installer_email", label="Installer's email", input_type="email") +# -> {"type": "input_request", "field": "installer_email", +# "label": "Installer's email", "input_type": "email"} +``` + +`input_request_payload(...)` is the small event an agent sends to a connected app to reveal + +focus an on-screen field (for values speech-to-text can't reliably capture); `validate_input` +checks the typed value before it's accepted. + +### `verbalizer` — TTS-ready, per-language output + +```python +from signalwire.conversation_kit import verbalizer + +v = verbalizer.get("pl") +v.number("2.6") # 'dwa przecinek sześć' +v.unit("0.156", "mm/s") # 'zero przecinek sto pięćdziesiąt sześć milimetra na sekundę' +v.date("2026-07-04") # 'sobota, czwartego lipca dwa tysiące dwudziestego szóstego roku' +v.email("a.b@gmail.com") # 'a kropka b małpka gmail kropka com' +verbalizer.available() # ['en', 'pl'] +``` + +Two helpers the agent leans on most: + +- **`measure_text(text)`** rewrites measured values + units found in a model-produced sentence + into spoken form (idempotent, safe to run over any reply). +- **`guidance(glossary=None)`** returns per-language speaking rules woven with an optional + product glossary, ready to drop into the model's prompt. + +`get(lang)` falls back to English for an unregistered language, so callers never guard. + +## Adding a language + +`Verbalizer` is a concrete, language-neutral base — subclass it and override only what differs +(`number`, `date`, usually `unit`/`spell`, plus the `SEPARATORS`/`LETTERS`/`MEASURE_UNITS`/ +`INSTRUCTION` class attributes; `email`/`measure_text` are driven by those attributes). Because +the base is a safe fallback, a partial plugin still works. + +```python +from typing import ClassVar +from signalwire.conversation_kit.verbalizer import Verbalizer, register + +class GermanVerbalizer(Verbalizer): + lang: ClassVar[str] = "de" + def number(self, value): ... + def date(self, iso): ... + +register(GermanVerbalizer()) # get("de") now resolves to it +``` + +Built-in languages (EN, PL) register in `verbalizer/languages/`; an application can register +its own at runtime with `register(...)` without modifying the SDK. A language is only fully +"supported" when three things line up: a verbalizer plugin, inclusion in the agent's multilingual +`allowed` set, and a TTS voice. + +## Design principles + +- **Deterministic, not generative** — same input, same output; the model decides *what*, this + decides the exact value and wording. +- **Zero dependencies** — lightweight, trivially unit-testable. +- **Product-agnostic** — no product names, no business logic, no I/O; the agent supplies the + product wording and wraps these outputs into its own responses. +- **Plugin languages** — output is per-language behind one interface; new languages are additive. diff --git a/signalwire/signalwire/conversation_kit/__init__.py b/signalwire/signalwire/conversation_kit/__init__.py new file mode 100644 index 00000000..71a5ef89 --- /dev/null +++ b/signalwire/signalwire/conversation_kit/__init__.py @@ -0,0 +1,54 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +conversation-kit — product-agnostic language utilities for voice agents. + +Three layers a spoken-conversation agent needs, none tied to any particular product: + + from conversation_kit import compute_date, validate_input, verbalizer + + compute_date({"weekday": "saturday", "which": "next"}, date.today()) # spoken -> ISO date + validate_input("a@b.com", "email") # input checks + verbalizer.get("pl").number("2.6") # -> 'dwa przecinek sześć' # value -> spoken + +- ``dates`` : spoken-date math (resolve a day the caller named to a calendar date). +- ``inputs`` : input validation + the typed-input (on-screen keypad) channel payload. +- ``verbalizer`` : TTS-ready, per-language OUTPUT (numbers / units / dates / emails) plus + model-prompt ``guidance()``. Plugin registry — add a language by + subclassing ``verbalizer.Verbalizer`` and ``register()``-ing it. + +Understand input -> compute -> speak output: the two halves of a voice turn's language +layer in one place. Zero dependencies; the agent layer wraps these into its +SignalWire SWAIG results and prompts. +""" + +from __future__ import annotations + +from . import verbalizer +from .dates import RESOLVE_DATE_PARAMS, WEEKDAYS, compute_date +from .inputs import ( + INPUT_REQUEST_TYPE, + input_request_payload, + is_valid_email, + is_valid_number, + is_valid_phone, + validate_input, +) + +__all__ = [ + "INPUT_REQUEST_TYPE", + "RESOLVE_DATE_PARAMS", + "WEEKDAYS", + "compute_date", + "input_request_payload", + "is_valid_email", + "is_valid_number", + "is_valid_phone", + "validate_input", + "verbalizer", +] diff --git a/signalwire/signalwire/conversation_kit/dates.py b/signalwire/signalwire/conversation_kit/dates.py new file mode 100644 index 00000000..47aa4d5c --- /dev/null +++ b/signalwire/signalwire/conversation_kit/dates.py @@ -0,0 +1,121 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Spoken-date resolution — the inverse of verbalizer's `date()` (spoken → ISO). + +The model is good at NLU, bad at calendar math: it passes the SEMANTIC parts it +heard (a weekday + this/next, today/tomorrow, or an explicit day/month/year) and +`compute_date` does the arithmetic, so a voice agent can never speak a wrong date +(a live call once resolved 'next Saturday' to a Thursday). + +No third-party dependencies; product-agnostic. The agent layer wraps +`compute_date` in whatever SWAIG result / wording it needs. +""" + +from __future__ import annotations + +from collections.abc import Mapping +from datetime import date, timedelta +from typing import Any + +WEEKDAYS = [ + "monday", + "tuesday", + "wednesday", + "thursday", + "friday", + "saturday", + "sunday", +] + +# JSON-schema `properties` for a `resolve_date` tool — the universal contract +# (multilingual hints baked in). An agent drops this straight into its tool schema: +# "resolve_date": {"type": "object", "properties": RESOLVE_DATE_PARAMS} +RESOLVE_DATE_PARAMS = { + "weekday": { + "type": "string", + "enum": WEEKDAYS, + "description": "Weekday the caller named, lowercase English (PL 'sobota'/'czwartek' -> 'saturday'/'thursday', DE 'Samstag' -> 'saturday').", + }, + "which": { + "type": "string", + "enum": ["this", "next"], + "description": "For a weekday: 'next' (caller said next / przyszły / nächste) = that weekday in the FOLLOWING calendar week; 'this' (this / coming / nearest / najbliższa / kommende, or a bare weekday) = the soonest upcoming one. Default 'this'.", + }, + "relative": { + "type": "string", + "enum": ["today", "tomorrow"], + "description": "Use instead of weekday for 'today'/'dziś'/'heute' or 'tomorrow'/'jutro'/'morgen'.", + }, + "day": { + "type": "integer", + "description": "Day-of-month for an explicit date ('the 15th' -> 15).", + }, + "month": { + "type": "integer", + "description": "Month 1-12 for an explicit date ('July' -> 7). Omit to use the current month.", + }, + "year": { + "type": "integer", + "description": "4-digit year, only if the caller stated one.", + }, +} + + +def compute_date(args: Mapping[str, Any], today: date) -> date | None: + """Pure arithmetic for resolve_date (testable: pin `today`). Returns a date or + None. Convention: 'next ' = that weekday in the FOLLOWING calendar + week; this/coming/nearest/bare = the soonest upcoming one (never today — a + same-day weekday rolls to next week; readback can adjust).""" + # 1) Explicit calendar date: day (+ optional month/year). If it lands in the + # past with no year stated, roll forward to the next occurrence: an explicit + # month -> same date next year; a bare day -> the same day next month ("the + # 4th" on Jun 26 = Jul 4, not Jun 4 next year). + day = args.get("day") + if isinstance(day, int) and 1 <= day <= 31: + has_month = isinstance(args.get("month"), int) and 1 <= args["month"] <= 12 + has_year = isinstance(args.get("year"), int) + month = args["month"] if has_month else today.month + year = args["year"] if has_year else today.year + try: + target = date(year, month, day) + except ValueError: + return None + if target < today and not has_year: + try: + if has_month: + target = date(year + 1, month, day) + else: + target = ( + date(year + 1, 1, day) + if month == 12 + else date(year, month + 1, day) + ) + except ValueError: + return None + return target + + # 2) Relative day word. + rel = str(args.get("relative") or "").strip().lower() + if rel == "today": + return today + if rel == "tomorrow": + return today + timedelta(days=1) + + # 3) Weekday (+ which). + wd = str(args.get("weekday") or "").strip().lower() + if wd in WEEKDAYS: + wd_idx = WEEKDAYS.index(wd) + which = str(args.get("which") or "this").strip().lower() + if which in ("next", "next_week", "following"): + next_monday = today + timedelta(days=7 - today.weekday()) + return next_monday + timedelta(days=wd_idx) + ahead = (wd_idx - today.weekday()) % 7 + return today + timedelta(days=ahead or 7) + + return None diff --git a/signalwire/signalwire/conversation_kit/inputs.py b/signalwire/signalwire/conversation_kit/inputs.py new file mode 100644 index 00000000..371b1e51 --- /dev/null +++ b/signalwire/signalwire/conversation_kit/inputs.py @@ -0,0 +1,81 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Typed-input validation + request-payload helpers (product-agnostic). + +A voice agent collects values the speech channel can't capture (email, phone, +number, ...) via a keypad: it emits an `input_request` event, the app types the +value back, and a validator decides accept-vs-re-prompt. These are the PURE +pieces — format validation and the request-payload shape — reusable by any +agent. The SDK-coupled bits (emitting the user_event, wait_for_user, the +re-prompt result) live in the agent. No third-party dependencies. +""" + +from __future__ import annotations + +import re + +# Pragmatic, TTS/keypad-oriented email shape: a@b.c with no spaces. Deliberately +# permissive on the local part — we reject only what is clearly NOT an address +# (so a real-but-unusual address is never bounced); the human read-back catches +# a valid-format-but-wrong-person typo, which no regex can. +_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$") + + +def is_valid_email(value: str) -> bool: + return bool(_EMAIL_RE.match((value or "").strip())) + + +def is_valid_phone(value: str) -> bool: + digits = re.sub(r"\D", "", value or "") + return 7 <= len(digits) <= 15 + + +def is_valid_number(value: str) -> bool: + try: + float((value or "").strip().replace(",", ".")) + return True + except ValueError: + return False + + +_VALIDATORS = { + "email": is_valid_email, + "tel": is_valid_phone, + "phone": is_valid_phone, + "number": is_valid_number, +} + + +def validate_input(value: str, input_type: str) -> bool: + """True if `value` is acceptable for `input_type`. Empty is never valid; + an unknown input_type accepts any non-empty value (the agent still does the + human read-back).""" + v = (value or "").strip() + if not v: + return False + fn = _VALIDATORS.get((input_type or "").lower()) + return fn(v) if fn else True + + +# The event-type string of the universal typed-input channel (agent emits it as +# an SWML user_event; app reveals + focuses the matching field). +INPUT_REQUEST_TYPE = "input_request" + + +def input_request_payload( + field: str, label: str = "", input_type: str = "text" +) -> dict: + """The payload an agent emits (as a user_event) to ask the app to reveal a + typed-input field. `field` is the key the typed value comes back under.""" + return { + "type": INPUT_REQUEST_TYPE, + "field": field, + "label": label, + "input_type": input_type, + } diff --git a/signalwire/signalwire/conversation_kit/verbalizer/__init__.py b/signalwire/signalwire/conversation_kit/verbalizer/__init__.py new file mode 100644 index 00000000..db5daa4c --- /dev/null +++ b/signalwire/signalwire/conversation_kit/verbalizer/__init__.py @@ -0,0 +1,39 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +verbalizer — language-agnostic, TTS-ready verbalization with language plugins. + + from conversation_kit.verbalizer import get + v = get("pl") + v.number("2.6") # 'dwa przecinek sześć' + v.unit("0.156", "mm/s") # 'zero przecinek sto pięćdziesiąt sześć milimetra na sekundę' + v.date("2026-07-04") # 'sobota, czwartego lipca dwa tysiące dwudziestego szóstego roku' + v.email("a.b@gmail.com") # 'a kropka b małpka gmail kropka com' + +Zero dependencies. Add a language by subclassing `Verbalizer` +and calling `register(MyVerbalizer())`. +""" + +from __future__ import annotations + +from .base import Verbalizer +from .registry import available, get, register +from .languages.en import EnglishVerbalizer +from .languages.pl import PolishVerbalizer + +register(EnglishVerbalizer()) +register(PolishVerbalizer()) + +__all__ = [ + "EnglishVerbalizer", + "PolishVerbalizer", + "Verbalizer", + "available", + "get", + "register", +] diff --git a/signalwire/signalwire/conversation_kit/verbalizer/base.py b/signalwire/signalwire/conversation_kit/verbalizer/base.py new file mode 100644 index 00000000..c08a8734 --- /dev/null +++ b/signalwire/signalwire/conversation_kit/verbalizer/base.py @@ -0,0 +1,165 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Language-agnostic verbalizer interface. + +A *verbalizer* turns structured values (numbers, units, dates, emails) into +spoken-form text that a TTS engine will read correctly in a given language. +This is the contract every language plugin implements; subclass ``Verbalizer`` +and override only the methods that differ for your language. + +The base class is intentionally a usable, language-neutral fallback (English-ish +/ passthrough) so an unregistered language still produces sane output instead of +crashing. It depends on nothing beyond the standard library — keep it that way so +conversation_kit stays a self-contained leaf subpackage and trivially unit-testable. +""" + +from __future__ import annotations + +import re +from typing import ClassVar + +#: Values the numeric methods accept (a spoken-form string, or a raw number). +Numeric = str | int | float + + +class Verbalizer: + """Base interface + safe default behaviour. One subclass per language.""" + + #: BCP-47 primary subtag this plugin handles (e.g. "pl", "de", "en"). + lang: ClassVar[str] = "und" + + #: Spoken word for each email/identifier separator. Override per language. + SEPARATORS: ClassVar[dict[str, str]] = { + "@": "at", + ".": "dot", + "-": "dash", + "_": "underscore", + "+": "plus", + } + + #: Spelling-alphabet letter names (lowercase letter -> spoken name). + #: Empty = spell with the bare character. + LETTERS: ClassVar[dict[str, str]] = {} + + #: Unit tokens ``measure_text`` verbalizes in free text (e.g. ("mm/s", "Hz")). + #: Empty = ``measure_text`` is a no-op (base / English passthrough). + MEASURE_UNITS: ClassVar[tuple[str, ...]] = () + + #: Base LLM directive for this language (e.g. "Mów po polsku."). Optional. + INSTRUCTION: ClassVar[str] = "" + + def guidance(self, glossary: dict[str, str] | None = None) -> str: + """LLM speaking instructions for everything done via instruction (not + deterministic transforms). These rules are GENERIC and LANGUAGE-AGNOSTIC — + every plugin inherits them; they're phrased about "the conversation + language", not a specific one. The number rule is added only when this + plugin actually verbalizes numbers (MEASURE_UNITS set). The caller's domain + ``glossary`` is woven into a "use these terms, never coin a word" rule. + + Subclasses normally DON'T override this — they inherit it and just set + INSTRUCTION / MEASURE_UNITS / SEPARATORS. Override only to add a genuinely + language-specific note, or to opt out (English returns ""). + """ + parts = [ + "Speak the conversation language naturally and idiomatically. NEVER transliterate, " + "calque, or invent a word from another language; if there is no native word, keep " + "the original term or describe it briefly — never coin one.", + "EMAILS: never voice raw @ or . symbols, and NEVER narrate the assembly aloud (don't " + "say things like 'add the at-sign before gmail' or 'with dot com') — the caller must " + "not hear the mechanics. Say an email only via its spoken-words form; if a part is " + "unclear, ask the caller to say the whole address again.", + "DATES: say a date as weekday + day + month in the conversation language, never the " + "ISO or numeric form.", + "Do not spell words out letter by letter unless asked; read abbreviations and IDs as " + "whole tokens.", + ] + if self.MEASURE_UNITS: + parts.insert( + 1, + "NUMBERS: every reading in the tool data is already written as " + "correct words in the conversation language — say it EXACTLY as " + "written; never turn it back into digits, re-translate, or re-phrase " + "the number.", + ) + if glossary: + terms = "; ".join(f"{k} = {v}" for k, v in glossary.items()) + parts.append(f"Use these established terms (never coin a word): {terms}.") + return " ".join(parts) + + # --- numeric -------------------------------------------------------- # + + def number(self, value: Numeric) -> str: + """A bare number as words. Base: passthrough (English TTS reads digits).""" + return str(value).strip() + + def unit(self, value: Numeric, unit: str) -> str: + """A measured value + its unit, agreement-correct. Base: ' '.""" + return f"{self.number(value)} {unit}".strip() + + # --- temporal ------------------------------------------------------- # + + def date(self, iso: str) -> str: + """An ISO date (YYYY-MM-DD) spoken naturally. Base: passthrough.""" + return iso + + # --- identifiers (structure is universal; only the words differ) ---- # + + def email(self, address: str) -> str: + """Speak an email/identifier: keep the alphanumeric runs as words, replace + each separator with its spoken word. 'a.b@gmail.com' -> 'a b + gmail com'. Shared across languages via ``SEPARATORS``. + """ + a = (address or "").strip() + if not a: + return "" + keys = "".join(re.escape(k) for k in self.SEPARATORS) + parts = re.split(f"([{keys}])", a) + return " ".join(self.SEPARATORS.get(p, p) for p in parts if p != "").strip() + + def measure_text(self, text: str) -> str: + """Verbalize every ' ' (and '- ' ranges) in free + text, for the units in ``MEASURE_UNITS``. Everything else is left untouched — + ISO/DIN codes, dates, versions, bare numbers — so there are no false + positives. No-op unless ``MEASURE_UNITS`` is set. The unit token may be + attached ('2.6mm/s') or spaced ('2.6 mm/s'). + """ + if not text or not self.MEASURE_UNITS: + return text + units = "|".join( + re.escape(u) for u in sorted(self.MEASURE_UNITS, key=len, reverse=True) + ) + num = r"-?\d+(?:[.,]\d+)?" + # Ranges first (a U+2013 en dash or a hyphen between two numbers, e.g. + # "10-100 Hz") so the single-value pass doesn't grab only the second + # number. Both separators are accepted because an LLM may emit either. The + # en dash in the character class below is a deliberate alternative + # separator, so RUF001 (ambiguous-character) is suppressed on that line. + text = re.sub( + rf"({num})\s*[–-]\s*({num})\s*({units})(?![\w])", # noqa: RUF001 + lambda m: ( + f"{self.number(m.group(1))} do {self.unit(m.group(2), m.group(3))}" + ), + text, + ) + return re.sub( + rf"({num})\s*({units})(?![\w])", + lambda m: self.unit(m.group(1), m.group(2)), + text, + ) + + def spell(self, token: str) -> str: + """Spell a token out character by character (fallback for stubborn STT).""" + out = [] + for ch in (token or "").strip(): + lc = ch.lower() + if lc in self.LETTERS: + out.append(self.LETTERS[lc]) + else: + out.append(ch) + return " ".join(out) diff --git a/signalwire/signalwire/conversation_kit/verbalizer/languages/__init__.py b/signalwire/signalwire/conversation_kit/verbalizer/languages/__init__.py new file mode 100644 index 00000000..3dfbd272 --- /dev/null +++ b/signalwire/signalwire/conversation_kit/verbalizer/languages/__init__.py @@ -0,0 +1,9 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Built-in language plugins.""" diff --git a/signalwire/signalwire/conversation_kit/verbalizer/languages/en.py b/signalwire/signalwire/conversation_kit/verbalizer/languages/en.py new file mode 100644 index 00000000..017950cf --- /dev/null +++ b/signalwire/signalwire/conversation_kit/verbalizer/languages/en.py @@ -0,0 +1,30 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +English reference plugin. + +English TTS reads digits, emails ("at"/"dot") and dates natively, so the neutral +base behaviour is already correct. This exists mainly as the fallback target and +a worked example of a minimal plugin — override here only if a real gap appears. +""" + +from __future__ import annotations + +from typing import ClassVar + +from ..base import Verbalizer + + +class EnglishVerbalizer(Verbalizer): + lang: ClassVar[str] = "en" + # Base SEPARATORS are already English ("at"/"dot"); nothing to override yet. + + def guidance(self, glossary: dict[str, str] | None = None) -> str: + # English is read natively by the LLM/TTS — no special guidance needed. + # This is also the fallback for unregistered languages, so they get "" too. + return "" diff --git a/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py b/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py new file mode 100644 index 00000000..fe8e47e5 --- /dev/null +++ b/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py @@ -0,0 +1,367 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Polish verbalizer plugin. + +Deterministic spoken Polish for numbers, units, dates and emails — the cases a +TTS/LLM mangles. Self-contained (no num2words dependency): Polish cardinals are +a closed algorithm and we need exact control over decimal place-value reading +and unit case agreement anyway. + +Grammar per the project's polish-tts-verbalization spec: +- Decimals read " przecinek " (place-value, never + digit-by-digit), and the unit goes GENITIVE SINGULAR after any decimal. +- Integer unit agreement by Polish buckets (see `_bucket`). +- Dates: weekday nominative + day ordinal-genitive + month genitive + year. +""" + +from __future__ import annotations + +from datetime import date as _date +from decimal import Decimal +from typing import ClassVar + +from ..base import Numeric, Verbalizer + +# --- cardinals ---------------------------------------------------------- # + +_ONES = [ + "zero", + "jeden", + "dwa", + "trzy", + "cztery", + "pięć", + "sześć", + "siedem", + "osiem", + "dziewięć", + "dziesięć", + "jedenaście", + "dwanaście", + "trzynaście", + "czternaście", + "piętnaście", + "szesnaście", + "siedemnaście", + "osiemnaście", + "dziewiętnaście", +] +_TENS = { + 2: "dwadzieścia", + 3: "trzydzieści", + 4: "czterdzieści", + 5: "pięćdziesiąt", + 6: "sześćdziesiąt", + 7: "siedemdziesiąt", + 8: "osiemdziesiąt", + 9: "dziewięćdziesiąt", +} +_HUNDREDS = { + 1: "sto", + 2: "dwieście", + 3: "trzysta", + 4: "czterysta", + 5: "pięćset", + 6: "sześćset", + 7: "siedemset", + 8: "osiemset", + 9: "dziewięćset", +} + + +def _bucket(n: int) -> str: + """Polish quantity-agreement bucket for a non-negative integer count.""" + if n == 1: + return "nom_sg" + if n % 100 in (12, 13, 14): + return "gen_pl" + if n % 10 in (2, 3, 4): + return "nom_pl" + return "gen_pl" + + +def _under_1000(n: int) -> str: + parts = [] + if n // 100: + parts.append(_HUNDREDS[n // 100]) + rem = n % 100 + if rem < 20: + if rem: + parts.append(_ONES[rem]) + else: + parts.append(_TENS[rem // 10]) + if rem % 10: + parts.append(_ONES[rem % 10]) + return " ".join(parts) + + +def cardinal(n: int) -> str: + """Non-negative integer -> Polish words (0..999_999, covers our value range).""" + if n < 0: + return "minus " + cardinal(-n) + if n == 0: + return "zero" + if n < 1000: + return _under_1000(n) + th, rem = divmod(n, 1000) + parts = [] + if th == 1: + parts.append("tysiąc") + else: + word = {"nom_sg": "tysiąc", "nom_pl": "tysiące", "gen_pl": "tysięcy"}[ + _bucket(th) + ] + parts.append(_under_1000(th)) + parts.append(word) + if rem: + parts.append(_under_1000(rem)) + return " ".join(parts) + + +# --- ordinals (genitive) for dates -------------------------------------- # + +_ORD_ONES = { + 1: "pierwszego", + 2: "drugiego", + 3: "trzeciego", + 4: "czwartego", + 5: "piątego", + 6: "szóstego", + 7: "siódmego", + 8: "ósmego", + 9: "dziewiątego", + 10: "dziesiątego", + 11: "jedenastego", + 12: "dwunastego", + 13: "trzynastego", + 14: "czternastego", + 15: "piętnastego", + 16: "szesnastego", + 17: "siedemnastego", + 18: "osiemnastego", + 19: "dziewiętnastego", +} +_ORD_TENS = { + 2: "dwudziestego", + 3: "trzydziestego", + 4: "czterdziestego", + 5: "pięćdziesiątego", + 6: "sześćdziesiątego", + 7: "siedemdziesiątego", + 8: "osiemdziesiątego", + 9: "dziewięćdziesiątego", +} + + +def _ordinal_gen(n: int) -> str: + """Genitive ordinal 1..99 (day-of-month and year tail share these forms).""" + if n < 20: + return _ORD_ONES[n] + t, o = divmod(n, 10) + return _ORD_TENS[t] if o == 0 else f"{_ORD_TENS[t]} {_ORD_ONES[o]}" + + +def _year(y: int) -> str: + if 2001 <= y <= 2099: + return f"dwa tysiące {_ordinal_gen(y - 2000)} roku" + return f"{cardinal(y)} roku" + + +_MONTHS = { + 1: "stycznia", + 2: "lutego", + 3: "marca", + 4: "kwietnia", + 5: "maja", + 6: "czerwca", + 7: "lipca", + 8: "sierpnia", + 9: "września", + 10: "października", + 11: "listopada", + 12: "grudnia", +} +_WEEKDAYS = [ + "poniedziałek", + "wtorek", + "środa", + "czwartek", + "piątek", + "sobota", + "niedziela", +] + + +# --- units (nom_sg / nom_pl / gen_pl for integers, gen_sg for decimals) -- # + +_UNITS = { + "mm/s": { + "nom_sg": "milimetr", + "nom_pl": "milimetry", + "gen_pl": "milimetrów", + "gen_sg": "milimetra", + "suffix": " na sekundę", + }, + "Hz": { + "nom_sg": "herc", + "nom_pl": "herce", + "gen_pl": "herców", + "gen_sg": "herca", + "suffix": "", + }, + "°C": { + "nom_sg": "stopień", + "nom_pl": "stopnie", + "gen_pl": "stopni", + "gen_sg": "stopnia", + "suffix": " Celsjusza", + }, + "hPa": { + "nom_sg": "hektopaskal", + "nom_pl": "hektopaskale", + "gen_pl": "hektopaskali", + "gen_sg": "hektopaskala", + "suffix": "", + }, + # "procent" is invariant after a number in modern usage (5 procent, 22 procent). + "%": { + "nom_sg": "procent", + "nom_pl": "procent", + "gen_pl": "procent", + "gen_sg": "procent", + "suffix": "", + }, + "m/s²": { + "nom_sg": "metr", + "nom_pl": "metry", + "gen_pl": "metrów", + "gen_sg": "metra", + "suffix": " na sekundę do kwadratu", + }, + "m/s2": { + "nom_sg": "metr", + "nom_pl": "metry", + "gen_pl": "metrów", + "gen_sg": "metra", + "suffix": " na sekundę do kwadratu", + }, + "km/h": { + "nom_sg": "kilometr", + "nom_pl": "kilometry", + "gen_pl": "kilometrów", + "gen_sg": "kilometra", + "suffix": " na godzinę", + }, +} + +_PL_LETTERS = { + "a": "a", + "ą": "ą", + "b": "be", + "c": "ce", + "ć": "cie", + "d": "de", + "e": "e", + "ę": "ę", + "f": "ef", + "g": "gie", + "h": "ha", + "i": "i", + "j": "jot", + "k": "ka", + "l": "el", + "ł": "eł", + "m": "em", + "n": "en", + "ń": "eń", + "o": "o", + "ó": "o kreskowane", + "p": "pe", + "q": "ku", + "r": "er", + "s": "es", + "ś": "eś", + "t": "te", + "u": "u", + "v": "fau", + "w": "wu", + "x": "iks", + "y": "igrek", + "z": "zet", + "ź": "ziet", + "ż": "żet", +} + + +def _decimal(value: Numeric) -> Decimal: + return Decimal(str(value).strip().replace(",", ".")) + + +class PolishVerbalizer(Verbalizer): + lang: ClassVar[str] = "pl" + # email/identifier separators spoken in Polish + SEPARATORS: ClassVar[dict[str, str]] = { + "@": "małpka", + ".": "kropka", + "-": "myślnik", + "_": "podkreślnik", + "+": "plus", + } + LETTERS: ClassVar[dict[str, str]] = _PL_LETTERS + MEASURE_UNITS: ClassVar[tuple[str, ...]] = tuple(_UNITS) + INSTRUCTION: ClassVar[str] = "Mów po polsku. Odpowiadaj w języku polskim." + # guidance() is inherited from the base — the speaking rules are language-agnostic. + # Polish-ness comes from INSTRUCTION + the glossary terms + the transforms above. + + def number(self, value: Numeric) -> str: + d = _decimal(value) + neg = d < 0 + d = abs(d) + int_part = int(d) + txt = format(d, "f") + frac = txt.split(".")[1].rstrip("0") if "." in txt else "" + words = cardinal(int_part) + if frac: + lead = len(frac) - len(frac.lstrip("0")) + rest = frac.lstrip("0") + fwords = ["zero"] * lead + ([cardinal(int(rest))] if rest else []) + words = f"{words} przecinek {' '.join(fwords)}" + return f"minus {words}" if neg else words + + def unit(self, value: Numeric, unit: str) -> str: + forms = _UNITS.get(unit) + num = self.number(value) + if not forms: + return f"{num} {unit}".strip() + d = _decimal(value) + key = "gen_sg" if d != d.to_integral_value() else _bucket(abs(int(d))) + return f"{num} {forms[key]}{forms['suffix']}" + + def spell(self, token: str) -> str: + out = [] + for ch in (token or "").strip(): + lc = ch.lower() + if lc in self.LETTERS: + out.append(self.LETTERS[lc]) + elif ch.isdigit(): + out.append(cardinal(int(ch))) + else: + out.append(ch) + return " ".join(out) + + def date(self, iso: str, with_weekday: bool = True, with_year: bool = True) -> str: + y, m, d = (int(p) for p in iso.split("-")) + parts = [] + if with_weekday: + parts.append(_WEEKDAYS[_date(y, m, d).weekday()] + ",") + parts.append(_ordinal_gen(d)) + parts.append(_MONTHS[m]) + if with_year: + parts.append(_year(y)) + return " ".join(parts) diff --git a/signalwire/signalwire/conversation_kit/verbalizer/registry.py b/signalwire/signalwire/conversation_kit/verbalizer/registry.py new file mode 100644 index 00000000..b37f8c32 --- /dev/null +++ b/signalwire/signalwire/conversation_kit/verbalizer/registry.py @@ -0,0 +1,42 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Language plugin registry + dispatch. + +`get(lang)` returns the verbalizer for a language tag, falling back to English +and finally a language-neutral base instance, so callers never have to special- +case a missing plugin. +""" + +from __future__ import annotations + +from .base import Verbalizer + +_REGISTRY: dict[str, Verbalizer] = {} +_DEFAULT = Verbalizer() + + +def register(verbalizer: Verbalizer) -> Verbalizer: + """Register a language plugin under its `lang` code. Returns it (chainable).""" + _REGISTRY[verbalizer.lang.lower()] = verbalizer + return verbalizer + + +def get(lang: str | None) -> Verbalizer: + """Resolve a verbalizer for a BCP-47 tag ('pl', 'pl-PL', 'de-DE', …). + + Falls back to the 'en' plugin, then a neutral passthrough base, so this + never returns None and never raises on an unknown language. + """ + code = (lang or "").replace("_", "-").split("-")[0].lower() + return _REGISTRY.get(code) or _REGISTRY.get("en") or _DEFAULT + + +def available() -> list[str]: + """The language codes currently registered.""" + return sorted(_REGISTRY) diff --git a/tests/unit/conversation_kit/test_dates.py b/tests/unit/conversation_kit/test_dates.py new file mode 100644 index 00000000..63f239ad --- /dev/null +++ b/tests/unit/conversation_kit/test_dates.py @@ -0,0 +1,56 @@ +"""Tests for signalwire.conversation_kit.dates.compute_date — `today` pinned for determinism.""" + +from datetime import date + +from signalwire.conversation_kit import compute_date + + +def test_relative_today_tomorrow(): + today = date(2026, 6, 28) + assert compute_date({"relative": "today"}, today) == today + assert compute_date({"relative": "tomorrow"}, today) == date(2026, 6, 29) + + +def test_weekday_this_vs_next_midweek(): + # Wed 2026-07-01: 'this' = this week's upcoming day; 'next' = the FOLLOWING + # calendar week. (On a week-boundary day like Sunday the two collapse, so the + # distinction must be pinned mid-week.) + wed = date(2026, 7, 1) + assert compute_date({"weekday": "saturday", "which": "this"}, wed) == date( + 2026, 7, 4 + ) + assert compute_date({"weekday": "saturday", "which": "next"}, wed) == date( + 2026, 7, 11 + ) + # A bare weekday = the soonest upcoming one (same as 'this'). + assert compute_date({"weekday": "friday"}, wed) == date(2026, 7, 3) + + +def test_bare_same_weekday_rolls_to_next_week(): + # 'wednesday' ON a Wednesday -> next week's Wednesday, never today. + assert compute_date({"weekday": "wednesday"}, date(2026, 7, 1)) == date(2026, 7, 8) + + +def test_explicit_day_month(): + today = date(2026, 6, 28) + assert compute_date({"day": 4, "month": 7}, today) == date(2026, 7, 4) + assert compute_date({"day": 15, "month": 8, "year": 2027}, today) == date( + 2027, 8, 15 + ) + + +def test_explicit_bare_day_in_past_rolls_to_next_month(): + # 'the 4th' on Jun 28 -> Jul 4 (next month), not Jun 4. + assert compute_date({"day": 4}, date(2026, 6, 28)) == date(2026, 7, 4) + + +def test_explicit_month_in_past_rolls_to_next_year(): + # 'the 10th of January' from June -> next January. + assert compute_date({"day": 10, "month": 1}, date(2026, 6, 28)) == date(2027, 1, 10) + + +def test_unresolvable_returns_none(): + today = date(2026, 6, 28) + assert compute_date({}, today) is None + assert compute_date({"weekday": "funday"}, today) is None + assert compute_date({"day": 99}, today) is None diff --git a/tests/unit/conversation_kit/test_inputs.py b/tests/unit/conversation_kit/test_inputs.py new file mode 100644 index 00000000..be7a1d1f --- /dev/null +++ b/tests/unit/conversation_kit/test_inputs.py @@ -0,0 +1,57 @@ +"""Tests for signalwire.conversation_kit.inputs — typed-input validation + payload.""" + +from signalwire.conversation_kit import ( + input_request_payload, + is_valid_email, + is_valid_number, + is_valid_phone, + validate_input, +) + + +def test_email(): + assert is_valid_email("karolczyk.jakub@gmail.com") + assert is_valid_email("a@b.co") + # A valid-format typo is still valid format (only the human read-back catches it). + assert is_valid_email("karolczyk.jakib@gmail.com") + assert not is_valid_email("karolczyk.jakib") # no @ + assert not is_valid_email("jakub@gmail") # no TLD dot + assert not is_valid_email("a b@gmail.com") # space + assert not is_valid_email("") + + +def test_phone(): + assert is_valid_phone("+48 600 700 800") + assert is_valid_phone("1234567") + assert not is_valid_phone("12345") # too short + assert not is_valid_phone("") + + +def test_number(): + assert is_valid_number("42") + assert is_valid_number("3,14") # PL decimal comma + assert is_valid_number("3.14") + assert not is_valid_number("abc") + assert not is_valid_number("") + + +def test_validate_input_dispatch(): + assert validate_input("a@b.co", "email") + assert not validate_input("nope", "email") + assert validate_input("+48600700800", "tel") + # Unknown type accepts any non-empty, rejects empty. + assert validate_input("anything", "text") + assert not validate_input("", "text") + assert not validate_input(" ", "email") + + +def test_input_request_payload(): + p = input_request_payload("typed_installer_email", "Installer email", "email") + assert p == { + "type": "input_request", + "field": "typed_installer_email", + "label": "Installer email", + "input_type": "email", + } + # Defaults. + assert input_request_payload("x")["input_type"] == "text" diff --git a/tests/unit/conversation_kit/test_verbalizer_pl.py b/tests/unit/conversation_kit/test_verbalizer_pl.py new file mode 100644 index 00000000..c22ffdca --- /dev/null +++ b/tests/unit/conversation_kit/test_verbalizer_pl.py @@ -0,0 +1,160 @@ +"""Polish verbalizer tests for signalwire.conversation_kit.verbalizer.""" + +from signalwire.conversation_kit.verbalizer import get + +PL = get("pl") + + +def _check(cases, fn): + bad = [(inp, exp, got) for inp, exp in cases if (got := fn(inp)) != exp] + assert not bad, "\n".join(f" {i!r}: expected {e!r}, got {g!r}" for i, e, g in bad) + + +def test_lang_dispatch(): + assert get("pl").lang == "pl" + assert get("pl-PL").lang == "pl" + assert get("PL").lang == "pl" + assert get("fr").lang == "en" # unknown -> English fallback + assert get(None).lang == "en" + + +def test_cardinals(): + _check( + [ + ("0", "zero"), + ("2", "dwa"), + ("5", "pięć"), + ("11", "jedenaście"), + ("21", "dwadzieścia jeden"), + ("156", "sto pięćdziesiąt sześć"), + ("1000", "tysiąc"), + ("2026", "dwa tysiące dwadzieścia sześć"), + ("1019", "tysiąc dziewiętnaście"), + ("5000", "pięć tysięcy"), + ], + PL.number, + ) + + +def test_decimals_place_value(): + _check( + [ + ("2.6", "dwa przecinek sześć"), + ("2,6", "dwa przecinek sześć"), # comma input + ("0.156", "zero przecinek sto pięćdziesiąt sześć"), + ("30.3", "trzydzieści przecinek trzy"), + ("0.05", "zero przecinek zero pięć"), # leading fractional zero + ("-1.5", "minus jeden przecinek pięć"), + ], + PL.number, + ) + + +def test_unit_agreement(): + cases = [ + (("1", "mm/s"), "jeden milimetr na sekundę"), + (("2", "mm/s"), "dwa milimetry na sekundę"), + (("5", "mm/s"), "pięć milimetrów na sekundę"), + (("21", "mm/s"), "dwadzieścia jeden milimetrów na sekundę"), + (("2.6", "mm/s"), "dwa przecinek sześć milimetra na sekundę"), + (("30.3", "°C"), "trzydzieści przecinek trzy stopnia Celsjusza"), + (("2", "Hz"), "dwa herce"), + (("1019", "hPa"), "tysiąc dziewiętnaście hektopaskali"), + ] + bad = [(a, e, g) for a, e in cases if (g := PL.unit(*a)) != e] + assert not bad, "\n".join(f" {a}: expected {e!r}, got {g!r}" for a, e, g in bad) + + +def test_dates(): + _check( + [ + ( + "2026-07-04", + "sobota, czwartego lipca dwa tysiące dwudziestego szóstego roku", + ), + ( + "2026-06-30", + "wtorek, trzydziestego czerwca dwa tysiące dwudziestego szóstego roku", + ), + ( + "2026-01-01", + "czwartek, pierwszego stycznia dwa tysiące dwudziestego szóstego roku", + ), + ], + PL.date, + ) + + +def test_measure_text(): + # real handler formats: spaced, attached, ranges, and things that must NOT match + _check( + [ + ( + "RMS velocity: 0.156 mm/s on x", + "RMS velocity: zero przecinek sto pięćdziesiąt sześć milimetra na sekundę on x", + ), + ( + "PPV:2.6mm/s freq:100Hz", + "PPV:dwa przecinek sześć milimetra na sekundę freq:sto herców", + ), + ( + "Temperature: 30.3°C", + "Temperature: trzydzieści przecinek trzy stopnia Celsjusza", + ), + ("max 5.0 mm/s", "max pięć milimetrów na sekundę"), + ("1019 hPa", "tysiąc dziewiętnaście hektopaskali"), + ( + "range 20.5–25.3°C today", # noqa: RUF001 + "range dwadzieścia przecinek pięć do dwadzieścia pięć przecinek trzy stopnia Celsjusza today", + ), + ("band 10-100 Hz", "band dziesięć do sto herców"), + ("45% of the limit", "czterdzieści pięć procent of the limit"), + ("peak 0.5 m/s²", "peak zero przecinek pięć metra na sekundę do kwadratu"), + ("gusts 12 km/h", "gusts dwanaście kilometrów na godzinę"), + # must be left alone (no unit / structural): + ("ISO 10816 zone", "ISO 10816 zone"), + ("DIN 4150-3 referenced", "DIN 4150-3 referenced"), + ("on 2026-07-04 at 14:30", "on 2026-07-04 at 14:30"), + ("version 2.5 build", "version 2.5 build"), + ], + PL.measure_text, + ) + + +def test_email(): + _check( + [ + ( + "karolczyk.jakub@gmail.com", + "karolczyk kropka jakub małpka gmail kropka com", + ), + ("a-b_c@x.pl", "a myślnik b podkreślnik c małpka x kropka pl"), + ], + PL.email, + ) + + +def test_guidance(): + # PL inherits the generic, language-agnostic guidance from the base. + g = PL.guidance({"severity": "nasilenie", "RMS vibration": "poziom drgań (RMS)"}) + assert "naturally and idiomatically" in g # generic speak-naturally rule + assert "EXACTLY as written" in g # number rule (PL has MEASURE_UNITS) + assert "narrate the assembly" in g # email-narration rule (now shared/base) + assert "never the ISO" in g # date rule + assert ( + "severity = nasilenie" in g and "poziom drgań (RMS)" in g + ) # glossary woven in + assert PL.INSTRUCTION.startswith("Mów po polsku") + assert get("en").guidance() == "" # English opts out + # the number rule is gated on MEASURE_UNITS — a unit-less base verbalizer omits it + from signalwire.conversation_kit.verbalizer.base import Verbalizer + + assert "EXACTLY as written" not in Verbalizer().guidance() + + +if __name__ == "__main__": + for name, fn in sorted(globals().items()): + if name.startswith("test_") and callable(fn): + fn() + print(f"ok {name}") + print("ALL PASS") From ba2288e2a51d8bfbfb93823ffb17a989a73f97bd Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Tue, 30 Jun 2026 20:01:48 +0100 Subject: [PATCH 02/10] =?UTF-8?q?feat(skills):=20add=20typed=5Finput=20ski?= =?UTF-8?q?ll=20=E2=80=94=20collect=20typed=20values=20via=20on-screen=20k?= =?UTF-8?q?eypad?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A multi-instance skill (one instance per field) for collecting a value the caller TYPES on an on-screen keypad — email, phone, account number — when speech-to-text can't capture it reliably. - request_: speak a "type it on screen" line, emit an input_request user event so a connected client reveals/focuses the field, then wait_for_user. - confirm_: read the raw typed value from global_data['typed_'], validate it, reopen on missing/invalid, else read it back to confirm. The value is never a model argument, so a typo can't be silently altered. Per-language prompts resolve against global_data['language'] at call time, so one instance serves a multilingual agent. Validation, the user-event payload, and the spoken read-back come from signalwire.conversation_kit. 12 unit tests. --- .../signalwire/skills/typed_input/README.md | 61 +++++ .../signalwire/skills/typed_input/__init__.py | 11 + .../signalwire/skills/typed_input/skill.py | 233 ++++++++++++++++++ tests/unit/skills/test_typed_input_skill.py | 146 +++++++++++ 4 files changed, 451 insertions(+) create mode 100644 signalwire/signalwire/skills/typed_input/README.md create mode 100644 signalwire/signalwire/skills/typed_input/__init__.py create mode 100644 signalwire/signalwire/skills/typed_input/skill.py create mode 100644 tests/unit/skills/test_typed_input_skill.py diff --git a/signalwire/signalwire/skills/typed_input/README.md b/signalwire/signalwire/skills/typed_input/README.md new file mode 100644 index 00000000..fea8f124 --- /dev/null +++ b/signalwire/signalwire/skills/typed_input/README.md @@ -0,0 +1,61 @@ +# Typed Input Skill + +Collect a value the caller **types** on an on-screen keypad — an email, phone number, account +number, anything speech-to-text can't capture reliably. Add one instance per field. + +## How it works + +1. `request_` speaks a short "type it on your screen" line, emits an `input_request` user + event (a connected client reveals + focuses the field), and parks via `wait_for_user` until + the caller submits. +2. The client posts the typed value into `global_data['typed_']`. +3. `confirm_` reads that raw value back, validates it, reopens the keypad if it is missing + or invalid, otherwise reads it back for the caller to confirm. + +The value is never a model argument, so the model can't silently alter or "correct" a typo. +Validation, the user-event payload, and the spoken read-back come from `signalwire.conversation_kit`. + +## Requirements + +- **Packages**: none (uses `signalwire.conversation_kit`, part of the SDK). +- A connected client that listens for the `input_request` user event and posts the typed value + back into `global_data['typed_']`. + +## Parameters + +- `field` (string, **required**) — field key, e.g. `installer_email`. Tools become + `request_` / `confirm_`; the typed value lands in `global_data['typed_']`. +- `input_type` (string, default `text`) — one of `email`, `phone`, `number`, `text`. Drives + validation and the read-back form (an email is read as words; anything else is spelled out). +- `open_prompt` (object, **required**) — per-language map `{lang: text}` spoken when the keypad opens. +- `field_label` (object, **required**) — per-language map of the on-screen field label. +- `invalid_prompt` (object, **required**) — per-language map spoken when the typed value fails + validation, before the keypad reopens. + +Prompts resolve against `global_data['language']` at call time (falling back to `en`), so a single +instance serves a multilingual agent. + +## Multiple instances + +Add it once per field; each instance gets its own `request_`/`confirm_` tools. + +```python +agent.add_skill("typed_input", { + "field": "installer_email", + "input_type": "email", + "open_prompt": { + "en": "Please type the email on your screen.", + "pl": "Wpisz adres e-mail na ekranie.", + }, + "field_label": {"en": "Installer's email", "pl": "Adres e-mail instalatora"}, + "invalid_prompt": { + "en": "That does not look like a valid email; please type it again.", + "pl": "To nie wygląda na poprawny adres; wpisz go ponownie.", + }, +}) +``` + +## Tools created + +- `request_` — open the keypad and wait for the typed value. +- `confirm_` — validate and read back the typed value (reopens the keypad on missing/invalid). diff --git a/signalwire/signalwire/skills/typed_input/__init__.py b/signalwire/signalwire/skills/typed_input/__init__.py new file mode 100644 index 00000000..08ed64c6 --- /dev/null +++ b/signalwire/signalwire/skills/typed_input/__init__.py @@ -0,0 +1,11 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Typed-input skill for SignalWire Agents — collect a value the caller types on an +on-screen keypad (email, phone, ...) when speech-to-text can't capture it. +""" diff --git a/signalwire/signalwire/skills/typed_input/skill.py b/signalwire/signalwire/skills/typed_input/skill.py new file mode 100644 index 00000000..4daecb3d --- /dev/null +++ b/signalwire/signalwire/skills/typed_input/skill.py @@ -0,0 +1,233 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Typed-input skill — collect a value the caller TYPES on an on-screen keypad +(email, phone, account number, ...) for the cases speech-to-text can't capture +reliably. + +The flow, deterministic by design so the model can never alter the value: + 1. request_ speaks a short "type it on screen" line, emits an + ``input_request`` user-event (so a connected app reveals + focuses the + field), and parks via wait_for_user until the caller submits. + 2. The app posts the typed value into ``global_data['typed_']``. + 3. confirm_ reads that RAW value back (never a model argument, so a + typo is never silently "corrected"), validates it, reopens the keypad if it + is missing/invalid, otherwise reads it back for the caller to confirm. + +Multi-instance: add it once per field. Prompts are per-language maps resolved +against ``global_data['language']`` at call time, so one instance serves a +multilingual agent. Validation, the user-event payload, and the spoken read-back +come from ``signalwire.conversation_kit``. +""" + +from __future__ import annotations + +from typing import Any, ClassVar + +from signalwire.conversation_kit import input_request_payload, validate_input +from signalwire.conversation_kit.verbalizer import get as get_verbalizer +from signalwire.core.function_result import FunctionResult +from signalwire.core.skill_base import SkillBase + +_DEFAULT_LANG = "en" + + +class TypedInputSkill(SkillBase): + """Collect a typed value over the on-screen keypad channel (one instance per field).""" + + SKILL_NAME = "typed_input" + SKILL_DESCRIPTION = ( + "Collect a value the caller types on an on-screen keypad (email, phone, ...) " + "when speech-to-text can't capture it reliably" + ) + SKILL_VERSION = "1.0.0" + REQUIRED_PACKAGES: ClassVar[list[str]] = [] + REQUIRED_ENV_VARS: ClassVar[list[str]] = [] + SUPPORTS_MULTIPLE_INSTANCES = True + + @classmethod + def get_parameter_schema(cls) -> dict[str, dict[str, Any]]: + """Parameters: the field, its validation type, and the per-language prompts.""" + schema = super().get_parameter_schema() + schema.update( + { + "field": { + "type": "string", + "description": ( + "Field key, e.g. 'installer_email'. Tools become request_ / " + "confirm_; the typed value lands in global_data['typed_']." + ), + "required": True, + }, + "input_type": { + "type": "string", + "description": "Validation + read-back style for the typed value.", + "default": "text", + "enum": ["email", "phone", "number", "text"], + "required": False, + }, + "open_prompt": { + "type": "object", + "description": ( + "Per-language map ({lang: text}) of the line spoken when the keypad " + "opens ('please type it on your screen'). Falls back to 'en'." + ), + "required": True, + }, + "field_label": { + "type": "object", + "description": "Per-language map ({lang: text}) of the on-screen field label.", + "required": True, + }, + "invalid_prompt": { + "type": "object", + "description": ( + "Per-language map ({lang: text}) spoken when the typed value fails " + "validation, before the keypad reopens." + ), + "required": True, + }, + } + ) + return schema + + def get_instance_key(self) -> str: + """One instance per field, so several typed fields can coexist on one agent.""" + field = self.params.get("field") + return f"{self.SKILL_NAME}_{field}" if field else self.SKILL_NAME + + def setup(self) -> bool: + """Validate params and derive the per-field tool/global-data names.""" + field = self.params.get("field") + if not field or not isinstance(field, str): + self.logger.error( + "typed_input requires a non-empty string 'field' parameter" + ) + return False + self.field: str = field + self.input_type: str = self.params.get("input_type", "text") + self.gd_key: str = f"typed_{field}" + self.request_tool: str = f"request_{field}" + self.confirm_tool: str = f"confirm_{field}" + self.open_prompt: dict[str, str] = self.params.get("open_prompt") or {} + self.field_label: dict[str, str] = self.params.get("field_label") or {} + self.invalid_prompt: dict[str, str] = self.params.get("invalid_prompt") or {} + return True + + def register_tools(self) -> None: + """Register the request_ opener and confirm_ read-back tools.""" + self.define_tool( + name=self.request_tool, + description=( + f"Open the on-screen keypad for the caller to type their {self.field}. Use this " + f"instead of asking for it by voice; the typed value arrives in " + f"global_data.{self.gd_key}, then call {self.confirm_tool} to read it back." + ), + parameters={}, + handler=self._open_handler, + ) + self.define_tool( + name=self.confirm_tool, + description=( + f"Read back the {self.field} the caller typed (from global_data.{self.gd_key}) and " + f"confirm it. Reopens the keypad if the value is missing or invalid. Call this after " + f"{self.request_tool}, never before the value has been typed." + ), + parameters={}, + handler=self._confirm_handler, + ) + + # ------------------------------------------------------------------ # + # Handlers + # ------------------------------------------------------------------ # + + def _open_handler( + self, args: dict[str, Any], raw_data: dict[str, Any] + ) -> FunctionResult: + """Speak the open prompt, reveal the keypad, and park until the value is typed.""" + return self._open(self._lang(raw_data)) + + def _confirm_handler( + self, args: dict[str, Any], raw_data: dict[str, Any] + ) -> FunctionResult: + """Validate the typed value; reopen on missing/invalid, else read it back to confirm.""" + lang = self._lang(raw_data) + global_data = ( + raw_data.get("global_data", {}) if isinstance(raw_data, dict) else {} + ) + value = str(global_data.get(self.gd_key) or "").strip() + if not value or not validate_input(value, self.input_type): + return self._open(lang, spoken=self._pick(self.invalid_prompt, lang)) + spoken = self._spoken_value(value, lang) + return FunctionResult( + f'The caller TYPED this on screen; read it back EXACTLY as "{spoken}" ' + "(do not voice any @ or dot symbols, and do not change it), then ask if it is " + f"correct. On their YES, proceed with the value {value!r}. If it is wrong, call " + f"{self.request_tool} to reopen the keypad so they re-type it; never ask for it by voice." + ) + + # ------------------------------------------------------------------ # + # Internals + # ------------------------------------------------------------------ # + + def _open(self, lang: str, spoken: str | None = None) -> FunctionResult: + """Open (or reopen) the keypad: speak ``spoken`` (default: the open prompt), emit the + input-request user-event so the app focuses the field, and park for the typed value.""" + return ( + FunctionResult( + spoken if spoken is not None else self._pick(self.open_prompt, lang) + ) + .swml_user_event( + input_request_payload( + self.gd_key, + label=self._pick(self.field_label, lang), + input_type=self.input_type, + ) + ) + .wait_for_user(answer_first=True) + ) + + def _spoken_value(self, value: str, lang: str) -> str: + """Read-back form: an email said as words, anything else spelled out so it is unambiguous.""" + v = get_verbalizer(lang) + if self.input_type == "email": + return v.email(value) + return v.spell(value) + + @staticmethod + def _lang(raw_data: dict[str, Any]) -> str: + global_data = ( + raw_data.get("global_data", {}) if isinstance(raw_data, dict) else {} + ) + return str(global_data.get("language") or _DEFAULT_LANG) + + def _pick(self, mapping: dict[str, str], lang: str) -> str: + """Resolve a per-language map for the call language; fall back to English, then any value.""" + return ( + mapping.get(lang) + or mapping.get(_DEFAULT_LANG) + or (next(iter(mapping.values())) if mapping else "") + ) + + def get_hints(self) -> list[str]: + """No ASR hints: the value is typed on the keypad, not spoken, so STT never sees it.""" + return [] + + def get_prompt_sections(self) -> list[dict[str, Any]]: + """Tell the model to route this field through the keypad, never voice.""" + return [ + { + "title": f"Typed input: {self.field}", + "body": ( + f"To collect the caller's {self.field}, do NOT ask for it by voice. Call " + f"{self.request_tool} to open the on-screen keypad; the caller types it and the " + f"value arrives in global_data.{self.gd_key}. Then call {self.confirm_tool} to " + "read it back and confirm. If it is wrong, call the request tool again to reopen." + ), + } + ] diff --git a/tests/unit/skills/test_typed_input_skill.py b/tests/unit/skills/test_typed_input_skill.py new file mode 100644 index 00000000..4ac69943 --- /dev/null +++ b/tests/unit/skills/test_typed_input_skill.py @@ -0,0 +1,146 @@ +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Unit tests for the typed_input skill. +""" + +import json +from unittest.mock import Mock + +from signalwire.core.function_result import FunctionResult +from signalwire.skills.typed_input.skill import TypedInputSkill + +_PARAMS = { + "field": "installer_email", + "input_type": "email", + "open_prompt": { + "en": "Please type the email on your screen.", + "pl": "Wpisz adres e-mail na ekranie.", + }, + "field_label": {"en": "Installer's email", "pl": "Adres e-mail instalatora"}, + "invalid_prompt": { + "en": "That does not look like a valid email; please type it again.", + "pl": "To nie wygląda na poprawny adres; wpisz go ponownie.", + }, +} + + +def _make_skill(params=None): + """Create a TypedInputSkill with a mocked agent and run setup().""" + skill = TypedInputSkill( + agent=Mock(), params=dict(params if params is not None else _PARAMS) + ) + assert skill.setup() is True + return skill + + +def _actions_json(result: FunctionResult) -> str: + return json.dumps(result.action, ensure_ascii=False) + + +class TestClassAttributes: + def test_name_and_multi_instance(self): + assert TypedInputSkill.SKILL_NAME == "typed_input" + assert TypedInputSkill.SKILL_VERSION == "1.0.0" + assert TypedInputSkill.SUPPORTS_MULTIPLE_INSTANCES is True + assert TypedInputSkill.REQUIRED_PACKAGES == [] + + +class TestParameterSchema: + def test_declares_field_and_prompt_params(self): + schema = TypedInputSkill.get_parameter_schema() + for key in ( + "field", + "input_type", + "open_prompt", + "field_label", + "invalid_prompt", + ): + assert key in schema + assert schema["field"]["required"] is True + assert schema["input_type"]["enum"] == ["email", "phone", "number", "text"] + + +class TestSetup: + def test_derives_per_field_names(self): + skill = _make_skill() + assert skill.gd_key == "typed_installer_email" + assert skill.request_tool == "request_installer_email" + assert skill.confirm_tool == "confirm_installer_email" + + def test_instance_key_is_per_field(self): + assert _make_skill().get_instance_key() == "typed_input_installer_email" + + def test_setup_fails_without_field(self): + skill = TypedInputSkill( + agent=Mock(), params={"open_prompt": {}, "field_label": {}} + ) + assert skill.setup() is False + + +class TestRegisterTools: + def test_registers_request_and_confirm(self): + skill = _make_skill() + skill.agent.define_tool = Mock() + skill.register_tools() + names = [c.kwargs["name"] for c in skill.agent.define_tool.call_args_list] + assert names == ["request_installer_email", "confirm_installer_email"] + # the value comes from global_data, never a model argument + for c in skill.agent.define_tool.call_args_list: + assert c.kwargs["parameters"] == {} + + +class TestOpenHandler: + def test_opens_keypad_in_call_language(self): + skill = _make_skill() + result = skill._open_handler({}, {"global_data": {"language": "pl"}}) + assert isinstance(result, FunctionResult) + assert result.response == "Wpisz adres e-mail na ekranie." + actions = _actions_json(result) + assert "input_request" in actions + assert "typed_installer_email" in actions + assert "email" in actions + assert "wait_for_user" in actions + + def test_falls_back_to_english_for_unknown_language(self): + skill = _make_skill() + result = skill._open_handler({}, {"global_data": {"language": "fr"}}) + assert result.response == "Please type the email on your screen." + + def test_defaults_to_english_with_no_language(self): + skill = _make_skill() + result = skill._open_handler({}, {}) + assert result.response == "Please type the email on your screen." + + +class TestConfirmHandler: + def test_valid_email_reads_back_spoken_form(self): + skill = _make_skill() + raw = { + "global_data": {"language": "pl", "typed_installer_email": "a.b@gmail.com"} + } + result = skill._confirm_handler({}, raw) + # Polish spoken form, the raw value, and the reopen tool for a NO. + assert "a kropka b małpka gmail kropka com" in result.response + assert "a.b@gmail.com" in result.response + assert "request_installer_email" in result.response + # a read-back does NOT reopen the keypad + assert "input_request" not in _actions_json(result) + + def test_missing_value_reopens_keypad(self): + skill = _make_skill() + result = skill._confirm_handler({}, {"global_data": {"language": "en"}}) + assert result.response == _PARAMS["invalid_prompt"]["en"] + assert "input_request" in _actions_json(result) + + def test_invalid_email_reopens_keypad(self): + skill = _make_skill() + raw = {"global_data": {"language": "en", "typed_installer_email": "notanemail"}} + result = skill._confirm_handler({}, raw) + assert result.response == _PARAMS["invalid_prompt"]["en"] + assert "input_request" in _actions_json(result) From 0da650f5485c433dfb1c63263059df246e71d18d Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Wed, 1 Jul 2026 12:16:18 +0100 Subject: [PATCH 03/10] feat(conversation_kit): verbalizer spells known acronyms letter-by-letter Add Verbalizer.spell_acronyms(text): reads generic technical acronyms (DIN, ISO, PPV, RMS, UTC) letter-by-letter via the per-language alphabet, so a TTS engine says "er em es" instead of mangling "RMS" into a word. Whole-token, case-sensitive matching (longest first) so it never touches a lowercase word ("din"), a substring inside a longer word ("isolation"), or an unknown all-caps name/code. The acronym set is a ClassVar, extensible per subclass. Two unit tests. --- .../conversation_kit/verbalizer/base.py | 22 +++++++++++++++ .../conversation_kit/test_verbalizer_pl.py | 28 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/signalwire/signalwire/conversation_kit/verbalizer/base.py b/signalwire/signalwire/conversation_kit/verbalizer/base.py index c08a8734..bc80e7ad 100644 --- a/signalwire/signalwire/conversation_kit/verbalizer/base.py +++ b/signalwire/signalwire/conversation_kit/verbalizer/base.py @@ -54,6 +54,13 @@ class Verbalizer: #: Base LLM directive for this language (e.g. "Mów po polsku."). Optional. INSTRUCTION: ClassVar[str] = "" + #: Generic technical acronyms ``spell_acronyms`` reads letter-by-letter so a TTS + #: engine says "er em es", not "rms". Language-agnostic membership (only the + #: spelling differs per language); extend by overriding in a subclass. Kept to + #: well-known acronyms on purpose — capitalization alone never triggers spelling, + #: so an all-caps name (a customer code, a device id) is left spoken as-is. + ACRONYMS: ClassVar[frozenset[str]] = frozenset({"DIN", "ISO", "PPV", "RMS", "UTC"}) + def guidance(self, glossary: dict[str, str] | None = None) -> str: """LLM speaking instructions for everything done via instruction (not deterministic transforms). These rules are GENERIC and LANGUAGE-AGNOSTIC — @@ -163,3 +170,18 @@ def spell(self, token: str) -> str: else: out.append(ch) return " ".join(out) + + def spell_acronyms(self, text: str) -> str: + """Read every known acronym (``ACRONYMS``) in free text letter-by-letter, so a + TTS engine says "er em es" instead of mangling "RMS" into a word. Matched as a + whole token and CASE-SENSITIVELY, so it never touches a lowercase word (English + "din"), a longer word ("isolation" contains "iso"), or an unknown all-caps name + (a customer code) — only the exact, known acronyms. Longest matched first so a + shorter acronym can't partially consume a longer one. + """ + if not text or not self.ACRONYMS: + return text + alternation = "|".join( + re.escape(a) for a in sorted(self.ACRONYMS, key=len, reverse=True) + ) + return re.sub(rf"\b({alternation})\b", lambda m: self.spell(m.group(1)), text) diff --git a/tests/unit/conversation_kit/test_verbalizer_pl.py b/tests/unit/conversation_kit/test_verbalizer_pl.py index c22ffdca..21b17f03 100644 --- a/tests/unit/conversation_kit/test_verbalizer_pl.py +++ b/tests/unit/conversation_kit/test_verbalizer_pl.py @@ -152,6 +152,34 @@ def test_guidance(): assert "EXACTLY as written" not in Verbalizer().guidance() +def test_spell_acronyms(): + # known acronyms -> spelled letter-by-letter in Polish; numbers untouched + _check( + [ + ("RMS", "er em es"), + ("PPV", "pe pe fau"), + ("UTC", "u te ce"), + ("ISO 10816", "i es o 10816"), + ("DIN 4150-3", "de i en 4150-3"), + ("Czas 08:13 UTC", "Czas 08:13 u te ce"), + ("poziom RMS na ISO", "poziom er em es na i es o"), + # NEVER spelled: lowercase word (case-sensitive), substring in a longer word, + # a boundary near-miss, or an unknown all-caps name (a customer code): + ("din w hali", "din w hali"), + ("izolacja", "izolacja"), + ("DINO", "DINO"), + ("klient ITH", "klient ITH"), + ], + PL.spell_acronyms, + ) + + +def test_spell_acronyms_english(): + en = get("en") + assert en.spell_acronyms("RMS at ISO 10816") == "R M S at I S O 10816" + assert en.spell_acronyms("the din of the machine") == "the din of the machine" + + if __name__ == "__main__": for name, fn in sorted(globals().items()): if name.startswith("test_") and callable(fn): From dd48ea218ea8f1141b77fe0c0ebe5c01b6e97a39 Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Wed, 1 Jul 2026 13:25:33 +0100 Subject: [PATCH 04/10] conversation_kit(verbalizer): spoken dates/times via datetime_text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The verbalizer already read ISO measured values and acronyms; add temporal verbalization so a language that needs it (Polish) speaks dates and clock times naturally instead of letting the model guess at the digits — on a combined timestamp it reads the day into the minutes. - base.Verbalizer: extend date() with with_weekday/with_year, add time() (24h passthrough) and a VERBALIZES_DATETIME opt-in flag; datetime_text() rewrites ISO dates and date-times in free text (date-times first; a trailing UTC/Z is left in place for spell_acronyms to read). Base and English stay a no-op — they read ISO acceptably. - pl: _PL_HOURS feminine hour names + time() (on-the-hour reads hour only; a single-digit minute keeps its leading zero), VERBALIZES_DATETIME=True. - tests: time() and datetime_text() PL cases + English no-op (24 total). --- .../conversation_kit/verbalizer/base.py | 32 +++++++++++++++- .../verbalizer/languages/pl.py | 37 +++++++++++++++++++ .../conversation_kit/test_verbalizer_pl.py | 26 +++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) diff --git a/signalwire/signalwire/conversation_kit/verbalizer/base.py b/signalwire/signalwire/conversation_kit/verbalizer/base.py index bc80e7ad..b47c3970 100644 --- a/signalwire/signalwire/conversation_kit/verbalizer/base.py +++ b/signalwire/signalwire/conversation_kit/verbalizer/base.py @@ -61,6 +61,10 @@ class Verbalizer: #: so an all-caps name (a customer code, a device id) is left spoken as-is. ACRONYMS: ClassVar[frozenset[str]] = frozenset({"DIN", "ISO", "PPV", "RMS", "UTC"}) + #: Whether this language verbalizes dates/times in ``datetime_text``. The base and + #: English read ISO dates/times acceptably as-is, so it stays a no-op there. + VERBALIZES_DATETIME: ClassVar[bool] = False + def guidance(self, glossary: dict[str, str] | None = None) -> str: """LLM speaking instructions for everything done via instruction (not deterministic transforms). These rules are GENERIC and LANGUAGE-AGNOSTIC — @@ -111,10 +115,14 @@ def unit(self, value: Numeric, unit: str) -> str: # --- temporal ------------------------------------------------------- # - def date(self, iso: str) -> str: + def date(self, iso: str, with_weekday: bool = True, with_year: bool = True) -> str: """An ISO date (YYYY-MM-DD) spoken naturally. Base: passthrough.""" return iso + def time(self, hour: int, minute: int) -> str: + """A 24-hour clock time spoken naturally. Base: passthrough 'HH:MM'.""" + return f"{hour:02d}:{minute:02d}" + # --- identifiers (structure is universal; only the words differ) ---- # def email(self, address: str) -> str: @@ -185,3 +193,25 @@ def spell_acronyms(self, text: str) -> str: re.escape(a) for a in sorted(self.ACRONYMS, key=len, reverse=True) ) return re.sub(rf"\b({alternation})\b", lambda m: self.spell(m.group(1)), text) + + def datetime_text(self, text: str) -> str: + """Verbalize ISO dates and date-times in free text, so a TTS engine reads them + naturally instead of the model guessing at the digits (on a combined timestamp it + mixes the day into the minutes). No-op unless the language sets VERBALIZES_DATETIME. + Date-times are matched before bare dates; a trailing "UTC"/"Z" is left in place for + the acronym pass to spell. + """ + if not text or not self.VERBALIZES_DATETIME: + return text + + def _datetime(m: re.Match[str]) -> str: + iso, hour, minute = m.group(1), int(m.group(2)), int(m.group(3)) + suffix = m.group(4) or "" + return f"{self.date(iso, with_weekday=False)}, {self.time(hour, minute)}{suffix}" + + text = re.sub( + r"\b(\d{4}-\d{2}-\d{2})[ T](\d{2}):(\d{2})(?::\d{2})?( ?UTC| ?Z)?\b", + _datetime, + text, + ) + return re.sub(r"\b(\d{4}-\d{2}-\d{2})\b", lambda m: self.date(m.group(1)), text) diff --git a/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py b/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py index fe8e47e5..a7ef7837 100644 --- a/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py +++ b/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py @@ -298,6 +298,35 @@ def _year(y: int) -> str: "ż": "żet", } +# 24-hour clock: feminine nominative hour names ("godzina jedenasta"). Hour 0 reads as +# the cardinal "zero" (the standard 24h form); minutes are plain cardinals. +_PL_HOURS = { + 0: "zero", + 1: "pierwsza", + 2: "druga", + 3: "trzecia", + 4: "czwarta", + 5: "piąta", + 6: "szósta", + 7: "siódma", + 8: "ósma", + 9: "dziewiąta", + 10: "dziesiąta", + 11: "jedenasta", + 12: "dwunasta", + 13: "trzynasta", + 14: "czternasta", + 15: "piętnasta", + 16: "szesnasta", + 17: "siedemnasta", + 18: "osiemnasta", + 19: "dziewiętnasta", + 20: "dwudziesta", + 21: "dwudziesta pierwsza", + 22: "dwudziesta druga", + 23: "dwudziesta trzecia", +} + def _decimal(value: Numeric) -> Decimal: return Decimal(str(value).strip().replace(",", ".")) @@ -316,6 +345,7 @@ class PolishVerbalizer(Verbalizer): LETTERS: ClassVar[dict[str, str]] = _PL_LETTERS MEASURE_UNITS: ClassVar[tuple[str, ...]] = tuple(_UNITS) INSTRUCTION: ClassVar[str] = "Mów po polsku. Odpowiadaj w języku polskim." + VERBALIZES_DATETIME: ClassVar[bool] = True # guidance() is inherited from the base — the speaking rules are language-agnostic. # Polish-ness comes from INSTRUCTION + the glossary terms + the transforms above. @@ -365,3 +395,10 @@ def date(self, iso: str, with_weekday: bool = True, with_year: bool = True) -> s if with_year: parts.append(_year(y)) return " ".join(parts) + + def time(self, hour: int, minute: int) -> str: + h = _PL_HOURS.get(hour, str(hour)) + if minute == 0: + return h + mins = f"zero {cardinal(minute)}" if 1 <= minute <= 9 else cardinal(minute) + return f"{h} {mins}" diff --git a/tests/unit/conversation_kit/test_verbalizer_pl.py b/tests/unit/conversation_kit/test_verbalizer_pl.py index 21b17f03..f517cacb 100644 --- a/tests/unit/conversation_kit/test_verbalizer_pl.py +++ b/tests/unit/conversation_kit/test_verbalizer_pl.py @@ -180,6 +180,32 @@ def test_spell_acronyms_english(): assert en.spell_acronyms("the din of the machine") == "the din of the machine" +def test_time(): + assert PL.time(11, 31) == "jedenasta trzydzieści jeden" + assert ( + PL.time(8, 5) == "ósma zero pięć" + ) # single-digit minute reads the leading zero + assert PL.time(11, 0) == "jedenasta" # on the hour: hour only + assert PL.time(0, 31) == "zero trzydzieści jeden" + assert PL.time(23, 59) == "dwudziesta trzecia pięćdziesiąt dziewięć" + + +def test_datetime_text(): + # combined timestamp -> spoken date (no weekday) + time; UTC left for the acronym pass + assert PL.datetime_text("Czas: 2026-07-01 11:31 UTC") == ( + "Czas: pierwszego lipca dwa tysiące dwudziestego szóstego roku, " + "jedenasta trzydzieści jeden UTC" + ) + # a bare date -> full spoken date (weekday + day + month + year) + out = PL.datetime_text("spike on 2026-06-28") + assert "2026-06-28" not in out and "czerwca" in out + # English / base read ISO dates natively -> no-op + assert ( + get("en").datetime_text("Time: 2026-07-01 11:31 UTC") + == "Time: 2026-07-01 11:31 UTC" + ) + + if __name__ == "__main__": for name, fn in sorted(globals().items()): if name.startswith("test_") and callable(fn): From 2dd1320712ff3e47aa88aea1a271e9dfddb5883f Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Wed, 1 Jul 2026 17:15:18 +0100 Subject: [PATCH 05/10] conversation_kit(docs): bring README current + make it agent-navigable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The README predated the acronym-spelling and date/time verbalization work, so spell_acronyms/datetime_text/time() and the ACRONYMS/VERBALIZES_DATETIME attrs were undocumented. Update it and add the pieces an AI coding agent needs to extend the package safely: - Module map: every file, its responsibility, and its public names. - Full verbalizer surface: methods table + the three free-text passes (measure_text -> datetime_text -> spell_acronyms) with their gating class attrs and required run-order. - Adding a language: the class attributes that drive the shared methods, and the VERBALIZES_DATETIME opt-in. - Testing: the pytest command + the reference plugin to mirror. - Invariants: zero deps, no SDK import, product-agnostic, base-is-a-fallback, passes-are-no-op-by-default — the contract not to break. Docs only; no runtime change. --- .../signalwire/conversation_kit/README.md | 139 ++++++++++++++---- 1 file changed, 113 insertions(+), 26 deletions(-) diff --git a/signalwire/signalwire/conversation_kit/README.md b/signalwire/signalwire/conversation_kit/README.md index 52e5381f..47eb0a7c 100644 --- a/signalwire/signalwire/conversation_kit/README.md +++ b/signalwire/signalwire/conversation_kit/README.md @@ -17,17 +17,34 @@ Models are excellent at understanding intent and unreliable at two things that m exact on a live call: 1. **Calendar math** — "next Saturday" must resolve to one specific date, every time. -2. **Speaking values aloud** — "0.156 mm/s", "a.b@gmail.com", or "2026-07-04" must come out - as natural, correct speech in the caller's language, not as digits a TTS engine mangles. +2. **Speaking values aloud** — "0.156 mm/s", "a.b@gmail.com", "2026-07-04", or "RMS" must come + out as natural, correct speech in the caller's language, not as digits or letters a TTS + engine mangles. `conversation_kit` does both deterministically, so the model never has to. -## The three layers +## Module map + +Where each thing lives — start here when extending: + +| File | Responsibility | Public names | +|------|----------------|--------------| +| `__init__.py` | Package facade — re-exports the whole surface | `compute_date`, `validate_input`, `input_request_payload`, `verbalizer`, … | +| `dates.py` | Spoken-date → calendar-date math | `compute_date`, `RESOLVE_DATE_PARAMS`, `WEEKDAYS` | +| `inputs.py` | Input validation + typed-input (keypad) channel payload | `validate_input`, `is_valid_email/phone/number`, `input_request_payload`, `INPUT_REQUEST_TYPE` | +| `verbalizer/__init__.py` | Verbalizer facade — registers the built-in languages | `get`, `register`, `available`, `Verbalizer` | +| `verbalizer/base.py` | The `Verbalizer` interface **and** a safe language-neutral fallback | `Verbalizer` | +| `verbalizer/registry.py` | Language lookup with English fallback | `get`, `register`, `available` | +| `verbalizer/languages/en.py`, `pl.py` | Built-in language plugins | `EnglishVerbalizer`, `PolishVerbalizer` | + +Everything imports through the package root: ```python from signalwire.conversation_kit import compute_date, validate_input, verbalizer ``` +## The three layers + ### `dates` — spoken-date math The model passes the *semantic parts* it heard; the arithmetic happens here, so a wrong date @@ -69,29 +86,60 @@ checks the typed value before it's accepted. ```python from signalwire.conversation_kit import verbalizer -v = verbalizer.get("pl") -v.number("2.6") # 'dwa przecinek sześć' -v.unit("0.156", "mm/s") # 'zero przecinek sto pięćdziesiąt sześć milimetra na sekundę' -v.date("2026-07-04") # 'sobota, czwartego lipca dwa tysiące dwudziestego szóstego roku' -v.email("a.b@gmail.com") # 'a kropka b małpka gmail kropka com' -verbalizer.available() # ['en', 'pl'] +# English — mostly passthrough: a TTS engine already reads English numbers and dates +# correctly, so the plugin only steps in where it must (e.g. spelling out an email's +# separators). This is why the outputs below look close to the inputs. +en = verbalizer.get("en") +en.number("2.6") # '2.6' +en.unit("0.156", "mm/s") # '0.156 mm/s' +en.date("2026-07-04") # '2026-07-04' +en.time(11, 31) # '11:31' +en.email("a.b@gmail.com") # 'a dot b at gmail dot com' +en.spell("PV") # 'P V' + +# Polish — the exact same calls, fully verbalized. This is where the value is: a TTS +# engine mangles these, so the plugin produces correct spoken Polish deterministically. +pl = verbalizer.get("pl") +pl.number("2.6") # 'dwa przecinek sześć' +pl.unit("0.156", "mm/s") # 'zero przecinek sto pięćdziesiąt sześć milimetra na sekundę' +pl.date("2026-07-04") # 'sobota, czwartego lipca dwa tysiące dwudziestego szóstego roku' +pl.time(11, 31) # 'jedenasta trzydzieści jeden' +pl.email("a.b@gmail.com") # 'a kropka b małpka gmail kropka com' +pl.spell("PV") # 'pe fau' + +verbalizer.available() # ['en', 'pl'] ``` -Two helpers the agent leans on most: +**Full method surface** (override per language; the base is a safe fallback for every one): + +| Method | Does | Base behaviour | +|--------|------|----------------| +| `number(value)` | A bare number → words | passthrough (English TTS reads digits) | +| `unit(value, unit)` | Measured value + unit, agreement-correct | `" "` | +| `date(iso, with_weekday=True, with_year=True)` | ISO date → spoken date | passthrough (returns the ISO) | +| `time(hour, minute)` | 24h clock → spoken time | `"HH:MM"` | +| `email(address)` | Speak an email/identifier via `SEPARATORS` | shared across languages | +| `spell(token)` | Spell a token letter-by-letter via `LETTERS` | bare characters | +| `guidance(glossary=None)` | Per-language LLM speaking rules + optional glossary | generic, language-agnostic rules | -- **`measure_text(text)`** rewrites measured values + units found in a model-produced sentence - into spoken form (idempotent, safe to run over any reply). -- **`guidance(glossary=None)`** returns per-language speaking rules woven with an optional - product glossary, ready to drop into the model's prompt. +Three **free-text passes** the agent runs over a model-produced reply before TTS (each a no-op +unless the language opts in, so they're safe to run over any string): -`get(lang)` falls back to English for an unregistered language, so callers never guard. +| Pass | Rewrites | Gated on | Notes | +|------|----------|----------|-------| +| `measure_text(text)` | every ` ` and `- ` range | `MEASURE_UNITS` set | leaves ISO codes, dates, versions, bare numbers untouched | +| `datetime_text(text)` | ISO dates and date-times (`2026-07-01 11:31 UTC`) | `VERBALIZES_DATETIME` | date-times first; leaves a trailing `UTC`/`Z` for `spell_acronyms` | +| `spell_acronyms(text)` | known acronyms → letter-by-letter (`RMS` → `er em es`) | `ACRONYMS` non-empty | case-sensitive, whole-token; never touches lowercase words or unknown all-caps names | + +Run them in this order — `measure_text` → `datetime_text` → `spell_acronyms` — so the datetime +pass can hand its trailing `UTC` to the acronym pass. `get(lang)` falls back to English for an +unregistered language, so callers never guard. ## Adding a language -`Verbalizer` is a concrete, language-neutral base — subclass it and override only what differs -(`number`, `date`, usually `unit`/`spell`, plus the `SEPARATORS`/`LETTERS`/`MEASURE_UNITS`/ -`INSTRUCTION` class attributes; `email`/`measure_text` are driven by those attributes). Because -the base is a safe fallback, a partial plugin still works. +`Verbalizer` is a concrete, language-neutral base — subclass it and override only what differs. +Because the base is a safe fallback, a partial plugin still works; you can ship `number`/`date` +first and fill in the rest later. ```python from typing import ClassVar @@ -99,22 +147,61 @@ from signalwire.conversation_kit.verbalizer import Verbalizer, register class GermanVerbalizer(Verbalizer): lang: ClassVar[str] = "de" + + # Class attributes drive the shared methods — set these and email()/spell()/ + # measure_text()/spell_acronyms() work without overriding them: + SEPARATORS: ClassVar[dict[str, str]] = {"@": "at", ".": "Punkt", "-": "Bindestrich"} + LETTERS: ClassVar[dict[str, str]] = {"a": "a", "b": "be", ...} + MEASURE_UNITS: ClassVar[tuple[str, ...]] = ("mm/s", "Hz", "°C") + INSTRUCTION: ClassVar[str] = "Sprich auf Deutsch." + VERBALIZES_DATETIME: ClassVar[bool] = True # opt in to date()/time()/datetime_text() + # ACRONYMS defaults to {DIN, ISO, PPV, RMS, UTC}; override to extend per language. + def number(self, value): ... - def date(self, iso): ... + def date(self, iso, with_weekday=True, with_year=True): ... + def time(self, hour, minute): ... register(GermanVerbalizer()) # get("de") now resolves to it ``` -Built-in languages (EN, PL) register in `verbalizer/languages/`; an application can register -its own at runtime with `register(...)` without modifying the SDK. A language is only fully -"supported" when three things line up: a verbalizer plugin, inclusion in the agent's multilingual -`allowed` set, and a TTS voice. +Built-in languages (EN, PL) register in `verbalizer/languages/` and are wired up in +`verbalizer/__init__.py`; an application can also `register(...)` its own at runtime without +modifying the SDK. A language is only fully "supported" when three things line up: a verbalizer +plugin, inclusion in the agent's multilingual `allowed` set, and a TTS voice. + +## Testing + +Unit tests live at the SDK repo root under `tests/unit/conversation_kit/`. From the repo root: + +```bash +PYTHONPATH=signalwire python3 -m pytest tests/unit/conversation_kit/ -q +``` + +`pl.py` is the reference plugin — `tests/unit/conversation_kit/test_verbalizer_pl.py` exercises +cardinals, decimal place-value, unit agreement, dates/times, emails, `measure_text`, +`spell_acronyms`, and `datetime_text`; mirror it when adding a language. + +## Invariants (do not break) + +These hold the package's "product-agnostic leaf" contract — an agent editing this code must keep +all of them: + +- **Zero dependencies.** Standard library only. No third-party imports, ever — it keeps the + subpackage a self-contained, trivially testable leaf. +- **No SignalWire SDK import.** Even though it ships inside the SDK, `conversation_kit` never + imports the rest of it. The dependency arrow points one way: the agent imports this. +- **Product-agnostic.** No product names, no business logic, no I/O. The agent supplies product + wording (e.g. via `guidance()`'s glossary) and wraps these outputs into its own responses. +- **The base is a real fallback.** `Verbalizer()` and `get("")` must never raise — they + return sane English-ish passthrough. New methods on the base need a safe default. +- **Text passes stay no-op-by-default.** `measure_text`/`datetime_text`/`spell_acronyms` return + the input unchanged unless the relevant class attribute (`MEASURE_UNITS` / `VERBALIZES_DATETIME` + / `ACRONYMS`) opts in, and must not create false positives on prose, IDs, or versions. ## Design principles - **Deterministic, not generative** — same input, same output; the model decides *what*, this decides the exact value and wording. - **Zero dependencies** — lightweight, trivially unit-testable. -- **Product-agnostic** — no product names, no business logic, no I/O; the agent supplies the - product wording and wraps these outputs into its own responses. +- **Product-agnostic** — no product names, no business logic, no I/O. - **Plugin languages** — output is per-language behind one interface; new languages are additive. From 42236e362f6576aa8d1ee40e5ed3928a8edd2300 Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Wed, 1 Jul 2026 23:46:46 +0100 Subject: [PATCH 06/10] conversation_kit(dates): resolve 'in N days' offsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit compute_date could express today/tomorrow and weekdays but not a spoken day COUNT ('in two days' / 'za dwa dni') — the model had to approximate it as 'tomorrow' or, worse, as day-of-month 2. Add an in_days integer param + handling (today + N), kept distinct from the calendar day-of-month so an offset never lands on the wrong date. Tests cover the offset and that day-of-month still wins. --- signalwire/signalwire/conversation_kit/dates.py | 13 ++++++++++++- tests/unit/conversation_kit/test_dates.py | 11 +++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/signalwire/signalwire/conversation_kit/dates.py b/signalwire/signalwire/conversation_kit/dates.py index 47aa4d5c..e1a1b00b 100644 --- a/signalwire/signalwire/conversation_kit/dates.py +++ b/signalwire/signalwire/conversation_kit/dates.py @@ -52,9 +52,13 @@ "enum": ["today", "tomorrow"], "description": "Use instead of weekday for 'today'/'dziś'/'heute' or 'tomorrow'/'jutro'/'morgen'.", }, + "in_days": { + "type": "integer", + "description": "Relative day COUNT from today when the caller says 'in N days' ('za dwa dni'/'in two days' -> 2; 'za tydzień'/'in a week' -> 7). Use this for a spoken offset, NOT for a calendar day-of-month number.", + }, "day": { "type": "integer", - "description": "Day-of-month for an explicit date ('the 15th' -> 15).", + "description": "Day-of-month for an explicit date ('the 15th' -> 15). A calendar day number, NOT an 'in N days' offset (that is in_days).", }, "month": { "type": "integer", @@ -107,6 +111,13 @@ def compute_date(args: Mapping[str, Any], today: date) -> date | None: if rel == "tomorrow": return today + timedelta(days=1) + # 2b) Relative day COUNT: "in N days" ("za dwa dni" -> today + 2). Kept distinct + # from `day` (a calendar day-of-month) so a spoken offset never lands on the + # wrong date. + in_days = args.get("in_days") + if isinstance(in_days, int) and in_days > 0: + return today + timedelta(days=in_days) + # 3) Weekday (+ which). wd = str(args.get("weekday") or "").strip().lower() if wd in WEEKDAYS: diff --git a/tests/unit/conversation_kit/test_dates.py b/tests/unit/conversation_kit/test_dates.py index 63f239ad..bc063168 100644 --- a/tests/unit/conversation_kit/test_dates.py +++ b/tests/unit/conversation_kit/test_dates.py @@ -49,6 +49,17 @@ def test_explicit_month_in_past_rolls_to_next_year(): assert compute_date({"day": 10, "month": 1}, date(2026, 6, 28)) == date(2027, 1, 10) +def test_in_days_offset(): + # 'in N days' ('za dwa dni') = today + N, distinct from a day-of-month number. + today = date(2026, 7, 1) + assert compute_date({"in_days": 1}, today) == date(2026, 7, 2) + assert compute_date({"in_days": 2}, today) == date(2026, 7, 3) # NOT day-of-month 2 + assert compute_date({"in_days": 7}, today) == date(2026, 7, 8) + assert compute_date({"in_days": 0}, today) is None # a zero offset is not a date + # an explicit day-of-month still wins when the caller names a calendar number + assert compute_date({"day": 2}, today) == date(2026, 7, 2) + + def test_unresolvable_returns_none(): today = date(2026, 6, 28) assert compute_date({}, today) is None From 7c7b5a04050ff5f38edbb9220a14fbc428a1a7d5 Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Thu, 2 Jul 2026 01:12:51 +0100 Subject: [PATCH 07/10] conversation_kit: pre-PR review fixes (crashes, robustness, product-agnostic) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses an independent critical review. CI green: ruff format+check, mypy (check_untyped_defs), pytest (33 passed, was 25). Crashes: - pl cardinal() rewritten to a general 1000-grouping algorithm with millions + milliards tiers (KeyError'd >= 1_000_000, incl. long fractions); raises ValueError above the milliard scale. - base measure_text ranges used a hardcoded Polish 'do'; add RANGE_WORD ClassVar ('to' base, 'do' pl) so a non-Polish plugin's ranges aren't Polish. - datetime_text guards its callbacks + pl.date() validates up front, so a date-shaped-but-invalid token ('2026-13-45', '25:99') is left untouched. Robustness / correctness: - compute_date: 'the 31st' in a short month rolls forward; an out-of-range explicit month/year is None (not silently today's); bool excluded from int day/month/year/in_days; drop undocumented which-synonyms. - datetime_text normalizes a trailing Z/UTC to a spellable ' UTC'. - time() validates 0-23 / 0-59 (base + pl). - inputs.is_valid_number rejects nan/inf. - registry.get() falls back to the neutral BASE (not English) for an unknown language, so it keeps the generic guidance(); docstrings + README updated. - fix package docstrings (from signalwire.conversation_kit ...); export Numeric; input_request_payload -> dict[str,str]; WEEKDAYS -> tuple. Product-agnostic: - drop domain-specific 'PPV' from the base ACRONYMS default (apps add domain acronyms by subclass); scrub product-y test data. Deferred: Polish 'od do ' range grammar (needs a genitive declension table) — kept the nominative form as a documented simplification. --- .../signalwire/conversation_kit/README.md | 10 +- .../signalwire/conversation_kit/__init__.py | 2 +- .../signalwire/conversation_kit/dates.py | 70 ++++++++------ .../signalwire/conversation_kit/inputs.py | 7 +- .../conversation_kit/verbalizer/__init__.py | 5 +- .../conversation_kit/verbalizer/base.py | 51 ++++++++--- .../verbalizer/languages/pl.py | 58 ++++++++---- .../conversation_kit/verbalizer/registry.py | 14 ++- tests/unit/conversation_kit/test_dates.py | 38 +++++++- tests/unit/conversation_kit/test_inputs.py | 15 ++- .../conversation_kit/test_verbalizer_pl.py | 91 +++++++++++++++++-- 11 files changed, 281 insertions(+), 80 deletions(-) diff --git a/signalwire/signalwire/conversation_kit/README.md b/signalwire/signalwire/conversation_kit/README.md index 47eb0a7c..b9d917c5 100644 --- a/signalwire/signalwire/conversation_kit/README.md +++ b/signalwire/signalwire/conversation_kit/README.md @@ -128,12 +128,13 @@ unless the language opts in, so they're safe to run over any string): | Pass | Rewrites | Gated on | Notes | |------|----------|----------|-------| | `measure_text(text)` | every ` ` and `- ` range | `MEASURE_UNITS` set | leaves ISO codes, dates, versions, bare numbers untouched | -| `datetime_text(text)` | ISO dates and date-times (`2026-07-01 11:31 UTC`) | `VERBALIZES_DATETIME` | date-times first; leaves a trailing `UTC`/`Z` for `spell_acronyms` | +| `datetime_text(text)` | ISO dates and date-times (`2026-07-01 11:31 UTC`) | `VERBALIZES_DATETIME` | date-times first; normalizes a trailing `Z`/`UTC` to a spellable ` UTC`; a date-shaped-but-invalid token is left untouched | | `spell_acronyms(text)` | known acronyms → letter-by-letter (`RMS` → `er em es`) | `ACRONYMS` non-empty | case-sensitive, whole-token; never touches lowercase words or unknown all-caps names | Run them in this order — `measure_text` → `datetime_text` → `spell_acronyms` — so the datetime -pass can hand its trailing `UTC` to the acronym pass. `get(lang)` falls back to English for an -unregistered language, so callers never guard. +pass can hand its trailing `UTC` to the acronym pass. `get(lang)` falls back to the neutral base +verbalizer for an unregistered language (which keeps the generic `guidance()` — English opts out +of it), so callers never guard. ## Adding a language @@ -155,7 +156,8 @@ class GermanVerbalizer(Verbalizer): MEASURE_UNITS: ClassVar[tuple[str, ...]] = ("mm/s", "Hz", "°C") INSTRUCTION: ClassVar[str] = "Sprich auf Deutsch." VERBALIZES_DATETIME: ClassVar[bool] = True # opt in to date()/time()/datetime_text() - # ACRONYMS defaults to {DIN, ISO, PPV, RMS, UTC}; override to extend per language. + # ACRONYMS defaults to domain-neutral {DIN, ISO, RMS, UTC}; add your app's own + # domain acronyms by overriding (e.g. ACRONYMS = Verbalizer.ACRONYMS | {"PPV"}). def number(self, value): ... def date(self, iso, with_weekday=True, with_year=True): ... diff --git a/signalwire/signalwire/conversation_kit/__init__.py b/signalwire/signalwire/conversation_kit/__init__.py index 71a5ef89..edcee7e3 100644 --- a/signalwire/signalwire/conversation_kit/__init__.py +++ b/signalwire/signalwire/conversation_kit/__init__.py @@ -10,7 +10,7 @@ Three layers a spoken-conversation agent needs, none tied to any particular product: - from conversation_kit import compute_date, validate_input, verbalizer + from signalwire.conversation_kit import compute_date, validate_input, verbalizer compute_date({"weekday": "saturday", "which": "next"}, date.today()) # spoken -> ISO date validate_input("a@b.com", "email") # input checks diff --git a/signalwire/signalwire/conversation_kit/dates.py b/signalwire/signalwire/conversation_kit/dates.py index e1a1b00b..ba347842 100644 --- a/signalwire/signalwire/conversation_kit/dates.py +++ b/signalwire/signalwire/conversation_kit/dates.py @@ -23,7 +23,7 @@ from datetime import date, timedelta from typing import Any -WEEKDAYS = [ +WEEKDAYS = ( "monday", "tuesday", "wednesday", @@ -31,7 +31,7 @@ "friday", "saturday", "sunday", -] +) # JSON-schema `properties` for a `resolve_date` tool — the universal contract # (multilingual hints baked in). An agent drops this straight into its tool schema: @@ -76,33 +76,51 @@ def compute_date(args: Mapping[str, Any], today: date) -> date | None: None. Convention: 'next ' = that weekday in the FOLLOWING calendar week; this/coming/nearest/bare = the soonest upcoming one (never today — a same-day weekday rolls to next week; readback can adjust).""" - # 1) Explicit calendar date: day (+ optional month/year). If it lands in the - # past with no year stated, roll forward to the next occurrence: an explicit - # month -> same date next year; a bare day -> the same day next month ("the - # 4th" on Jun 26 = Jul 4, not Jun 4 next year). + # 1) Explicit calendar date: day (+ optional month/year). With no year stated, + # resolve to the SOONEST occurrence on/after `today`: a bare day rolls + # month-by-month to the next month that actually contains it ("the 4th" on + # Jun 26 = Jul 4; "the 31st" in a 30-day month = the next month with a 31st); + # an explicit month rolls to next year. `bool` is excluded (it is an `int` + # subclass, so `True` would otherwise read as day/month 1). day = args.get("day") - if isinstance(day, int) and 1 <= day <= 31: - has_month = isinstance(args.get("month"), int) and 1 <= args["month"] <= 12 - has_year = isinstance(args.get("year"), int) - month = args["month"] if has_month else today.month - year = args["year"] if has_year else today.year - try: - target = date(year, month, day) - except ValueError: + if isinstance(day, int) and not isinstance(day, bool) and 1 <= day <= 31: + month_arg = args.get("month") + year_arg = args.get("year") + # An explicitly-supplied month/year that is out of range is a hard error — + # never silently substitute today's (a garbled 'month:13' must not book). + if month_arg is not None and not ( + isinstance(month_arg, int) + and not isinstance(month_arg, bool) + and 1 <= month_arg <= 12 + ): return None - if target < today and not has_year: + if year_arg is not None and not ( + isinstance(year_arg, int) + and not isinstance(year_arg, bool) + and 1000 <= year_arg <= 9999 + ): + return None + # Fully-stated y/m/d: take it verbatim (even if past); an impossible date + # (e.g. Feb 30) is None. + if year_arg is not None: try: - if has_month: - target = date(year + 1, month, day) - else: - target = ( - date(year + 1, 1, day) - if month == 12 - else date(year, month + 1, day) - ) + return date(year_arg, month_arg or today.month, day) except ValueError: return None - return target + # No year: soonest matching date >= today. With an explicit month the month + # is fixed (rolls to next year); a bare day walks forward month by month. + for i in range(14): + y = today.year + (today.month - 1 + i) // 12 + m = (today.month - 1 + i) % 12 + 1 + if month_arg is not None and m != month_arg: + continue + try: + cand = date(y, m, day) + except ValueError: + continue + if cand >= today: + return cand + return None # 2) Relative day word. rel = str(args.get("relative") or "").strip().lower() @@ -115,7 +133,7 @@ def compute_date(args: Mapping[str, Any], today: date) -> date | None: # from `day` (a calendar day-of-month) so a spoken offset never lands on the # wrong date. in_days = args.get("in_days") - if isinstance(in_days, int) and in_days > 0: + if isinstance(in_days, int) and not isinstance(in_days, bool) and in_days > 0: return today + timedelta(days=in_days) # 3) Weekday (+ which). @@ -123,7 +141,7 @@ def compute_date(args: Mapping[str, Any], today: date) -> date | None: if wd in WEEKDAYS: wd_idx = WEEKDAYS.index(wd) which = str(args.get("which") or "this").strip().lower() - if which in ("next", "next_week", "following"): + if which == "next": next_monday = today + timedelta(days=7 - today.weekday()) return next_monday + timedelta(days=wd_idx) ahead = (wd_idx - today.weekday()) % 7 diff --git a/signalwire/signalwire/conversation_kit/inputs.py b/signalwire/signalwire/conversation_kit/inputs.py index 371b1e51..810f68b8 100644 --- a/signalwire/signalwire/conversation_kit/inputs.py +++ b/signalwire/signalwire/conversation_kit/inputs.py @@ -18,6 +18,7 @@ from __future__ import annotations +import math import re # Pragmatic, TTS/keypad-oriented email shape: a@b.c with no spaces. Deliberately @@ -38,10 +39,10 @@ def is_valid_phone(value: str) -> bool: def is_valid_number(value: str) -> bool: try: - float((value or "").strip().replace(",", ".")) - return True + n = float((value or "").strip().replace(",", ".")) except ValueError: return False + return math.isfinite(n) # reject nan/inf/-inf — the verbalizer can't speak them _VALIDATORS = { @@ -70,7 +71,7 @@ def validate_input(value: str, input_type: str) -> bool: def input_request_payload( field: str, label: str = "", input_type: str = "text" -) -> dict: +) -> dict[str, str]: """The payload an agent emits (as a user_event) to ask the app to reveal a typed-input field. `field` is the key the typed value comes back under.""" return { diff --git a/signalwire/signalwire/conversation_kit/verbalizer/__init__.py b/signalwire/signalwire/conversation_kit/verbalizer/__init__.py index db5daa4c..09350002 100644 --- a/signalwire/signalwire/conversation_kit/verbalizer/__init__.py +++ b/signalwire/signalwire/conversation_kit/verbalizer/__init__.py @@ -8,7 +8,7 @@ verbalizer — language-agnostic, TTS-ready verbalization with language plugins. - from conversation_kit.verbalizer import get + from signalwire.conversation_kit.verbalizer import get v = get("pl") v.number("2.6") # 'dwa przecinek sześć' v.unit("0.156", "mm/s") # 'zero przecinek sto pięćdziesiąt sześć milimetra na sekundę' @@ -21,7 +21,7 @@ from __future__ import annotations -from .base import Verbalizer +from .base import Numeric, Verbalizer from .registry import available, get, register from .languages.en import EnglishVerbalizer from .languages.pl import PolishVerbalizer @@ -31,6 +31,7 @@ __all__ = [ "EnglishVerbalizer", + "Numeric", "PolishVerbalizer", "Verbalizer", "available", diff --git a/signalwire/signalwire/conversation_kit/verbalizer/base.py b/signalwire/signalwire/conversation_kit/verbalizer/base.py index b47c3970..40911ebb 100644 --- a/signalwire/signalwire/conversation_kit/verbalizer/base.py +++ b/signalwire/signalwire/conversation_kit/verbalizer/base.py @@ -56,15 +56,21 @@ class Verbalizer: #: Generic technical acronyms ``spell_acronyms`` reads letter-by-letter so a TTS #: engine says "er em es", not "rms". Language-agnostic membership (only the - #: spelling differs per language); extend by overriding in a subclass. Kept to - #: well-known acronyms on purpose — capitalization alone never triggers spelling, - #: so an all-caps name (a customer code, a device id) is left spoken as-is. - ACRONYMS: ClassVar[frozenset[str]] = frozenset({"DIN", "ISO", "PPV", "RMS", "UTC"}) + #: spelling differs per language); kept to widely-recognized, domain-neutral + #: acronyms on purpose. An application adds its OWN domain acronyms by subclassing + #: and overriding (e.g. ``ACRONYMS = Verbalizer.ACRONYMS | {"PPV"}``). Capitalization + #: alone never triggers spelling, so an all-caps name (a customer code, a device id) + #: is left spoken as-is. + ACRONYMS: ClassVar[frozenset[str]] = frozenset({"DIN", "ISO", "RMS", "UTC"}) #: Whether this language verbalizes dates/times in ``datetime_text``. The base and #: English read ISO dates/times acceptably as-is, so it stays a no-op there. VERBALIZES_DATETIME: ClassVar[bool] = False + #: Spoken connector between the two ends of a numeric range in ``measure_text`` + #: ("10 to 100 Hz"). Override per language (Polish "do", German "bis"). + RANGE_WORD: ClassVar[str] = "to" + def guidance(self, glossary: dict[str, str] | None = None) -> str: """LLM speaking instructions for everything done via instruction (not deterministic transforms). These rules are GENERIC and LANGUAGE-AGNOSTIC — @@ -120,7 +126,10 @@ def date(self, iso: str, with_weekday: bool = True, with_year: bool = True) -> s return iso def time(self, hour: int, minute: int) -> str: - """A 24-hour clock time spoken naturally. Base: passthrough 'HH:MM'.""" + """A 24-hour clock time spoken naturally. Base: passthrough 'HH:MM'. + Raises ValueError outside 0-23 / 0-59 (``datetime_text`` catches it).""" + if not (0 <= hour <= 23 and 0 <= minute <= 59): + raise ValueError(f"time out of range: {hour}:{minute}") return f"{hour:02d}:{minute:02d}" # --- identifiers (structure is universal; only the words differ) ---- # @@ -155,10 +164,14 @@ def measure_text(self, text: str) -> str: # number. Both separators are accepted because an LLM may emit either. The # en dash in the character class below is a deliberate alternative # separator, so RUF001 (ambiguous-character) is suppressed on that line. + # ``RANGE_WORD`` is the language's connector ("to"/"do"/"bis"). The two ends + # are read in isolation; a language that inflects range endpoints for case + # (Polish idiomatically "od do ") is a documented simplification. text = re.sub( rf"({num})\s*[–-]\s*({num})\s*({units})(?![\w])", # noqa: RUF001 lambda m: ( - f"{self.number(m.group(1))} do {self.unit(m.group(2), m.group(3))}" + f"{self.number(m.group(1))} {self.RANGE_WORD} " + f"{self.unit(m.group(2), m.group(3))}" ), text, ) @@ -198,20 +211,34 @@ def datetime_text(self, text: str) -> str: """Verbalize ISO dates and date-times in free text, so a TTS engine reads them naturally instead of the model guessing at the digits (on a combined timestamp it mixes the day into the minutes). No-op unless the language sets VERBALIZES_DATETIME. - Date-times are matched before bare dates; a trailing "UTC"/"Z" is left in place for - the acronym pass to spell. + Date-times are matched before bare dates; a trailing "Z"/"UTC" is normalized to a + spoken " UTC" for the acronym pass to spell. A date-shaped but INVALID token + (e.g. "2026-13-45", "25:99") is left untouched — these passes are safe over any text. """ if not text or not self.VERBALIZES_DATETIME: return text def _datetime(m: re.Match[str]) -> str: - iso, hour, minute = m.group(1), int(m.group(2)), int(m.group(3)) - suffix = m.group(4) or "" - return f"{self.date(iso, with_weekday=False)}, {self.time(hour, minute)}{suffix}" + try: + spoken_date = self.date(m.group(1), with_weekday=False) + spoken_time = self.time(int(m.group(2)), int(m.group(3))) + except (ValueError, KeyError): + return m.group(0) # not a real date/time — leave the token alone + suffix = " UTC" if m.group(4) else "" # normalize Z/UTC -> spellable " UTC" + return f"{spoken_date}, {spoken_time}{suffix}" + + def _bare_date(m: re.Match[str]) -> str: + try: + return self.date(m.group(1)) + except (ValueError, KeyError): + return m.group(0) text = re.sub( r"\b(\d{4}-\d{2}-\d{2})[ T](\d{2}):(\d{2})(?::\d{2})?( ?UTC| ?Z)?\b", _datetime, text, ) - return re.sub(r"\b(\d{4}-\d{2}-\d{2})\b", lambda m: self.date(m.group(1)), text) + # A date FOLLOWED BY a clock time is the date-time pass's job — the lookahead + # keeps this bare-date pass from re-grabbing the date half of a date-time whose + # time was invalid (so "2026-07-01 25:99" stays wholly untouched, not split). + return re.sub(r"\b(\d{4}-\d{2}-\d{2})\b(?![ T]\d{2}:\d{2})", _bare_date, text) diff --git a/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py b/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py index a7ef7837..35db4ea2 100644 --- a/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py +++ b/signalwire/signalwire/conversation_kit/verbalizer/languages/pl.py @@ -101,26 +101,46 @@ def _under_1000(n: int) -> str: return " ".join(parts) +# Scale words per 1000^k (k = 1..3): (nom_sg, nom_pl, gen_pl). Polish groups digits +# in threes; each non-zero group takes its scale word in the count-agreement form +# its own value selects (see _bucket). +_SCALES = ( + ("tysiąc", "tysiące", "tysięcy"), + ("milion", "miliony", "milionów"), + ("miliard", "miliardy", "miliardów"), +) + + def cardinal(n: int) -> str: - """Non-negative integer -> Polish words (0..999_999, covers our value range).""" + """Non-negative integer -> Polish words, up to 999_999_999_999. A larger value + raises ValueError (we cover up to the miliard scale).""" if n < 0: return "minus " + cardinal(-n) if n == 0: return "zero" - if n < 1000: - return _under_1000(n) - th, rem = divmod(n, 1000) + if n >= 1_000_000_000_000: + raise ValueError("value too large to verbalize") + groups = [] + rest = n + while rest > 0: + rest, g = divmod(rest, 1000) + groups.append(g) # [0]=units, [1]=thousands, [2]=millions, [3]=milliards parts = [] - if th == 1: - parts.append("tysiąc") - else: - word = {"nom_sg": "tysiąc", "nom_pl": "tysiące", "gen_pl": "tysięcy"}[ - _bucket(th) - ] - parts.append(_under_1000(th)) - parts.append(word) - if rem: - parts.append(_under_1000(rem)) + for idx in range(len(groups) - 1, -1, -1): + g = groups[idx] + if g == 0: + continue + if idx == 0: + parts.append(_under_1000(g)) + continue + nom_sg, nom_pl, gen_pl = _SCALES[idx - 1] + if g == 1: # 'tysiąc'/'milion', never 'jeden tysiąc' + parts.append(nom_sg) + else: + parts.append(_under_1000(g)) + parts.append( + {"nom_sg": nom_sg, "nom_pl": nom_pl, "gen_pl": gen_pl}[_bucket(g)] + ) return " ".join(parts) @@ -346,6 +366,7 @@ class PolishVerbalizer(Verbalizer): MEASURE_UNITS: ClassVar[tuple[str, ...]] = tuple(_UNITS) INSTRUCTION: ClassVar[str] = "Mów po polsku. Odpowiadaj w języku polskim." VERBALIZES_DATETIME: ClassVar[bool] = True + RANGE_WORD: ClassVar[str] = "do" # "10 do 100 Hz" (base default "to" is English) # guidance() is inherited from the base — the speaking rules are language-agnostic. # Polish-ness comes from INSTRUCTION + the glossary terms + the transforms above. @@ -387,9 +408,12 @@ def spell(self, token: str) -> str: def date(self, iso: str, with_weekday: bool = True, with_year: bool = True) -> str: y, m, d = (int(p) for p in iso.split("-")) + dt = _date( + y, m, d + ) # validates y/m/d up front: an impossible date raises ValueError parts = [] if with_weekday: - parts.append(_WEEKDAYS[_date(y, m, d).weekday()] + ",") + parts.append(_WEEKDAYS[dt.weekday()] + ",") parts.append(_ordinal_gen(d)) parts.append(_MONTHS[m]) if with_year: @@ -397,7 +421,9 @@ def date(self, iso: str, with_weekday: bool = True, with_year: bool = True) -> s return " ".join(parts) def time(self, hour: int, minute: int) -> str: - h = _PL_HOURS.get(hour, str(hour)) + if not (0 <= hour <= 23 and 0 <= minute <= 59): + raise ValueError(f"time out of range: {hour}:{minute}") + h = _PL_HOURS[hour] if minute == 0: return h mins = f"zero {cardinal(minute)}" if 1 <= minute <= 9 else cardinal(minute) diff --git a/signalwire/signalwire/conversation_kit/verbalizer/registry.py b/signalwire/signalwire/conversation_kit/verbalizer/registry.py index b37f8c32..4bbe3bc5 100644 --- a/signalwire/signalwire/conversation_kit/verbalizer/registry.py +++ b/signalwire/signalwire/conversation_kit/verbalizer/registry.py @@ -18,6 +18,9 @@ from .base import Verbalizer _REGISTRY: dict[str, Verbalizer] = {} +# The neutral, language-agnostic base. Returned by get() for any UNregistered +# language (e.g. "de" with no plugin), so unknown languages keep the base's generic +# guidance() and safe passthrough output instead of inheriting English's opt-outs. _DEFAULT = Verbalizer() @@ -30,13 +33,16 @@ def register(verbalizer: Verbalizer) -> Verbalizer: def get(lang: str | None) -> Verbalizer: """Resolve a verbalizer for a BCP-47 tag ('pl', 'pl-PL', 'de-DE', …). - Falls back to the 'en' plugin, then a neutral passthrough base, so this - never returns None and never raises on an unknown language. + Falls back to the neutral base ``Verbalizer`` for an unregistered language — NOT + to English. The base keeps the generic ``guidance()`` a real language still needs; + English is a specific plugin that deliberately opts out of guidance. Never returns + None, never raises on an unknown language. """ code = (lang or "").replace("_", "-").split("-")[0].lower() - return _REGISTRY.get(code) or _REGISTRY.get("en") or _DEFAULT + return _REGISTRY.get(code) or _DEFAULT def available() -> list[str]: - """The language codes currently registered.""" + """The language codes currently registered. (Registration is import-time, so no + concurrent-mutation guard is needed; sorted() already returns a fresh list.)""" return sorted(_REGISTRY) diff --git a/tests/unit/conversation_kit/test_dates.py b/tests/unit/conversation_kit/test_dates.py index bc063168..a33e43d3 100644 --- a/tests/unit/conversation_kit/test_dates.py +++ b/tests/unit/conversation_kit/test_dates.py @@ -1,4 +1,13 @@ -"""Tests for signalwire.conversation_kit.dates.compute_date — `today` pinned for determinism.""" +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Tests for signalwire.conversation_kit.dates.compute_date — `today` pinned for determinism. +""" from datetime import date @@ -60,6 +69,33 @@ def test_in_days_offset(): assert compute_date({"day": 2}, today) == date(2026, 7, 2) +def test_bare_day_absent_in_month_rolls_forward(): + # "the 31st" in a 30-day month resolves to the next month that has a 31st, + # not None. Same for "the 30th" in February. + assert compute_date({"day": 31}, date(2026, 6, 15)) == date(2026, 7, 31) + assert compute_date({"day": 30}, date(2026, 2, 10)) == date(2026, 3, 30) + assert compute_date({"day": 29}, date(2026, 2, 1)) == date( + 2026, 3, 29 + ) # 2026 not leap + + +def test_out_of_range_month_or_year_is_none(): + # An explicitly-supplied but invalid month/year is a hard error, never silently + # coerced to today's month/year. + d = date(2026, 1, 1) + assert compute_date({"day": 5, "month": 13}, d) is None + assert compute_date({"day": 5, "month": 0}, d) is None + assert compute_date({"day": 5, "year": 99}, d) is None + + +def test_bool_is_not_an_int_day_or_offset(): + # bool ⊂ int in Python — True must NOT read as day/month/in_days = 1. + d = date(2026, 1, 1) + assert compute_date({"day": True}, d) is None + assert compute_date({"in_days": True}, d) is None + assert compute_date({"day": 5, "month": True}, d) is None + + def test_unresolvable_returns_none(): today = date(2026, 6, 28) assert compute_date({}, today) is None diff --git a/tests/unit/conversation_kit/test_inputs.py b/tests/unit/conversation_kit/test_inputs.py index be7a1d1f..28d33b62 100644 --- a/tests/unit/conversation_kit/test_inputs.py +++ b/tests/unit/conversation_kit/test_inputs.py @@ -1,4 +1,13 @@ -"""Tests for signalwire.conversation_kit.inputs — typed-input validation + payload.""" +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Tests for signalwire.conversation_kit.inputs — typed-input validation + payload. +""" from signalwire.conversation_kit import ( input_request_payload, @@ -33,6 +42,10 @@ def test_number(): assert is_valid_number("3.14") assert not is_valid_number("abc") assert not is_valid_number("") + # float() parses these, but the verbalizer can't speak them -> reject as invalid. + assert not is_valid_number("nan") + assert not is_valid_number("inf") + assert not is_valid_number("-inf") def test_validate_input_dispatch(): diff --git a/tests/unit/conversation_kit/test_verbalizer_pl.py b/tests/unit/conversation_kit/test_verbalizer_pl.py index f517cacb..47aac6aa 100644 --- a/tests/unit/conversation_kit/test_verbalizer_pl.py +++ b/tests/unit/conversation_kit/test_verbalizer_pl.py @@ -1,4 +1,15 @@ -"""Polish verbalizer tests for signalwire.conversation_kit.verbalizer.""" +""" +Copyright (c) 2026 SignalWire + +This file is part of the SignalWire SDK. + +Licensed under the MIT License. +See LICENSE file in the project root for full license information. + +Polish verbalizer tests for signalwire.conversation_kit.verbalizer. +""" + +import pytest from signalwire.conversation_kit.verbalizer import get @@ -14,8 +25,14 @@ def test_lang_dispatch(): assert get("pl").lang == "pl" assert get("pl-PL").lang == "pl" assert get("PL").lang == "pl" - assert get("fr").lang == "en" # unknown -> English fallback - assert get(None).lang == "en" + # An unregistered language falls back to the neutral BASE (lang "und"), NOT English, + # so it keeps the base's generic guidance() instead of English's deliberate opt-out. + assert get("fr").lang == "und" + assert get(None).lang == "und" + assert ( + get("de").guidance() != "" + ) # generic guidance survives for an unknown language + assert get("en").guidance() == "" # the registered English plugin still opts out def test_cardinals(): @@ -136,14 +153,12 @@ def test_email(): def test_guidance(): # PL inherits the generic, language-agnostic guidance from the base. - g = PL.guidance({"severity": "nasilenie", "RMS vibration": "poziom drgań (RMS)"}) + g = PL.guidance({"severity": "nasilenie", "threshold": "próg"}) assert "naturally and idiomatically" in g # generic speak-naturally rule assert "EXACTLY as written" in g # number rule (PL has MEASURE_UNITS) assert "narrate the assembly" in g # email-narration rule (now shared/base) assert "never the ISO" in g # date rule - assert ( - "severity = nasilenie" in g and "poziom drgań (RMS)" in g - ) # glossary woven in + assert "severity = nasilenie" in g and "threshold = próg" in g # glossary woven in assert PL.INSTRUCTION.startswith("Mów po polsku") assert get("en").guidance() == "" # English opts out # the number rule is gated on MEASURE_UNITS — a unit-less base verbalizer omits it @@ -157,18 +172,19 @@ def test_spell_acronyms(): _check( [ ("RMS", "er em es"), - ("PPV", "pe pe fau"), ("UTC", "u te ce"), ("ISO 10816", "i es o 10816"), ("DIN 4150-3", "de i en 4150-3"), ("Czas 08:13 UTC", "Czas 08:13 u te ce"), ("poziom RMS na ISO", "poziom er em es na i es o"), # NEVER spelled: lowercase word (case-sensitive), substring in a longer word, - # a boundary near-miss, or an unknown all-caps name (a customer code): + # a boundary near-miss, an unknown all-caps name (a customer code), or a + # DOMAIN acronym not in the generic default (PPV — an app adds it by subclass): ("din w hali", "din w hali"), ("izolacja", "izolacja"), ("DINO", "DINO"), - ("klient ITH", "klient ITH"), + ("klient ACME", "klient ACME"), + ("poziom PPV tu", "poziom PPV tu"), ], PL.spell_acronyms, ) @@ -206,6 +222,61 @@ def test_datetime_text(): ) +def test_large_numbers(): + # millions / milliards must not KeyError (regression: cardinal() capped at thousands) + _check( + [ + ("1000000", "milion"), + ("2000000", "dwa miliony"), + ("5000000", "pięć milionów"), + ("1000000000", "miliard"), + ( + "1234567", + "milion dwieście trzydzieści cztery tysiące " + "pięćset sześćdziesiąt siedem", + ), + ], + PL.number, + ) + assert PL.unit("1000000", "Hz") == "milion herców" + # a long fraction reads its digits as one cardinal, so it must scale too + assert PL.number("1.1234567").startswith("jeden przecinek milion") + # above the milliard scale we fail loudly rather than mis-say a number + with pytest.raises(ValueError): + PL.number("1" + "0" * 12) + + +def test_range_word(): + # base/English range connector is "to"; Polish overrides to "do" + assert PL.measure_text("band 10-100 Hz") == "band dziesięć do sto herców" + from signalwire.conversation_kit.verbalizer.base import Verbalizer + + class _EnLike(Verbalizer): + MEASURE_UNITS = ("Hz",) + + assert _EnLike().measure_text("band 10-100 Hz") == "band 10 to 100 Hz" + + +def test_time_out_of_range(): + for bad in [(24, 0), (11, 60), (-1, 0), (0, -5)]: + with pytest.raises(ValueError): + PL.time(*bad) + + +def test_datetime_text_invalid_is_untouched(): + # date-shaped but impossible -> left exactly as-is (these passes are text-safe) + assert PL.datetime_text("x 2026-13-45 y") == "x 2026-13-45 y" + assert PL.datetime_text("t 2026-07-01 25:99 z") == "t 2026-07-01 25:99 z" + assert PL.datetime_text("2026-02-30 11:31") == "2026-02-30 11:31" + + +def test_datetime_text_z_suffix(): + # ISO "11:31Z" (no space, Z) -> normalized to a spellable " UTC", not glued + out = PL.datetime_text("2026-07-01T11:31Z") + assert out.endswith("jedenasta trzydzieści jeden UTC") + assert "jedenZ" not in out and "jedenastaZ" not in out + + if __name__ == "__main__": for name, fn in sorted(globals().items()): if name.startswith("test_") and callable(fn): From 019544866465bf331c25c2a5b3b73b432bf8ff99 Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Thu, 2 Jul 2026 01:24:20 +0100 Subject: [PATCH 08/10] conversation_kit: scrub product/personal fixtures from tests + README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the package free of any originating-product fingerprint before PR: - replace a real personal-name email (karolczyk.jakub@…) with a fictional jan.kowalski@example.com - rename the typed-input example field installer_email -> contact_email - neutralize domain-flavored sample text: 'RMS velocity'/'PPV:' -> 'reading'/ 'value'; the vibration standards ISO 10816 / DIN 4150-3 -> domain-neutral ISO 9001 / DIN 5008-1 Tests + README only; no runtime change (33 tests still pass). --- .../signalwire/conversation_kit/README.md | 6 ++--- tests/unit/conversation_kit/test_inputs.py | 14 ++++++------ .../conversation_kit/test_verbalizer_pl.py | 22 +++++++++---------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/signalwire/signalwire/conversation_kit/README.md b/signalwire/signalwire/conversation_kit/README.md index b9d917c5..127f8ab4 100644 --- a/signalwire/signalwire/conversation_kit/README.md +++ b/signalwire/signalwire/conversation_kit/README.md @@ -72,9 +72,9 @@ canonical lowercase list. from signalwire.conversation_kit import validate_input, input_request_payload validate_input("a.b@gmail.com", "email") # True (also "phone", "number") -input_request_payload("installer_email", label="Installer's email", input_type="email") -# -> {"type": "input_request", "field": "installer_email", -# "label": "Installer's email", "input_type": "email"} +input_request_payload("contact_email", label="Contact email", input_type="email") +# -> {"type": "input_request", "field": "contact_email", +# "label": "Contact email", "input_type": "email"} ``` `input_request_payload(...)` is the small event an agent sends to a connected app to reveal + diff --git a/tests/unit/conversation_kit/test_inputs.py b/tests/unit/conversation_kit/test_inputs.py index 28d33b62..6e6a8adf 100644 --- a/tests/unit/conversation_kit/test_inputs.py +++ b/tests/unit/conversation_kit/test_inputs.py @@ -19,12 +19,12 @@ def test_email(): - assert is_valid_email("karolczyk.jakub@gmail.com") + assert is_valid_email("jan.kowalski@example.com") assert is_valid_email("a@b.co") # A valid-format typo is still valid format (only the human read-back catches it). - assert is_valid_email("karolczyk.jakib@gmail.com") - assert not is_valid_email("karolczyk.jakib") # no @ - assert not is_valid_email("jakub@gmail") # no TLD dot + assert is_valid_email("jan.kowalsky@example.com") + assert not is_valid_email("jan.kowalsky") # no @ + assert not is_valid_email("jan@example") # no TLD dot assert not is_valid_email("a b@gmail.com") # space assert not is_valid_email("") @@ -59,11 +59,11 @@ def test_validate_input_dispatch(): def test_input_request_payload(): - p = input_request_payload("typed_installer_email", "Installer email", "email") + p = input_request_payload("contact_email", "Contact email", "email") assert p == { "type": "input_request", - "field": "typed_installer_email", - "label": "Installer email", + "field": "contact_email", + "label": "Contact email", "input_type": "email", } # Defaults. diff --git a/tests/unit/conversation_kit/test_verbalizer_pl.py b/tests/unit/conversation_kit/test_verbalizer_pl.py index 47aac6aa..ad0a8d88 100644 --- a/tests/unit/conversation_kit/test_verbalizer_pl.py +++ b/tests/unit/conversation_kit/test_verbalizer_pl.py @@ -107,12 +107,12 @@ def test_measure_text(): _check( [ ( - "RMS velocity: 0.156 mm/s on x", - "RMS velocity: zero przecinek sto pięćdziesiąt sześć milimetra na sekundę on x", + "reading: 0.156 mm/s on x", + "reading: zero przecinek sto pięćdziesiąt sześć milimetra na sekundę on x", ), ( - "PPV:2.6mm/s freq:100Hz", - "PPV:dwa przecinek sześć milimetra na sekundę freq:sto herców", + "value:2.6mm/s freq:100Hz", + "value:dwa przecinek sześć milimetra na sekundę freq:sto herców", ), ( "Temperature: 30.3°C", @@ -129,8 +129,8 @@ def test_measure_text(): ("peak 0.5 m/s²", "peak zero przecinek pięć metra na sekundę do kwadratu"), ("gusts 12 km/h", "gusts dwanaście kilometrów na godzinę"), # must be left alone (no unit / structural): - ("ISO 10816 zone", "ISO 10816 zone"), - ("DIN 4150-3 referenced", "DIN 4150-3 referenced"), + ("ISO 9001 zone", "ISO 9001 zone"), + ("DIN 5008-1 referenced", "DIN 5008-1 referenced"), ("on 2026-07-04 at 14:30", "on 2026-07-04 at 14:30"), ("version 2.5 build", "version 2.5 build"), ], @@ -142,8 +142,8 @@ def test_email(): _check( [ ( - "karolczyk.jakub@gmail.com", - "karolczyk kropka jakub małpka gmail kropka com", + "jan.kowalski@example.com", + "jan kropka kowalski małpka example kropka com", ), ("a-b_c@x.pl", "a myślnik b podkreślnik c małpka x kropka pl"), ], @@ -173,8 +173,8 @@ def test_spell_acronyms(): [ ("RMS", "er em es"), ("UTC", "u te ce"), - ("ISO 10816", "i es o 10816"), - ("DIN 4150-3", "de i en 4150-3"), + ("ISO 9001", "i es o 9001"), + ("DIN 5008-1", "de i en 5008-1"), ("Czas 08:13 UTC", "Czas 08:13 u te ce"), ("poziom RMS na ISO", "poziom er em es na i es o"), # NEVER spelled: lowercase word (case-sensitive), substring in a longer word, @@ -192,7 +192,7 @@ def test_spell_acronyms(): def test_spell_acronyms_english(): en = get("en") - assert en.spell_acronyms("RMS at ISO 10816") == "R M S at I S O 10816" + assert en.spell_acronyms("RMS at ISO 9001") == "R M S at I S O 9001" assert en.spell_acronyms("the din of the machine") == "the din of the machine" From ae0d499bff165ddffc2226b0e76658afd51d3016 Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Thu, 2 Jul 2026 01:58:24 +0100 Subject: [PATCH 09/10] skills/typed_input: product-agnostic examples + fail-loud on missing prompts Pre-PR review of the typed_input skill (CI already green; it correctly reuses conversation_kit.inputs rather than duplicating). Two fixes: - scrub the domain-flavored 'installer_email' / 'Installer's email' example from the docstring, README, and tests -> neutral 'contact_email' / 'Contact email'. - setup() now validates the three required per-language prompt maps (open_prompt / field_label / invalid_prompt): the schema marks them required but the loader doesn't enforce it, so a missing one would silently speak '' at runtime. Fail loud instead; test added. CI green: ruff format+check, mypy, pytest (13 typed_input tests). --- .../signalwire/skills/typed_input/README.md | 6 ++-- .../signalwire/skills/typed_input/skill.py | 17 +++++++--- tests/unit/skills/test_typed_input_skill.py | 32 ++++++++++++------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/signalwire/signalwire/skills/typed_input/README.md b/signalwire/signalwire/skills/typed_input/README.md index fea8f124..f8b8962d 100644 --- a/signalwire/signalwire/skills/typed_input/README.md +++ b/signalwire/signalwire/skills/typed_input/README.md @@ -23,7 +23,7 @@ Validation, the user-event payload, and the spoken read-back come from `signalwi ## Parameters -- `field` (string, **required**) — field key, e.g. `installer_email`. Tools become +- `field` (string, **required**) — field key, e.g. `contact_email`. Tools become `request_` / `confirm_`; the typed value lands in `global_data['typed_']`. - `input_type` (string, default `text`) — one of `email`, `phone`, `number`, `text`. Drives validation and the read-back form (an email is read as words; anything else is spelled out). @@ -41,13 +41,13 @@ Add it once per field; each instance gets its own `request_`/`confirm_` tools. ```python agent.add_skill("typed_input", { - "field": "installer_email", + "field": "contact_email", "input_type": "email", "open_prompt": { "en": "Please type the email on your screen.", "pl": "Wpisz adres e-mail na ekranie.", }, - "field_label": {"en": "Installer's email", "pl": "Adres e-mail instalatora"}, + "field_label": {"en": "Contact email", "pl": "Adres e-mail kontaktowy"}, "invalid_prompt": { "en": "That does not look like a valid email; please type it again.", "pl": "To nie wygląda na poprawny adres; wpisz go ponownie.", diff --git a/signalwire/signalwire/skills/typed_input/skill.py b/signalwire/signalwire/skills/typed_input/skill.py index 4daecb3d..7e344752 100644 --- a/signalwire/signalwire/skills/typed_input/skill.py +++ b/signalwire/signalwire/skills/typed_input/skill.py @@ -59,7 +59,7 @@ def get_parameter_schema(cls) -> dict[str, dict[str, Any]]: "field": { "type": "string", "description": ( - "Field key, e.g. 'installer_email'. Tools become request_ / " + "Field key, e.g. 'contact_email'. Tools become request_ / " "confirm_; the typed value lands in global_data['typed_']." ), "required": True, @@ -109,14 +109,23 @@ def setup(self) -> bool: "typed_input requires a non-empty string 'field' parameter" ) return False + # The three per-language prompt maps are required (the schema marks them so, + # but the loader doesn't enforce it) — fail loud rather than speak "" at runtime. + for name in ("open_prompt", "field_label", "invalid_prompt"): + val = self.params.get(name) + if not isinstance(val, dict) or not val: + self.logger.error( + f"typed_input requires a non-empty '{name}' per-language map" + ) + return False self.field: str = field self.input_type: str = self.params.get("input_type", "text") self.gd_key: str = f"typed_{field}" self.request_tool: str = f"request_{field}" self.confirm_tool: str = f"confirm_{field}" - self.open_prompt: dict[str, str] = self.params.get("open_prompt") or {} - self.field_label: dict[str, str] = self.params.get("field_label") or {} - self.invalid_prompt: dict[str, str] = self.params.get("invalid_prompt") or {} + self.open_prompt: dict[str, str] = self.params["open_prompt"] + self.field_label: dict[str, str] = self.params["field_label"] + self.invalid_prompt: dict[str, str] = self.params["invalid_prompt"] return True def register_tools(self) -> None: diff --git a/tests/unit/skills/test_typed_input_skill.py b/tests/unit/skills/test_typed_input_skill.py index 4ac69943..e4e10eca 100644 --- a/tests/unit/skills/test_typed_input_skill.py +++ b/tests/unit/skills/test_typed_input_skill.py @@ -16,13 +16,13 @@ from signalwire.skills.typed_input.skill import TypedInputSkill _PARAMS = { - "field": "installer_email", + "field": "contact_email", "input_type": "email", "open_prompt": { "en": "Please type the email on your screen.", "pl": "Wpisz adres e-mail na ekranie.", }, - "field_label": {"en": "Installer's email", "pl": "Adres e-mail instalatora"}, + "field_label": {"en": "Contact email", "pl": "Adres e-mail kontaktowy"}, "invalid_prompt": { "en": "That does not look like a valid email; please type it again.", "pl": "To nie wygląda na poprawny adres; wpisz go ponownie.", @@ -69,12 +69,12 @@ def test_declares_field_and_prompt_params(self): class TestSetup: def test_derives_per_field_names(self): skill = _make_skill() - assert skill.gd_key == "typed_installer_email" - assert skill.request_tool == "request_installer_email" - assert skill.confirm_tool == "confirm_installer_email" + assert skill.gd_key == "typed_contact_email" + assert skill.request_tool == "request_contact_email" + assert skill.confirm_tool == "confirm_contact_email" def test_instance_key_is_per_field(self): - assert _make_skill().get_instance_key() == "typed_input_installer_email" + assert _make_skill().get_instance_key() == "typed_input_contact_email" def test_setup_fails_without_field(self): skill = TypedInputSkill( @@ -82,6 +82,16 @@ def test_setup_fails_without_field(self): ) assert skill.setup() is False + def test_setup_fails_without_a_required_prompt(self): + # field present but a required per-language prompt map missing -> fail loud, + # so the skill never silently speaks "" at runtime. + for missing in ("open_prompt", "field_label", "invalid_prompt"): + params = {k: v for k, v in _PARAMS.items() if k != missing} + skill = TypedInputSkill(agent=Mock(), params=params) + assert skill.setup() is False, ( + f"setup() should fail when {missing} is absent" + ) + class TestRegisterTools: def test_registers_request_and_confirm(self): @@ -89,7 +99,7 @@ def test_registers_request_and_confirm(self): skill.agent.define_tool = Mock() skill.register_tools() names = [c.kwargs["name"] for c in skill.agent.define_tool.call_args_list] - assert names == ["request_installer_email", "confirm_installer_email"] + assert names == ["request_contact_email", "confirm_contact_email"] # the value comes from global_data, never a model argument for c in skill.agent.define_tool.call_args_list: assert c.kwargs["parameters"] == {} @@ -103,7 +113,7 @@ def test_opens_keypad_in_call_language(self): assert result.response == "Wpisz adres e-mail na ekranie." actions = _actions_json(result) assert "input_request" in actions - assert "typed_installer_email" in actions + assert "typed_contact_email" in actions assert "email" in actions assert "wait_for_user" in actions @@ -122,13 +132,13 @@ class TestConfirmHandler: def test_valid_email_reads_back_spoken_form(self): skill = _make_skill() raw = { - "global_data": {"language": "pl", "typed_installer_email": "a.b@gmail.com"} + "global_data": {"language": "pl", "typed_contact_email": "a.b@gmail.com"} } result = skill._confirm_handler({}, raw) # Polish spoken form, the raw value, and the reopen tool for a NO. assert "a kropka b małpka gmail kropka com" in result.response assert "a.b@gmail.com" in result.response - assert "request_installer_email" in result.response + assert "request_contact_email" in result.response # a read-back does NOT reopen the keypad assert "input_request" not in _actions_json(result) @@ -140,7 +150,7 @@ def test_missing_value_reopens_keypad(self): def test_invalid_email_reopens_keypad(self): skill = _make_skill() - raw = {"global_data": {"language": "en", "typed_installer_email": "notanemail"}} + raw = {"global_data": {"language": "en", "typed_contact_email": "notanemail"}} result = skill._confirm_handler({}, raw) assert result.response == _PARAMS["invalid_prompt"]["en"] assert "input_request" in _actions_json(result) From 17018201ef4597eee8aa4453a32f9541aa3c4351 Mon Sep 17 00:00:00 2001 From: Jakub Karolczyk Date: Thu, 2 Jul 2026 02:20:48 +0100 Subject: [PATCH 10/10] tests(conversation_kit): inline verbalizer assertions for the no-cheat audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _check() helper asserted inside itself, so the no-cheat audit's static scan saw the test bodies as assertion-free and flagged six as cheat tests. Inline each case as a visible 'assert fn(value) == expected' loop and drop the helper — the coverage is identical, now visible to the audit. No behaviour change; 46 tests pass. --- .../conversation_kit/test_verbalizer_pl.py | 226 ++++++++---------- 1 file changed, 103 insertions(+), 123 deletions(-) diff --git a/tests/unit/conversation_kit/test_verbalizer_pl.py b/tests/unit/conversation_kit/test_verbalizer_pl.py index ad0a8d88..101a9d24 100644 --- a/tests/unit/conversation_kit/test_verbalizer_pl.py +++ b/tests/unit/conversation_kit/test_verbalizer_pl.py @@ -16,11 +16,6 @@ PL = get("pl") -def _check(cases, fn): - bad = [(inp, exp, got) for inp, exp in cases if (got := fn(inp)) != exp] - assert not bad, "\n".join(f" {i!r}: expected {e!r}, got {g!r}" for i, e, g in bad) - - def test_lang_dispatch(): assert get("pl").lang == "pl" assert get("pl-PL").lang == "pl" @@ -36,35 +31,31 @@ def test_lang_dispatch(): def test_cardinals(): - _check( - [ - ("0", "zero"), - ("2", "dwa"), - ("5", "pięć"), - ("11", "jedenaście"), - ("21", "dwadzieścia jeden"), - ("156", "sto pięćdziesiąt sześć"), - ("1000", "tysiąc"), - ("2026", "dwa tysiące dwadzieścia sześć"), - ("1019", "tysiąc dziewiętnaście"), - ("5000", "pięć tysięcy"), - ], - PL.number, - ) + for value, expected in [ + ("0", "zero"), + ("2", "dwa"), + ("5", "pięć"), + ("11", "jedenaście"), + ("21", "dwadzieścia jeden"), + ("156", "sto pięćdziesiąt sześć"), + ("1000", "tysiąc"), + ("2026", "dwa tysiące dwadzieścia sześć"), + ("1019", "tysiąc dziewiętnaście"), + ("5000", "pięć tysięcy"), + ]: + assert PL.number(value) == expected def test_decimals_place_value(): - _check( - [ - ("2.6", "dwa przecinek sześć"), - ("2,6", "dwa przecinek sześć"), # comma input - ("0.156", "zero przecinek sto pięćdziesiąt sześć"), - ("30.3", "trzydzieści przecinek trzy"), - ("0.05", "zero przecinek zero pięć"), # leading fractional zero - ("-1.5", "minus jeden przecinek pięć"), - ], - PL.number, - ) + for value, expected in [ + ("2.6", "dwa przecinek sześć"), + ("2,6", "dwa przecinek sześć"), # comma input + ("0.156", "zero przecinek sto pięćdziesiąt sześć"), + ("30.3", "trzydzieści przecinek trzy"), + ("0.05", "zero przecinek zero pięć"), # leading fractional zero + ("-1.5", "minus jeden przecinek pięć"), + ]: + assert PL.number(value) == expected def test_unit_agreement(): @@ -83,72 +74,66 @@ def test_unit_agreement(): def test_dates(): - _check( - [ - ( - "2026-07-04", - "sobota, czwartego lipca dwa tysiące dwudziestego szóstego roku", - ), - ( - "2026-06-30", - "wtorek, trzydziestego czerwca dwa tysiące dwudziestego szóstego roku", - ), - ( - "2026-01-01", - "czwartek, pierwszego stycznia dwa tysiące dwudziestego szóstego roku", - ), - ], - PL.date, - ) + for value, expected in [ + ( + "2026-07-04", + "sobota, czwartego lipca dwa tysiące dwudziestego szóstego roku", + ), + ( + "2026-06-30", + "wtorek, trzydziestego czerwca dwa tysiące dwudziestego szóstego roku", + ), + ( + "2026-01-01", + "czwartek, pierwszego stycznia dwa tysiące dwudziestego szóstego roku", + ), + ]: + assert PL.date(value) == expected def test_measure_text(): # real handler formats: spaced, attached, ranges, and things that must NOT match - _check( - [ - ( - "reading: 0.156 mm/s on x", - "reading: zero przecinek sto pięćdziesiąt sześć milimetra na sekundę on x", - ), - ( - "value:2.6mm/s freq:100Hz", - "value:dwa przecinek sześć milimetra na sekundę freq:sto herców", - ), - ( - "Temperature: 30.3°C", - "Temperature: trzydzieści przecinek trzy stopnia Celsjusza", - ), - ("max 5.0 mm/s", "max pięć milimetrów na sekundę"), - ("1019 hPa", "tysiąc dziewiętnaście hektopaskali"), - ( - "range 20.5–25.3°C today", # noqa: RUF001 - "range dwadzieścia przecinek pięć do dwadzieścia pięć przecinek trzy stopnia Celsjusza today", - ), - ("band 10-100 Hz", "band dziesięć do sto herców"), - ("45% of the limit", "czterdzieści pięć procent of the limit"), - ("peak 0.5 m/s²", "peak zero przecinek pięć metra na sekundę do kwadratu"), - ("gusts 12 km/h", "gusts dwanaście kilometrów na godzinę"), - # must be left alone (no unit / structural): - ("ISO 9001 zone", "ISO 9001 zone"), - ("DIN 5008-1 referenced", "DIN 5008-1 referenced"), - ("on 2026-07-04 at 14:30", "on 2026-07-04 at 14:30"), - ("version 2.5 build", "version 2.5 build"), - ], - PL.measure_text, - ) + for value, expected in [ + ( + "reading: 0.156 mm/s on x", + "reading: zero przecinek sto pięćdziesiąt sześć milimetra na sekundę on x", + ), + ( + "value:2.6mm/s freq:100Hz", + "value:dwa przecinek sześć milimetra na sekundę freq:sto herców", + ), + ( + "Temperature: 30.3°C", + "Temperature: trzydzieści przecinek trzy stopnia Celsjusza", + ), + ("max 5.0 mm/s", "max pięć milimetrów na sekundę"), + ("1019 hPa", "tysiąc dziewiętnaście hektopaskali"), + ( + "range 20.5–25.3°C today", # noqa: RUF001 + "range dwadzieścia przecinek pięć do dwadzieścia pięć przecinek trzy stopnia Celsjusza today", + ), + ("band 10-100 Hz", "band dziesięć do sto herców"), + ("45% of the limit", "czterdzieści pięć procent of the limit"), + ("peak 0.5 m/s²", "peak zero przecinek pięć metra na sekundę do kwadratu"), + ("gusts 12 km/h", "gusts dwanaście kilometrów na godzinę"), + # must be left alone (no unit / structural): + ("ISO 9001 zone", "ISO 9001 zone"), + ("DIN 5008-1 referenced", "DIN 5008-1 referenced"), + ("on 2026-07-04 at 14:30", "on 2026-07-04 at 14:30"), + ("version 2.5 build", "version 2.5 build"), + ]: + assert PL.measure_text(value) == expected def test_email(): - _check( - [ - ( - "jan.kowalski@example.com", - "jan kropka kowalski małpka example kropka com", - ), - ("a-b_c@x.pl", "a myślnik b podkreślnik c małpka x kropka pl"), - ], - PL.email, - ) + for value, expected in [ + ( + "jan.kowalski@example.com", + "jan kropka kowalski małpka example kropka com", + ), + ("a-b_c@x.pl", "a myślnik b podkreślnik c małpka x kropka pl"), + ]: + assert PL.email(value) == expected def test_guidance(): @@ -169,25 +154,23 @@ def test_guidance(): def test_spell_acronyms(): # known acronyms -> spelled letter-by-letter in Polish; numbers untouched - _check( - [ - ("RMS", "er em es"), - ("UTC", "u te ce"), - ("ISO 9001", "i es o 9001"), - ("DIN 5008-1", "de i en 5008-1"), - ("Czas 08:13 UTC", "Czas 08:13 u te ce"), - ("poziom RMS na ISO", "poziom er em es na i es o"), - # NEVER spelled: lowercase word (case-sensitive), substring in a longer word, - # a boundary near-miss, an unknown all-caps name (a customer code), or a - # DOMAIN acronym not in the generic default (PPV — an app adds it by subclass): - ("din w hali", "din w hali"), - ("izolacja", "izolacja"), - ("DINO", "DINO"), - ("klient ACME", "klient ACME"), - ("poziom PPV tu", "poziom PPV tu"), - ], - PL.spell_acronyms, - ) + for value, expected in [ + ("RMS", "er em es"), + ("UTC", "u te ce"), + ("ISO 9001", "i es o 9001"), + ("DIN 5008-1", "de i en 5008-1"), + ("Czas 08:13 UTC", "Czas 08:13 u te ce"), + ("poziom RMS na ISO", "poziom er em es na i es o"), + # NEVER spelled: lowercase word (case-sensitive), substring in a longer word, + # a boundary near-miss, an unknown all-caps name (a customer code), or a + # DOMAIN acronym not in the generic default (PPV — an app adds it by subclass): + ("din w hali", "din w hali"), + ("izolacja", "izolacja"), + ("DINO", "DINO"), + ("klient ACME", "klient ACME"), + ("poziom PPV tu", "poziom PPV tu"), + ]: + assert PL.spell_acronyms(value) == expected def test_spell_acronyms_english(): @@ -224,20 +207,17 @@ def test_datetime_text(): def test_large_numbers(): # millions / milliards must not KeyError (regression: cardinal() capped at thousands) - _check( - [ - ("1000000", "milion"), - ("2000000", "dwa miliony"), - ("5000000", "pięć milionów"), - ("1000000000", "miliard"), - ( - "1234567", - "milion dwieście trzydzieści cztery tysiące " - "pięćset sześćdziesiąt siedem", - ), - ], - PL.number, - ) + for value, expected in [ + ("1000000", "milion"), + ("2000000", "dwa miliony"), + ("5000000", "pięć milionów"), + ("1000000000", "miliard"), + ( + "1234567", + "milion dwieście trzydzieści cztery tysiące pięćset sześćdziesiąt siedem", + ), + ]: + assert PL.number(value) == expected assert PL.unit("1000000", "Hz") == "milion herców" # a long fraction reads its digits as one cardinal, so it must scale too assert PL.number("1.1234567").startswith("jeden przecinek milion")