working streaming example

2025-10-10 02:44:44 +03:00 · 2025-10-08 14:22:26 +01:00
parent 7c97519a54
commit 302f3e6228
4 changed files with 21 additions and 6 deletions
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ from neuttsair.neutts import NeuTTSAir
 import soundfile as sf

 tts = NeuTTSAir(
-   backbone_repo="neuphonic/neutts-air", # or 'neutts-air-q4-gguf' wit llama-cpp-python installed
+   backbone_repo="neuphonic/neutts-air", # or 'neutts-air-q4-gguf' with llama-cpp-python installed
   backbone_device="cpu",
   codec_repo="neuphonic/neucodec",
   codec_device="cpu"
--- a/examples/README.md
+++ b/examples/README.md
@@ -35,4 +35,16 @@ python -m examples.onnx_example \
  --ref_codes samples/dave.pt \
  --ref_text samples/dave.txt \
  --backbone neuphonic/neutts-air-q4-gguf
-```
+```
+
+### Streaming Support 
+
+To stream the model output in chunks, try out the `onnx_example_streaming.py` example. For streaming, only the GGUF backends are currently supported. Ensure you have `llama-cpp-python`, `onnxruntime` and `pyaudio` installed to run this example.
+
+```bash
+python -m examples.onnx_example_streaming \
+  --input_text "My name is Dave, and um, I'm from London" \
+  --ref_codes samples/dave.pt \
+  --ref_text samples/dave.txt \
+  --backbone neuphonic/neutts-air-q4-gguf
+```
--- a/examples/onnx_example_streaming.py
+++ b/examples/onnx_example_streaming.py
@@ -5,6 +5,9 @@ import numpy as np
 from neuttsair.neutts import NeuTTSAir
 import pyaudio

+from phonemizer.backend.espeak.wrapper import EspeakWrapper
+_ESPEAK_LIBRARY = '/opt/homebrew/Cellar/espeak/1.48.04_1/lib/libespeak.1.1.48.dylib'  # NOTE: machine-specific (Homebrew) path; replace with the path to your local espeak library.
+EspeakWrapper.set_library(_ESPEAK_LIBRARY)

 def main(input_text, ref_codes_path, ref_text, backbone):
    assert backbone in ["neuphonic/neutts-air-q4-gguf", "neuphonic/neutts-air-q8-gguf"], "Must be a GGUF ckpt as streaming only supported by llama-cpp."
@@ -36,7 +39,7 @@ def main(input_text, ref_codes_path, ref_text, backbone):
    print("Streaming...")
    for chunk in tts.infer_stream(input_text, ref_codes, ref_text):
        audio = (chunk * 32767).astype(np.int16)
-        print(audio)
+        print(audio.shape)
        stream.write(audio.tobytes())
    
    stream.stop_stream()
--- a/neuttsair/neutts.py
+++ b/neuttsair/neutts.py
@@ -53,9 +53,9 @@ class NeuTTSAir:
        self.max_context = 2048
        self.hop_length = 480
        self.streaming_overlap_frames = 1
-        self.streaming_frames_per_chunk = 15
-        self.streaming_lookforward = 50
-        self.streaming_lookback = 150
+        self.streaming_frames_per_chunk = 25
+        self.streaming_lookforward = 5
+        self.streaming_lookback = 50
        self.streaming_stride_samples = self.streaming_frames_per_chunk * self.hop_length

        # ggml & onnx flags