From e001f120bd248829822bd32803d2e5f35c71efed Mon Sep 17 00:00:00 2001
From: harryjulian
Date: Wed, 8 Oct 2025 11:19:06 +0100
Subject: [PATCH 1/6] first streaming impl

---
 examples/onnx_example_streaming.py |  88 +++++++++++++++++
 neuttsair/neutts.py                | 149 ++++++++++++++++++++++++++++-
 2 files changed, 234 insertions(+), 3 deletions(-)
 create mode 100644 examples/onnx_example_streaming.py

diff --git a/examples/onnx_example_streaming.py b/examples/onnx_example_streaming.py
new file mode 100644
index 0000000..a7fd6c4
--- /dev/null
+++ b/examples/onnx_example_streaming.py
@@ -0,0 +1,88 @@
+import os
+import soundfile as sf
+import torch
+import numpy as np
+from neuttsair.neutts import NeuTTSAir
+import pyaudio
+
+
+def main(input_text, ref_codes_path, ref_text, backbone):
+    assert backbone in ["neuphonic/neutts-air-q4-gguf", "neuphonic/neutts-air-q8-gguf"], "Must be a GGUF ckpt as streaming only supported by llama-cpp."
+
+    # Initialize NeuTTSAir with the desired model and codec
+    tts = NeuTTSAir(
+        backbone_repo=backbone,
+        backbone_device="cpu",
+        codec_repo="neuphonic/neucodec-onnx-decoder",
+        codec_device="cpu"
+    )
+
+    # If ref_text is a path, read the reference text from the file; otherwise use the string as-is
+    if ref_text and os.path.exists(ref_text):
+        with open(ref_text, "r") as f:
+            ref_text = f.read().strip()
+
+    if ref_codes_path and os.path.exists(ref_codes_path):
+        ref_codes = torch.load(ref_codes_path)
+
+    print(f"Generating audio for input text: {input_text}")
+    p = pyaudio.PyAudio()
+    stream = p.open(
+        format=pyaudio.paInt16,
+        channels=1,
+        rate=24_000,
+        output=True
+    )
+    print("Streaming...")
+    for chunk in tts.infer_stream(input_text, ref_codes, ref_text):
+        audio = (chunk * 32767).astype(np.int16)
+        stream.write(audio.tobytes())
+
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+
+
+if __name__ == "__main__":
+    # get arguments from command line
+    import argparse
+
+    parser = argparse.ArgumentParser(description="NeuTTSAir Example")
+    parser.add_argument(
+        "--input_text",
+        type=str,
+        required=True,
+        help="Input text to be converted to speech"
+    )
+    parser.add_argument(
+        "--ref_codes",
+        type=str,
+        default="./samples/dave.pt",
+        help="Path to pre-encoded reference audio"
+    )
+    parser.add_argument(
+        "--ref_text",
+        type=str,
+        default="./samples/dave.txt",
+        help="Reference text corresponding to the reference audio",
+    )
+    parser.add_argument(
+        "--output_path",
+        type=str,
+        default="output.wav",
+        help="Path to save the output audio"
+    )
+    parser.add_argument(
+        "--backbone",
+        type=str,
+        default="neuphonic/neutts-air-q8-gguf",
+        help="Huggingface repo containing the backbone checkpoint. Must be GGUF."
+    )
+    args = parser.parse_args()
+    main(
+        input_text=args.input_text,
+        ref_codes_path=args.ref_codes,
+        ref_text=args.ref_text,
+        backbone=args.backbone,
+        output_path=args.output_path,
+    )

diff --git a/neuttsair/neutts.py b/neuttsair/neutts.py
index f58829c..1dfcf73 100644
--- a/neuttsair/neutts.py
+++ b/neuttsair/neutts.py
@@ -1,3 +1,4 @@
+from typing import Generator
 from pathlib import Path
 import librosa
 import numpy as np
@@ -6,7 +7,36 @@ import re
 import perth
 from neucodec import NeuCodec, DistillNeuCodec
 from phonemizer.backend import EspeakBackend
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from threading import Thread
+
+
+def _linear_overlap_add(frames: list[torch.Tensor], stride: int):
+    # original impl --> https://github.com/facebookresearch/encodec/blob/main/encodec/utils.py
+    assert len(frames)
+    device = frames[0].device
+    dtype = frames[0].dtype
+    shape = frames[0].shape[:-1]
+
+    total_size = 0
+    for i, frame in enumerate(frames):
+        frame_end = stride * i + frame.shape[-1]
+        total_size = max(total_size, frame_end)
+
+    sum_weight = torch.zeros(total_size, device=device, dtype=dtype)
+    out = torch.zeros(*shape, total_size, device=device, dtype=dtype)
+
+    offset: int = 0
+    for frame in frames:
+        frame_length = frame.shape[-1]
+        t = torch.linspace(0, 1, frame_length + 2, device=device, dtype=dtype)[1:-1]
+        weight = 0.5 - (t - 0.5).abs()
+
+        out[..., offset : offset + frame_length] += weight * frame
+        sum_weight[offset : offset + frame_length] += weight
+        offset += stride
+    assert sum_weight.min() > 0
+    return out / sum_weight
 
 
 class NeuTTSAir:
@@ -22,9 +52,14 @@ class NeuTTSAir:
         # Consts
         self.sample_rate = 24_000
         self.max_context = 2048
+        self.hop_length = 480
+        self.streaming_overlap_frames = 1
+        self.streaming_frames_per_chunk = 15
+        self.streaming_lookforward = 50
+        self.streaming_lookback = 150
+        self.streaming_stride_samples = self.streaming_frames_per_chunk * self.hop_length
 
         # ggml & onnx flags
-        self._grammar = None  # set with a ggml model
         self._is_quantized_model = False
         self._is_onnx_codec = False
@@ -133,6 +168,24 @@ class NeuTTSAir:
 
         watermarked_wav = self.watermarker.apply_watermark(wav, sample_rate=24_000)
         return watermarked_wav
+
+    def infer_stream(self, text: str, ref_codes: np.ndarray | torch.Tensor, ref_text: str) -> Generator[np.ndarray]:
+        """
+        Perform streaming inference to generate speech from text using the TTS model and reference audio.
+
+        Args:
+            text (str): Input text to be converted to speech.
+            ref_codes (np.ndarray | torch.Tensor): Encoded reference audio.
+            ref_text (str): Reference text corresponding to the reference audio.
+        Yields:
+            np.ndarray: Generated speech waveform chunks.
+ """ + + if self._is_quantized_model: + yield self._infer_stream_ggml(ref_codes, ref_text, text) + + else: + raise NotImplementedError("Streaming is not implemented for the torch backend!") def encode_reference(self, ref_audio_path: str | Path): wav, _ = librosa.load(ref_audio_path, sr=16000, mono=True) @@ -221,7 +274,7 @@ class NeuTTSAir: output_tokens[0, input_length:].cpu().numpy().tolist(), add_special_tokens=False ) return output_str - + def _infer_ggml(self, ref_codes: list[int], ref_text: str, input_text: str) -> str: ref_text = self._to_phones(ref_text) input_text = self._to_phones(input_text) @@ -240,3 +293,93 @@ class NeuTTSAir: ) output_str = output["choices"][0]["text"] return output_str + + def _infer_stream_ggml(self, ref_codes: list[int], ref_text: str, input_text: str) -> Generator[np.ndarray]: + ref_text = self._to_phones(ref_text) + input_text = self._to_phones(input_text) + + codes_str = "".join([f"<|speech_{idx}|>" for idx in ref_codes]) + prompt = ( + f"user: Convert the text to speech:<|TEXT_PROMPT_START|>{ref_text} {input_text}" + f"<|TEXT_PROMPT_END|>\nassistant:<|SPEECH_GENERATION_START|>{codes_str}" + ) + + audio_cache = [] + token_cache = ref_codes + n_decoded_samples = 0 + n_decoded_tokens = len(ref_codes) + + for item in self.backbone( + prompt, + max_tokens=self.max_context, + temperature=1.0, + top_k=50, + stop=["<|SPEECH_GENERATION_END|>"], + stream=True + ): + output_str = item["choices"][0]["text"] + token_cache.append(output_str) + + if len(token_cache[n_decoded_tokens:]) >= self.streaming_frames_per_chunk + self.streaming_lookforward: + + # decode chunk + tokens_start = max( + n_decoded_tokens + - self.streaming_lookback + - self.streaming_overlap_frames, + 0 + ) + tokens_end = ( + n_decoded_tokens + + self.streaming_frames_per_chunk + + self.streaming_lookforward + + self.streaming_overlap_frames + ) + sample_start = ( + n_decoded_tokens - tokens_start + ) * self.hop_length + sample_end = ( + sample_start + + (self.streaming_frames_per_chunk + 2 * self.streaming_overlap_frames) * self.hop_length + ) + curr_codes = token_cache[tokens_start:tokens_end] + recon = self._decode(curr_codes) + recon = self.watermarker.apply_watermark(recon, sample_rate=24_000) + recon = recon[sample_start:sample_end] + audio_cache.append(recon) + + # postprocess + processed_recon = _linear_overlap_add( + audio_cache, stride=self.streaming_stride_samples + ) + new_samples_end = len(audio_cache) * self.streaming_stride_samples + processed_recon = processed_recon[ + n_decoded_samples:new_samples_end + ] + n_decoded_samples = new_samples_end + n_decoded_tokens += self.streaming_frames_per_chunk + yield processed_recon + + # final decoding handled seperately as non-constant chunk size + remaining_tokens = len(token_cache) - n_decoded_tokens + if len(token_cache) > n_decoded_tokens: + tokens_start = max( + len(token_cache) + - (self.streaming_lookback + self.streaming_overlap_frames + remaining_tokens), + 0 + ) + sample_start = ( + len(token_cache) + - tokens_start + - remaining_tokens + - self.streaming_overlap_frames + ) * self.hop_length + curr_codes = token_cache[tokens_start:] + recon = self._decode(curr_codes) + recon = self.watermarker.apply_watermark(recon, sample_rate=24_000) + recon = recon[sample_start:] + audio_cache.append(recon) + + processed_recon = _linear_overlap_add(audio_cache, stride=self.streaming_stride_samples) + processed_recon = processed_recon[n_decoded_samples:] + yield processed_recon \ No newline at end of file From 
From 7c97519a54f9881601499f4637cd24750ffce7d2 Mon Sep 17 00:00:00 2001
From: harryjulian
Date: Wed, 8 Oct 2025 11:44:03 +0100
Subject: [PATCH 2/6] working streaming example

---
 examples/onnx_example_streaming.py |  2 +-
 neuttsair/neutts.py                | 29 ++++++++++++++---------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/examples/onnx_example_streaming.py b/examples/onnx_example_streaming.py
index a7fd6c4..b56830f 100644
--- a/examples/onnx_example_streaming.py
+++ b/examples/onnx_example_streaming.py
@@ -36,6 +36,7 @@ def main(input_text, ref_codes_path, ref_text, backbone):
     print("Streaming...")
     for chunk in tts.infer_stream(input_text, ref_codes, ref_text):
         audio = (chunk * 32767).astype(np.int16)
+        print(audio)
         stream.write(audio.tobytes())
 
     stream.stop_stream()
@@ -84,5 +85,4 @@
         ref_codes_path=args.ref_codes,
         ref_text=args.ref_text,
         backbone=args.backbone,
-        output_path=args.output_path,
     )
diff --git a/neuttsair/neutts.py b/neuttsair/neutts.py
index 1dfcf73..55d6bd4 100644
--- a/neuttsair/neutts.py
+++ b/neuttsair/neutts.py
@@ -11,10 +11,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStream
 from threading import Thread
 
 
-def _linear_overlap_add(frames: list[torch.Tensor], stride: int):
+def _linear_overlap_add(frames: list[np.ndarray], stride: int) -> np.ndarray:
     # original impl --> https://github.com/facebookresearch/encodec/blob/main/encodec/utils.py
     assert len(frames)
-    device = frames[0].device
     dtype = frames[0].dtype
     shape = frames[0].shape[:-1]
 
@@ -23,14 +22,14 @@ def _linear_overlap_add(frames: list[np.ndarray], stride: int) -> np.ndarray:
         frame_end = stride * i + frame.shape[-1]
         total_size = max(total_size, frame_end)
 
-    sum_weight = torch.zeros(total_size, device=device, dtype=dtype)
-    out = torch.zeros(*shape, total_size, device=device, dtype=dtype)
+    sum_weight = np.zeros(total_size, dtype=dtype)
+    out = np.zeros((*shape, total_size), dtype=dtype)
 
     offset: int = 0
     for frame in frames:
         frame_length = frame.shape[-1]
-        t = torch.linspace(0, 1, frame_length + 2, device=device, dtype=dtype)[1:-1]
-        weight = 0.5 - (t - 0.5).abs()
+        t = np.linspace(0, 1, frame_length + 2, dtype=dtype)[1:-1]
+        weight = 0.5 - np.abs(t - 0.5)
 
         out[..., offset : offset + frame_length] += weight * frame
         sum_weight[offset : offset + frame_length] += weight
@@ -169,7 +168,7 @@ class NeuTTSAir:
         watermarked_wav = self.watermarker.apply_watermark(wav, sample_rate=24_000)
         return watermarked_wav
 
-    def infer_stream(self, text: str, ref_codes: np.ndarray | torch.Tensor, ref_text: str) -> Generator[np.ndarray]:
+    def infer_stream(self, text: str, ref_codes: np.ndarray | torch.Tensor, ref_text: str) -> Generator[np.ndarray, None, None]:
         """
         Perform streaming inference to generate speech from text using the TTS model and reference audio.
@@ -182,7 +181,7 @@
         """
 
         if self._is_quantized_model:
-            yield self._infer_stream_ggml(ref_codes, ref_text, text)
+            return self._infer_stream_ggml(ref_codes, ref_text, text)
 
         else:
             raise NotImplementedError("Streaming is not implemented for the torch backend!")
@@ -294,7 +293,7 @@ class NeuTTSAir:
         output_str = output["choices"][0]["text"]
         return output_str
 
-    def _infer_stream_ggml(self, ref_codes: list[int], ref_text: str, input_text: str) -> Generator[np.ndarray]:
+    def _infer_stream_ggml(self, ref_codes: torch.Tensor, ref_text: str, input_text: str) -> Generator[np.ndarray, None, None]:
         ref_text = self._to_phones(ref_text)
         input_text = self._to_phones(input_text)
 
@@ -304,10 +303,10 @@ class NeuTTSAir:
             f"<|TEXT_PROMPT_END|>\nassistant:<|SPEECH_GENERATION_START|>{codes_str}"
         )
 
-        audio_cache = []
-        token_cache = ref_codes
-        n_decoded_samples = 0
-        n_decoded_tokens = len(ref_codes)
+        audio_cache: list[np.ndarray] = []
+        token_cache: list[str] = [f"<|speech_{idx}|>" for idx in ref_codes]
+        n_decoded_samples: int = 0
+        n_decoded_tokens: int = len(ref_codes)
 
         for item in self.backbone(
             prompt,
@@ -343,7 +342,7 @@ class NeuTTSAir:
                     + (self.streaming_frames_per_chunk + 2 * self.streaming_overlap_frames) * self.hop_length
                 )
                 curr_codes = token_cache[tokens_start:tokens_end]
-                recon = self._decode(curr_codes)
+                recon = self._decode("".join(curr_codes))
                 recon = self.watermarker.apply_watermark(recon, sample_rate=24_000)
                 recon = recon[sample_start:sample_end]
                 audio_cache.append(recon)
@@ -375,7 +374,7 @@ class NeuTTSAir:
                 - self.streaming_overlap_frames
             ) * self.hop_length
             curr_codes = token_cache[tokens_start:]
-            recon = self._decode(curr_codes)
+            recon = self._decode("".join(curr_codes))
             recon = self.watermarker.apply_watermark(recon, sample_rate=24_000)
             recon = recon[sample_start:]
             audio_cache.append(recon)

From 302f3e62285e0d544beba2fb9324a8bf615c9a13 Mon Sep 17 00:00:00 2001
From: harryjulian
Date: Wed, 8 Oct 2025 14:22:26 +0100
Subject: [PATCH 3/6] working streaming example

---
 README.md                          |  2 +-
 examples/README.md                 | 14 +++++++++++++-
 examples/onnx_example_streaming.py |  5 ++++-
 neuttsair/neutts.py                |  6 +++---
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 03a7160..b42360f 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ from neuttsair.neutts import NeuTTSAir
 import soundfile as sf
 
 tts = NeuTTSAir(
-    backbone_repo="neuphonic/neutts-air", # or 'neutts-air-q4-gguf' wit llama-cpp-python installed
+    backbone_repo="neuphonic/neutts-air", # or 'neutts-air-q4-gguf' with llama-cpp-python installed
     backbone_device="cpu",
     codec_repo="neuphonic/neucodec",
     codec_device="cpu"
diff --git a/examples/README.md b/examples/README.md
index 231e3a0..af192b4 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -35,4 +35,16 @@ python -m examples.onnx_example \
     --ref_codes samples/dave.pt \
     --ref_text samples/dave.txt \
     --backbone neuphonic/neutts-air-q4-gguf
-```
\ No newline at end of file
+```
+
+### Streaming Support
+
+To stream the model output in chunks, try out the streaming example below. For streaming, only the GGUF backbones are currently supported. Ensure you have `llama-cpp-python`, `onnxruntime` and `pyaudio` installed to run this example.
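+
+Each chunk yielded by `infer_stream` is a short float32 waveform at 24 kHz, so instead of playing the audio live you can buffer the chunks and write a single file. A minimal sketch, assuming `tts`, `input_text`, `ref_codes` and `ref_text` are set up as in the example script:
+
+```python
+import numpy as np
+import soundfile as sf
+
+# Drain the generator, then join the chunks into one waveform.
+chunks = list(tts.infer_stream(input_text, ref_codes, ref_text))
+sf.write("output.wav", np.concatenate(chunks), 24_000)
+```
+
+To run the example from the command line: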
+
+```bash
+python -m examples.onnx_example_streaming \
+    --input_text "My name is Dave, and um, I'm from London" \
+    --ref_codes samples/dave.pt \
+    --ref_text samples/dave.txt \
+    --backbone neuphonic/neutts-air-q4-gguf
+```
diff --git a/examples/onnx_example_streaming.py b/examples/onnx_example_streaming.py
index b56830f..c73e621 100644
--- a/examples/onnx_example_streaming.py
+++ b/examples/onnx_example_streaming.py
@@ -5,6 +5,9 @@ import numpy as np
 from neuttsair.neutts import NeuTTSAir
 import pyaudio
 
+from phonemizer.backend.espeak.wrapper import EspeakWrapper
+_ESPEAK_LIBRARY = '/opt/homebrew/Cellar/espeak/1.48.04_1/lib/libespeak.1.1.48.dylib'  # use the path to the library
+EspeakWrapper.set_library(_ESPEAK_LIBRARY)
 
 def main(input_text, ref_codes_path, ref_text, backbone):
     assert backbone in ["neuphonic/neutts-air-q4-gguf", "neuphonic/neutts-air-q8-gguf"], "Must be a GGUF ckpt as streaming only supported by llama-cpp."
@@ -36,7 +39,7 @@ def main(input_text, ref_codes_path, ref_text, backbone):
     print("Streaming...")
     for chunk in tts.infer_stream(input_text, ref_codes, ref_text):
         audio = (chunk * 32767).astype(np.int16)
-        print(audio)
+        print(audio.shape)
         stream.write(audio.tobytes())
 
     stream.stop_stream()
diff --git a/neuttsair/neutts.py b/neuttsair/neutts.py
index 55d6bd4..3bbc1ba 100644
--- a/neuttsair/neutts.py
+++ b/neuttsair/neutts.py
@@ -53,9 +53,9 @@ class NeuTTSAir:
         self.max_context = 2048
         self.hop_length = 480
         self.streaming_overlap_frames = 1
-        self.streaming_frames_per_chunk = 15
-        self.streaming_lookforward = 50
-        self.streaming_lookback = 150
+        self.streaming_frames_per_chunk = 25
+        self.streaming_lookforward = 5
+        self.streaming_lookback = 50
         self.streaming_stride_samples = self.streaming_frames_per_chunk * self.hop_length
 
         # ggml & onnx flags

From 9d219b6a153104a1291d1cd9c33a8c888fbfe9bd Mon Sep 17 00:00:00 2001
From: harryjulian
Date: Wed, 8 Oct 2025 14:42:10 +0100
Subject: [PATCH 4/6] cleanup

---
 examples/onnx_example_streaming.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/examples/onnx_example_streaming.py b/examples/onnx_example_streaming.py
index c73e621..f9a4922 100644
--- a/examples/onnx_example_streaming.py
+++ b/examples/onnx_example_streaming.py
@@ -5,9 +5,6 @@ import numpy as np
 from neuttsair.neutts import NeuTTSAir
 import pyaudio
 
-from phonemizer.backend.espeak.wrapper import EspeakWrapper
-_ESPEAK_LIBRARY = '/opt/homebrew/Cellar/espeak/1.48.04_1/lib/libespeak.1.1.48.dylib'  # use the path to the library
-EspeakWrapper.set_library(_ESPEAK_LIBRARY)
 
 def main(input_text, ref_codes_path, ref_text, backbone):
     assert backbone in ["neuphonic/neutts-air-q4-gguf", "neuphonic/neutts-air-q8-gguf"], "Must be a GGUF ckpt as streaming only supported by llama-cpp."

From 68b68060f4226cdb038a95d4696fcc8a5bff54a7 Mon Sep 17 00:00:00 2001
From: harryjulian
Date: Wed, 8 Oct 2025 14:53:42 +0100
Subject: [PATCH 5/6] cleanup

---
 examples/README.md                                           | 2 +-
 ...{onnx_example_streaming.py => basic_streaming_example.py} | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)
 rename examples/{onnx_example_streaming.py => basic_streaming_example.py} (95%)

diff --git a/examples/README.md b/examples/README.md
index af192b4..facdab9 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -42,7 +42,7 @@
 To run the example from the command line:
 
 ```bash
-python -m examples.onnx_example_streaming \
+python -m examples.basic_streaming_example \
     --input_text "My name is Dave, and um, I'm from London" \
     --ref_codes samples/dave.pt \
diff --git a/examples/onnx_example_streaming.py b/examples/basic_streaming_example.py
similarity index 95%
rename from examples/onnx_example_streaming.py
rename to examples/basic_streaming_example.py
index f9a4922..f1fd5db 100644
--- a/examples/onnx_example_streaming.py
+++ b/examples/basic_streaming_example.py
@@ -7,7 +7,7 @@
 
 
 def main(input_text, ref_codes_path, ref_text, backbone):
-    assert backbone in ["neuphonic/neutts-air-q4-gguf", "neuphonic/neutts-air-q8-gguf"], "Must be a GGUF ckpt as streaming only supported by llama-cpp."
+    assert backbone in ["neuphonic/neutts-air-q4-gguf", "neuphonic/neutts-air-q8-gguf"], "Must be a GGUF ckpt as streaming is only currently supported by llama-cpp."
 
     # Initialize NeuTTSAir with the desired model and codec
     tts = NeuTTSAir(
@@ -32,8 +32,8 @@ def main(input_text, ref_codes_path, ref_text, backbone):
         channels=1,
         rate=24_000,
         output=True
-    )
     print("Streaming...")
+    )
     for chunk in tts.infer_stream(input_text, ref_codes, ref_text):
         audio = (chunk * 32767).astype(np.int16)
         print(audio.shape)
@@ -45,7 +45,6 @@
 
 
 if __name__ == "__main__":
-    # get arguments from command line
     import argparse
 
     parser = argparse.ArgumentParser(description="NeuTTSAir Example")

From 2f7817280ace18fe46c25ad9b93a0a8eeeac8152 Mon Sep 17 00:00:00 2001
From: harryjulian
Date: Wed, 8 Oct 2025 14:56:45 +0100
Subject: [PATCH 6/6] typo

---
 examples/basic_streaming_example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/basic_streaming_example.py b/examples/basic_streaming_example.py
index f1fd5db..dd9e640 100644
--- a/examples/basic_streaming_example.py
+++ b/examples/basic_streaming_example.py
@@ -32,8 +32,8 @@ def main(input_text, ref_codes_path, ref_text, backbone):
         channels=1,
         rate=24_000,
         output=True
-    print("Streaming...")
     )
+    print("Streaming...")
     for chunk in tts.infer_stream(input_text, ref_codes, ref_text):
         audio = (chunk * 32767).astype(np.int16)
         print(audio.shape)
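A note on the chunk blending these patches rely on: `_linear_overlap_add` weights each decoded chunk with a triangular window that peaks mid-frame (`0.5 - |t - 0.5|`, as in the EnCodec utility it credits) and then renormalises by the summed weights, so overlapping regions crossfade smoothly instead of clicking at chunk boundaries. A minimal self-contained sketch of the idea on hypothetical toy data:

```python
import numpy as np

def crossfade_add(frames: list[np.ndarray], stride: int) -> np.ndarray:
    """Overlap-add with a triangular weight, mirroring _linear_overlap_add.

    Assumes 1-D float32 frames where the last frame extends furthest
    (true for equal-length frames, as in the streaming decoder)."""
    total = stride * (len(frames) - 1) + frames[-1].shape[-1]
    out = np.zeros(total, dtype=np.float32)
    sum_w = np.zeros(total, dtype=np.float32)
    for i, frame in enumerate(frames):
        n = frame.shape[-1]
        # Strictly positive triangular weight: 0.5 - |t - 0.5| for t in (0, 1).
        t = np.linspace(0, 1, n + 2, dtype=np.float32)[1:-1]
        w = 0.5 - np.abs(t - 0.5)
        out[i * stride : i * stride + n] += w * frame
        sum_w[i * stride : i * stride + n] += w
    return out / sum_w  # every sample is covered by at least one frame

# Two constant chunks overlapping by 40 samples blend smoothly from 1.0 to 2.0.
a = np.ones(100, dtype=np.float32)
b = np.full(100, 2.0, dtype=np.float32)
print(crossfade_add([a, b], stride=60)[55:65].round(3))
```

The renormalisation means constant regions are reproduced exactly, while the triangular shape is what makes the earlier chunk fade out as the next fades in — which is why the numpy port's weight should read `0.5 - np.abs(t - 0.5)` rather than a one-sided ramp.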