diff --git a/README.md b/README.md
index c67a7a0..9b124f5 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ pip install "git+https://github.com/facebookresearch/pytorch3d.git"
 pip install tensorflow-gpu==2.8.0
 pip install --upgrade "protobuf<=3.20.1"
 ```
-如果用pytorch2.1，torchvision用0.16（可以去torchvision官网根据pytorch版本找匹配的）,cudatoolkit可以不用装  
+如果用pytorch2.1，torchvision用0.16（可以去[torchvision官网](https://github.com/pytorch/vision)根据pytorch版本找匹配的版本），cudatoolkit可以不用装  
 安装常见问题[FAQ](/assets/faq.md)  
 linux cuda环境搭建可以参考这篇文章 https://zhuanlan.zhihu.com/p/674972886
 
diff --git a/ttsreal.py b/ttsreal.py
index 665ff46..b4da862 100644
--- a/ttsreal.py
+++ b/ttsreal.py
@@ -138,7 +138,7 @@ class VoitsTTS(BaseTTS):
             'ref_audio_path':reffile,
             'prompt_text':reftext,
             'prompt_lang':language,
-            'media_type':'raw',
+            'media_type':'ogg',
             'streaming_mode':True
         }
         # req["text"] = text
@@ -162,7 +162,8 @@ class VoitsTTS(BaseTTS):
                 
             first = True
         
-            for chunk in res.iter_content(chunk_size=12800): # 1280 32K*20ms*2
+            for chunk in res.iter_content(chunk_size=None): #12800 1280 32K*20ms*2
+                print('chunk len:',len(chunk))
                 if first:
                     end = time.perf_counter()
                     print(f"gpt_sovits Time to first chunk: {end-start}s")
@@ -173,13 +174,29 @@ class VoitsTTS(BaseTTS):
         except Exception as e:
             print(e)
 
+    def __create_bytes_stream(self,byte_stream):
+        """Decode `byte_stream` into a single-channel float32 sample array, resampled to `self.sample_rate` when needed."""
+        stream, sample_rate = sf.read(byte_stream) # original note: [T*sample_rate,] float64; NOTE(review): `sf` is presumably soundfile - confirm import
+        print(f'[INFO]tts audio stream {sample_rate}: {stream.shape}')
+        stream = stream.astype(np.float32)  # downstream code works on float32 samples
+
+        if stream.ndim > 1:  # more than one axis means multiple channels: keep channel 0 only
+            print(f'[WARN] audio has {stream.shape[1]} channels, only use the first.')
+            stream = stream[:, 0]
+    
+        if sample_rate != self.sample_rate and stream.shape[0]>0:  # skip resampling when rates match or nothing was decoded
+            print(f'[WARN] audio sample rate is {sample_rate}, resampling into {self.sample_rate}.')
+            stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=self.sample_rate)
+
+        return stream
+
     def stream_tts(self,audio_stream):
         for chunk in audio_stream:
             if chunk is not None and len(chunk)>0:          
-                stream = np.frombuffer(chunk, dtype=np.int16).astype(np.float32) / 32767
-                stream = resampy.resample(x=stream, sr_orig=32000, sr_new=self.sample_rate)
-                #byte_stream=BytesIO(buffer)
-                #stream = self.__create_bytes_stream(byte_stream)
+                #stream = np.frombuffer(chunk, dtype=np.int16).astype(np.float32) / 32767
+                #stream = resampy.resample(x=stream, sr_orig=32000, sr_new=self.sample_rate)
+                byte_stream=BytesIO(chunk)
+                stream = self.__create_bytes_stream(byte_stream)
                 streamlen = stream.shape[0]
                 idx=0
                 while streamlen >= self.chunk: