diff --git a/app.py b/app.py
index 65ddb8a..5dc4665 100644
--- a/app.py
+++ b/app.py
@@ -157,6 +157,29 @@ async def human(request):
         ),
     )
 
+async def humanaudio(request):
+    """Feed an uploaded audio file to the `nerfreals` entry chosen by form field `sessionid`.
+
+    Expects POST form fields `sessionid` (defaults to 0) and `file`. Responds with a JSON
+    body: {"code": 0, "msg": "ok"} on success, {"code": -1, ...} on any caught exception.
+    """
+    try:
+        form = await request.post()
+        sessionid = int(form.get('sessionid', 0))
+        filebytes = form["file"].file.read()
+        nerfreals[sessionid].put_audio_file(filebytes)
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps({"code": 0, "msg": "ok"}),
+        )
+    except Exception as e:
+        # e.args may be empty or non-str (e.g. KeyError(3)); concatenating it raw would raise here.
+        detail = str(e.args[0]) if e.args else str(e)
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps({"code": -1, "msg": "err", "data": detail}),
+        )
+
 async def set_audiotype(request):
     params = await request.json()
 
@@ -455,6 +478,7 @@ if __name__ == '__main__':
     appasync.on_shutdown.append(on_shutdown)
     appasync.router.add_post("/offer", offer)
     appasync.router.add_post("/human", human)
+    appasync.router.add_post("/humanaudio", humanaudio)
     appasync.router.add_post("/set_audiotype", set_audiotype)
     appasync.router.add_post("/record", record)
     appasync.router.add_post("/is_speaking", is_speaking)
diff --git a/basereal.py b/basereal.py
index e21e3a7..8836531 100644
--- a/basereal.py
+++ b/basereal.py
@@ -8,6 +8,7 @@ import cv2
 import glob
 import pickle
 import copy
+import resampy
 
 import queue
 from queue import Queue
@@ -64,6 +65,32 @@ class BaseReal:
     def put_audio_frame(self,audio_chunk): #16khz 20ms pcm
         self.asr.put_audio_frame(audio_chunk)
 
+    def put_audio_file(self,filebyte):
+        """Decode an in-memory audio file and pass it to `put_audio_frame` chunk by chunk.
+
+        Slices hold `self.chunk` samples; a shorter trailing remainder is not forwarded.
+        """
+        samples = self.__create_bytes_stream(BytesIO(filebyte))
+        stop = samples.shape[0] - self.chunk + 1  # one past the last start that fits a full chunk
+        for start in range(0, stop, self.chunk):
+            self.put_audio_frame(samples[start:start+self.chunk])
+    
+    def __create_bytes_stream(self,byte_stream):
+        """Decode audio from `byte_stream` into single-channel samples at `self.sample_rate`.
+
+        Samples are cast to float32 and only the first channel is kept before any resampling.
+        """
+        audio, src_rate = sf.read(byte_stream)
+        print(f'[INFO]put audio stream {src_rate}: {audio.shape}')
+        audio = audio.astype(np.float32)
+        if audio.ndim > 1:
+            print(f'[WARN] audio has {audio.shape[1]} channels, only use the first.')
+            audio = audio[:, 0]
+        if src_rate == self.sample_rate or audio.shape[0] == 0:
+            return audio
+        print(f'[WARN] audio sample rate is {src_rate}, resampling into {self.sample_rate}.')
+        return resampy.resample(x=audio, sr_orig=src_rate, sr_new=self.sample_rate)
+
     def pause_talk(self):
         self.tts.pause_talk()
         self.asr.pause_talk()