Problem
I am trying to build an application where, when I press the Start button in the browser, the MediaRecorder starts recording and sends audio chunks to my server; the server, after doing some processing on those chunks, sends audio chunks back to the client in bytes format.
This application runs smoothly on localhost, but when I deploy the same application to an AWS EC2 instance using Nginx, I get an error: when I open the browser and enter the server URL, the audio is received by the server, but when the server sends the processed audio back to the client in chunks, the first chunk is not picked up by the client — it fails with "websocket connection failed" — while the remaining chunks are received.
I have attached a screenshot of the issue as well. If anyone knows what the issue is, any help would be greatly appreciated — thanks.
Server-side code
"""Socket.IO audio relay server.

Receives microphone chunks from the browser, forwards them to Deepgram for
live transcription, and, after a 2-second silence, feeds the accumulated
transcript to an LLM + TTS pipeline and streams the synthesized audio back
to the client as WAV bytes.
"""
import asyncio
import socketio
from aiohttp import web
from deepgram import DeepgramClient, DeepgramClientOptions, LiveTranscriptionEvents, LiveOptions
import traceback
import time
import llm_response
import tts
from pydub import AudioSegment
import re
import io

is_finals = []
user_message = []           # transcript fragments accumulated for the current utterance
silence_threshold = 2000    # 2 seconds in milliseconds
dict_connection = {}        # sid -> whether the Deepgram live connection was started for it
API_KEY = "API KEY HERE"

config = DeepgramClientOptions(options={"keepalive": "true"})
deepgram = DeepgramClient(API_KEY, config)
# NOTE(review): a single module-level Deepgram connection is shared by ALL
# Socket.IO clients; concurrent browsers will interleave audio and trample
# each other's transcripts. Consider one connection per sid.
dg_connection = deepgram.listen.live.v("1")

# Wall-clock time of the last transcript event; None when no utterance is open.
last_audio_timestamp = None


async def handle_root(request):
    """Serve the client page."""
    return web.FileResponse('./index.html')


def remove_special_characters(input_str):
    """Return *input_str* with everything except letters, digits and whitespace removed."""
    return re.sub(r'[^A-Za-z0-9\s]', '', input_str)


def get_response(query):
    """Yield WAV-encoded audio chunks for the LLM's answer to *query*.

    The LLM response is streamed token by token; whenever a clause boundary
    (comma/period) is reached, the collected text is synthesized with TTS and
    yielded as one WAV byte string.
    """
    response_generator = llm_response.response(query)
    collected_str = ""
    for value in response_generator:
        if value:
            collected_str += str(value)
        if (value in [",", "."] or value is None) and collected_str:
            print("Response: ", collected_str)
            audio = tts.tts_main(collected_str)
            # FIX: use a fresh buffer per segment. The original reused one
            # BytesIO across iterations, so each yielded chunk also contained
            # all previously exported audio.
            audio_data = io.BytesIO()
            audio.export(audio_data, format="wav")
            yield audio_data.getvalue()
            collected_str = ""


def on_message(self, result, **kwargs):
    """Deepgram transcript callback: accumulate non-duplicate sentences."""
    global user_message, last_audio_timestamp
    sentence = result.channel.alternatives[0].transcript
    if not sentence:
        return
    print(f"Transcription: {sentence}")
    # Skip exact repeats of the most recent fragment.
    if not user_message or user_message[-1] != sentence:
        user_message.append(sentence)
    last_audio_timestamp = time.time()


def on_error(self, error, **kwargs):
    """Deepgram error callback."""
    print(f"Error: {error}")


async def send_large_payload_to_client(payload, sid):
    """Emit *payload* to client *sid* in 1 KiB 'audio_chunk' events."""
    chunk_size = 1024  # Adjust chunk size as needed
    # FIX: ceiling division. The original `len // size + 1` produced an extra
    # empty chunk whenever the payload length was an exact multiple of 1024.
    total_chunks = (len(payload) + chunk_size - 1) // chunk_size
    for i in range(total_chunks):
        start = i * chunk_size
        end = min(start + chunk_size, len(payload))
        await sio.emit('audio_chunk', payload[start:end], room=sid)


async def socketio_handler(sio, sid, data):
    """Handle one incoming 'audio_chunk' event from client *sid*.

    Binary chunks are forwarded to Deepgram; after >2 s of silence the
    accumulated transcript is answered via get_response() and streamed back.
    A non-binary payload is treated as a stop signal.
    """
    global last_audio_timestamp
    global dict_connection
    global user_message
    dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
    dg_connection.on(LiveTranscriptionEvents.Error, on_error)
    # FIX: .get() avoids a KeyError if a chunk races ahead of the 'connect'
    # handler that initializes dict_connection[sid].
    if not dict_connection.get(sid, False):
        options = LiveOptions(punctuate=True, interim_results=False, language='en-GB')
        dg_connection.start(options)
        dict_connection[sid] = True
        print('connection status: ', dict_connection[sid])
    try:
        if data['type'] == 'binary':
            dg_connection.send(data['data'])
            # An utterance is finished after 2 s without new transcripts.
            if last_audio_timestamp is not None and time.time() - last_audio_timestamp > 2:
                temp_message = ''.join(user_message)
                print("Final Message by user: ", temp_message)
                user_message = []
                last_audio_timestamp = None
                print("Finished")
                for audio_chunk in get_response(temp_message):
                    print("SID is: ", sid)
                    await sio.emit('audio_chunk', audio_chunk, room=sid)
                    # Pace emission roughly to the clip's duration so chunks
                    # do not pile up on the client.
                    audio_segment = AudioSegment.from_file(io.BytesIO(audio_chunk))
                    await asyncio.sleep((len(audio_segment) - 2500) / 1000)
        else:
            print("connection stopped!")
            dict_connection[sid] = False
            dg_connection.finish()
    except Exception as e:
        print(f"Exception: {str(e)}")
        # FIX: print_exc() already writes the traceback; wrapping it in
        # print() just printed an extra "None".
        traceback.print_exc()


# NOTE(review): with Nginx in front, the page's origin must appear in this
# list (scheme + host exactly as the browser sees it), otherwise the
# Socket.IO/websocket handshake is rejected — a likely cause of the
# "websocket connection failed" error on EC2. Verify against the deployment.
sio = socketio.AsyncServer(async_mode='aiohttp',
                           cors_allowed_origins=['http://127.0.0.1:8000', 'http://142.93.211.74'])
app = web.Application()
sio.attach(app)


@sio.event
async def connect(sid, environ):
    """Register a new client; its Deepgram stream is not started yet."""
    print('connect ', sid)
    global dict_connection
    dict_connection[sid] = False


@sio.event
async def disconnect(sid):
    """Tear down the (shared) Deepgram connection when a client leaves."""
    print('disconnect ', sid)
    dg_connection.finish()


@sio.on('audio_chunk')
async def handle_audio_chunk(sid, data):
    await socketio_handler(sio, sid, data)


app.router.add_get('/', handle_root)
app.router.add_static('/', path='./', name='static')

if __name__ == '__main__':
    web.run_app(app, port=8000)
Client-side code
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Microphone Stream</title>
    <script src="https://cdn.socket.io/4.0.0/socket.io.min.js"></script>
</head>
<body>
    <h1>Microphone Stream</h1>
    <button id="toggleButton">Start</button>
    <pre id="transcript"></pre>
    <script>
        let mediaRecorder;
        let socket;
        const transcriptElement = document.getElementById('transcript');
        const toggleButton = document.getElementById('toggleButton');
        const audioQueue = [];   // server audio chunks awaiting playback
        let isPlaying = false;
        let stream;
        let isConnected = false;

        toggleButton.onclick = async () => {
            if (isConnected) {
                stopStreaming();
            } else {
                await startStreaming();
            }
        };

        // Capture the microphone, connect Socket.IO, and start streaming chunks.
        async function startStreaming() {
            try {
                // Get audio stream from the microphone
                stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);

                // FIX: do not hard-code 'http://127.0.0.1:8000'. With no URL,
                // the client connects back to the origin the page was served
                // from, so the same page works on localhost AND behind Nginx
                // on the EC2 host (the hard-coded loopback address is why the
                // websocket connection failed after deployment).
                socket = io();

                socket.on('connect', () => {
                    console.log('Socket connected');
                    isConnected = true;
                    toggleButton.textContent = 'Stop';
                });
                socket.on('connect_error', (error) => {
                    console.error('Socket connection error:', error);
                });
                socket.on('disconnect', () => {
                    console.log('Socket disconnected');
                    isConnected = false;
                    toggleButton.textContent = 'Start';
                });
                socket.on('transcript', (transcript) => {
                    transcriptElement.textContent += transcript + '\n';
                });
                // Handle receiving audio chunks from the server
                socket.on('audio_chunk', (audioData) => {
                    audioQueue.push(audioData);
                    if (!isPlaying) {
                        playNextAudio();
                    }
                });

                // Send audio data to the server when available
                mediaRecorder.ondataavailable = (event) => {
                    if (event.data.size > 0 && socket.connected) {
                        socket.emit('audio_chunk', { type: 'binary', data: event.data });
                    }
                };

                initRecording();
            } catch (err) {
                console.error('Error accessing media devices:', err);
            }
        }

        // Tear down recorder, socket and media tracks.
        function stopStreaming() {
            // FIX: stop the recorder exactly once, guarded by its state. The
            // original called mediaRecorder.stop() again inside the socket
            // branch, which throws InvalidStateError on an inactive recorder.
            if (mediaRecorder && mediaRecorder.state === 'recording') {
                mediaRecorder.stop();
                console.log('MediaRecorder stopped');
            }
            if (socket && socket.connected) {
                socket.disconnect();
                console.log('Socket disconnected');
            }
            if (stream) {
                stream.getTracks().forEach(track => track.stop());
                console.log('Stream stopped');
            }
            isConnected = false;
            toggleButton.textContent = 'Start';
        }

        // Start recording audio
        function initRecording() {
            if (mediaRecorder && mediaRecorder.state === 'inactive') {
                mediaRecorder.start(500); // Record in chunks of 500ms (comment previously said 250ms)
                console.log('MediaRecorder started');
            }
        }

        // Stop recording audio
        function stopRecording() {
            if (mediaRecorder && mediaRecorder.state === 'recording') {
                mediaRecorder.stop();
                console.log('MediaRecorder stopped');
            }
        }

        // Play the next audio chunk from the queue; recording is paused while
        // server audio plays and resumed when the queue drains.
        function playNextAudio() {
            if (audioQueue.length > 0) {
                stopRecording(); // Stop recording before playing audio
                console.log("Audio recording stopped!");
                isPlaying = true;
                const audioBlob = new Blob([audioQueue.shift()], { type: 'audio/wav' });
                const audioUrl = URL.createObjectURL(audioBlob);
                const audio = new Audio(audioUrl);
                audio.addEventListener('ended', () => {
                    isPlaying = false;
                    console.log("Audio recording resumed!");
                    initRecording(); // Resume recording after playing audio
                    playNextAudio(); // Play the next audio chunk in the queue
                });
                audio.play();
            } else {
                initRecording(); // Ensure recording is resumed if queue is empty
            }
        }
    </script>
</body>
</html>