How to process an recorded audio file in flask backend?

82 Views Asked by At

I have developed an Flask application, where I will record user speech using javascript and want to process the audio using Flask backend. I am using whisper openai to transcribe the recorded audio. I am able to achieve that but the problem is I have to save the recorded audio data beforehand than have to process it. I need a different approach where I can directly transcribe the audio speech without saving it locally. Hare's the code


    function startRecording() {
      if (!isRecording) {
        navigator.mediaDevices
          .getUserMedia({ audio: true })
          .then(function (stream) {
            mediaRecorder = new MediaRecorder(stream);
            mediaRecorder.ondataavailable = function (event) {
              audioChunks.push(event.data);
            };
    
            mediaRecorder.onstop = function () {
              const audioBlob = new Blob(audioChunks, { type: "audio/wav" });
              // const audioUrl = URL.createObjectURL(audioBlob);
              audioChunks = [];
              // Play the recorded audio
              // const audioElement = document.getElementById("audio");
              // audioElement.src = audioUrl;
              // audioElement.play();
              sendAudioData(audioBlob, "translation1", "translation2");
              isRecording = false;
            };
    
            mediaRecorder.start();
            isRecording = true;
          })
          .catch(function (error) {
            console.error("Error accessing microphone:", error);
          });
      } else {
        mediaRecorder.stop();
      }
    }
    
    function sendAudioData(
      audioBlob,
      translationTextareaId,
      transcription2TextareaId
    ) {
      showLoadingAnimation();
      const formData = new FormData();
      formData.append("audio", audioBlob);
    
      fetch("/translate", {
        method: "POST",
        body: formData,
      }).then((response) => {
        // Handle the response from the Flask route
        response.json().then((data) => {
          const translation_in_english = data.translation1;
          const translation_in_assamese = data.translation2;
          // document.getElementById(translationTextareaId).textContent =
          //   translation_in_english;
          const translationTextarea = document.getElementById(
            translationTextareaId
          );
          hideLoadingAnimation();
          animateText(translationTextarea, translation_in_english);
          document.getElementById(transcription2TextareaId).textContent =
            translation_in_assamese;
        });
      });
    }

here is the route


    from flask import render_template, request, jsonify,Blueprint
    from app import app
    import uuid
    # _download(_MODELS["large-v2"], "/mnt/d/whisper-backend/models", False)
    
    UPLOADED_FOLDER = '/mnt/d/whisper-backend/recordings'
    MODEL_PATH_LARGE = '/mnt/d/whisper-backend/models/large-v2.pt'
    MODEL_PATH_TINY = '/mnt/d/whisper-backend/models/tiny.pt'
    
    # Determine the directory of your Flask app script
    current_directory = os.path.dirname(__file__)
    
    model = whisper.load_model('./models/tiny.pt')
    
    
    translate_route_blueprint = Blueprint("translate", __name__)
    
    @translate_route_blueprint.route('/translate', methods=['POST'])
    def translate():
        audio_file = request.files['audio']
        # Generate a unique filename using a UUID
        unique_filename = str(uuid.uuid4()) + '.wav'
        filepath = os.path.join(UPLOADED_FOLDER, unique_filename)
        audio_file.save(filepath)
        transcription = ""
        translation_1 = ""
        result = model.transcribe(filepath)
        transcription = result["text"]
        return jsonify({'translation1': transcription, 'translation2': translation_1})

I had to save the recorded file first using uinque name unique_filename = str(uuid.uuid4()) + '.wav' and then procedding with the transcribe. please suggest a different method to do that without saving it.

0

There are 0 best solutions below