I have developed an Flask application, where I will record user speech using javascript and want to process the audio using Flask backend. I am using whisper openai to transcribe the recorded audio. I am able to achieve that but the problem is I have to save the recorded audio data beforehand than have to process it. I need a different approach where I can directly transcribe the audio speech without saving it locally. Hare's the code
function startRecording() {
if (!isRecording) {
navigator.mediaDevices
.getUserMedia({ audio: true })
.then(function (stream) {
mediaRecorder = new MediaRecorder(stream);
mediaRecorder.ondataavailable = function (event) {
audioChunks.push(event.data);
};
mediaRecorder.onstop = function () {
const audioBlob = new Blob(audioChunks, { type: "audio/wav" });
// const audioUrl = URL.createObjectURL(audioBlob);
audioChunks = [];
// Play the recorded audio
// const audioElement = document.getElementById("audio");
// audioElement.src = audioUrl;
// audioElement.play();
sendAudioData(audioBlob, "translation1", "translation2");
isRecording = false;
};
mediaRecorder.start();
isRecording = true;
})
.catch(function (error) {
console.error("Error accessing microphone:", error);
});
} else {
mediaRecorder.stop();
}
}
function sendAudioData(
audioBlob,
translationTextareaId,
transcription2TextareaId
) {
showLoadingAnimation();
const formData = new FormData();
formData.append("audio", audioBlob);
fetch("/translate", {
method: "POST",
body: formData,
}).then((response) => {
// Handle the response from the Flask route
response.json().then((data) => {
const translation_in_english = data.translation1;
const translation_in_assamese = data.translation2;
// document.getElementById(translationTextareaId).textContent =
// translation_in_english;
const translationTextarea = document.getElementById(
translationTextareaId
);
hideLoadingAnimation();
animateText(translationTextarea, translation_in_english);
document.getElementById(transcription2TextareaId).textContent =
translation_in_assamese;
});
});
}
here is the route
from flask import render_template, request, jsonify,Blueprint
from app import app
import uuid
# _download(_MODELS["large-v2"], "/mnt/d/whisper-backend/models", False)
UPLOADED_FOLDER = '/mnt/d/whisper-backend/recordings'
MODEL_PATH_LARGE = '/mnt/d/whisper-backend/models/large-v2.pt'
MODEL_PATH_TINY = '/mnt/d/whisper-backend/models/tiny.pt'
# Determine the directory of your Flask app script
current_directory = os.path.dirname(__file__)
model = whisper.load_model('./models/tiny.pt')
translate_route_blueprint = Blueprint("translate", __name__)
@translate_route_blueprint.route('/translate', methods=['POST'])
def translate():
audio_file = request.files['audio']
# Generate a unique filename using a UUID
unique_filename = str(uuid.uuid4()) + '.wav'
filepath = os.path.join(UPLOADED_FOLDER, unique_filename)
audio_file.save(filepath)
transcription = ""
translation_1 = ""
result = model.transcribe(filepath)
transcription = result["text"]
return jsonify({'translation1': transcription, 'translation2': translation_1})
I had to save the recorded file first using uinque name unique_filename = str(uuid.uuid4()) + '.wav' and then procedding with the transcribe. please suggest a different method to do that without saving it.