I hope you are all well. I am building a multimodal sentiment analysis final-year project (FYP) and am currently modifying its audio component. How can I save all of the recently recorded real-time audio into a .wav or .mp3 file?
For example, in the program below I perform real-time audio analysis on speech captured from the microphone, but I don't know how to record all of that audio into a .wav or .mp3 file. Here is my current audio sentiment analysis code:
# Import necessary libraries
import speech_recognition as sr
import pyaudio
import wave
import pyttsx3
# import sentiment_analysis_library
# import llm_library
# import text_to_speech_library
from transformers import pipeline
from gtts import gTTS

# Initialize the text-to-speech engine and the recognizer (for recognizing the speech)
engine = pyttsx3.init()
r = sr.Recognizer()

# Function to transcribe audio
def transcribe_audio(audio):
    return r.recognize_google(audio)

# Function to analyze the waveform
def analyze_waveform(audio):
    # Your waveform analysis code here
    pass

# Function to analyze sentiment
def analyze_sentiment(text):
    classifier = pipeline("text-classification",
                          model='bhadresh-savani/distilbert-base-uncased-emotion',
                          return_all_scores=False)
    prediction = classifier(text)
    return prediction

# Function to get a response from the LLM
def get_llm_response(text):
    # llm_library is still commented out above, so this is a placeholder call for now
    return llm_library.get_response(text)

# Function to convert text to speech
def text_to_speech(text):
    engine.say(str(text))
    engine.runAndWait()

# Main function to handle real-time audio
def handle_real_time_audio():
    while True:
        with sr.Microphone() as source:
            print("Listening...")
            audio = r.listen(source)
            # Transcribe the audio to text
            text = transcribe_audio(audio)
            # Analyze the waveform
            analyze_waveform(audio)
            # Analyze the sentiment
            sentiment = analyze_sentiment(text)
            print("Current sentiment is: " + str(sentiment))
            # Feed the transcribed speech into an LLM
            llm_response = get_llm_response(text)
            # Convert the LLM response to audio
            text_to_speech(str(llm_response))

# Call the main function
handle_real_time_audio()
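
Here is a minimal sketch of what I think should work, assuming the audio object returned by r.listen() is a speech_recognition.AudioData instance (I rely on its get_wav_data() / get_raw_data() methods and its sample_rate / sample_width attributes). The helper names save_wav, save_session_wav and wav_to_mp3 are my own, and the .mp3 step assumes pydub plus an ffmpeg install on the machine.

import wave

from pydub import AudioSegment  # optional; .mp3 export also needs ffmpeg installed


def save_wav(audio, wav_path):
    # AudioData.get_wav_data() returns the captured samples as WAV-encoded bytes,
    # so a single utterance can be written to disk directly.
    with open(wav_path, "wb") as f:
        f.write(audio.get_wav_data())


def save_session_wav(chunks, wav_path):
    # To keep *all* audio from the listening loop, collect every AudioData object
    # in `chunks`, then write their raw PCM frames back-to-back with the wave module.
    # All chunks share the same sample rate/width because they come from the same mic.
    if not chunks:
        return
    with wave.open(wav_path, "wb") as wf:
        wf.setnchannels(1)                       # sr.Microphone captures mono by default
        wf.setsampwidth(chunks[0].sample_width)
        wf.setframerate(chunks[0].sample_rate)
        for chunk in chunks:
            wf.writeframes(chunk.get_raw_data())


def wav_to_mp3(wav_path, mp3_path):
    # pydub delegates the actual mp3 encoding to ffmpeg.
    AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3")

In handle_real_time_audio() I would then do something like chunks.append(audio) right after audio = r.listen(source), call save_wav(audio, "latest_utterance.wav") when I only need the most recent utterance, and call save_session_wav(chunks, "session.wav") followed by wav_to_mp3("session.wav", "session.mp3") once the loop ends. Does that look right, or is there a cleaner way to capture everything?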