PJSUA2 integrate caller's audio with OpenAI's API

232 Views Asked by At

So far I have set up a softphone using the PJSUA2 library in Python, and I have managed to set up the transmissions between my local machine and the caller; for testing purposes I did this so that both ends can hear and speak. I also set up a recorder for the call, since I thought I could use the WAV file to stream the audio to my voice assistant, but unfortunately that is not possible.

import pjsua2 as pj


class MyCall(pj.Call):
    """Call that greets the caller, records them, and bridges local audio.

    When the call's audio becomes active, the call:

    * plays ``GREETING_FILE`` to the caller,
    * records the caller's audio to ``RECORDING_FILE``,
    * routes the caller's audio to the local playback device, and
    * routes the local capture device (microphone) to the caller.

    Args:
        account: Owning account. It must expose a ``calls`` list; this call
            removes itself from that list when it disconnects.
        call_id: PJSUA call id, forwarded to ``pj.Call``.
        ep: The ``pj.Endpoint`` whose audio device manager supplies the
            capture/playback device media.
    """

    GREETING_FILE = "greeting.wav"
    RECORDING_FILE = "my_recording.wav"

    def __init__(self, account, call_id, ep):
        super().__init__(account, call_id)
        self.account = account  # Store reference to the account
        self.ep = ep
        # Both are created lazily, the first time the call audio is active.
        self.player = None
        self.recorder = None

    def onCallState(self, prm):
        """Release media objects and unregister the call once disconnected."""
        ci = self.getInfo()
        if ci.state != pj.PJSIP_INV_STATE_DISCONNECTED:
            return

        # Drop our references so the player and recorder can be destroyed.
        # The recording file itself is intentionally left on disk.
        self.recorder = None
        self.player = None

        # Remove this call from the account's list of calls. A ValueError
        # only means the call was already removed (or never registered),
        # which must not crash the library callback.
        try:
            self.account.calls.remove(self)
        except ValueError:
            pass

    def onCallMediaState(self, prm):
        """Connect greeting, recorder and sound devices to active call audio.

        The callback may run more than once for the same call, so the player
        and the recorder are created only once and merely re-connected on
        later invocations; re-creating the recorder would reopen
        ``RECORDING_FILE`` and lose what had already been recorded.
        """
        print("onCallMediaState")
        ci = self.getInfo()

        for mi in ci.media:
            if mi.type != pj.PJMEDIA_TYPE_AUDIO:
                continue
            if mi.status != pj.PJSUA_CALL_MEDIA_ACTIVE:
                continue

            # Everything that talks to PJSUA2 stays inside the try so that a
            # failure (missing greeting file, no sound device, ...) is
            # reported instead of propagating out of the callback.
            try:
                call_audio = self.getAudioMedia(mi.index)
                self._play_greeting(call_audio)
                self._record_caller(call_audio)
                self._bridge_sound_devices(call_audio)
            except Exception as e:
                print(f"Error occurred: {e}")

    def _play_greeting(self, call_audio):
        """Create the greeting player on first use and send it to the caller."""
        if self.player is None:
            player = pj.AudioMediaPlayer()
            # NOTE(review): options=1 is presumably PJMEDIA_FILE_NO_LOOP
            # (play the file once) - confirm against the pjmedia docs.
            player.createPlayer(file_name=self.GREETING_FILE, options=1)
            # Only keep the player once it has been created successfully.
            self.player = player
        self.player.startTransmit(sink=call_audio)

    def _record_caller(self, call_audio):
        """Create the recorder on first use and feed it the caller's audio."""
        if self.recorder is None:
            recorder = pj.AudioMediaRecorder()
            # NOTE(review): options=1 is kept from the original code; for a
            # recorder this value may select A-law encoding rather than PCM -
            # confirm that this is intended.
            recorder.createRecorder(file_name=self.RECORDING_FILE, options=1)
            self.recorder = recorder
        call_audio.startTransmit(self.recorder)

    def _bridge_sound_devices(self, call_audio):
        """Connect the call to the local speaker and microphone."""
        dev_manager = self.ep.audDevManager()

        # Caller -> local playback device, so the caller can be heard.
        call_audio.startTransmit(sink=dev_manager.getPlaybackDevMedia())

        # Local capture device (microphone) -> caller.
        dev_manager.getCaptureDevMedia().startTransmit(sink=call_audio)

I have been reading PJSUA2's docs to see if there is a way to get the call's audio stream, so that I can feed it to Google's speech-to-text and connect this call to OpenAI's API.

But no luck so far in finding something useful.

1

There is 1 best solution below

0
Will Smith R On

I don't know your specific case, but if your machine lacks a sound device you need to set the following on your endpoint:

# Endpoint bring-up for a machine that has no sound device.
# NOTE(review): ep_config is not defined in this snippet; presumably it is
# the pj.EpConfig the application already builds - confirm at the call site.
ep = pj.Endpoint()
ep.libCreate()
ep.libInit(ep_config)   # Whatever ep_config you're using
# Switch the audio device manager to the null device; set after libInit,
# as shown here, so that recording works without real audio hardware.
ep.audDevManager().setNullDev()

My machine has no sound device, and for a reason I don't know I couldn't record audio, even though my code is really similar to yours:

  def onCallMediaState(self, prm):
    """Record the call's first active audio media to ``record.wav``.

    Progress and failures are appended to ``file_log.txt``. If the recorder
    cannot be created, the failure is logged and the callback returns
    without connecting anything. If the call has no active audio media,
    nothing is connected.
    """
    with open("file_log.txt", "a") as log_file:
      log_file.write("onCallMediaState\n")

    self.recorder = pj.AudioMediaRecorder()
    try:
      self.recorder.createRecorder("record.wav")
    except Exception:
      # Narrower than a bare ``except:`` so that SystemExit and
      # KeyboardInterrupt are not swallowed by the callback.
      with open("file_log.txt", "a") as log_file:
        log_file.write("Couldnt record constructor failed\n")
      return

    # Must be bound before the loop: when no media entry matches, the check
    # below would otherwise raise UnboundLocalError.
    audioMedia = None
    callInfo = self.getInfo()
    for media_index, media in enumerate(callInfo.media):
      if media.type == pj.PJMEDIA_TYPE_AUDIO and media.status == pj.PJSUA_CALL_MEDIA_ACTIVE:
        audioMedia = self.getAudioMedia(media_index)
        break

    if audioMedia is not None:
      # Send the caller's audio into the recorder.
      audioMedia.startTransmit(sink=self.recorder)

After setting device to null it recorded fine.