Encode a MediaRecorder stream in PCM format for AWS Transcribe

265 Views Asked by At

I'm trying to use AWS Transcribe Medical, but I can't get it to work because it requires PCM-encoded audio data. I tried multiple custom functions, but with no luck. I want to use something like MediaRecorder's start method, because I want to send the data in chunks in real time.

import React, { useState, useRef } from "react";
import {
  TranscribeStreamingClient,
  StartMedicalStreamTranscriptionCommand,
} from "@aws-sdk/client-transcribe-streaming";
import logo from "./logo.svg";
import "./App.css";

function App() {
  const [isRecording, setIsRecording] = useState(false);
  const mediaRecorder = useRef(null);
  const audioChunks = useRef([]);
  const transcribeClient = useRef(null);
  const client = new TranscribeStreamingClient({
    region: "region",
    credentials: {
      accessKeyId: "accessKeyId",
      secretAccessKey: "secretAccessKey",
    },
  });

  const handleMicClick = () => {
    if (!isRecording) {
      startRecording();
    } else {
      stopRecording();
    }
  };

  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaRecorder.current = new MediaRecorder(stream);
      mediaRecorder.current.addEventListener(
        "dataavailable",
        handleDataAvailable
      );
      mediaRecorder.current.start(1000);
      setIsRecording(true);
    } catch (error) {
      console.error("Error starting recording: ", error);
    }
  };

  const stopRecording = () => {
    mediaRecorder.current.stop();
    setIsRecording(false);
    transcribeClient.current.destroy();
    transcribeClient.current = null;
  };

  const handleDataAvailable = (event) => {
    audioChunks.current.push(event.data);
    if (transcribeClient.current === null) {
      transcribeClient.current = client;
    }
    sendAudio(event.data);
  };

  const sendAudio = async (audioData) => {
    const input = {
      LanguageCode: "en-US",
      MediaSampleRateHertz: 44100,
      MediaEncoding: "pcm", // required
      Specialty: "PRIMARYCARE", // required
      Type: "CONVERSATION", // required
      AudioStream: audioData, // -> This needs to be in pcm format
    };

    const command = new StartMedicalStreamTranscriptionCommand(input);
    const response = await client.send(command);
    console.log("Response: ", response.TranscriptResultStream);

    for await (const event of response.TranscriptResultStream) {
      if (event.TranscriptEvent) {
        const message = event.TranscriptEvent;
        // Get multiple possible results
        const results = event.TranscriptEvent.Transcript.Results;
        // Print all the possible transcripts
        results.map((result) => {
          (result.Alternatives || []).map((alternative) => {
            const transcript = alternative.Items.map(
              (item) => item.Content
            ).join(" ");
            console.log("something is happening", transcript);
          });
        });
      }
    }
  };

  return (
    <div className="App">
      <header className="App-header">
        <img src={logo} className="App-logo" alt="logo" />
        <p>
          Edit <code>src/App.js</code> and save to reload.
        </p>
        <a
          className="App-link"
          href="https://reactjs.org"
          target="_blank"
          rel="noopener noreferrer"
        >
          Learn React
        </a>
        <button onClick={handleMicClick}>
          {isRecording ? "Stop" : "Record"}
        </button>
      </header>
    </div>
  );
}

export default App;

If there is a package that does this job, please mention it.

0

There are 0 best solutions below