How to transcribe audio from microphone using google speech API V2 in nodeJs?

89 Views Asked by At

I am trying to transcribe the audio from the microphone using Google speech-to-text API version 2. Everything works great with the v1 APIs.

I am creating an audio stream and trying to create chunks; I think there is some issue in this process.

It works when I transcribe the audio from a file using createReadStream, but it does not work when I try to do the same with microphone audio, and it gives the following error:

Request message serialization failure: invalid encoding

/**
 * Streams microphone audio to the Google Cloud Speech-to-Text v2 API and
 * logs the first transcript received.
 *
 * Root cause of the original "Request message serialization failure:
 * invalid encoding" error: the microphone 'data' handler converted each
 * raw PCM Buffer with chunk.toString(), which decodes binary audio as
 * UTF-8 and corrupts it. The v2 StreamingRecognizeRequest `audio` field
 * expects bytes, so the chunks must be passed through as Buffers.
 */
async function main() {

  const recognizerName = 'projects/effective-aria-393109/locations/global/recognizers/recognizer'

  // Imports the Google Cloud client library and the microphone recorder.
  const speech = require('@google-cloud/speech').v2;
  const recorder = require('node-record-lpcm16');

  async function streamingRecognize() {
    // Creates a client
    const client = new speech.SpeechClient();

    const recognitionConfig = {
      // autoDecodingConfig removes the need to specify audio encoding.
      // This field only needs to be present in the recognitionConfig
      autoDecodingConfig: {},
    };
    const streamingConfig = {
      config: recognitionConfig,
    };

    const configRequest = {
      recognizer: recognizerName,
      streamingConfig: streamingConfig,
    };

    const audioStream = recorder.record({
      sampleRate: 16000, // Sample rate (adjust as needed)
      channels: 1, // Mono audio
      audioType: 'raw', // Output audio type
      endOnSilence: true, // Stop recording once the speaker goes quiet
    }).stream();

    // Raw Buffer chunks captured from the microphone. Do NOT stringify
    // them — the audio payload must remain binary.
    const chunks: any = [];
    let writeStream: any;

    audioStream
      .on('data', (chunk: any) => {
        // Keep the Buffer as-is; chunk.toString() would corrupt the PCM data.
        chunks.push(chunk);
      })
      .on('end', () => {
        // The config-only request must be the first message on the stream.
        writeStream.write(configRequest);
        for (const chunk of chunks) {
          // `audio` carries raw bytes (Buffer) in a v2 StreamingRecognizeRequest.
          writeStream.write({audio: chunk});
        }
        writeStream.end();
      });

    // Open the bidirectional gRPC stream; resolve on the first response.
    // NOTE(review): writeStream is assigned synchronously inside the Promise
    // executor, well before the recorder's 'end' event can fire.
    const transcript = await new Promise((resolve, reject) => {
      writeStream = client
        ._streamingRecognize()
        .on('data', (response: any) => {
          console.log(response);
          const {results} = response;
          resolve(results[0].alternatives[0].transcript);
        })
        .on('error', (err: any) => {
          console.error(err.message);
          reject(err);
        });
    });
    console.log(transcript);
  }
  await streamingRecognize();

}

// Export the entry point so callers can invoke the v2 streaming sample.
exports.streamingRecognizeV2 = main;
0

There are 0 best solutions below