Speaker identification using Azure Speech recognition

60 Views Asked by At

I am trying to identify the different speakers in an audio file and show each speaker's dialog together with their ID/name. Here is my code, but I am getting an error on the line 'var speaker = e.Result.Properties.GetProperty(PropertyId**.Speaker**)': PropertyId does not contain a definition for 'Speaker'. I am new to the speech recognition service, so can anybody explain why I am getting this error and what I should do instead? I am using version 1.35.0 of the speech recognition service with WinForms and C#.

// Runs continuous speech recognition over the WAV file at filePath and appends one line
// per recognized phrase to recognizedTextBox. Any exception raised while processing is
// reported to the user in a message box.
// NOTE(review): declared async void, so callers cannot await it or observe its completion.
private async void ProcessWavFile(string filePath)
{
    try
    {
        // Replace with your subscription key and region
        string subscriptionKey = "mykey";
        string region = "eastus2";

        // Configure speech recognizer for the WAV file
        var config = SpeechConfig.FromSubscription(subscriptionKey, region);
        using (var audioConfig = AudioConfig.FromWavFileInput(filePath))
        using (var recognizer = new SpeechRecognizer(config, audioConfig))
        {
            // Subscribe to Recognized event for continuous recognition
            recognizer.Recognized += async (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    // This is the line the question reports as failing to compile:
                    // PropertyId does not contain a definition for 'Speaker'.
                    // NOTE(review): the ** markers look like markdown emphasis that leaked
                    // into the snippet rather than part of the code — confirm.
                    var speaker = e.Result.Properties.GetProperty(PropertyId**.Speaker**);
                    if (speaker != null)
                    {
                        var speakerId = speaker.ToString();
                        // Use the speaker ID as needed
                        // Invoke marshals the text box update through the control.
                        recognizedTextBox.Invoke((MethodInvoker)delegate
                        {
                            recognizedTextBox.AppendText($"Speaker ID: {speakerId}, Text: {e.Result.Text}{Environment.NewLine}");
                        });
                    }
                    else
                    {
                        // No speaker value was returned: still show the recognized text.
                        recognizedTextBox.Invoke((MethodInvoker)delegate
                        {
                            recognizedTextBox.AppendText($"Speaker ID not available, Text: {e.Result.Text}{Environment.NewLine}");
                        });
                    }
                }
            };

            // Start continuous recognition
            await recognizer.StartContinuousRecognitionAsync();

            // Wait for recognition to complete
            // The wait is a fixed 100 seconds; recognition is stopped afterwards regardless
            // of whether the whole file has been processed by then.
            await Task.Delay(TimeSpan.FromSeconds(100)); // Adjust the delay as needed

            // Stop continuous recognition
            await recognizer.StopContinuousRecognitionAsync();
        }
    }
    catch (Exception ex)
    {
        // Handle any exceptions that occur during processing
        MessageBox.Show($"An error occurred: {ex.Message}", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}

I want to identify each of the different speakers in an audio file separately, by name or ID.

1

There is 1 best solution below

0
Dasari Kamali On

I have successfully retrieved the speaker ID from the WinForms app below with a default speaker.

Code :

using System;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Transcription;

namespace WinFormsApp2
{
    /// <summary>
    /// Main form. On load it transcribes a WAV file with speaker diarization and lists
    /// every recognized utterance together with the speaker ID reported by the service.
    /// </summary>
    public partial class Form1 : Form
    {
        // Label used when the service returns no speaker ID for an utterance.
        private const string UnknownSpeakerId = "Unknown";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Load handler: starts transcription of the configured WAV file.
        /// </summary>
        private async void Form1_Load(object sender, EventArgs e)
        {
            string filePath = "path/to/.wav file";
            string subscriptionKey = "<speech_key>";
            string region = "<speech_reion>";
            await ProcessWavFile(filePath, subscriptionKey, region);
        }

        /// <summary>
        /// Transcribes the WAV file and appends one "Speaker ID / Text" line per utterance
        /// to <c>recognizedTextBox</c>.
        /// </summary>
        /// <param name="filePath">Path of the WAV file to transcribe.</param>
        /// <param name="subscriptionKey">Speech resource subscription key.</param>
        /// <param name="region">Speech resource region.</param>
        /// <returns>A task that completes when the session has ended and the transcriber has stopped.</returns>
        /// <remarks>
        /// Uses <see cref="ConversationTranscriber"/> rather than <c>SpeechRecognizer</c>:
        /// only the transcriber's results carry a per-utterance speaker ID.
        /// Failures are not rethrown; they are shown to the user in a message box.
        /// </remarks>
        private async Task ProcessWavFile(string filePath, string subscriptionKey, string region)
        {
            try
            {
                var config = SpeechConfig.FromSubscription(subscriptionKey, region);

                // Completed by the SDK callbacks below when the session ends, so the wait
                // lasts exactly as long as the audio instead of a fixed delay.
                var sessionEnded = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);

                using (var audioConfig = AudioConfig.FromWavFileInput(filePath))
                using (var transcriber = new ConversationTranscriber(config, audioConfig))
                {
                    transcriber.Transcribed += (s, e) =>
                    {
                        if (e.Result.Reason != ResultReason.RecognizedSpeech)
                        {
                            return;
                        }

                        string speakerId = string.IsNullOrEmpty(e.Result.SpeakerId)
                            ? UnknownSpeakerId
                            : e.Result.SpeakerId;
                        AppendTranscriptLine($"Speaker ID: {speakerId}, Text: {e.Result.Text}{Environment.NewLine}");
                    };

                    transcriber.Canceled += (s, e) =>
                    {
                        if (e.Reason == CancellationReason.Error)
                        {
                            // Surface service/authentication errors through the catch block below.
                            sessionEnded.TrySetException(new InvalidOperationException(
                                $"Transcription canceled ({e.ErrorCode}): {e.ErrorDetails}"));
                        }
                        else
                        {
                            // Any other cancellation reason (e.g. end of the audio stream) is a normal finish.
                            sessionEnded.TrySetResult(true);
                        }
                    };

                    transcriber.SessionStopped += (s, e) => sessionEnded.TrySetResult(true);

                    await transcriber.StartTranscribingAsync();
                    try
                    {
                        await sessionEnded.Task;
                    }
                    finally
                    {
                        // Always stop before the transcriber is disposed, even after an error.
                        await transcriber.StopTranscribingAsync();
                    }
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show($"An error occurred: {ex.Message}", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }

        // Appends text to the transcript box on the UI thread; SDK events arrive on worker threads.
        private void AppendTranscriptLine(string line)
        {
            // Skip the update if the form was closed while transcription was still running.
            if (recognizedTextBox.IsDisposed || !recognizedTextBox.IsHandleCreated)
            {
                return;
            }

            recognizedTextBox.Invoke((MethodInvoker)delegate
            {
                recognizedTextBox.AppendText(line);
            });
        }
    }
}

Form1.Designer.cs :

namespace WinFormsApp2
{
    // Designer half of Form1: declares the form's controls and builds them in InitializeComponent.
    partial class Form1
    {
        /// <summary>
        /// Container for designer-managed components; null here, so Dispose skips it unless it is assigned.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Disposes the components container when one exists, then defers to the base class.
        /// </summary>
        /// <param name="disposing">When true and components is non-null, components is disposed.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code
        /// <summary>
        /// Creates and configures the form's single text box and the form itself.
        /// </summary>
        private void InitializeComponent()
        {
            this.recognizedTextBox = new System.Windows.Forms.TextBox();
            this.SuspendLayout();

            //
            // recognizedTextBox: multiline box with a vertical scroll bar
            //
            this.recognizedTextBox.Location = new System.Drawing.Point(12, 12);
            this.recognizedTextBox.Multiline = true;
            this.recognizedTextBox.Name = "recognizedTextBox";
            this.recognizedTextBox.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
            this.recognizedTextBox.Size = new System.Drawing.Size(776, 426);
            this.recognizedTextBox.TabIndex = 0;

            //
            // Form1: hosts the text box and wires the Load event to Form1_Load
            //
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(800, 450);
            this.Controls.Add(this.recognizedTextBox);
            this.Name = "Form1";
            this.Text = "Speech Transcription";
            this.Load += new System.EventHandler(this.Form1_Load);
            this.ResumeLayout(false);
            this.PerformLayout();

        }

        #endregion

        // Text box created in InitializeComponent and added to the form's Controls.
        private System.Windows.Forms.TextBox recognizedTextBox;
    }
}

Output :

The WinForms project ran successfully, providing the Speaker ID and text output as shown below.

Speaker ID: Guest-1, Text: Hello, this is a test of the speech synthesis service.

enter image description here