Send wav file from python and receive in Unity: high pitch noizy result

171 Views Asked by At

I'm being stuck for two days by this seemingly simple problem of sending an audio file from Python to unity.
One of the files I used for testing can be downloaded here: https://www2.cs.uic.edu/~i101/SoundFiles/BabyElephantWalk60.wav

The sending code:

import socket

# Server configuration
HOST = '127.0.0.1'
PORT = 12345

# Read the audio file data (without the WAV header)
with open('BabyElephantWalk60.wav', 'rb') as audio_file:
    # Skip the WAV header (44 bytes)
    audio_file.seek(44)
    audio_data = audio_file.read()

# Create a TCP socket
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind((HOST, PORT))
server_socket.listen()

print("Server listening on {}:{}".format(HOST, PORT))

# Accept a client connection
client_socket, client_address = server_socket.accept()
print("Connected to:", client_address)

# Send the audio data (without the WAV header)
client_socket.send(audio_data)

# Calculate the audio data length
audio_data_length = len(audio_data)
print("Sending audio data of length:", audio_data_length)
# Close the connections
client_socket.close()
server_socket.close()

and the receiving code in Unity:

using System;
using System.Net.Sockets;
using System.IO;
using UnityEngine;

public class RealTimeAudioReceiver : MonoBehaviour
{
    public AudioSource audioSource;

    private TcpClient client;
    private NetworkStream stream;
    private BinaryReader reader;

    private const int SampleRate = 22050;
    private const int Channels = 1;
    private const int BitsPerSample = 32;

    private byte[] receivedAudioData;

    private bool play=false;

    private void Start()
    {
        Debug.Log("Time.timeScale: " + Time.timeScale);
        ConnectToServer();
        
    }
    private void Update(){
        if(play){
            audioSource.Play();
            play=false;
        }
    }

    private void ConnectToServer()
    {
        try
        {
            client = new TcpClient("127.0.0.1", 12345);
            stream = client.GetStream();
            reader = new BinaryReader(stream);

            // Start receiving audio data
            StartCoroutine(ReceiveAudioData());
        }
        catch (Exception e)
        {
            Debug.LogError("Error connecting to server: " + e.Message);
        }
    }

    private System.Collections.IEnumerator ReceiveAudioData()
    {
        using (MemoryStream memoryStream = new MemoryStream())
        {
            byte[] buffer = new byte[1024];
            int bytesRead;

            while ((bytesRead = reader.Read(buffer, 0, buffer.Length)) > 0)
            {
                memoryStream.Write(buffer, 0, bytesRead);
            }

            receivedAudioData = memoryStream.ToArray();
        }

        // Create and play the AudioClip
        CreateAndPlayAudioClip();

        // Clean up
        reader.Close();
        stream.Close();
        client.Close();

        yield return null;
    }


    private void CreateAndPlayAudioClip()
    {
        if (receivedAudioData != null && receivedAudioData.Length > 0)
        {
            AudioClip clip = AudioClip.Create("ReceivedAudio", receivedAudioData.Length / 4, Channels, SampleRate, false);
            float[] samples = new float[receivedAudioData.Length / 4];

            for (int i = 0; i < samples.Length; i++)
            {
                samples[i] = BitConverter.ToSingle(receivedAudioData, i * 4);
            }

            clip.SetData(samples, 0);
            audioSource.clip = clip;

            //int receivedLength = BitConverter.ToInt32(lengthBytes, 0);
            Debug.Log("Received audio data of length: " + receivedAudioData.Length);

            // // Adjust the playback speed based on the sample rate difference
            // float pitchFactor = (float)SampleRate / AudioSettings.outputSampleRate;
            // audioSource.pitch = pitchFactor;

            // audioSource.Play();
            play=true;
        }
    }
}

I've used Audacity to check the sample rate and channel and also printed out the number of bytes received. It looks all good. But when playing the audio it sounds like the audio is beinng accelerated and with lots of noize.
I'm so desperate and would be very grateful if someone could help!!!

1

There are 1 best solutions below

0
On BEST ANSWER
  • I think the main issue is that you are receiving an audio tat uses a 16-bit channel, meaning always 16-bit (= 2 bytes) make one sample.

    You, however, always combine 4 bytes (32-bit) into one sample (float).

    You rather want to use 16-bit samples.

  • Further what you get is encoded in 16-bit Signed Integer not floats => you need to convert them into Unity's float based data by dividing by the Int16.MaxValue.

  • Note that your .wav file also has a header of 44 bytes that you would need to properly parse and / or at least skip!

like e.g.

const int SAMPLE_SIZE = sizeof(Int16);
const int WAV_HEADER_SIZE = 44; 

private void CreateAndPlayAudioClip()
{
    if (receivedAudioData != null && receivedAudioData.Length > 0)
    {
        var sampleCount = (receivedAudioData.Length - WAV_HEADER_SIZE) / SAMPLE_SIZE;
        AudioClip clip = AudioClip.Create("ReceivedAudio", sampleCount, Channels, SampleRate, false);

        float[] samples = new float[sampleCount];

        for (int i = 0; i < samples.Length; i++)
        {
            samples[i] = (float)BitConverter.ToInt16(receivedAudioData, WAV_HEADER_SIZE + i * SAMPLE_SIZE) / Int16.MaxValue;
        }

        clip.SetData(samples, 0);
        audioSource.clip = clip;

        play=true;
    }
}

See also