Windows Phone Encoding and Decoding audio using NSpeex. Having issue with decoding?

1.5k Views Asked by At

I am trying to encode recorded audio using NSpeex, transfer it over the internet, and decode it on the other end. I am doing all of this on Windows Phone 7/8. To encode and decode I am using the following code, but after decoding I do not get audio back that I can play correctly. Can anyone provide encoding and decoding code that works on audio recorded on WP7/8?

// Default microphone; its SampleRate is passed to GetBandMode by the encode and decode routines.
private static Microphone mic = Microphone.Default;

        /// <summary>
        /// Encodes 16-bit PCM audio with Speex and wraps the result in a length-prefixed chunk.
        /// </summary>
        /// <param name="buf">Raw PCM bytes, two bytes per sample.</param>
        /// <param name="len">Number of valid bytes in <paramref name="buf"/>; a trailing odd byte is ignored.</param>
        /// <returns>
        /// The framed chunk, or <paramref name="buf"/> itself (unframed) when the encoder
        /// reports zero encoded bytes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="buf"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="len"/> is negative or larger than the buffer.
        /// </exception>
        private static byte[] EncodeSpeech(byte[] buf, int len)
        {
            if (buf == null)
            {
                throw new ArgumentNullException("buf");
            }

            if (len < 0 || len > buf.Length)
            {
                throw new ArgumentOutOfRangeException("len");
            }

            // NOTE(review): a fresh encoder is created on every call. If the encoder keeps
            // state between frames, consider holding a single instance and reusing it.
            BandMode mode = GetBandMode(mic.SampleRate);
            SpeexEncoder encoder = new SpeexEncoder(mode);

            // set encoding quality to lowest (which will generate the smallest size in the fastest time)
            encoder.Quality = 1;

            // Convert bytes to samples. Only whole samples are copied, so an odd 'len'
            // no longer indexes past the end of the sample array.
            int sampleCount = len / 2;
            short[] data = new short[sampleCount];
            Buffer.BlockCopy(buf, 0, data, 0, sampleCount * 2);

            // note: the number of samples handed to the encoder must be a multiple of encoder.FrameSize
            sampleCount = sampleCount - sampleCount % encoder.FrameSize;

            byte[] encodedData = new byte[len];
            int encodedBytes = encoder.Encode(data, 0, sampleCount, encodedData, 0, len);
            if (encodedBytes == 0)
            {
                // Nothing was encoded: hand back the input untouched (no chunk header).
                return buf;
            }

            // each chunk is laid out as follows:
            // | 4-byte payload size (sample-count field + encoded bytes) | 4-byte sample count | <encoded-bytes> |
            byte[] sampleCountBuf = BitConverter.GetBytes(sampleCount);
            byte[] sizeBuf = BitConverter.GetBytes(encodedBytes + sampleCountBuf.Length);
            int headerSize = sizeBuf.Length + sampleCountBuf.Length;

            byte[] returnBuf = new byte[headerSize + encodedBytes];
            sizeBuf.CopyTo(returnBuf, 0);
            sampleCountBuf.CopyTo(returnBuf, sizeBuf.Length);
            Array.Copy(encodedData, 0, returnBuf, headerSize, encodedBytes);
            return returnBuf;
        }


        /// <summary>
        /// Decodes one chunk laid out as
        /// | 4-byte payload size | 4-byte sample count | encoded bytes |
        /// back into 16-bit PCM bytes.
        /// </summary>
        /// <param name="buf">A complete chunk; everything after the 8-byte header is treated as encoded data.</param>
        /// <returns>A byte array of (sample count * 2) bytes holding the decoded samples.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="buf"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="buf"/> is shorter than the header or carries a negative sample count.
        /// </exception>
        private byte[] DecodeSpeech(byte[] buf)
        {
            const int HeaderSize = 8;

            if (buf == null)
            {
                throw new ArgumentNullException("buf");
            }

            if (buf.Length < HeaderSize)
            {
                throw new ArgumentException("Buffer is too short to contain a chunk header.", "buf");
            }

            // The first header field (payload size) is not needed here: the encoded data
            // is taken to be everything that follows the header.
            int sampleCount = BitConverter.ToInt32(buf, 4);
            if (sampleCount < 0)
            {
                throw new ArgumentException("Chunk header contains a negative sample count.", "buf");
            }

            // NOTE(review): a fresh decoder is created on every call. If the decoder keeps
            // state between frames, consider holding a single instance and reusing it.
            BandMode mode = GetBandMode(mic.SampleRate);
            SpeexDecoder decoder = new SpeexDecoder(mode);

            int encodedLength = buf.Length - HeaderSize;
            byte[] encodedBuf = new byte[encodedLength];
            Array.Copy(buf, HeaderSize, encodedBuf, 0, encodedLength);

            short[] decodedBuf = new short[sampleCount];
            decoder.Decode(encodedBuf, 0, encodedBuf.Length, decodedBuf, 0, false);

            // Copy the samples out as raw bytes in one block instead of allocating
            // a temporary two-byte array per sample.
            byte[] returnBuf = new byte[sampleCount * 2];
            Buffer.BlockCopy(decodedBuf, 0, returnBuf, 0, returnBuf.Length);
            return returnBuf;
        }


        /// <summary>
        /// Maps a sample rate to a band mode: up to 8000 gives Narrow,
        /// up to 16000 gives Wide, anything higher gives UltraWide.
        /// </summary>
        /// <param name="sampleRate">Sample rate to classify.</param>
        /// <returns>The band mode for <paramref name="sampleRate"/>.</returns>
        private static BandMode GetBandMode(int sampleRate)
        {
            return sampleRate <= 8000
                ? BandMode.Narrow
                : (sampleRate <= 16000 ? BandMode.Wide : BandMode.UltraWide);
        }
1

There is 1 best solution below

0
On BEST ANSWER

I think your problem may be that you are creating a new SpeexEncoder every time you want to encode audio. Try making it a member of your class and reusing it.

I looked at the code for NSpeex and noticed that SpeexEncoder uses NbEncoder for the narrow band. That class appears to keep a history of previous audio data in order to perform the encoding. This means that the output of different encoder instances would not fit together.

// Default microphone; CreateEncoder reads its SampleRate to choose the band mode.
// Declared before 'encoder' because static field initializers run in textual order.
private static Microphone mic = Microphone.Default;
// Single encoder instance, built once by CreateEncoder.
private static SpeexEncoder encoder = CreateEncoder();

    /// <summary>
    /// Builds a Speex encoder whose band mode is derived from the microphone's sample rate.
    /// </summary>
    /// <returns>A new encoder with Quality set to 1.</returns>
    private static SpeexEncoder CreateEncoder()
    {
        // Quality 1 is the lowest setting (smallest output, fastest encoding).
        return new SpeexEncoder(GetBandMode(mic.SampleRate)) { Quality = 1 };
    }

    private static byte[] EncodeSpeech(byte[] buf, int len)
    {
        int inDataSize = len / 2;

        ...