Android: How to shift pitch of output sound (realtime)

18.5k Views Asked by At

I'm new to Android development. I'm looking for a way to apply pitch shifting to the output sound in real time, but I couldn't find a starting point.

I've found this topic, but I still don't know how I can apply it.

Any suggestions?

3

There are 3 best solutions below

4
On BEST ANSWER

In general, the algorithm is called a phase vocoder -- searching for that on the Internets should get you started.

There are a few open source phase vocoders out there, you should be able to use those for reference too.

You can run a phase vocoder in real time -- the main component used is the FFT, so you'll need a fast FFT implementation. The Android libraries can do this for you; see this documentation: http://developer.android.com/reference/android/media/audiofx/Visualizer.html

As it happens, I'm about to release an open source FFT for ARM that is faster than Apple's vDSP library (which was hitherto the fastest). I'll post back in a few days when I've uploaded it to github.com.

Good luck.

1
On

There is no built-in pitch shifting algorithm in the Android SDK. You have to code your own. Pitch shifting is a real hardcore DSP algorithm; good sounding algorithms are results of many months or rather years of development...

I personally do not know of any Java implementation, so I suggest you adopt one of the free C++ pitch-shifting (PS) algorithms. The best one, which I use in my audio applications, is SoundTouch:

http://www.surina.net/soundtouch/

I played with its code a little, and it seems it would not be too complicated to rewrite it in Java.

1
On

HOME URL: http://www.dspdimension.com

/**
 * Streaming pitch shifter based on a short-time Fourier transform: each frame is
 * analysed into per-bin magnitude and true frequency, the bins are moved by the
 * pitch factor, and the frame is resynthesised and overlap-added.
 * Algorithm home page: http://www.dspdimension.com
 *
 * <p>All processing state lives in static buffers without any synchronisation, so the
 * class serves a single audio stream at a time and is not thread-safe.
 */
public class AudioPitch {

    /** Largest supported FFT frame size in samples; every work buffer is sized from it. */
    private static final int MAX_FRAME_LENGTH = 8192;
    private static final double M_PI = Math.PI;

    // Streaming state carried over from one PitchShift call to the next.
    private static final float[] gInFIFO = new float[MAX_FRAME_LENGTH];
    private static final float[] gOutFIFO = new float[MAX_FRAME_LENGTH];
    private static final float[] gFFTworksp = new float[2 * MAX_FRAME_LENGTH];
    private static final float[] gLastPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
    private static final float[] gSumPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
    private static final float[] gOutputAccum = new float[2 * MAX_FRAME_LENGTH];
    private static final float[] gAnaFreq = new float[MAX_FRAME_LENGTH];
    private static final float[] gAnaMagn = new float[MAX_FRAME_LENGTH];
    private static final float[] gSynFreq = new float[MAX_FRAME_LENGTH];
    private static final float[] gSynMagn = new float[MAX_FRAME_LENGTH];

    /** Write position inside gInFIFO; cycles through [inFifoLatency, frameSize). */
    private static int gRover;
    /** Frame size the streaming state was initialised for (0 = never initialised). */
    private static int gFrameSize;
    /** Oversampling factor the streaming state was initialised for. */
    private static int gOsamp;
    /** Hann window for gFrameSize, cached so it is not recomputed for every frame. */
    private static double[] gWindow = new double[0];

    /**
     * Pitch-shifts a block of samples in place.
     *
     * <p>Samples are consumed from and results written back to {@code indata}. Output is
     * read from an internal FIFO at an offset of
     * {@code fftFrameSize - fftFrameSize / osamp} samples (the code's inFifoLatency), so
     * the first results of a stream are the FIFO's initial zeros. Call repeatedly with
     * consecutive blocks of the same stream. If {@code fftFrameSize} or {@code osamp}
     * differs from the previous call, the internal state is cleared and a new stream
     * starts.
     *
     * @param pitchShift        frequency scaling factor, must be &gt; 0 (1.0 = unchanged)
     * @param numSampsToProcess number of samples of {@code indata} to process; values
     *                          &lt;= 0 process nothing
     * @param fftFrameSize      FFT frame size in samples: a power of two in [2, 8192]
     * @param osamp             number of overlapping frames per frame length, in
     *                          [1, fftFrameSize]
     * @param sampleRate        sample rate used to express bin frequencies
     * @param indata            sample buffer, overwritten with the shifted signal
     * @throws IllegalArgumentException if an argument violates the constraints above or
     *                                  {@code numSampsToProcess} exceeds {@code indata.length}
     */
    public static void PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize,
                                  long osamp, float sampleRate, float[] indata) {
        if (indata == null) {
            throw new IllegalArgumentException("indata must not be null");
        }
        // Written as !(x > 0) so that NaN is rejected too; NaN would otherwise end up in
        // the phase accumulators and corrupt every later frame.
        if (!(pitchShift > 0.0f)) {
            throw new IllegalArgumentException("pitchShift must be > 0, got " + pitchShift);
        }
        if (fftFrameSize < 2 || fftFrameSize > MAX_FRAME_LENGTH
                || (fftFrameSize & (fftFrameSize - 1)) != 0) {
            throw new IllegalArgumentException(
                    "fftFrameSize must be a power of two in [2, " + MAX_FRAME_LENGTH
                            + "], got " + fftFrameSize);
        }
        if (osamp < 1 || osamp > fftFrameSize) {
            throw new IllegalArgumentException(
                    "osamp must be in [1, fftFrameSize], got " + osamp);
        }
        if (numSampsToProcess > indata.length) {
            throw new IllegalArgumentException(
                    "numSampsToProcess (" + numSampsToProcess + ") exceeds indata.length ("
                            + indata.length + ")");
        }

        final int frameSize = (int) fftFrameSize;
        final int overlap = (int) osamp;
        final int stepSize = frameSize / overlap;
        final int inFifoLatency = frameSize - stepSize;

        // A stale rover or stale FIFOs from a different frame layout would index the
        // buffers out of range, so start over whenever the layout changes.
        if (frameSize != gFrameSize || overlap != gOsamp) {
            configure(frameSize, overlap, inFifoLatency);
        }

        for (int i = 0; i < numSampsToProcess; i++) {
            // Read the input sample before overwriting the slot with the output sample.
            gInFIFO[gRover] = indata[i];
            indata[i] = gOutFIFO[gRover - inFifoLatency];
            gRover++;

            // A full frame has been collected: process it and rewind the rover.
            if (gRover >= frameSize) {
                gRover = inFifoLatency;
                processFrame(pitchShift, frameSize, overlap, sampleRate);
            }
        }
    }

    /** Clears all streaming state and caches the Hann window for the given frame layout. */
    private static void configure(int frameSize, int osamp, int inFifoLatency) {
        java.util.Arrays.fill(gInFIFO, 0.0F);
        java.util.Arrays.fill(gOutFIFO, 0.0F);
        java.util.Arrays.fill(gLastPhase, 0.0F);
        java.util.Arrays.fill(gSumPhase, 0.0F);
        java.util.Arrays.fill(gOutputAccum, 0.0F);

        final double[] window = new double[frameSize];
        for (int k = 0; k < frameSize; k++) {
            window[k] = -.5 * Math.cos(2.0 * M_PI * (double) k / (double) frameSize) + .5;
        }
        gWindow = window;

        gRover = inFifoLatency;
        gFrameSize = frameSize;
        gOsamp = osamp;
    }

    /** Runs analysis, bin shifting, synthesis and overlap-add for the frame in gInFIFO. */
    private static void processFrame(float pitchShift, int frameSize, int osamp,
                                     float sampleRate) {
        final int halfFrame = frameSize / 2;
        final int stepSize = frameSize / osamp;
        final int inFifoLatency = frameSize - stepSize;
        final double freqPerBin = sampleRate / (double) frameSize;
        // Phase advance of bin 1 over one step; bin k advances k times as much.
        final double expct = 2.0 * M_PI * (double) stepSize / (double) frameSize;

        /* do windowing and re,im interleave */
        for (int k = 0; k < frameSize; k++) {
            gFFTworksp[2 * k] = (float) (gInFIFO[k] * gWindow[k]);
            gFFTworksp[2 * k + 1] = 0.0F;
        }

        /* ***************** ANALYSIS ******************* */
        ShortTimeFourierTransform(gFFTworksp, frameSize, -1);

        for (int k = 0; k <= halfFrame; k++) {
            final double real = gFFTworksp[2 * k];
            final double imag = gFFTworksp[2 * k + 1];

            final double magn = 2.0 * Math.sqrt(real * real + imag * imag);
            final double phase = smbAtan2(imag, real);

            /* phase difference to the previous frame, minus the expected advance */
            double tmp = phase - gLastPhase[k];
            gLastPhase[k] = (float) phase;
            tmp -= (double) k * expct;

            /* map delta phase into +/- Pi interval */
            long qpd = (long) (tmp / M_PI);
            if (qpd >= 0) {
                qpd += qpd & 1;
            } else {
                qpd -= qpd & 1;
            }
            tmp -= M_PI * (double) qpd;

            /* deviation from the bin centre, in bins, then the partial's true frequency */
            tmp = osamp * tmp / (2.0 * M_PI);
            tmp = (double) k * freqPerBin + tmp * freqPerBin;

            gAnaMagn[k] = (float) magn;
            gAnaFreq[k] = (float) tmp;
        }

        /* ***************** PROCESSING ******************* */
        java.util.Arrays.fill(gSynMagn, 0, frameSize, 0.0F);
        java.util.Arrays.fill(gSynFreq, 0, frameSize, 0.0F);

        for (int k = 0; k <= halfFrame; k++) {
            // pitchShift > 0 is enforced by the caller, so the target bin is never negative.
            final int index = (int) (k * pitchShift);
            if (index <= halfFrame) {
                gSynMagn[index] += gAnaMagn[k];
                gSynFreq[index] = gAnaFreq[k] * pitchShift;
            }
        }

        /* ***************** SYNTHESIS ******************* */
        for (int k = 0; k <= halfFrame; k++) {
            final double magn = gSynMagn[k];
            double tmp = gSynFreq[k];

            /* frequency -> deviation in bins -> phase advance for this step */
            tmp -= (double) k * freqPerBin;
            tmp /= freqPerBin;
            tmp = 2.0 * M_PI * tmp / osamp;
            tmp += (double) k * expct;

            /* accumulate delta phase to get bin phase */
            gSumPhase[k] += (float) tmp;
            final double phase = gSumPhase[k];

            gFFTworksp[2 * k] = (float) (magn * Math.cos(phase));
            gFFTworksp[2 * k + 1] = (float) (magn * Math.sin(phase));
        }

        /* zero negative frequencies */
        java.util.Arrays.fill(gFFTworksp, frameSize + 2, 2 * frameSize, 0.0F);

        ShortTimeFourierTransform(gFFTworksp, frameSize, 1);

        /* do windowing and add to output accumulator */
        for (int k = 0; k < frameSize; k++) {
            gOutputAccum[k] +=
                    (float) (2.0 * gWindow[k] * gFFTworksp[2 * k] / (halfFrame * osamp));
        }
        System.arraycopy(gOutputAccum, 0, gOutFIFO, 0, stepSize);

        /* shift accumulator and input FIFO by one step (arraycopy is overlap-safe) */
        System.arraycopy(gOutputAccum, stepSize, gOutputAccum, 0, frameSize);
        System.arraycopy(gInFIFO, stepSize, gInFIFO, 0, inFifoLatency);
    }

    /**
     * In-place radix-2 FFT over an interleaved (re, im) buffer.
     *
     * @param fftBuffer    interleaved complex data; the first {@code 2 * fftFrameSize}
     *                     floats are transformed in place
     * @param fftFrameSize number of complex points; the stage count is derived from
     *                     log2, so this is expected to be a power of two
     * @param sign         -1 for the forward transform, +1 for the inverse; the result
     *                     is not scaled in either direction
     */
    public static void ShortTimeFourierTransform(float[] fftBuffer, long fftFrameSize,
                                                 long sign) {
        final int floats = (int) (2 * fftFrameSize);

        /* bit-reversal permutation of the complex points */
        for (int i = 2; i < floats - 2; i += 2) {
            int j = 0;
            for (int bitm = 2; bitm < floats; bitm <<= 1) {
                if ((i & bitm) != 0) {
                    j++;
                }
                j <<= 1;
            }
            if (i < j) {
                float temp = fftBuffer[i];
                fftBuffer[i] = fftBuffer[j];
                fftBuffer[j] = temp;
                temp = fftBuffer[i + 1];
                fftBuffer[i + 1] = fftBuffer[j + 1];
                fftBuffer[j + 1] = temp;
            }
        }

        /* butterfly stages; le and le2 are measured in floats, not complex points */
        final int stages = (int) (Math.log(fftFrameSize) / Math.log(2.0) + .5);
        for (int k = 0, le = 2; k < stages; k++) {
            le <<= 1;
            final int le2 = le >> 1;
            float ur = 1.0F;
            float ui = 0.0F;
            final float arg = (float) M_PI / (le2 >> 1);
            final float wr = (float) Math.cos(arg);
            final float wi = (float) (sign * Math.sin(arg));
            for (int j = 0; j < le2; j += 2) {
                for (int i = j; i < floats; i += le) {
                    final float tr = fftBuffer[i + le2] * ur - fftBuffer[i + le2 + 1] * ui;
                    final float ti = fftBuffer[i + le2] * ui + fftBuffer[i + le2 + 1] * ur;
                    fftBuffer[i + le2] = fftBuffer[i] - tr;
                    fftBuffer[i + le2 + 1] = fftBuffer[i + 1] - ti;
                    fftBuffer[i] += tr;
                    fftBuffer[i + 1] += ti;
                }
                /* rotate the twiddle factor for the next butterfly group */
                final float next = ur * wr - ui * wi;
                ui = ur * wi + ui * wr;
                ur = next;
            }
        }
    }

    /**
     * Phase angle of the complex number {@code y + i*x}; note the (imaginary, real)
     * argument order.
     *
     * <p>Delegates to {@link Math#atan2}, which is defined for every input. The earlier
     * hand-written special case returned 0 whenever {@code x == 0}, which is wrong for a
     * negative real part (the angle is pi) and flipped the sign of negative DC bins.
     */
    private static double smbAtan2(double x, double y) {
        return Math.atan2(x, y);
    }

}

This code works too, but it uses a lot of CPU.

The pitchShift value should be between 0.5 and 2.0.

call this class as below:

// Capture -> pitch shift -> playback loop.
// NOTE(review): assumes `recorder` is a started android.media.AudioRecord and `player` a
// playing android.media.AudioTrack, both mono 16-bit PCM at 44100 Hz — confirm in the caller.
int maxValueOFShort = 32768;
short[] buffer = new short[800];
float[] inData = new float[buffer.length];
while (audiorackIsRun)
{
    // read() may deliver fewer samples than requested, or a negative error code.
    int m = recorder.read(buffer, 0, buffer.length);
    if (m < 0)
    {
        break;      // recorder error: stop instead of replaying stale samples forever
    }
    if (m == 0)
    {
        continue;   // nothing captured this round
    }

    // 16-bit PCM -> float in [-1, 1)
    for (int n = 0; n < m; n++)
    {
        inData[n] = buffer[n] / (float) maxValueOFShort;
    }

    // Only the m samples actually read are processed (in place).
    AudioPitch.PitchShift(1, m, 4096, 4, 44100, inData);

    // float -> 16-bit PCM, clipping instead of letting the short cast wrap around
    for (int n = 0; n < m; n++)
    {
        float scaled = inData[n] * maxValueOFShort;
        if (scaled > (float) Short.MAX_VALUE)
        {
            scaled = (float) Short.MAX_VALUE;
        }
        else if (scaled < (float) Short.MIN_VALUE)
        {
            scaled = (float) Short.MIN_VALUE;
        }
        buffer[n] = (short) scaled;
    }

    player.write(buffer, 0, m);
}