diff options
author | Juan Linietsky <reduzio@gmail.com> | 2017-01-23 01:45:43 -0300 |
---|---|---|
committer | Juan Linietsky <reduzio@gmail.com> | 2017-01-23 01:45:43 -0300 |
commit | cff80bb1cc0aa5a548254b9c89d0fab28982b504 (patch) | |
tree | cea35b87f03ec7a37d82b77a2c9a4ee196c69272 /servers/audio | |
parent | 4d9bc8b00cd9a561005ad145b3b95c58d4fbfa00 (diff) |
Pretty high quality pitch Shifting effect using smbPitchShift
Diffstat (limited to 'servers/audio')
-rw-r--r-- | servers/audio/effects/audio_effect_pitch_shift.cpp | 297 | ||||
-rw-r--r-- | servers/audio/effects/audio_effect_pitch_shift.h | 89 |
2 files changed, 386 insertions, 0 deletions
diff --git a/servers/audio/effects/audio_effect_pitch_shift.cpp b/servers/audio/effects/audio_effect_pitch_shift.cpp new file mode 100644 index 0000000000..a11ae37675 --- /dev/null +++ b/servers/audio/effects/audio_effect_pitch_shift.cpp @@ -0,0 +1,297 @@ +#include "audio_effect_pitch_shift.h" +#include "servers/audio_server.h" +/**************************************************************************** +* +* NAME: smbPitchShift.cpp +* VERSION: 1.2 +* HOME URL: http://blogs.zynaptiq.com/bernsee +* KNOWN BUGS: none +* +* SYNOPSIS: Routine for doing pitch shifting while maintaining +* duration using the Short Time Fourier Transform. +* +* DESCRIPTION: The routine takes a pitchShift factor value which is between 0.5 +* (one octave down) and 2. (one octave up). A value of exactly 1 does not change +* the pitch. numSampsToProcess tells the routine how many samples in indata[0... +* numSampsToProcess-1] should be pitch shifted and moved to outdata[0 ... +* numSampsToProcess-1]. The two buffers can be identical (ie. it can process the +* data in-place). fftFrameSize defines the FFT frame size used for the +* processing. Typical values are 1024, 2048 and 4096. It may be any value <= +* MAX_FRAME_LENGTH but it MUST be a power of 2. osamp is the STFT +* oversampling factor which also determines the overlap between adjacent STFT +* frames. It should at least be 4 for moderate scaling ratios. A value of 32 is +* recommended for best quality. sampleRate takes the sample rate for the signal +* in unit Hz, ie. 44100 for 44.1 kHz audio. The data passed to the routine in +* indata[] should be in the range [-1.0, 1.0), which is also the output range +* for the data, make sure you scale the data accordingly (for 16bit signed integers +* you would have to divide (and multiply) by 32768). +* +* COPYRIGHT 1999-2015 Stephan M. Bernsee <s.bernsee [AT] zynaptiq [DOT] com> +* +* The Wide Open License (WOL) +* +* Permission to use, copy, modify, distribute and sell this software and its +* documentation for any purpose is hereby granted without fee, provided that +* the above copyright notice and this license appear in all source copies. +* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF +* ANY KIND. See http://www.dspguru.com/wol.htm for more information. +* +*****************************************************************************/ + + +void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize, long osamp, float sampleRate, float *indata, float *outdata,int stride) { + + + /* + Routine smbPitchShift(). See top of file for explanation + Purpose: doing pitch shifting while maintaining duration using the Short + Time Fourier Transform. + Author: (c)1999-2015 Stephan M. Bernsee <s.bernsee [AT] zynaptiq [DOT] com> + */ + + double magn, phase, tmp, window, real, imag; + double freqPerBin, expct; + long i,k, qpd, index, inFifoLatency, stepSize, fftFrameSize2; + + /* set up some handy variables */ + fftFrameSize2 = fftFrameSize/2; + stepSize = fftFrameSize/osamp; + freqPerBin = sampleRate/(double)fftFrameSize; + expct = 2.*M_PI*(double)stepSize/(double)fftFrameSize; + inFifoLatency = fftFrameSize-stepSize; + if (gRover == 0) gRover = inFifoLatency; + + /* initialize our static arrays */ + + /* main processing loop */ + for (i = 0; i < numSampsToProcess; i++){ + + /* As long as we have not yet collected enough data just read in */ + gInFIFO[gRover] = indata[i*stride]; + outdata[i*stride] = gOutFIFO[gRover-inFifoLatency]; + gRover++; + + /* now we have enough data for processing */ + if (gRover >= fftFrameSize) { + gRover = inFifoLatency; + + /* do windowing and re,im interleave */ + for (k = 0; k < fftFrameSize;k++) { + window = -.5*cos(2.*M_PI*(double)k/(double)fftFrameSize)+.5; + gFFTworksp[2*k] = gInFIFO[k] * window; + gFFTworksp[2*k+1] = 0.; + } + + + /* ***************** ANALYSIS ******************* */ + /* do transform */ + smbFft(gFFTworksp, fftFrameSize, -1); + + /* this is the analysis step */ + for (k = 0; k <= fftFrameSize2; k++) { + + /* de-interlace FFT buffer */ + real = gFFTworksp[2*k]; + imag = gFFTworksp[2*k+1]; + + /* compute magnitude and phase */ + magn = 2.*sqrt(real*real + imag*imag); + phase = atan2(imag,real); + + /* compute phase difference */ + tmp = phase - gLastPhase[k]; + gLastPhase[k] = phase; + + /* subtract expected phase difference */ + tmp -= (double)k*expct; + + /* map delta phase into +/- Pi interval */ + qpd = tmp/M_PI; + if (qpd >= 0) qpd += qpd&1; + else qpd -= qpd&1; + tmp -= M_PI*(double)qpd; + + /* get deviation from bin frequency from the +/- Pi interval */ + tmp = osamp*tmp/(2.*M_PI); + + /* compute the k-th partials' true frequency */ + tmp = (double)k*freqPerBin + tmp*freqPerBin; + + /* store magnitude and true frequency in analysis arrays */ + gAnaMagn[k] = magn; + gAnaFreq[k] = tmp; + + } + + /* ***************** PROCESSING ******************* */ + /* this does the actual pitch shifting */ + memset(gSynMagn, 0, fftFrameSize*sizeof(float)); + memset(gSynFreq, 0, fftFrameSize*sizeof(float)); + for (k = 0; k <= fftFrameSize2; k++) { + index = k*pitchShift; + if (index <= fftFrameSize2) { + gSynMagn[index] += gAnaMagn[k]; + gSynFreq[index] = gAnaFreq[k] * pitchShift; + } + } + + /* ***************** SYNTHESIS ******************* */ + /* this is the synthesis step */ + for (k = 0; k <= fftFrameSize2; k++) { + + /* get magnitude and true frequency from synthesis arrays */ + magn = gSynMagn[k]; + tmp = gSynFreq[k]; + + /* subtract bin mid frequency */ + tmp -= (double)k*freqPerBin; + + /* get bin deviation from freq deviation */ + tmp /= freqPerBin; + + /* take osamp into account */ + tmp = 2.*M_PI*tmp/osamp; + + /* add the overlap phase advance back in */ + tmp += (double)k*expct; + + /* accumulate delta phase to get bin phase */ + gSumPhase[k] += tmp; + phase = gSumPhase[k]; + + /* get real and imag part and re-interleave */ + gFFTworksp[2*k] = magn*cos(phase); + gFFTworksp[2*k+1] = magn*sin(phase); + } + + /* zero negative frequencies */ + for (k = fftFrameSize+2; k < 2*fftFrameSize; k++) gFFTworksp[k] = 0.; + + /* do inverse transform */ + smbFft(gFFTworksp, fftFrameSize, 1); + + /* do windowing and add to output accumulator */ + for(k=0; k < fftFrameSize; k++) { + window = -.5*cos(2.*M_PI*(double)k/(double)fftFrameSize)+.5; + gOutputAccum[k] += 2.*window*gFFTworksp[2*k]/(fftFrameSize2*osamp); + } + for (k = 0; k < stepSize; k++) gOutFIFO[k] = gOutputAccum[k]; + + /* shift accumulator */ + memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameSize*sizeof(float)); + + /* move input FIFO */ + for (k = 0; k < inFifoLatency; k++) gInFIFO[k] = gInFIFO[k+stepSize]; + } + } + + + +} + + +void SMBPitchShift::smbFft(float *fftBuffer, long fftFrameSize, long sign) +/* + FFT routine, (C)1996 S.M.Bernsee. Sign = -1 is FFT, 1 is iFFT (inverse) + Fills fftBuffer[0...2*fftFrameSize-1] with the Fourier transform of the + time domain data in fftBuffer[0...2*fftFrameSize-1]. The FFT array takes + and returns the cosine and sine parts in an interleaved manner, ie. + fftBuffer[0] = cosPart[0], fftBuffer[1] = sinPart[0], asf. fftFrameSize + must be a power of 2. It expects a complex input signal (see footnote 2), + ie. when working with 'common' audio signals our input signal has to be + passed as {in[0],0.,in[1],0.,in[2],0.,...} asf. In that case, the transform + of the frequencies of interest is in fftBuffer[0...fftFrameSize]. +*/ +{ + float wr, wi, arg, *p1, *p2, temp; + float tr, ti, ur, ui, *p1r, *p1i, *p2r, *p2i; + long i, bitm, j, le, le2, k; + + for (i = 2; i < 2*fftFrameSize-2; i += 2) { + for (bitm = 2, j = 0; bitm < 2*fftFrameSize; bitm <<= 1) { + if (i & bitm) j++; + j <<= 1; + } + if (i < j) { + p1 = fftBuffer+i; p2 = fftBuffer+j; + temp = *p1; *(p1++) = *p2; + *(p2++) = temp; temp = *p1; + *p1 = *p2; *p2 = temp; + } + } + for (k = 0, le = 2; k < (long)(log(fftFrameSize)/log(2.)+.5); k++) { + le <<= 1; + le2 = le>>1; + ur = 1.0; + ui = 0.0; + arg = M_PI / (le2>>1); + wr = cos(arg); + wi = sign*sin(arg); + for (j = 0; j < le2; j += 2) { + p1r = fftBuffer+j; p1i = p1r+1; + p2r = p1r+le2; p2i = p2r+1; + for (i = j; i < 2*fftFrameSize; i += le) { + tr = *p2r * ur - *p2i * ui; + ti = *p2r * ui + *p2i * ur; + *p2r = *p1r - tr; *p2i = *p1i - ti; + *p1r += tr; *p1i += ti; + p1r += le; p1i += le; + p2r += le; p2i += le; + } + tr = ur*wr - ui*wi; + ui = ur*wi + ui*wr; + ur = tr; + } + } +} + + +void AudioEffectPitchShiftInstance::process(const AudioFrame *p_src_frames,AudioFrame *p_dst_frames,int p_frame_count) { + + float sample_rate = AudioServer::get_singleton()->get_mix_rate(); + + float *in_l = (float*)p_src_frames; + float *in_r = in_l + 1; + + float *out_l = (float*)p_dst_frames; + float *out_r = out_l + 1; + + shift_l.PitchShift(base->pitch_scale,p_frame_count,2048,4,sample_rate,in_l,out_l,2); + shift_r.PitchShift(base->pitch_scale,p_frame_count,2048,4,sample_rate,in_r,out_r,2); + +} + + +Ref<AudioEffectInstance> AudioEffectPitchShift::instance() { + Ref<AudioEffectPitchShiftInstance> ins; + ins.instance(); + ins->base=Ref<AudioEffectPitchShift>(this); + + + return ins; +} + +void AudioEffectPitchShift::set_pitch_scale(float p_adjust) { + + pitch_scale=p_adjust; +} + +float AudioEffectPitchShift::get_pitch_scale() const { + + return pitch_scale; +} + + +void AudioEffectPitchShift::_bind_methods() { + + ClassDB::bind_method(_MD("set_pitch_scale","rate"),&AudioEffectPitchShift::set_pitch_scale); + ClassDB::bind_method(_MD("get_pitch_scale"),&AudioEffectPitchShift::get_pitch_scale); + + ADD_PROPERTY(PropertyInfo(Variant::REAL,"pitch_scale",PROPERTY_HINT_RANGE,"0.01,16,0.01"),_SCS("set_pitch_scale"),_SCS("get_pitch_scale")); + +} + +AudioEffectPitchShift::AudioEffectPitchShift() { + pitch_scale=1.0; + +} diff --git a/servers/audio/effects/audio_effect_pitch_shift.h b/servers/audio/effects/audio_effect_pitch_shift.h new file mode 100644 index 0000000000..d1343a0745 --- /dev/null +++ b/servers/audio/effects/audio_effect_pitch_shift.h @@ -0,0 +1,89 @@ +#ifndef AUDIO_EFFECT_PITCH_SHIFT_H +#define AUDIO_EFFECT_PITCH_SHIFT_H + + +#include "servers/audio/audio_effect.h" + +class SMBPitchShift { + + enum { + MAX_FRAME_LENGTH=8192 + }; + + float gInFIFO[MAX_FRAME_LENGTH]; + float gOutFIFO[MAX_FRAME_LENGTH]; + float gFFTworksp[2*MAX_FRAME_LENGTH]; + float gLastPhase[MAX_FRAME_LENGTH/2+1]; + float gSumPhase[MAX_FRAME_LENGTH/2+1]; + float gOutputAccum[2*MAX_FRAME_LENGTH]; + float gAnaFreq[MAX_FRAME_LENGTH]; + float gAnaMagn[MAX_FRAME_LENGTH]; + float gSynFreq[MAX_FRAME_LENGTH]; + float gSynMagn[MAX_FRAME_LENGTH]; + long gRover; + + void smbFft(float *fftBuffer, long fftFrameSize, long sign); +public: + void PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize, long osamp, float sampleRate, float *indata, float *outdata, int stride); + + SMBPitchShift() { + gRover=0; + memset(gInFIFO, 0, MAX_FRAME_LENGTH*sizeof(float)); + memset(gOutFIFO, 0, MAX_FRAME_LENGTH*sizeof(float)); + memset(gFFTworksp, 0, 2*MAX_FRAME_LENGTH*sizeof(float)); + memset(gLastPhase, 0, (MAX_FRAME_LENGTH/2+1)*sizeof(float)); + memset(gSumPhase, 0, (MAX_FRAME_LENGTH/2+1)*sizeof(float)); + memset(gOutputAccum, 0, 2*MAX_FRAME_LENGTH*sizeof(float)); + memset(gAnaFreq, 0, MAX_FRAME_LENGTH*sizeof(float)); + memset(gAnaMagn, 0, MAX_FRAME_LENGTH*sizeof(float)); + } + + +}; + + +class AudioEffectPitchShift; + +class AudioEffectPitchShiftInstance : public AudioEffectInstance { + GDCLASS(AudioEffectPitchShiftInstance,AudioEffectInstance) +friend class AudioEffectPitchShift; + Ref<AudioEffectPitchShift> base; + + SMBPitchShift shift_l; + SMBPitchShift shift_r; + + +public: + + virtual void process(const AudioFrame *p_src_frames,AudioFrame *p_dst_frames,int p_frame_count); + +}; + + +class AudioEffectPitchShift : public AudioEffect { + GDCLASS(AudioEffectPitchShift,AudioEffect) + +friend class AudioEffectPitchShiftInstance; + + float pitch_scale; + int window_size; + float wet; + float dry; + bool filter; + +protected: + + static void _bind_methods(); +public: + + + Ref<AudioEffectInstance> instance(); + + void set_pitch_scale(float p_adjust); + float get_pitch_scale() const; + + AudioEffectPitchShift(); +}; + + +#endif // AUDIO_EFFECT_PITCH_SHIFT_H |