加入频谱

2023-08-20 02:11:50 +08:00 · 2023-08-20 02:11:50 +08:00 · 016641f04d
commit 016641f04d
parent 8c0b908ce9
16 changed files with 2275 additions and 34 deletions
--- a/Source/Cut5/FFT/_kiss_fft_guts.h
+++ b/Source/Cut5/FFT/_kiss_fft_guts.h
@ -0,0 +1,167 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+/* kiss_fft.h
+   defines kiss_fft_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+
+#ifndef _kiss_fft_guts_h
+#define _kiss_fft_guts_h
+
+#include "kiss_fft.h"
+#include "kiss_fft_log.h"
+#include <limits.h>
+
+#define MAXFACTORS 32
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+struct kiss_fft_state{
+    int nfft;
+    int inverse;
+    int factors[2*MAXFACTORS];
+    kiss_fft_cpx twiddles[1];
+};
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef FIXED_POINT
+#include <stdint.h>
+#if (FIXED_POINT==32)
+# define FRACBITS 31
+# define SAMPPROD int64_t
+#define SAMP_MAX INT32_MAX
+#define SAMP_MIN INT32_MIN
+#else
+# define FRACBITS 15
+# define SAMPPROD int32_t
+#define SAMP_MAX INT16_MAX
+#define SAMP_MIN INT16_MIN
+#endif
+
+#if defined(CHECK_OVERFLOW)
+#  define CHECK_OVERFLOW_OP(a,op,b)  \
+    if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
+        KISS_FFT_WARNING("overflow (%d " #op" %d) = %ld", (a),(b),(SAMPPROD)(a) op (SAMPPROD)(b)); }
+#endif
+
+
+#   define smul(a,b) ( (SAMPPROD)(a)*(b) )
+#   define sround( x )  (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
+
+#   define S_MUL(a,b) sround( smul(a,b) )
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+          (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
+
+#   define DIVSCALAR(x,k) \
+    (x) = sround( smul(  x, SAMP_MAX/k ) )
+
+#   define C_FIXDIV(c,div) \
+    do {    DIVSCALAR( (c).r , div);  \
+        DIVSCALAR( (c).i  , div); }while (0)
+
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
+        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
+
+#else  /* not FIXED_POINT*/
+
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#define  C_ADD( res, a,b)\
+    do { \
+        CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+        CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+        (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do { \
+        CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+        CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+        (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do { \
+        CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+        CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+        (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {\
+        CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+        CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+        (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+
+
+#ifdef FIXED_POINT
+#  define KISS_FFT_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
+#else
+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*((kiss_fft_scalar).5))
+#endif
+
+#define  kf_cexp(x,phase) \
+    do{ \
+        (x)->r = KISS_FFT_COS(phase);\
+        (x)->i = KISS_FFT_SIN(phase);\
+    }while(0)
+
+
+/* a debugging function */
+#define pcpx(c)\
+    KISS_FFT_DEBUG("%g + %gi\n",(double)((c)->r),(double)((c)->i))
+
+
+#ifdef KISS_FFT_USE_ALLOCA
+// define this to allow use of alloca instead of malloc for temporary buffers
+// Temporary buffers are used in two case:
+// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
+// 2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do an in-place transform.
+#include <alloca.h>
+#define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
+#define  KISS_FFT_TMP_FREE(ptr)
+#else
+#define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
+#define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
+#endif
+
+#endif /* _kiss_fft_guts_h */
+
--- a/Source/Cut5/FFT/kfc.c
+++ b/Source/Cut5/FFT/kfc.c
@ -0,0 +1,109 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#include "kfc.h"
+
+typedef struct cached_fft *kfc_cfg;
+
+struct cached_fft
+{
+    int nfft;
+    int inverse;
+    kiss_fft_cfg cfg;
+    kfc_cfg next;
+};
+
+static kfc_cfg cache_root=NULL;
+static int ncached=0;
+
+static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
+{
+    size_t len;
+    kfc_cfg  cur=cache_root;
+    kfc_cfg  prev=NULL;
+    while ( cur ) {
+        if ( cur->nfft == nfft && inverse == cur->inverse )
+            break;/*found the right node*/
+        prev = cur;
+        cur = prev->next;
+    }
+    if (cur== NULL) {
+        /* no cached node found, need to create a new one*/
+        kiss_fft_alloc(nfft,inverse,0,&len);
+#ifdef USE_SIMD
+        int padding = (16-sizeof(struct cached_fft)) & 15;
+        // make sure the cfg aligns on a 16 byte boundary
+        len += padding;
+#endif
+        cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
+        if (cur == NULL)
+            return NULL;
+        cur->cfg = (kiss_fft_cfg)(cur+1);
+#ifdef USE_SIMD
+        cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding);
+#endif
+        kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
+        cur->nfft=nfft;
+        cur->inverse=inverse;
+        cur->next = NULL;
+        if ( prev )
+            prev->next = cur;
+        else
+            cache_root = cur;
+        ++ncached;
+    }
+    return cur->cfg;
+}
+
+void kfc_cleanup(void)
+{
+    kfc_cfg  cur=cache_root;
+    kfc_cfg  next=NULL;
+    while (cur){
+        next = cur->next;
+        free(cur);
+        cur=next;
+    }
+    ncached=0;
+    cache_root = NULL;
+}
+void kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
+{
+    kiss_fft( find_cached_fft(nfft,0),fin,fout );
+}
+
+void kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
+{
+    kiss_fft( find_cached_fft(nfft,1),fin,fout );
+}
+
+#ifdef KFC_TEST
+static void check(int nc)
+{
+    if (ncached != nc) {
+        fprintf(stderr,"ncached should be %d,but it is %d\n",nc,ncached);
+        exit(1);
+    }
+}
+
+int main(void)
+{
+    kiss_fft_cpx buf1[1024],buf2[1024];
+    memset(buf1,0,sizeof(buf1));
+    check(0);
+    kfc_fft(512,buf1,buf2);
+    check(1);
+    kfc_fft(512,buf1,buf2);
+    check(1);
+    kfc_ifft(512,buf1,buf2);
+    check(2);
+    kfc_cleanup();
+    check(0);
+    return 0;
+}
+#endif
--- a/Source/Cut5/FFT/kfc.h
+++ b/Source/Cut5/FFT/kfc.h
@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KFC_H
+#define KFC_H
+#include "kiss_fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+KFC -- Kiss FFT Cache
+
+Not needing to deal with kiss_fft_alloc and a config 
+object may be handy for a lot of programs.
+
+KFC uses the underlying KISS FFT functions, but caches the config object. 
+The first time kfc_fft or kfc_ifft for a given FFT size, the cfg 
+object is created for it.  All subsequent calls use the cached 
+configuration object.
+
+NOTE:
+You should probably not use this if your program will be using a lot 
+of various sizes of FFTs.  There is a linear search through the
+cached objects.  If you are only using one or two FFT sizes, this
+will be negligible. Otherwise, you may want to use another method 
+of managing the cfg objects.
+ 
+ There is no automated cleanup of the cached objects.  This could lead 
+to large memory usage in a program that uses a lot of *DIFFERENT* 
+sized FFTs.  If you want to force all cached cfg objects to be freed,
+call kfc_cleanup.
+ 
+ */
+
+/*forward complex FFT */
+void KISS_FFT_API kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout);
+/*reverse complex FFT */
+void KISS_FFT_API kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout);
+
+/*free all cached objects*/
+void KISS_FFT_API kfc_cleanup(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/Source/Cut5/FFT/kiss_fft.c
+++ b/Source/Cut5/FFT/kiss_fft.c
@ -0,0 +1,420 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+
+#include "_kiss_fft_guts.h"
+/* The guts header contains all the multiplication and addition macros that are defined for
+ fixed or floating point complex numbers.  It also delares the kf_ internal functions.
+ */
+
+static void kf_bfly2(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        int m
+        )
+{
+    kiss_fft_cpx * Fout2;
+    kiss_fft_cpx * tw1 = st->twiddles;
+    kiss_fft_cpx t;
+    Fout2 = Fout + m;
+    do{
+        C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
+
+        C_MUL (t,  *Fout2 , *tw1);
+        tw1 += fstride;
+        C_SUB( *Fout2 ,  *Fout , t );
+        C_ADDTO( *Fout ,  t );
+        ++Fout2;
+        ++Fout;
+    }while (--m);
+}
+
+static void kf_bfly4(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        const size_t m
+        )
+{
+    kiss_fft_cpx *tw1,*tw2,*tw3;
+    kiss_fft_cpx scratch[6];
+    size_t k=m;
+    const size_t m2=2*m;
+    const size_t m3=3*m;
+
+
+    tw3 = tw2 = tw1 = st->twiddles;
+
+    do {
+        C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
+
+        C_MUL(scratch[0],Fout[m] , *tw1 );
+        C_MUL(scratch[1],Fout[m2] , *tw2 );
+        C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+        C_SUB( scratch[5] , *Fout, scratch[1] );
+        C_ADDTO(*Fout, scratch[1]);
+        C_ADD( scratch[3] , scratch[0] , scratch[2] );
+        C_SUB( scratch[4] , scratch[0] , scratch[2] );
+        C_SUB( Fout[m2], *Fout, scratch[3] );
+        tw1 += fstride;
+        tw2 += fstride*2;
+        tw3 += fstride*3;
+        C_ADDTO( *Fout , scratch[3] );
+
+        if(st->inverse) {
+            Fout[m].r = scratch[5].r - scratch[4].i;
+            Fout[m].i = scratch[5].i + scratch[4].r;
+            Fout[m3].r = scratch[5].r + scratch[4].i;
+            Fout[m3].i = scratch[5].i - scratch[4].r;
+        }else{
+            Fout[m].r = scratch[5].r + scratch[4].i;
+            Fout[m].i = scratch[5].i - scratch[4].r;
+            Fout[m3].r = scratch[5].r - scratch[4].i;
+            Fout[m3].i = scratch[5].i + scratch[4].r;
+        }
+        ++Fout;
+    }while(--k);
+}
+
+static void kf_bfly3(
+         kiss_fft_cpx * Fout,
+         const size_t fstride,
+         const kiss_fft_cfg st,
+         size_t m
+         )
+{
+     size_t k=m;
+     const size_t m2 = 2*m;
+     kiss_fft_cpx *tw1,*tw2;
+     kiss_fft_cpx scratch[5];
+     kiss_fft_cpx epi3;
+     epi3 = st->twiddles[fstride*m];
+
+     tw1=tw2=st->twiddles;
+
+     do{
+         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+         C_MUL(scratch[1],Fout[m] , *tw1);
+         C_MUL(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+         C_MULBYSCALAR( scratch[0] , epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = Fout[m].r + scratch[0].i;
+         Fout[m2].i = Fout[m].i - scratch[0].r;
+
+         Fout[m].r -= scratch[0].i;
+         Fout[m].i += scratch[0].r;
+
+         ++Fout;
+     }while(--k);
+}
+
+static void kf_bfly5(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        int m
+        )
+{
+    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+    int u;
+    kiss_fft_cpx scratch[13];
+    kiss_fft_cpx * twiddles = st->twiddles;
+    kiss_fft_cpx *tw;
+    kiss_fft_cpx ya,yb;
+    ya = twiddles[fstride*m];
+    yb = twiddles[fstride*2*m];
+
+    Fout0=Fout;
+    Fout1=Fout0+m;
+    Fout2=Fout0+2*m;
+    Fout3=Fout0+3*m;
+    Fout4=Fout0+4*m;
+
+    tw=st->twiddles;
+    for ( u=0; u<m; ++u ) {
+        C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+        scratch[0] = *Fout0;
+
+        C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+        C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+        C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+        C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+        C_ADD( scratch[7],scratch[1],scratch[4]);
+        C_SUB( scratch[10],scratch[1],scratch[4]);
+        C_ADD( scratch[8],scratch[2],scratch[3]);
+        C_SUB( scratch[9],scratch[2],scratch[3]);
+
+        Fout0->r += scratch[7].r + scratch[8].r;
+        Fout0->i += scratch[7].i + scratch[8].i;
+
+        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+        scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+        scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+        C_SUB(*Fout1,scratch[5],scratch[6]);
+        C_ADD(*Fout4,scratch[5],scratch[6]);
+
+        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+        scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+        scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+        C_ADD(*Fout2,scratch[11],scratch[12]);
+        C_SUB(*Fout3,scratch[11],scratch[12]);
+
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+    }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+static void kf_bfly_generic(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        int m,
+        int p
+        )
+{
+    int u,k,q1,q;
+    kiss_fft_cpx * twiddles = st->twiddles;
+    kiss_fft_cpx t;
+    int Norig = st->nfft;
+
+    kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
+    if (scratch == NULL){
+        KISS_FFT_ERROR("Memory allocation failed.");
+        return;
+    }
+
+    for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+            scratch[q1] = Fout[ k  ];
+            C_FIXDIV(scratch[q1],p);
+            k += m;
+        }
+
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+            int twidx=0;
+            Fout[ k ] = scratch[0];
+            for (q=1;q<p;++q ) {
+                twidx += fstride * k;
+                if (twidx>=Norig) twidx-=Norig;
+                C_MUL(t,scratch[q] , twiddles[twidx] );
+                C_ADDTO( Fout[ k ] ,t);
+            }
+            k += m;
+        }
+    }
+    KISS_FFT_TMP_FREE(scratch);
+}
+
+static
+void kf_work(
+        kiss_fft_cpx * Fout,
+        const kiss_fft_cpx * f,
+        const size_t fstride,
+        int in_stride,
+        int * factors,
+        const kiss_fft_cfg st
+        )
+{
+    kiss_fft_cpx * Fout_beg=Fout;
+    const int p=*factors++; /* the radix  */
+    const int m=*factors++; /* stage's fft length/p */
+    const kiss_fft_cpx * Fout_end = Fout + p*m;
+
+#ifdef _OPENMP
+    // use openmp extensions at the
+    // top-level (not recursive)
+    if (fstride==1 && p<=5 && m!=1)
+    {
+        int k;
+
+        // execute the p different work units in different threads
+#       pragma omp parallel for
+        for (k=0;k<p;++k)
+            kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
+        // all threads have joined by this point
+
+        switch (p) {
+            case 2: kf_bfly2(Fout,fstride,st,m); break;
+            case 3: kf_bfly3(Fout,fstride,st,m); break;
+            case 4: kf_bfly4(Fout,fstride,st,m); break;
+            case 5: kf_bfly5(Fout,fstride,st,m); break;
+            default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+        }
+        return;
+    }
+#endif
+
+    if (m==1) {
+        do{
+            *Fout = *f;
+            f += fstride*in_stride;
+        }while(++Fout != Fout_end );
+    }else{
+        do{
+            // recursive call:
+            // DFT of size m*p performed by doing
+            // p instances of smaller DFTs of size m,
+            // each one takes a decimated version of the input
+            kf_work( Fout , f, fstride*p, in_stride, factors,st);
+            f += fstride*in_stride;
+        }while( (Fout += m) != Fout_end );
+    }
+
+    Fout=Fout_beg;
+
+    // recombine the p smaller DFTs
+    switch (p) {
+        case 2: kf_bfly2(Fout,fstride,st,m); break;
+        case 3: kf_bfly3(Fout,fstride,st,m); break;
+        case 4: kf_bfly4(Fout,fstride,st,m); break;
+        case 5: kf_bfly5(Fout,fstride,st,m); break;
+        default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+    }
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+static
+void kf_factor(int n,int * facbuf)
+{
+    int p=4;
+    double floor_sqrt;
+    floor_sqrt = floor( sqrt((double)n) );
+
+    /*factor out powers of 4, powers of 2, then any remaining primes */
+    do {
+        while (n % p) {
+            switch (p) {
+                case 4: p = 2; break;
+                case 2: p = 3; break;
+                default: p += 2; break;
+            }
+            if (p > floor_sqrt)
+                p = n;          /* no more factors, skip to end */
+        }
+        n /= p;
+        *facbuf++ = p;
+        *facbuf++ = n;
+    } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * The return value is a contiguous block of memory, allocated with malloc.  As such,
+ * It can be freed with free(), rather than a kiss_fft-specific function.
+ * */
+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
+{
+    KISS_FFT_ALIGN_CHECK(mem)
+
+    kiss_fft_cfg st=NULL;
+    size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state)
+        + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
+
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_cfg)mem;
+        *lenmem = memneeded;
+    }
+    if (st) {
+        int i;
+        st->nfft=nfft;
+        st->inverse = inverse_fft;
+
+        for (i=0;i<nfft;++i) {
+            const double pi=3.141592653589793238462643383279502884197169399375105820974944;
+            double phase = -2*pi*i / nfft;
+            if (st->inverse)
+                phase *= -1;
+            kf_cexp(st->twiddles+i, phase );
+        }
+
+        kf_factor(nfft,st->factors);
+    }
+    return st;
+}
+
+
+void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
+{
+    if (fin == fout) {
+        //NOTE: this is not really an in-place FFT algorithm.
+        //It just performs an out-of-place FFT into a temp buffer
+        if (fout == NULL){
+            KISS_FFT_ERROR("fout buffer NULL.");
+        return;
+        }
+
+        kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
+        if (tmpbuf == NULL){
+            KISS_FFT_ERROR("Memory allocation error.");
+        return;
+        }
+
+
+
+        kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
+        memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
+        KISS_FFT_TMP_FREE(tmpbuf);
+    }else{
+        kf_work( fout, fin, 1,in_stride, st->factors,st );
+    }
+}
+
+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+    kiss_fft_stride(cfg,fin,fout,1);
+}
+
+
+void kiss_fft_cleanup(void)
+{
+    // nothing needed any more
+}
+
+int kiss_fft_next_fast_size(int n)
+{
+    while(1) {
+        int m=n;
+        while ( (m%2) == 0 ) m/=2;
+        while ( (m%3) == 0 ) m/=3;
+        while ( (m%5) == 0 ) m/=5;
+        if (m<=1)
+            break; /* n is completely factorable by twos, threes, and fives */
+        n++;
+    }
+    return n;
+}
--- a/Source/Cut5/FFT/kiss_fft.h
+++ b/Source/Cut5/FFT/kiss_fft.h
@ -0,0 +1,160 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+// Define KISS_FFT_SHARED macro to properly export symbols
+#ifdef KISS_FFT_SHARED
+# ifdef _WIN32
+#  ifdef KISS_FFT_BUILD
+#   define KISS_FFT_API __declspec(dllexport)
+#  else
+#   define KISS_FFT_API __declspec(dllimport)
+#  endif
+# else
+#  define KISS_FFT_API __attribute__ ((visibility ("default")))
+# endif
+#else
+# define KISS_FFT_API
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ ATTENTION!
+ If you would like a :
+ -- a utility that will handle the caching of fft objects
+ -- real-only (no imaginary time component ) FFT
+ -- a multi-dimensional FFT
+ -- a command-line utility to perform ffts
+ -- a command-line utility to perform fast-convolution filtering
+
+ Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
+  in the tools/ directory.
+*/
+
+/* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+# ifndef KISS_FFT_MALLOC
+#  define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
+#  define KISS_FFT_ALIGN_CHECK(ptr) 
+#  define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL)
+# endif
+# ifndef KISS_FFT_FREE
+#  define KISS_FFT_FREE _mm_free
+# endif
+#else
+# define KISS_FFT_ALIGN_CHECK(ptr)
+# define KISS_FFT_ALIGN_SIZE_UP(size) (size)
+# ifndef KISS_FFT_MALLOC
+#  define KISS_FFT_MALLOC malloc
+# endif
+# ifndef KISS_FFT_FREE
+#  define KISS_FFT_FREE free
+# endif
+#endif
+
+
+#ifdef FIXED_POINT
+#include <stdint.h>
+# if (FIXED_POINT == 32)
+#  define kiss_fft_scalar int32_t
+# else	
+#  define kiss_fft_scalar int16_t
+# endif
+#else
+# ifndef kiss_fft_scalar
+/*  default is float */
+#   define kiss_fft_scalar float
+# endif
+#endif
+
+typedef struct {
+    kiss_fft_scalar r;
+    kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct kiss_fft_state* kiss_fft_cfg;
+
+/* 
+ *  kiss_fft_alloc
+ *  
+ *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ *  typical usage:      kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
+ *
+ *  The return value from fft_alloc is a cfg buffer used internally
+ *  by the fft routine or NULL.
+ *
+ *  If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
+ *  The returned value should be free()d when done to avoid memory leaks.
+ *  
+ *  The state can be placed in a user supplied buffer 'mem':
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *  
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg 
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
+
+/*
+ * kiss_fft(cfg,in_out_buf)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+    f[k].r and f[k].i
+ * */
+void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+/*
+ A more generic version of the above function. It reads its input from every Nth sample.
+ * */
+void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
+
+/* If kiss_fft_alloc allocated a buffer, it is one contiguous 
+   buffer and can be simply free()d when no longer needed*/
+#define kiss_fft_free KISS_FFT_FREE
+
+/*
+ Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up 
+ your compiler output to call this before you exit.
+*/
+void KISS_FFT_API kiss_fft_cleanup(void);
+	
+
+/*
+ * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
+ */
+int KISS_FFT_API kiss_fft_next_fast_size(int n);
+
+/* for real ffts, we need an even size */
+#define kiss_fftr_next_fast_size_real(n) \
+        (kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
+
+#ifdef __cplusplus
+} 
+#endif
+
+#endif
--- a/Source/Cut5/FFT/kiss_fft_log.h
+++ b/Source/Cut5/FFT/kiss_fft_log.h
@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef kiss_fft_log_h
+#define kiss_fft_log_h
+
+#define ERROR 1
+#define WARNING 2
+#define INFO 3
+#define DEBUG 4
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+
+#if defined(NDEBUG)
+# define KISS_FFT_LOG_MSG(severity, ...) ((void)0)
+#else
+# define KISS_FFT_LOG_MSG(severity, ...) \
+    fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \
+    fprintf(stderr, __VA_ARGS__); \
+    fprintf(stderr, "\n")
+#endif
+
+#define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__)
+#define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__)
+#define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__)
+#define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__)
+
+
+
+#endif /* kiss_fft_log_h */
--- a/Source/Cut5/FFT/kiss_fftnd.c
+++ b/Source/Cut5/FFT/kiss_fftnd.c
@ -0,0 +1,188 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#include "kiss_fftnd.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftnd_state{
+    int dimprod; /* dimsum would be mighty tasty right now */
+    int ndims; 
+    int *dims;
+    kiss_fft_cfg *states; /* cfg states for each dimension */
+    kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */
+};
+
+kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
+{
+    KISS_FFT_ALIGN_CHECK(mem)
+
+    kiss_fftnd_cfg st = NULL;
+    int i;
+    int dimprod=1;
+    size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state));
+    char * ptr = NULL;
+
+    for (i=0;i<ndims;++i) {
+        size_t sublen=0;
+        kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
+        memneeded += sublen;   /* st->states[i] */
+        dimprod *= dims[i];
+    }
+    memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);/*  st->dims */
+    memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);/* st->states  */
+    memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); /* st->tmpbuf */
+
+    if (lenmem == NULL) {/* allocate for the caller*/
+        ptr = (char *) malloc (memneeded);
+    } else { /* initialize supplied buffer if big enough */
+        if (*lenmem >= memneeded)
+            ptr = (char *) mem;
+        *lenmem = memneeded; /*tell caller how big struct is (or would be) */
+    }
+    if (!ptr)
+        return NULL; /*malloc failed or buffer too small */
+
+    st = (kiss_fftnd_cfg) ptr;
+    st->dimprod = dimprod;
+    st->ndims = ndims;
+    ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state));
+
+    st->states = (kiss_fft_cfg *)ptr;
+    ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);
+
+    st->dims = (int*)ptr;
+    ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);
+
+    st->tmpbuf = (kiss_fft_cpx*)ptr;
+    ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod);
+
+    for (i=0;i<ndims;++i) {
+        size_t len;
+        st->dims[i] = dims[i];
+        kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len);
+        st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len);
+        ptr += len;
+    }
+    /*
+Hi there!
+
+If you're looking at this particular code, it probably means you've got a brain-dead bounds checker 
+that thinks the above code overwrites the end of the array.
+
+It doesn't.
+
+-- Mark 
+
+P.S.
+The below code might give you some warm fuzzies and help convince you.
+       */
+    if ( ptr - (char*)st != (int)memneeded ) {
+        fprintf(stderr,
+                "################################################################################\n"
+                "Internal error! Memory allocation miscalculation\n"
+                "################################################################################\n"
+               );
+    }
+    return st;
+}
+
+/*
+ This works by tackling one dimension at a time.
+
+ In effect,
+ Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
+ A Di-sized fft is taken of each column, transposing the matrix as it goes.
+
+Here's a 3-d example:
+Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
+ [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
+   [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
+
+Stage 0 ( D=2): treat the buffer as a 2x12 matrix
+   [ [a b ... k l]
+     [m n ... w x] ]
+
+   FFT each column with size 2.
+   Transpose the matrix at the same time using kiss_fft_stride.
+
+   [ [ a+m a-m ]
+     [ b+n b-n]
+     ...
+     [ k+w k-w ]
+     [ l+x l-x ] ]
+
+   Note fft([x y]) == [x+y x-y]
+
+Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix,
+   [ [ a+m a-m b+n b-n c+o c-o d+p d-p ] 
+     [ e+q e-q f+r f-r g+s g-s h+t h-t ]
+     [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
+
+   And perform FFTs (size=3) on each of the columns as above, transposing 
+   the matrix as it goes.  The output of stage 1 is 
+       (Legend: ap = [ a+m e+q i+u ]
+                am = [ a-m e-q i-u ] )
+   
+   [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
+     [ sum(am) fft(am)[0] fft(am)[1] ]
+     [ sum(bp) fft(bp)[0] fft(bp)[1] ]
+     [ sum(bm) fft(bm)[0] fft(bm)[1] ]
+     [ sum(cp) fft(cp)[0] fft(cp)[1] ]
+     [ sum(cm) fft(cm)[0] fft(cm)[1] ]
+     [ sum(dp) fft(dp)[0] fft(dp)[1] ]
+     [ sum(dm) fft(dm)[0] fft(dm)[1] ]  ]
+
+Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
+   [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
+     [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
+     [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
+     [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ]  ]
+
+   Then FFTs each column, transposing as it goes.
+
+   The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
+
+   Note as a sanity check that the first element of the final 
+   stage's output (DC term) is 
+   sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
+   , i.e. the summation of all 24 input elements. 
+
+*/
+void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+    int i,k;
+    const kiss_fft_cpx * bufin=fin;
+    kiss_fft_cpx * bufout;
+
+    /*arrange it so the last bufout == fout*/
+    if ( st->ndims & 1 ) {
+        bufout = fout;
+        if (fin==fout) {
+            memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod );
+            bufin = st->tmpbuf;
+        }
+    }else
+        bufout = st->tmpbuf;
+
+    for ( k=0; k < st->ndims; ++k) {
+        int curdim = st->dims[k];
+        int stride = st->dimprod / curdim;
+
+        for ( i=0 ; i<stride ; ++i ) 
+            kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride );
+
+        /*toggle back and forth between the two buffers*/
+        if (bufout == st->tmpbuf){
+            bufout = fout;
+            bufin = st->tmpbuf;
+        }else{
+            bufout = st->tmpbuf;
+            bufin = fout;
+        }
+    }
+}
--- a/Source/Cut5/FFT/kiss_fftnd.h
+++ b/Source/Cut5/FFT/kiss_fftnd.h
@ -0,0 +1,26 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_FFTND_H
+#define KISS_FFTND_H
+
+#include "kiss_fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct kiss_fftnd_state * kiss_fftnd_cfg;
+    
+kiss_fftnd_cfg KISS_FFT_API kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem);
+void KISS_FFT_API kiss_fftnd(kiss_fftnd_cfg  cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/Source/Cut5/FFT/kiss_fftndr.c
+++ b/Source/Cut5/FFT/kiss_fftndr.c
@ -0,0 +1,120 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#include "kiss_fftndr.h"
+#include "_kiss_fft_guts.h"
+#define MAX(x,y) ( ( (x)<(y) )?(y):(x) )
+
+struct kiss_fftndr_state
+{
+    int dimReal;
+    int dimOther;
+    kiss_fftr_cfg cfg_r;
+    kiss_fftnd_cfg cfg_nd;
+    void * tmpbuf;
+};
+
+static int prod(const int *dims, int ndims)
+{
+    int x=1;
+    while (ndims--) 
+        x *= *dims++;
+    return x;
+}
+
+kiss_fftndr_cfg kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
+{
+    KISS_FFT_ALIGN_CHECK(mem)
+
+    kiss_fftndr_cfg st = NULL;
+    size_t nr=0 , nd=0,ntmp=0;
+    int dimReal = dims[ndims-1];
+    int dimOther = prod(dims,ndims-1);
+    size_t memneeded;
+    char * ptr = NULL;
+
+    (void)kiss_fftr_alloc(dimReal,inverse_fft,NULL,&nr);
+    (void)kiss_fftnd_alloc(dims,ndims-1,inverse_fft,NULL,&nd);
+    ntmp =
+        MAX( 2*dimOther , dimReal+2) * sizeof(kiss_fft_scalar)  // freq buffer for one pass
+        + dimOther*(dimReal+2) * sizeof(kiss_fft_scalar);  // large enough to hold entire input in case of in-place
+
+    memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof( struct kiss_fftndr_state )) + KISS_FFT_ALIGN_SIZE_UP(nr) + KISS_FFT_ALIGN_SIZE_UP(nd) + KISS_FFT_ALIGN_SIZE_UP(ntmp);
+
+    if (lenmem==NULL) {
+        ptr = (char*) malloc(memneeded);
+    }else{
+        if (*lenmem >= memneeded)
+            ptr = (char *)mem;
+        *lenmem = memneeded; 
+    }
+    if (ptr==NULL)
+        return NULL;
+    
+    st = (kiss_fftndr_cfg) ptr;
+    memset( st , 0 , memneeded);
+    ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftndr_state));
+    
+    st->dimReal = dimReal;
+    st->dimOther = dimOther;
+    st->cfg_r = kiss_fftr_alloc( dimReal,inverse_fft,ptr,&nr);
+    ptr += KISS_FFT_ALIGN_SIZE_UP(nr);
+    st->cfg_nd = kiss_fftnd_alloc(dims,ndims-1,inverse_fft, ptr,&nd);
+    ptr += KISS_FFT_ALIGN_SIZE_UP(nd);
+    st->tmpbuf = ptr;
+
+    return st;
+}
+
+void kiss_fftndr(kiss_fftndr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+    int k1,k2;
+    int dimReal = st->dimReal;
+    int dimOther = st->dimOther;
+    int nrbins = dimReal/2+1;
+
+    kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; 
+    kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
+
+    // timedata is N0 x N1 x ... x Nk real
+
+    // take a real chunk of data, fft it and place the output at correct intervals
+    for (k1=0;k1<dimOther;++k1) {
+        kiss_fftr( st->cfg_r, timedata + k1*dimReal , tmp1 ); // tmp1 now holds nrbins complex points
+        for (k2=0;k2<nrbins;++k2)
+           tmp2[ k2*dimOther+k1 ] = tmp1[k2];
+    }
+
+    for (k2=0;k2<nrbins;++k2) {
+        kiss_fftnd(st->cfg_nd, tmp2+k2*dimOther, tmp1);  // tmp1 now holds dimOther complex points
+        for (k1=0;k1<dimOther;++k1) 
+            freqdata[ k1*(nrbins) + k2] = tmp1[k1];
+    }
+}
+
+void kiss_fftndri(kiss_fftndr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+    int k1,k2;
+    int dimReal = st->dimReal;
+    int dimOther = st->dimOther;
+    int nrbins = dimReal/2+1;
+    kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; 
+    kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
+
+    for (k2=0;k2<nrbins;++k2) {
+        for (k1=0;k1<dimOther;++k1) 
+            tmp1[k1] = freqdata[ k1*(nrbins) + k2 ];
+        kiss_fftnd(st->cfg_nd, tmp1, tmp2+k2*dimOther);
+    }
+
+    for (k1=0;k1<dimOther;++k1) {
+        for (k2=0;k2<nrbins;++k2)
+            tmp1[k2] = tmp2[ k2*dimOther+k1 ];
+        kiss_fftri( st->cfg_r,tmp1,timedata + k1*dimReal);
+    }
+}
--- a/Source/Cut5/FFT/kiss_fftndr.h
+++ b/Source/Cut5/FFT/kiss_fftndr.h
@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_NDR_H
+#define KISS_NDR_H
+
+#include "kiss_fft.h"
+#include "kiss_fftr.h"
+#include "kiss_fftnd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
+typedef struct kiss_fftndr_state *kiss_fftndr_cfg;
+
+
+kiss_fftndr_cfg KISS_FFT_API kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem);
+/*
+ dims[0] must be even
+
+ If you don't care to allocate space, use mem = lenmem = NULL 
+*/
+
+
+void KISS_FFT_API kiss_fftndr(
+        kiss_fftndr_cfg cfg,
+        const kiss_fft_scalar *timedata,
+        kiss_fft_cpx *freqdata);
+/*
+ input timedata has dims[0] X dims[1] X ... X  dims[ndims-1] scalar points
+ output freqdata has dims[0] X dims[1] X ... X  dims[ndims-1]/2+1 complex points
+*/
+
+void KISS_FFT_API kiss_fftndri(
+        kiss_fftndr_cfg cfg,
+        const kiss_fft_cpx *freqdata,
+        kiss_fft_scalar *timedata);
+/*
+ input and output dimensions are the exact opposite of kiss_fftndr
+*/
+
+
+#define kiss_fftndr_free free
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/Source/Cut5/FFT/kiss_fftr.c
+++ b/Source/Cut5/FFT/kiss_fftr.c
@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#include "kiss_fftr.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftr_state{
+    kiss_fft_cfg substate;
+    kiss_fft_cpx * tmpbuf;
+    kiss_fft_cpx * super_twiddles;
+#ifdef USE_SIMD
+    void * pad;
+#endif
+};
+
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
+{
+	KISS_FFT_ALIGN_CHECK(mem)
+
+    int i;
+    kiss_fftr_cfg st = NULL;
+    size_t subsize = 0, memneeded;
+
+    if (nfft & 1) {
+        KISS_FFT_ERROR("Real FFT optimization must be even.");
+        return NULL;
+    }
+    nfft >>= 1;
+
+    kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
+    memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
+
+    if (lenmem == NULL) {
+        st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
+    } else {
+        if (*lenmem >= memneeded)
+            st = (kiss_fftr_cfg) mem;
+        *lenmem = memneeded;
+    }
+    if (!st)
+        return NULL;
+
+    st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
+    st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
+    st->super_twiddles = st->tmpbuf + nfft;
+    kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
+
+    for (i = 0; i < nfft/2; ++i) {
+        double phase =
+            -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
+        if (inverse_fft)
+            phase *= -1;
+        kf_cexp (st->super_twiddles+i,phase);
+    }
+    return st;
+}
+
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+    /* input buffer timedata is stored row-wise */
+    int k,ncfft;
+    kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
+
+    if ( st->substate->inverse) {
+        KISS_FFT_ERROR("kiss fft usage error: improper alloc");
+        return;/* The caller did not call the correct function */
+    }
+
+    ncfft = st->substate->nfft;
+
+    /*perform the parallel fft of two real signals packed in real,imag*/
+    kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
+    /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+     * contains the sum of the even-numbered elements of the input time sequence
+     * The imag part is the sum of the odd-numbered elements
+     *
+     * The sum of tdc.r and tdc.i is the sum of the input time sequence.
+     *      yielding DC of input time sequence
+     * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
+     *      yielding Nyquist bin of input time sequence
+     */
+
+    tdc.r = st->tmpbuf[0].r;
+    tdc.i = st->tmpbuf[0].i;
+    C_FIXDIV(tdc,2);
+    CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
+    CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
+    freqdata[0].r = tdc.r + tdc.i;
+    freqdata[ncfft].r = tdc.r - tdc.i;
+#ifdef USE_SIMD
+    freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#else
+    freqdata[ncfft].i = freqdata[0].i = 0;
+#endif
+
+    for ( k=1;k <= ncfft/2 ; ++k ) {
+        fpk    = st->tmpbuf[k];
+        fpnk.r =   st->tmpbuf[ncfft-k].r;
+        fpnk.i = - st->tmpbuf[ncfft-k].i;
+        C_FIXDIV(fpk,2);
+        C_FIXDIV(fpnk,2);
+
+        C_ADD( f1k, fpk , fpnk );
+        C_SUB( f2k, fpk , fpnk );
+        C_MUL( tw , f2k , st->super_twiddles[k-1]);
+
+        freqdata[k].r = HALF_OF(f1k.r + tw.r);
+        freqdata[k].i = HALF_OF(f1k.i + tw.i);
+        freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
+        freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
+    }
+}
+
+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+    /* input buffer timedata is stored row-wise */
+    int k, ncfft;
+
+    if (st->substate->inverse == 0) {
+        KISS_FFT_ERROR("kiss fft usage error: improper alloc");
+        return;/* The caller did not call the correct function */
+    }
+
+    ncfft = st->substate->nfft;
+
+    st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+    st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+    C_FIXDIV(st->tmpbuf[0],2);
+
+    for (k = 1; k <= ncfft / 2; ++k) {
+        kiss_fft_cpx fk, fnkc, fek, fok, tmp;
+        fk = freqdata[k];
+        fnkc.r = freqdata[ncfft - k].r;
+        fnkc.i = -freqdata[ncfft - k].i;
+        C_FIXDIV( fk , 2 );
+        C_FIXDIV( fnkc , 2 );
+
+        C_ADD (fek, fk, fnkc);
+        C_SUB (tmp, fk, fnkc);
+        C_MUL (fok, tmp, st->super_twiddles[k-1]);
+        C_ADD (st->tmpbuf[k],     fek, fok);
+        C_SUB (st->tmpbuf[ncfft - k], fek, fok);
+#ifdef USE_SIMD
+        st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#else
+        st->tmpbuf[ncfft - k].i *= -1;
+#endif
+    }
+    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+}
--- a/Source/Cut5/FFT/kiss_fftr.h
+++ b/Source/Cut5/FFT/kiss_fftr.h
@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_FTR_H
+#define KISS_FTR_H
+
+#include "kiss_fft.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    
+/* 
+ 
+ Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
+
+ 
+ 
+ */
+
+typedef struct kiss_fftr_state *kiss_fftr_cfg;
+
+
+kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
+/*
+ nfft must be even
+
+ If you don't care to allocate space, use mem = lenmem = NULL 
+*/
+
+
+void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
+/*
+ input timedata has nfft scalar points
+ output freqdata has nfft/2+1 complex points
+*/
+
+void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
+/*
+ input freqdata has  nfft/2+1 complex points
+ output timedata has nfft scalar points
+*/
+
+#define kiss_fftr_free KISS_FFT_FREE
+
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/Source/Cut5/FFT/kissfft.hh
+++ b/Source/Cut5/FFT/kissfft.hh
@ -0,0 +1,361 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISSFFT_CLASS_HH
+#define KISSFFT_CLASS_HH
+#include <complex>
+#include <utility>
+#include <vector>
+
+
+template <typename scalar_t>
+class kissfft
+{
+    public:
+
+        typedef std::complex<scalar_t> cpx_t;
+
+        kissfft( const std::size_t nfft,
+                 const bool inverse )
+            :_nfft(nfft)
+            ,_inverse(inverse)
+        {
+            // fill twiddle factors
+            _twiddles.resize(_nfft);
+            const scalar_t phinc =  (_inverse?2:-2)* std::acos( (scalar_t) -1)  / _nfft;
+            for (std::size_t i=0;i<_nfft;++i)
+                _twiddles[i] = std::exp( cpx_t(0,i*phinc) );
+
+            //factorize
+            //start factoring out 4's, then 2's, then 3,5,7,9,...
+            std::size_t n= _nfft;
+            std::size_t p=4;
+            do {
+                while (n % p) {
+                    switch (p) {
+                        case 4: p = 2; break;
+                        case 2: p = 3; break;
+                        default: p += 2; break;
+                    }
+                    if (p*p>n)
+                        p = n;// no more factors
+                }
+                n /= p;
+                _stageRadix.push_back(p);
+                _stageRemainder.push_back(n);
+            }while(n>1);
+        }
+
+
+        /// Changes the FFT-length and/or the transform direction.
+        ///
+        /// @post The @c kissfft object will be in the same state as if it
+        /// had been newly constructed with the passed arguments.
+        /// However, the implementation may be faster than constructing a
+        /// new fft object.
+        void assign( const std::size_t nfft,
+                     const bool inverse )
+        {
+            if ( nfft != _nfft )
+            {
+                kissfft tmp( nfft, inverse ); // O(n) time.
+                std::swap( tmp, *this ); // this is O(1) in C++11, O(n) otherwise.
+            }
+            else if ( inverse != _inverse )
+            {
+                // conjugate the twiddle factors.
+                for ( typename std::vector<cpx_t>::iterator it = _twiddles.begin();
+                      it != _twiddles.end(); ++it )
+                    it->imag( -it->imag() );
+            }
+        }
+
+        /// Calculates the complex Discrete Fourier Transform.
+        ///
+        /// The size of the passed arrays must be passed in the constructor.
+        /// The sum of the squares of the absolute values in the @c dst
+        /// array will be @c N times the sum of the squares of the absolute
+        /// values in the @c src array, where @c N is the size of the array.
+        /// In other words, the l_2 norm of the resulting array will be
+        /// @c sqrt(N) times as big as the l_2 norm of the input array.
+        /// This is also the case when the inverse flag is set in the
+        /// constructor. Hence when applying the same transform twice, but with
+        /// the inverse flag changed the second time, then the result will
+        /// be equal to the original input times @c N.
+        void transform(const cpx_t * fft_in, cpx_t * fft_out, const std::size_t stage = 0, const std::size_t fstride = 1, const std::size_t in_stride = 1) const
+        {
+            const std::size_t p = _stageRadix[stage];
+            const std::size_t m = _stageRemainder[stage];
+            cpx_t * const Fout_beg = fft_out;
+            cpx_t * const Fout_end = fft_out + p*m;
+
+            if (m==1) {
+                do{
+                    *fft_out = *fft_in;
+                    fft_in += fstride*in_stride;
+                }while(++fft_out != Fout_end );
+            }else{
+                do{
+                    // recursive call:
+                    // DFT of size m*p performed by doing
+                    // p instances of smaller DFTs of size m,
+                    // each one takes a decimated version of the input
+                    transform(fft_in, fft_out, stage+1, fstride*p,in_stride);
+                    fft_in += fstride*in_stride;
+                }while( (fft_out += m) != Fout_end );
+            }
+
+            fft_out=Fout_beg;
+
+            // recombine the p smaller DFTs
+            switch (p) {
+                case 2: kf_bfly2(fft_out,fstride,m); break;
+                case 3: kf_bfly3(fft_out,fstride,m); break;
+                case 4: kf_bfly4(fft_out,fstride,m); break;
+                case 5: kf_bfly5(fft_out,fstride,m); break;
+                default: kf_bfly_generic(fft_out,fstride,m,p); break;
+            }
+        }
+
+        /// Calculates the Discrete Fourier Transform (DFT) of a real input
+        /// of size @c 2*N.
+        ///
+        /// The 0-th and N-th value of the DFT are real numbers. These are
+        /// stored in @c dst[0].real() and @c dst[0].imag() respectively.
+        /// The remaining DFT values up to the index N-1 are stored in
+        /// @c dst[1] to @c dst[N-1].
+        /// The other half of the DFT values can be calculated from the
+        /// symmetry relation
+        ///     @code
+        ///         DFT(src)[2*N-k] == conj( DFT(src)[k] );
+        ///     @endcode
+        /// The same scaling factors as in @c transform() apply.
+        ///
+        /// @note For this to work, the types @c scalar_t and @c cpx_t
+        /// must fulfill the following requirements:
+        ///
+        /// For any object @c z of type @c cpx_t,
+        /// @c reinterpret_cast<scalar_t(&)[2]>(z)[0] is the real part of @c z and
+        /// @c reinterpret_cast<scalar_t(&)[2]>(z)[1] is the imaginary part of @c z.
+        /// For any pointer to an element of an array of @c cpx_t named @c p
+        /// and any valid array index @c i, @c reinterpret_cast<T*>(p)[2*i]
+        /// is the real part of the complex number @c p[i], and
+        /// @c reinterpret_cast<T*>(p)[2*i+1] is the imaginary part of the
+        /// complex number @c p[i].
+        ///
+        /// Since C++11, these requirements are guaranteed to be satisfied for
+        /// @c scalar_ts being @c float, @c double or @c long @c double
+        /// together with @c cpx_t being @c std::complex<scalar_t>.
+        void transform_real( const scalar_t * const src,
+                             cpx_t * const dst ) const
+        {
+            const std::size_t N = _nfft;
+            if ( N == 0 )
+                return;
+
+            // perform complex FFT
+            transform( reinterpret_cast<const cpx_t*>(src), dst );
+
+            // post processing for k = 0 and k = N
+            dst[0] = cpx_t( dst[0].real() + dst[0].imag(),
+                               dst[0].real() - dst[0].imag() );
+
+            // post processing for all the other k = 1, 2, ..., N-1
+            const scalar_t pi = std::acos( (scalar_t) -1);
+            const scalar_t half_phi_inc = ( _inverse ? pi : -pi ) / N;
+            const cpx_t twiddle_mul = std::exp( cpx_t(0, half_phi_inc) );
+            for ( std::size_t k = 1; 2*k < N; ++k )
+            {
+                const cpx_t w = (scalar_t)0.5 * cpx_t(
+                     dst[k].real() + dst[N-k].real(),
+                     dst[k].imag() - dst[N-k].imag() );
+                const cpx_t z = (scalar_t)0.5 * cpx_t(
+                     dst[k].imag() + dst[N-k].imag(),
+                    -dst[k].real() + dst[N-k].real() );
+                const cpx_t twiddle =
+                    k % 2 == 0 ?
+                    _twiddles[k/2] :
+                    _twiddles[k/2] * twiddle_mul;
+                dst[  k] =       w + twiddle * z;
+                dst[N-k] = std::conj( w - twiddle * z );
+            }
+            if ( N % 2 == 0 )
+                dst[N/2] = std::conj( dst[N/2] );
+        }
+
+    private:
+
+        void kf_bfly2( cpx_t * Fout, const size_t fstride, const std::size_t m) const
+        {
+            for (std::size_t k=0;k<m;++k) {
+                const cpx_t t = Fout[m+k] * _twiddles[k*fstride];
+                Fout[m+k] = Fout[k] - t;
+                Fout[k] += t;
+            }
+        }
+
+        void kf_bfly3( cpx_t * Fout, const std::size_t fstride, const std::size_t m) const
+        {
+            std::size_t k=m;
+            const std::size_t m2 = 2*m;
+            const cpx_t *tw1,*tw2;
+            cpx_t scratch[5];
+            const cpx_t epi3 = _twiddles[fstride*m];
+
+            tw1=tw2=&_twiddles[0];
+
+            do{
+                scratch[1] = Fout[m]  * *tw1;
+                scratch[2] = Fout[m2] * *tw2;
+
+                scratch[3] = scratch[1] + scratch[2];
+                scratch[0] = scratch[1] - scratch[2];
+                tw1 += fstride;
+                tw2 += fstride*2;
+
+                Fout[m] = Fout[0] - scratch[3]*scalar_t(0.5);
+                scratch[0] *= epi3.imag();
+
+                Fout[0] += scratch[3];
+
+                Fout[m2] = cpx_t(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+
+                Fout[m] += cpx_t( -scratch[0].imag(),scratch[0].real() );
+                ++Fout;
+            }while(--k);
+        }
+
+        void kf_bfly4( cpx_t * const Fout, const std::size_t fstride, const std::size_t m) const
+        {
+            cpx_t scratch[7];
+            const scalar_t negative_if_inverse = _inverse ? -1 : +1;
+            for (std::size_t k=0;k<m;++k) {
+                scratch[0] = Fout[k+  m] * _twiddles[k*fstride  ];
+                scratch[1] = Fout[k+2*m] * _twiddles[k*fstride*2];
+                scratch[2] = Fout[k+3*m] * _twiddles[k*fstride*3];
+                scratch[5] = Fout[k] - scratch[1];
+
+                Fout[k] += scratch[1];
+                scratch[3] = scratch[0] + scratch[2];
+                scratch[4] = scratch[0] - scratch[2];
+                scratch[4] = cpx_t( scratch[4].imag()*negative_if_inverse ,
+                                      -scratch[4].real()*negative_if_inverse );
+
+                Fout[k+2*m]  = Fout[k] - scratch[3];
+                Fout[k    ]+= scratch[3];
+                Fout[k+  m] = scratch[5] + scratch[4];
+                Fout[k+3*m] = scratch[5] - scratch[4];
+            }
+        }
+
+        void kf_bfly5( cpx_t * const Fout, const std::size_t fstride, const std::size_t m) const
+        {
+            cpx_t *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+            cpx_t scratch[13];
+            const cpx_t ya = _twiddles[fstride*m];
+            const cpx_t yb = _twiddles[fstride*2*m];
+
+            Fout0=Fout;
+            Fout1=Fout0+m;
+            Fout2=Fout0+2*m;
+            Fout3=Fout0+3*m;
+            Fout4=Fout0+4*m;
+
+            for ( std::size_t u=0; u<m; ++u ) {
+                scratch[0] = *Fout0;
+
+                scratch[1] = *Fout1 * _twiddles[  u*fstride];
+                scratch[2] = *Fout2 * _twiddles[2*u*fstride];
+                scratch[3] = *Fout3 * _twiddles[3*u*fstride];
+                scratch[4] = *Fout4 * _twiddles[4*u*fstride];
+
+                scratch[7] = scratch[1] + scratch[4];
+                scratch[10]= scratch[1] - scratch[4];
+                scratch[8] = scratch[2] + scratch[3];
+                scratch[9] = scratch[2] - scratch[3];
+
+                *Fout0 += scratch[7];
+                *Fout0 += scratch[8];
+
+                scratch[5] = scratch[0] + cpx_t(
+                        scratch[7].real()*ya.real() + scratch[8].real()*yb.real(),
+                        scratch[7].imag()*ya.real() + scratch[8].imag()*yb.real()
+                        );
+
+                scratch[6] =  cpx_t(
+                         scratch[10].imag()*ya.imag() + scratch[9].imag()*yb.imag(),
+                        -scratch[10].real()*ya.imag() - scratch[9].real()*yb.imag()
+                        );
+
+                *Fout1 = scratch[5] - scratch[6];
+                *Fout4 = scratch[5] + scratch[6];
+
+                scratch[11] = scratch[0] +
+                    cpx_t(
+                            scratch[7].real()*yb.real() + scratch[8].real()*ya.real(),
+                            scratch[7].imag()*yb.real() + scratch[8].imag()*ya.real()
+                            );
+
+                scratch[12] = cpx_t(
+                        -scratch[10].imag()*yb.imag() + scratch[9].imag()*ya.imag(),
+                         scratch[10].real()*yb.imag() - scratch[9].real()*ya.imag()
+                        );
+
+                *Fout2 = scratch[11] + scratch[12];
+                *Fout3 = scratch[11] - scratch[12];
+
+                ++Fout0;
+                ++Fout1;
+                ++Fout2;
+                ++Fout3;
+                ++Fout4;
+            }
+        }
+
+        /* perform the butterfly for one stage of a mixed radix FFT */
+        void kf_bfly_generic(
+                cpx_t * const Fout,
+                const size_t fstride,
+                const std::size_t m,
+                const std::size_t p
+                ) const
+        {
+            const cpx_t * twiddles = &_twiddles[0];
+
+            if(p > _scratchbuf.size()) _scratchbuf.resize(p);
+
+            for ( std::size_t u=0; u<m; ++u ) {
+                std::size_t k = u;
+                for ( std::size_t q1=0 ; q1<p ; ++q1 ) {
+                    _scratchbuf[q1] = Fout[ k  ];
+                    k += m;
+                }
+
+                k=u;
+                for ( std::size_t q1=0 ; q1<p ; ++q1 ) {
+                    std::size_t twidx=0;
+                    Fout[ k ] = _scratchbuf[0];
+                    for ( std::size_t q=1;q<p;++q ) {
+                        twidx += fstride * k;
+                        if (twidx>=_nfft)
+                          twidx-=_nfft;
+                        Fout[ k ] += _scratchbuf[q] * twiddles[twidx];
+                    }
+                    k += m;
+                }
+            }
+        }
+
+        std::size_t _nfft;
+        bool _inverse;
+        std::vector<cpx_t> _twiddles;
+        std::vector<std::size_t> _stageRadix;
+        std::vector<std::size_t> _stageRemainder;
+        mutable std::vector<cpx_t> _scratchbuf;
+};
+#endif
--- a/Source/Cut5/FFT/kissfft_i32.hh
+++ b/Source/Cut5/FFT/kissfft_i32.hh
@ -0,0 +1,304 @@
+#ifndef KISSFFT_I32_CLASS_HH
+#define KISSFFT_I32_CLASS_HH
+
+#include <complex>
+#include <utility>
+#include <vector>
+
+// TODO1: substitute complex<type> (behaviour not defined for nonfloats), should be faster
+// TODO2: use std:: namespace
+// TODO3: make unittests for all ffts (c, cpp, i32)
+
+template <typename DType>
+struct complex_s
+{
+    DType real;
+    DType imag;
+};
+
+class kissfft_i32
+{
+private:
+
+    using scalar_type = int32_t;
+    using cpx_type    = complex<int32_t>;
+
+    scalar_type _scale_factor;
+    std::size_t _nfft;
+    bool _inverse;
+    std::vector<cpx_type> _twiddles;
+    std::vector<std::size_t> _stageRadix;
+    std::vector<std::size_t> _stageRemainder;
+
+public:
+
+    // scale_factor: upscale twiddle-factors otherwise they lie between 0..1 (out of range for integer) --> fixed point math
+    kissfft_i32(const std::size_t nfft, const bool inverse, const double scale_factor = 1024.0)
+            : _scale_factor(scalar_type(scale_factor)), _nfft(nfft), _inverse(inverse)
+    {
+        // fill twiddle factors
+        _twiddles.resize(_nfft);
+        const double phinc = (_inverse ? 2 : -2) * acos(-1.0) / _nfft;
+        for (std::size_t i = 0; i < _nfft; ++i)
+        {
+            _twiddles[i] = scale_factor * exp(complex<double>(0, i * phinc));
+        }
+        //factorize
+        //start factoring out 4's, then 2's, then 3,5,7,9,...
+        std::size_t n = _nfft;
+        std::size_t p = 4;
+        do
+        {
+            while (n % p)
+            {
+                switch (p)
+                {
+                    case 4:
+                        p = 2;
+                        break;
+                    case 2:
+                        p = 3;
+                        break;
+                    default:
+                        p += 2;
+                        break;
+                }
+                if (p * p > n)  p = n;// no more factors
+            }
+            n /= p;
+            _stageRadix.push_back(p);
+            _stageRemainder.push_back(n);
+        } while (n > 1);
+    }
+
+    /// Calculates the complex Discrete Fourier Transform.
+    ///
+    /// The size of the passed arrays must be passed in the constructor.
+    /// The sum of the squares of the absolute values in the @c dst
+    /// array will be @c N times the sum of the squares of the absolute
+    /// values in the @c src array, where @c N is the size of the array.
+    /// In other words, the l_2 norm of the resulting array will be
+    /// @c sqrt(N) times as big as the l_2 norm of the input array.
+    /// This is also the case when the inverse flag is set in the
+    /// constructor. Hence when applying the same transform twice, but with
+    /// the inverse flag changed the second time, then the result will
+    /// be equal to the original input times @c N.
+    void transform(const cpx_type * FSrc,
+                   cpx_type * FDst,
+                   const std::size_t stage = 0,
+                   const std::size_t fstride = 1,
+                   const std::size_t in_stride = 1) const
+    {
+        const std::size_t p = _stageRadix[stage];
+        const std::size_t m = _stageRemainder[stage];
+        cpx_type *const Fout_beg = FDst;
+        cpx_type *const Fout_end = FDst + p * m;
+
+        if (m == 1)
+        {
+            do
+            {
+                *FDst = *FSrc;
+                FSrc += fstride * in_stride;
+            } while (++FDst != Fout_end);
+        }
+        else
+        {
+            do
+            {
+                // recursive call:
+                // DFT of size m*p performed by doing
+                // p instances of smaller DFTs of size m,
+                // each one takes a decimated version of the input
+                transform(FSrc, FDst, stage + 1, fstride * p, in_stride);
+                FSrc += fstride * in_stride;
+            } while ((FDst += m) != Fout_end);
+        }
+
+        FDst = Fout_beg;
+
+        // recombine the p smaller DFTs
+        switch (p)
+        {
+            case 2:
+                kf_bfly2(FDst, fstride, m);
+                break;
+            case 3:
+                kf_bfly3(FDst, fstride, m);
+                break;
+            case 4:
+                kf_bfly4(FDst, fstride, m);
+                break;
+            case 5:
+                kf_bfly5(FDst, fstride, m);
+                break;
+            default:
+                kf_bfly_generic(FDst, fstride, m, p);
+                break;
+        }
+    }
+
+private:
+
+    void kf_bfly2(cpx_type *const Fout, const size_t fstride, const std::size_t m) const
+    {
+        for (std::size_t k = 0; k < m; ++k)
+        {
+            const cpx_type t = (Fout[m + k] * _twiddles[k * fstride]) / _scale_factor;
+            Fout[m + k] = Fout[k] - t;
+            Fout[k] += t;
+        }
+    }
+
+    void kf_bfly3(cpx_type *Fout, const std::size_t fstride, const std::size_t m) const
+    {
+        std::size_t k = m;
+        const std::size_t m2 = 2 * m;
+        const cpx_type *tw1, *tw2;
+        cpx_type scratch[5];
+        const cpx_type epi3 = _twiddles[fstride * m];
+
+        tw1 = tw2 = &_twiddles[0];
+
+        do
+        {
+            scratch[1] = (Fout[m] * *tw1) / _scale_factor;
+            scratch[2] = (Fout[m2] * *tw2) / _scale_factor;
+
+            scratch[3] = scratch[1] + scratch[2];
+            scratch[0] = scratch[1] - scratch[2];
+            tw1 += fstride;
+            tw2 += fstride * 2;
+
+            Fout[m] = Fout[0] - (scratch[3] / 2);
+            scratch[0] *= epi3.imag();
+            scratch[0] /= _scale_factor;
+
+            Fout[0] += scratch[3];
+
+            Fout[m2] = cpx_type(Fout[m].real() + scratch[0].imag(), Fout[m].imag() - scratch[0].real());
+
+            Fout[m] += cpx_type(-scratch[0].imag(), scratch[0].real());
+            ++Fout;
+        } while (--k);
+    }
+
+    void kf_bfly4(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const
+    {
+        cpx_type scratch[7];
+        const scalar_type negative_if_inverse = _inverse ? -1 : +1;
+
+        for (std::size_t k = 0; k < m; ++k)
+        {
+            scratch[0] = (Fout[k + m] * _twiddles[k * fstride]) / _scale_factor;
+            scratch[1] = (Fout[k + 2 * m] * _twiddles[k * fstride * 2]) / _scale_factor;
+            scratch[2] = (Fout[k + 3 * m] * _twiddles[k * fstride * 3]) / _scale_factor;
+            scratch[5] = Fout[k] - scratch[1];
+
+            Fout[k] += scratch[1];
+            scratch[3] = scratch[0] + scratch[2];
+            scratch[4] = scratch[0] - scratch[2];
+            scratch[4] = cpx_type(scratch[4].imag() * negative_if_inverse,
+                                  -scratch[4].real() * negative_if_inverse);
+
+            Fout[k + 2 * m] = Fout[k] - scratch[3];
+            Fout[k] += scratch[3];
+            Fout[k + m] = scratch[5] + scratch[4];
+            Fout[k + 3 * m] = scratch[5] - scratch[4];
+        }
+    }
+
+    void kf_bfly5(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const
+    {
+        cpx_type *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
+        cpx_type scratch[13];
+        const cpx_type ya = _twiddles[fstride * m];
+        const cpx_type yb = _twiddles[fstride * 2 * m];
+
+        Fout0 = Fout;
+        Fout1 = Fout0 + m;
+        Fout2 = Fout0 + 2 * m;
+        Fout3 = Fout0 + 3 * m;
+        Fout4 = Fout0 + 4 * m;
+
+        for (std::size_t u = 0; u < m; ++u)
+        {
+            scratch[0] = *Fout0;
+
+            scratch[1] = (*Fout1 * _twiddles[u * fstride]) / _scale_factor;
+            scratch[2] = (*Fout2 * _twiddles[2 * u * fstride]) / _scale_factor;
+            scratch[3] = (*Fout3 * _twiddles[3 * u * fstride]) / _scale_factor;
+            scratch[4] = (*Fout4 * _twiddles[4 * u * fstride]) / _scale_factor;
+
+            scratch[7] = scratch[1] + scratch[4];
+            scratch[10] = scratch[1] - scratch[4];
+            scratch[8] = scratch[2] + scratch[3];
+            scratch[9] = scratch[2] - scratch[3];
+
+            *Fout0 += scratch[7];
+            *Fout0 += scratch[8];
+
+            scratch[5] = scratch[0] + (cpx_type(
+                    scratch[7].real() * ya.real() + scratch[8].real() * yb.real(),
+                    scratch[7].imag() * ya.real() + scratch[8].imag() * yb.real() ) / _scale_factor);
+
+            scratch[6] = cpx_type(
+                    scratch[10].imag() * ya.imag() + scratch[9].imag() * yb.imag(),
+                    -scratch[10].real() * ya.imag() - scratch[9].real() * yb.imag() ) / _scale_factor;
+
+            *Fout1 = scratch[5] - scratch[6];
+            *Fout4 = scratch[5] + scratch[6];
+
+            scratch[11] = scratch[0] + (cpx_type(
+                    scratch[7].real() * yb.real() + scratch[8].real() * ya.real(),
+                    scratch[7].imag() * yb.real() + scratch[8].imag() * ya.real() ) / _scale_factor);
+
+            scratch[12] = cpx_type(
+                    -scratch[10].imag() * yb.imag() + scratch[9].imag() * ya.imag(),
+                    scratch[10].real() * yb.imag() - scratch[9].real() * ya.imag() ) / _scale_factor;
+
+            *Fout2 = scratch[11] + scratch[12];
+            *Fout3 = scratch[11] - scratch[12];
+
+            ++Fout0;
+            ++Fout1;
+            ++Fout2;
+            ++Fout3;
+            ++Fout4;
+        }
+    }
+
+    /* perform the butterfly for one stage of a mixed radix FFT */
+    void kf_bfly_generic(cpx_type * const Fout, const size_t fstride, const std::size_t m, const std::size_t p) const
+    {
+        const cpx_type *twiddles = &_twiddles[0];
+        cpx_type scratchbuf[p];
+
+        for (std::size_t u = 0; u < m; ++u)
+        {
+            std::size_t k = u;
+            for (std::size_t q1 = 0; q1 < p; ++q1)
+            {
+                scratchbuf[q1] = Fout[k];
+                k += m;
+            }
+
+            k = u;
+            for (std::size_t q1 = 0; q1 < p; ++q1)
+            {
+                std::size_t twidx = 0;
+                Fout[k] = scratchbuf[0];
+                for (std::size_t q = 1; q < p; ++q)
+                {
+                    twidx += fstride * k;
+                    if (twidx >= _nfft)
+                        twidx -= _nfft;
+                    Fout[k] += (scratchbuf[q] * twiddles[twidx]) / _scale_factor;
+                }
+                k += m;
+            }
+        }
+    }
+};
+
+#endif
--- a/Source/Cut5/Utils/FFMPEGUtils.cpp
+++ b/Source/Cut5/Utils/FFMPEGUtils.cpp
@ -3,6 +3,7 @@
 #include "CanvasTypes.h"
 #include "ImageUtils.h"
 #include "Utils.h"
+#include "Cut5/FFT/kiss_fft.h"
 #include "Engine/Canvas.h"
 #include "Engine/TextureRenderTarget2D.h"
 #include "Kismet/KismetRenderingLibrary.h"
@ -326,11 +327,50 @@ TArray<FSlateBrush> FFFMPEGUtils::GetAudioBrush(FClipData* ClipData)
 {
 	const float TimeLength = (ClipData->ClipEndFrame - ClipData->ClipStartFrame) * FGlobalData::DefaultTimeTickSpace;

-	float MaxValue = 0;
-	float MinValue = 0;
+	const int32 PicLength = TimeLength / 8.0;
+
+	
+	int32 DownSampleSpace = 128;
+	TArray<float> DownSampledData;
+	DownSampledData.SetNumZeroed(ClipData->ResourcePropertyDataPtr->AudioData.Num() / 4 / DownSampleSpace);
 	for (int32 i = 0; i < ClipData->ResourcePropertyDataPtr->AudioData.Num() / 4; i++)
 	{
 		float NewFloat = *reinterpret_cast<float*>(ClipData->ResourcePropertyDataPtr->AudioData.GetData() + (i * 4));
+		if (i % DownSampleSpace == 0)
+		{
+			DownSampledData[i / DownSampleSpace] = NewFloat;
+		}
+	}
+
+	
+	const float FFTSize = DownSampledData.Num();
+	kiss_fft_cfg Cfg = kiss_fft_alloc(FFTSize, 0, nullptr, nullptr);
+
+	TArray<kiss_fft_cpx> KissIn, KissOut;
+	KissIn.SetNumZeroed(FFTSize);
+	KissOut.SetNumZeroed(FFTSize);
+	
+	for (int32 i = 0; i < DownSampledData.Num(); i++)
+	{
+		float NewFloat = DownSampledData[i];
+		KissIn[i].r = NewFloat;
+		KissIn[i].i = 0;
+	}
+
+	kiss_fft(Cfg, KissIn.GetData(), KissOut.GetData());
+
+	TArray<float> Spectrum;
+	Spectrum.SetNumZeroed(FFTSize);
+	for (int32 i = 0; i < DownSampledData.Num(); i++)
+	{
+		Spectrum[i] = sqrt(KissOut[i].r * KissOut[i].r + KissOut[i].i * KissOut[i].i);
+	}
+	
+	float MaxValue = 0;
+	float MinValue = 0;
+	for (int32 i = 0; i < Spectrum.Num(); i++)
+	{
+		float NewFloat = ClipData->ResourcePropertyDataPtr->AudioData[i];

 		if (NewFloat >= MaxValue)
 		{
@ -352,21 +392,12 @@ TArray<FSlateBrush> FFFMPEGUtils::GetAudioBrush(FClipData* ClipData)
 	ClipData->AudioBrushes.Empty();
 	int32 Index = 0;
 	int32 TotalLength = TimeLength;
-	while (TotalLength > 0)
+	float LastPoint = 0.0;
+	for (int32 i = 0; i < TotalLength / PicLength; i++)
 	{
-		int32 CurrentLength = 0;
-		if (TotalLength > 4096)
-		{
-			TotalLength -= 4096;
-			CurrentLength = 4096;
-		}
-		else
-		{
-			CurrentLength = TotalLength;
-			TotalLength = 0;
-		}
+		FLinearColor RenderColor(i * (360.0 / 8.0), 1.0, 1.0);
 		UTextureRenderTarget2D* TextureRenderTarget2D = NewObject<UTextureRenderTarget2D>();
-		TextureRenderTarget2D->InitCustomFormat(int32(CurrentLength), int32(FGlobalData::DefaultTrackHeight), PF_B8G8R8A8, false);
+		TextureRenderTarget2D->InitCustomFormat(PicLength, int32(FGlobalData::DefaultTrackHeight), PF_B8G8R8A8, false);
 		TextureRenderTarget2D->UpdateResourceImmediate();
 		UKismetRenderingLibrary::ClearRenderTarget2D(GWorld->GetWorld(), TextureRenderTarget2D, FLinearColor(0, 0, 0, 0));
 		UCanvas* Canvas;
@ -374,19 +405,16 @@ TArray<FSlateBrush> FFFMPEGUtils::GetAudioBrush(FClipData* ClipData)
 		FDrawToRenderTargetContext RenderTargetContext;
 		UKismetRenderingLibrary::BeginDrawCanvasToRenderTarget(GWorld->GetWorld(), TextureRenderTarget2D, Canvas, Size, RenderTargetContext);
 		
+		int32 StartIndex = (Index * PicLength) / 16;
 		
-		float LastPoint = 0.0;
-		int32 StartIndex = (Index * 4096 + CurrentLength) * 4;
-		
-		for (int32 i = 0; i < CurrentLength; i++)
+		for (int32 j = 0; j < PicLength; j++)
 		{
-			float CurrentData = *reinterpret_cast<float*>(ClipData->ResourcePropertyDataPtr->AudioData.GetData() + StartIndex);
+			float CurrentData = Spectrum[StartIndex];
 			float NormalizedData = Normalize(CurrentData);
-			Canvas->K2_DrawLine(FVector2D(i - 1, LastPoint), FVector2D(i, NormalizedData), 1, FLinearColor::White);
+			
+			Canvas->K2_DrawLine(FVector2D(j - 1, LastPoint), FVector2D(j, NormalizedData), 1, RenderColor.HSVToLinearRGB());
 			LastPoint = NormalizedData;
-
-
-			StartIndex += 4;
+			StartIndex += 1;
 		}
 		
 		
@ -400,11 +428,11 @@ TArray<FSlateBrush> FFFMPEGUtils::GetAudioBrush(FClipData* ClipData)
 		FImageUtils::ExportRenderTarget2DAsPNG(TextureRenderTarget2D, Buffer);
 		FFileHelper::SaveArrayToFile(Buffer, *FPaths::Combine(FUtils::GetTempPath(), ClipData->ClipGuid.ToString(), FString::FromInt(Index) + ".png"));

-		FSlateDynamicImageBrush SlateBrush = FSlateDynamicImageBrush(*FPaths::Combine(FUtils::GetTempPath(), ClipData->ClipGuid.ToString(), FString::FromInt(Index) + ".png"), FVector2D(CurrentLength, FGlobalData::DefaultTrackHeight));
-		ClipData->AudioBrushLength.Add(CurrentLength);
+		FSlateDynamicImageBrush SlateBrush = FSlateDynamicImageBrush(*FPaths::Combine(FUtils::GetTempPath(), ClipData->ClipGuid.ToString(), FString::FromInt(Index) + ".png"), FVector2D(PicLength, FGlobalData::DefaultTrackHeight));
+		ClipData->AudioBrushLength.Add(PicLength);
 		ClipData->AudioBrushes.Add(SlateBrush);
 		Index++;
-	}
+	}	
 	
 	
 	
--- a/Source/Cut5/Widgets/STimelineClip.cpp
+++ b/Source/Cut5/Widgets/STimelineClip.cpp
@ -692,15 +692,19 @@ int32 STimelineClip::OnPaint(const FPaintArgs& Args, const FGeometry& AllottedGe

 	const float XLength = AllottedGeometry.GetLocalSize().X;
 	{
-		int32 i = 0;
-		float RenderPos = 0;
-		for (FSlateBrush& SlateBrush : ClipData->AudioBrushes)
+
+		const float TotalLength = ((ClipData->ClipEndFrame - ClipData->ClipStartFrame) * FGlobalData::DefaultTimeTickSpace);
+		if (ClipData->AudioBrushes.Num() > 0)
 		{
-			FSlateDrawElement::MakeBox(OutDrawElements, LayerId + 2, AllottedGeometry.ToPaintGeometry(FVector2f(ClipData->AudioBrushLength[i], AllottedGeometry.GetLocalSize().Y), FSlateLayoutTransform(FVector2f(RenderPos, 0))), &SlateBrush);
-			
-			RenderPos += ClipData->AudioBrushLength[i];
-			i++;
+			int32 i = 0;
+			const int32 PicLength = TotalLength / ClipData->AudioBrushes.Num();
+			for (FSlateBrush& SlateBrush : ClipData->AudioBrushes)
+			{
+				FSlateDrawElement::MakeBox(OutDrawElements, LayerId + 2, AllottedGeometry.ToPaintGeometry(FVector2f(PicLength, AllottedGeometry.GetLocalSize().Y), FSlateLayoutTransform(FVector2f(i * PicLength, 0))), &SlateBrush);
+				i++;
+			}
 		}
+
 	}