50 changed files with 3869 additions and 142 deletions
@ -0,0 +1,49 @@
@@ -0,0 +1,49 @@
|
||||
*.o |
||||
*.swp |
||||
*.so |
||||
*.so.* |
||||
*.a |
||||
*.dylib |
||||
test/testcpp |
||||
test/bm_fftw_double |
||||
test/bm_fftw_float |
||||
test/bm_fftw_int16_t |
||||
test/bm_fftw_int32_t |
||||
test/bm_fftw_simd |
||||
test/bm_kiss_double |
||||
test/bm_kiss_float |
||||
test/bm_kiss_int16_t |
||||
test/bm_kiss_int32_t |
||||
test/bm_kiss_simd |
||||
test/st_double |
||||
test/st_float |
||||
test/st_int16_t |
||||
test/st_int32_t |
||||
test/st_simd |
||||
test/tkfc_double |
||||
test/tkfc_float |
||||
test/tkfc_int16_t |
||||
test/tkfc_int32_t |
||||
test/tkfc_simd |
||||
test/tr_double |
||||
test/tr_float |
||||
test/tr_int16_t |
||||
test/tr_int32_t |
||||
test/tr_simd |
||||
tools/fastconv_double |
||||
tools/fastconv_float |
||||
tools/fastconv_int16_t |
||||
tools/fastconv_int32_t |
||||
tools/fastconv_simd |
||||
tools/fastconvr_double |
||||
tools/fastconvr_float |
||||
tools/fastconvr_int16_t |
||||
tools/fastconvr_int32_t |
||||
tools/fastconvr_simd |
||||
tools/fft_double |
||||
tools/fft_float |
||||
tools/fft_int16_t |
||||
tools/fft_int32_t |
||||
tools/fft_simd |
||||
test/test_simd |
||||
build |
@ -0,0 +1,123 @@
@@ -0,0 +1,123 @@
|
||||
1.3.0 2012-07-18 |
||||
removed non-standard malloc.h from kiss_fft.h |
||||
|
||||
moved -lm to end of link line |
||||
|
||||
checked various return values |
||||
|
||||
converted python Numeric code to NumPy |
||||
|
||||
fixed test of int32_t on 64 bit OS |
||||
|
||||
added padding in a couple of places to allow SIMD alignment of structs |
||||
|
||||
1.2.9 2010-05-27 |
||||
threadsafe ( including OpenMP ) |
||||
|
||||
first edition of kissfft.hh the C++ template fft engine |
||||
|
||||
1.2.8 |
||||
Changed memory.h to string.h -- apparently more standard |
||||
|
||||
Added openmp extensions. This can have fairly linear speedups for larger FFT sizes. |
||||
|
||||
1.2.7 |
||||
Shrank the real-fft memory footprint. Thanks to Galen Seitz. |
||||
|
||||
1.2.6 (Nov 14, 2006) The "thanks to GenArts" release. |
||||
Added multi-dimensional real-optimized FFT, see tools/kiss_fftndr |
||||
Thanks go to GenArts, Inc. for sponsoring the development. |
||||
|
||||
1.2.5 (June 27, 2006) The "release for no good reason" release. |
||||
Changed some harmless code to make some compilers' warnings go away. |
||||
Added some more digits to pi -- why not. |
||||
Added kiss_fft_next_fast_size() function to help people decide how much to pad. |
||||
Changed multidimensional test from 8 dimensions to only 3 to avoid testing |
||||
problems with fixed point (sorry Buckaroo Banzai). |
||||
|
||||
1.2.4 (Oct 27, 2005) The "oops, inverse fixed point real fft was borked" release. |
||||
Fixed scaling bug for inverse fixed point real fft -- also fixed test code that should've been failing. |
||||
Thanks to Jean-Marc Valin for bug report. |
||||
|
||||
Use sys/types.h for more portable types than short,int,long => int16_t,int32_t,int64_t |
||||
If your system does not have these, you may need to define them -- but at least it breaks in a |
||||
loud and easily fixable way -- unlike silently using the wrong size type. |
||||
|
||||
Hopefully tools/psdpng.c is fixed -- thanks to Steve Kellog for pointing out the weirdness. |
||||
|
||||
1.2.3 (June 25, 2005) The "you want to use WHAT as a sample" release. |
||||
Added ability to use 32 bit fixed point samples -- requires a 64 bit intermediate result, a la 'long long' |
||||
|
||||
Added ability to do 4 FFTs in parallel by using SSE SIMD instructions. This is accomplished by |
||||
using the __m128 (vector of 4 floats) as kiss_fft_scalar. Define USE_SIMD to use this. |
||||
|
||||
I know, I know ... this is drifting a bit from the "kiss" principle, but the speed advantages |
||||
make it worth it for some. Also recent gcc makes it SOO easy to use vectors of 4 floats like a POD type. |
||||
|
||||
1.2.2 (May 6, 2005) The Matthew release |
||||
Replaced fixed point division with multiply&shift. Thanks to Jean-Marc Valin for |
||||
discussions regarding. Considerable speedup for fixed-point. |
||||
|
||||
Corrected overflow protection in real fft routines when using fixed point. |
||||
Finder's Credit goes to Robert Oschler of robodance for pointing me at the bug. |
||||
This also led to the CHECK_OVERFLOW_OP macro. |
||||
|
||||
1.2.1 (April 4, 2004) |
||||
compiles cleanly with just about every -W warning flag under the sun |
||||
|
||||
reorganized kiss_fft_state so it could be read-only/const. This may be useful for embedded systems |
||||
that are willing to predeclare twiddle factors, factorization. |
||||
|
||||
Fixed C_MUL,S_MUL on 16-bit platforms. |
||||
|
||||
tmpbuf will only be allocated if input & output buffers are same |
||||
scratchbuf will only be allocated for ffts that are not multiples of 2,3,5 |
||||
|
||||
NOTE: The tmpbuf,scratchbuf changes may require synchronization code for multi-threaded apps. |
||||
|
||||
|
||||
1.2 (Feb 23, 2004) |
||||
interface change -- cfg object is forward declaration of struct instead of void* |
||||
This maintains type safety and lets the compiler warn/error about stupid mistakes. |
||||
(prompted by suggestion from Erik de Castro Lopo) |
||||
|
||||
small speed improvements |
||||
|
||||
added psdpng.c -- sample utility that will create png spectrum "waterfalls" from an input file |
||||
( not terribly useful yet) |
||||
|
||||
1.1.1 (Feb 1, 2004 ) |
||||
minor bug fix -- only affects odd rank, in-place, multi-dimensional FFTs |
||||
|
||||
1.1 : (Jan 30,2004) |
||||
split sample_code/ into test/ and tools/ |
||||
|
||||
Removed 2-D fft and added N-D fft (arbitrary) |
||||
|
||||
modified fftutil.c to allow multi-d FFTs |
||||
|
||||
Modified core fft routine to allow an input stride via kiss_fft_stride() |
||||
(eased support of multi-D ffts) |
||||
|
||||
Added fast convolution filtering (FIR filtering using overlap-scrap method, with tail scrap) |
||||
|
||||
Add kfc.[ch]: the KISS FFT Cache. It takes care of allocs for you ( suggested by Oscar Lesta ). |
||||
|
||||
1.0.1 (Dec 15, 2003) |
||||
fixed bug that occurred when nfft==1. Thanks to Steven Johnson. |
||||
|
||||
1.0 : (Dec 14, 2003) |
||||
changed kiss_fft function from using a single buffer, to two buffers. |
||||
If the same buffer pointer is supplied for both in and out, kiss will |
||||
manage the buffer copies. |
||||
|
||||
added kiss_fft2d and kiss_fftr as separate source files (declarations in kiss_fft.h ) |
||||
|
||||
0.4 :(Nov 4,2003) optimized for radix 2,3,4,5 |
||||
|
||||
0.3 :(Oct 28, 2003) woops, version 2 didn't actually factor out any radices other than 2. |
||||
Thanks to Steven Johnson for finding this one. |
||||
|
||||
0.2 :(Oct 27, 2003) added mixed radix, only radix 2,4 optimized versions |
||||
|
||||
0.1 :(May 19 2003) initial release, radix 2 only |
@ -0,0 +1,11 @@
@@ -0,0 +1,11 @@
|
||||
Copyright (c) 2003-2010 Mark Borgerding . All rights reserved. |
||||
|
||||
KISS FFT is provided under: |
||||
|
||||
SPDX-License-Identifier: BSD-3-Clause |
||||
|
||||
Being under the terms of the BSD 3-clause "New" or "Revised" License, |
||||
according with: |
||||
|
||||
LICENSES/BSD-3-Clause |
||||
|
@ -0,0 +1,35 @@
@@ -0,0 +1,35 @@
|
||||
Valid-License-Identifier: BSD-3-Clause |
||||
SPDX-URL: https://spdx.org/licenses/BSD-3-Clause.html |
||||
Usage-Guide: |
||||
To use the BSD 3-clause "New" or "Revised" License put the following SPDX |
||||
tag/value pair into a comment according to the placement guidelines in |
||||
the licensing rules documentation: |
||||
SPDX-License-Identifier: BSD-3-Clause |
||||
License-Text: |
||||
|
||||
Copyright (c) <year> <owner> . All rights reserved. |
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, |
||||
are permitted provided that the following conditions are met: |
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, |
||||
this list of conditions and the following disclaimer. |
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, |
||||
this list of conditions and the following disclaimer in the documentation |
||||
and/or other materials provided with the distribution. |
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors |
||||
may be used to endorse or promote products derived from this software without |
||||
specific prior written permission. |
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@ -0,0 +1,30 @@
@@ -0,0 +1,30 @@
|
||||
Valid-License-Identifier: Unlicense |
||||
SPDX-URL: https://spdx.org/licenses/Unlicense.html |
||||
Usage-Guide: |
||||
To use the Unlicense put the following SPDX tag/value pair into a |
||||
comment according to the placement guidelines in the licensing rules |
||||
documentation: |
||||
SPDX-License-Identifier: Unlicense |
||||
License-Text: |
||||
|
||||
This is free and unencumbered software released into the public domain. |
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or distribute |
||||
this software, either in source code form or as a compiled binary, for any |
||||
purpose, commercial or non-commercial, and by any means. |
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors of this |
||||
software dedicate any and all copyright interest in the software to the public |
||||
domain. We make this dedication for the benefit of the public at large and |
||||
to the detriment of our heirs and successors. We intend this dedication to be |
||||
an overt act of relinquishment in perpetuity of all present and future rights |
||||
to this software under copyright law. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS |
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH |
||||
THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
|
||||
For more information, please refer to <http://unlicense.org/> |
@ -0,0 +1,323 @@
@@ -0,0 +1,323 @@
|
||||
#
|
||||
# Semantic versioning
|
||||
#
|
||||
# KFVER_MAJOR denotes the ABI version.
|
||||
#
|
||||
# - It must be bumped only if API public members are removed or
|
||||
# changed in an incompatible way.
|
||||
#
|
||||
# KFVER_MINOR denotes the minor version within a compatible ABI.
|
||||
#
|
||||
# - It should be bumped if new API public members are added
|
||||
# (but not removed!) so programs linked against the same library
|
||||
# version continue operating properly
|
||||
#
|
||||
# KFVER_PATCH denotes bugfix count since the last minor update.
|
||||
#
|
||||
# - It should be bumped whenever a bug fix is pushed.
|
||||
#
|
||||
|
||||
export KFVER_MAJOR = 131 |
||||
export KFVER_MINOR = 1 |
||||
export KFVER_PATCH = 0 |
||||
|
||||
#
|
||||
# Data type (float / double / int16_t / int32_t / simd)
|
||||
#
|
||||
|
||||
export KISSFFT_DATATYPE ?= float |
||||
|
||||
#
|
||||
# Default options
|
||||
#
|
||||
|
||||
export KISSFFT_OPENMP ?= 0 |
||||
export KISSFFT_STATIC ?= 0 |
||||
export KISSFFT_TOOLS ?= 1 |
||||
export KISSFFT_USE_ALLOCA ?= 0 |
||||
|
||||
#
|
||||
# Installation directories
|
||||
#
|
||||
|
||||
PREFIX ?= /usr/local |
||||
export ABS_PREFIX = $(abspath $(PREFIX)) |
||||
|
||||
BINDIR ?= $(ABS_PREFIX)/bin |
||||
export ABS_BINDIR = $(abspath $(BINDIR)) |
||||
|
||||
INCLUDEDIR ?= $(ABS_PREFIX)/include |
||||
export ABS_INCLUDEDIR = $(abspath $(INCLUDEDIR)) |
||||
export ABS_PKGINCLUDEDIR = $(ABS_INCLUDEDIR)/kissfft |
||||
|
||||
#
|
||||
# Override LIBDIR with lib64 following CMake's
|
||||
# GNUInstallDirs logic:
|
||||
#
|
||||
|
||||
CANDIDATE_LIBDIR_NAME = lib |
||||
|
||||
ifneq ($(MAKECMDGOALS),clean) |
||||
ifeq ($(shell uname -s),Linux) |
||||
_UNAME_ARCH = $(shell uname -i) |
||||
|
||||
ifeq (,$(_UNAME_ARCH)) |
||||
_UNAME_ARCH = $(shell uname -m) |
||||
|
||||
ifeq (,$(_UNAME_ARCH)) |
||||
$(warning WARNING: Can not detect system architecture!) |
||||
endif |
||||
endif |
||||
|
||||
ifeq ($(_UNAME_ARCH),x86_64) |
||||
CANDIDATE_LIBDIR_NAME = lib64 |
||||
endif |
||||
endif |
||||
endif |
||||
|
||||
CANDIDATE_LIBDIR = $(PREFIX)/$(CANDIDATE_LIBDIR_NAME) |
||||
LIBDIR ?= $(CANDIDATE_LIBDIR) |
||||
|
||||
export ABS_LIBDIR = $(abspath $(LIBDIR)) |
||||
|
||||
export INSTALL ?= install |
||||
|
||||
#
|
||||
# Library name and version
|
||||
#
|
||||
|
||||
ifeq ($(KISSFFT_OPENMP), 1) |
||||
KISSFFTLIB_SHORTNAME = kissfft-$(KISSFFT_DATATYPE)-openmp |
||||
KISSFFT_PKGCONFIG = kissfft-$(KISSFFT_DATATYPE)-openmp.pc |
||||
KISSFFTLIB_FLAGS = -fopenmp |
||||
TYPEFLAGS = -fopenmp |
||||
PKGCONFIG_OPENMP = -fopenmp |
||||
else |
||||
KISSFFTLIB_SHORTNAME = kissfft-$(KISSFFT_DATATYPE) |
||||
KISSFFT_PKGCONFIG = kissfft-$(KISSFFT_DATATYPE).pc |
||||
TYPEFLAGS = |
||||
PKGCONFIG_OPENMP = |
||||
endif |
||||
|
||||
ifeq ($(KISSFFT_STATIC), 1) |
||||
KISSFFTLIB_NAME = lib$(KISSFFTLIB_SHORTNAME).a |
||||
KISSFFTLIB_FLAGS += -static |
||||
else ifeq ($(shell uname -s),Darwin) |
||||
KISSFFTLIB_NAME = lib$(KISSFFTLIB_SHORTNAME).dylib |
||||
KISSFFTLIB_FLAGS += -shared -Wl,-install_name,$(KISSFFTLIB_NAME) |
||||
else |
||||
KISSFFTLIB_SODEVELNAME = lib$(KISSFFTLIB_SHORTNAME).so |
||||
KISSFFTLIB_SONAME = $(KISSFFTLIB_SODEVELNAME).$(KFVER_MAJOR) |
||||
KISSFFTLIB_NAME = $(KISSFFTLIB_SONAME).$(KFVER_MINOR).$(KFVER_PATCH) |
||||
KISSFFTLIB_FLAGS += -shared -Wl,-soname,$(KISSFFTLIB_SONAME) |
||||
endif |
||||
|
||||
export KISSFFTLIB_SHORTNAME |
||||
|
||||
#
|
||||
# Compile-time definitions by datatype
|
||||
#
|
||||
#
|
||||
# Note: -DKISS_FFT_BUILD and -DKISS_FFT_SHARED control
|
||||
# C symbol visibility.
|
||||
#
|
||||
|
||||
ifeq "$(KISSFFT_DATATYPE)" "int32_t" |
||||
TYPEFLAGS += -DFIXED_POINT=32 |
||||
else ifeq "$(KISSFFT_DATATYPE)" "int16_t" |
||||
TYPEFLAGS += -DFIXED_POINT=16 |
||||
else ifeq "$(KISSFFT_DATATYPE)" "simd" |
||||
TYPEFLAGS += -DUSE_SIMD=1 -msse |
||||
else ifeq "$(KISSFFT_DATATYPE)" "float" |
||||
TYPEFLAGS += -Dkiss_fft_scalar=$(KISSFFT_DATATYPE) |
||||
else ifeq "$(KISSFFT_DATATYPE)" "double" |
||||
TYPEFLAGS += -Dkiss_fft_scalar=$(KISSFFT_DATATYPE) |
||||
else |
||||
$(error ERROR: KISSFFT_DATATYPE must be one of: float double int16_t int32_t simd) |
||||
endif |
||||
|
||||
ifneq ($(KISSFFT_STATIC), 1) |
||||
TYPEFLAGS += -DKISS_FFT_SHARED |
||||
endif |
||||
|
||||
ifeq ($(KISSFFT_USE_ALLOCA), 1) |
||||
TYPEFLAGS += -DKISS_FFT_USE_ALLOCA=1 |
||||
endif |
||||
|
||||
#
|
||||
# Compile-time definitions
|
||||
#
|
||||
|
||||
#
|
||||
# Save pkgconfig variables before appending
|
||||
# -DKISS_FFT_BUILD to TYPEFLAGS
|
||||
#
|
||||
|
||||
ifneq ($(shell uname -s),Darwin) |
||||
PKGCONFIG_KISSFFT_VERSION = $(KFVER_MAJOR).$(KFVER_MINOR).$(KFVER_PATCH) |
||||
PKGCONFIG_KISSFFT_OUTPUT_NAME = $(KISSFFTLIB_SHORTNAME) |
||||
PKGCONFIG_PKG_KISSFFT_DEFS = $(TYPEFLAGS) |
||||
PKGCONFIG_KISSFFT_PREFIX = $(ABS_PREFIX) |
||||
ifeq ($(ABS_INCLUDEDIR),$(ABS_PREFIX)/include) |
||||
PKGCONFIG_KISSFFT_INCLUDEDIR = $${prefix}/include |
||||
else |
||||
PKGCONFIG_KISSFFT_INCLUDEDIR = $(ABS_INCLUDEDIR) |
||||
|
||||
endif |
||||
ifeq ($(ABS_LIBDIR),$(ABS_PREFIX)/$(CANDIDATE_LIBDIR_NAME)) |
||||
PKGCONFIG_KISSFFT_LIBDIR = $${prefix}/$(CANDIDATE_LIBDIR_NAME) |
||||
else |
||||
PKGCONFIG_KISSFFT_LIBDIR = $(ABS_LIBDIR) |
||||
endif |
||||
PKGCONFIG_KISSFFT_PKGINCLUDEDIR = $${includedir}/kissfft |
||||
endif |
||||
|
||||
export TYPEFLAGS |
||||
|
||||
# Compile .c into .o
|
||||
#
|
||||
|
||||
#
|
||||
# -DKISS_FFT_BUILD is used for library artifacts, so
|
||||
# consumer executables in 'test' and 'tools' do _NOT_
|
||||
# need it. pkg-config output does not need it either.
|
||||
#
|
||||
|
||||
%.c.o: %.c |
||||
$(CC) -Wall -fPIC \
|
||||
-o $@ \
|
||||
$(CFLAGS) $(TYPEFLAGS) -DKISS_FFT_BUILD \
|
||||
-c $< |
||||
|
||||
#
|
||||
# Target: "make all"
|
||||
#
|
||||
|
||||
all: kfc.c.o kiss_fft.c.o kiss_fftnd.c.o kiss_fftndr.c.o kiss_fftr.c.o |
||||
ifneq ($(KISSFFT_STATIC), 1) |
||||
$(CC) $(KISSFFTLIB_FLAGS) -o $(KISSFFTLIB_NAME) $^ |
||||
ifneq ($(shell uname -s),Darwin) |
||||
ln -sf $(KISSFFTLIB_NAME) $(KISSFFTLIB_SONAME) |
||||
ln -sf $(KISSFFTLIB_NAME) $(KISSFFTLIB_SODEVELNAME) |
||||
endif |
||||
else |
||||
$(AR) crus $(KISSFFTLIB_NAME) $^ |
||||
endif |
||||
ifneq ($(KISSFFT_TOOLS), 0) |
||||
make -C tools CFLAGADD="$(CFLAGADD)" all |
||||
endif |
||||
|
||||
#
|
||||
# Target: "make install"
|
||||
#
|
||||
|
||||
install: all |
||||
$(INSTALL) -Dt $(ABS_PKGINCLUDEDIR) -m 644 \
|
||||
kiss_fft.h \
|
||||
kissfft.hh \
|
||||
kiss_fftnd.h \
|
||||
kiss_fftndr.h \
|
||||
kiss_fftr.h |
||||
$(INSTALL) -Dt $(ABS_LIBDIR) -m 644 $(KISSFFTLIB_NAME) |
||||
ifneq ($(KISSFFT_STATIC), 1) |
||||
ifneq ($(shell uname -s),Darwin) |
||||
cd $(LIBDIR) && \
|
||||
ln -sf $(KISSFFTLIB_NAME) $(KISSFFTLIB_SONAME) && \
|
||||
ln -sf $(KISSFFTLIB_NAME) $(KISSFFTLIB_SODEVELNAME) |
||||
endif |
||||
endif |
||||
ifneq ($(shell uname -s),Darwin) |
||||
mkdir "$(ABS_LIBDIR)/pkgconfig" |
||||
sed \
|
||||
-e 's+@PKGCONFIG_KISSFFT_VERSION@+$(PKGCONFIG_KISSFFT_VERSION)+' \
|
||||
-e 's+@KISSFFT_OUTPUT_NAME@+$(PKGCONFIG_KISSFFT_OUTPUT_NAME)+' \
|
||||
-e 's+@PKG_KISSFFT_DEFS@+$(PKGCONFIG_PKG_KISSFFT_DEFS)+' \
|
||||
-e 's+@PKG_OPENMP@+$(PKGCONFIG_OPENMP)+' \
|
||||
-e 's+@PKGCONFIG_KISSFFT_PREFIX@+$(PKGCONFIG_KISSFFT_PREFIX)+' \
|
||||
-e 's+@PKGCONFIG_KISSFFT_INCLUDEDIR@+$(PKGCONFIG_KISSFFT_INCLUDEDIR)+' \
|
||||
-e 's+@PKGCONFIG_KISSFFT_LIBDIR@+$(PKGCONFIG_KISSFFT_LIBDIR)+' \
|
||||
-e 's+@PKGCONFIG_KISSFFT_PKGINCLUDEDIR@+$(PKGCONFIG_KISSFFT_PKGINCLUDEDIR)+' \
|
||||
kissfft.pc.in 1>"$(ABS_LIBDIR)/pkgconfig/$(KISSFFT_PKGCONFIG)" |
||||
endif |
||||
ifneq ($(KISSFFT_TOOLS), 0) |
||||
make -C tools install |
||||
endif |
||||
|
||||
#
|
||||
# Target: "make doc"
|
||||
#
|
||||
|
||||
doc: |
||||
$(warning Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall') |
||||
$(warning but be aware that 'make testall' has dependencies that the basic kissfft software does not.) |
||||
$(warning It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings) |
||||
$(warning of kissfft and would like to make use of its regression tests.) |
||||
|
||||
#
|
||||
# Target: "make testsingle"
|
||||
#
|
||||
|
||||
testsingle: |
||||
make clean |
||||
make all |
||||
make -C test CFLAGADD="$(CFLAGADD)" test testcpp |
||||
|
||||
#
|
||||
# Target: "make testall"
|
||||
#
|
||||
|
||||
testall: |
||||
# Shared libraries |
||||
make KISSFFT_DATATYPE=double testsingle |
||||
make KISSFFT_DATATYPE=float testsingle |
||||
make KISSFFT_DATATYPE=int16_t testsingle |
||||
# The simd and int32_t types may or may not work on your machine |
||||
make KISSFFT_DATATYPE=int32_t testsingle |
||||
make KISSFFT_DATATYPE=simd testsingle |
||||
# Static libraries |
||||
make KISSFFT_DATATYPE=double KISSFFT_STATIC=1 testsingle |
||||
make KISSFFT_DATATYPE=float KISSFFT_STATIC=1 testsingle |
||||
make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 testsingle |
||||
# The simd and int32_t types may or may not work on your machine |
||||
make KISSFFT_DATATYPE=int32_t KISSFFT_STATIC=1 testsingle |
||||
make KISSFFT_DATATYPE=simd KISSFFT_STATIC=1 testsingle |
||||
# OpenMP libraries |
||||
make KISSFFT_DATATYPE=double KISSFFT_OPENMP=1 testsingle |
||||
make KISSFFT_DATATYPE=float KISSFFT_OPENMP=1 testsingle |
||||
make KISSFFT_DATATYPE=int16_t KISSFFT_OPENMP=1 testsingle |
||||
# The simd and int32_t types may or may not work on your machine |
||||
make KISSFFT_DATATYPE=int32_t KISSFFT_OPENMP=1 testsingle |
||||
make KISSFFT_DATATYPE=simd KISSFFT_OPENMP=1 testsingle |
||||
$(warning All tests passed!) |
||||
|
||||
#
|
||||
# Target: "make tarball"
|
||||
#
|
||||
|
||||
tarball: clean |
||||
git archive --prefix=kissfft/ -o kissfft$(KFVER).tar.gz v$(KFVER) |
||||
git archive --prefix=kissfft/ -o kissfft$(KFVER).zip v$(KFVER) |
||||
|
||||
#
|
||||
# Target: "make clean"
|
||||
#
|
||||
|
||||
clean: |
||||
rm -f *.o *.a *.so *.so.* |
||||
cd test && make clean |
||||
cd tools && make clean |
||||
rm -f kiss_fft*.tar.gz *~ *.pyc kiss_fft*.zip |
||||
|
||||
#
|
||||
# Target: "make asm"
|
||||
#
|
||||
|
||||
asm: kiss_fft.s |
||||
|
||||
# TODO: Sort out if we should add kfc / other C headers
|
||||
|
||||
kiss_fft.s: kiss_fft.c kiss_fft.h _kiss_fft_guts.h |
||||
[ -e kiss_fft.s ] && mv kiss_fft.s kiss_fft.s~ || true |
||||
$(CC) -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -unroll-loops -dA -fverbose-asm |
||||
$(CC) -o kiss_fft_short.s -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -dA -fverbose-asm -DFIXED_POINT |
||||
[ -e kiss_fft.s~ ] && diff kiss_fft.s~ kiss_fft.s || true |
@ -0,0 +1,245 @@
@@ -0,0 +1,245 @@
|
||||
# KISS FFT [Build Status](https://travis-ci.com/mborgerding/kissfft) |
||||
|
||||
KISS FFT - A mixed-radix Fast Fourier Transform based upon the principle, |
||||
"Keep It Simple, Stupid." |
||||
|
||||
There are many great fft libraries already around. Kiss FFT is not trying |
||||
to be better than any of them. It only attempts to be a reasonably efficient, |
||||
moderately useful FFT that can use fixed or floating data types and can be |
||||
incorporated into someone's C program in a few minutes with trivial licensing. |
||||
|
||||
## USAGE: |
||||
|
||||
The basic usage for 1-d complex FFT is: |
||||
|
||||
```c |
||||
#include "kiss_fft.h" |
||||
kiss_fft_cfg cfg = kiss_fft_alloc( nfft ,is_inverse_fft ,0,0 ); |
||||
while ... |
||||
|
||||
... // put kth sample in cx_in[k].r and cx_in[k].i |
||||
|
||||
kiss_fft( cfg , cx_in , cx_out ); |
||||
|
||||
... // transformed. DC is in cx_out[0].r and cx_out[0].i |
||||
|
||||
kiss_fft_free(cfg); |
||||
``` |
||||
- **Note**: frequency-domain data is stored from dc up to 2pi. |
||||
so cx_out[0] is the dc bin of the FFT |
||||
and cx_out[nfft/2] is the Nyquist bin (if exists) |
||||
|
||||
Declarations are in "kiss_fft.h", along with a brief description of the |
||||
functions you'll need to use. |
||||
|
||||
Code definitions for 1d complex FFTs are in kiss_fft.c. |
||||
|
||||
You can do other cool stuff with the extras you'll find in tools/ |
||||
> - multi-dimensional FFTs |
||||
> - real-optimized FFTs (returns the positive half-spectrum: |
||||
(nfft/2+1) complex frequency bins) |
||||
> - fast convolution FIR filtering (not available for fixed point) |
||||
> - spectrum image creation |
||||
|
||||
The core fft and most tools/ code can be compiled to use float, double, |
||||
Q15 short or Q31 samples. The default is float. |
||||
|
||||
## BUILDING: |
||||
|
||||
There are two functionally-equivalent build systems supported by kissfft: |
||||
|
||||
- Make (traditional Makefiles for Unix / Linux systems) |
||||
- CMake (more modern and feature-rich build system developed by Kitware) |
||||
|
||||
To build kissfft, the following build environment can be used: |
||||
|
||||
- GNU build environment with GCC, Clang and GNU Make or CMake (>= 3.6) |
||||
- Microsoft Visual C++ (MSVC) with CMake (>= 3.6) |
||||
|
||||
Additional libraries required to build and test kissfft include: |
||||
|
||||
- libpng for psdpng tool, |
||||
- libfftw3 to validate kissfft results against it, |
||||
- python 2/3 with Numpy to validate kissfft results against it. |
||||
- OpenMP supported by GCC, Clang or MSVC for multi-core FFT transformations |
||||
|
||||
Environments like Cygwin and MinGW can most likely be used to build kissfft |
||||
targeting the Windows platform, but no tests have been performed to date. |
||||
|
||||
Both Make and CMake builds are easily configurable: |
||||
|
||||
- `KISSFFT_DATATYPE=<datatype>` (for Make) or `-DKISSFFT_DATATYPE=<datatype>` |
||||
(for CMake) denote the principal datatype used by kissfft. It can be one |
||||
of the following: |
||||
|
||||
- float (default) |
||||
- double |
||||
- int16_t |
||||
- int32_t |
||||
- simd (requires SSE instruction set support on target CPU) |
||||
|
||||
- `KISSFFT_OPENMP=1` (for Make) or `-DKISSFFT_OPENMP=ON` (for CMake) builds kissfft |
||||
with OpenMP support. Please note that a supported compiler is required and this |
||||
option is turned off by default. |
||||
|
||||
- `KISSFFT_STATIC=1` (for Make) or `-DKISSFFT_STATIC=ON` (for CMake) instructs |
||||
the builder to create static library ('.lib' for Windows / '.a' for Unix or Linux). |
||||
By default, this option is turned off and the shared library is created |
||||
('.dll' for Windows, '.so' for Linux or Unix, '.dylib' for Mac OSX) |
||||
|
||||
- `-DKISSFFT_TEST=OFF` (for CMake) disables building tests for kissfft. On Make, |
||||
building tests is done separately by 'make testall' or 'make testsingle', so |
||||
no specific setting is required. |
||||
|
||||
- `KISSFFT_TOOLS=0` (for Make) or `-DKISSFFT_TOOLS=OFF` (for CMake) builds kissfft |
||||
without command-line tools like 'fastconv'. By default the tools are built. |
||||
|
||||
- `KISSFFT_USE_ALLOCA=1` (for Make) or `-DKISSFFT_USE_ALLOCA=ON` (for CMake) |
||||
build kissfft with 'alloca' usage instead of 'malloc' / 'free'. |
||||
|
||||
- `PREFIX=/full/path/to/installation/prefix/directory` (for Make) or |
||||
`-DCMAKE_INSTALL_PREFIX=/full/path/to/installation/prefix/directory` (for CMake) |
||||
specifies the prefix directory to install kissfft into. |
||||
|
||||
For example, to build kissfft as a static library with 'int16_t' datatype and |
||||
OpenMP support using Make, run the command from kissfft source tree: |
||||
|
||||
``` |
||||
make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 all |
||||
``` |
||||
|
||||
The same configuration for CMake is: |
||||
|
||||
``` |
||||
mkdir build && cd build |
||||
cmake -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON .. |
||||
make all |
||||
``` |
||||
|
||||
To specify '/tmp/1234' as installation prefix directory, run: |
||||
|
||||
|
||||
``` |
||||
make PREFIX=/tmp/1234 KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 install |
||||
``` |
||||
|
||||
or |
||||
|
||||
``` |
||||
mkdir build && cd build |
||||
cmake -DCMAKE_INSTALL_PREFIX=/tmp/1234 -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON .. |
||||
make all |
||||
make install |
||||
``` |
||||
|
||||
## TESTING: |
||||
|
||||
To validate the build configured as an example above, run the following command from |
||||
kissfft source tree: |
||||
|
||||
``` |
||||
make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 testsingle |
||||
``` |
||||
|
||||
if using Make, or: |
||||
|
||||
``` |
||||
make test |
||||
``` |
||||
|
||||
if using CMake. |
||||
|
||||
To test all possible build configurations, please run an extended testsuite from |
||||
kissfft source tree: |
||||
|
||||
``` |
||||
sh test/kissfft-testsuite.sh |
||||
``` |
||||
|
||||
Please note that the extended testsuite takes around 20-40 minutes depending on device |
||||
it runs on. This testsuite is useful for reporting bugs or testing the pull requests. |
||||
|
||||
## BACKGROUND |
||||
|
||||
I started coding this because I couldn't find a fixed point FFT that didn't |
||||
use assembly code. I started with floating point numbers so I could get the |
||||
theory straight before working on fixed point issues. In the end, I had a |
||||
little bit of code that could be recompiled easily to do ffts with short, float |
||||
or double (other types should be easy too). |
||||
|
||||
Once I got my FFT working, I was curious about the speed compared to |
||||
a well respected and highly optimized fft library. I don't want to criticize |
||||
this great library, so let's call it FFT_BRANDX. |
||||
During this process, I learned: |
||||
|
||||
> 1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d). |
||||
> 2. It took me an embarrassingly long time to get FFT_BRANDX working. |
||||
> 3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size). |
||||
> 4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode. |
||||
|
||||
It is wonderful that free, highly optimized libraries like FFT_BRANDX exist. |
||||
But such libraries carry a huge burden of complexity necessary to extract every |
||||
last bit of performance. |
||||
|
||||
**Sometimes simpler is better, even if it's not better.** |
||||
|
||||
## FREQUENTLY ASKED QUESTIONS: |
||||
> Q: Can I use kissfft in a project with a ___ license?<br> |
||||
> A: Yes. See LICENSE below. |
||||
|
||||
> Q: Why don't I get the output I expect?<br> |
||||
> A: The two most common causes of this are |
||||
> 1) scaling : is there a constant multiplier between what you got and what you want? |
||||
> 2) mixed build environment -- all code must be compiled with same preprocessor |
||||
> definitions for FIXED_POINT and kiss_fft_scalar |
||||
|
||||
> Q: Will you write/debug my code for me?<br> |
||||
> A: Probably not unless you pay me. I am happy to answer pointed and topical questions, but |
||||
> I may refer you to a book, a forum, or some other resource. |
||||
|
||||
|
||||
## PERFORMANCE |
||||
(on Athlon XP 2100+, with gcc 2.96, float data type) |
||||
|
||||
Kiss performed 10000 1024-pt cpx ffts in .63 s of cpu time. |
||||
For comparison, it took md5sum twice as long to process the same amount of data. |
||||
Transforming 5 minutes of CD quality audio takes less than a second (nfft=1024). |
||||
|
||||
**DO NOT:** |
||||
- use Kiss if you need the Fastest Fourier Transform in the World |
||||
- ask me to add features that will bloat the code |
||||
|
||||
## UNDER THE HOOD |
||||
|
||||
Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer |
||||
and output buffer that are the same, a temporary buffer will be created to hold the data. |
||||
|
||||
No static data is used. The core routines of kiss_fft are thread-safe (but not all of the tools directory). |
||||
|
||||
No scaling is done for the floating point version (for speed). |
||||
Scaling is done both ways for the fixed-point version (for overflow prevention). |
||||
|
||||
Optimized butterflies are used for factors 2,3,4, and 5. |
||||
|
||||
The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length |
||||
FFTs in parallel (packed into real&imag), and then combines them via twiddling. The result is |
||||
nfft/2+1 complex frequency bins from DC to Nyquist. If you don't know what this means, search the web. |
||||
|
||||
The fast convolution filtering uses the overlap-scrap method, slightly |
||||
modified to put the scrap at the tail. |
||||
|
||||
## LICENSE |
||||
Revised BSD License, see COPYING for verbiage. |
||||
Basically, "free to use&change, give credit where due, no guarantees" |
||||
Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at |
||||
the other end. See http://www.fsf.org/licensing/licenses |
||||
|
||||
## TODO |
||||
- Add real optimization for odd length FFTs |
||||
- Document/revisit the input/output fft scaling |
||||
- Make doc describing the overlap (tail) scrap fast convolution filtering in kiss_fastfir.c |
||||
- Test all the ./tools/ code with fixed point (kiss_fastfir.c doesn't work, maybe others) |
||||
|
||||
## AUTHOR |
||||
Mark Borgerding |
||||
Mark@Borgerding.net |
@ -0,0 +1,78 @@
@@ -0,0 +1,78 @@
|
||||
If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft |
||||
to do 4 *separate* FFTs at once. |
||||
|
||||
Beware! Beyond here there be dragons! |
||||
|
||||
This API is not easy to use, is not well documented, and breaks the KISS principle. |
||||
|
||||
|
||||
Still reading? Okay, you may get rewarded for your patience with a considerable speedup |
||||
(2-3x) on intel x86 machines with SSE if you are willing to jump through some hoops. |
||||
|
||||
The basic idea is to use the packed 4 float __m128 data type as a scalar element. |
||||
This means that the format is pretty convoluted. It performs 4 FFTs per fft call on signals A,B,C,D. |
||||
|
||||
For complex data, the data is interlaced as follows: |
||||
rA0,rB0,rC0,rD0, iA0,iB0,iC0,iD0, rA1,rB1,rC1,rD1, iA1,iB1,iC1,iD1 ... |
||||
where "rA0" is the real part of the zeroth sample for signal A |
||||
|
||||
Real-only data is laid out: |
||||
rA0,rB0,rC0,rD0, rA1,rB1,rC1,rD1, ... |
||||
|
||||
Compile with gcc flags something like |
||||
-O3 -mpreferred-stack-boundary=4 -DUSE_SIMD=1 -msse |
||||
|
||||
Be aware of SIMD alignment. This is the most likely cause of segfaults. |
||||
The code within kissfft uses scratch variables on the stack. |
||||
With SIMD, these must have addresses on 16 byte boundaries. |
||||
Search on "SIMD alignment" for more info. |
||||
|
||||
|
||||
|
||||
Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft. |
||||
I have not run it -- use it at your own risk. It appears to do 4xN and Nx4 transpositions |
||||
(out of place). |
||||
|
||||
// Interleaves four planar signals into the packed SIMD layout described above.
// `source` holds the four signals back to back (size128 floats each);
// `target` receives size128 __m128 values, one per sample index.
void SSETools::pack128(float* target, float* source, unsigned long size128)
{
    const float* sigA = source;
    const float* sigB = sigA + size128;
    const float* sigC = sigB + size128;
    const float* sigD = sigC + size128;
    __m128* packed = (__m128*)target;

    for (unsigned long n = 0; n < size128; ++n)
    {
        // _mm_set_ps lists its arguments highest lane first, so signal A
        // ends up in the lowest lane, matching the rA,rB,rC,rD ordering.
        packed[n] = _mm_set_ps(sigD[n], sigC[n], sigB[n], sigA[n]);
    }
}
||||
|
||||
// Inverse of pack128: splits the packed layout (4 floats per sample index)
// back into four planar signals stored back to back in `target`,
// size128 floats each.
void SSETools::unpack128(float* target, float* source, unsigned long size128)
{
    float* outA = target;
    float* outB = outA + size128;
    float* outC = outB + size128;
    float* outD = outC + size128;

    for (unsigned long n = 0; n < size128; ++n)
    {
        const float* quad = source + 4 * n;   // the four lanes for sample n
        outA[n] = quad[0];
        outB[n] = quad[1];
        outC[n] = quad[2];
        outD[n] = quad[3];
    }
}
@ -0,0 +1,39 @@
@@ -0,0 +1,39 @@
|
||||
Speed: |
||||
* If you want to use multiple cores, then compile with -openmp or -fopenmp (see your compiler docs). |
||||
Realize that larger FFTs will reap more benefit than smaller FFTs. This generally uses more CPU time, but |
||||
less wall time. |
||||
|
||||
* experiment with compiler flags |
||||
Special thanks to Oscar Lesta. He suggested some compiler flags |
||||
for gcc that make a big difference. They shave 10-15% off |
||||
execution time on some systems. Try some combination of: |
||||
-march=pentiumpro |
||||
-ffast-math |
||||
-fomit-frame-pointer |
||||
|
||||
* If the input data has no imaginary component, use the kiss_fftr code under tools/. |
||||
Real ffts are roughly twice as fast as complex. |
||||
|
||||
* If you can rearrange your code to do 4 FFTs in parallel and you are on a recent Intel or AMD machine, |
||||
then you might want to experiment with the USE_SIMD code. See README.simd |
||||
|
||||
|
||||
Reducing code size: |
||||
* remove some of the butterflies. There are currently butterflies optimized for radices |
||||
2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain |
||||
other factors, they just won't be quite as fast. You can decide for yourself |
||||
whether to keep radix 2 or 4. If you do some work in this area, let me |
||||
know what you find. |
||||
|
||||
* For platforms where ROM/code space is more plentiful than RAM, |
||||
consider creating a hardcoded kiss_fft_state. In other words, decide which |
||||
FFT size(s) you want and make a structure with the correct factors and twiddles. |
||||
|
||||
* Frank van der Hulst offered numerous suggestions for smaller code size and correct operation |
||||
on embedded targets. "I'm happy to help anyone who is trying to implement KISSFFT on a micro" |
||||
|
||||
Some of these were rolled into the mainline code base: |
||||
- using long casts to promote intermediate results of short*short multiplication |
||||
- delaying allocation of buffers that are sometimes unused. |
||||
In some cases, it may be desirable to limit capability in order to better suit the target: |
||||
- predefining the twiddle tables for the desired fft size. |
@ -0,0 +1,167 @@
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. |
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
* |
||||
* SPDX-License-Identifier: BSD-3-Clause |
||||
* See COPYING file for more information. |
||||
*/ |
||||
|
||||
/* kiss_fft.h
|
||||
defines kiss_fft_scalar as either short or a float type |
||||
and defines |
||||
typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ |
||||
|
||||
#ifndef _kiss_fft_guts_h |
||||
#define _kiss_fft_guts_h |
||||
|
||||
#include "kiss_fft.h" |
||||
#include "kiss_fft_log.h" |
||||
#include <limits.h> |
||||
|
||||
#define MAXFACTORS 32 |
||||
/* e.g. an fft of length 128 has 4 factors
|
||||
as far as kissfft is concerned |
||||
4*4*4*2 |
||||
*/ |
||||
|
||||
struct kiss_fft_state{ |
||||
int nfft; |
||||
int inverse; |
||||
int factors[2*MAXFACTORS]; |
||||
kiss_fft_cpx twiddles[1]; |
||||
}; |
||||
|
||||
/*
|
||||
Explanation of macros dealing with complex math: |
||||
|
||||
C_MUL(m,a,b) : m = a*b |
||||
C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise |
||||
C_SUB( res, a,b) : res = a - b |
||||
C_SUBFROM( res , a) : res -= a |
||||
C_ADDTO( res , a) : res += a |
||||
* */ |
||||
#ifdef FIXED_POINT
#include <stdint.h>
/* Fixed-point build: samples are Q15 (default) or Q31 (FIXED_POINT==32).
   SAMPPROD is the wider type used to hold intermediate products. */
#if (FIXED_POINT==32)
# define FRACBITS 31
# define SAMPPROD int64_t
# define SAMP_MAX INT32_MAX
# define SAMP_MIN INT32_MIN
#else
# define FRACBITS 15
# define SAMPPROD int32_t
# define SAMP_MAX INT16_MAX
# define SAMP_MIN INT16_MIN
#endif

#if defined(CHECK_OVERFLOW)
/* Warns when (a op b), evaluated in the wide type, leaves the sample range.
   Deliberately a bare `if {}` (no trailing semicolon needed) because the
   C_ADD/C_SUB/... macros below invoke it without one.
   The operands are cast explicitly so they match the conversion specifiers
   whatever widths SAMPPROD and the scalar type have on the target. */
#  define CHECK_OVERFLOW_OP(a,op,b)  \
    if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) {  \
        KISS_FFT_WARNING("overflow (%d " #op" %d) = %lld", (int)(a),(int)(b),(long long)((SAMPPROD)(a) op (SAMPPROD)(b))); }
#endif

/* widening multiply */
#   define smul(a,b) ( (SAMPPROD)(a)*(b) )
/* round a wide product back to a sample: add half an LSB, then drop FRACBITS */
#   define sround( x )  (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )

#   define S_MUL(a,b) sround( smul(a,b) )

#   define C_MUL(m,a,b) \
      do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
          (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)

/* x /= k, done as a multiply by the fixed-point reciprocal SAMP_MAX/k.
   k is parenthesized so an expression argument divides as a whole. */
#   define DIVSCALAR(x,k) \
        (x) = sround( smul(  x, SAMP_MAX/(k) ) )

#   define C_FIXDIV(c,div) \
        do {    DIVSCALAR( (c).r , div);  \
                DIVSCALAR( (c).i  , div); }while (0)

#   define C_MULBYSCALAR( c, s ) \
    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)

#else  /* not FIXED_POINT*/

/* Floating-point build: plain arithmetic, and C_FIXDIV expands to nothing. */
#   define S_MUL(a,b) ( (a)*(b) )
#define C_MUL(m,a,b) \
    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
#   define C_FIXDIV(c,div) /* NOOP */
#   define C_MULBYSCALAR( c, s ) \
    do{ (c).r *= (s);\
        (c).i *= (s); }while(0)
#endif

#ifndef CHECK_OVERFLOW_OP
#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
#endif

/* res = a + b */
#define  C_ADD( res, a,b)\
    do { \
        CHECK_OVERFLOW_OP((a).r,+,(b).r)\
        CHECK_OVERFLOW_OP((a).i,+,(b).i)\
        (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
    }while(0)
/* res = a - b */
#define  C_SUB( res, a,b)\
    do { \
        CHECK_OVERFLOW_OP((a).r,-,(b).r)\
        CHECK_OVERFLOW_OP((a).i,-,(b).i)\
        (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
    }while(0)
/* res += a */
#define C_ADDTO( res , a)\
    do { \
        CHECK_OVERFLOW_OP((res).r,+,(a).r)\
        CHECK_OVERFLOW_OP((res).i,+,(a).i)\
        (res).r += (a).r;  (res).i += (a).i;\
    }while(0)

/* res -= a */
#define C_SUBFROM( res , a)\
    do {\
        CHECK_OVERFLOW_OP((res).r,-,(a).r)\
        CHECK_OVERFLOW_OP((res).i,-,(a).i)\
        (res).r -= (a).r;  (res).i -= (a).i; \
    }while(0)
||||
|
||||
|
||||
/* Trig/halving helpers, one flavour per scalar representation. */
#if defined(FIXED_POINT)
/* scale to the sample range and round to nearest via floor(.5 + x) */
#  define KISS_FFT_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
#  define KISS_FFT_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
#  define HALF_OF(x) ((x)>>1)
#elif defined(USE_SIMD)
/* same value replicated through _mm_set1_ps */
#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
#else
#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
#  define HALF_OF(x) ((x)*((kiss_fft_scalar).5))
#endif

/* Store cos(phase) + i*sin(phase) into the complex value x points to. */
#define  kf_cexp(x,phase) \
    do{ \
        (x)->r = KISS_FFT_COS(phase);\
        (x)->i = KISS_FFT_SIN(phase);\
    }while(0)
||||
|
||||
|
||||
/* Debugging aid: logs the complex value c points to, as "<re> + <im>i",
   through KISS_FFT_DEBUG. Both parts are converted to double first. */
#define pcpx(c) \
    KISS_FFT_DEBUG("%g + %gi\n", (double)((c)->r), (double)((c)->i))
||||
|
||||
|
||||
/*
 * Temporary-buffer allocation.
 *
 * Define KISS_FFT_USE_ALLOCA to allow use of alloca instead of malloc for
 * temporary buffers. Temporary buffers are used in two cases:
 *   1. FFT sizes that have "bad" factors, i.e. not 2,3 and 5
 *   2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do
 *      an in-place transform.
 */
#if defined(KISS_FFT_USE_ALLOCA)
#include <alloca.h>
#define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
/* expands to nothing: no explicit release in the alloca variant */
#define  KISS_FFT_TMP_FREE(ptr)
#else
#define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
#define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
#endif
||||
|
||||
#endif /* _kiss_fft_guts_h */ |
||||
|
@ -0,0 +1,109 @@
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. |
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
* |
||||
* SPDX-License-Identifier: BSD-3-Clause |
||||
* See COPYING file for more information. |
||||
*/ |
||||
|
||||
#include "kfc.h" |
||||
|
||||
typedef struct cached_fft *kfc_cfg; |
||||
|
||||
struct cached_fft |
||||
{ |
||||
int nfft; |
||||
int inverse; |
||||
kiss_fft_cfg cfg; |
||||
kfc_cfg next; |
||||
}; |
||||
|
||||
static kfc_cfg cache_root=NULL; |
||||
static int ncached=0; |
||||
|
||||
static kiss_fft_cfg find_cached_fft(int nfft,int inverse) |
||||
{ |
||||
size_t len; |
||||
kfc_cfg cur=cache_root; |
||||
kfc_cfg prev=NULL; |
||||
while ( cur ) { |
||||
if ( cur->nfft == nfft && inverse == cur->inverse ) |
||||
break;/*found the right node*/ |
||||
prev = cur; |
||||
cur = prev->next; |
||||
} |
||||
if (cur== NULL) { |
||||
/* no cached node found, need to create a new one*/ |
||||
kiss_fft_alloc(nfft,inverse,0,&len); |
||||
#ifdef USE_SIMD |
||||
int padding = (16-sizeof(struct cached_fft)) & 15; |
||||
// make sure the cfg aligns on a 16 byte boundary
|
||||
len += padding; |
||||
#endif |
||||
cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len )); |
||||
if (cur == NULL) |
||||
return NULL; |
||||
cur->cfg = (kiss_fft_cfg)(cur+1); |
||||
#ifdef USE_SIMD |
||||
cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding); |
||||
#endif |
||||
kiss_fft_alloc(nfft,inverse,cur->cfg,&len); |
||||
cur->nfft=nfft; |
||||
cur->inverse=inverse; |
||||
cur->next = NULL; |
||||
if ( prev ) |
||||
prev->next = cur; |
||||
else |
||||
cache_root = cur; |
||||
++ncached; |
||||
} |
||||
return cur->cfg; |
||||
} |
||||
|
||||
void kfc_cleanup(void) |
||||
{ |
||||
kfc_cfg cur=cache_root; |
||||
kfc_cfg next=NULL; |
||||
while (cur){ |
||||
next = cur->next; |
||||
free(cur); |
||||
cur=next; |
||||
} |
||||
ncached=0; |
||||
cache_root = NULL; |
||||
} |
||||
/* Forward complex FFT of size nfft, using the cached config (inverse flag 0). */
void kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
{
    kiss_fft_cfg forward_cfg = find_cached_fft(nfft, 0);

    kiss_fft(forward_cfg, fin, fout);
}
||||
|
||||
/* Inverse complex FFT of size nfft, using the cached config (inverse flag 1). */
void kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
{
    kiss_fft_cfg inverse_cfg = find_cached_fft(nfft, 1);

    kiss_fft(inverse_cfg, fin, fout);
}
||||
|
||||
#ifdef KFC_TEST |
||||
static void check(int nc) |
||||
{ |
||||
if (ncached != nc) { |
||||
fprintf(stderr,"ncached should be %d,but it is %d\n",nc,ncached); |
||||
exit(1); |
||||
} |
||||
} |
||||
|
||||
int main(void) |
||||
{ |
||||
kiss_fft_cpx buf1[1024],buf2[1024]; |
||||
memset(buf1,0,sizeof(buf1)); |
||||
check(0); |
||||
kfc_fft(512,buf1,buf2); |
||||
check(1); |
||||
kfc_fft(512,buf1,buf2); |
||||
check(1); |
||||
kfc_ifft(512,buf1,buf2); |
||||
check(2); |
||||
kfc_cleanup(); |
||||
check(0); |
||||
return 0; |
||||
} |
||||
#endif |
@ -0,0 +1,54 @@
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. |
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
* |
||||
* SPDX-License-Identifier: BSD-3-Clause |
||||
* See COPYING file for more information. |
||||
*/ |
||||
|
||||
#ifndef KFC_H |
||||
#define KFC_H |
||||
#include "kiss_fft.h" |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
/*
|
||||
KFC -- Kiss FFT Cache |
||||
|
||||
Not needing to deal with kiss_fft_alloc and a config |
||||
object may be handy for a lot of programs. |
||||
|
||||
KFC uses the underlying KISS FFT functions, but caches the config object. |
||||
The first time kfc_fft or kfc_ifft is called for a given FFT size, the cfg |
||||
object is created for it. All subsequent calls use the cached |
||||
configuration object. |
||||
|
||||
NOTE: |
||||
You should probably not use this if your program will be using a lot |
||||
of various sizes of FFTs. There is a linear search through the |
||||
cached objects. If you are only using one or two FFT sizes, this |
||||
will be negligible. Otherwise, you may want to use another method |
||||
of managing the cfg objects. |
||||
|
||||
There is no automated cleanup of the cached objects. This could lead |
||||
to large memory usage in a program that uses a lot of *DIFFERENT* |
||||
sized FFTs. If you want to force all cached cfg objects to be freed, |
||||
call kfc_cleanup. |
||||
|
||||
*/ |
||||
|
||||
/*forward complex FFT */ |
||||
void KISS_FFT_API kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout); |
||||
/*reverse complex FFT */ |
||||
void KISS_FFT_API kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout); |
||||
|
||||
/*free all cached objects*/ |
||||
void KISS_FFT_API kfc_cleanup(void); |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,420 @@
@@ -0,0 +1,420 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. |
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
* |
||||
* SPDX-License-Identifier: BSD-3-Clause |
||||
* See COPYING file for more information. |
||||
*/ |
||||
|
||||
|
||||
#include "_kiss_fft_guts.h" |
||||
/* The guts header contains all the multiplication and addition macros that are defined for
|
||||
fixed or floating point complex numbers.  It also declares the kf_ internal functions. |
||||
*/ |
||||
|
||||
static void kf_bfly2( |
||||
kiss_fft_cpx * Fout, |
||||
const size_t fstride, |
||||
const kiss_fft_cfg st, |
||||
int m |
||||
) |
||||
{ |
||||
kiss_fft_cpx * Fout2; |
||||
kiss_fft_cpx * tw1 = st->twiddles; |
||||
kiss_fft_cpx t; |
||||
Fout2 = Fout + m; |
||||
do{ |
||||
C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); |
||||
|
||||
C_MUL (t, *Fout2 , *tw1); |
||||
tw1 += fstride; |
||||
C_SUB( *Fout2 , *Fout , t ); |
||||
C_ADDTO( *Fout , t ); |
||||
++Fout2; |
||||
++Fout; |
||||
}while (--m); |
||||
} |
||||
|
||||
static void kf_bfly4( |
||||
kiss_fft_cpx * Fout, |
||||
const size_t fstride, |
||||
const kiss_fft_cfg st, |
||||
const size_t m |
||||
) |
||||
{ |
||||
kiss_fft_cpx *tw1,*tw2,*tw3; |
||||
kiss_fft_cpx scratch[6]; |
||||
size_t k=m; |
||||
const size_t m2=2*m; |
||||
const size_t m3=3*m; |
||||
|
||||
|
||||
tw3 = tw2 = tw1 = st->twiddles; |
||||
|
||||
do { |
||||
C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); |
||||
|
||||
C_MUL(scratch[0],Fout[m] , *tw1 ); |
||||
C_MUL(scratch[1],Fout[m2] , *tw2 ); |
||||
C_MUL(scratch[2],Fout[m3] , *tw3 ); |
||||
|
||||
C_SUB( scratch[5] , *Fout, scratch[1] ); |
||||
C_ADDTO(*Fout, scratch[1]); |
||||
C_ADD( scratch[3] , scratch[0] , scratch[2] ); |
||||
C_SUB( scratch[4] , scratch[0] , scratch[2] ); |
||||
C_SUB( Fout[m2], *Fout, scratch[3] ); |
||||
tw1 += fstride; |
||||
tw2 += fstride*2; |
||||
tw3 += fstride*3; |
||||
C_ADDTO( *Fout , scratch[3] ); |
||||
|
||||
if(st->inverse) { |
||||
Fout[m].r = scratch[5].r - scratch[4].i; |
||||
Fout[m].i = scratch[5].i + scratch[4].r; |
||||
Fout[m3].r = scratch[5].r + scratch[4].i; |
||||
Fout[m3].i = scratch[5].i - scratch[4].r; |
||||
}else{ |
||||
Fout[m].r = scratch[5].r + scratch[4].i; |
||||
Fout[m].i = scratch[5].i - scratch[4].r; |
||||
Fout[m3].r = scratch[5].r - scratch[4].i; |
||||
Fout[m3].i = scratch[5].i + scratch[4].r; |
||||
} |
||||
++Fout; |
||||
}while(--k); |
||||
} |
||||
|
||||
static void kf_bfly3( |
||||
kiss_fft_cpx * Fout, |
||||
const size_t fstride, |
||||
const kiss_fft_cfg st, |
||||
size_t m |
||||
) |
||||
{ |
||||
size_t k=m; |
||||
const size_t m2 = 2*m; |
||||
kiss_fft_cpx *tw1,*tw2; |
||||
kiss_fft_cpx scratch[5]; |
||||
kiss_fft_cpx epi3; |
||||
epi3 = st->twiddles[fstride*m]; |
||||
|
||||
tw1=tw2=st->twiddles; |
||||
|
||||
do{ |
||||
C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); |
||||
|
||||
C_MUL(scratch[1],Fout[m] , *tw1); |
||||
C_MUL(scratch[2],Fout[m2] , *tw2); |
||||
|
||||
C_ADD(scratch[3],scratch[1],scratch[2]); |
||||
C_SUB(scratch[0],scratch[1],scratch[2]); |
||||
tw1 += fstride; |
||||
tw2 += fstride*2; |
||||
|
||||
Fout[m].r = Fout->r - HALF_OF(scratch[3].r); |
||||
Fout[m].i = Fout->i - HALF_OF(scratch[3].i); |
||||
|
||||
C_MULBYSCALAR( scratch[0] , epi3.i ); |
||||
|
||||
C_ADDTO(*Fout,scratch[3]); |
||||
|
||||
Fout[m2].r = Fout[m].r + scratch[0].i; |
||||
Fout[m2].i = Fout[m].i - scratch[0].r; |
||||
|
||||
Fout[m].r -= scratch[0].i; |
||||
Fout[m].i += scratch[0].r; |
||||
|
||||
++Fout; |
||||
}while(--k); |
||||
} |
||||
|
||||
static void kf_bfly5( |
||||
kiss_fft_cpx * Fout, |
||||
const size_t fstride, |
||||
const kiss_fft_cfg st, |
||||
int m |
||||
) |
||||
{ |
||||
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; |
||||
int u; |
||||
kiss_fft_cpx scratch[13]; |
||||
kiss_fft_cpx * twiddles = st->twiddles; |
||||
kiss_fft_cpx *tw; |
||||
kiss_fft_cpx ya,yb; |
||||
ya = twiddles[fstride*m]; |
||||
yb = twiddles[fstride*2*m]; |
||||
|
||||
Fout0=Fout; |
||||
Fout1=Fout0+m; |
||||
Fout2=Fout0+2*m; |
||||
Fout3=Fout0+3*m; |
||||
Fout4=Fout0+4*m; |
||||
|
||||
tw=st->twiddles; |
||||
for ( u=0; u<m; ++u ) { |
||||
C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); |
||||
scratch[0] = *Fout0; |
||||
|
||||
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
||||
C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
||||
C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); |
||||
C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); |
||||
|
||||
C_ADD( scratch[7],scratch[1],scratch[4]); |
||||
C_SUB( scratch[10],scratch[1],scratch[4]); |
||||
C_ADD( scratch[8],scratch[2],scratch[3]); |
||||
C_SUB( scratch[9],scratch[2],scratch[3]); |
||||
|
||||
Fout0->r += scratch[7].r + scratch[8].r; |
||||
Fout0->i += scratch[7].i + scratch[8].i; |
||||
|
||||
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); |
||||
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); |
||||
|
||||
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); |
||||
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); |
||||
|
||||
C_SUB(*Fout1,scratch[5],scratch[6]); |
||||
C_ADD(*Fout4,scratch[5],scratch[6]); |
||||
|
||||
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); |
||||
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); |
||||
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); |
||||
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); |
||||
|
||||
C_ADD(*Fout2,scratch[11],scratch[12]); |
||||
C_SUB(*Fout3,scratch[11],scratch[12]); |
||||
|
||||
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; |
||||
} |
||||
} |
||||
|
||||
/* perform the butterfly for one stage of a mixed radix FFT */ |
||||
static void kf_bfly_generic( |
||||
kiss_fft_cpx * Fout, |
||||
const size_t fstride, |
||||
const kiss_fft_cfg st, |
||||
int m, |
||||
int p |
||||
) |
||||
{ |
||||
int u,k,q1,q; |
||||
kiss_fft_cpx * twiddles = st->twiddles; |
||||
kiss_fft_cpx t; |
||||
int Norig = st->nfft; |
||||
|
||||
kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); |
||||
if (scratch == NULL){ |
||||
KISS_FFT_ERROR("Memory allocation failed."); |
||||
return; |
||||
} |
||||
|
||||
for ( u=0; u<m; ++u ) { |
||||
k=u; |
||||
for ( q1=0 ; q1<p ; ++q1 ) { |
||||
scratch[q1] = Fout[ k ]; |
||||
C_FIXDIV(scratch[q1],p); |
||||
k += m; |
||||
} |
||||
|
||||
k=u; |
||||
for ( q1=0 ; q1<p ; ++q1 ) { |
||||
int twidx=0; |
||||
Fout[ k ] = scratch[0]; |
||||
for (q=1;q<p;++q ) { |
||||
twidx += fstride * k; |
||||
if (twidx>=Norig) twidx-=Norig; |
||||
C_MUL(t,scratch[q] , twiddles[twidx] ); |
||||
C_ADDTO( Fout[ k ] ,t); |
||||
} |
||||
k += m; |
||||
} |
||||
} |
||||
KISS_FFT_TMP_FREE(scratch); |
||||
} |
||||
|
||||
static |
||||
void kf_work( |
||||
kiss_fft_cpx * Fout, |
||||
const kiss_fft_cpx * f, |
||||
const size_t fstride, |
||||
int in_stride, |
||||
int * factors, |
||||
const kiss_fft_cfg st |
||||
) |
||||
{ |
||||
kiss_fft_cpx * Fout_beg=Fout; |
||||
const int p=*factors++; /* the radix */ |
||||
const int m=*factors++; /* stage's fft length/p */ |
||||
const kiss_fft_cpx * Fout_end = Fout + p*m; |
||||
|
||||
#ifdef _OPENMP |
||||
// use openmp extensions at the
|
||||
// top-level (not recursive)
|
||||
if (fstride==1 && p<=5 && m!=1) |
||||
{ |
||||
int k; |
||||
|
||||
// execute the p different work units in different threads
|
||||
# pragma omp parallel for |
||||
for (k=0;k<p;++k) |
||||
kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st); |
||||
// all threads have joined by this point
|
||||
|
||||
switch (p) { |
||||
case 2: kf_bfly2(Fout,fstride,st,m); break; |
||||
case 3: kf_bfly3(Fout,fstride,st,m); break; |
||||
case 4: kf_bfly4(Fout,fstride,st,m); break; |
||||
case 5: kf_bfly5(Fout,fstride,st,m); break; |