// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. 
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_FFT_H
#define EIGEN_FFT_H

#include <complex>
#include <vector>
#include <map>
#include <Eigen/Core>


/**
  * \defgroup FFT_Module Fast Fourier Transform module
  *
  * \code
  * #include <unsupported/Eigen/FFT>
  * \endcode
  *
  * This module provides Fast Fourier transformation, with a configurable backend
  * implementation.
  *
  * The default implementation is based on kissfft. It is a small, free, and
  * reasonably efficient default.
  *
  * There are currently two implementation backend:
  *
  * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size.
  * - MKL (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form.
  *
  * \section FFTDesign Design
  *
  * The following design decisions were made concerning scaling and
  * half-spectrum for real FFT.
  *
  * The intent is to facilitate generic programming and ease migrating code
  * from  Matlab/octave.
  * We think the default behavior of Eigen/FFT should favor correctness and
  * generality over speed. Of course, the caller should be able to "opt-out" from this
  * behavior and get the speed increase if they want it.
  *
  * 1) %Scaling:
  * Other libraries (FFTW,IMKL,KISSFFT)  do not perform scaling, so there
  * is a constant gain incurred after the forward&inverse transforms , so 
  * IFFT(FFT(x)) = Kx;  this is done to avoid a vector-by-value multiply.  
  * The downside is that algorithms that worked correctly in Matlab/octave 
  * don't behave the same way once implemented in C++.
  *
  * How Eigen/FFT differs: invertible scaling is performed so IFFT( FFT(x) ) = x. 
  *
  * 2) Real FFT half-spectrum
  * Other libraries use only half the frequency spectrum (plus one extra 
  * sample for the Nyquist bin) for a real FFT, the other half is the 
  * conjugate-symmetric of the first half.  This saves them a copy and some 
  * memory.  The downside is the caller needs to have special logic for the 
  * number of bins in complex vs real.
  *
  * How Eigen/FFT differs: The full spectrum is returned from the forward 
  * transform.  This facilitates generic template programming by obviating 
  * separate specializations for real vs complex.  On the inverse
  * transform, only half the spectrum is actually used if the output type is real.
  */


#ifdef EIGEN_FFTW_DEFAULT
// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
#  include <fftw3.h>
#  include "src/FFT/ei_fftw_impl.h"
   namespace Eigen {
     //template <typename T> typedef struct internal::fftw_impl  default_fft_impl; this does not work
     template <typename T> struct default_fft_impl : public internal::fftw_impl<T> {};
   }
#elif defined EIGEN_MKL_DEFAULT
// TODO
// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form
#  include "src/FFT/ei_imklfft_impl.h"
   namespace Eigen {
     template <typename T> struct default_fft_impl : public internal::imklfft_impl {};
   }
#else
// internal::kissfft_impl:  small, free, reasonably efficient default, derived from kissfft
//
# include "src/FFT/ei_kissfft_impl.h"
namespace Eigen {
template<typename T>
struct default_fft_impl: public internal::kissfft_impl<T> { };
}
#endif

namespace Eigen {


// 
template<typename T_SrcMat, typename T_FftIfc>
struct fft_fwd_proxy;
template<typename T_SrcMat, typename T_FftIfc>
struct fft_inv_proxy;

namespace internal {
template<typename T_SrcMat, typename T_FftIfc>
struct traits<fft_fwd_proxy<T_SrcMat, T_FftIfc> > {
  typedef typename T_SrcMat::PlainObject ReturnType;
};
template<typename T_SrcMat, typename T_FftIfc>
struct traits<fft_inv_proxy<T_SrcMat, T_FftIfc> > {
  typedef typename T_SrcMat::PlainObject ReturnType;
};
}

template<typename T_SrcMat, typename T_FftIfc>
struct fft_fwd_proxy
    : public ReturnByValue<fft_fwd_proxy<T_SrcMat, T_FftIfc> > {
  typedef DenseIndex Index;

  fft_fwd_proxy(const T_SrcMat &src, T_FftIfc &fft, Index nfft) : m_src(src), m_ifc(fft), m_nfft(nfft) { }

  template<typename T_DestMat>
  void evalTo(T_DestMat &dst) const;

  Index rows() const { return m_src.rows(); }
  Index cols() const { return m_src.cols(); }
 protected:
  const T_SrcMat &m_src;
  T_FftIfc &m_ifc;
  Index m_nfft;
 private:
  fft_fwd_proxy &operator=(const fft_fwd_proxy &);
};

template<typename T_SrcMat, typename T_FftIfc>
struct fft_inv_proxy
    : public ReturnByValue<fft_inv_proxy<T_SrcMat, T_FftIfc> > {
  typedef DenseIndex Index;

  fft_inv_proxy(const T_SrcMat &src, T_FftIfc &fft, Index nfft) : m_src(src), m_ifc(fft), m_nfft(nfft) { }

  template<typename T_DestMat>
  void evalTo(T_DestMat &dst) const;

  Index rows() const { return m_src.rows(); }
  Index cols() const { return m_src.cols(); }
 protected:
  const T_SrcMat &m_src;
  T_FftIfc &m_ifc;
  Index m_nfft;
 private:
  fft_inv_proxy &operator=(const fft_inv_proxy &);
};


template<typename T_Scalar,
    typename T_Impl=default_fft_impl<T_Scalar> >
class FFT {
 public:
  typedef T_Impl impl_type;
  typedef DenseIndex Index;
  typedef typename impl_type::Scalar Scalar;
  typedef typename impl_type::Complex Complex;

  enum Flag {
    Default = 0, // goof proof
    Unscaled = 1,
    HalfSpectrum = 2,
    // SomeOtherSpeedOptimization=4
        Speedy = 32767
  };

  FFT(const impl_type &impl = impl_type(), Flag flags = Default) : m_impl(impl), m_flag(flags) { }

  inline
  bool HasFlag(Flag f) const { return (m_flag & (int) f) == f; }

  inline
  void SetFlag(Flag f) { m_flag |= (int) f; }

  inline
  void ClearFlag(Flag f) { m_flag &= (~(int) f); }

  inline
  void fwd(Complex *dst, const Scalar *src, Index nfft) {
    m_impl.fwd(dst, src, static_cast<int>(nfft));
    if (HasFlag(HalfSpectrum) == false)
      ReflectSpectrum(dst, nfft);
  }

  inline
  void fwd(Complex *dst, const Complex *src, Index nfft) {
    m_impl.fwd(dst, src, static_cast<int>(nfft));
  }

  /*
  inline
  void fwd2(Complex * dst, const Complex * src, int n0,int n1)
  {
    m_impl.fwd2(dst,src,n0,n1);
  }
  */

  template<typename _Input>
  inline
  void fwd(std::vector<Complex> &dst, const std::vector<_Input> &src) {
    if (NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum))
      dst.resize((src.size() >> 1) + 1); // half the bins + Nyquist bin
    else
      dst.resize(src.size());
    fwd(&dst[0], &src[0], src.size());
  }

  template<typename InputDerived, typename ComplexDerived>
  inline
  void fwd(MatrixBase<ComplexDerived> &dst, const MatrixBase<InputDerived> &src, Index nfft = -1) {
    typedef typename ComplexDerived::Scalar dst_type;
    typedef typename InputDerived::Scalar src_type;
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived, InputDerived) // size at compile-time
    EIGEN_STATIC_ASSERT((internal::is_same<dst_type, Complex>::value),
                        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    EIGEN_STATIC_ASSERT(int(InputDerived::Flags)
                            &int(ComplexDerived::Flags) & DirectAccessBit,
                        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)

    if (nfft < 1)
      nfft = src.size();

    if (NumTraits<src_type>::IsComplex == 0 && HasFlag(HalfSpectrum))
      dst.derived().resize((nfft >> 1) + 1);
    else
      dst.derived().resize(nfft);

    if (src.innerStride() != 1 || src.size() < nfft) {
      Matrix<src_type, 1, Dynamic> tmp;
      if (src.size() < nfft) {
        tmp.setZero(nfft);
        tmp.block(0, 0, src.size(), 1) = src;
      } else {
        tmp = src;
      }
      fwd(&dst[0], &tmp[0], nfft);
    } else {
      fwd(&dst[0], &src[0], nfft);
    }
  }

  template<typename InputDerived>
  inline
  fft_fwd_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> >
  fwd(const MatrixBase<InputDerived> &src, Index nfft = -1) {
    return fft_fwd_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> >(src, *this, nfft);
  }

  template<typename InputDerived>
  inline
  fft_inv_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> >
  inv(const MatrixBase<InputDerived> &src, Index nfft = -1) {
    return fft_inv_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> >(src, *this, nfft);
  }

  inline
  void inv(Complex *dst, const Complex *src, Index nfft) {
    m_impl.inv(dst, src, static_cast<int>(nfft));
    if (HasFlag(Unscaled) == false)
      scale(dst, Scalar(1. / nfft), nfft); // scale the time series
  }

  inline
  void inv(Scalar *dst, const Complex *src, Index nfft) {
    m_impl.inv(dst, src, static_cast<int>(nfft));
    if (HasFlag(Unscaled) == false)
      scale(dst, Scalar(1. / nfft), nfft); // scale the time series
  }

  template<typename OutputDerived, typename ComplexDerived>
  inline
  void inv(MatrixBase<OutputDerived> &dst, const MatrixBase<ComplexDerived> &src, Index nfft = -1) {
    typedef typename ComplexDerived::Scalar src_type;
    typedef typename OutputDerived::Scalar dst_type;
    const bool realfft = (NumTraits<dst_type>::IsComplex == 0);
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived, OutputDerived) // size at compile-time
    EIGEN_STATIC_ASSERT((internal::is_same<src_type, Complex>::value),
                        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)
                            &int(ComplexDerived::Flags) & DirectAccessBit,
                        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)

    if (nfft < 1) { //automatic FFT size determination
      if (realfft && HasFlag(HalfSpectrum))
        nfft = 2 * (src.size() - 1); //assume even fft size
      else
        nfft = src.size();
    }
    dst.derived().resize(nfft);

    // check for nfft that does not fit the input data size
    Index resize_input = (realfft && HasFlag(HalfSpectrum))
                         ? ((nfft / 2 + 1) - src.size())
                         : (nfft - src.size());

    if (src.innerStride() != 1 || resize_input) {
      // if the vector is strided, then we need to copy it to a packed temporary
      Matrix<src_type, 1, Dynamic> tmp;
      if (resize_input) {
        size_t ncopy = (std::min)(src.size(), src.size() + resize_input);
        tmp.setZero(src.size() + resize_input);
        if (realfft && HasFlag(HalfSpectrum)) {
          // pad at the Nyquist bin
          tmp.head(ncopy) = src.head(ncopy);
          tmp(ncopy - 1) = real(tmp(ncopy - 1)); // enforce real-only Nyquist bin
        } else {
          size_t nhead, ntail;
          nhead = 1 + ncopy / 2 - 1; // range  [0:pi)
          ntail = ncopy / 2 - 1;   // range (-pi:0)
          tmp.head(nhead) = src.head(nhead);
          tmp.tail(ntail) = src.tail(ntail);
          if (resize_input < 0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it
            tmp(nhead) = (src(nfft / 2) + src(src.size() - nfft / 2)) * src_type(.5);
          } else { // expanding -- split the old Nyquist bin into two halves
            tmp(nhead) = src(nhead) * src_type(.5);
            tmp(tmp.size() - nhead) = tmp(nhead);
          }
        }
      } else {
        tmp = src;
      }
      inv(&dst[0], &tmp[0], nfft);
    } else {
      inv(&dst[0], &src[0], nfft);
    }
  }

  template<typename _Output>
  inline
  void inv(std::vector<_Output> &dst, const std::vector<Complex> &src, Index nfft = -1) {
    if (nfft < 1)
      nfft = (NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum)) ? 2 * (src.size() - 1) : src.size();
    dst.resize(nfft);
    inv(&dst[0], &src[0], nfft);
  }


  /*
  // TODO: multi-dimensional FFTs
  inline
  void inv2(Complex * dst, const Complex * src, int n0,int n1)
  {
    m_impl.inv2(dst,src,n0,n1);
    if ( HasFlag( Unscaled ) == false)
        scale(dst,1./(n0*n1),n0*n1);
  }
*/

  inline
  impl_type &impl() { return m_impl; }
 private:

  template<typename T_Data>
  inline
  void scale(T_Data *x, Scalar s, Index nx) {
#if 1
    for (int k = 0; k < nx; ++k)
      *x++ *= s;
#else
    if ( ((ptrdiff_t)x) & 15 )
      Matrix<T_Data, Dynamic, 1>::Map(x,nx) *= s;
    else
      Matrix<T_Data, Dynamic, 1>::MapAligned(x,nx) *= s;
       //Matrix<T_Data, Dynamic, Dynamic>::Map(x,nx) * s;
#endif
  }

  inline
  void ReflectSpectrum(Complex *freq, Index nfft) {
    // create the implicit right-half spectrum (conjugate-mirror of the left-half)
    Index nhbins = (nfft >> 1) + 1;
    for (Index k = nhbins; k < nfft; ++k)
      freq[k] = conj(freq[nfft - k]);
  }

  impl_type m_impl;
  int m_flag;
};

template<typename T_SrcMat, typename T_FftIfc>
template<typename T_DestMat>
inline
void fft_fwd_proxy<T_SrcMat, T_FftIfc>::evalTo(T_DestMat &dst) const {
  m_ifc.fwd(dst, m_src, m_nfft);
}

template<typename T_SrcMat, typename T_FftIfc>
template<typename T_DestMat>
inline
void fft_inv_proxy<T_SrcMat, T_FftIfc>::evalTo(T_DestMat &dst) const {
  m_ifc.inv(dst, m_src, m_nfft);
}

}
#endif
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
