Ayuna-Competitive-Programming-Library

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub ayuna-stpyko/Ayuna-Competitive-Programming-Library

:question: fps/common_ntt.hpp

Depends on

Verified with

Code

#pragma once

#include "common.hpp"
#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <vector>

namespace ayuna {

template <typename mint>
struct NTTFriendlyFormalPowerSeries
    : FormalPowerSeriesBase<mint, NTTFriendlyFormalPowerSeries<mint>> {
  using Base = FormalPowerSeriesBase<mint, NTTFriendlyFormalPowerSeries<mint>>;
  using FPS = NTTFriendlyFormalPowerSeries;
  using Base::operator*=;
  using Base::operator*;

  void init_tables() { setwy(level); }

  NTTFriendlyFormalPowerSeries() : Base() { init_tables(); }

  explicit NTTFriendlyFormalPowerSeries(int n) : Base(n) { init_tables(); }

  NTTFriendlyFormalPowerSeries(int n, const mint &v) : Base(n, v) {
    init_tables();
  }

  NTTFriendlyFormalPowerSeries(std::initializer_list<mint> init) : Base(init) {
    init_tables();
  }

  template <std::input_iterator It>
  NTTFriendlyFormalPowerSeries(It first, It last) : Base(first, last) {
    init_tables();
  }

  static constexpr uint32_t mod = mint::mod();
  static constexpr int primitive_root = primitive_root_constexpr(mod);
  static constexpr int level = std::countr_zero((uint32_t)(mod - 1));
  static_assert(level >= 3, "NTTFriendlyFormalPowerSeries requires v2(mod-1) "
                            ">= 3 (NTT-friendly prime).");
  mint dw[level], dy[level];

  void setwy(int k) {
    mint w[level], y[level];
    w[k - 1] = mint(primitive_root).pow((mod - 1) / (1 << k));
    y[k - 1] = w[k - 1].inv();
    for(int i = k - 2; i > 0; i--) {
      w[i] = w[i + 1] * w[i + 1];
      y[i] = y[i + 1] * y[i + 1];
    }
    dw[1] = w[1], dw[2] = w[2];
    dy[1] = y[1], dy[2] = y[2];
    for(int i = 3; i < k; i++) {
      dw[i] = dw[i - 1] * y[i - 2] * w[i];
      dy[i] = dy[i - 1] * w[i - 2] * y[i];
    }
  }

  void fft4(std::vector<mint> &a, int k) {
    if((int)a.size() <= 1)
      return;
    if(k == 1) {
      mint a1 = a[1];
      a[1] = a[0] - a[1];
      a[0] = a[0] + a1;
      return;
    }
    if(k & 1) {
      int v = 1 << (k - 1);
      for(int j = 0; j < v; j++) {
        mint ajv = a[j + v];
        a[j + v] = a[j] - ajv;
        a[j] += ajv;
      }
    }
    int u = 1 << (2 + (k & 1));
    int v = 1 << (k - 2 - (k & 1));
    const mint one = mint(1);
    const mint imag = dw[1];
    while(v) {
      for(int j0 = 0, j1 = v, j2 = j1 + v, j3 = j2 + v; j0 < v;
          j0++, j1++, j2++, j3++) {
        const mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
        const mint t0p2 = t0 + t2, t1p3 = t1 + t3;
        const mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;
        a[j0] = t0p2 + t1p3, a[j1] = t0p2 - t1p3;
        a[j2] = t0m2 + t1m3, a[j3] = t0m2 - t1m3;
      }
      mint ww = one, xx = one * dw[2], wx = one;
      for(int jh = 4; jh < u;) {
        ww = xx * xx, wx = ww * xx;
        for(int j0 = jh * v, je = j0 + v, j2 = je + v; j0 < je; j0++, j2++) {
          const mint t0 = a[j0], t1 = a[j0 + v] * xx;
          const mint t2 = a[j2] * ww, t3 = a[j2 + v] * wx;
          const mint t0p2 = t0 + t2, t1p3 = t1 + t3;
          const mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;
          a[j0] = t0p2 + t1p3, a[j0 + v] = t0p2 - t1p3;
          a[j2] = t0m2 + t1m3, a[j2 + v] = t0m2 - t1m3;
        }
        jh += 4;
        xx *= dw[__builtin_ctzll(jh)];
      }
      u <<= 2;
      v >>= 2;
    }
  }

  void ifft4(std::vector<mint> &a, int k) {
    if((int)a.size() <= 1)
      return;
    if(k == 1) {
      const mint a1 = a[1];
      a[1] = a[0] - a[1];
      a[0] = a[0] + a1;
      return;
    }
    int u = 1 << (k - 2);
    int v = 1;
    const mint one = mint(1);
    const mint imag = dy[1];
    while(u) {
      for(int j0 = 0, j1 = v, j2 = j1 + v, j3 = j2 + v; j0 < v;
          j0++, j1++, j2++, j3++) {
        const mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
        const mint t0p1 = t0 + t1, t2p3 = t2 + t3;
        const mint t0m1 = t0 - t1, t2m3 = (t2 - t3) * imag;
        a[j0] = t0p1 + t2p3, a[j2] = t0p1 - t2p3;
        a[j1] = t0m1 + t2m3, a[j3] = t0m1 - t2m3;
      }
      mint ww = one, xx = one * dy[2], yy = one;
      u <<= 2;
      for(int jh = 4; jh < u;) {
        ww = xx * xx, yy = xx * imag;
        for(int j0 = jh * v, je = j0 + v, j2 = je + v; j0 < je; j0++, j2++) {
          const mint t0 = a[j0], t1 = a[j0 + v], t2 = a[j2], t3 = a[j2 + v];
          const mint t0p1 = t0 + t1, t2p3 = t2 + t3;
          const mint t0m1 = (t0 - t1) * xx, t2m3 = (t2 - t3) * yy;
          a[j0] = t0p1 + t2p3, a[j2] = (t0p1 - t2p3) * ww;
          a[j0 + v] = t0m1 + t2m3, a[j2 + v] = (t0m1 - t2m3) * ww;
        }
        jh += 4;
        xx *= dy[__builtin_ctzll(jh)];
      }
      u >>= 4;
      v <<= 2;
    }
    if(k & 1) {
      u = 1 << (k - 1);
      for(int j = 0; j < u; j++) {
        mint ajv = a[j] - a[j + u];
        a[j] += a[j + u];
        a[j + u] = ajv;
      }
    }
  }

  void ntt(std::vector<mint> &a) {
    if((int)a.size() <= 1)
      return;
    fft4(a, __builtin_ctz(a.size()));
  }

  void intt(std::vector<mint> &a) {
    if((int)a.size() <= 1)
      return;
    ifft4(a, __builtin_ctz(a.size()));
    const mint iv = mint(a.size()).inv();
    for(auto &x : a)
      x *= iv;
  }

  FPS &operator*=(const FPS &r) {
    if(this->empty() || r.empty()) {
      this->clear();
      return *this;
    }
    int l = int(this->size() + r.size()) - 1;
    if(std::min(this->size(), r.size()) <= size_t(40)) {
      std::vector<mint> s(l);
      for(int i = 0; i < (int)this->size(); i++)
        for(int j = 0; j < (int)r.size(); j++)
          s[i + j] += (*this)[i] * r[j];
      this->assign(s.begin(), s.end());
      return *this;
    }
    int k = 2, M = 4;
    while(M < l) {
      M <<= 1;
      k++;
    }
    setwy(k);
    std::vector<mint> s(M);
    for(int i = 0; i < (int)this->size(); i++)
      s[i] = (*this)[i];
    fft4(s, k);
    if(this->size() == r.size() && *this == r) {
      for(int i = 0; i < M; i++)
        s[i] *= s[i];
    }
    else {
      std::vector<mint> t(M);
      for(int i = 0; i < (int)r.size(); i++)
        t[i] = r[i];
      fft4(t, k);
      for(int i = 0; i < M; i++)
        s[i] *= t[i];
    }
    ifft4(s, k);
    s.resize(l);
    const mint invm = mint(M).inv();
    for(int i = 0; i < l; i++)
      s[i] *= invm;
    this->assign(s.begin(), s.end());
    return *this;
  }

  FPS operator*(const FPS &r) const { return FPS(*this) *= r; }

  void ntt_doubling(std::vector<mint> &a) {
    const int M = int(a.size());
    auto b = a;
    intt(b);
    mint r = 1, zeta = mint(primitive_root).pow((mod - 1) / (M << 1));
    for(int i = 0; i < M; i++) {
      b[i] *= r;
      r *= zeta;
    }
    ntt(b);
    std::copy(std::begin(b), std::end(b), std::back_inserter(a));
  }
};

} // namespace ayuna
#line 2 "fps/common_ntt.hpp"

#line 2 "fps/common.hpp"

#line 2 "math/convolution.hpp"

#line 2 "math/modint_static.hpp"

#include <cassert>
#include <iostream>
#include <numeric>
#include <type_traits>
#include <utility>

namespace ayuna {

using ll = long long;
using ull = unsigned long long;
using u32 = unsigned int;

template <int m> struct modint {
  using mint = modint;

private:
  u32 _v;

public:
  constexpr modint() : _v(0) {}
  template <class T, std::enable_if_t<std::is_integral_v<T>, int> = 0>
  constexpr modint(T v) {
    ll x = (ll)(v % (ll)(m));
    if(x < 0)
      x += m;
    _v = (u32)(x);
  }

  u32 val() const { return _v; }

  static constexpr u32 mod() { return m; }

  mint &operator++() {
    _v++;
    if(_v == m)
      _v = 0;
    return *this;
  }
  mint &operator--() {
    if(_v == 0)
      _v = m;
    _v--;
    return *this;
  }
  mint operator++(int) {
    mint result = *this;
    ++*this;
    return result;
  }
  mint operator--(int) {
    mint result = *this;
    --*this;
    return result;
  }

  mint &operator+=(const mint &r) {
    _v += r._v;
    if(_v >= m)
      _v -= m;
    return *this;
  }
  mint &operator-=(const mint &r) {
    _v -= r._v;
    if(_v >= m)
      _v += m;
    return *this;
  }
  mint &operator*=(const mint &r) {
    ull z = _v;
    z *= r._v;
    _v = (u32)(z % m);
    return *this;
  }
  mint &operator/=(const mint &r) { return *this = *this * r.inv(); }

  mint operator+() const { return *this; }
  mint operator-() const { return mint() - *this; }

  mint pow(ll n) const {
    assert(0 <= n);
    mint x = *this, r = 1;
    while(n) {
      if(n & 1)
        r *= x;
      x *= x;
      n >>= 1;
    }
    return r;
  }
  mint inv() const {
    assert(_v);
    return pow(m - 2);
  }

  friend mint operator+(const mint &l, const mint &r) { return mint(l) += r; }
  friend mint operator-(const mint &l, const mint &r) { return mint(l) -= r; }
  friend mint operator*(const mint &l, const mint &r) { return mint(l) *= r; }
  friend mint operator/(const mint &l, const mint &r) { return mint(l) /= r; }
  friend bool operator==(const mint &l, const mint &r) { return l._v == r._v; }
  friend bool operator!=(const mint &l, const mint &r) { return l._v != r._v; }
  friend std::ostream &operator<<(std::ostream &os, const mint &a) {
    return os << a.val();
  }
};

using modint998244353 = modint<998244353>;
using modint1000000007 = modint<1000000007>;

} // namespace ayuna
#line 4 "math/convolution.hpp"
#include <algorithm>
#include <array>
#include <bit>
#line 8 "math/convolution.hpp"
#include <concepts>
#line 11 "math/convolution.hpp"
#include <vector>

namespace ayuna {

constexpr long long safe_mod(long long x, long long m) {
  x %= m;
  if(x < 0)
    x += m;
  return x;
}

constexpr long long pow_mod_constexpr(long long x, long long n, int m) {
  if(m == 1)
    return 0;
  unsigned int _m = (unsigned int)(m);
  unsigned long long r = 1;
  unsigned long long y = safe_mod(x, m);
  while(n) {
    if(n & 1)
      r = (r * y) % _m;
    y = (y * y) % _m;
    n >>= 1;
  }
  return r;
}

constexpr int primitive_root_constexpr(int m) {
  if(m == 2)
    return 1;
  if(m == 167772161)
    return 3;
  if(m == 469762049)
    return 3;
  if(m == 754974721)
    return 11;
  if(m == 998244353)
    return 3;
  int divs[20] = {};
  divs[0] = 2;
  int cnt = 1;
  int x = (m - 1) / 2;
  while(x % 2 == 0)
    x >>= 1;
  for(int i = 3; (long long)(i)*i <= x; i += 2) {
    if(x % i == 0) {
      divs[cnt++] = i;
      while(x % i == 0)
        x /= i;
    }
  }
  if(x > 1)
    divs[cnt++] = x;
  for(int g = 2;; g++) {
    bool ok = true;
    for(int i = 0; i < cnt; i++) {
      if(pow_mod_constexpr(g, (m - 1) / divs[i], m) == 1) {
        ok = false;
        break;
      }
    }
    if(ok)
      return g;
  }
}
template <int m> constexpr int primitive_root = primitive_root_constexpr(m);

template <class T>
concept StaticModint = requires(T a, T b, long long n) {
  std::integral_constant<decltype(T::mod()), T::mod()>{};
  { a + b } -> std::same_as<T>;
  { a - b } -> std::same_as<T>;
  { a *b } -> std::same_as<T>;
  { a.val() } -> std::convertible_to<unsigned int>;
  { a.inv() } -> std::same_as<T>;
  { a.pow(n) } -> std::same_as<T>;
  T{1};
};

template <StaticModint mint, int g = primitive_root<(int)mint::mod()>>
struct fft_info {
  static constexpr int rank2 =
      std::countr_zero((unsigned int)(mint::mod() - 1));
  std::array<mint, rank2 + 1> root;
  std::array<mint, rank2 + 1> iroot;
  std::array<mint, std::max(0, rank2 - 1)> rate2;
  std::array<mint, std::max(0, rank2 - 1)> irate2;
  std::array<mint, std::max(0, rank2 - 2)> rate3;
  std::array<mint, std::max(0, rank2 - 2)> irate3;

  fft_info() {
    root[rank2] = mint(g).pow((mint::mod() - 1) >> rank2);
    iroot[rank2] = root[rank2].inv();
    for(int i = rank2 - 1; i >= 0; i--) {
      root[i] = root[i + 1] * root[i + 1];
      iroot[i] = iroot[i + 1] * iroot[i + 1];
    }
    {
      mint prod = 1, iprod = 1;
      for(int i = 0; i <= rank2 - 2; i++) {
        rate2[i] = root[i + 2] * prod;
        irate2[i] = iroot[i + 2] * iprod;
        prod *= iroot[i + 2];
        iprod *= root[i + 2];
      }
    }
    {
      mint prod = 1, iprod = 1;
      for(int i = 0; i <= rank2 - 3; i++) {
        rate3[i] = root[i + 3] * prod;
        irate3[i] = iroot[i + 3] * iprod;
        prod *= iroot[i + 3];
        iprod *= root[i + 3];
      }
    }
  }
};

template <StaticModint mint> void butterfly(std::vector<mint> &a) {
  const int n = int(a.size());
  const int h = std::countr_zero((unsigned int)n);
  static const fft_info<mint> info;
  int len = 0;
  while(len < h) {
    if(h - len == 1) {
      int p = 1 << (h - len - 1);
      mint rot = 1;
      for(int s = 0; s < (1 << len); s++) {
        int offset = s << (h - len);
        for(int i = 0; i < p; i++) {
          auto l = a[i + offset];
          auto r = a[i + offset + p] * rot;
          a[i + offset] = l + r;
          a[i + offset + p] = l - r;
        }
        if(s + 1 != (1 << len))
          rot *= info.rate2[std::countr_zero(~(unsigned int)(s))];
      }
      len++;
    }
		else {
      if constexpr(fft_info<mint>::rank2 >= 2) {
        // 4-base
        int p = 1 << (h - len - 2);
        mint rot = 1, imag = info.root[2];
        for(int s = 0; s < (1 << len); s++) {
          mint rot2 = rot * rot, rot3 = rot2 * rot;
          int offset = s << (h - len);
          for(int i = 0; i < p; i++) {
            auto mod2 = 1ULL * mint::mod() * mint::mod();
            auto a0 = 1ULL * a[i + offset].val();
            auto a1 = 1ULL * a[i + offset + p].val() * rot.val();
            auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val();
            auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val();
            auto a1na3imag = 1ULL * mint(a1 + mod2 - a3).val() * imag.val();
            auto na2 = mod2 - a2;
            a[i + offset] = a0 + a2 + a1 + a3;
            a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
            a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
            a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
          }
          if(s + 1 != (1 << len))
            rot *= info.rate3[std::countr_zero(~(unsigned int)(s))];
        }
        len += 2;
      }
			else {
        assert(false && "butterfly: rank2 too small for 4-base NTT");
      }
    }
  }
}

template <StaticModint mint> void butterfly_inv(std::vector<mint> &a) {
  const int n = int(a.size());
  const int h = std::countr_zero((unsigned int)n);
  static const fft_info<mint> info;
  int len = h;
  while(len) {
    if(len == 1) {
      int p = 1 << (h - len);
      mint irot = 1;
      for(int s = 0; s < (1 << (len - 1)); s++) {
        int offset = s << (h - len + 1);
        for(int i = 0; i < p; i++) {
          auto l = a[i + offset];
          auto r = a[i + offset + p];
          a[i + offset] = l + r;
          a[i + offset + p] =
              (unsigned long long)(mint::mod() + l.val() - r.val()) *
              irot.val();
        }
        if(s + 1 != (1 << (len - 1)))
          irot *= info.irate2[std::countr_zero(~(unsigned int)(s))];
      }
      len--;
    }
		else {
      if constexpr(fft_info<mint>::rank2 >= 2) {
        // 4-base
        int p = 1 << (h - len);
        mint irot = 1, iimag = info.iroot[2];
        for(int s = 0; s < (1 << (len - 2)); s++) {
          mint irot2 = irot * irot, irot3 = irot2 * irot;
          int offset = s << (h - len + 2);
          for(int i = 0; i < p; i++) {
            auto a0 = 1ULL * a[i + offset + 0 * p].val();
            auto a1 = 1ULL * a[i + offset + 1 * p].val();
            auto a2 = 1ULL * a[i + offset + 2 * p].val();
            auto a3 = 1ULL * a[i + offset + 3 * p].val();
            auto a2na3iimag =
                1ULL * mint((mint::mod() + a2 - a3) * iimag.val()).val();
            a[i + offset] = a0 + a1 + a2 + a3;
            a[i + offset + 1 * p] =
                (a0 + (mint::mod() - a1) + a2na3iimag) * irot.val();
            a[i + offset + 2 * p] =
                (a0 + a1 + (mint::mod() - a2) + (mint::mod() - a3)) *
                irot2.val();
            a[i + offset + 3 * p] =
                (a0 + (mint::mod() - a1) + (mint::mod() - a2na3iimag)) *
                irot3.val();
          }
          if(s + 1 != (1 << (len - 2)))
            irot *= info.irate3[std::countr_zero(~(unsigned int)(s))];
        }
        len -= 2;
      }
			else {
        assert(false && "butterfly_inv: rank2 too small for 4-base NTT");
      }
    }
  }
}

template <StaticModint mint>
std::vector<mint> convolution_naive(const std::vector<mint> &a,
                                    const std::vector<mint> &b) {
  const int n = int(a.size()), m = int(b.size());
  std::vector<mint> ans(n + m - 1);
  for(int j = 0; j < m; j++)
    for(int i = 0; i < n; i++)
      ans[i + j] += a[i] * b[j];
  return ans;
}

template <StaticModint mint>
std::vector<mint> convolution_fft(std::vector<mint> a, std::vector<mint> b) {
  const int n = int(a.size()), m = int(b.size());
  if(!n || !m)
    return {};
  int z = (int)std::bit_ceil((unsigned int)(n + m - 1));
  a.resize(z);
  ayuna::butterfly(a);
  b.resize(z);
  ayuna::butterfly(b);
  for(int i = 0; i < z; i++)
    a[i] *= b[i];
  ayuna::butterfly_inv(a);
  a.resize(n + m - 1);
  mint iz = mint(z).inv();
  for(int i = 0; i < n + m - 1; i++)
    a[i] *= iz;
  return a;
}

template <StaticModint mint>
static std::vector<mint> convolution_arbitrary_mod(const std::vector<mint> &a,
                                                   const std::vector<mint> &b) {
  using m1 = modint<167772161>;  // 2^25 * 5 + 1
  using m2 = modint<469762049>;  // 2^26 * 7 + 1
  using m3 = modint<1224736769>; // 2^24 * 73 + 1
  static_assert(StaticModint<m1>);
  static_assert(StaticModint<m2>);
  static_assert(StaticModint<m3>);

  const int n = (int)a.size(), m = (int)b.size();
  std::vector<m1> a1(n), b1(m);
  std::vector<m2> a2(n), b2(m);
  std::vector<m3> a3(n), b3(m);
  for(int i = 0; i < n; i++) {
    const unsigned int v = a[i].val();
    a1[i] = v;
    a2[i] = v;
    a3[i] = v;
  }
  for(int i = 0; i < m; i++) {
    const unsigned int v = b[i].val();
    b1[i] = v;
    b2[i] = v;
    b3[i] = v;
  }

  const auto c1 = ayuna::convolution_fft(std::move(a1), std::move(b1));
  const auto c2 = ayuna::convolution_fft(std::move(a2), std::move(b2));
  const auto c3 = ayuna::convolution_fft(std::move(a3), std::move(b3));

  static constexpr unsigned long long M1 = m1::mod();
  static constexpr unsigned long long M2 = m2::mod();
  static constexpr unsigned long long M1M2 = M1 * M2;

  static const m2 inv_m1_mod_m2 = m2(M1).inv();
  static const m3 inv_m1m2_mod_m3 = m3(M1M2).inv();

  const int sz = n + m - 1;
  std::vector<mint> res(sz);
  for(int i = 0; i < sz; i++) {
    const unsigned long long x1 = c1[i].val();
    const unsigned long long x2 = c2[i].val();
    const unsigned long long x3 = c3[i].val();

    const unsigned long long t1 = x1;
    const unsigned long long t2 =
        (unsigned long long)(m2((long long)x2 - (long long)t1) * inv_m1_mod_m2)
            .val();
    const unsigned long long t3 =
        (unsigned long long)(m3((long long)x3 -
                                (long long)(m3(t1 + M1 * t2).val())) *
                             inv_m1m2_mod_m3)
            .val();

    const unsigned long long mod = mint::mod();
    const unsigned long long term1 = t1 % mod;
    const unsigned long long term2 = (M1 % mod) * (t2 % mod) % mod;
    const unsigned long long term3 = (M1M2 % mod) * (t3 % mod) % mod;
    res[i] = mint((term1 + term2 + term3) % mod);
  }
  return res;
}

template <StaticModint mint>
std::vector<mint> convolution(std::vector<mint> &&a, std::vector<mint> &&b) {
  const int n = int(a.size()), m = int(b.size());
  if(!n || !m)
    return {};
  const int z = (int)std::bit_ceil((unsigned int)(n + m - 1));
  if(std::min(n, m) <= 60)
    return ayuna::convolution_naive(a, b);
  if((mint::mod() - 1) % z == 0)
    return ayuna::convolution_fft(std::move(a), std::move(b));
  return ayuna::convolution_arbitrary_mod<mint>(a, b);
}

// -------------------------
// Bitwise Convolutions (FWT)
// -------------------------

template <StaticModint mint>
static void hadamard_transform(std::vector<mint> &a) {
  const int n = (int)a.size();
  for(int len = 1; len < n; len <<= 1) {
    for(int i = 0; i < n; i += (len << 1)) {
      for(int j = 0; j < len; j++) {
        const mint u = a[i + j];
        const mint v = a[i + j + len];
        a[i + j] = u + v;
        a[i + j + len] = u - v;
      }
    }
  }
}

template <StaticModint mint>
static void hadamard_transform_inv(std::vector<mint> &a) {
  hadamard_transform(a);
  const mint inv_n = mint((int)a.size()).inv();
  for(auto &x : a)
    x *= inv_n;
}

template <StaticModint mint>
std::vector<mint> xor_convolution(std::vector<mint> a, std::vector<mint> b) {
  const int n = (int)a.size();
  assert(n == (int)b.size());
  if(n == 0)
    return {};
  assert(std::has_single_bit((unsigned int)n));
  hadamard_transform(a);
  hadamard_transform(b);
  for(int i = 0; i < n; i++)
    a[i] *= b[i];
  hadamard_transform_inv(a);
  return a;
}

template <StaticModint mint>
static void and_transform(std::vector<mint> &a) {
  const int n = (int)a.size();
  for(int len = 1; len < n; len <<= 1) {
    for(int i = 0; i < n; i += (len << 1)) {
      for(int j = 0; j < len; j++) {
        // f[S] += f[S \ {bit}]
        a[i + j] += a[i + j + len];
      }
    }
  }
}

template <StaticModint mint>
static void and_transform_inv(std::vector<mint> &a) {
  const int n = (int)a.size();
  for(int len = 1; len < n; len <<= 1) {
    for(int i = 0; i < n; i += (len << 1)) {
      for(int j = 0; j < len; j++) {
        a[i + j] -= a[i + j + len];
      }
    }
  }
}

template <StaticModint mint>
std::vector<mint> and_convolution(std::vector<mint> a, std::vector<mint> b) {
  const int n = (int)a.size();
  assert(n == (int)b.size());
  if(n == 0)
    return {};
  assert(std::has_single_bit((unsigned int)n));
  and_transform(a);
  and_transform(b);
  for(int i = 0; i < n; i++)
    a[i] *= b[i];
  and_transform_inv(a);
  return a;
}

template <StaticModint mint>
static void or_transform(std::vector<mint> &a) {
  const int n = (int)a.size();
  for(int len = 1; len < n; len <<= 1) {
    for(int i = 0; i < n; i += (len << 1)) {
      for(int j = 0; j < len; j++) {
        // f[S ∪ {bit}] += f[S]
        a[i + j + len] += a[i + j];
      }
    }
  }
}

template <StaticModint mint>
static void or_transform_inv(std::vector<mint> &a) {
  const int n = (int)a.size();
  for(int len = 1; len < n; len <<= 1) {
    for(int i = 0; i < n; i += (len << 1)) {
      for(int j = 0; j < len; j++) {
        a[i + j + len] -= a[i + j];
      }
    }
  }
}

template <StaticModint mint>
std::vector<mint> or_convolution(std::vector<mint> a, std::vector<mint> b) {
  const int n = (int)a.size();
  assert(n == (int)b.size());
  if(n == 0)
    return {};
  assert(std::has_single_bit((unsigned int)n));
  or_transform(a);
  or_transform(b);
  for(int i = 0; i < n; i++)
    a[i] *= b[i];
  or_transform_inv(a);
  return a;
}

} // namespace ayuna
#line 7 "fps/common.hpp"
#include <iterator>
#line 11 "fps/common.hpp"

namespace ayuna {

template <typename mint, class Derived>
struct FormalPowerSeriesBase : std::vector<mint> {
  using std::vector<mint>::vector;
  using FPS = Derived;

  FPS &self() { return static_cast<FPS &>(*this); }
  const FPS &self() const { return static_cast<const FPS &>(*this); }

  FPS &operator+=(const FPS &r) {
    if(r.size() > this->size())
      this->resize(r.size());
    for(int i = 0; i < (int)r.size(); i++)
      (*this)[i] += r[i];
    return self();
  }

  FPS &operator+=(const mint &r) {
    if(this->empty())
      this->resize(1);
    (*this)[0] += r;
    return self();
  }

  FPS &operator-=(const FPS &r) {
    if(r.size() > this->size())
      this->resize(r.size());
    for(int i = 0; i < (int)r.size(); i++)
      (*this)[i] -= r[i];
    return self();
  }

  FPS &operator*=(const mint &v) {
    for(int k = 0; k < (int)this->size(); k++)
      (*this)[k] *= v;
    return self();
  }

  FPS &operator-=(const mint &r) {
    if(this->empty())
      this->resize(1);
    (*this)[0] -= r;
    return self();
  }

  FPS &operator/=(const FPS &r) {
    if(this->size() < r.size()) {
      this->clear();
      return self();
    }
    int n = this->size() - r.size() + 1;
    if((int)r.size() <= 64) {
      FPS f(self()), g(std::begin(r), std::end(r));
      g.shrink();
      mint coeff = g.back().inv();
      for(auto &x : g)
        x *= coeff;
      int deg = (int)f.size() - (int)g.size() + 1;
      int gs = g.size();
      FPS quo(deg);
      for(int i = deg - 1; i >= 0; i--) {
        quo[i] = f[i + gs - 1];
        for(int j = 0; j < gs; j++)
          f[i + j] -= quo[i] * g[j];
      }
      *this = quo * coeff;
      this->resize(n, mint(0));
      return self();
    }
    FPS rr(std::begin(r), std::end(r));
    return *this = (self().rev().pre(n) * rr.rev().inv(n)).pre(n).rev();
  }

  FPS &operator%=(const FPS &r) {
    *this -= *this / r * r;
    shrink();
    return self();
  }

  FPS operator+(const FPS &r) const { return FPS(self()) += r; }
  FPS operator+(const mint &v) const { return FPS(self()) += v; }
  FPS operator-(const FPS &r) const { return FPS(self()) -= r; }
  FPS operator-(const mint &v) const { return FPS(self()) -= v; }
  FPS operator*(const mint &v) const { return FPS(self()) *= v; }
  FPS operator/(const FPS &r) const { return FPS(self()) /= r; }
  FPS operator%(const FPS &r) const { return FPS(self()) %= r; }
  FPS operator-() const {
    FPS ret(this->size());
    for(int i = 0; i < (int)this->size(); i++)
      ret[i] = -(*this)[i];
    return ret;
  }

  void shrink() {
    while(this->size() && this->back() == mint(0))
      this->pop_back();
  }

  FPS rev() const {
    FPS ret(self());
    reverse(begin(ret), end(ret));
    return ret;
  }

  FPS dot(const FPS &r) const {
    FPS ret(std::min(this->size(), r.size()));
    for(int i = 0; i < (int)ret.size(); i++)
      ret[i] = (*this)[i] * r[i];
    return ret;
  }

  FPS pre(int sz) const {
    FPS ret(begin(*this), begin(*this) + std::min((int)this->size(), sz));
    if((int)ret.size() < sz)
      ret.resize(sz);
    return ret;
  }

  FPS operator>>(int sz) const {
    if((int)this->size() <= sz)
      return {};
    FPS ret(self());
    ret.erase(ret.begin(), ret.begin() + sz);
    return ret;
  }

  FPS operator<<(int sz) const {
    FPS ret(self());
    ret.insert(ret.begin(), sz, mint(0));
    return ret;
  }

  FPS diff() const {
    const int n = (int)this->size();
    FPS ret(std::max(0, n - 1));
    const mint one(1);
    mint coeff(1);
    for(int i = 1; i < n; i++) {
      ret[i - 1] = (*this)[i] * coeff;
      coeff += one;
    }
    return ret;
  }

  FPS inv(int deg = -1) const {
    assert((*this)[0] != mint(0));
    if(deg == -1)
      deg = (*this).size();
    FPS ret({mint(1) / (*this)[0]});
    for(int i = 1; i < deg; i <<= 1)
      ret = (ret + ret - ret * ret * (*this).pre(i << 1)).pre(i << 1);
    return ret.pre(deg);
  }

  FPS integral() const {
    const int n = (int)this->size();
    FPS ret(n + 1);
    ret[0] = mint(0);
    if(n > 0)
      ret[1] = mint(1);
    const auto mod = mint::mod();
    for(int i = 2; i <= n; i++)
      ret[i] = (-ret[mod % i]) * (mod / i);
    for(int i = 0; i < n; i++)
      ret[i + 1] *= (*this)[i];
    return ret;
  }

  mint eval(mint x) const {
    mint r = 0, w = 1;
    for(auto &v : *this)
      r += w * v, w *= x;
    return r;
  }

  FPS log(int deg = -1) const {
    assert(!(*this).empty() && (*this)[0] == mint(1));
    assert(deg);
    if(deg == -1)
      deg = (int)this->size();
    return (this->diff() * this->inv(deg)).pre(deg - 1).integral();
  }

  FPS exp(int deg = -1) const {
    assert((*this).size() == 0 || (*this)[0] == mint(0));
    if(deg == -1)
      deg = (int)this->size();
    FPS ret({mint(1)});
    for(int i = 1; i < deg; i <<= 1) {
      ret = (ret * (pre(i << 1) + mint(1) - ret.log(i << 1))).pre(i << 1);
    }
    return ret.pre(deg);
  }

  FPS pow(int64_t k, int deg = -1) const {
    const int n = (int)this->size();
    if(deg == -1)
      deg = n;
    if(k == 0) {
      FPS ret(deg);
      if(deg)
        ret[0] = 1;
      return ret;
    }
    for(int i = 0; i < n; i++) {
      if((*this)[i] != mint(0)) {
        mint rev = mint(1) / (*this)[i];
        FPS ret = (((*this * rev) >> i).log(deg) * k).exp(deg);
        ret *= (*this)[i].pow(k);
        ret = (ret << (i * k)).pre(deg);
        if((int)ret.size() < deg)
          ret.resize(deg, mint(0));
        return ret;
      }
      if(__int128_t(i + 1) * k >= deg)
        return FPS(deg, mint(0));
    }
    return FPS(deg, mint(0));
  }
};

template <typename mint>
struct FormalPowerSeries
    : FormalPowerSeriesBase<mint, FormalPowerSeries<mint>> {
  using Base = FormalPowerSeriesBase<mint, FormalPowerSeries<mint>>;
  using Base::Base;
  using FPS = FormalPowerSeries;
  using Base::operator*=;
  using Base::operator*;

  FPS &operator*=(const FPS &r) {
    if(this->empty() || r.empty()) {
      this->clear();
      return *this;
    }
    auto result = ayuna::convolution(std::vector<mint>(*this), std::vector<mint>(r));
    this->assign(result.begin(), result.end());
    return *this;
  }

  FPS operator*(const FPS &r) const { return FPS(*this) *= r; }
};

} // namespace ayuna
#line 7 "fps/common_ntt.hpp"
#include <cstdint>
#line 11 "fps/common_ntt.hpp"

namespace ayuna {

template <typename mint>
struct NTTFriendlyFormalPowerSeries
    : FormalPowerSeriesBase<mint, NTTFriendlyFormalPowerSeries<mint>> {
  using Base = FormalPowerSeriesBase<mint, NTTFriendlyFormalPowerSeries<mint>>;
  using FPS = NTTFriendlyFormalPowerSeries;
  using Base::operator*=;
  using Base::operator*;

  void init_tables() { setwy(level); }

  NTTFriendlyFormalPowerSeries() : Base() { init_tables(); }

  explicit NTTFriendlyFormalPowerSeries(int n) : Base(n) { init_tables(); }

  NTTFriendlyFormalPowerSeries(int n, const mint &v) : Base(n, v) {
    init_tables();
  }

  NTTFriendlyFormalPowerSeries(std::initializer_list<mint> init) : Base(init) {
    init_tables();
  }

  template <std::input_iterator It>
  NTTFriendlyFormalPowerSeries(It first, It last) : Base(first, last) {
    init_tables();
  }

  static constexpr uint32_t mod = mint::mod();
  static constexpr int primitive_root = primitive_root_constexpr(mod);
  static constexpr int level = std::countr_zero((uint32_t)(mod - 1));
  static_assert(level >= 3, "NTTFriendlyFormalPowerSeries requires v2(mod-1) "
                            ">= 3 (NTT-friendly prime).");
  mint dw[level], dy[level];

  void setwy(int k) {
    mint w[level], y[level];
    w[k - 1] = mint(primitive_root).pow((mod - 1) / (1 << k));
    y[k - 1] = w[k - 1].inv();
    for(int i = k - 2; i > 0; i--) {
      w[i] = w[i + 1] * w[i + 1];
      y[i] = y[i + 1] * y[i + 1];
    }
    dw[1] = w[1], dw[2] = w[2];
    dy[1] = y[1], dy[2] = y[2];
    for(int i = 3; i < k; i++) {
      dw[i] = dw[i - 1] * y[i - 2] * w[i];
      dy[i] = dy[i - 1] * w[i - 2] * y[i];
    }
  }

  void fft4(std::vector<mint> &a, int k) {
    if((int)a.size() <= 1)
      return;
    if(k == 1) {
      mint a1 = a[1];
      a[1] = a[0] - a[1];
      a[0] = a[0] + a1;
      return;
    }
    if(k & 1) {
      int v = 1 << (k - 1);
      for(int j = 0; j < v; j++) {
        mint ajv = a[j + v];
        a[j + v] = a[j] - ajv;
        a[j] += ajv;
      }
    }
    int u = 1 << (2 + (k & 1));
    int v = 1 << (k - 2 - (k & 1));
    const mint one = mint(1);
    const mint imag = dw[1];
    while(v) {
      for(int j0 = 0, j1 = v, j2 = j1 + v, j3 = j2 + v; j0 < v;
          j0++, j1++, j2++, j3++) {
        const mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
        const mint t0p2 = t0 + t2, t1p3 = t1 + t3;
        const mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;
        a[j0] = t0p2 + t1p3, a[j1] = t0p2 - t1p3;
        a[j2] = t0m2 + t1m3, a[j3] = t0m2 - t1m3;
      }
      mint ww = one, xx = one * dw[2], wx = one;
      for(int jh = 4; jh < u;) {
        ww = xx * xx, wx = ww * xx;
        for(int j0 = jh * v, je = j0 + v, j2 = je + v; j0 < je; j0++, j2++) {
          const mint t0 = a[j0], t1 = a[j0 + v] * xx;
          const mint t2 = a[j2] * ww, t3 = a[j2 + v] * wx;
          const mint t0p2 = t0 + t2, t1p3 = t1 + t3;
          const mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;
          a[j0] = t0p2 + t1p3, a[j0 + v] = t0p2 - t1p3;
          a[j2] = t0m2 + t1m3, a[j2 + v] = t0m2 - t1m3;
        }
        jh += 4;
        xx *= dw[__builtin_ctzll(jh)];
      }
      u <<= 2;
      v >>= 2;
    }
  }

  void ifft4(std::vector<mint> &a, int k) {
    if((int)a.size() <= 1)
      return;
    if(k == 1) {
      const mint a1 = a[1];
      a[1] = a[0] - a[1];
      a[0] = a[0] + a1;
      return;
    }
    int u = 1 << (k - 2);
    int v = 1;
    const mint one = mint(1);
    const mint imag = dy[1];
    while(u) {
      for(int j0 = 0, j1 = v, j2 = j1 + v, j3 = j2 + v; j0 < v;
          j0++, j1++, j2++, j3++) {
        const mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
        const mint t0p1 = t0 + t1, t2p3 = t2 + t3;
        const mint t0m1 = t0 - t1, t2m3 = (t2 - t3) * imag;
        a[j0] = t0p1 + t2p3, a[j2] = t0p1 - t2p3;
        a[j1] = t0m1 + t2m3, a[j3] = t0m1 - t2m3;
      }
      mint ww = one, xx = one * dy[2], yy = one;
      u <<= 2;
      for(int jh = 4; jh < u;) {
        ww = xx * xx, yy = xx * imag;
        for(int j0 = jh * v, je = j0 + v, j2 = je + v; j0 < je; j0++, j2++) {
          const mint t0 = a[j0], t1 = a[j0 + v], t2 = a[j2], t3 = a[j2 + v];
          const mint t0p1 = t0 + t1, t2p3 = t2 + t3;
          const mint t0m1 = (t0 - t1) * xx, t2m3 = (t2 - t3) * yy;
          a[j0] = t0p1 + t2p3, a[j2] = (t0p1 - t2p3) * ww;
          a[j0 + v] = t0m1 + t2m3, a[j2 + v] = (t0m1 - t2m3) * ww;
        }
        jh += 4;
        xx *= dy[__builtin_ctzll(jh)];
      }
      u >>= 4;
      v <<= 2;
    }
    if(k & 1) {
      u = 1 << (k - 1);
      for(int j = 0; j < u; j++) {
        mint ajv = a[j] - a[j + u];
        a[j] += a[j + u];
        a[j + u] = ajv;
      }
    }
  }

  void ntt(std::vector<mint> &a) {
    if((int)a.size() <= 1)
      return;
    fft4(a, __builtin_ctz(a.size()));
  }

  void intt(std::vector<mint> &a) {
    if((int)a.size() <= 1)
      return;
    ifft4(a, __builtin_ctz(a.size()));
    const mint iv = mint(a.size()).inv();
    for(auto &x : a)
      x *= iv;
  }

  FPS &operator*=(const FPS &r) {
    if(this->empty() || r.empty()) {
      this->clear();
      return *this;
    }
    int l = int(this->size() + r.size()) - 1;
    if(std::min(this->size(), r.size()) <= size_t(40)) {
      std::vector<mint> s(l);
      for(int i = 0; i < (int)this->size(); i++)
        for(int j = 0; j < (int)r.size(); j++)
          s[i + j] += (*this)[i] * r[j];
      this->assign(s.begin(), s.end());
      return *this;
    }
    int k = 2, M = 4;
    while(M < l) {
      M <<= 1;
      k++;
    }
    setwy(k);
    std::vector<mint> s(M);
    for(int i = 0; i < (int)this->size(); i++)
      s[i] = (*this)[i];
    fft4(s, k);
    if(this->size() == r.size() && *this == r) {
      for(int i = 0; i < M; i++)
        s[i] *= s[i];
    }
    else {
      std::vector<mint> t(M);
      for(int i = 0; i < (int)r.size(); i++)
        t[i] = r[i];
      fft4(t, k);
      for(int i = 0; i < M; i++)
        s[i] *= t[i];
    }
    ifft4(s, k);
    s.resize(l);
    const mint invm = mint(M).inv();
    for(int i = 0; i < l; i++)
      s[i] *= invm;
    this->assign(s.begin(), s.end());
    return *this;
  }

  FPS operator*(const FPS &r) const { return FPS(*this) *= r; }

  void ntt_doubling(std::vector<mint> &a) {
    const int M = int(a.size());
    auto b = a;
    intt(b);
    mint r = 1, zeta = mint(primitive_root).pow((mod - 1) / (M << 1));
    for(int i = 0; i < M; i++) {
      b[i] *= r;
      r *= zeta;
    }
    ntt(b);
    std::copy(std::begin(b), std::end(b), std::back_inserter(a));
  }
};

} // namespace ayuna
Back to top page