Perpetual Subtraction

题目传送门

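可以很容易地得出dp方程:设 $f_t(j)$ 表示 $t$ 步之后数字为 $j$ 的概率,则 $f_{t+1}(j)=\sum_{i=j}^{n}\frac{f_t(i)}{i+1}$
然后得出转移矩阵 $A$:当 $j\le i$ 时 $A_{j,i}=\frac{1}{i+1}$,否则 $A_{j,i}=0$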
矩阵快速幂优化dp即可
由于这个矩阵很大,我们需要考虑更多优化
因为转移矩阵是个上三角矩阵,并且对角线上的元素 $\frac{1}{i+1}$ 互不相同(即特征值两两不同),所以可以进行对角化
对角化之后发现,特征向量矩阵乘一个向量其实就是一个卷积,可以用 FFT 优化(由于在模 $998244353$ 意义下计算,代码中用的是 NTT)
然后就神奇地在 $O(n\log n)$ 时间复杂度内做出来了

代码如下:
#include<bits/stdc++.h>
#define fi first
#define se second
using namespace std;
typedef long long LL;
// Not referenced by the code visible in this file; all arithmetic below is done modulo P.
const int mod = 1e9 + 7;
// Capacity of the a / p / inv arrays.
const int maxn = 1e5 + 5;
// Transform length that main passes to NTT (2^18).
const int N = 1 << 18;
// Modulus used by mul / add / powt.
const int P = 998244353;
// Base that GetWn raises to the power (P - 1) / 2^i to fill wn.
const int G = 3;
// Number of entries in wn.
const int NUM = 20;
// wn[i] = G^((P - 1) / 2^i) mod P; filled by GetWn, read by NTT as the per-level twiddle step.
int wn[NUM];
int mul(int x, int y) {
LL z = 1LL * x * y;
return z - z / P * P;
}
// Returns x + y with a single conditional subtraction of P, i.e. the sum
// modulo P when both arguments are already in [0, P].
int add(int x, int y) {
    const int sum = x + y;
    return sum >= P ? sum - P : sum;
}
// Computes a^b modulo P by binary exponentiation: the exponent is consumed
// from its lowest bit upwards while the base is squared on every step.
int powt(int a, LL b) {
    int result = 1;
    for (; b; b >>= 1, a = mul(a, a)) {
        if (b & 1) result = mul(result, a);
    }
    return result;
}
// Fills wn[0..NUM-1] with wn[level] = G^((P - 1) / 2^level) mod P, the
// per-level twiddle step later read by NTT.
void GetWn() {
    for (int level = 0; level < NUM; ++level) {
        // (P - 1) is positive, so shifting right by `level` equals dividing by 2^level.
        const int exponent = (P - 1) >> level;
        wn[level] = powt(G, exponent);
    }
}
void NTT(int a[], int len, int t) {
for (int i = 0, j = 0; i < len; i++) {
if (i > j) swap(a[i], a[j]);
for (int l = len >> 1; (j ^= l) < l; l >>= 1);
}
int id = 0;
for(int h = 2; h <= len; h <<= 1) {
id++;
for(int j = 0; j < len; j += h) {
int w = 1;
for(int k = j; k < j + h / 2; ++k) {
int u = a[k];
int t = mul(w, a[k + h / 2]);
a[k] = add(u, t);
a[k + h / 2] = add(u, P - t);
w = mul(w, wn[id]);
}
}
}
if(t == -1) {
for(int i = 1; i < len / 2; i++) swap(a[i], a[len - i]);
LL inv = powt(len, P - 2);
for(int i = 0; i < len; i++) a[i] = mul(a[i], inv);
}
}
// a: values read by main, overwritten in place with the values printed at the end.
// p[i]: i! mod P; inv[i]: modular inverse of p[i] (both filled in main).
int a[maxn], p[maxn], inv[maxn];
// Work buffers of length N for the two NTT-based convolutions in main.
int f[N], g[N];
int main() {
#ifdef CX_TEST
freopen("E:\\program--GG\\test_in.txt", "r", stdin);
#endif
GetWn();
int n, i;
LL m;
scanf("%d%lld", &n, &m);
for(i = 0;i <= n; i++) scanf("%d", &a[i]);
for(i = p[0] = 1;i <= n; i++) p[i] = mul(p[i - 1], i);
inv[n] = powt(p[n], P - 2);
for(i = n - 1;i >= 0; i--) inv[i] = mul(inv[i + 1], i + 1);
for(i = 0;i <= n; i++) f[i] = inv[i];
for(i = 0;i <= n; i++) g[n - i] = mul(p[i], a[i]);
NTT(f, N, 1);
NTT(g, N, 1);
for(i = 0;i < N; i++) f[i] = mul(f[i], g[i]);
NTT(f, N, -1);
for(i = 0;i <= n; i++) a[i] = mul(mul(f[n - i], inv[i]), powt(powt(i + 1, P - 2), m));
memset(f, 0, sizeof(f));
memset(g, 0, sizeof(g));
for(i = 0;i <= n; i++) f[i] = inv[i];
for(i = 0;i <= n; i++) {
g[n - i] = mul(p[i], a[i]);
if(i & 1) g[n - i] = add(0, P - g[n - i]);
}
NTT(f, N, 1);
NTT(g, N, 1);
for(i = 0;i < N; i++) f[i] = mul(f[i], g[i]);
NTT(f, N, -1);
for(i = 0;i <= n; i++) {
a[i] = mul(f[n - i], inv[i]);
if(i & 1) a[i] = add(0, P - a[i]);
}
for(i = 0;i <= n; i++) printf("%d ", a[i]);
return 0;
}