8a4781b538e5aea04a552ec9962e3577ecc17a45
[mesa.git] / src / panfrost / bifrost / test / bi_interpret.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "bit.h"
28 #include "util/half_float.h"
29
/* One 64-bit machine word, viewable as any of the scalar or small-vector
 * types the interpreter works with. All members alias the same storage. */
typedef union {
        /* Full 64-bit views. u64 is deliberately the first member so that
         * `= { 0 }` zero-initialises all eight bytes. */
        uint64_t u64;
        int64_t i64;
        double f64;

        /* 32-bit views of the leading four bytes */
        uint32_t u32;
        int32_t i32;
        float f32;

        /* Two 16-bit lanes; f16 holds raw half-float bit patterns */
        uint16_t u16[2];
        int16_t i16[2];
        uint16_t f16[2];

        /* Four 8-bit lanes */
        uint8_t u8[4];
        int8_t i8[4];
} bit_t;
43
44 /* Interprets a subset of Bifrost IR required for automated testing */
45
46 static uint64_t
47 bit_read(struct bit_state *s, bi_instruction *ins, unsigned index, nir_alu_type T, bool FMA)
48 {
49 if (index & BIR_INDEX_REGISTER) {
50 uint32_t reg = index & ~BIR_INDEX_REGISTER;
51 assert(reg < 64);
52 return s->r[reg];
53 } else if (index & BIR_INDEX_UNIFORM) {
54 unreachable("Uniform registers to be implemented");
55 } else if (index & BIR_INDEX_CONSTANT) {
56 return ins->constant.u64 >> (index & ~BIR_INDEX_CONSTANT);
57 } else if (index & BIR_INDEX_ZERO) {
58 return 0;
59 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_STAGE)) {
60 return FMA ? 0 : s->T;
61 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_FMA)) {
62 return s->T0;
63 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_ADD)) {
64 return s->T1;
65 } else if (!index) {
66 /* Placeholder */
67 return 0;
68 } else {
69 unreachable("Invalid source");
70 }
71 }
72
73 static void
74 bit_write(struct bit_state *s, unsigned index, nir_alu_type T, bit_t value, bool FMA)
75 {
76 /* Always write stage passthrough */
77 if (FMA)
78 s->T = value.u32;
79
80 if (index & BIR_INDEX_REGISTER) {
81 uint32_t reg = index & ~BIR_INDEX_REGISTER;
82 assert(reg < 64);
83 s->r[reg] = value.u32;
84 } else if (!index) {
85 /* Nothing to do */
86 } else {
87 unreachable("Invalid destination");
88 }
89 }
90
/* Short local aliases for the half <-> single conversion helpers */
#define bh _mesa_float_to_half
#define bf _mesa_half_to_float

/*
 * The macros below expand inside bit_step and rely on three names being in
 * scope there: `ins` (the instruction), `srcs` (loaded operands) and `dest`
 * (the result being built). `fxn` is a four-argument scalar helper.
 */

/* 2x fp16: widen each swizzled lane to float, apply fxn, narrow the result */
#define bv2f16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.f16[c] = bh(fxn(bf(srcs[0].f16[ins->swizzle[0][c]]), \
                                     bf(srcs[1].f16[ins->swizzle[1][c]]), \
                                     bf(srcs[2].f16[ins->swizzle[2][c]]), \
                                     bf(srcs[3].f16[ins->swizzle[3][c]]))); \
        }

/* 2x int16: apply fxn per swizzled lane. The integer views of the union are
 * used on both sides so that no value is routed through the fp16 member. */
#define bv2i16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.i16[c] = fxn(srcs[0].i16[ins->swizzle[0][c]], \
                                  srcs[1].i16[ins->swizzle[1][c]], \
                                  srcs[2].i16[ins->swizzle[2][c]], \
                                  srcs[3].i16[ins->swizzle[3][c]]); \
        }

/* Scalar 32-bit float / integer application (no swizzle) */
#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)
#define bi32(fxn) dest.i32 = fxn(srcs[0].i32, srcs[1].i32, srcs[2].i32, srcs[3].i32)
112
/*
 * Type dispatch helpers. They expand to an if/else ladder on ins->dest_type
 * and must be used directly inside a `switch` case: a handled type ends with
 * `break`, which leaves the enclosing switch. An unhandled type falls out of
 * the ladder so the caller can try another dispatcher or report an error.
 */

/* Float dispatch. Note the pairing: a 32-bit destination is evaluated with
 * the first (64-bit) helper and a 16-bit destination with the second
 * (32-bit) helper, i.e. each width runs through the next wider function.
 * NOTE(review): presumably so results are rounded once at the end - confirm. */
#define bfloat(wide_fn, narrow_fn) \
        if (ins->dest_type == nir_type_float32) { \
                bf32(wide_fn); \
                break; \
        } else if (ins->dest_type == nir_type_float16) { \
                bv2f16(narrow_fn); \
                break; \
        } else if (ins->dest_type == nir_type_float64) { \
                unreachable("TODO: 64-bit"); \
        }

/* Integer dispatch; signed and unsigned share one helper per width. Only the
 * 32-bit and 16-bit helpers are used so far. */
#define bint(fn64, fn32, fn16, fn8) \
        if (ins->dest_type == nir_type_uint32 || ins->dest_type == nir_type_int32) { \
                bi32(fn32); \
                break; \
        } else if (ins->dest_type == nir_type_uint16 || ins->dest_type == nir_type_int16) { \
                bv2i16(fn16); \
                break; \
        } else if (ins->dest_type == nir_type_uint8 || ins->dest_type == nir_type_int8) { \
                unreachable("TODO: 8-bit"); \
        } else if (ins->dest_type == nir_type_uint64 || ins->dest_type == nir_type_int64) { \
                unreachable("TODO: 64-bit"); \
        }

/* Dispatch an op that exists for floats and integers alike; `name` selects
 * the bit_f64<name>/bit_f32<name>/bit_i<N><name> helper family. */
#define bpoly(name) \
        bfloat(bit_f64 ## name, bit_f32 ## name); \
        bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
        unreachable("Invalid type");
141
/*
 * Generators for the scalar helpers used by the dispatch macros. Each helper
 * takes four operands named a, b, c, d (unused ones are ignored) and returns
 * `expr` evaluated over them.
 */

/* Emit bit_f64<name> and bit_f32<name> */
#define bit_make_float(name, expr) \
        static inline double \
        bit_f64 ## name(double a, double b, double c, double d) \
        { \
                return expr; \
        } \
        static inline float \
        bit_f32 ## name(float a, float b, float c, float d) \
        { \
                return expr; \
        }

/*
 * Emit bit_i<bits><name>. The signature stays signed, but `expr` is
 * evaluated on unsigned copies of the operands so that arithmetic wraps
 * modulo 2^bits instead of hitting undefined signed overflow. The result is
 * converted back to the signed type. Operations whose result depends on
 * signedness (comparison, division, right shift) must not be generated
 * through this macro.
 */
#define bit_make_int_sized(name, bits, expr) \
        static inline int ## bits ## _t \
        bit_i ## bits ## name(int ## bits ## _t sa, int ## bits ## _t sb, \
                              int ## bits ## _t sc, int ## bits ## _t sd) \
        { \
                uint ## bits ## _t a = (uint ## bits ## _t) sa; \
                uint ## bits ## _t b = (uint ## bits ## _t) sb; \
                uint ## bits ## _t c = (uint ## bits ## _t) sc; \
                uint ## bits ## _t d = (uint ## bits ## _t) sd; \
                (void) a; (void) b; (void) c; (void) d; \
                return (int ## bits ## _t) (expr); \
        }

/* Emit the integer helper for every supported width */
#define bit_make_int(name, expr) \
        bit_make_int_sized(name, 64, expr) \
        bit_make_int_sized(name, 32, expr) \
        bit_make_int_sized(name, 16, expr) \
        bit_make_int_sized(name, 8, expr)

/* Emit both the float and the integer helper families */
#define bit_make_poly(name, expr) \
        bit_make_float(name, expr) \
        bit_make_int(name, expr)

/* No trailing semicolons: the expansions are complete function definitions */
bit_make_poly(add, a + b)
bit_make_float(fma, (a * b) + c)
bit_make_poly(mov, a)
186
187 /* Modifiers */
188
189 static float
190 bit_outmod(float raw, enum bifrost_outmod mod)
191 {
192 switch (mod) {
193 case BIFROST_POS:
194 return MAX2(raw, 0.0);
195 case BIFROST_SAT_SIGNED:
196 return CLAMP(raw, -1.0, 1.0);
197 case BIFROST_SAT:
198 return CLAMP(raw, 0.0, 1.0);
199 default:
200 return raw;
201 }
202 }
203
204 void
205 bit_step(struct bit_state *s, bi_instruction *ins, bool FMA)
206 {
207 /* First, load sources */
208 bit_t srcs[BIR_SRC_COUNT] = { 0 };
209
210 bi_foreach_src(ins, src)
211 srcs[src].u64 = bit_read(s, ins, ins->src[src], ins->src_types[src], FMA);
212
213 /* Next, do the action of the instruction */
214 bit_t dest = { 0 };
215
216 switch (ins->type) {
217 case BI_ADD:
218 bpoly(add);
219
220 case BI_BRANCH:
221 case BI_CMP:
222 case BI_BITWISE:
223 case BI_CONVERT:
224 case BI_CSEL:
225 unreachable("Unsupported op");
226
227 case BI_FMA: {
228 bfloat(bit_f64fma, bit_f32fma);
229 unreachable("Unknown type");
230 }
231
232 case BI_FREXP:
233 case BI_ISUB:
234 case BI_MINMAX:
235 unreachable("Unsupported op");
236
237 case BI_MOV:
238 bpoly(mov);
239
240 case BI_SHIFT:
241 case BI_STORE_VAR:
242 case BI_SPECIAL: /* _FAST, _TABLE on supported GPUs */
243 case BI_SWIZZLE:
244 case BI_ROUND:
245 unreachable("Unsupported op");
246
247 /* We only interpret vertex shaders */
248 case BI_DISCARD:
249 case BI_LOAD_VAR:
250 case BI_ATEST:
251 case BI_BLEND:
252 unreachable("Fragment op used in interpreter");
253
254 /* Modeling main memory is more than I bargained for */
255 case BI_LOAD_UNIFORM:
256 case BI_LOAD_ATTR:
257 case BI_LOAD_VAR_ADDRESS:
258 case BI_LOAD:
259 case BI_STORE:
260 case BI_TEX:
261 unreachable("Unsupported I/O in interpreter");
262
263 default:
264 unreachable("Unsupported op");
265 }
266
267 /* Apply outmod */
268 if (bi_has_outmod(ins) && ins->outmod != BIFROST_NONE) {
269 if (ins->dest_type == nir_type_float16) {
270 for (unsigned c = 0; c < 2; ++c)
271 dest.f16[c] = bh(bit_outmod(bf(dest.f16[c]), ins->outmod));
272 } else {
273 dest.f32 = bit_outmod(dest.f32, ins->outmod);
274 }
275 }
276
277 /* Finally, store the result */
278 bit_write(s, ins->dest, ins->dest_type, dest, FMA);
279
280 /* For ADD - change out the passthrough */
281 if (!FMA) {
282 s->T0 = s->T;
283 s->T1 = dest.u32;
284 }
285 }
286
287 #undef bh
288 #undef bf