pan/bit: Add preliminary FMA/ADD/MOV implementations
[mesa.git] / src / panfrost / bifrost / test / bi_interpret.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "bit.h"
28 #include "util/half_float.h"
29
/* Value holder for the interpreted datapath: one 64-bit word viewed under
 * every width/signedness the interpreter needs. Writing one member and
 * reading another is deliberate union type punning (well-defined in C).
 * f16 entries are raw half-float bit patterns (uint16_t); convert with
 * _mesa_half_to_float / _mesa_float_to_half. */
typedef union {
        uint64_t u64;
        uint32_t u32;
        uint16_t u16[2]; /* two 16-bit lanes of the low word */
        uint8_t u8[4];   /* four 8-bit lanes of the low word */
        int64_t i64;
        int32_t i32;
        int16_t i16[2];
        int8_t i8[4];
        double f64;
        float f32;
        uint16_t f16[2]; /* fp16 stored as raw bits, not a float type */
} bit_t;
43
44 /* Interprets a subset of Bifrost IR required for automated testing */
45
46 static uint64_t
47 bit_read(struct bit_state *s, bi_instruction *ins, unsigned index, nir_alu_type T, bool FMA)
48 {
49 if (index & BIR_INDEX_REGISTER) {
50 uint32_t reg = index & ~BIR_INDEX_REGISTER;
51 assert(reg < 64);
52 return s->r[reg];
53 } else if (index & BIR_INDEX_UNIFORM) {
54 unreachable("Uniform registers to be implemented");
55 } else if (index & BIR_INDEX_CONSTANT) {
56 return ins->constant.u64 >> (index & ~BIR_INDEX_CONSTANT);
57 } else if (index & BIR_INDEX_ZERO) {
58 return 0;
59 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_STAGE)) {
60 return FMA ? 0 : s->T;
61 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_FMA)) {
62 return s->T0;
63 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_ADD)) {
64 return s->T1;
65 } else if (!index) {
66 /* Placeholder */
67 return 0;
68 } else {
69 unreachable("Invalid source");
70 }
71 }
72
73 static void
74 bit_write(struct bit_state *s, unsigned index, nir_alu_type T, bit_t value, bool FMA)
75 {
76 /* Always write stage passthrough */
77 if (FMA)
78 s->T = value.u32;
79
80 if (index & BIR_INDEX_REGISTER) {
81 uint32_t reg = index & ~BIR_INDEX_REGISTER;
82 assert(reg < 64);
83 s->r[reg] = value.u32;
84 } else if (!index) {
85 /* Nothing to do */
86 } else {
87 unreachable("Invalid destination");
88 }
89 }
90
/* Shorthand half<->float conversions */
#define bh _mesa_float_to_half
#define bf _mesa_half_to_float

/* Apply a scalar float worker lanewise to a vec2 of fp16 values, converting
 * each swizzled source lane up to fp32 and the result back down */

#define bv2f16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.f16[c] = bh(fxn(bf(srcs[0].f16[ins->swizzle[0][c]]), \
                                        bf(srcs[1].f16[ins->swizzle[1][c]]), \
                                        bf(srcs[2].f16[ins->swizzle[2][c]]), \
                                        bf(srcs[3].f16[ins->swizzle[3][c]]))); \
        }

/* Apply a scalar integer worker lanewise to a vec2 of 16-bit values.
 * Stores through i16 (was f16 — identical storage, clearer intent). */

#define bv2i16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.i16[c] = fxn(srcs[0].u16[ins->swizzle[0][c]], \
                                srcs[1].u16[ins->swizzle[1][c]], \
                                srcs[2].u16[ins->swizzle[2][c]], \
                                srcs[3].u16[ins->swizzle[3][c]]); \
        }

/* Scalar 32-bit variants. All four sources are read uniformly; the stray
 * .i32 on bi32's fourth source is now .u32 for consistency (same bits). */

#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)
#define bi32(fxn) dest.i32 = fxn(srcs[0].u32, srcs[1].u32, srcs[2].u32, srcs[3].u32)
112
/* Type-dispatch helpers: each tests ins->dest_type and, on a match, runs the
 * worker of the matching width and `break`s out of the opcode switch in
 * bit_step. On no match, control falls through so the macros compose (see
 * bpoly). BUGFIX: the fp32 branch previously invoked bf32(fxn64), evaluating
 * 32-bit ops in double precision and double-rounding the stored result; it
 * now uses the single-precision worker. */

#define bfloat(fxn64, fxn32) \
        if (ins->dest_type == nir_type_float64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_float32) { \
                bf32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_float16) { \
                bv2f16(fxn32); \
                break; \
        }

#define bint(fxn64, fxn32, fxn16, fxn8) \
        if (ins->dest_type == nir_type_int64 || ins->dest_type == nir_type_uint64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_int32 || ins->dest_type == nir_type_uint32) { \
                bi32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_int16 || ins->dest_type == nir_type_uint16) { \
                bv2i16(fxn16); \
                break; \
        } else if (ins->dest_type == nir_type_int8 || ins->dest_type == nir_type_uint8) { \
                unreachable("TODO: 8-bit"); \
        }

/* Try the float types, then the integer types, then give up */

#define bpoly(name) \
        bfloat(bit_f64 ## name, bit_f32 ## name); \
        bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
        unreachable("Invalid type");
141
/* bit_make_float(name, expr): emits static inline scalar workers
 * bit_f64<name> and bit_f32<name> evaluating `expr` over operands a..d at
 * double and single precision respectively. These are the functions the
 * bfloat/bpoly dispatchers select between. Note the trailing backslashes:
 * each macro is terminated by the following blank line. */

#define bit_make_float(name, expr) \
        static inline double \
        bit_f64 ## name(double a, double b, double c, double d) \
        { \
                return expr; \
        } \
        static inline float \
        bit_f32 ## name(float a, float b, float c, float d) \
        { \
                return expr; \
        } \

/* bit_make_int(name, expr): likewise for the 64/32/16/8-bit signed integer
 * workers consumed by bint */

#define bit_make_int(name, expr) \
        static inline int64_t \
        bit_i64 ## name (int64_t a, int64_t b, int64_t c, int64_t d) \
        { \
                return expr; \
        } \
        \
        static inline int32_t \
        bit_i32 ## name (int32_t a, int32_t b, int32_t c, int32_t d) \
        { \
                return expr; \
        } \
        \
        static inline int16_t \
        bit_i16 ## name (int16_t a, int16_t b, int16_t c, int16_t d) \
        { \
                return expr; \
        } \
        \
        static inline int8_t \
        bit_i8 ## name (int8_t a, int8_t b, int8_t c, int8_t d) \
        { \
                return expr; \
        } \

/* bit_make_poly: emit both float and integer workers, for ops defined at
 * every type */

#define bit_make_poly(name, expr) \
        bit_make_float(name, expr) \
        bit_make_int(name, expr) \

/* Instantiate workers for the supported ops. fma is float-only; add and mov
 * exist at all types. */

bit_make_poly(add, a + b);
bit_make_float(fma, (a * b) + c);
bit_make_poly(mov, a);
186
/* Interprets a single instruction issued to one unit slot (FMA when
 * FMA=true, ADD otherwise), reading sources from and committing results to
 * the interpreter state s. Unsupported opcodes abort via unreachable(). */

void
bit_step(struct bit_state *s, bi_instruction *ins, bool FMA)
{
        /* First, load sources */
        bit_t srcs[BIR_SRC_COUNT] = { 0 };

        bi_foreach_src(ins, src)
                srcs[src].u64 = bit_read(s, ins, ins->src[src], ins->src_types[src], FMA);

        /* Next, do the action of the instruction. The b* dispatch macros
         * expand to if-chains on ins->dest_type whose matching arm computes
         * dest and `break`s out of this switch. */
        bit_t dest = { 0 };

        switch (ins->type) {
        case BI_ADD:
                bpoly(add);

        case BI_BRANCH:
        case BI_CMP:
        case BI_BITWISE:
        case BI_CONVERT:
        case BI_CSEL:
                unreachable("Unsupported op");

        case BI_FMA: {
                /* FMA is float-only, so dispatch over float types directly */
                bfloat(bit_f64fma, bit_f32fma);
                unreachable("Unknown type");
        }

        case BI_FREXP:
        case BI_ISUB:
        case BI_MINMAX:
                unreachable("Unsupported op");

        case BI_MOV:
                bpoly(mov);

        case BI_SHIFT:
        case BI_STORE_VAR:
        case BI_SPECIAL: /* _FAST, _TABLE on supported GPUs */
        case BI_SWIZZLE:
        case BI_ROUND:
                unreachable("Unsupported op");

        /* We only interpret vertex shaders */
        case BI_DISCARD:
        case BI_LOAD_VAR:
        case BI_ATEST:
        case BI_BLEND:
                unreachable("Fragment op used in interpreter");

        /* Modeling main memory is more than I bargained for */
        case BI_LOAD_UNIFORM:
        case BI_LOAD_ATTR:
        case BI_LOAD_VAR_ADDRESS:
        case BI_LOAD:
        case BI_STORE:
        case BI_TEX:
                unreachable("Unsupported I/O in interpreter");

        default:
                unreachable("Unsupported op");
        }

        /* Finally, store the result */
        bit_write(s, ins->dest, ins->dest_type, dest, FMA);

        /* For ADD - change out the passthrough: once the ADD slot retires,
         * T0/T1 expose this cycle's FMA and ADD results respectively to the
         * next instruction pair */
        if (!FMA) {
                s->T0 = s->T;
                s->T1 = dest.u32;
        }
}
259
260 #undef bh
261 #undef bf