src/panfrost/bifrost/test/bi_interpret.c

   1 /*
   2  * Copyright (C) 2020 Collabora Ltd.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors (Collabora):
  24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  25  */
  26
  27 #include <math.h>
  28 #include "bit.h"
  29 #include "util/half_float.h"
  30
/* A single interpreter value, viewable as any of the scalar or 2x16 / 4x8
 * vector types handled in this file. All members share storage, so a value
 * written through one member is reinterpreted (not converted) when read
 * through another.
 *
 * u64 is deliberately the first member: `bit_t x = { 0 }` initialises the
 * first member, which zeroes the whole value. */
typedef union {
        uint64_t u64;
        uint32_t u32;
        uint16_t u16[2];
        uint8_t u8[4];
        int64_t i64;
        int32_t i32;
        int16_t i16[2];
        int8_t i8[4];
        double f64;
        float f32;
        /* Half floats are held as raw 16-bit patterns and converted with
         * _mesa_half_to_float / _mesa_float_to_half where they are used */
        uint16_t f16[2];
} bit_t;
  44
  45 /* Interprets a subset of Bifrost IR required for automated testing */
  46
  47 static uint64_t
  48 bit_read(struct bit_state *s, bi_instruction *ins, unsigned index, nir_alu_type T, bool FMA)
  49 {
  50         if (index & BIR_INDEX_REGISTER) {
  51                 uint32_t reg = index & ~BIR_INDEX_REGISTER;
  52                 assert(reg < 64);
  53                 return s->r[reg];
  54         } else if (index & BIR_INDEX_UNIFORM) {
  55                 unreachable("Uniform registers to be implemented");
  56         } else if (index & BIR_INDEX_CONSTANT) {
  57                 return ins->constant.u64 >> (index & ~BIR_INDEX_CONSTANT);
  58         } else if (index & BIR_INDEX_ZERO) {
  59                 return 0;
  60         } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_STAGE)) {
  61                 return FMA ? 0 : s->T;
  62         } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_FMA)) {
  63                 return s->T0;
  64         } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_ADD)) {
  65                 return s->T1;
  66         } else if (!index) {
  67                 /* Placeholder */
  68                 return 0;
  69         } else {
  70                 unreachable("Invalid source");
  71         }
  72 }
  73
  74 static void
  75 bit_write(struct bit_state *s, unsigned index, nir_alu_type T, bit_t value, bool FMA)
  76 {
  77         /* Always write stage passthrough */
  78         if (FMA)
  79                 s->T = value.u32;
  80
  81         if (index & BIR_INDEX_REGISTER) {
  82                 uint32_t reg = index & ~BIR_INDEX_REGISTER;
  83                 assert(reg < 64);
  84                 s->r[reg] = value.u32;
  85         } else if (!index) {
  86                 /* Nothing to do */
  87         } else {
  88                 unreachable("Invalid destination");
  89         }
  90 }
  91
/* Short file-local aliases for the float -> half (bh) and half -> float (bf)
 * converters. They are used by the macros and functions that follow and are
 * #undef'd again at the end of the file. */
#define bh _mesa_float_to_half
#define bf _mesa_half_to_float
  94
  95 #define bv2f16(fxn) \
  96         for (unsigned c = 0; c < 2; ++c) { \
  97                 dest.f16[c] = bh(fxn(bf(srcs[0].f16[ins->swizzle[0][c]]), \
  98                                         bf(srcs[1].f16[ins->swizzle[1][c]]), \
  99                                         bf(srcs[2].f16[ins->swizzle[2][c]]), \
 100                                         bf(srcs[3].f16[ins->swizzle[3][c]]))); \
 101         }
 102
 103 #define bv2i16(fxn) \
 104         for (unsigned c = 0; c < 2; ++c) { \
 105                 dest.f16[c] = fxn(srcs[0].u16[ins->swizzle[0][c]], \
 106                                         srcs[1].u16[ins->swizzle[1][c]], \
 107                                         srcs[2].u16[ins->swizzle[2][c]], \
 108                                         srcs[3].u16[ins->swizzle[3][c]]); \
 109         }
 110
/* Scalar 32-bit application of a four-operand function: bf32 passes the
 * sources as floats and stores to dest.f32, bi32 passes them as 32-bit
 * integers and stores to dest.i32. Both expect `srcs` and `dest` in scope at
 * the expansion site. Note that bi32 reads the first three sources through
 * .u32 and the fourth through .i32. */
#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)
#define bi32(fxn) dest.i32 = fxn(srcs[0].u32, srcs[1].u32, srcs[2].u32, srcs[3].i32)
 113
/* Dispatches a float operation on ins->dest_type.
 *
 * Must be expanded directly inside a switch (or loop) body: the handled
 * branches finish with `break`, and when dest_type is not one of the float
 * types the expansion falls out of the bottom so the caller can try another
 * dispatch or report an error.
 *
 * Mind the argument-to-width mapping: a float32 destination is evaluated with
 * fxn64 (through bf32) and a float16 destination with fxn32 (through bv2f16);
 * float64 is not implemented.
 * NOTE(review): presumably deliberate, so each width is computed one precision
 * step higher before being narrowed -- confirm. */
#define bfloat(fxn64, fxn32) \
        if (ins->dest_type == nir_type_float64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_float32) { \
                bf32(fxn64); \
                break; \
        } else if (ins->dest_type == nir_type_float16) { \
                bv2f16(fxn32); \
                break; \
        }
 124
/* Dispatches an integer operation on ins->dest_type; signed and unsigned
 * types of the same width share a path.
 *
 * 32-bit destinations use fxn32 (through bi32) and 16-bit destinations use
 * fxn16 on both lanes (through bv2i16). The 64-bit and 8-bit cases are not
 * implemented, so fxn64 and fxn8 are accepted but never referenced in the
 * expansion.
 *
 * Like bfloat, this must be expanded directly inside a switch (or loop) body:
 * handled branches end in `break`, and a non-integer dest_type falls out of
 * the bottom. */
#define bint(fxn64, fxn32, fxn16, fxn8) \
        if (ins->dest_type == nir_type_int64 || ins->dest_type == nir_type_uint64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_int32 || ins->dest_type == nir_type_uint32) { \
                bi32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_int16 || ins->dest_type == nir_type_uint16) { \
                bv2i16(fxn16); \
                break; \
        } else if (ins->dest_type == nir_type_int8 || ins->dest_type == nir_type_uint8) { \
                unreachable("TODO: 8-bit"); \
        }
 137
/* Type-generic dispatch for the operation called `name`: tries the float
 * helpers (bit_f64<name>, bit_f32<name>), then the integer helpers
 * (bit_i64<name> ... bit_i8<name>), and reaches unreachable() if dest_type
 * matched neither family. Because the float and integer dispatchers leave via
 * `break`, this has to be expanded directly inside a switch case; the trailing
 * unreachable() also means control never falls through into the next case. */
#define bpoly(name) \
        bfloat(bit_f64 ## name, bit_f32 ## name); \
        bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
        unreachable("Invalid type");
 142
 143 #define bit_make_float(name, expr) \
 144         static inline double \
 145         bit_f64 ## name(double a, double b, double c, double d) \
 146         { \
 147                 return expr; \
 148         } \
 149         static inline float \
 150         bit_f32 ## name(float a, float b, float c, float d) \
 151         { \
 152                 return expr; \
 153         } \
 154
 155 #define bit_make_int(name, expr) \
 156         static inline int64_t \
 157         bit_i64 ## name (int64_t a, int64_t b, int64_t c, int64_t d) \
 158         { \
 159                 return expr; \
 160         } \
 161         \
 162         static inline int32_t \
 163         bit_i32 ## name (int32_t a, int32_t b, int32_t c, int32_t d) \
 164         { \
 165                 return expr; \
 166         } \
 167         \
 168         static inline int16_t \
 169         bit_i16 ## name (int16_t a, int16_t b, int16_t c, int16_t d) \
 170         { \
 171                 return expr; \
 172         } \
 173         \
 174         static inline int8_t \
 175         bit_i8 ## name (int8_t a, int8_t b, int8_t c, int8_t d) \
 176         { \
 177                 return expr; \
 178         } \
 179
 180 #define bit_make_poly(name, expr) \
 181         bit_make_float(name, expr) \
 182         bit_make_int(name, expr) \
 183
 184 bit_make_poly(add, a + b);
 185 bit_make_float(fma, (a * b) + c);
 186 bit_make_poly(mov, a);
 187
 188 /* Modifiers */
 189
 190 static float
 191 bit_outmod(float raw, enum bifrost_outmod mod)
 192 {
 193         switch (mod) {
 194         case BIFROST_POS:
 195                 return MAX2(raw, 0.0);
 196         case BIFROST_SAT_SIGNED:
 197                 return CLAMP(raw, -1.0, 1.0);
 198         case BIFROST_SAT:
 199                 return CLAMP(raw, 0.0, 1.0);
 200         default:
 201                 return raw;
 202         }
 203 }
 204
 205 static float
 206 bit_srcmod(float raw, bool abs, bool neg)
 207 {
 208         if (abs)
 209                 raw = fabs(raw);
 210
 211         if (neg)
 212                 raw = -raw;
 213
 214         return raw;
 215 }
 216
 217 #define BIT_COND(cond, left, right) \
 218         if (cond == BI_COND_LT) return left < right; \
 219         else if (cond == BI_COND_LE) return left <= right; \
 220         else if (cond == BI_COND_GE) return left >= right; \
 221         else if (cond == BI_COND_GT) return left > right; \
 222         else if (cond == BI_COND_EQ) return left == right; \
 223         else if (cond == BI_COND_NE) return left != right; \
 224         else { return true; }
 225
 226 static bool
 227 bit_eval_cond(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned c)
 228 {
 229         if (T == nir_type_float32) {
 230                 BIT_COND(cond, l.f32, r.f32);
 231         } else if (T == nir_type_float16) {
 232                 float left = bf(l.f16[c]);
 233                 float right = bf(r.f16[c]);
 234                 BIT_COND(cond, left, right);
 235         } else if (T == nir_type_int32) {
 236                 int32_t left = (int32_t) l.u32;
 237                 int32_t right = (int32_t) r.u32;
 238                 BIT_COND(cond, left, right);
 239         } else if (T == nir_type_int16) {
 240                 int16_t left = (int16_t) l.u32;
 241                 int16_t right = (int16_t) r.u32;
 242                 BIT_COND(cond, left, right);
 243         } else if (T == nir_type_uint32) {
 244                 BIT_COND(cond, l.u32, r.u32);
 245         } else if (T == nir_type_uint16) {
 246                 BIT_COND(cond, l.u16[c], r.u16[c]);
 247         } else {
 248                 unreachable("Unknown type evaluated");
 249         }
 250 }
 251
 252 static float
 253 biti_special(float Q, enum bi_special_op op)
 254 {
 255         switch (op) {
 256         case BI_SPECIAL_FRCP: return 1.0 / Q;
 257         case BI_SPECIAL_FRSQ: {
 258               double Qf = 1.0 / sqrt(Q);
 259               return Qf;
 260         }
 261         default: unreachable("Invalid special");
 262         }
 263 }
 264
 265 void
 266 bit_step(struct bit_state *s, bi_instruction *ins, bool FMA)
 267 {
 268         /* First, load sources */
 269         bit_t srcs[BIR_SRC_COUNT] = { 0 };
 270
 271         bi_foreach_src(ins, src)
 272                 srcs[src].u64 = bit_read(s, ins, ins->src[src], ins->src_types[src], FMA);
 273
 274         /* Apply source modifiers if we need to */
 275         if (bi_has_source_mods(ins)) {
 276                 bi_foreach_src(ins, src) {
 277                         if (ins->src_types[src] == nir_type_float16) {
 278                                 for (unsigned c = 0; c < 2; ++c) {
 279                                         srcs[src].f16[c] = bh(bit_srcmod(bf(srcs[src].f16[c]),
 280                                                         ins->src_abs[src],
 281                                                         ins->src_neg[src]));
 282                                 }
 283                         } else if (ins->src_types[src] == nir_type_float32) {
 284                                 srcs[src].f32 = bit_srcmod(srcs[src].f32,
 285                                                         ins->src_abs[src],
 286                                                         ins->src_neg[src]);
 287                         }
 288                 }
 289         }
 290
 291         /* Next, do the action of the instruction */
 292         bit_t dest = { 0 };
 293
 294         switch (ins->type) {
 295         case BI_ADD:
 296                 bpoly(add);
 297
 298         case BI_BRANCH:
 299         case BI_CMP:
 300         case BI_BITWISE:
 301         case BI_CONVERT:
 302                 unreachable("Unsupported op");
 303
 304         case BI_CSEL: {
 305                 bool direct = ins->csel_cond == BI_COND_ALWAYS;
 306                 bool cond = direct ? srcs[0].u32 :
 307                         bit_eval_cond(ins->csel_cond, srcs[0], srcs[1], ins->src_types[0], 0);
 308
 309                 dest = cond ? srcs[2] : srcs[3];
 310                 break;
 311         }
 312
 313         case BI_FMA: {
 314                 bfloat(bit_f64fma, bit_f32fma);
 315                 unreachable("Unknown type");
 316         }
 317
 318         case BI_FREXP:
 319         case BI_ISUB:
 320         case BI_MINMAX:
 321                 unreachable("Unsupported op");
 322
 323         case BI_MOV:
 324                 bpoly(mov);
 325
 326         case BI_SPECIAL: {
 327                 assert(nir_alu_type_get_base_type(ins->dest_type) == nir_type_float);
 328                 assert(nir_alu_type_get_base_type(ins->dest_type) != nir_type_float64);
 329                 float Q = (ins->dest_type == nir_type_float16) ?
 330                         bf(srcs[0].u16[ins->swizzle[0][0]]) :
 331                         srcs[0].f32;
 332
 333                 float R = biti_special(Q, ins->op.special);
 334
 335                 if (ins->dest_type == nir_type_float16) {
 336                         dest.f16[0] = bh(R);
 337
 338                         if (!ins->swizzle[0][0] && ins->op.special == BI_SPECIAL_FRSQ) {
 339                                 /* Sorry. */
 340                                 dest.f16[0]++;
 341                         }
 342                 } else {
 343                         dest.f32 = R;
 344                 }
 345                 break;
 346         }
 347
 348         case BI_SHIFT:
 349         case BI_SWIZZLE:
 350         case BI_ROUND:
 351                 unreachable("Unsupported op");
 352
 353         /* We only interpret vertex shaders */
 354         case BI_DISCARD:
 355         case BI_LOAD_VAR:
 356         case BI_ATEST:
 357         case BI_BLEND:
 358                 unreachable("Fragment op used in interpreter");
 359
 360         /* Modeling main memory is more than I bargained for */
 361         case BI_LOAD_UNIFORM:
 362         case BI_LOAD_ATTR:
 363         case BI_LOAD_VAR_ADDRESS:
 364         case BI_LOAD:
 365         case BI_STORE:
 366         case BI_STORE_VAR:
 367         case BI_TEX:
 368                 unreachable("Unsupported I/O in interpreter");
 369
 370         default:
 371                 unreachable("Unsupported op");
 372         }
 373
 374         /* Apply outmod */
 375         if (bi_has_outmod(ins) && ins->outmod != BIFROST_NONE) {
 376                 if (ins->dest_type == nir_type_float16) {
 377                         for (unsigned c = 0; c < 2; ++c)
 378                                 dest.f16[c] = bh(bit_outmod(bf(dest.f16[c]), ins->outmod));
 379                 } else {
 380                         dest.f32 = bit_outmod(dest.f32, ins->outmod);
 381                 }
 382         }
 383
 384         /* Finally, store the result */
 385         bit_write(s, ins->dest, ins->dest_type, dest, FMA);
 386
 387         /* For ADD - change out the passthrough */
 388         if (!FMA) {
 389                 s->T0 = s->T;
 390                 s->T1 = dest.u32;
 391         }
 392 }
 393
/* Remove the file-local half/float conversion aliases so the short names do
 * not leak past this file's code */
#undef bh
#undef bf