pan/bit: Add preliminary FMA/ADD/MOV implementations
[mesa.git] / src / panfrost / bifrost / test / bi_interpret.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "bit.h"
28 #include "util/half_float.h"
29
/* Value holder for the interpreted datapath: one 64-bit word viewed under
 * every width/signedness the interpreter needs. Writing one member and
 * reading another is deliberate union type punning (well-defined in C).
 * f16 entries are raw half-float bit patterns (uint16_t); convert with
 * _mesa_half_to_float / _mesa_float_to_half. */
typedef union {
        uint64_t u64;
        uint32_t u32;
        uint16_t u16[2]; /* two 16-bit lanes of the low word */
        uint8_t u8[4];   /* four 8-bit lanes of the low word */
        int64_t i64;
        int32_t i32;
        int16_t i16[2];
        int8_t i8[4];
        double f64;
        float f32;
        uint16_t f16[2]; /* fp16 stored as raw bits, not a float type */
} bit_t;
43
44 /* Interprets a subset of Bifrost IR required for automated testing */
45
46 static uint64_t
47 bit_read(struct bit_state *s, bi_instruction *ins, unsigned index, nir_alu_type T, bool FMA)
48 {
49 if (index & BIR_INDEX_REGISTER) {
50 uint32_t reg = index & ~BIR_INDEX_REGISTER;
51 assert(reg < 64);
52 return s->r[reg];
53 } else if (index & BIR_INDEX_UNIFORM) {
54 unreachable("Uniform registers to be implemented");
55 } else if (index & BIR_INDEX_CONSTANT) {
56 return ins->constant.u64 >> (index & ~BIR_INDEX_CONSTANT);
57 } else if (index & BIR_INDEX_ZERO) {
58 return 0;
59 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_STAGE)) {
60 return FMA ? 0 : s->T;
61 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_FMA)) {
62 return s->T0;
63 } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_ADD)) {
64 return s->T1;
65 } else if (!index) {
66 /* Placeholder */
67 return 0;
68 } else {
69 unreachable("Invalid source");
70 }
71 }
72
73 static void
74 bit_write(struct bit_state *s, unsigned index, nir_alu_type T, bit_t value, bool FMA)
75 {
76 /* Always write stage passthrough */
77 if (FMA)
78 s->T = value.u32;
79
80 if (index & BIR_INDEX_REGISTER) {
81 uint32_t reg = index & ~BIR_INDEX_REGISTER;
82 assert(reg < 64);
83 s->r[reg] = value.u32;
84 } else if (!index) {
85 /* Nothing to do */
86 } else {
87 unreachable("Invalid destination");
88 }
89 }
90
/* Shorthand half<->float conversions */
#define bh _mesa_float_to_half
#define bf _mesa_half_to_float

/* Apply a scalar float worker lanewise to a vec2 of fp16 values, converting
 * each swizzled source lane up to fp32 and the result back down */

#define bv2f16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.f16[c] = bh(fxn(bf(srcs[0].f16[ins->swizzle[0][c]]), \
                                        bf(srcs[1].f16[ins->swizzle[1][c]]), \
                                        bf(srcs[2].f16[ins->swizzle[2][c]]), \
                                        bf(srcs[3].f16[ins->swizzle[3][c]]))); \
        }

/* Apply a scalar integer worker lanewise to a vec2 of 16-bit values.
 * Stores through i16 (was f16 — identical storage, clearer intent). */

#define bv2i16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.i16[c] = fxn(srcs[0].u16[ins->swizzle[0][c]], \
                                srcs[1].u16[ins->swizzle[1][c]], \
                                srcs[2].u16[ins->swizzle[2][c]], \
                                srcs[3].u16[ins->swizzle[3][c]]); \
        }

/* Scalar 32-bit variants. All four sources are read uniformly; the stray
 * .i32 on bi32's fourth source is now .u32 for consistency (same bits). */

#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)
#define bi32(fxn) dest.i32 = fxn(srcs[0].u32, srcs[1].u32, srcs[2].u32, srcs[3].u32)
112
/* Type-dispatch helpers: each tests ins->dest_type and, on a match, runs the
 * worker of the matching width and `break`s out of the opcode switch in
 * bit_step. On no match, control falls through so the macros compose (see
 * bpoly). BUGFIX: the fp32 branch previously invoked bf32(fxn64), evaluating
 * 32-bit ops in double precision and double-rounding the stored result; it
 * now uses the single-precision worker. */

#define bfloat(fxn64, fxn32) \
        if (ins->dest_type == nir_type_float64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_float32) { \
                bf32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_float16) { \
                bv2f16(fxn32); \
                break; \
        }

#define bint(fxn64, fxn32, fxn16, fxn8) \
        if (ins->dest_type == nir_type_int64 || ins->dest_type == nir_type_uint64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_int32 || ins->dest_type == nir_type_uint32) { \
                bi32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_int16 || ins->dest_type == nir_type_uint16) { \
                bv2i16(fxn16); \
                break; \
        } else if (ins->dest_type == nir_type_int8 || ins->dest_type == nir_type_uint8) { \
                unreachable("TODO: 8-bit"); \
        }

/* Try the float types, then the integer types, then give up */

#define bpoly(name) \
        bfloat(bit_f64 ## name, bit_f32 ## name); \
        bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
        unreachable("Invalid type");
141
/* bit_make_float(name, expr): emits static inline scalar workers
 * bit_f64<name> and bit_f32<name> evaluating `expr` over operands a..d at
 * double and single precision respectively. These are the functions the
 * bfloat/bpoly dispatchers select between. Note the trailing backslashes:
 * each macro is terminated by the following blank line. */

#define bit_make_float(name, expr) \
        static inline double \
        bit_f64 ## name(double a, double b, double c, double d) \
        { \
                return expr; \
        } \
        static inline float \
        bit_f32 ## name(float a, float b, float c, float d) \
        { \
                return expr; \
        } \

/* bit_make_int(name, expr): likewise for the 64/32/16/8-bit signed integer
 * workers consumed by bint */

#define bit_make_int(name, expr) \
        static inline int64_t \
        bit_i64 ## name (int64_t a, int64_t b, int64_t c, int64_t d) \
        { \
                return expr; \
        } \
        \
        static inline int32_t \
        bit_i32 ## name (int32_t a, int32_t b, int32_t c, int32_t d) \
        { \
                return expr; \
        } \
        \
        static inline int16_t \
        bit_i16 ## name (int16_t a, int16_t b, int16_t c, int16_t d) \
        { \
                return expr; \
        } \
        \
        static inline int8_t \
        bit_i8 ## name (int8_t a, int8_t b, int8_t c, int8_t d) \
        { \
                return expr; \
        } \

/* bit_make_poly: emit both float and integer workers, for ops defined at
 * every type */

#define bit_make_poly(name, expr) \
        bit_make_float(name, expr) \
        bit_make_int(name, expr) \

/* Instantiate workers for the supported ops. fma is float-only; add and mov
 * exist at all types. */

bit_make_poly(add, a + b);
bit_make_float(fma, (a * b) + c);
bit_make_poly(mov, a);
186
/* Interprets a single instruction issued to one unit slot (FMA when
 * FMA=true, ADD otherwise), reading sources from and committing results to
 * the interpreter state s. Unsupported opcodes abort via unreachable(). */

void
bit_step(struct bit_state *s, bi_instruction *ins, bool FMA)
{
        /* First, load sources */
        bit_t srcs[BIR_SRC_COUNT] = { 0 };

        bi_foreach_src(ins, src)
                srcs[src].u64 = bit_read(s, ins, ins->src[src], ins->src_types[src], FMA);

        /* Next, do the action of the instruction. The b* dispatch macros
         * expand to if-chains on ins->dest_type whose matching arm computes
         * dest and `break`s out of this switch. */
        bit_t dest = { 0 };

        switch (ins->type) {
        case BI_ADD:
                bpoly(add);

        case BI_BRANCH:
        case BI_CMP:
        case BI_BITWISE:
        case BI_CONVERT:
        case BI_CSEL:
                unreachable("Unsupported op");

        case BI_FMA: {
                /* FMA is float-only, so dispatch over float types directly */
                bfloat(bit_f64fma, bit_f32fma);
                unreachable("Unknown type");
        }

        case BI_FREXP:
        case BI_ISUB:
        case BI_MINMAX:
                unreachable("Unsupported op");

        case BI_MOV:
                bpoly(mov);

        case BI_SHIFT:
        case BI_STORE_VAR:
        case BI_SPECIAL: /* _FAST, _TABLE on supported GPUs */
        case BI_SWIZZLE:
        case BI_ROUND:
                unreachable("Unsupported op");

        /* We only interpret vertex shaders */
        case BI_DISCARD:
        case BI_LOAD_VAR:
        case BI_ATEST:
        case BI_BLEND:
                unreachable("Fragment op used in interpreter");

        /* Modeling main memory is more than I bargained for */
        case BI_LOAD_UNIFORM:
        case BI_LOAD_ATTR:
        case BI_LOAD_VAR_ADDRESS:
        case BI_LOAD:
        case BI_STORE:
        case BI_TEX:
                unreachable("Unsupported I/O in interpreter");

        default:
                unreachable("Unsupported op");
        }

        /* Finally, store the result */
        bit_write(s, ins->dest, ins->dest_type, dest, FMA);

        /* For ADD - change out the passthrough: once the ADD slot retires,
         * T0/T1 expose this cycle's FMA and ADD results respectively to the
         * next instruction pair */
        if (!FMA) {
                s->T0 = s->T;
                s->T1 = dest.u32;
        }
}
259
260 #undef bh
261 #undef bf