src/panfrost/bifrost/test/bi_test_pack.c

   1 /*
   2  * Copyright (C) 2020 Collabora Ltd.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors (Collabora):
  24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  25  */
  26
#include "bit.h"
#include "bi_print.h"
#include "util/half_float.h"
#include "util/ralloc.h"
#include "util/u_dynarray.h"
#include "bifrost/disassemble.h"
  31
  32 /* Instruction packing tests */
  33
  34 static bool
  35 bit_test_single(struct panfrost_device *dev,
  36                 bi_instruction *ins,
  37                 uint32_t input[4],
  38                 bool fma, enum bit_debug debug)
  39 {
  40         /* First, simulate the instruction */
  41         struct bit_state s = { 0 };
  42         memcpy(s.r, input, 16);
  43         bit_step(&s, ins, fma);
  44
  45         /* Next, wrap it up and pack it */
  46
  47         bi_instruction ldubo = {
  48                 .type = BI_LOAD_UNIFORM,
  49                 .src = {
  50                         BIR_INDEX_CONSTANT,
  51                         BIR_INDEX_ZERO
  52                 },
  53                 .src_types = {
  54                         nir_type_uint32,
  55                         nir_type_uint32,
  56                 },
  57                 .dest = BIR_INDEX_REGISTER | 0,
  58                 .dest_type = nir_type_uint32,
  59                 .writemask = 0xFFFF
  60         };
  61
  62         bi_instruction ldva = {
  63                 .type = BI_LOAD_VAR_ADDRESS,
  64                 .writemask = (1 << 12) - 1,
  65                 .dest = BIR_INDEX_REGISTER | 32,
  66                 .dest_type = nir_type_uint32,
  67                 .src = {
  68                         BIR_INDEX_CONSTANT,
  69                         BIR_INDEX_REGISTER | 61,
  70                         BIR_INDEX_REGISTER | 62,
  71                         0,
  72                 },
  73                 .src_types = {
  74                         nir_type_uint32,
  75                         nir_type_uint32,
  76                         nir_type_uint32,
  77                         nir_type_uint32,
  78                 }
  79         };
  80
  81         bi_instruction st = {
  82                 .type = BI_STORE_VAR,
  83                 .src = {
  84                         BIR_INDEX_REGISTER | 0,
  85                         ldva.dest, ldva.dest + 1, ldva.dest + 2,
  86                 },
  87                 .src_types = {
  88                         nir_type_uint32,
  89                         nir_type_uint32, nir_type_uint32, nir_type_uint32,
  90                 },
  91                 .store_channels = 4
  92         };
  93
  94         bi_context *ctx = rzalloc(NULL, bi_context);
  95         ctx->stage = MESA_SHADER_VERTEX;
  96
  97         bi_block *blk = rzalloc(ctx, bi_block);
  98         blk->scheduled = true;
  99
 100         blk->base.predecessors = _mesa_set_create(blk,
 101                         _mesa_hash_pointer,
 102                         _mesa_key_pointer_equal);
 103
 104         list_inithead(&ctx->blocks);
 105         list_addtail(&blk->base.link, &ctx->blocks);
 106         list_inithead(&blk->clauses);
 107
 108         bi_clause *clauses[4] = {
 109                 rzalloc(ctx, bi_clause),
 110                 rzalloc(ctx, bi_clause),
 111                 rzalloc(ctx, bi_clause),
 112                 rzalloc(ctx, bi_clause)
 113         };
 114
 115         for (unsigned i = 0; i < 4; ++i) {
 116                 clauses[i]->bundle_count = 1;
 117                 list_addtail(&clauses[i]->link, &blk->clauses);
 118                 clauses[i]->scoreboard_id = (i & 1);
 119
 120                 if (i) {
 121                         clauses[i]->dependencies = 1 << (~i & 1);
 122                         clauses[i]->data_register_write_barrier = true;
 123                 }
 124         }
 125
 126         clauses[0]->bundles[0].add = &ldubo;
 127         clauses[0]->clause_type = BIFROST_CLAUSE_UBO;
 128
 129         if (fma)
 130                 clauses[1]->bundles[0].fma = ins;
 131         else
 132                 clauses[1]->bundles[0].add = ins;
 133
 134         clauses[0]->constant_count = 1;
 135         clauses[1]->constant_count = 1;
 136         clauses[1]->constants[0] = ins->constant.u64;
 137
 138         clauses[2]->bundles[0].add = &ldva;
 139         clauses[3]->bundles[0].add = &st;
 140
 141         clauses[2]->clause_type = BIFROST_CLAUSE_UBO;
 142         clauses[3]->clause_type = BIFROST_CLAUSE_SSBO_STORE;
 143
 144         panfrost_program prog;
 145         bi_pack(ctx, &prog.compiled);
 146
 147         bool succ = bit_vertex(dev, prog, input, 16, NULL, 0,
 148                         s.r, 16, debug);
 149
 150         if (debug >= BIT_DEBUG_ALL || (!succ && debug >= BIT_DEBUG_FAIL)) {
 151                 bi_print_shader(ctx, stderr);
 152                 disassemble_bifrost(stderr, prog.compiled.data, prog.compiled.size, true);
 153         }
 154
 155         return succ;
 156 }
 157
 158 /* Utilities for generating tests */
 159
 160 static void
 161 bit_generate_float4(float *mem)
 162 {
 163         for (unsigned i = 0; i < 4; ++i)
 164                 mem[i] = (float) ((rand() & 255) - 127) / 16.0;
 165 }
 166
 167 static void
 168 bit_generate_half8(uint16_t *mem)
 169 {
 170         for (unsigned i = 0; i < 8; ++i)
 171                 mem[i] = _mesa_float_to_half(((float) (rand() & 255) - 127) / 16.0);
 172 }
 173
 174 static bi_instruction
 175 bit_ins(enum bi_class C, unsigned argc, nir_alu_type base, unsigned size)
 176 {
 177         nir_alu_type T = base | size;
 178
 179         bi_instruction ins = {
 180                 .type = C,
 181                 .dest = BIR_INDEX_REGISTER | 0,
 182                 .dest_type = T,
 183         };
 184
 185         for (unsigned i = 0; i < argc; ++i) {
 186                 ins.src[i] = BIR_INDEX_REGISTER | i;
 187                 ins.src_types[i] = T;
 188         }
 189
 190         return ins;
 191 }
 192
 193 /* Tests all 64 combinations of floating point modifiers for a given
 194  * instruction / floating-type / test type */
 195
 196 static void
 197 bit_fmod_helper(struct panfrost_device *dev,
 198                 enum bi_class c, unsigned size, bool fma,
 199                 uint32_t *input, enum bit_debug debug, unsigned op)
 200 {
 201         bi_instruction ins = bit_ins(c, 2, nir_type_float, size);
 202
 203         for (unsigned outmod = 0; outmod < 4; ++outmod) {
 204                 for (unsigned inmod = 0; inmod < 16; ++inmod) {
 205                         ins.outmod = outmod;
 206                         ins.op.minmax = op;
 207                         ins.src_abs[0] = (inmod & 0x1);
 208                         ins.src_abs[1] = (inmod & 0x2);
 209                         ins.src_neg[0] = (inmod & 0x4);
 210                         ins.src_neg[1] = (inmod & 0x8);
 211
 212                         /* Skip over tests that cannot run on FMA */
 213                         if (fma && (size == 16) && ins.src_abs[0] && ins.src_abs[1])
 214                                 continue;
 215
 216                         if (!bit_test_single(dev, &ins, input, fma, debug)) {
 217                                 fprintf(stderr, "FAIL: fmod.%s%u.%s%s.%u\n",
 218                                                 bi_class_name(c),
 219                                                 size,
 220                                                 fma ? "fma" : "add",
 221                                                 outmod ? bi_output_mod_name(outmod) : ".none",
 222                                                 inmod);
 223                         }
 224                 }
 225         }
 226 }
 227
 228 static void
 229 bit_fma_helper(struct panfrost_device *dev,
 230                 unsigned size, uint32_t *input, enum bit_debug debug)
 231 {
 232         bi_instruction ins = bit_ins(BI_FMA, 3, nir_type_float, size);
 233
 234         for (unsigned outmod = 0; outmod < 4; ++outmod) {
 235                 for (unsigned inmod = 0; inmod < 8; ++inmod) {
 236                         ins.outmod = outmod;
 237                         ins.src_neg[0] = (inmod & 0x1);
 238                         ins.src_neg[1] = (inmod & 0x2);
 239                         ins.src_neg[2] = (inmod & 0x4);
 240
 241                         if (!bit_test_single(dev, &ins, input, true, debug)) {
 242                                 fprintf(stderr, "FAIL: fma%u%s.%u\n",
 243                                                 size,
 244                                                 outmod ? bi_output_mod_name(outmod) : ".none",
 245                                                 inmod);
 246                         }
 247                 }
 248         }
 249 }
 250
 251 static void
 252 bit_fma_mscale_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
 253 {
 254         bi_instruction ins = bit_ins(BI_FMA, 4, nir_type_float, 32);
 255         ins.op.mscale = true;
 256         ins.src_types[3] = nir_type_int32;
 257         ins.src[2] = ins.src[3]; /* Not enough ports! */
 258
 259         for (unsigned outmod = 0; outmod < 4; ++outmod) {
 260                 for (unsigned inmod = 0; inmod < 8; ++inmod) {
 261                         ins.outmod = outmod;
 262                         ins.src_abs[0] = (inmod & 0x1);
 263                         ins.src_neg[1] = (inmod & 0x2);
 264                         ins.src_neg[2] = (inmod & 0x4);
 265
 266                         if (!bit_test_single(dev, &ins, input, true, debug)) {
 267                                 fprintf(stderr, "FAIL: fma_mscale%s.%u\n",
 268                                                 outmod ? bi_output_mod_name(outmod) : ".none",
 269                                                 inmod);
 270                         }
 271                 }
 272         }
 273 }
 274
 275 static void
 276 bit_csel_helper(struct panfrost_device *dev,
 277                 unsigned size, uint32_t *input, enum bit_debug debug)
 278 {
 279         bi_instruction ins = bit_ins(BI_CSEL, 4, nir_type_uint, size);
 280
 281         /* SCHEDULER: We can only read 3 registers at once. */
 282         ins.src[2] = ins.src[0];
 283
 284         for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
 285                 ins.csel_cond = cond;
 286
 287                 if (!bit_test_single(dev, &ins, input, true, debug)) {
 288                         fprintf(stderr, "FAIL: csel%u.%s\n",
 289                                         size, bi_cond_name(cond));
 290                 }
 291         }
 292 }
 293
 294 static void
 295 bit_special_helper(struct panfrost_device *dev,
 296                 unsigned size, uint32_t *input, enum bit_debug debug)
 297 {
 298         bi_instruction ins = bit_ins(BI_SPECIAL, 1, nir_type_float, size);
 299
 300         for (enum bi_special_op op = BI_SPECIAL_FRCP; op <= BI_SPECIAL_FRSQ; ++op) {
 301                 for (unsigned c = 0; c < ((size == 16) ? 2 : 1); ++c) {
 302                         ins.op.special = op;
 303                         ins.swizzle[0][0] = c;
 304
 305                         if (!bit_test_single(dev, &ins, input, false, debug)) {
 306                                 fprintf(stderr, "FAIL: special%u.%s\n",
 307                                                 size, bi_special_op_name(op));
 308                         }
 309                 }
 310         }
 311 }
 312
 313 static void
 314 bit_table_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
 315 {
 316         bi_instruction ins = bit_ins(BI_TABLE, 1, nir_type_float, 32);
 317
 318         for (enum bi_table_op op = 0; op <= BI_TABLE_LOG2_U_OVER_U_1_LOW; ++op) {
 319                 ins.op.table = op;
 320
 321                 if (!bit_test_single(dev, &ins, input, false, debug)) {
 322                         fprintf(stderr, "FAIL: table.%s\n",
 323                                         bi_table_op_name(op));
 324                 }
 325         }
 326 }
 327
 328 static void
 329 bit_frexp_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
 330 {
 331         bi_instruction ins = bit_ins(BI_FREXP, 1, nir_type_float, 32);
 332         ins.dest_type = nir_type_int32;
 333
 334         for (enum bi_frexp_op op = 0; op <= BI_FREXPE_LOG; ++op) {
 335                 ins.op.frexp = op;
 336
 337                 if (!bit_test_single(dev, &ins, input, true, debug)) {
 338                         fprintf(stderr, "FAIL: frexp.%s\n",
 339                                         bi_frexp_op_name(op));
 340                 }
 341         }
 342 }
 343
 344 static void
 345 bit_reduce_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
 346 {
 347         bi_instruction ins = bit_ins(BI_REDUCE_FMA, 2, nir_type_float, 32);
 348
 349         for (enum bi_reduce_op op = 0; op <= BI_REDUCE_ADD_FREXPM; ++op) {
 350                 ins.op.reduce = op;
 351
 352                 if (!bit_test_single(dev, &ins, input, true, debug)) {
 353                         fprintf(stderr, "FAIL: reduce.%s\n",
 354                                         bi_reduce_op_name(op));
 355                 }
 356         }
 357 }
 358
 359 static void
 360 bit_convert_helper(struct panfrost_device *dev, unsigned from_size,
 361                 unsigned to_size, unsigned cx, unsigned cy, bool FMA,
 362                 enum bifrost_roundmode roundmode,
 363                 uint32_t *input, enum bit_debug debug)
 364 {
 365         bi_instruction ins = {
 366                 .type = BI_CONVERT,
 367                 .dest = BIR_INDEX_REGISTER | 0,
 368                 .writemask = 0xF,
 369                 .src = { BIR_INDEX_REGISTER | 0 }
 370         };
 371
 372         nir_alu_type Ts[3] = { nir_type_float, nir_type_uint, nir_type_int };
 373
 374         for (unsigned from_base = 0; from_base < 3; ++from_base) {
 375                 for (unsigned to_base = 0; to_base < 3; ++to_base) {
 376                         /* Discard invalid combinations.. */
 377                         if ((from_size == to_size) && (from_base == to_base))
 378                                 continue;
 379
 380                         /* Can't switch signedness */
 381                         if (from_base && to_base)
 382                                 continue;
 383
 384                         /* No F16_TO_I32, etc */
 385                         if (from_size != to_size && from_base == 0 && to_base)
 386                                 continue;
 387
 388                         if (from_size != to_size && from_base && to_base == 0)
 389                                 continue;
 390
 391                         /* No need, just ignore the upper half */
 392                         if (from_size > to_size && from_base == to_base && from_base)
 393                                 continue;
 394
 395                         ins.dest_type = Ts[to_base] | to_size;
 396                         ins.src_types[0] = Ts[from_base] | from_size;
 397                         ins.roundmode = roundmode;
 398                         ins.swizzle[0][0] = cx;
 399                         ins.swizzle[0][1] = cy;
 400
 401                         if (!bit_test_single(dev, &ins, input, FMA, debug)) {
 402                                 fprintf(stderr, "FAIL: convert.%u-%u.%u-%u.%u%u\n",
 403                                                 from_base, from_size,
 404                                                 to_base, to_size,
 405                                                 cx, cy);
 406                         }
 407                 }
 408         }
 409 }
 410
 411 void
 412 bit_packing(struct panfrost_device *dev, enum bit_debug debug)
 413 {
 414         float input32[4];
 415         uint16_t input16[8];
 416
 417         bit_generate_float4(input32);
 418         bit_generate_half8(input16);
 419
 420         for (unsigned sz = 16; sz <= 32; sz *= 2) {
 421                 uint32_t *input =
 422                         (sz == 16) ? (uint32_t *) input16 :
 423                         (uint32_t *) input32;
 424
 425                 bit_fmod_helper(dev, BI_ADD, sz, true, input, debug, 0);
 426
 427                 if (sz == 32) {
 428                         bit_fmod_helper(dev, BI_ADD, sz, false, input, debug, 0);
 429                         bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MIN);
 430                         bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MAX);
 431                 }
 432
 433                 bit_fma_helper(dev, sz, input, debug);
 434         }
 435
 436         for (unsigned sz = 32; sz <= 32; sz *= 2)
 437                 bit_csel_helper(dev, sz, (uint32_t *) input32, debug);
 438
 439         float special[4] = { 0.9 };
 440         uint32_t special16[4] = { _mesa_float_to_half(special[0]) | (_mesa_float_to_half(0.2) << 16) };
 441
 442         bit_table_helper(dev, (uint32_t *) special, debug);
 443
 444         for (unsigned sz = 16; sz <= 32; sz *= 2) {
 445                 uint32_t *input =
 446                         (sz == 16) ? special16 :
 447                         (uint32_t *) special;
 448
 449                 bit_special_helper(dev, sz, input, debug);
 450         }
 451
 452         for (unsigned rm = 0; rm < 4; ++rm) {
 453                 bit_convert_helper(dev, 32, 32, 0, 0, false, rm, (uint32_t *) input32, debug);
 454
 455                 for (unsigned c = 0; c < 2; ++c)
 456                         bit_convert_helper(dev, 32, 16, c, 0, false, rm, (uint32_t *) input32, debug);
 457
 458                 bit_convert_helper(dev, 16, 32, 0, 0, false, rm, (uint32_t *) input16, debug);
 459
 460                 for (unsigned c = 0; c < 4; ++c)
 461                         bit_convert_helper(dev, 16, 16, c & 1, c >> 1, false, rm, (uint32_t *) input16, debug);
 462         }
 463
 464         bit_frexp_helper(dev, (uint32_t *) input32, debug);
 465         bit_reduce_helper(dev, (uint32_t *) input32, debug);
 466
 467         uint32_t mscale_input[4];
 468         memcpy(mscale_input, input32, sizeof(input32));
 469         mscale_input[3] = 0x7;
 470         bit_fma_mscale_helper(dev, mscale_input, debug);
 471 }