src/panfrost/bifrost/test/bi_test_pack.c

   1 /*
   2  * Copyright (C) 2020 Collabora Ltd.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors (Collabora):
  24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  25  */
  26
  27 #include "bit.h"
  28 #include "bi_print.h"
  29 #include "util/half_float.h"
  30 #include "bifrost/disassemble.h"
  31
  32 /* Instruction packing tests */
  33
  34 static bool
  35 bit_test_single(struct panfrost_device *dev,
  36                 bi_instruction *ins,
  37                 uint32_t input[4],
  38                 bool fma, enum bit_debug debug)
  39 {
  40         /* First, simulate the instruction */
  41         struct bit_state s = { 0 };
  42         memcpy(s.r, input, 16);
  43         bit_step(&s, ins, fma);
  44
  45         /* Next, wrap it up and pack it */
  46
  47         bi_instruction ldubo = {
  48                 .type = BI_LOAD_UNIFORM,
  49                 .src = {
  50                         BIR_INDEX_CONSTANT,
  51                         BIR_INDEX_ZERO
  52                 },
  53                 .src_types = {
  54                         nir_type_uint32,
  55                         nir_type_uint32,
  56                 },
  57                 .dest = BIR_INDEX_REGISTER | 0,
  58                 .dest_type = nir_type_uint32,
  59                 .writemask = 0xFFFF
  60         };
  61
  62         bi_instruction ldva = {
  63                 .type = BI_LOAD_VAR_ADDRESS,
  64                 .writemask = (1 << 12) - 1,
  65                 .dest = BIR_INDEX_REGISTER | 32,
  66                 .dest_type = nir_type_uint32,
  67                 .src = {
  68                         BIR_INDEX_CONSTANT,
  69                         BIR_INDEX_REGISTER | 61,
  70                         BIR_INDEX_REGISTER | 62,
  71                         0,
  72                 },
  73                 .src_types = {
  74                         nir_type_uint32,
  75                         nir_type_uint32,
  76                         nir_type_uint32,
  77                         nir_type_uint32,
  78                 }
  79         };
  80
  81         bi_instruction st = {
  82                 .type = BI_STORE_VAR,
  83                 .src = {
  84                         BIR_INDEX_REGISTER | 0,
  85                         ldva.dest, ldva.dest + 1, ldva.dest + 2,
  86                 },
  87                 .src_types = {
  88                         nir_type_uint32,
  89                         nir_type_uint32, nir_type_uint32, nir_type_uint32,
  90                 },
  91                 .store_channels = 4
  92         };
  93
  94         bi_context *ctx = rzalloc(NULL, bi_context);
  95         ctx->stage = MESA_SHADER_VERTEX;
  96
  97         bi_block *blk = rzalloc(ctx, bi_block);
  98         blk->scheduled = true;
  99
 100         blk->base.predecessors = _mesa_set_create(blk,
 101                         _mesa_hash_pointer,
 102                         _mesa_key_pointer_equal);
 103
 104         list_inithead(&ctx->blocks);
 105         list_addtail(&blk->base.link, &ctx->blocks);
 106         list_inithead(&blk->clauses);
 107
 108         bi_clause *clauses[4] = {
 109                 rzalloc(ctx, bi_clause),
 110                 rzalloc(ctx, bi_clause),
 111                 rzalloc(ctx, bi_clause),
 112                 rzalloc(ctx, bi_clause)
 113         };
 114
 115         for (unsigned i = 0; i < 4; ++i) {
 116                 clauses[i]->bundle_count = 1;
 117                 list_addtail(&clauses[i]->link, &blk->clauses);
 118                 clauses[i]->scoreboard_id = (i & 1);
 119
 120                 if (i) {
 121                         clauses[i]->dependencies = 1 << (~i & 1);
 122                         clauses[i]->data_register_write_barrier = true;
 123                 }
 124         }
 125
 126         clauses[0]->bundles[0].add = &ldubo;
 127         clauses[0]->clause_type = BIFROST_CLAUSE_UBO;
 128
 129         if (fma)
 130                 clauses[1]->bundles[0].fma = ins;
 131         else
 132                 clauses[1]->bundles[0].add = ins;
 133
 134         clauses[0]->constant_count = 1;
 135         clauses[1]->constant_count = 1;
 136         clauses[1]->constants[0] = ins->constant.u64;
 137
 138         clauses[2]->bundles[0].add = &ldva;
 139         clauses[3]->bundles[0].add = &st;
 140
 141         clauses[2]->clause_type = BIFROST_CLAUSE_UBO;
 142         clauses[3]->clause_type = BIFROST_CLAUSE_SSBO_STORE;
 143
 144         panfrost_program prog;
 145         bi_pack(ctx, &prog.compiled);
 146
 147         bool succ = bit_vertex(dev, prog, input, 16, NULL, 0,
 148                         s.r, 16, debug);
 149
 150         if (debug >= BIT_DEBUG_ALL || (!succ && debug >= BIT_DEBUG_FAIL)) {
 151                 bi_print_shader(ctx, stderr);
 152                 disassemble_bifrost(stderr, prog.compiled.data, prog.compiled.size, true);
 153         }
 154
 155         return succ;
 156 }
 157
 158 /* Utilities for generating tests */
 159
 160 static void
 161 bit_generate_vector(uint32_t *mem)
 162 {
 163         for (unsigned i = 0; i < 4; ++i)
 164                 mem[i] = rand();
 165 }
 166
 167 /* Tests all 64 combinations of floating point modifiers for a given
 168  * instruction / floating-type / test type */
 169
 170 static void
 171 bit_fmod_helper(struct panfrost_device *dev,
 172                 enum bi_class c, unsigned size, bool fma,
 173                 uint32_t *input, enum bit_debug debug)
 174 {
 175         nir_alu_type T = nir_type_float | size;
 176
 177         bi_instruction ins = {
 178                 .type = c,
 179                 .src = {
 180                         BIR_INDEX_REGISTER | 0,
 181                         BIR_INDEX_REGISTER | 1,
 182                 },
 183                 .src_types = { T, T },
 184                 .dest = BIR_INDEX_REGISTER | 2,
 185                 .dest_type = T,
 186         };
 187
 188         for (unsigned outmod = 0; outmod < 4; ++outmod) {
 189                 for (unsigned inmod = 0; inmod < 16; ++inmod) {
 190                         ins.outmod = outmod;
 191                         ins.src_abs[0] = (inmod & 0x1);
 192                         ins.src_abs[1] = (inmod & 0x2);
 193                         ins.src_neg[0] = (inmod & 0x4);
 194                         ins.src_neg[1] = (inmod & 0x8);
 195
 196                         /* Skip over tests that cannot run on FMA */
 197                         if (fma && (size == 16) && ins.src_abs[0] && ins.src_abs[1])
 198                                 continue;
 199
 200                         if (!bit_test_single(dev, &ins, input, fma, debug)) {
 201                                 fprintf(stderr, "FAIL: fmod.%s%u.%s%s.%u\n",
 202                                                 bi_class_name(c),
 203                                                 size,
 204                                                 fma ? "fma" : "add",
 205                                                 outmod ? bi_output_mod_name(outmod) : ".none",
 206                                                 inmod);
 207                         }
 208                 }
 209         }
 210 }
 211
 212 static void
 213 bit_fma_helper(struct panfrost_device *dev,
 214                 unsigned size, uint32_t *input, enum bit_debug debug)
 215 {
 216         nir_alu_type T = nir_type_float | size;
 217
 218         bi_instruction ins = {
 219                 .type = BI_FMA,
 220                 .src = {
 221                         BIR_INDEX_REGISTER | 0,
 222                         BIR_INDEX_REGISTER | 1,
 223                         BIR_INDEX_REGISTER | 2,
 224                 },
 225                 .src_types = { T, T, T },
 226                 .dest = BIR_INDEX_REGISTER | 3,
 227                 .dest_type = T,
 228         };
 229
 230         for (unsigned outmod = 0; outmod < 4; ++outmod) {
 231                 for (unsigned inmod = 0; inmod < 8; ++inmod) {
 232                         ins.outmod = outmod;
 233                         ins.src_neg[0] = (inmod & 0x1);
 234                         ins.src_neg[1] = (inmod & 0x2);
 235                         ins.src_neg[2] = (inmod & 0x4);
 236
 237                         if (!bit_test_single(dev, &ins, input, true, debug)) {
 238                                 fprintf(stderr, "FAIL: fma%u%s.%u\n",
 239                                                 size,
 240                                                 outmod ? bi_output_mod_name(outmod) : ".none",
 241                                                 inmod);
 242                         }
 243                 }
 244         }
 245 }
 246
 247
 248
 249 void
 250 bit_fmod(struct panfrost_device *dev, enum bit_debug debug)
 251 {
 252         float input32[4] = { 0.8, 1.7, 0.0, 0.0 };
 253
 254         uint32_t input16[4] = {
 255                 _mesa_float_to_half(input32[0]) | (_mesa_float_to_half(-1.2) << 16),
 256                 _mesa_float_to_half(input32[1]) | (_mesa_float_to_half(0.9) << 16),
 257                 0, 0
 258         };
 259
 260         for (unsigned sz = 16; sz <= 32; sz *= 2) {
 261                 uint32_t *input =
 262                         (sz == 16) ? input16 :
 263                         (uint32_t *) input32;
 264
 265                 bit_fmod_helper(dev, BI_ADD, sz, true, input, debug);
 266         }
 267 }
 268
 269 void
 270 bit_fma(struct panfrost_device *dev, enum bit_debug debug)
 271 {
 272         float input32[4] = { 0.2, 1.6, -3.5, 0.0 };
 273
 274         uint32_t input16[4] = {
 275                 _mesa_float_to_half(input32[0]) | (_mesa_float_to_half(-1.8) << 16),
 276                 _mesa_float_to_half(input32[1]) | (_mesa_float_to_half(0.6) << 16),
 277                 _mesa_float_to_half(input32[1]) | (_mesa_float_to_half(16.2) << 16),
 278                 0
 279         };
 280
 281         for (unsigned sz = 16; sz <= 32; sz *= 2) {
 282                 uint32_t *input =
 283                         (sz == 16) ? input16 :
 284                         (uint32_t *) input32;
 285
 286                 bit_fma_helper(dev, sz, input, debug);
 287         }
 288 }