src/broadcom/compiler/v3d_nir_lower_io.c

   1 /*
   2  * Copyright © 2015 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "compiler/v3d_compiler.h"
  25 #include "compiler/nir/nir_builder.h"
  26
  27 /**
  28  * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
  29  * intrinsics into something amenable to the V3D architecture.
  30  *
  31  * Most of the work is turning the VS's store_output intrinsics from working
  32  * on a base representing the gallium-level vec4 driver_location to an offset
  33  * within the VPM, and emitting the header that's read by the fixed function
  34  * hardware between the VS and FS.
  35  *
  36  * We also adjust the offsets on uniform loads to be in bytes, since that's
  37  * what we need for indirect addressing with general TMU access.
  38  */
  39
/* Bookkeeping shared by the lowering helpers in this file while one shader
 * is being processed.
 */
struct v3d_nir_lower_io_state {
        /* VPM offsets of the fixed function outputs (position, viewport
         * X/Y, Z, 1/Wc and point size) and of the first generic varying.
         * The code in this file compares these against -1 to decide whether
         * the corresponding output has to be written.
         */
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        /* One bit per used output of the stage key, set when a store to
         * that output has been lowered.  Outputs whose bit is still clear
         * get a 0 written when the fixed function outputs are emitted.
         */
        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        /* Scalar components of the most recently lowered position store,
         * indexed by component.  Entries never written stay NULL until the
         * fixed function outputs are emitted.
         */
        nir_ssa_def *pos[4];
};
  67
/* Forward declaration: v3d_nir_lower_emit_vertex() needs to call this before
 * its definition further down in the file.
 */
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state);
  71
  72 static void
  73 v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
  74                      nir_ssa_def *chan)
  75 {
  76         nir_intrinsic_instr *intr =
  77                 nir_intrinsic_instr_create(b->shader,
  78                                            nir_intrinsic_store_output);
  79         nir_ssa_dest_init(&intr->instr, &intr->dest,
  80                           1, intr->dest.ssa.bit_size, NULL);
  81         intr->num_components = 1;
  82
  83         intr->src[0] = nir_src_for_ssa(chan);
  84         if (offset) {
  85                 /* When generating the VIR instruction, the base and the offset
  86                  * are just going to get added together with an ADD instruction
  87                  * so we might as well do the add here at the NIR level instead
  88                  * and let the constant folding do its magic.
  89                  */
  90                 intr->src[1] = nir_src_for_ssa(nir_iadd_imm(b, offset, base));
  91                 base = 0;
  92         } else {
  93                 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
  94         }
  95
  96         nir_intrinsic_set_base(intr, base);
  97         nir_intrinsic_set_write_mask(intr, 0x1);
  98         nir_intrinsic_set_component(intr, 0);
  99
 100         nir_builder_instr_insert(b, &intr->instr);
 101 }
 102
 103 /* Convert the uniform offset to bytes.  If it happens to be a constant,
 104  * constant-folding will clean up the shift for us.
 105  */
 106 static void
 107 v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
 108                       nir_intrinsic_instr *intr)
 109 {
 110         b->cursor = nir_before_instr(&intr->instr);
 111
 112         nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);
 113
 114         nir_instr_rewrite_src(&intr->instr,
 115                               &intr->src[0],
 116                               nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
 117                                                        nir_imm_int(b, 4))));
 118 }
 119
 120 static int
 121 v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan)
 122 {
 123         int component = var->data.location_frac + chan;
 124
 125         uint32_t num_used_outputs = 0;
 126         struct v3d_varying_slot *used_outputs = NULL;
 127         switch (c->s->info.stage) {
 128         case MESA_SHADER_VERTEX:
 129                 num_used_outputs = c->vs_key->num_used_outputs;
 130                 used_outputs = c->vs_key->used_outputs;
 131                 break;
 132         case MESA_SHADER_GEOMETRY:
 133                 num_used_outputs = c->gs_key->num_used_outputs;
 134                 used_outputs = c->gs_key->used_outputs;
 135                 break;
 136         default:
 137                 unreachable("Unsupported shader stage");
 138         }
 139
 140         for (int i = 0; i < num_used_outputs; i++) {
 141                 struct v3d_varying_slot slot = used_outputs[i];
 142
 143                 if (v3d_slot_get_slot(slot) == var->data.location &&
 144                     v3d_slot_get_component(slot) == component) {
 145                         return i;
 146                 }
 147         }
 148
 149         return -1;
 150 }
 151
/* Lowers a store_output(gallium driver location) to a series of store_outputs
 * with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 *
 * Besides emitting the scalar stores, this records position components in
 * state->pos, forwards point size to its fixed function slot when one is
 * laid out, and folds gl_Layer into the geometry shader vertex header
 * variable.  The original intrinsic is removed at the end.
 */
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
                                           intr->num_components);

        /* Find the output variable this store refers to: its
         * driver_location must match the intrinsic's base and its component
         * range must contain the first stored component.  The scan does not
         * stop at the first hit, so the last matching variable wins.
         */
        nir_variable *var = NULL;
        nir_foreach_variable(scan_var, &c->s->outputs) {
                if (scan_var->data.driver_location != nir_intrinsic_base(intr) ||
                    start_comp < scan_var->data.location_frac ||
                    start_comp >= scan_var->data.location_frac +
                    glsl_get_components(scan_var->type)) {
                        continue;
                }
                var = scan_var;
        }
        assert(var);

        /* Save off the components of the position for the setup of VPM inputs
         * read by fixed function HW.
         */
        if (var->data.location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++) {
                        state->pos[start_comp + i] = nir_channel(b, src, i);
                }
        }

        /* Just write psiz to its position in the FF header right now.
         * NOTE(review): src is passed through unscalarized here, which
         * presumably relies on point size stores being scalar -- confirm.
         */
        if (var->data.location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
        }

        if (var->data.location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                /* Clear bits 16..23 of the vertex header so that the new
                 * layer value can be OR'ed in below.
                 */
                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));

                /* From the GLES 3.2 spec:
                 *
                 *    "When fragments are written to a layered framebuffer, the
                 *     fragment’s layer number selects an image from the array
                 *     of images at each attachment (...). If the fragment’s
                 *     layer number is negative, or greater than or equal to
                 *     the minimum number of layers of any attachment, the
                 *     effects of the fragment on the framebuffer contents are
                 *     undefined."
                 *
                 * This suggests we can just ignore that situation, however,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds writes access to the tile state. The
                 * simulator has an assert to catch this, so we play safe here
                 * and we make sure that doesn't happen by setting gl_Layer
                 * to 0 in that case (we always allocate tile state for at
                 * least one layer).
                 */
                nir_intrinsic_instr *load =
                        nir_intrinsic_instr_create(b->shader,
                                                   nir_intrinsic_load_fb_layers_v3d);
                nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
                nir_builder_instr_insert(b, &load->instr);
                nir_ssa_def *fb_layers = &load->dest.ssa;

                /* NOTE(review): nir_ige is a signed comparison, so a negative
                 * layer value is not replaced by 0 here -- confirm whether
                 * that case needs handling too.
                 */
                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
                nir_ssa_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl(b, src, nir_imm_int(b, 16)));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually.  We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                /* Index of this component in the stage key's used outputs,
                 * or -1 if it is not listed there.
                 */
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, var,
                                                    i +
                                                    start_comp -
                                                    var->data.location_frac);

                if (vpm_offset == -1)
                        continue;

                /* Remember that this varying got a real value so that it is
                 * not zero-filled when the FF outputs are emitted.
                 */
                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        nir_instr_remove(&intr->instr);
}
 264
 265 static inline void
 266 reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
 267 {
 268         const uint8_t NEW_PRIMITIVE_OFFSET = 0;
 269         const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
 270
 271         uint32_t vertex_data_size = state->gs.output_vertex_data_size;
 272         assert((vertex_data_size & 0xffffff00) == 0);
 273
 274         uint32_t header;
 275         header  = 1 << NEW_PRIMITIVE_OFFSET;
 276         header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
 277         nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
 278 }
 279
 280 static void
 281 v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
 282                           nir_intrinsic_instr *instr,
 283                           struct v3d_nir_lower_io_state *state)
 284 {
 285         b->cursor = nir_before_instr(&instr->instr);
 286
 287         nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
 288         nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
 289         nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
 290
 291         /* Emit fixed function outputs */
 292         v3d_nir_emit_ff_vpm_outputs(c, b, state);
 293
 294         /* Emit vertex header */
 295         v3d_nir_store_output(b, 0, header_offset, header);
 296
 297         /* Update VPM offset for next vertex output data and header */
 298         output_offset =
 299                 nir_iadd(b, output_offset,
 300                             nir_imm_int(b, state->gs.output_vertex_data_size));
 301
 302         header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));
 303
 304         /* Reset the New Primitive bit */
 305         header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));
 306
 307         nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
 308         nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
 309         nir_store_var(b, state->gs.header_var, header, 0x1);
 310
 311         nir_instr_remove(&instr->instr);
 312 }
 313
 314 static void
 315 v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
 316                             nir_intrinsic_instr *instr,
 317                             struct v3d_nir_lower_io_state *state)
 318 {
 319         assert(state->gs.header_var);
 320         b->cursor = nir_before_instr(&instr->instr);
 321         reset_gs_header(b, state);
 322
 323         nir_instr_remove(&instr->instr);
 324 }
 325
 326 static void
 327 v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
 328                        struct nir_instr *instr,
 329                        struct v3d_nir_lower_io_state *state)
 330 {
 331         if (instr->type != nir_instr_type_intrinsic)
 332                 return;
 333         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 334
 335         switch (intr->intrinsic) {
 336         case nir_intrinsic_load_uniform:
 337                 v3d_nir_lower_uniform(c, b, intr);
 338                 break;
 339
 340         case nir_intrinsic_store_output:
 341                 if (c->s->info.stage == MESA_SHADER_VERTEX ||
 342                     c->s->info.stage == MESA_SHADER_GEOMETRY) {
 343                         v3d_nir_lower_vpm_output(c, b, intr, state);
 344                 }
 345                 break;
 346
 347         case nir_intrinsic_emit_vertex:
 348                 v3d_nir_lower_emit_vertex(c, b, intr, state);
 349                 break;
 350
 351         case nir_intrinsic_end_primitive:
 352                 v3d_nir_lower_end_primitive(c, b, intr, state);
 353                 break;
 354
 355         default:
 356                 break;
 357         }
 358 }
 359
 360 /* Remap the output var's .driver_location.  This is purely for
 361  * nir_print_shader() so that store_output can map back to a variable name.
 362  */
 363 static void
 364 v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
 365                                         struct v3d_nir_lower_io_state *state)
 366 {
 367         nir_foreach_variable_safe(var, &c->s->outputs) {
 368                 if (var->data.location == VARYING_SLOT_POS &&
 369                     state->pos_vpm_offset != -1) {
 370                         var->data.driver_location = state->pos_vpm_offset;
 371                         continue;
 372                 }
 373
 374                 if (var->data.location == VARYING_SLOT_PSIZ &&
 375                     state->psiz_vpm_offset != -1) {
 376                         var->data.driver_location = state->psiz_vpm_offset;
 377                         continue;
 378                 }
 379
 380                 int vpm_offset = v3d_varying_slot_vpm_offset(c, var, 0);
 381                 if (vpm_offset != -1) {
 382                         var->data.driver_location =
 383                                 state->varyings_vpm_offset + vpm_offset;
 384                 } else {
 385                         /* If we couldn't find a mapping for the var, delete
 386                          * it so that its old .driver_location doesn't confuse
 387                          * nir_print_shader().
 388                          */
 389                         exec_node_remove(&var->node);
 390                 }
 391         }
 392 }
 393
 394 static void
 395 v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
 396                             struct v3d_nir_lower_io_state *state)
 397 {
 398         uint32_t vpm_offset = 0;
 399
 400         state->pos_vpm_offset = -1;
 401         state->vp_vpm_offset = -1;
 402         state->zs_vpm_offset = -1;
 403         state->rcp_wc_vpm_offset = -1;
 404         state->psiz_vpm_offset = -1;
 405
 406         bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
 407         if (needs_ff_outputs) {
 408                 if (c->vs_key->is_coord) {
 409                         state->pos_vpm_offset = vpm_offset;
 410                         vpm_offset += 4;
 411                 }
 412
 413                 state->vp_vpm_offset = vpm_offset;
 414                 vpm_offset += 2;
 415
 416                 if (!c->vs_key->is_coord) {
 417                         state->zs_vpm_offset = vpm_offset++;
 418                         state->rcp_wc_vpm_offset = vpm_offset++;
 419                 }
 420
 421                 if (c->vs_key->per_vertex_point_size)
 422                         state->psiz_vpm_offset = vpm_offset++;
 423         }
 424
 425         state->varyings_vpm_offset = vpm_offset;
 426
 427         c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
 428 }
 429
 430 static void
 431 v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
 432                             struct v3d_nir_lower_io_state *state)
 433 {
 434         /* 1 header slot for number of output vertices */
 435         uint32_t vpm_offset = 1;
 436
 437         /* 1 header slot per output vertex */
 438         const uint32_t num_vertices = c->s->info.gs.vertices_out;
 439         vpm_offset += num_vertices;
 440
 441         state->gs.output_header_size = vpm_offset;
 442
 443         /* Vertex data: here we only compute offsets into a generic vertex data
 444          * elements. When it is time to actually write a particular vertex to
 445          * the VPM, we will add the offset for that vertex into the VPM output
 446          * to these offsets.
 447          *
 448          * If geometry shaders are present, they are always the last shader
 449          * stage before rasterization, so we always emit fixed function outputs.
 450          */
 451         vpm_offset = 0;
 452         if (c->gs_key->is_coord) {
 453                 state->pos_vpm_offset = vpm_offset;
 454                 vpm_offset += 4;
 455         } else {
 456                 state->pos_vpm_offset = -1;
 457         }
 458
 459         state->vp_vpm_offset = vpm_offset;
 460         vpm_offset += 2;
 461
 462         if (!c->gs_key->is_coord) {
 463                 state->zs_vpm_offset = vpm_offset++;
 464                 state->rcp_wc_vpm_offset = vpm_offset++;
 465         } else {
 466                 state->zs_vpm_offset = -1;
 467                 state->rcp_wc_vpm_offset = -1;
 468         }
 469
 470         /* Mesa enables OES_geometry_shader_point_size automatically with
 471          * OES_geometry_shader so we always need to handle point size
 472          * writes if present.
 473          */
 474         if (c->gs_key->per_vertex_point_size)
 475                 state->psiz_vpm_offset = vpm_offset++;
 476
 477         state->varyings_vpm_offset = vpm_offset;
 478
 479         state->gs.output_vertex_data_size =
 480                 state->varyings_vpm_offset + c->gs_key->num_used_outputs;
 481
 482         c->vpm_output_size =
 483                 state->gs.output_header_size +
 484                 state->gs.output_vertex_data_size * num_vertices;
 485 }
 486
 487 static void
 488 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
 489                             struct v3d_nir_lower_io_state *state)
 490 {
 491         /* If this is a geometry shader we need to emit our fixed function
 492          * outputs to the current vertex offset in the VPM.
 493          */
 494         nir_ssa_def *offset_reg =
 495                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
 496                         nir_load_var(b, state->gs.output_offset_var) : NULL;
 497
 498         for (int i = 0; i < 4; i++) {
 499                 if (!state->pos[i])
 500                         state->pos[i] = nir_ssa_undef(b, 1, 32);
 501         }
 502
 503         nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);
 504
 505         if (state->pos_vpm_offset != -1) {
 506                 for (int i = 0; i < 4; i++) {
 507                         v3d_nir_store_output(b, state->pos_vpm_offset + i,
 508                                              offset_reg, state->pos[i]);
 509                 }
 510         }
 511
 512         if (state->vp_vpm_offset != -1) {
 513                 for (int i = 0; i < 2; i++) {
 514                         nir_ssa_def *pos;
 515                         nir_ssa_def *scale;
 516                         pos = state->pos[i];
 517                         if (i == 0)
 518                                 scale = nir_load_viewport_x_scale(b);
 519                         else
 520                                 scale = nir_load_viewport_y_scale(b);
 521                         pos = nir_fmul(b, pos, scale);
 522                         pos = nir_fmul(b, pos, rcp_wc);
 523                         pos = nir_f2i32(b, nir_fround_even(b, pos));
 524                         v3d_nir_store_output(b, state->vp_vpm_offset + i,
 525                                              offset_reg, pos);
 526                 }
 527         }
 528
 529         if (state->zs_vpm_offset != -1) {
 530                 nir_ssa_def *z = state->pos[2];
 531                 z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
 532                 z = nir_fmul(b, z, rcp_wc);
 533                 z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
 534                 v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
 535         }
 536
 537         if (state->rcp_wc_vpm_offset != -1) {
 538                 v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
 539                                      offset_reg, rcp_wc);
 540         }
 541
 542         /* Store 0 to varyings requested by the FS but not stored by the
 543          * previous stage. This should be undefined behavior, but
 544          * glsl-routing seems to rely on it.
 545          */
 546         uint32_t num_used_outputs;
 547         switch (c->s->info.stage) {
 548         case MESA_SHADER_VERTEX:
 549                 num_used_outputs = c->vs_key->num_used_outputs;
 550                 break;
 551         case MESA_SHADER_GEOMETRY:
 552                 num_used_outputs = c->gs_key->num_used_outputs;
 553                 break;
 554         default:
 555                 unreachable("Unsupported shader stage");
 556         }
 557
 558         for (int i = 0; i < num_used_outputs; i++) {
 559                 if (!BITSET_TEST(state->varyings_stored, i)) {
 560                         v3d_nir_store_output(b, state->varyings_vpm_offset + i,
 561                                              offset_reg, nir_imm_int(b, 0));
 562                 }
 563         }
 564 }
 565
 566 static void
 567 emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
 568                nir_function_impl *impl,
 569                struct v3d_nir_lower_io_state *state)
 570 {
 571         nir_block *first = nir_start_block(impl);
 572         b->cursor = nir_before_block(first);
 573
 574         const struct glsl_type *uint_type = glsl_uint_type();
 575
 576         assert(!state->gs.output_offset_var);
 577         state->gs.output_offset_var =
 578                 nir_local_variable_create(impl, uint_type, "output_offset");
 579         nir_store_var(b, state->gs.output_offset_var,
 580                       nir_imm_int(b, state->gs.output_header_size), 0x1);
 581
 582         assert(!state->gs.header_offset_var);
 583         state->gs.header_offset_var =
 584                 nir_local_variable_create(impl, uint_type, "header_offset");
 585         nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
 586
 587         assert(!state->gs.header_var);
 588         state->gs.header_var =
 589                 nir_local_variable_create(impl, uint_type, "header");
 590         reset_gs_header(b, state);
 591 }
 592
 593 static void
 594 emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
 595                                  struct v3d_nir_lower_io_state *state)
 596 {
 597         const uint8_t VERTEX_COUNT_OFFSET = 16;
 598
 599         /* Our GS header has 1 generic header slot (at VPM offset 0) and then
 600          * one slot per output vertex after it. This means we don't need to
 601          * have a variable just to keep track of the number of vertices we
 602          * emitted and instead we can just compute it here from the header
 603          * offset variable by removing the one generic header slot that always
 604          * goes at the begining of out header.
 605          */
 606         nir_ssa_def *header_offset =
 607                 nir_load_var(b, state->gs.header_offset_var);
 608         nir_ssa_def *vertex_count =
 609                 nir_isub(b, header_offset, nir_imm_int(b, 1));
 610         nir_ssa_def *header =
 611                 nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
 612                            nir_ishl(b, vertex_count,
 613                                     nir_imm_int(b, VERTEX_COUNT_OFFSET)));
 614
 615         v3d_nir_store_output(b, 0, NULL, header);
 616 }
 617
 618 void
 619 v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
 620 {
 621         struct v3d_nir_lower_io_state state = { 0 };
 622
 623         /* Set up the layout of the VPM outputs. */
 624         switch (s->info.stage) {
 625         case MESA_SHADER_VERTEX:
 626                 v3d_nir_setup_vpm_layout_vs(c, &state);
 627                 break;
 628         case MESA_SHADER_GEOMETRY:
 629                 v3d_nir_setup_vpm_layout_gs(c, &state);
 630                 break;
 631         case MESA_SHADER_FRAGMENT:
 632         case MESA_SHADER_COMPUTE:
 633                 break;
 634         default:
 635                 unreachable("Unsupported shader stage");
 636         }
 637
 638         nir_foreach_function(function, s) {
 639                 if (function->impl) {
 640                         nir_builder b;
 641                         nir_builder_init(&b, function->impl);
 642
 643                         if (c->s->info.stage == MESA_SHADER_GEOMETRY)
 644                                 emit_gs_prolog(c, &b, function->impl, &state);
 645
 646                         nir_foreach_block(block, function->impl) {
 647                                 nir_foreach_instr_safe(instr, block)
 648                                         v3d_nir_lower_io_instr(c, &b, instr,
 649                                                                &state);
 650                         }
 651
 652                         nir_block *last = nir_impl_last_block(function->impl);
 653                         b.cursor = nir_after_block(last);
 654                         if (s->info.stage == MESA_SHADER_VERTEX) {
 655                                 v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
 656                         } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
 657                                 emit_gs_vpm_output_header_prolog(c, &b, &state);
 658                         }
 659
 660                         nir_metadata_preserve(function->impl,
 661                                               nir_metadata_block_index |
 662                                               nir_metadata_dominance);
 663                 }
 664         }
 665
 666         if (s->info.stage == MESA_SHADER_VERTEX ||
 667             s->info.stage == MESA_SHADER_GEOMETRY) {
 668                 v3d_nir_lower_io_update_output_var_base(c, &state);
 669         }
 670 }