src/broadcom/compiler/v3d_nir_lower_io.c

   1 /*
   2  * Copyright © 2015 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "compiler/v3d_compiler.h"
  25 #include "compiler/nir/nir_builder.h"
  26
  27 /**
  28  * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
  29  * intrinsics into something amenable to the V3D architecture.
  30  *
  31  * Most of the work is turning the VS's store_output intrinsics from working
  32  * on a base representing the gallium-level vec4 driver_location to an offset
  33  * within the VPM, and emitting the header that's read by the fixed function
  34  * hardware between the VS and FS.
  35  *
  36  * We also adjust the offsets on uniform loads to be in bytes, since that's
  37  * what we need for indirect addressing with general TMU access.
  38  */
  39
  40 struct v3d_nir_lower_io_state {
  41         int pos_vpm_offset;
  42         int vp_vpm_offset;
  43         int zs_vpm_offset;
  44         int rcp_wc_vpm_offset;
  45         int psiz_vpm_offset;
  46         int varyings_vpm_offset;
  47
  48         /* Geometry shader state */
  49         struct {
  50                 /* VPM offset for the current vertex data output */
  51                 nir_variable *output_offset_var;
  52                 /* VPM offset for the current vertex header */
  53                 nir_variable *header_offset_var;
  54                 /* VPM header for the current vertex */
  55                 nir_variable *header_var;
  56
  57                 /* Size of the complete VPM output header */
  58                 uint32_t output_header_size;
  59                 /* Size of the output data for a single vertex */
  60                 uint32_t output_vertex_data_size;
  61         } gs;
  62
  63         BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];
  64
  65         nir_ssa_def *pos[4];
  66 };
  67
  68 static void
  69 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
  70                             struct v3d_nir_lower_io_state *state);
  71
  72 static void
  73 v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
  74                      nir_ssa_def *chan)
  75 {
  76         nir_intrinsic_instr *intr =
  77                 nir_intrinsic_instr_create(b->shader,
  78                                            nir_intrinsic_store_output);
  79         nir_ssa_dest_init(&intr->instr, &intr->dest,
  80                           1, intr->dest.ssa.bit_size, NULL);
  81         intr->num_components = 1;
  82
  83         intr->src[0] = nir_src_for_ssa(chan);
  84         if (offset)
  85                 intr->src[1] = nir_src_for_ssa(offset);
  86         else
  87                 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
  88
  89         nir_intrinsic_set_base(intr, base);
  90         nir_intrinsic_set_write_mask(intr, 0x1);
  91         nir_intrinsic_set_component(intr, 0);
  92
  93         nir_builder_instr_insert(b, &intr->instr);
  94 }
  95
  96 /* Convert the uniform offset to bytes.  If it happens to be a constant,
  97  * constant-folding will clean up the shift for us.
  98  */
  99 static void
 100 v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
 101                       nir_intrinsic_instr *intr)
 102 {
 103         b->cursor = nir_before_instr(&intr->instr);
 104
 105         nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);
 106
 107         nir_instr_rewrite_src(&intr->instr,
 108                               &intr->src[0],
 109                               nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
 110                                                        nir_imm_int(b, 4))));
 111 }
 112
 113 static int
 114 v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan)
 115 {
 116         int component = var->data.location_frac + chan;
 117
 118         uint32_t num_used_outputs = 0;
 119         struct v3d_varying_slot *used_outputs = NULL;
 120         switch (c->s->info.stage) {
 121         case MESA_SHADER_VERTEX:
 122                 num_used_outputs = c->vs_key->num_used_outputs;
 123                 used_outputs = c->vs_key->used_outputs;
 124                 break;
 125         case MESA_SHADER_GEOMETRY:
 126                 num_used_outputs = c->gs_key->num_used_outputs;
 127                 used_outputs = c->gs_key->used_outputs;
 128                 break;
 129         default:
 130                 unreachable("Unsupported shader stage");
 131         }
 132
 133         for (int i = 0; i < num_used_outputs; i++) {
 134                 struct v3d_varying_slot slot = used_outputs[i];
 135
 136                 if (v3d_slot_get_slot(slot) == var->data.location &&
 137                     v3d_slot_get_component(slot) == component) {
 138                         return i;
 139                 }
 140         }
 141
 142         return -1;
 143 }
 144
 145 /* Lowers a store_output(gallium driver location) to a series of store_outputs
 146  * with a driver_location equal to the offset in the VPM.
 147  *
 148  * For geometry shaders we need to emit multiple vertices so the VPM offsets
 149  * need to be computed in the shader code based on the current vertex index.
 150  */
 151 static void
 152 v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
 153                          nir_intrinsic_instr *intr,
 154                          struct v3d_nir_lower_io_state *state)
 155 {
 156         b->cursor = nir_before_instr(&intr->instr);
 157
 158         /* If this is a geometry shader we need to emit our outputs
 159          * to the current vertex offset in the VPM.
 160          */
 161         nir_ssa_def *offset_reg =
 162                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
 163                         nir_load_var(b, state->gs.output_offset_var) : NULL;
 164
 165         int start_comp = nir_intrinsic_component(intr);
 166         nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
 167                                            intr->num_components);
 168
 169         nir_variable *var = NULL;
 170         nir_foreach_variable(scan_var, &c->s->outputs) {
 171                 if (scan_var->data.driver_location != nir_intrinsic_base(intr) ||
 172                     start_comp < scan_var->data.location_frac ||
 173                     start_comp >= scan_var->data.location_frac +
 174                     glsl_get_components(scan_var->type)) {
 175                         continue;
 176                 }
 177                 var = scan_var;
 178         }
 179         assert(var);
 180
 181         /* Save off the components of the position for the setup of VPM inputs
 182          * read by fixed function HW.
 183          */
 184         if (var->data.location == VARYING_SLOT_POS) {
 185                 for (int i = 0; i < intr->num_components; i++) {
 186                         state->pos[start_comp + i] = nir_channel(b, src, i);
 187                 }
 188         }
 189
 190         /* Just psiz to the position in the FF header right now. */
 191         if (var->data.location == VARYING_SLOT_PSIZ &&
 192             state->psiz_vpm_offset != -1) {
 193                 v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
 194         }
 195
 196         if (var->data.location == VARYING_SLOT_LAYER) {
 197                 assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
 198                 nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
 199                 header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));
 200
 201                 /* From the GLES 3.2 spec:
 202                  *
 203                  *    "When fragments are written to a layered framebuffer, the
 204                  *     fragment’s layer number selects an image from the array
 205                  *     of images at each attachment (...). If the fragment’s
 206                  *     layer number is negative, or greater than or equal to
 207                  *     the minimum number of layers of any attachment, the
 208                  *     effects of the fragment on the framebuffer contents are
 209                  *     undefined."
 210                  *
 211                  * This suggests we can just ignore that situation, however,
 212                  * for V3D an out-of-bounds layer index means that the binner
 213                  * might do out-of-bounds writes access to the tile state. The
 214                  * simulator has an assert to catch this, so we play safe here
 215                  * and we make sure that doesn't happen by setting gl_Layer
 216                  * to 0 in that case (we always allocate tile state for at
 217                  * least one layer).
 218                  */
 219                 nir_intrinsic_instr *load =
 220                         nir_intrinsic_instr_create(b->shader,
 221                                                    nir_intrinsic_load_fb_layers_v3d);
 222                 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
 223                 nir_builder_instr_insert(b, &load->instr);
 224                 nir_ssa_def *fb_layers = &load->dest.ssa;
 225
 226                 nir_ssa_def *cond = nir_ige(b, src, fb_layers);
 227                 nir_ssa_def *layer_id =
 228                         nir_bcsel(b, cond,
 229                                   nir_imm_int(b, 0),
 230                                   nir_ishl(b, src, nir_imm_int(b, 16)));
 231                 header = nir_ior(b, header, layer_id);
 232                 nir_store_var(b, state->gs.header_var, header, 0x1);
 233         }
 234
 235         /* Scalarize outputs if it hasn't happened already, since we want to
 236          * schedule each VPM write individually.  We can skip any outut
 237          * components not read by the FS.
 238          */
 239         for (int i = 0; i < intr->num_components; i++) {
 240                 int vpm_offset =
 241                         v3d_varying_slot_vpm_offset(c, var,
 242                                                     i +
 243                                                     start_comp -
 244                                                     var->data.location_frac);
 245
 246                 if (vpm_offset == -1)
 247                         continue;
 248
 249                 BITSET_SET(state->varyings_stored, vpm_offset);
 250
 251                 v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
 252                                      offset_reg, nir_channel(b, src, i));
 253         }
 254
 255         nir_instr_remove(&intr->instr);
 256 }
 257
 258 static inline void
 259 reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
 260 {
 261         const uint8_t NEW_PRIMITIVE_OFFSET = 0;
 262         const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
 263
 264         uint32_t vertex_data_size = state->gs.output_vertex_data_size;
 265         assert((vertex_data_size & 0xffffff00) == 0);
 266
 267         uint32_t header;
 268         header  = 1 << NEW_PRIMITIVE_OFFSET;
 269         header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
 270         nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
 271 }
 272
 273 static void
 274 v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
 275                           nir_intrinsic_instr *instr,
 276                           struct v3d_nir_lower_io_state *state)
 277 {
 278         b->cursor = nir_before_instr(&instr->instr);
 279
 280         nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
 281         nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
 282         nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
 283
 284         /* Emit fixed function outputs */
 285         v3d_nir_emit_ff_vpm_outputs(c, b, state);
 286
 287         /* Emit vertex header */
 288         v3d_nir_store_output(b, 0, header_offset, header);
 289
 290         /* Update VPM offset for next vertex output data and header */
 291         output_offset =
 292                 nir_iadd(b, output_offset,
 293                             nir_imm_int(b, state->gs.output_vertex_data_size));
 294
 295         header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));
 296
 297         /* Reset the New Primitive bit */
 298         header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));
 299
 300         nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
 301         nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
 302         nir_store_var(b, state->gs.header_var, header, 0x1);
 303
 304         nir_instr_remove(&instr->instr);
 305 }
 306
 307 static void
 308 v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
 309                             nir_intrinsic_instr *instr,
 310                             struct v3d_nir_lower_io_state *state)
 311 {
 312         assert(state->gs.header_var);
 313         b->cursor = nir_before_instr(&instr->instr);
 314         reset_gs_header(b, state);
 315
 316         nir_instr_remove(&instr->instr);
 317 }
 318
 319 static void
 320 v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
 321                        struct nir_instr *instr,
 322                        struct v3d_nir_lower_io_state *state)
 323 {
 324         if (instr->type != nir_instr_type_intrinsic)
 325                 return;
 326         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 327
 328         switch (intr->intrinsic) {
 329         case nir_intrinsic_load_uniform:
 330                 v3d_nir_lower_uniform(c, b, intr);
 331                 break;
 332
 333         case nir_intrinsic_store_output:
 334                 if (c->s->info.stage == MESA_SHADER_VERTEX ||
 335                     c->s->info.stage == MESA_SHADER_GEOMETRY) {
 336                         v3d_nir_lower_vpm_output(c, b, intr, state);
 337                 }
 338                 break;
 339
 340         case nir_intrinsic_emit_vertex:
 341                 v3d_nir_lower_emit_vertex(c, b, intr, state);
 342                 break;
 343
 344         case nir_intrinsic_end_primitive:
 345                 v3d_nir_lower_end_primitive(c, b, intr, state);
 346                 break;
 347
 348         default:
 349                 break;
 350         }
 351 }
 352
 353 /* Remap the output var's .driver_location.  This is purely for
 354  * nir_print_shader() so that store_output can map back to a variable name.
 355  */
 356 static void
 357 v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
 358                                         struct v3d_nir_lower_io_state *state)
 359 {
 360         nir_foreach_variable_safe(var, &c->s->outputs) {
 361                 if (var->data.location == VARYING_SLOT_POS &&
 362                     state->pos_vpm_offset != -1) {
 363                         var->data.driver_location = state->pos_vpm_offset;
 364                         continue;
 365                 }
 366
 367                 if (var->data.location == VARYING_SLOT_PSIZ &&
 368                     state->psiz_vpm_offset != -1) {
 369                         var->data.driver_location = state->psiz_vpm_offset;
 370                         continue;
 371                 }
 372
 373                 int vpm_offset = v3d_varying_slot_vpm_offset(c, var, 0);
 374                 if (vpm_offset != -1) {
 375                         var->data.driver_location =
 376                                 state->varyings_vpm_offset + vpm_offset;
 377                 } else {
 378                         /* If we couldn't find a mapping for the var, delete
 379                          * it so that its old .driver_location doesn't confuse
 380                          * nir_print_shader().
 381                          */
 382                         exec_node_remove(&var->node);
 383                 }
 384         }
 385 }
 386
 387 static void
 388 v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
 389                             struct v3d_nir_lower_io_state *state)
 390 {
 391         uint32_t vpm_offset = 0;
 392
 393         state->pos_vpm_offset = -1;
 394         state->vp_vpm_offset = -1;
 395         state->zs_vpm_offset = -1;
 396         state->rcp_wc_vpm_offset = -1;
 397         state->psiz_vpm_offset = -1;
 398
 399         bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
 400         if (needs_ff_outputs) {
 401                 if (c->vs_key->is_coord) {
 402                         state->pos_vpm_offset = vpm_offset;
 403                         vpm_offset += 4;
 404                 }
 405
 406                 state->vp_vpm_offset = vpm_offset;
 407                 vpm_offset += 2;
 408
 409                 if (!c->vs_key->is_coord) {
 410                         state->zs_vpm_offset = vpm_offset++;
 411                         state->rcp_wc_vpm_offset = vpm_offset++;
 412                 }
 413
 414                 if (c->vs_key->per_vertex_point_size)
 415                         state->psiz_vpm_offset = vpm_offset++;
 416         }
 417
 418         state->varyings_vpm_offset = vpm_offset;
 419
 420         c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
 421 }
 422
 423 static void
 424 v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
 425                             struct v3d_nir_lower_io_state *state)
 426 {
 427         /* 1 header slot for number of output vertices */
 428         uint32_t vpm_offset = 1;
 429
 430         /* 1 header slot per output vertex */
 431         const uint32_t num_vertices = c->s->info.gs.vertices_out;
 432         vpm_offset += num_vertices;
 433
 434         state->gs.output_header_size = vpm_offset;
 435
 436         /* Vertex data: here we only compute offsets into a generic vertex data
 437          * elements. When it is time to actually write a particular vertex to
 438          * the VPM, we will add the offset for that vertex into the VPM output
 439          * to these offsets.
 440          *
 441          * If geometry shaders are present, they are always the last shader
 442          * stage before rasterization, so we always emit fixed function outputs.
 443          */
 444         vpm_offset = 0;
 445         if (c->gs_key->is_coord) {
 446                 state->pos_vpm_offset = vpm_offset;
 447                 vpm_offset += 4;
 448         } else {
 449                 state->pos_vpm_offset = -1;
 450         }
 451
 452         state->vp_vpm_offset = vpm_offset;
 453         vpm_offset += 2;
 454
 455         if (!c->gs_key->is_coord) {
 456                 state->zs_vpm_offset = vpm_offset++;
 457                 state->rcp_wc_vpm_offset = vpm_offset++;
 458         } else {
 459                 state->zs_vpm_offset = -1;
 460                 state->rcp_wc_vpm_offset = -1;
 461         }
 462
 463         /* Mesa enables OES_geometry_shader_point_size automatically with
 464          * OES_geometry_shader so we always need to handle point size
 465          * writes if present.
 466          */
 467         if (c->gs_key->per_vertex_point_size)
 468                 state->psiz_vpm_offset = vpm_offset++;
 469
 470         state->varyings_vpm_offset = vpm_offset;
 471
 472         state->gs.output_vertex_data_size =
 473                 state->varyings_vpm_offset + c->gs_key->num_used_outputs;
 474
 475         c->vpm_output_size =
 476                 state->gs.output_header_size +
 477                 state->gs.output_vertex_data_size * num_vertices;
 478 }
 479
 480 static void
 481 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
 482                             struct v3d_nir_lower_io_state *state)
 483 {
 484         /* If this is a geometry shader we need to emit our fixed function
 485          * outputs to the current vertex offset in the VPM.
 486          */
 487         nir_ssa_def *offset_reg =
 488                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
 489                         nir_load_var(b, state->gs.output_offset_var) : NULL;
 490
 491         for (int i = 0; i < 4; i++) {
 492                 if (!state->pos[i])
 493                         state->pos[i] = nir_ssa_undef(b, 1, 32);
 494         }
 495
 496         nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);
 497
 498         if (state->pos_vpm_offset != -1) {
 499                 for (int i = 0; i < 4; i++) {
 500                         v3d_nir_store_output(b, state->pos_vpm_offset + i,
 501                                              offset_reg, state->pos[i]);
 502                 }
 503         }
 504
 505         if (state->vp_vpm_offset != -1) {
 506                 for (int i = 0; i < 2; i++) {
 507                         nir_ssa_def *pos;
 508                         nir_ssa_def *scale;
 509                         pos = state->pos[i];
 510                         if (i == 0)
 511                                 scale = nir_load_viewport_x_scale(b);
 512                         else
 513                                 scale = nir_load_viewport_y_scale(b);
 514                         pos = nir_fmul(b, pos, scale);
 515                         pos = nir_fmul(b, pos, rcp_wc);
 516                         pos = nir_f2i32(b, nir_fround_even(b, pos));
 517                         v3d_nir_store_output(b, state->vp_vpm_offset + i,
 518                                              offset_reg, pos);
 519                 }
 520         }
 521
 522         if (state->zs_vpm_offset != -1) {
 523                 nir_ssa_def *z = state->pos[2];
 524                 z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
 525                 z = nir_fmul(b, z, rcp_wc);
 526                 z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
 527                 v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
 528         }
 529
 530         if (state->rcp_wc_vpm_offset != -1) {
 531                 v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
 532                                      offset_reg, rcp_wc);
 533         }
 534
 535         /* Store 0 to varyings requested by the FS but not stored by the
 536          * previous stage. This should be undefined behavior, but
 537          * glsl-routing seems to rely on it.
 538          */
 539         uint32_t num_used_outputs;
 540         switch (c->s->info.stage) {
 541         case MESA_SHADER_VERTEX:
 542                 num_used_outputs = c->vs_key->num_used_outputs;
 543                 break;
 544         case MESA_SHADER_GEOMETRY:
 545                 num_used_outputs = c->gs_key->num_used_outputs;
 546                 break;
 547         default:
 548                 unreachable("Unsupported shader stage");
 549         }
 550
 551         for (int i = 0; i < num_used_outputs; i++) {
 552                 if (!BITSET_TEST(state->varyings_stored, i)) {
 553                         v3d_nir_store_output(b, state->varyings_vpm_offset + i,
 554                                              offset_reg, nir_imm_int(b, 0));
 555                 }
 556         }
 557 }
 558
 559 static void
 560 emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
 561                nir_function_impl *impl,
 562                struct v3d_nir_lower_io_state *state)
 563 {
 564         nir_block *first = nir_start_block(impl);
 565         b->cursor = nir_before_block(first);
 566
 567         const struct glsl_type *uint_type = glsl_uint_type();
 568
 569         assert(!state->gs.output_offset_var);
 570         state->gs.output_offset_var =
 571                 nir_local_variable_create(impl, uint_type, "output_offset");
 572         nir_store_var(b, state->gs.output_offset_var,
 573                       nir_imm_int(b, state->gs.output_header_size), 0x1);
 574
 575         assert(!state->gs.header_offset_var);
 576         state->gs.header_offset_var =
 577                 nir_local_variable_create(impl, uint_type, "header_offset");
 578         nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
 579
 580         assert(!state->gs.header_var);
 581         state->gs.header_var =
 582                 nir_local_variable_create(impl, uint_type, "header");
 583         reset_gs_header(b, state);
 584 }
 585
 586 static void
 587 emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
 588                                  struct v3d_nir_lower_io_state *state)
 589 {
 590         const uint8_t VERTEX_COUNT_OFFSET = 16;
 591
 592         /* Our GS header has 1 generic header slot (at VPM offset 0) and then
 593          * one slot per output vertex after it. This means we don't need to
 594          * have a variable just to keep track of the number of vertices we
 595          * emitted and instead we can just compute it here from the header
 596          * offset variable by removing the one generic header slot that always
 597          * goes at the begining of out header.
 598          */
 599         nir_ssa_def *header_offset =
 600                 nir_load_var(b, state->gs.header_offset_var);
 601         nir_ssa_def *vertex_count =
 602                 nir_isub(b, header_offset, nir_imm_int(b, 1));
 603         nir_ssa_def *header =
 604                 nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
 605                            nir_ishl(b, vertex_count,
 606                                     nir_imm_int(b, VERTEX_COUNT_OFFSET)));
 607
 608         v3d_nir_store_output(b, 0, NULL, header);
 609 }
 610
 611 void
 612 v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
 613 {
 614         struct v3d_nir_lower_io_state state = { 0 };
 615
 616         /* Set up the layout of the VPM outputs. */
 617         switch (s->info.stage) {
 618         case MESA_SHADER_VERTEX:
 619                 v3d_nir_setup_vpm_layout_vs(c, &state);
 620                 break;
 621         case MESA_SHADER_GEOMETRY:
 622                 v3d_nir_setup_vpm_layout_gs(c, &state);
 623                 break;
 624         case MESA_SHADER_FRAGMENT:
 625         case MESA_SHADER_COMPUTE:
 626                 break;
 627         default:
 628                 unreachable("Unsupported shader stage");
 629         }
 630
 631         nir_foreach_function(function, s) {
 632                 if (function->impl) {
 633                         nir_builder b;
 634                         nir_builder_init(&b, function->impl);
 635
 636                         if (c->s->info.stage == MESA_SHADER_GEOMETRY)
 637                                 emit_gs_prolog(c, &b, function->impl, &state);
 638
 639                         nir_foreach_block(block, function->impl) {
 640                                 nir_foreach_instr_safe(instr, block)
 641                                         v3d_nir_lower_io_instr(c, &b, instr,
 642                                                                &state);
 643                         }
 644
 645                         nir_block *last = nir_impl_last_block(function->impl);
 646                         b.cursor = nir_after_block(last);
 647                         if (s->info.stage == MESA_SHADER_VERTEX) {
 648                                 v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
 649                         } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
 650                                 emit_gs_vpm_output_header_prolog(c, &b, &state);
 651                         }
 652
 653                         nir_metadata_preserve(function->impl,
 654                                               nir_metadata_block_index |
 655                                               nir_metadata_dominance);
 656                 }
 657         }
 658
 659         if (s->info.stage == MESA_SHADER_VERTEX ||
 660             s->info.stage == MESA_SHADER_GEOMETRY) {
 661                 v3d_nir_lower_io_update_output_var_base(c, &state);
 662         }
 663 }