src/broadcom/compiler/v3d_nir_lower_io.c
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"

/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its I/O
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed-function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */
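
/* For illustration (hypothetical NIR, not the verbatim output of any
 * particular frontend): a vec4 VS store such as
 *
 *    intrinsic store_output (ssa_10, ssa_0) (base=2, wrmask=xyzw)
 *
 * where base 2 is the gallium driver_location, is rewritten by this pass
 * into four single-component store_outputs whose bases are VPM offsets,
 * e.g. bases 5..8 if the FS-consumed varyings block starts at VPM offset 5.
 */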

struct v3d_nir_lower_io_state {
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        nir_ssa_def *pos[4];
};

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state);

static void
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
                     nir_ssa_def *chan)
{
        nir_intrinsic_instr *intr =
                nir_intrinsic_instr_create(b->shader,
                                           nir_intrinsic_store_output);
        nir_ssa_dest_init(&intr->instr, &intr->dest,
                          1, intr->dest.ssa.bit_size, NULL);
        intr->num_components = 1;

        intr->src[0] = nir_src_for_ssa(chan);
        if (offset)
                intr->src[1] = nir_src_for_ssa(offset);
        else
                intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));

        nir_intrinsic_set_base(intr, base);
        nir_intrinsic_set_write_mask(intr, 0x1);
        nir_intrinsic_set_component(intr, 0);

        nir_builder_instr_insert(b, &intr->instr);
}

/* Convert the uniform offset to bytes.  If it happens to be a constant,
 * constant-folding will clean up the shift for us.
 */
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        b->cursor = nir_before_instr(&intr->instr);

        nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);

        nir_instr_rewrite_src(&intr->instr,
                              &intr->src[0],
                              nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
                                                       nir_imm_int(b, 4))));
}
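
/* Illustrative before/after for the lowering above (values are made up):
 *
 *    intrinsic load_uniform (ssa_7) (base=3, ...)
 *
 * becomes
 *
 *    ssa_8 = ishl ssa_7, 4
 *    intrinsic load_uniform (ssa_8) (base=48, ...)
 *
 * so both the base and the indirect offset are in bytes (one gallium vec4
 * slot is 16 bytes).
 */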

static int
v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan)
{
        int component = var->data.location_frac + chan;

        uint32_t num_used_outputs = 0;
        struct v3d_varying_slot *used_outputs = NULL;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                used_outputs = c->vs_key->used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                used_outputs = c->gs_key->used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                struct v3d_varying_slot slot = used_outputs[i];

                if (v3d_slot_get_slot(slot) == var->data.location &&
                    v3d_slot_get_component(slot) == component) {
                        return i;
                }
        }

        return -1;
}

/* Lowers a store_output(gallium driver location) to a series of store_outputs
 * with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices, so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 */
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
                                           intr->num_components);

        nir_variable *var = NULL;
        nir_foreach_variable(scan_var, &c->s->outputs) {
                if (scan_var->data.driver_location != nir_intrinsic_base(intr) ||
                    start_comp < scan_var->data.location_frac ||
                    start_comp >= scan_var->data.location_frac +
                                  glsl_get_components(scan_var->type)) {
                        continue;
                }
                var = scan_var;
        }
        assert(var);

        /* Save off the components of the position for the setup of VPM inputs
         * read by the fixed-function HW.
         */
        if (var->data.location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++) {
                        state->pos[start_comp + i] = nir_channel(b, src, i);
                }
        }

        /* Just write psiz to its position in the FF header for now. */
        if (var->data.location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg,
                                     src);
        }

        if (var->data.location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));

                /* From the GLES 3.2 spec:
                 *
                 *    "When fragments are written to a layered framebuffer, the
                 *     fragment’s layer number selects an image from the array
                 *     of images at each attachment (...). If the fragment’s
                 *     layer number is negative, or greater than or equal to
                 *     the minimum number of layers of any attachment, the
                 *     effects of the fragment on the framebuffer contents are
                 *     undefined."
                 *
                 * This suggests we can just ignore that situation. However,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds write accesses to the tile state.
                 * The simulator has an assert to catch this, so we play it
                 * safe here and make sure that doesn't happen by setting
                 * gl_Layer to 0 in that case (we always allocate tile state
                 * for at least one layer).
                 */
                nir_intrinsic_instr *load =
                        nir_intrinsic_instr_create(b->shader,
                                                   nir_intrinsic_load_fb_layers_v3d);
                load->num_components = 1;
                nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
                nir_builder_instr_insert(b, &load->instr);
                nir_ssa_def *fb_layers = &load->dest.ssa;

                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
                nir_ssa_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl(b, src, nir_imm_int(b, 16)));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually.  We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, var,
                                                    i +
                                                    start_comp -
                                                    var->data.location_frac);

                if (vpm_offset == -1)
                        continue;

                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        nir_instr_remove(&intr->instr);
}

static inline void
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
{
        const uint8_t NEW_PRIMITIVE_OFFSET = 0;
        const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;

        uint32_t vertex_data_size = state->gs.output_vertex_data_size;
        assert((vertex_data_size & 0xffffff00) == 0);

        uint32_t header;
        header = 1 << NEW_PRIMITIVE_OFFSET;
        header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
        nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
}
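
/* Illustration of the header written by reset_gs_header(), assuming
 * output_vertex_data_size == 13:
 *
 *    bit 0     (NEW_PRIMITIVE)      = 1
 *    bits 8:15 (VERTEX_DATA_LENGTH) = 13
 *
 * i.e. header == 0x00000d01.
 */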

static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
                          nir_intrinsic_instr *instr,
                          struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&instr->instr);

        nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
        nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);

        /* Emit fixed function outputs */
        v3d_nir_emit_ff_vpm_outputs(c, b, state);

        /* Emit vertex header */
        v3d_nir_store_output(b, 0, header_offset, header);

        /* Update VPM offset for next vertex output data and header */
        output_offset =
                nir_iadd(b, output_offset,
                         nir_imm_int(b, state->gs.output_vertex_data_size));

        header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));

        /* Reset the New Primitive bit */
        header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));

        nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
        nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
        nir_store_var(b, state->gs.header_var, header, 0x1);

        nir_instr_remove(&instr->instr);
}
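
/* Illustrative trace of the bookkeeping above, assuming
 * output_header_size == 3 and output_vertex_data_size == 8: the GS prolog
 * initializes output_offset to 3 and header_offset to 1; after the first
 * EmitVertex() they become 11 and 2, after the second 19 and 3, and so on.
 * The New Primitive bit is only set in the header of the first vertex
 * after each primitive (re)start.
 */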

static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct v3d_nir_lower_io_state *state)
{
        assert(state->gs.header_var);
        b->cursor = nir_before_instr(&instr->instr);
        reset_gs_header(b, state);

        nir_instr_remove(&instr->instr);
}

static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
                       struct nir_instr *instr,
                       struct v3d_nir_lower_io_state *state)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_uniform:
                v3d_nir_lower_uniform(c, b, intr);
                break;

        case nir_intrinsic_store_output:
                if (c->s->info.stage == MESA_SHADER_VERTEX ||
                    c->s->info.stage == MESA_SHADER_GEOMETRY) {
                        v3d_nir_lower_vpm_output(c, b, intr, state);
                }
                break;

        case nir_intrinsic_emit_vertex:
                v3d_nir_lower_emit_vertex(c, b, intr, state);
                break;

        case nir_intrinsic_end_primitive:
                v3d_nir_lower_end_primitive(c, b, intr, state);
                break;

        default:
                break;
        }
}

/* Remap the output var's .driver_location.  This is purely for
 * nir_print_shader() so that store_output can map back to a variable name.
 */
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
                                        struct v3d_nir_lower_io_state *state)
{
        nir_foreach_variable_safe(var, &c->s->outputs) {
                if (var->data.location == VARYING_SLOT_POS &&
                    state->pos_vpm_offset != -1) {
                        var->data.driver_location = state->pos_vpm_offset;
                        continue;
                }

                if (var->data.location == VARYING_SLOT_PSIZ &&
                    state->psiz_vpm_offset != -1) {
                        var->data.driver_location = state->psiz_vpm_offset;
                        continue;
                }

                int vpm_offset = v3d_varying_slot_vpm_offset(c, var, 0);
                if (vpm_offset != -1) {
                        var->data.driver_location =
                                state->varyings_vpm_offset + vpm_offset;
                } else {
                        /* If we couldn't find a mapping for the var, delete
                         * it so that its old .driver_location doesn't confuse
                         * nir_print_shader().
                         */
                        exec_node_remove(&var->node);
                }
        }
}

static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        uint32_t vpm_offset = 0;

        state->pos_vpm_offset = -1;
        state->vp_vpm_offset = -1;
        state->zs_vpm_offset = -1;
        state->rcp_wc_vpm_offset = -1;
        state->psiz_vpm_offset = -1;

        bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
        if (needs_ff_outputs) {
                if (c->vs_key->is_coord) {
                        state->pos_vpm_offset = vpm_offset;
                        vpm_offset += 4;
                }

                state->vp_vpm_offset = vpm_offset;
                vpm_offset += 2;

                if (!c->vs_key->is_coord) {
                        state->zs_vpm_offset = vpm_offset++;
                        state->rcp_wc_vpm_offset = vpm_offset++;
                }

                if (c->vs_key->per_vertex_point_size)
                        state->psiz_vpm_offset = vpm_offset++;
        }

        state->varyings_vpm_offset = vpm_offset;

        c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}
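
/* Example layout produced above for a render (non-coord) last-stage VS
 * with per-vertex point size:
 *
 *    VPM offset 0-1: screen-space X/Y (vp)
 *    VPM offset 2:   Zs (zs)
 *    VPM offset 3:   1/Wc (rcp_wc)
 *    VPM offset 4:   point size (psiz)
 *    VPM offset 5+:  the num_used_outputs FS-consumed varyings
 */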

static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        /* 1 header slot for number of output vertices */
        uint32_t vpm_offset = 1;

        /* 1 header slot per output vertex */
        const uint32_t num_vertices = c->s->info.gs.vertices_out;
        vpm_offset += num_vertices;

        state->gs.output_header_size = vpm_offset;

        /* Vertex data: here we only compute offsets into a generic vertex
         * data record.  When it is time to actually write a particular
         * vertex to the VPM, we will add that vertex's base offset in the
         * VPM output to these relative offsets.
         *
         * If geometry shaders are present, they are always the last shader
         * stage before rasterization, so we always emit fixed-function
         * outputs.
         */
        vpm_offset = 0;
        if (c->gs_key->is_coord) {
                state->pos_vpm_offset = vpm_offset;
                vpm_offset += 4;
        } else {
                state->pos_vpm_offset = -1;
        }

        state->vp_vpm_offset = vpm_offset;
        vpm_offset += 2;

        if (!c->gs_key->is_coord) {
                state->zs_vpm_offset = vpm_offset++;
                state->rcp_wc_vpm_offset = vpm_offset++;
        } else {
                state->zs_vpm_offset = -1;
                state->rcp_wc_vpm_offset = -1;
        }

        /* Mesa enables OES_geometry_shader_point_size automatically with
         * OES_geometry_shader, so we always need to handle point size
         * writes if present.
         */
        if (c->gs_key->per_vertex_point_size)
                state->psiz_vpm_offset = vpm_offset++;

        state->varyings_vpm_offset = vpm_offset;

        state->gs.output_vertex_data_size =
                state->varyings_vpm_offset + c->gs_key->num_used_outputs;

        c->vpm_output_size =
                state->gs.output_header_size +
                state->gs.output_vertex_data_size * num_vertices;
}
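
/* Resulting GS VPM output layout, with N == vertices_out:
 *
 *    VPM offset 0:      global header (output size + vertex count)
 *    VPM offsets 1..N:  per-vertex headers
 *    VPM offset N+1 on: N vertex data records, each
 *                       output_vertex_data_size slots long, using the
 *                       relative offsets computed above (pos/vp/zs/rcp_wc/
 *                       psiz, then varyings).
 */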

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state)
{
        /* If this is a geometry shader we need to emit our fixed function
         * outputs to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                nir_load_var(b, state->gs.output_offset_var) : NULL;

        for (int i = 0; i < 4; i++) {
                if (!state->pos[i])
                        state->pos[i] = nir_ssa_undef(b, 1, 32);
        }

        nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);

        if (state->pos_vpm_offset != -1) {
                for (int i = 0; i < 4; i++) {
                        v3d_nir_store_output(b, state->pos_vpm_offset + i,
                                             offset_reg, state->pos[i]);
                }
        }

        if (state->vp_vpm_offset != -1) {
                for (int i = 0; i < 2; i++) {
                        nir_ssa_def *pos;
                        nir_ssa_def *scale;
                        pos = state->pos[i];
                        if (i == 0)
                                scale = nir_load_viewport_x_scale(b);
                        else
                                scale = nir_load_viewport_y_scale(b);
                        pos = nir_fmul(b, pos, scale);
                        pos = nir_fmul(b, pos, rcp_wc);
                        pos = nir_f2i32(b, nir_fround_even(b, pos));
                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
                                             offset_reg, pos);
                }
        }

        if (state->zs_vpm_offset != -1) {
                nir_ssa_def *z = state->pos[2];
                z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                z = nir_fmul(b, z, rcp_wc);
                z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
                v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
        }

        if (state->rcp_wc_vpm_offset != -1) {
                v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
                                     offset_reg, rcp_wc);
        }

        /* Store 0 to varyings requested by the FS but not stored by the
         * previous stage.  This should be undefined behavior, but
         * glsl-routing seems to rely on it.
         */
        uint32_t num_used_outputs;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                if (!BITSET_TEST(state->varyings_stored, i)) {
                        v3d_nir_store_output(b, state->varyings_vpm_offset + i,
                                             offset_reg, nir_imm_int(b, 0));
                }
        }
}

static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
               nir_function_impl *impl,
               struct v3d_nir_lower_io_state *state)
{
        nir_block *first = nir_start_block(impl);
        b->cursor = nir_before_block(first);

        const struct glsl_type *uint_type = glsl_uint_type();

        assert(!state->gs.output_offset_var);
        state->gs.output_offset_var =
                nir_local_variable_create(impl, uint_type, "output_offset");
        nir_store_var(b, state->gs.output_offset_var,
                      nir_imm_int(b, state->gs.output_header_size), 0x1);

        assert(!state->gs.header_offset_var);
        state->gs.header_offset_var =
                nir_local_variable_create(impl, uint_type, "header_offset");
        nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);

        assert(!state->gs.header_var);
        state->gs.header_var =
                nir_local_variable_create(impl, uint_type, "header");
        reset_gs_header(b, state);
}

static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
                                 struct v3d_nir_lower_io_state *state)
{
        const uint8_t VERTEX_COUNT_OFFSET = 16;

        /* Our GS header has 1 generic header slot (at VPM offset 0) and then
         * one slot per output vertex after it.  This means we don't need a
         * variable just to keep track of the number of vertices we emitted;
         * instead, we can compute it here from the header offset variable by
         * subtracting the one generic header slot that always goes at the
         * beginning of our header.
         */
        nir_ssa_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *vertex_count =
                nir_isub(b, header_offset, nir_imm_int(b, 1));
        nir_ssa_def *header =
                nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
                        nir_ishl(b, vertex_count,
                                 nir_imm_int(b, VERTEX_COUNT_OFFSET)));

        v3d_nir_store_output(b, 0, NULL, header);
}
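
/* Worked example (made-up numbers): with vertices_out == 4 we have
 * output_header_size == 5; if the shader emitted 2 vertices, then
 * header_offset ends at 3, vertex_count == 3 - 1 == 2, and the global
 * header stored at VPM offset 0 is 5 | (2 << 16) == 0x00020005.
 */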

void
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
        struct v3d_nir_lower_io_state state = { 0 };

        /* Set up the layout of the VPM outputs. */
        switch (s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_setup_vpm_layout_vs(c, &state);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_setup_vpm_layout_gs(c, &state);
                break;
        case MESA_SHADER_FRAGMENT:
        case MESA_SHADER_COMPUTE:
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_builder b;
                        nir_builder_init(&b, function->impl);

                        if (c->s->info.stage == MESA_SHADER_GEOMETRY)
                                emit_gs_prolog(c, &b, function->impl, &state);

                        nir_foreach_block(block, function->impl) {
                                nir_foreach_instr_safe(instr, block)
                                        v3d_nir_lower_io_instr(c, &b, instr,
                                                               &state);
                        }

                        nir_block *last = nir_impl_last_block(function->impl);
                        b.cursor = nir_after_block(last);
                        if (s->info.stage == MESA_SHADER_VERTEX) {
                                v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
                        } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
                                emit_gs_vpm_output_header_prolog(c, &b, &state);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        if (s->info.stage == MESA_SHADER_VERTEX ||
            s->info.stage == MESA_SHADER_GEOMETRY) {
                v3d_nir_lower_io_update_output_var_base(c, &state);
        }
}