src/broadcom/compiler/v3d_nir_lower_io.c

   1 /*
   2  * Copyright © 2015 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "compiler/v3d_compiler.h"
  25 #include "compiler/nir/nir_builder.h"
  26
  27 /**
  28  * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
  29  * intrinsics into something amenable to the V3D architecture.
  30  *
  31  * Most of the work is turning the VS's store_output intrinsics from working
  32  * on a base representing the gallium-level vec4 driver_location to an offset
  33  * within the VPM, and emitting the header that's read by the fixed function
  34  * hardware between the VS and FS.
  35  *
  36  * We also adjust the offsets on uniform loads to be in bytes, since that's
  37  * what we need for indirect addressing with general TMU access.
  38  */
  39
/* Bookkeeping shared by all the helpers of the IO lowering pass while a
 * single shader is being processed.
 */
struct v3d_nir_lower_io_state {
        /* VPM offsets of the fixed function outputs within one vertex's
         * output data.  The layout setup code uses -1 for an output that is
         * not emitted.
         */
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        /* VPM offset of the first varying; the index of a varying in the
         * shader key's used_outputs array is added to it.
         */
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        /* One bit per used_outputs index, set once a store_output for that
         * varying has been lowered.
         */
        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        /* Position components saved from the lowered store_outputs.  Entries
         * that were never written stay NULL until the fixed function outputs
         * are emitted.
         */
        nir_ssa_def *pos[4];
};
  67
  68 static void
  69 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
  70                             struct v3d_nir_lower_io_state *state);
  71
  72 static void
  73 v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
  74                      nir_ssa_def *chan)
  75 {
  76         nir_intrinsic_instr *intr =
  77                 nir_intrinsic_instr_create(b->shader,
  78                                            nir_intrinsic_store_output);
  79         nir_ssa_dest_init(&intr->instr, &intr->dest,
  80                           1, intr->dest.ssa.bit_size, NULL);
  81         intr->num_components = 1;
  82
  83         intr->src[0] = nir_src_for_ssa(chan);
  84         if (offset)
  85                 intr->src[1] = nir_src_for_ssa(offset);
  86         else
  87                 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
  88
  89         nir_intrinsic_set_base(intr, base);
  90         nir_intrinsic_set_write_mask(intr, 0x1);
  91         nir_intrinsic_set_component(intr, 0);
  92
  93         nir_builder_instr_insert(b, &intr->instr);
  94 }
  95
  96 /* Convert the uniform offset to bytes.  If it happens to be a constant,
  97  * constant-folding will clean up the shift for us.
  98  */
  99 static void
 100 v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
 101                       nir_intrinsic_instr *intr)
 102 {
 103         b->cursor = nir_before_instr(&intr->instr);
 104
 105         nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);
 106
 107         nir_instr_rewrite_src(&intr->instr,
 108                               &intr->src[0],
 109                               nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
 110                                                        nir_imm_int(b, 4))));
 111 }
 112
 113 static int
 114 v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan)
 115 {
 116         int component = var->data.location_frac + chan;
 117
 118         uint32_t num_used_outputs = 0;
 119         struct v3d_varying_slot *used_outputs = NULL;
 120         switch (c->s->info.stage) {
 121         case MESA_SHADER_VERTEX:
 122                 num_used_outputs = c->vs_key->num_used_outputs;
 123                 used_outputs = c->vs_key->used_outputs;
 124                 break;
 125         case MESA_SHADER_GEOMETRY:
 126                 num_used_outputs = c->gs_key->num_used_outputs;
 127                 used_outputs = c->gs_key->used_outputs;
 128                 break;
 129         default:
 130                 unreachable("Unsupported shader stage");
 131         }
 132
 133         for (int i = 0; i < num_used_outputs; i++) {
 134                 struct v3d_varying_slot slot = used_outputs[i];
 135
 136                 if (v3d_slot_get_slot(slot) == var->data.location &&
 137                     v3d_slot_get_component(slot) == component) {
 138                         return i;
 139                 }
 140         }
 141
 142         return -1;
 143 }
 144
 145 /* Lowers a store_output(gallium driver location) to a series of store_outputs
 146  * with a driver_location equal to the offset in the VPM.
 147  *
 148  * For geometry shaders we need to emit multiple vertices so the VPM offsets
 149  * need to be computed in the shader code based on the current vertex index.
 150  */
 151 static void
 152 v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
 153                          nir_intrinsic_instr *intr,
 154                          struct v3d_nir_lower_io_state *state)
 155 {
 156         b->cursor = nir_before_instr(&intr->instr);
 157
 158         /* If this is a geometry shader we need to emit our outputs
 159          * to the current vertex offset in the VPM.
 160          */
 161         nir_ssa_def *offset_reg =
 162                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
 163                         nir_load_var(b, state->gs.output_offset_var) : NULL;
 164
 165         int start_comp = nir_intrinsic_component(intr);
 166         nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
 167                                            intr->num_components);
 168
 169         nir_variable *var = NULL;
 170         nir_foreach_variable(scan_var, &c->s->outputs) {
 171                 if (scan_var->data.driver_location != nir_intrinsic_base(intr) ||
 172                     start_comp < scan_var->data.location_frac ||
 173                     start_comp >= scan_var->data.location_frac +
 174                     glsl_get_components(scan_var->type)) {
 175                         continue;
 176                 }
 177                 var = scan_var;
 178         }
 179         assert(var);
 180
 181         /* Save off the components of the position for the setup of VPM inputs
 182          * read by fixed function HW.
 183          */
 184         if (var->data.location == VARYING_SLOT_POS) {
 185                 for (int i = 0; i < intr->num_components; i++) {
 186                         state->pos[start_comp + i] = nir_channel(b, src, i);
 187                 }
 188         }
 189
 190         /* Just psiz to the position in the FF header right now. */
 191         if (var->data.location == VARYING_SLOT_PSIZ &&
 192             state->psiz_vpm_offset != -1) {
 193                 v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
 194         }
 195
 196         /* Scalarize outputs if it hasn't happened already, since we want to
 197          * schedule each VPM write individually.  We can skip any outut
 198          * components not read by the FS.
 199          */
 200         for (int i = 0; i < intr->num_components; i++) {
 201                 int vpm_offset =
 202                         v3d_varying_slot_vpm_offset(c, var,
 203                                                     i +
 204                                                     start_comp -
 205                                                     var->data.location_frac);
 206
 207                 if (vpm_offset == -1)
 208                         continue;
 209
 210                 BITSET_SET(state->varyings_stored, vpm_offset);
 211
 212                 v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
 213                                      offset_reg, nir_channel(b, src, i));
 214         }
 215
 216         nir_instr_remove(&intr->instr);
 217 }
 218
 219 static inline void
 220 reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
 221 {
 222         const uint8_t NEW_PRIMITIVE_OFFSET = 0;
 223         const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
 224
 225         uint32_t vertex_data_size = state->gs.output_vertex_data_size;
 226         assert((vertex_data_size & 0xffffff00) == 0);
 227
 228         uint32_t header;
 229         header  = 1 << NEW_PRIMITIVE_OFFSET;
 230         header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
 231         nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
 232 }
 233
 234 static void
 235 v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
 236                           nir_intrinsic_instr *instr,
 237                           struct v3d_nir_lower_io_state *state)
 238 {
 239         b->cursor = nir_before_instr(&instr->instr);
 240
 241         nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
 242         nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
 243         nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
 244
 245         /* Emit fixed function outputs */
 246         v3d_nir_emit_ff_vpm_outputs(c, b, state);
 247
 248         /* Emit vertex header */
 249         v3d_nir_store_output(b, 0, header_offset, header);
 250
 251         /* Update VPM offset for next vertex output data and header */
 252         output_offset =
 253                 nir_iadd(b, output_offset,
 254                             nir_imm_int(b, state->gs.output_vertex_data_size));
 255
 256         header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));
 257
 258         /* Reset the New Primitive bit */
 259         header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));
 260
 261         nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
 262         nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
 263         nir_store_var(b, state->gs.header_var, header, 0x1);
 264
 265         nir_instr_remove(&instr->instr);
 266 }
 267
 268 static void
 269 v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
 270                             nir_intrinsic_instr *instr,
 271                             struct v3d_nir_lower_io_state *state)
 272 {
 273         assert(state->gs.header_var);
 274         b->cursor = nir_before_instr(&instr->instr);
 275         reset_gs_header(b, state);
 276
 277         nir_instr_remove(&instr->instr);
 278 }
 279
 280 static void
 281 v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
 282                        struct nir_instr *instr,
 283                        struct v3d_nir_lower_io_state *state)
 284 {
 285         if (instr->type != nir_instr_type_intrinsic)
 286                 return;
 287         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 288
 289         switch (intr->intrinsic) {
 290         case nir_intrinsic_load_uniform:
 291                 v3d_nir_lower_uniform(c, b, intr);
 292                 break;
 293
 294         case nir_intrinsic_store_output:
 295                 if (c->s->info.stage == MESA_SHADER_VERTEX ||
 296                     c->s->info.stage == MESA_SHADER_GEOMETRY) {
 297                         v3d_nir_lower_vpm_output(c, b, intr, state);
 298                 }
 299                 break;
 300
 301         case nir_intrinsic_emit_vertex:
 302                 v3d_nir_lower_emit_vertex(c, b, intr, state);
 303                 break;
 304
 305         case nir_intrinsic_end_primitive:
 306                 v3d_nir_lower_end_primitive(c, b, intr, state);
 307                 break;
 308
 309         default:
 310                 break;
 311         }
 312 }
 313
 314 /* Remap the output var's .driver_location.  This is purely for
 315  * nir_print_shader() so that store_output can map back to a variable name.
 316  */
 317 static void
 318 v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
 319                                         struct v3d_nir_lower_io_state *state)
 320 {
 321         nir_foreach_variable_safe(var, &c->s->outputs) {
 322                 if (var->data.location == VARYING_SLOT_POS &&
 323                     state->pos_vpm_offset != -1) {
 324                         var->data.driver_location = state->pos_vpm_offset;
 325                         continue;
 326                 }
 327
 328                 if (var->data.location == VARYING_SLOT_PSIZ &&
 329                     state->psiz_vpm_offset != -1) {
 330                         var->data.driver_location = state->psiz_vpm_offset;
 331                         continue;
 332                 }
 333
 334                 int vpm_offset = v3d_varying_slot_vpm_offset(c, var, 0);
 335                 if (vpm_offset != -1) {
 336                         var->data.driver_location =
 337                                 state->varyings_vpm_offset + vpm_offset;
 338                 } else {
 339                         /* If we couldn't find a mapping for the var, delete
 340                          * it so that its old .driver_location doesn't confuse
 341                          * nir_print_shader().
 342                          */
 343                         exec_node_remove(&var->node);
 344                 }
 345         }
 346 }
 347
 348 static void
 349 v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
 350                             struct v3d_nir_lower_io_state *state)
 351 {
 352         uint32_t vpm_offset = 0;
 353
 354         state->pos_vpm_offset = -1;
 355         state->vp_vpm_offset = -1;
 356         state->zs_vpm_offset = -1;
 357         state->rcp_wc_vpm_offset = -1;
 358         state->psiz_vpm_offset = -1;
 359
 360         bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
 361         if (needs_ff_outputs) {
 362                 if (c->vs_key->is_coord) {
 363                         state->pos_vpm_offset = vpm_offset;
 364                         vpm_offset += 4;
 365                 }
 366
 367                 state->vp_vpm_offset = vpm_offset;
 368                 vpm_offset += 2;
 369
 370                 if (!c->vs_key->is_coord) {
 371                         state->zs_vpm_offset = vpm_offset++;
 372                         state->rcp_wc_vpm_offset = vpm_offset++;
 373                 }
 374
 375                 if (c->vs_key->per_vertex_point_size)
 376                         state->psiz_vpm_offset = vpm_offset++;
 377         }
 378
 379         state->varyings_vpm_offset = vpm_offset;
 380
 381         c->vpm_output_size = vpm_offset + c->vs_key->num_used_outputs;
 382 }
 383
 384 static void
 385 v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
 386                             struct v3d_nir_lower_io_state *state)
 387 {
 388         /* 1 header slot for number of output vertices */
 389         uint32_t vpm_offset = 1;
 390
 391         /* 1 header slot per output vertex */
 392         const uint32_t num_vertices = c->s->info.gs.vertices_out;
 393         vpm_offset += num_vertices;
 394
 395         state->gs.output_header_size = vpm_offset;
 396
 397         /* Vertex data: here we only compute offsets into a generic vertex data
 398          * elements. When it is time to actually write a particular vertex to
 399          * the VPM, we will add the offset for that vertex into the VPM output
 400          * to these offsets.
 401          *
 402          * If geometry shaders are present, they are always the last shader
 403          * stage before rasterization, so we always emit fixed function outputs.
 404          */
 405         vpm_offset = 0;
 406         if (c->gs_key->is_coord) {
 407                 state->pos_vpm_offset = vpm_offset;
 408                 vpm_offset += 4;
 409         } else {
 410                 state->pos_vpm_offset = -1;
 411         }
 412
 413         state->vp_vpm_offset = vpm_offset;
 414         vpm_offset += 2;
 415
 416         if (!c->gs_key->is_coord) {
 417                 state->zs_vpm_offset = vpm_offset++;
 418                 state->rcp_wc_vpm_offset = vpm_offset++;
 419         } else {
 420                 state->zs_vpm_offset = -1;
 421                 state->rcp_wc_vpm_offset = -1;
 422         }
 423
 424         /* Mesa enables OES_geometry_shader_point_size automatically with
 425          * OES_geometry_shader so we always need to handle point size
 426          * writes if present.
 427          */
 428         if (c->gs_key->per_vertex_point_size)
 429                 state->psiz_vpm_offset = vpm_offset++;
 430
 431         state->varyings_vpm_offset = vpm_offset;
 432
 433         state->gs.output_vertex_data_size =
 434                 state->varyings_vpm_offset + c->gs_key->num_used_outputs;
 435
 436         c->vpm_output_size =
 437                 state->gs.output_header_size +
 438                 state->gs.output_vertex_data_size * num_vertices;
 439 }
 440
 441 static void
 442 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
 443                             struct v3d_nir_lower_io_state *state)
 444 {
 445         /* If this is a geometry shader we need to emit our fixed function
 446          * outputs to the current vertex offset in the VPM.
 447          */
 448         nir_ssa_def *offset_reg =
 449                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
 450                         nir_load_var(b, state->gs.output_offset_var) : NULL;
 451
 452         for (int i = 0; i < 4; i++) {
 453                 if (!state->pos[i])
 454                         state->pos[i] = nir_ssa_undef(b, 1, 32);
 455         }
 456
 457         nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);
 458
 459         if (state->pos_vpm_offset != -1) {
 460                 for (int i = 0; i < 4; i++) {
 461                         v3d_nir_store_output(b, state->pos_vpm_offset + i,
 462                                              offset_reg, state->pos[i]);
 463                 }
 464         }
 465
 466         if (state->vp_vpm_offset != -1) {
 467                 for (int i = 0; i < 2; i++) {
 468                         nir_ssa_def *pos;
 469                         nir_ssa_def *scale;
 470                         pos = state->pos[i];
 471                         if (i == 0)
 472                                 scale = nir_load_viewport_x_scale(b);
 473                         else
 474                                 scale = nir_load_viewport_y_scale(b);
 475                         pos = nir_fmul(b, pos, scale);
 476                         pos = nir_fmul(b, pos, rcp_wc);
 477                         pos = nir_f2i32(b, nir_fround_even(b, pos));
 478                         v3d_nir_store_output(b, state->vp_vpm_offset + i,
 479                                              offset_reg, pos);
 480                 }
 481         }
 482
 483         if (state->zs_vpm_offset != -1) {
 484                 nir_ssa_def *z = state->pos[2];
 485                 z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
 486                 z = nir_fmul(b, z, rcp_wc);
 487                 z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
 488                 v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
 489         }
 490
 491         if (state->rcp_wc_vpm_offset != -1) {
 492                 v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
 493                                      offset_reg, rcp_wc);
 494         }
 495
 496         /* Store 0 to varyings requested by the FS but not stored by the
 497          * previous stage. This should be undefined behavior, but
 498          * glsl-routing seems to rely on it.
 499          */
 500         uint32_t num_used_outputs;
 501         switch (c->s->info.stage) {
 502         case MESA_SHADER_VERTEX:
 503                 num_used_outputs = c->vs_key->num_used_outputs;
 504                 break;
 505         case MESA_SHADER_GEOMETRY:
 506                 num_used_outputs = c->gs_key->num_used_outputs;
 507                 break;
 508         default:
 509                 unreachable("Unsupported shader stage");
 510         }
 511
 512         for (int i = 0; i < num_used_outputs; i++) {
 513                 if (!BITSET_TEST(state->varyings_stored, i)) {
 514                         v3d_nir_store_output(b, state->varyings_vpm_offset + i,
 515                                              offset_reg, nir_imm_int(b, 0));
 516                 }
 517         }
 518 }
 519
 520 static void
 521 emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
 522                nir_function_impl *impl,
 523                struct v3d_nir_lower_io_state *state)
 524 {
 525         nir_block *first = nir_start_block(impl);
 526         b->cursor = nir_before_block(first);
 527
 528         const struct glsl_type *uint_type = glsl_uint_type();
 529
 530         assert(!state->gs.output_offset_var);
 531         state->gs.output_offset_var =
 532                 nir_local_variable_create(impl, uint_type, "output_offset");
 533         nir_store_var(b, state->gs.output_offset_var,
 534                       nir_imm_int(b, state->gs.output_header_size), 0x1);
 535
 536         assert(!state->gs.header_offset_var);
 537         state->gs.header_offset_var =
 538                 nir_local_variable_create(impl, uint_type, "header_offset");
 539         nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
 540
 541         assert(!state->gs.header_var);
 542         state->gs.header_var =
 543                 nir_local_variable_create(impl, uint_type, "header");
 544         reset_gs_header(b, state);
 545 }
 546
 547 static void
 548 emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
 549                                  struct v3d_nir_lower_io_state *state)
 550 {
 551         const uint8_t VERTEX_COUNT_OFFSET = 16;
 552
 553         /* Our GS header has 1 generic header slot (at VPM offset 0) and then
 554          * one slot per output vertex after it. This means we don't need to
 555          * have a variable just to keep track of the number of vertices we
 556          * emitted and instead we can just compute it here from the header
 557          * offset variable by removing the one generic header slot that always
 558          * goes at the begining of out header.
 559          */
 560         nir_ssa_def *header_offset =
 561                 nir_load_var(b, state->gs.header_offset_var);
 562         nir_ssa_def *vertex_count =
 563                 nir_isub(b, header_offset, nir_imm_int(b, 1));
 564         nir_ssa_def *header =
 565                 nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
 566                            nir_ishl(b, vertex_count,
 567                                     nir_imm_int(b, VERTEX_COUNT_OFFSET)));
 568
 569         v3d_nir_store_output(b, 0, NULL, header);
 570 }
 571
 572 void
 573 v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
 574 {
 575         struct v3d_nir_lower_io_state state = { 0 };
 576
 577         /* Set up the layout of the VPM outputs. */
 578         switch (s->info.stage) {
 579         case MESA_SHADER_VERTEX:
 580                 v3d_nir_setup_vpm_layout_vs(c, &state);
 581                 break;
 582         case MESA_SHADER_GEOMETRY:
 583                 v3d_nir_setup_vpm_layout_gs(c, &state);
 584                 break;
 585         case MESA_SHADER_FRAGMENT:
 586         case MESA_SHADER_COMPUTE:
 587                 break;
 588         default:
 589                 unreachable("Unsupported shader stage");
 590         }
 591
 592         nir_foreach_function(function, s) {
 593                 if (function->impl) {
 594                         nir_builder b;
 595                         nir_builder_init(&b, function->impl);
 596
 597                         if (c->s->info.stage == MESA_SHADER_GEOMETRY)
 598                                 emit_gs_prolog(c, &b, function->impl, &state);
 599
 600                         nir_foreach_block(block, function->impl) {
 601                                 nir_foreach_instr_safe(instr, block)
 602                                         v3d_nir_lower_io_instr(c, &b, instr,
 603                                                                &state);
 604                         }
 605
 606                         nir_block *last = nir_impl_last_block(function->impl);
 607                         b.cursor = nir_after_block(last);
 608                         if (s->info.stage == MESA_SHADER_VERTEX) {
 609                                 v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
 610                         } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
 611                                 emit_gs_vpm_output_header_prolog(c, &b, &state);
 612                         }
 613
 614                         nir_metadata_preserve(function->impl,
 615                                               nir_metadata_block_index |
 616                                               nir_metadata_dominance);
 617                 }
 618         }
 619
 620         if (s->info.stage == MESA_SHADER_VERTEX ||
 621             s->info.stage == MESA_SHADER_GEOMETRY) {
 622                 v3d_nir_lower_io_update_output_var_base(c, &state);
 623         }
 624 }