src/gallium/drivers/freedreno/a2xx/ir2_nir.c

   1 /*
   2  * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Jonathan Marek <jonathan@marek.ca>
  25  */
  26
  27 #include "ir2_private.h"
  28
  29 #include "freedreno_util.h"
  30 #include "fd2_program.h"
  31
  /* NIR compiler options for this backend.  A pointer to this table is what
   * ir2_get_compiler_options() returns.
   */
  32 static const nir_shader_compiler_options options = {
  33         .lower_fpow = true,
  34         .lower_flrp32 = true,
  35         .lower_fmod = true,
  36         .lower_fdiv = true,
  37         .lower_fceil = true,
  38         .fuse_ffma = true,
  39         /* .fdot_replicates = true, it is replicated, but it makes things worse */
  40         .lower_all_io_to_temps = true,
  41         .vertex_id_zero_based = true, /* it's not implemented anyway */
  42         .lower_bitops = true,
  43         .lower_rotate = true,
  44         .lower_vector_cmp = true,
  45 };
  46
  /* Returns the address of the file-static NIR compiler options table. */
  47 const nir_shader_compiler_options *
  48 ir2_get_compiler_options(void)
  49 {
  50         return &options;
  51 }
  52
  /* OPT runs one NIR pass through NIR_PASS and, being a GNU statement
   * expression, evaluates to the progress flag that NIR_PASS filled in.
   * OPT_V runs a pass through NIR_PASS_V and produces no progress value.
   */
  53 #define OPT(nir, pass, ...) ({                             \
  54    bool this_progress = false;                             \
  55    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
  56    this_progress;                                          \
  57 })
  58 #define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
  59
  /* Runs the NIR optimization passes listed below over and over until one
   * complete iteration finishes without any tracked pass reporting progress.
   */
  60 static void
  61 ir2_optimize_loop(nir_shader *s)
  62 {
  63         bool progress;
  64         do {
  65                 progress = false;
  66
                     /* run through OPT_V: its result does not feed `progress` */
  67                 OPT_V(s, nir_lower_vars_to_ssa);
  68                 progress |= OPT(s, nir_opt_copy_prop_vars);
  69                 progress |= OPT(s, nir_copy_prop);
  70                 progress |= OPT(s, nir_opt_dce);
  71                 progress |= OPT(s, nir_opt_cse);
  72                 /* progress |= OPT(s, nir_opt_gcm, true); */
  73                 progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
  74                 progress |= OPT(s, nir_opt_intrinsics);
  75                 progress |= OPT(s, nir_opt_algebraic);
  76                 progress |= OPT(s, nir_opt_constant_folding);
  77                 progress |= OPT(s, nir_opt_dead_cf);
  78                 if (OPT(s, nir_opt_trivial_continues)) {
  79                         progress |= true;
  80                         /* If nir_opt_trivial_continues makes progress, then we need to clean
  81                          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
  82                          * to make progress.
  83                          */
                             /* results ignored: progress is already set for this iteration */
  84                         OPT(s, nir_copy_prop);
  85                         OPT(s, nir_opt_dce);
  86                 }
  87                 progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
  88                 progress |= OPT(s, nir_opt_if, false);
  89                 progress |= OPT(s, nir_opt_remove_phis);
  90                 progress |= OPT(s, nir_opt_undef);
  91
  92         }
  93         while (progress);
  94 }
  95
  96 /* The trig workarounds pass is the same one ir3 uses, but we don't want to
      * include the ir3 headers, so it is declared by hand here.
      */
  97 bool ir3_nir_apply_trig_workarounds(nir_shader * shader);
  98
 /* Lowers and optimizes shader `s` in place.
  *
  * When `lower` is true the trig workarounds and nir_lower_tex (with
  * `tex_options`) are applied before the optimization loop.
  *
  * Returns 0 on success, or -1 when `s` is a fragment shader that has an
  * output variable at FRAG_RESULT_DEPTH.
  */
 100 int
 101 ir2_optimize_nir(nir_shader *s, bool lower)
 102 {
 103         struct nir_lower_tex_options tex_options = {
 104                 .lower_txp = ~0u,
 105                 .lower_rect = 0,
 106         };
 107
             /* NOTE(review): the shader dump goes to stdout while the separator
              * lines go through debug_printf -- confirm both land in the same
              * stream, otherwise the output may interleave oddly.
              */
 108         if (fd_mesa_debug & FD_DBG_DISASM) {
 109                 debug_printf("----------------------\n");
 110                 nir_print_shader(s, stdout);
 111                 debug_printf("----------------------\n");
 112         }
 113
 114         OPT_V(s, nir_lower_regs_to_ssa);
 115         OPT_V(s, nir_lower_vars_to_ssa);
 116         OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);
 117
 118         if (lower) {
 119                 OPT_V(s, ir3_nir_apply_trig_workarounds);
 120                 OPT_V(s, nir_lower_tex, &tex_options);
 121         }
 122
 123         ir2_optimize_loop(s);
 124
 125         OPT_V(s, nir_remove_dead_variables, nir_var_function_temp);
 126         OPT_V(s, nir_opt_sink, nir_move_const_undef);
 127
 128         /* TODO we dont want to get shaders writing to depth for depth textures */
 129         if (s->info.stage == MESA_SHADER_FRAGMENT) {
 130                 nir_foreach_variable(var, &s->outputs) {
 131                         if (var->data.location == FRAG_RESULT_DEPTH)
 132                                 return -1;
 133                 }
 134         }
 135
 136         return 0;
 137 }
 137
 /* Returns an IR2_SRC_CONST source that yields the `ncomp` values in
  * `value_f`, placing them in the shader's immediate table.
  *
  * Values are compared by their raw 32-bit patterns.  Each existing immediate
  * is tried first: a value already present is reused, a missing one is
  * appended while the immediate has fewer than 4 components.  If no existing
  * immediate can hold all values, a new one is appended to the table.  The
  * returned swizzle maps requested component i to the slot it ended up in.
  *
  * NOTE(review): the float buffer is read through a uint32_t pointer, which
  * relies on the build not enforcing strict aliasing -- confirm.
  * NOTE(review): no capacity check on so->immediates is visible here.
  */
 138 static struct ir2_src
 139 load_const(struct ir2_context *ctx, float *value_f, unsigned ncomp)
 140 {
 141         struct fd2_shader_stateobj *so = ctx->so;
 142         unsigned imm_ncomp, swiz, idx, i, j;
 143         uint32_t *value = (uint32_t*) value_f;
 144
 145         /* try to merge with existing immediate (TODO: try with neg) */
 146         for (idx = 0; idx < so->num_immediates; idx++) {
 147                 swiz = 0;
 148                 imm_ncomp = so->immediates[idx].ncomp;
 149                 for (i = 0; i < ncomp; i++) {
 150                         for (j = 0; j < imm_ncomp; j++) {
 151                                 if (value[i] == so->immediates[idx].val[j])
 152                                         break;
 153                         }
 154                         if (j == imm_ncomp) {
                                     /* immediate is full: give up on this one */
 155                                 if (j == 4)
 156                                         break;
                                     /* val[] is written speculatively; the stored ncomp is
                                      * only updated (after the loops) for the immediate that
                                      * is finally chosen
                                      */
 157                                 so->immediates[idx].val[imm_ncomp++] = value[i];
 158                         }
 159                         swiz |= swiz_set(j, i);
 160                 }
 161                 /* matched all components */
 162                 if (i == ncomp)
 163                         break;
 164         }
 165
 166         /* need to allocate new immediate */
 167         if (idx == so->num_immediates) {
 168                 swiz = 0;
 169                 imm_ncomp = 0;
 170                 for (i = 0; i < ncomp; i++) {
                             /* de-duplicate repeated values within the request */
 171                         for (j = 0; j < imm_ncomp; j++) {
 172                                 if (value[i] == ctx->so->immediates[idx].val[j])
 173                                         break;
 174                         }
 175                         if (j == imm_ncomp) {
 176                                 so->immediates[idx].val[imm_ncomp++] = value[i];
 177                         }
 178                         swiz |= swiz_set(j, i);
 179                 }
 180                 so->num_immediates++;
 181         }
             /* commit the (possibly grown) component count of the chosen immediate */
 182         so->immediates[idx].ncomp = imm_ncomp;
 183
 184         if (ncomp == 1)
 185                 swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX);
 186
 187         return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST);
 188 }
 189
 /* Returns a constant source holding the single value 0.0f (via load_const). */
 190 struct ir2_src
 191 ir2_zero(struct ir2_context *ctx)
 192 {
 193         return load_const(ctx, (float[]) {0.0f}, 1);
 194 }
 195
 /* Updates the loop-related bookkeeping of `reg` for an access at the current
  * position (ctx->loop_depth).
  *
  * The first access marks the register initialized and records the current
  * loop depth.  An access from a deeper loop than the recorded depth sets
  * block_idx_free from ctx->loop_last_block[reg->loop_depth + 1]; otherwise
  * the recorded depth becomes the current depth and block_idx_free is reset
  * to -1.  Finally, whenever the recorded depth is non-zero, block_idx_free
  * is overwritten with ctx->loop_last_block[reg->loop_depth].
  */
 196 static void
 197 update_range(struct ir2_context *ctx, struct ir2_reg *reg)
 198 {
 199         if (!reg->initialized) {
 200                 reg->initialized = true;
 201                 reg->loop_depth = ctx->loop_depth;
 202         }
 203
 204         if (ctx->loop_depth > reg->loop_depth) {
 205                 reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1];
 206         } else {
 207                 reg->loop_depth = ctx->loop_depth;
 208                 reg->block_idx_free = -1;
 209         }
 210
 211         /* for regs we want to free at the end of the loop in any case
 212          * XXX dont do this for ssa
 213          */
 214         if (reg->loop_depth)
 215                 reg->block_idx_free = ctx->loop_last_block[reg->loop_depth];
 216 }
 217
 /* Builds the ir2 source operand for NIR source `src`.
  *
  * A constant source is converted to f32 values and returned as an immediate
  * from load_const().  A NIR register source becomes IR2_SRC_REG indexing
  * ctx->reg; an SSA source becomes IR2_SRC_SSA, numbered through ctx->ssa_map.
  * In the two non-constant cases update_range() is called on the register
  * being read.
  */
 218 static struct ir2_src
 219 make_src(struct ir2_context *ctx, nir_src src)
 220 {
 221         struct ir2_src res = {};
 222         struct ir2_reg *reg;
 223
 224         nir_const_value *const_value = nir_src_as_const_value(src);
 225
 226         if (const_value) {
 227                 assert(src.is_ssa);
                     /* variable-length array sized by the SSA component count */
 228                 float c[src.ssa->num_components];
 229                 nir_const_value_to_array(c, const_value, src.ssa->num_components, f32);
 230                 return load_const(ctx, c, src.ssa->num_components);
 231         }
 232
 233         if (!src.is_ssa) {
 234                 res.num = src.reg.reg->index;
 235                 res.type = IR2_SRC_REG;
 236                 reg = &ctx->reg[res.num];
 237         } else {
                     /* the defining instruction must already have been emitted */
 238                 assert(ctx->ssa_map[src.ssa->index] >= 0);
 239                 res.num = ctx->ssa_map[src.ssa->index];
 240                 res.type = IR2_SRC_SSA;
 241                 reg = &ctx->instr[res.num].ssa;
 242         }
 243
 244         update_range(ctx, reg);
 245         return res;
 246 }
 247
 /* Binds `instr` to the NIR destination `dst`.
  *
  * For an SSA destination the instruction index is recorded in ctx->ssa_map.
  * For a register destination the instruction is switched to non-SSA and
  * pointed at the matching entry of ctx->reg.  update_range() is then called
  * on whichever register the instruction writes.
  */
 248 static void
 249 set_index(struct ir2_context *ctx, nir_dest * dst,
 250                   struct ir2_instr *instr)
 251 {
 252         struct ir2_reg *reg = &instr->ssa;
 253
 254         if (dst->is_ssa) {
 255                 ctx->ssa_map[dst->ssa.index] = instr->idx;
 256         } else {
 257                 assert(instr->is_ssa);
 258                 reg = &ctx->reg[dst->reg.reg->index];
 259
 260                 instr->is_ssa = false;
 261                 instr->reg = reg;
 262         }
 263         update_range(ctx, reg);
 264 }
 265
 /* Takes the next free entry of ctx->instr and initializes it: its index, the
  * given `type`, the current block index and predicate from `ctx`, and
  * is_ssa = true.  Returns the new instruction.
  *
  * NOTE(review): no capacity check on ctx->instr is visible here.
  */
 266 static struct ir2_instr *
 267 ir2_instr_create(struct ir2_context *ctx, int type)
 268 {
 269         struct ir2_instr *instr;
 270
 271         instr = &ctx->instr[ctx->instr_count++];
 272         instr->idx = ctx->instr_count - 1;
 273         instr->type = type;
 274         instr->block_idx = ctx->block_idx;
 275         instr->pred = ctx->pred;
 276         instr->is_ssa = true;
 277         return instr;
 278 }
 279
 /* Creates an IR2_ALU instruction for `opcode` producing `ncomp` components.
  *
  * `opcode` is a nir_op, or the pseudo-op ir2_op_cube defined inside this
  * function.  The table maps it to a scalar and a vector hardware opcode,
  * with -1 meaning "none"; at least one of the two must exist.  The new
  * instruction gets a write mask covering the low `ncomp` components, no
  * export (-1), and a source count taken from nir_op_infos (2 for the cube
  * pseudo-op).
  */
 280 static struct ir2_instr *
 281 instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
 282 {
 283         /* emit_alu will fixup instrs that don't map directly */
 284         static const struct ir2_opc {
 285                 int8_t scalar, vector;
 286         } nir_ir2_opc[nir_num_opcodes+1] = {
                     /* default every entry to "unsupported" (GNU range initializer) */
 287                 [0 ... nir_num_opcodes - 1] = {-1, -1},
 288
 289                 [nir_op_mov] = {MAXs, MAXv},
 290                 [nir_op_fneg] = {MAXs, MAXv},
 291                 [nir_op_fabs] = {MAXs, MAXv},
 292                 [nir_op_fsat] = {MAXs, MAXv},
 293                 [nir_op_fsign] = {-1, CNDGTEv},
 294                 [nir_op_fadd] = {ADDs, ADDv},
 295                 [nir_op_fsub] = {ADDs, ADDv},
 296                 [nir_op_fmul] = {MULs, MULv},
 297                 [nir_op_ffma] = {-1, MULADDv},
 298                 [nir_op_fmax] = {MAXs, MAXv},
 299                 [nir_op_fmin] = {MINs, MINv},
 300                 [nir_op_ffloor] = {FLOORs, FLOORv},
 301                 [nir_op_ffract] = {FRACs, FRACv},
 302                 [nir_op_ftrunc] = {TRUNCs, TRUNCv},
 303                 [nir_op_fdot2] = {-1, DOT2ADDv},
 304                 [nir_op_fdot3] = {-1, DOT3v},
 305                 [nir_op_fdot4] = {-1, DOT4v},
 306                 [nir_op_sge] = {-1, SETGTEv},
 307                 [nir_op_slt] = {-1, SETGTv},
 308                 [nir_op_sne] = {-1, SETNEv},
 309                 [nir_op_seq] = {-1, SETEv},
 310                 [nir_op_fcsel] = {-1, CNDEv},
 311                 [nir_op_frsq] = {RECIPSQ_IEEE, -1},
 312                 [nir_op_frcp] = {RECIP_IEEE, -1},
 313                 [nir_op_flog2] = {LOG_IEEE, -1},
 314                 [nir_op_fexp2] = {EXP_IEEE, -1},
 315                 [nir_op_fsqrt] = {SQRT_IEEE, -1},
 316                 [nir_op_fcos] = {COS, -1},
 317                 [nir_op_fsin] = {SIN, -1},
 318                 /* no fsat, fneg, fabs since source mods deal with those */
 319
 320                 /* so we can use this function with non-nir op */
 321 #define ir2_op_cube nir_num_opcodes
 322                 [ir2_op_cube] = {-1, CUBEv},
 323         };
 324
 325         struct ir2_opc op = nir_ir2_opc[opcode];
 326         assert(op.vector >= 0 || op.scalar >= 0);
 327
 328         struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU);
 329         instr->alu.vector_opc = op.vector;
 330         instr->alu.scalar_opc = op.scalar;
 331         instr->alu.export = -1;
 332         instr->alu.write_mask = (1 << ncomp) - 1;
             /* the cube pseudo-op has no nir_op_infos entry, so hard-code its count */
 333         instr->src_count = opcode == ir2_op_cube ? 2 :
 334                 nir_op_infos[opcode].num_inputs;
 335         instr->ssa.ncomp = ncomp;
 336         return instr;
 337 }
 338
 /* Creates an ALU instruction that writes a non-SSA register using the given
  * `write_mask`.
  *
  * If `share_reg` is non-NULL the register of that instruction is reused;
  * otherwise the next unused entry of ctx->reg is taken.  The register's
  * component count is grown to include the highest bit set in `write_mask`.
  */
 339 static struct ir2_instr *
 340 instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode,
 341                 uint8_t write_mask, struct ir2_instr *share_reg)
 342 {
 343         struct ir2_instr *instr;
 344         struct ir2_reg *reg;
 345
 346         reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++];
 347         reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1);
 348
             /* the component count is the number of bits set in the mask; the mask
              * that instr_create_alu derives from it is then replaced by the real one
              */
 349         instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask));
 350         instr->alu.write_mask = write_mask;
 351         instr->reg = reg;
 352         instr->is_ssa = false;
 353         return instr;
 354 }
 355
 356
 /* Creates an ALU instruction for `opcode` sized to the component count of
  * `dst`, and binds it to `dst` with set_index().
  */
 357 static struct ir2_instr *
 358 instr_create_alu_dest(struct ir2_context *ctx, nir_op opcode, nir_dest *dst)
 359 {
 360         struct ir2_instr *instr;
 361         instr = instr_create_alu(ctx, opcode, nir_dest_num_components(*dst));
 362         set_index(ctx, dst, instr);
 363         return instr;
 364 }
 365
 /* Creates an IR2_FETCH instruction with fetch opcode `opc` and one source,
  * sized to the component count of `dst`, and binds it to `dst` with
  * set_index().  Callers fill in the source and fetch-specific fields.
  */
 366 static struct ir2_instr *
 367 ir2_instr_create_fetch(struct ir2_context *ctx, nir_dest *dst,
 368                 instr_fetch_opc_t opc)
 369 {
 370         struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH);
 371         instr->fetch.opc = opc;
 372         instr->src_count = 1;
 373         instr->ssa.ncomp = nir_dest_num_components(*dst);
 374         set_index(ctx, dst, instr);
 375         return instr;
 376 }
 377
 /* Like make_src(), but never returns a constant operand: a constant source
  * is first copied by a freshly emitted mov, and the SSA result of that mov
  * is returned instead.
  */
 378 static struct ir2_src
 379 make_src_noconst(struct ir2_context *ctx, nir_src src)
 380 {
 381         struct ir2_instr *instr;
 382
 383         if (nir_src_as_const_value(src)) {
 384                 assert(src.is_ssa);
 385                 instr = instr_create_alu(ctx, nir_op_mov, src.ssa->num_components);
 386                 instr->src[0] = make_src(ctx, src);
 387                 return ir2_src(instr->idx, 0, IR2_SRC_SSA);
 388         }
 389
 390         return make_src(ctx, src);
 391 }
 392
 /* Translates one NIR ALU instruction into ir2.
  *
  * An ALU instruction is created for alu->op and bound to the NIR
  * destination, then each NIR source is converted with make_src() and its
  * swizzle, negate and abs modifiers are carried over.  The switch at the end
  * patches ops that do not map one-to-one: source modifiers, saturate,
  * operand swaps, an extra zero operand, and a helper instruction for fsign.
  */
 393 static void
 394 emit_alu(struct ir2_context *ctx, nir_alu_instr * alu)
 395 {
 396         const nir_op_info *info = &nir_op_infos[alu->op];
 397         nir_dest *dst = &alu->dest.dest;
 398         struct ir2_instr *instr;
 399         struct ir2_src tmp;
 400         unsigned ncomp;
 401
 402         /* get the number of dst components */
 403         if (dst->is_ssa) {
 404                 ncomp = dst->ssa.num_components;
 405         } else {
                     /* count the bits set in the low 4 bits of the write mask */
 406                 ncomp = 0;
 407                 for (int i = 0; i < 4; i++)
 408                         ncomp += !!(alu->dest.write_mask & 1 << i);
 409         }
 410
 411         instr = instr_create_alu(ctx, alu->op, ncomp);
 412         set_index(ctx, dst, instr);
 413         instr->alu.saturate = alu->dest.saturate;
 414         instr->alu.write_mask = alu->dest.write_mask;
 415
 416         for (int i = 0; i < info->num_inputs; i++) {
 417                 nir_alu_src *src = &alu->src[i];
 418
 419                 /* compress swizzle with writemask when applicable */
 420                 unsigned swiz = 0, j = 0;
                     /* NOTE: this inner `i` shadows the source index `i` of the
                      * enclosing loop; it only walks the 4 components.  Components
                      * not in the write mask are skipped only when
                      * info->output_size is 0.
                      */
 421                 for (int i = 0; i < 4; i++) {
 422                         if (!(alu->dest.write_mask & 1 << i) && !info->output_size)
 423                                 continue;
 424                         swiz |= swiz_set(src->swizzle[i], j++);
 425                 }
 426
 427                 instr->src[i] = make_src(ctx, src->src);
 428                 instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz);
 429                 instr->src[i].negate = src->negate;
 430                 instr->src[i].abs = src->abs;
 431         }
 432
 433         /* workarounds for NIR ops that don't map directly to a2xx ops */
 434         switch (alu->op) {
 435         case nir_op_fneg:
 436                 instr->src[0].negate = 1;
 437                 break;
 438         case nir_op_fabs:
 439                 instr->src[0].abs = 1;
 440                 break;
 441         case nir_op_fsat:
 442                 instr->alu.saturate = 1;
 443                 break;
 444         case nir_op_slt:
                     /* slt maps to SETGTv in the opcode table, so swap the operands */
 445                 tmp = instr->src[0];
 446                 instr->src[0] = instr->src[1];
 447                 instr->src[1] = tmp;
 448                 break;
 449         case nir_op_fcsel:
                     /* swap the two value operands */
 450                 tmp = instr->src[1];
 451                 instr->src[1] = instr->src[2];
 452                 instr->src[2] = tmp;
 453                 break;
 454         case nir_op_fsub:
                     /* fsub maps to the ADD opcodes: flip the sign of the second operand */
 455                 instr->src[1].negate = !instr->src[1].negate;
 456                 break;
 457         case nir_op_fdot2:
                     /* fdot2 maps to DOT2ADDv, which takes a third operand: use zero */
 458                 instr->src_count = 3;
 459                 instr->src[2] = ir2_zero(ctx);
 460                 break;
 461         case nir_op_fsign: {
 462                 /* we need an extra instruction to deal with the zero case */
                     /* NOTE: this `tmp` (an instruction pointer) shadows the
                      * function-level `struct ir2_src tmp`
                      */
 463                 struct ir2_instr *tmp;
 464
 465                 /* tmp = x == 0 ? 0 : 1 */
 466                 tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp);
 467                 tmp->src[0] = instr->src[0];
 468                 tmp->src[1] = ir2_zero(ctx);
 469                 tmp->src[2] = load_const(ctx, (float[]) {1.0f}, 1);
 470
 471                 /* result = x >= 0 ? tmp : -tmp */
 472                 instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
 473                 instr->src[2] = instr->src[1];
 474                 instr->src[2].negate = true;
 475                 instr->src_count = 3;
 476         } break;
 477         default:
 478                 break;
 479         }
 480 }
 481
 /* Emits the instructions that load shader input `idx` into `dst`.
  *
  * Vertex shaders emit a fetch instruction whose constant index and selector
  * are derived from `idx`.  For other shaders `idx` is matched against the
  * driver_location of the NIR input variables to find the varying slot:
  * VARYING_SLOT_PNTC and VARYING_SLOT_POS are assembled from the input
  * numbered ctx->f->inputs_count and, for POS, the fragcoord input; every
  * other slot is a plain mov from input `idx`.
  */
 482 static void
 483 load_input(struct ir2_context *ctx, nir_dest *dst, unsigned idx)
 484 {
 485         struct ir2_instr *instr;
 486         int slot = -1;
 487
 488         if (ctx->so->type == MESA_SHADER_VERTEX) {
 489                 instr = ir2_instr_create_fetch(ctx, dst, 0);
 490                 instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT);
 491                 instr->fetch.vtx.const_idx = 20 + (idx / 3);
 492                 instr->fetch.vtx.const_idx_sel = idx % 3;
 493                 return;
 494         }
 495
 496         /* get slot from idx */
 497         nir_foreach_variable(var, &ctx->nir->inputs) {
 498                 if (var->data.driver_location == idx) {
 499                         slot = var->data.location;
 500                         break;
 501                 }
 502         }
 503         assert(slot >= 0);
 504
 505         switch (slot) {
 506         case VARYING_SLOT_PNTC:
 507                 /* need to extract with abs and invert y */
                     /* dst = |in.zw| * (1, -1) + (0, 1) */
 508                 instr = instr_create_alu_dest(ctx, nir_op_ffma, dst);
 509                 instr->src[0] = ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT);
 510                 instr->src[0].abs = true;
 511                 instr->src[1] = load_const(ctx, (float[]) {1.0f, -1.0f}, 2);
 512                 instr->src[2] = load_const(ctx, (float[]) {0.0f, 1.0f}, 2);
 513                 break;
 514         case VARYING_SLOT_POS:
 515                 /* need to extract xy with abs and add tile offset on a20x
 516                  * zw from fragcoord input (w inverted in fragment shader)
 517                  * TODO: only components that are required by fragment shader
 518                  */
                     /* write mask 3 (.xy) into a fresh temporary register */
 519                 instr = instr_create_alu_reg(ctx,
 520                         ctx->so->is_a20x ? nir_op_fadd : nir_op_mov, 3, NULL);
 521                 instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
 522                 instr->src[0].abs = true;
 523                 /* on a20x, C64 contains the tile offset */
                     /* src[1] is filled in for both opcodes; presumably ignored for
                      * mov, whose src_count comes from nir_op_infos -- TODO confirm
                      */
 524                 instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST);
 525
                     /* write mask 4 (.z) of the same register */
 526                 instr = instr_create_alu_reg(ctx, nir_op_mov, 4, instr);
 527                 instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT);
 528
                     /* write mask 8 (.w) = reciprocal of the fragcoord input's .y */
 529                 instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr);
 530                 instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT);
 531
                     /* recover the register's index in ctx->reg from its address */
 532                 unsigned reg_idx = instr->reg - ctx->reg; /* XXX */
 533                 instr = instr_create_alu_dest(ctx, nir_op_mov, dst);
 534                 instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
 535                 break;
 536         default:
 537                 instr = instr_create_alu_dest(ctx, nir_op_mov, dst);
 538                 instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT);
 539                 break;
 540         }
 541 }
 542
 /* Returns the location (slot) of the NIR output variable whose
  * driver_location equals the base index of intrinsic `intr`.  Asserts that
  * such a variable exists.
  */
 543 static unsigned
 544 output_slot(struct ir2_context *ctx, nir_intrinsic_instr *intr)
 545 {
 546         int slot = -1;
 547         unsigned idx = nir_intrinsic_base(intr);
 548         nir_foreach_variable(var, &ctx->nir->outputs) {
 549                 if (var->data.driver_location == idx) {
 550                         slot = var->data.location;
 551                         break;
 552                 }
 553         }
 554         assert(slot != -1);
 555         return slot;
 556 }
 557
 /* Emits an exporting mov of `src` (`ncomp` components) for output `slot`.
  *
  * Vertex shaders: VARYING_SLOT_POS exports to index 62 and also saves the
  * source in ctx->position; VARYING_SLOT_PSIZ exports to index 63 and sets
  * writes_psize; any other slot exports to the index of the matching entry
  * in ctx->f->inputs, or emits nothing when no entry matches.
  *
  * Other shaders: only FRAG_RESULT_COLOR and FRAG_RESULT_DATA0 are exported
  * (index 0); every other slot is silently ignored.
  */
 558 static void
 559 store_output(struct ir2_context *ctx, nir_src src, unsigned slot, unsigned ncomp)
 560 {
 561         struct ir2_instr *instr;
 562         unsigned idx = 0;
 563
 564         if (ctx->so->type == MESA_SHADER_VERTEX) {
 565                 switch (slot) {
 566                 case VARYING_SLOT_POS:
 567                         ctx->position = make_src(ctx, src);
 568                         idx = 62;
 569                         break;
 570                 case VARYING_SLOT_PSIZ:
 571                         ctx->so->writes_psize = true;
 572                         idx = 63;
 573                         break;
 574                 default:
 575                         /* find matching slot from fragment shader input */
 576                         for (idx = 0; idx < ctx->f->inputs_count; idx++)
 577                                 if (ctx->f->inputs[idx].slot == slot)
 578                                         break;
                             /* no entry in ctx->f->inputs matches: nothing to export */
 579                         if (idx == ctx->f->inputs_count)
 580                                 return;
 581                 }
 582         } else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) {
 583                 /* only color output is implemented */
 584                 return;
 585         }
 586
 587         instr = instr_create_alu(ctx, nir_op_mov, ncomp);
 588         instr->src[0] = make_src(ctx, src);
 589         instr->alu.export = idx;
 590 }
 591
 /* Translates one NIR intrinsic into ir2.
  *
  * Handled: load_input, store_output, load_uniform (constant offset only),
  * discard / discard_if and load_front_face.  Anything else is reported
  * through compile_error().
  */
 592 static void
 593 emit_intrinsic(struct ir2_context *ctx, nir_intrinsic_instr *intr)
 594 {
 595         struct ir2_instr *instr;
 596         nir_const_value *const_offset;
 597         unsigned idx;
 598
 599         switch (intr->intrinsic) {
 600         case nir_intrinsic_load_input:
 601                 load_input(ctx, &intr->dest, nir_intrinsic_base(intr));
 602                 break;
 603         case nir_intrinsic_store_output:
 604                 store_output(ctx, intr->src[0], output_slot(ctx, intr), intr->num_components);
 605                 break;
 606         case nir_intrinsic_load_uniform:
 607                 const_offset = nir_src_as_const_value(intr->src[0]);
                     /* TODO can be false in ES2?
                      * Report it as a compile error instead of relying on an
                      * assert, so builds without asserts do not dereference NULL.
                      */
 608                 if (!const_offset) {
                             compile_error(ctx, "unimplemented indirect uniform load (base %d)\n",
                                           nir_intrinsic_base(intr));
                             break;
                     }
 609                 idx = nir_intrinsic_base(intr);
                     /* the offset is read as a float value */
 610                 idx += (uint32_t) const_offset[0].f32;
 611                 instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->dest);
 612                 instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST);
 613                 break;
 614         case nir_intrinsic_discard:
 615         case nir_intrinsic_discard_if:
                     /* scalar-only kill instruction, built by hand */
 616                 instr = ir2_instr_create(ctx, IR2_ALU);
 617                 instr->alu.vector_opc = VECTOR_NONE;
 618                 if (intr->intrinsic == nir_intrinsic_discard_if) {
 619                         instr->alu.scalar_opc = KILLNEs;
 620                         instr->src[0] = make_src(ctx, intr->src[0]);
 621                 } else {
                             /* unconditional discard: KILLEs with a constant zero source */
 622                         instr->alu.scalar_opc = KILLEs;
 623                         instr->src[0] = ir2_zero(ctx);
 624                 }
 625                 instr->alu.export = -1;
 626                 instr->src_count = 1;
 627                 ctx->so->has_kill = true;
 628                 break;
 629         case nir_intrinsic_load_front_face:
 630                 /* gl_FrontFacing is in the sign of param.x
 631                  * rcp required because otherwise we can't differentiate -0.0 and +0.0
 632                  */
 633                 ctx->so->need_param = true;
 634
 635                 struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1);
 636                 tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
 637
                     /* dst = (1 / param.x) >= 0 */
 638                 instr = instr_create_alu_dest(ctx, nir_op_sge, &intr->dest);
 639                 instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
 640                 instr->src[1] = ir2_zero(ctx);
 641                 break;
 642         default:
 643                 compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic);
 644                 break;
 645         }
 646 }
 647
 /* Translates one NIR texture instruction into an ir2 texture fetch.
  *
  * Only the tex, txb and txl ops with 2D, RECT or CUBE samplers are handled,
  * and only coord, bias and lod sources; everything else is reported through
  * compile_error() and nothing is emitted.  Cube coordinates are first
  * transformed with extra ALU instructions.  A bias/lod source, if present,
  * is attached as a second source of the fetch.
  */
 648 static void
 649 emit_tex(struct ir2_context *ctx, nir_tex_instr * tex)
 650 {
 651         bool is_rect = false, is_cube = false;
 652         struct ir2_instr *instr;
 653         nir_src *coord, *lod_bias;
 654
 655         coord = lod_bias = NULL;
 656
 657         for (unsigned i = 0; i < tex->num_srcs; i++) {
 658                 switch (tex->src[i].src_type) {
 659                 case nir_tex_src_coord:
 660                         coord = &tex->src[i].src;
 661                         break;
 662                 case nir_tex_src_bias:
 663                 case nir_tex_src_lod:
                             /* bias and lod share one slot; only one may be present */
 664                         assert(!lod_bias);
 665                         lod_bias = &tex->src[i].src;
 666                         break;
 667                 default:
 668                         compile_error(ctx, "Unhandled NIR tex src type: %d\n",
 669                                                   tex->src[i].src_type);
 670                         return;
 671                 }
 672         }
 673
 674         switch (tex->op) {
 675         case nir_texop_tex:
 676         case nir_texop_txb:
 677         case nir_texop_txl:
 678                 break;
 679         default:
 680                 compile_error(ctx, "unimplemented texop %d\n", tex->op);
 681                 return;
 682         }
 683
 684         switch (tex->sampler_dim) {
 685         case GLSL_SAMPLER_DIM_2D:
 686                 break;
 687         case GLSL_SAMPLER_DIM_RECT:
 688                 is_rect = true;
 689                 break;
 690         case GLSL_SAMPLER_DIM_CUBE:
 691                 is_cube = true;
 692                 break;
 693         default:
 694                 compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim);
 695                 return;
 696         }
 697
             /* coord is dereferenced below; bail out if the instruction has none */
             if (!coord) {
                     compile_error(ctx, "texop %d without a coord source\n", tex->op);
                     return;
             }

 698         struct ir2_src src_coord = make_src_noconst(ctx, *coord);
 699
 700         /* for cube maps
 701          * tmp = cube(coord)
 702          * tmp.xy = tmp.xy / |tmp.z| + 1.5
 703          * coord = tmp.xyw
 704          */
 705         if (is_cube) {
 706                 struct ir2_instr *rcp, *coord_xy;
 707                 unsigned reg_idx;
 708
                     /* write mask 15: all four components of a fresh register */
 709                 instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL);
 710                 instr->src[0] = src_coord;
 711                 instr->src[0].swizzle = IR2_SWIZZLE_ZZXY;
 712                 instr->src[1] = src_coord;
 713                 instr->src[1].swizzle = IR2_SWIZZLE_YXZZ;
 714
 715                 reg_idx = instr->reg - ctx->reg; /* hacky */
 716
 717                 rcp = instr_create_alu(ctx, nir_op_frcp, 1);
 718                 rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
 719                 rcp->src[0].abs = true;
 720
                     /* write mask 3: update .xy of the same register in place */
 721                 coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
 722                 coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
 723                 coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
 724                 coord_xy->src[2] = load_const(ctx, (float[]) {1.5f}, 1);
 725
 726                 src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG);
 727                 /* TODO: lod/bias transformed by src_coord.z ? */
 728         }
 729
 730         instr = ir2_instr_create_fetch(ctx, &tex->dest, TEX_FETCH);
 731         instr->src[0] = src_coord;
 732         instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_YXW : 0;
 733         instr->fetch.tex.is_cube = is_cube;
 734         instr->fetch.tex.is_rect = is_rect;
 735         instr->fetch.tex.samp_id = tex->sampler_index;
 736
 737         /* for lod/bias, we insert an extra src for the backend to deal with */
 738         if (lod_bias) {
 739                 instr->src[1] = make_src_noconst(ctx, *lod_bias);
 740                 /* backend will use 2-3 components so apply swizzle */
 741                 swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX);
 742                 instr->src_count = 2;
 743         }
 744 }
 745
 /* Registers NIR input variable `in` with the backend.
  *
  * Nothing is done for vertex shaders.  For fragment shaders,
  * VARYING_SLOT_PNTC only sets need_param; every other input is appended to
  * ctx->f->inputs with its slot and component count.  VARYING_SLOT_POS
  * additionally records its index in ctx->f->fragcoord and sets need_param.
  * Array inputs are not supported (asserted).
  *
  * NOTE(review): for a shader type that is neither vertex nor fragment,
  * compile_error() is called but execution continues into the fragment path
  * unless compile_error() itself does not return -- confirm.
  */
 746 static void
 747 setup_input(struct ir2_context *ctx, nir_variable * in)
 748 {
 749         struct fd2_shader_stateobj *so = ctx->so;
 750         unsigned array_len = MAX2(glsl_get_length(in->type), 1);
             /* this initial value is never read: n is overwritten before use below */
 751         unsigned n = in->data.driver_location;
 752         unsigned slot = in->data.location;
 753
 754         assert(array_len == 1);
 755
 756         /* handle later */
 757         if (ctx->so->type == MESA_SHADER_VERTEX)
 758                 return;
 759
 760         if (ctx->so->type != MESA_SHADER_FRAGMENT)
 761                 compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
 762
 763         if (slot == VARYING_SLOT_PNTC) {
 764                 so->need_param = true;
 765                 return;
 766         }
 767
 768         n = ctx->f->inputs_count++;
 769
 770         /* half of fragcoord from param reg, half from a varying */
 771         if (slot == VARYING_SLOT_POS) {
 772                 ctx->f->fragcoord = n;
 773                 so->need_param = true;
 774         }
 775
 776         ctx->f->inputs[n].slot = slot;
 777         ctx->f->inputs[n].ncomp = glsl_get_components(in->type);
 778
 779         /* in->data.interpolation?
 780          * opengl ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD
 781          */
 782 }
 783
 784 static void
 785 emit_undef(struct ir2_context *ctx, nir_ssa_undef_instr * undef)
 786 {
 787         /* TODO we don't want to emit anything for undefs */
 788
 789         struct ir2_instr *instr;
 790
 791         instr = instr_create_alu_dest(ctx, nir_op_mov,
 792                 &(nir_dest) {.ssa = undef->def,.is_ssa = true});
 793         instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST);
 794 }
 795
 796 static void
 797 emit_instr(struct ir2_context *ctx, nir_instr * instr)
 798 {
 799         switch (instr->type) {
 800         case nir_instr_type_alu:
 801                 emit_alu(ctx, nir_instr_as_alu(instr));
 802                 break;
 803         case nir_instr_type_deref:
 804                 /* ignored, handled as part of the intrinsic they are src to */
 805                 break;
 806         case nir_instr_type_intrinsic:
 807                 emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
 808                 break;
 809         case nir_instr_type_load_const:
 810                 /* dealt with when using nir_src */
 811                 break;
 812         case nir_instr_type_tex:
 813                 emit_tex(ctx, nir_instr_as_tex(instr));
 814                 break;
 815         case nir_instr_type_jump:
 816                 ctx->block_has_jump[ctx->block_idx] = true;
 817                 break;
 818         case nir_instr_type_ssa_undef:
 819                 emit_undef(ctx, nir_instr_as_ssa_undef(instr));
 820                 break;
 821         default:
 822                 break;
 823         }
 824 }
 825
 826 /* fragcoord.zw and a20x hw binning outputs */
 827 static void
 828 extra_position_exports(struct ir2_context *ctx, bool binning)
 829 {
 830         struct ir2_instr *instr, *rcp, *sc, *wincoord, *off;
 831
 832         if (ctx->f->fragcoord < 0 && !binning)
 833                 return;
 834
 835         instr = instr_create_alu(ctx, nir_op_fmax, 1);
 836         instr->src[0] = ctx->position;
 837         instr->src[0].swizzle = IR2_SWIZZLE_W;
 838         instr->src[1] = ir2_zero(ctx);
 839
 840         rcp = instr_create_alu(ctx, nir_op_frcp, 1);
 841         rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA);
 842
 843         sc = instr_create_alu(ctx, nir_op_fmul, 4);
 844         sc->src[0] = ctx->position;
 845         sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
 846
 847         wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
 848         wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
 849         wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
 850         wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);
 851
 852         /* fragcoord z/w */
 853         if (ctx->f->fragcoord >= 0 && !binning) {
 854                 instr = instr_create_alu(ctx, nir_op_mov, 1);
 855                 instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA);
 856                 instr->alu.export = ctx->f->fragcoord;
 857
 858                 instr = instr_create_alu(ctx, nir_op_mov, 1);
 859                 instr->src[0] = ctx->position;
 860                 instr->src[0].swizzle = IR2_SWIZZLE_W;
 861                 instr->alu.export = ctx->f->fragcoord;
 862                 instr->alu.write_mask = 2;
 863         }
 864
 865         if (!binning)
 866                 return;
 867
 868         off = instr_create_alu(ctx, nir_op_fadd, 1);
 869         off->src[0] = ir2_src(64, 0, IR2_SRC_CONST);
 870         off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT);
 871
 872         /* 8 max set in freedreno_screen.. unneeded instrs patched out */
 873         for (int i = 0; i < 8; i++) {
 874                 instr = instr_create_alu(ctx, nir_op_ffma, 4);
 875                 instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
 876                 instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
 877                 instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
 878                 instr->alu.export = 32;
 879
 880                 instr = instr_create_alu(ctx, nir_op_ffma, 4);
 881                 instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
 882                 instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
 883                 instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);
 884                 instr->alu.export = 33;
 885         }
 886 }
 887
 888 static bool emit_cf_list(struct ir2_context *ctx, struct exec_list *list);
 889
 890 static bool
 891 emit_block(struct ir2_context *ctx, nir_block * block)
 892 {
 893         struct ir2_instr *instr;
 894         nir_block *succs = block->successors[0];
 895
 896         ctx->block_idx = block->index;
 897
 898         nir_foreach_instr(instr, block)
 899                 emit_instr(ctx, instr);
 900
 901         if (!succs || !succs->index)
 902                 return false;
 903
 904         /* we want to be smart and always jump and have the backend cleanup
 905          * but we are not, so there are two cases where jump is needed:
 906          *  loops (succs index lower)
 907          *  jumps (jump instruction seen in block)
 908          */
 909         if (succs->index > block->index && !ctx->block_has_jump[block->index])
 910                 return false;
 911
 912         assert(block->successors[1] == NULL);
 913
 914         instr = ir2_instr_create(ctx, IR2_CF);
 915         instr->cf.block_idx = succs->index;
 916         /* XXX can't jump to a block with different predicate */
 917         return true;
 918 }
 919
 920 static void
 921 emit_if(struct ir2_context *ctx, nir_if * nif)
 922 {
 923         unsigned pred = ctx->pred, pred_idx = ctx->pred_idx;
 924         struct ir2_instr *instr;
 925
 926         /* XXX: blob seems to always use same register for condition */
 927
 928         instr = ir2_instr_create(ctx, IR2_ALU);
 929         instr->src[0] = make_src(ctx, nif->condition);
 930         instr->src_count = 1;
 931         instr->ssa.ncomp = 1;
 932         instr->alu.vector_opc = VECTOR_NONE;
 933         instr->alu.scalar_opc = SCALAR_NONE;
 934         instr->alu.export = -1;
 935         instr->alu.write_mask = 1;
 936         instr->pred = 0;
 937
 938         /* if nested, use PRED_SETNE_PUSHv */
 939         if (pred) {
 940                 instr->alu.vector_opc = PRED_SETNE_PUSHv;
 941                 instr->src[1] = instr->src[0];
 942                 instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA);
 943                 instr->src[0].swizzle = IR2_SWIZZLE_XXXX;
 944                 instr->src[1].swizzle = IR2_SWIZZLE_XXXX;
 945                 instr->src_count = 2;
 946         } else {
 947                 instr->alu.scalar_opc = PRED_SETNEs;
 948         }
 949
 950         ctx->pred_idx = instr->idx;
 951         ctx->pred = 3;
 952
 953         emit_cf_list(ctx, &nif->then_list);
 954
 955         /* TODO: if these is no else branch we don't need this
 956          * and if the else branch is simple, can just flip ctx->pred instead
 957          */
 958         instr = ir2_instr_create(ctx, IR2_ALU);
 959         instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
 960         instr->src_count = 1;
 961         instr->ssa.ncomp = 1;
 962         instr->alu.vector_opc = VECTOR_NONE;
 963         instr->alu.scalar_opc = PRED_SET_INVs;
 964         instr->alu.export = -1;
 965         instr->alu.write_mask = 1;
 966         instr->pred = 0;
 967         ctx->pred_idx = instr->idx;
 968
 969         emit_cf_list(ctx, &nif->else_list);
 970
 971         /* restore predicate for nested predicates */
 972         if (pred) {
 973                 instr = ir2_instr_create(ctx, IR2_ALU);
 974                 instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
 975                 instr->src_count = 1;
 976                 instr->ssa.ncomp = 1;
 977                 instr->alu.vector_opc = VECTOR_NONE;
 978                 instr->alu.scalar_opc = PRED_SET_POPs;
 979                 instr->alu.export = -1;
 980                 instr->alu.write_mask = 1;
 981                 instr->pred = 0;
 982                 ctx->pred_idx = instr->idx;
 983         }
 984
 985         /* restore ctx->pred */
 986         ctx->pred = pred;
 987 }
 988
 989 /* get the highest block idx in the loop, so we know when
 990  * we can free registers that are allocated outside the loop
 991  */
 992 static unsigned
 993 loop_last_block(struct exec_list *list)
 994 {
 995         nir_cf_node *node =
 996                 exec_node_data(nir_cf_node, exec_list_get_tail(list), node);
 997         switch (node->type) {
 998         case nir_cf_node_block:
 999                 return nir_cf_node_as_block(node)->index;
1000         case nir_cf_node_if:
1001                 assert(0); /* XXX could this ever happen? */
1002                 return 0;
1003         case nir_cf_node_loop:
1004                 return loop_last_block(&nir_cf_node_as_loop(node)->body);
1005         default:
1006                 compile_error(ctx, "Not supported\n");
1007                 return 0;
1008         }
1009 }
1010
1011 static void
1012 emit_loop(struct ir2_context *ctx, nir_loop *nloop)
1013 {
1014         ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body);
1015         emit_cf_list(ctx, &nloop->body);
1016         ctx->loop_depth--;
1017 }
1018
1019 static bool
1020 emit_cf_list(struct ir2_context *ctx, struct exec_list *list)
1021 {
1022         bool ret = false;
1023         foreach_list_typed(nir_cf_node, node, node, list) {
1024                 ret = false;
1025                 switch (node->type) {
1026                 case nir_cf_node_block:
1027                         ret = emit_block(ctx, nir_cf_node_as_block(node));
1028                         break;
1029                 case nir_cf_node_if:
1030                         emit_if(ctx, nir_cf_node_as_if(node));
1031                         break;
1032                 case nir_cf_node_loop:
1033                         emit_loop(ctx, nir_cf_node_as_loop(node));
1034                         break;
1035                 case nir_cf_node_function:
1036                         compile_error(ctx, "Not supported\n");
1037                         break;
1038                 }
1039         }
1040         return ret;
1041 }
1042
1043 static void cleanup_binning(struct ir2_context *ctx)
1044 {
1045         assert(ctx->so->type == MESA_SHADER_VERTEX);
1046
1047         /* kill non-position outputs for binning variant */
1048         nir_foreach_block(block, nir_shader_get_entrypoint(ctx->nir)) {
1049                 nir_foreach_instr_safe(instr, block) {
1050                         if (instr->type != nir_instr_type_intrinsic)
1051                                 continue;
1052
1053                         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1054                         if (intr->intrinsic != nir_intrinsic_store_output)
1055                                 continue;
1056
1057                         if (output_slot(ctx, intr) != VARYING_SLOT_POS)
1058                                 nir_instr_remove(instr);
1059                 }
1060         }
1061
1062         ir2_optimize_nir(ctx->nir, false);
1063 }
1064
1065 static bool
1066 ir2_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
1067 {
1068         if (instr->type != nir_instr_type_alu)
1069                 return false;
1070
1071         nir_alu_instr *alu = nir_instr_as_alu(instr);
1072         switch (alu->op) {
1073         case nir_op_frsq:
1074         case nir_op_frcp:
1075         case nir_op_flog2:
1076         case nir_op_fexp2:
1077         case nir_op_fsqrt:
1078         case nir_op_fcos:
1079         case nir_op_fsin:
1080                 return true;
1081         default:
1082                 break;
1083         }
1084
1085         return false;
1086 }
1087
1088 void
1089 ir2_nir_compile(struct ir2_context *ctx, bool binning)
1090 {
1091         struct fd2_shader_stateobj *so = ctx->so;
1092
1093         memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map));
1094
1095         ctx->nir = nir_shader_clone(NULL, so->nir);
1096
1097         if (binning)
1098                 cleanup_binning(ctx);
1099
1100         /* postprocess */
1101         OPT_V(ctx->nir, nir_opt_algebraic_late);
1102
1103         OPT_V(ctx->nir, nir_copy_prop);
1104         OPT_V(ctx->nir, nir_opt_dce);
1105         OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons);
1106
1107         OPT_V(ctx->nir, nir_lower_int_to_float);
1108         OPT_V(ctx->nir, nir_lower_bool_to_float);
1109         OPT_V(ctx->nir, nir_lower_to_source_mods, nir_lower_all_source_mods);
1110
1111         OPT_V(ctx->nir, nir_lower_alu_to_scalar, ir2_alu_to_scalar_filter_cb, NULL);
1112
1113         OPT_V(ctx->nir, nir_lower_locals_to_regs);
1114
1115         OPT_V(ctx->nir, nir_convert_from_ssa, true);
1116
1117         OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
1118         OPT_V(ctx->nir, nir_lower_vec_to_movs);
1119
1120         OPT_V(ctx->nir, nir_opt_dce);
1121
1122         nir_sweep(ctx->nir);
1123
1124         if (fd_mesa_debug & FD_DBG_DISASM) {
1125                 debug_printf("----------------------\n");
1126                 nir_print_shader(ctx->nir, stdout);
1127                 debug_printf("----------------------\n");
1128         }
1129
1130         /* fd2_shader_stateobj init */
1131         if (so->type == MESA_SHADER_FRAGMENT) {
1132                 ctx->f->fragcoord = -1;
1133                 ctx->f->inputs_count = 0;
1134                 memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs));
1135         }
1136
1137         /* Setup inputs: */
1138         nir_foreach_variable(in, &ctx->nir->inputs)
1139                 setup_input(ctx, in);
1140
1141         if (so->type == MESA_SHADER_FRAGMENT) {
1142                 unsigned idx;
1143                 for (idx = 0; idx < ctx->f->inputs_count; idx++) {
1144                         ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp;
1145                         update_range(ctx, &ctx->input[idx]);
1146                 }
1147                 /* assume we have param input and kill it later if not */
1148                 ctx->input[idx].ncomp = 4;
1149                 update_range(ctx, &ctx->input[idx]);
1150         } else {
1151                 ctx->input[0].ncomp = 1;
1152                 ctx->input[2].ncomp = 1;
1153                 update_range(ctx, &ctx->input[0]);
1154                 update_range(ctx, &ctx->input[2]);
1155         }
1156
1157         /* And emit the body: */
1158         nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir);
1159
1160         nir_foreach_register(reg, &fxn->registers) {
1161                 ctx->reg[reg->index].ncomp = reg->num_components;
1162                 ctx->reg_count = MAX2(ctx->reg_count, reg->index + 1);
1163         }
1164
1165         nir_metadata_require(fxn, nir_metadata_block_index);
1166         emit_cf_list(ctx, &fxn->body);
1167         /* TODO emit_block(ctx, fxn->end_block); */
1168
1169         if (so->type == MESA_SHADER_VERTEX)
1170                 extra_position_exports(ctx, binning);
1171
1172         ralloc_free(ctx->nir);
1173
1174         /* kill unused param input */
1175         if (so->type == MESA_SHADER_FRAGMENT && !so->need_param)
1176                 ctx->input[ctx->f->inputs_count].initialized = false;
1177 }