/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "ir2_private.h"
#include "nir/tgsi_to_nir.h"

#include "freedreno_util.h"
#include "fd2_program.h"

static const nir_shader_compiler_options options = {
	.lower_fpow = true,
	.lower_flrp32 = true,
	.lower_fmod32 = true,
	.lower_fdiv = true,
	.lower_fceil = true,
	.fuse_ffma = true,
	/* .fdot_replicates = true, it is replicated, but it makes things worse */
	.lower_all_io_to_temps = true,
	.vertex_id_zero_based = true, /* it's not implemented anyway */
};

struct nir_shader *
ir2_tgsi_to_nir(const struct tgsi_token *tokens,
		struct pipe_screen *screen)
{
	if (!screen) {
		return tgsi_to_nir_noscreen(tokens, &options);
	}

	return tgsi_to_nir(tokens, screen);
}

const nir_shader_compiler_options *
ir2_get_compiler_options(void)
{
	return &options;
}

#define OPT(nir, pass, ...) ({ \
	bool this_progress = false; \
	NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
	this_progress; \
})
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)

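/* run the common NIR cleanup passes in a loop until none of them makes
 * further progress
 */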
static void
ir2_optimize_loop(nir_shader *s)
{
	bool progress;
	do {
		progress = false;

		OPT_V(s, nir_lower_vars_to_ssa);
		progress |= OPT(s, nir_opt_copy_prop_vars);
		progress |= OPT(s, nir_copy_prop);
		progress |= OPT(s, nir_opt_dce);
		progress |= OPT(s, nir_opt_cse);
		/* progress |= OPT(s, nir_opt_gcm, true); */
		progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
		progress |= OPT(s, nir_opt_intrinsics);
		progress |= OPT(s, nir_opt_algebraic);
		progress |= OPT(s, nir_opt_constant_folding);
		progress |= OPT(s, nir_opt_dead_cf);
		if (OPT(s, nir_opt_trivial_continues)) {
			progress |= true;
			/* If nir_opt_trivial_continues makes progress, then we need to clean
			 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
			 * to make progress.
			 */
			OPT(s, nir_copy_prop);
			OPT(s, nir_opt_dce);
		}
		progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
		progress |= OPT(s, nir_opt_if, false);
		progress |= OPT(s, nir_opt_remove_phis);
		progress |= OPT(s, nir_opt_undef);

	}
	while (progress);
}

/* trig workarounds are the same as in ir3.. but we don't want to include ir3 */
bool ir3_nir_apply_trig_workarounds(nir_shader * shader);

int
ir2_optimize_nir(nir_shader *s, bool lower)
{
	struct nir_lower_tex_options tex_options = {
		.lower_txp = ~0u,
		.lower_rect = 0,
	};

	if (fd_mesa_debug & FD_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(s, stdout);
		debug_printf("----------------------\n");
	}

	OPT_V(s, nir_opt_global_to_local);
	OPT_V(s, nir_lower_regs_to_ssa);
	OPT_V(s, nir_lower_vars_to_ssa);
	OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);

	if (lower) {
		OPT_V(s, ir3_nir_apply_trig_workarounds);
		OPT_V(s, nir_lower_tex, &tex_options);
	}

	ir2_optimize_loop(s);

	OPT_V(s, nir_remove_dead_variables, nir_var_function_temp);
	OPT_V(s, nir_move_load_const);

	/* TODO we don't want to get shaders writing to depth for depth textures */
	if (s->info.stage == MESA_SHADER_FRAGMENT) {
		nir_foreach_variable(var, &s->outputs) {
			if (var->data.location == FRAG_RESULT_DEPTH)
				return -1;
		}
	}

	return 0;
}

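/* get a CONST src for a float immediate: values are packed into the shader's
 * immediate vec4 slots, reusing components of an existing immediate when the
 * bits match, and the returned swizzle reads the components back in order
 */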
static struct ir2_src
load_const(struct ir2_context *ctx, float *value_f, unsigned ncomp)
{
	struct fd2_shader_stateobj *so = ctx->so;
	unsigned imm_ncomp, swiz, idx, i, j;
	uint32_t *value = (uint32_t*) value_f;

	/* try to merge with existing immediate (TODO: try with neg) */
	for (idx = 0; idx < so->num_immediates; idx++) {
		swiz = 0;
		imm_ncomp = so->immediates[idx].ncomp;
		for (i = 0; i < ncomp; i++) {
			for (j = 0; j < imm_ncomp; j++) {
				if (value[i] == so->immediates[idx].val[j])
					break;
			}
			if (j == imm_ncomp) {
				if (j == 4)
					break;
				so->immediates[idx].val[imm_ncomp++] = value[i];
			}
			swiz |= swiz_set(j, i);
		}
		/* matched all components */
		if (i == ncomp)
			break;
	}

	/* need to allocate new immediate */
	if (idx == so->num_immediates) {
		swiz = 0;
		imm_ncomp = 0;
		for (i = 0; i < ncomp; i++) {
			for (j = 0; j < imm_ncomp; j++) {
				if (value[i] == ctx->so->immediates[idx].val[j])
					break;
			}
			if (j == imm_ncomp) {
				so->immediates[idx].val[imm_ncomp++] = value[i];
			}
			swiz |= swiz_set(j, i);
		}
		so->num_immediates++;
	}
	so->immediates[idx].ncomp = imm_ncomp;

	if (ncomp == 1)
		swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX);

	return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST);
}

struct ir2_src
ir2_zero(struct ir2_context *ctx)
{
	return load_const(ctx, (float[]) {0.0f}, 1);
}

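/* update liveness info for a register: a value defined outside a loop but
 * used inside it can only be freed after the last block of that loop
 */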
static void
update_range(struct ir2_context *ctx, struct ir2_reg *reg)
{
	if (!reg->initialized) {
		reg->initialized = true;
		reg->loop_depth = ctx->loop_depth;
	}

	if (ctx->loop_depth > reg->loop_depth) {
		reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1];
	} else {
		reg->loop_depth = ctx->loop_depth;
		reg->block_idx_free = -1;
	}

	/* for regs we want to free at the end of the loop in any case
	 * XXX don't do this for ssa
	 */
	if (reg->loop_depth)
		reg->block_idx_free = ctx->loop_last_block[reg->loop_depth];
}

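/* translate a nir_src into an ir2 src operand: constants become immediate
 * CONST srcs, SSA defs are looked up through ssa_map, and NIR registers map
 * directly to ir2 registers; liveness is updated for the value that is read
 */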
static struct ir2_src
make_src(struct ir2_context *ctx, nir_src src)
{
	struct ir2_src res = {};
	struct ir2_reg *reg;

	nir_const_value *const_value = nir_src_as_const_value(src);

	if (const_value) {
		assert(src.is_ssa);
		return load_const(ctx, &const_value->f32[0], src.ssa->num_components);
	}

	if (!src.is_ssa) {
		res.num = src.reg.reg->index;
		res.type = IR2_SRC_REG;
		reg = &ctx->reg[res.num];
	} else {
		assert(ctx->ssa_map[src.ssa->index] >= 0);
		res.num = ctx->ssa_map[src.ssa->index];
		res.type = IR2_SRC_SSA;
		reg = &ctx->instr[res.num].ssa;
	}

	update_range(ctx, reg);
	return res;
}

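/* bind an instruction's result to its nir_dest: SSA destinations are recorded
 * in ssa_map, while register destinations make the instruction write the ir2
 * register instead of an SSA value
 */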
static void
set_index(struct ir2_context *ctx, nir_dest * dst,
		struct ir2_instr *instr)
{
	struct ir2_reg *reg = &instr->ssa;

	if (dst->is_ssa) {
		ctx->ssa_map[dst->ssa.index] = instr->idx;
	} else {
		assert(instr->is_ssa);
		reg = &ctx->reg[dst->reg.reg->index];

		instr->is_ssa = false;
		instr->reg = reg;
	}
	update_range(ctx, reg);
}

static struct ir2_instr *
ir2_instr_create(struct ir2_context *ctx, int type)
{
	struct ir2_instr *instr;

	instr = &ctx->instr[ctx->instr_count++];
	instr->idx = ctx->instr_count - 1;
	instr->type = type;
	instr->block_idx = ctx->block_idx;
	instr->pred = ctx->pred;
	instr->is_ssa = true;
	return instr;
}

static struct ir2_instr *
instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
{
	/* emit_alu will fixup instrs that don't map directly */
	static const struct ir2_opc {
		int8_t scalar, vector;
	} nir_ir2_opc[nir_num_opcodes+1] = {
		[0 ... nir_num_opcodes - 1] = {-1, -1},

		[nir_op_fmov] = {MAXs, MAXv},
		[nir_op_fsign] = {-1, CNDGTEv},
		[nir_op_fnot] = {SETEs, SETEv},
		[nir_op_for] = {MAXs, MAXv},
		[nir_op_fand] = {MINs, MINv},
		[nir_op_fxor] = {-1, SETNEv},
		[nir_op_fadd] = {ADDs, ADDv},
		[nir_op_fsub] = {ADDs, ADDv},
		[nir_op_fmul] = {MULs, MULv},
		[nir_op_ffma] = {-1, MULADDv},
		[nir_op_fmax] = {MAXs, MAXv},
		[nir_op_fmin] = {MINs, MINv},
		[nir_op_ffloor] = {FLOORs, FLOORv},
		[nir_op_ffract] = {FRACs, FRACv},
		[nir_op_ftrunc] = {TRUNCs, TRUNCv},
		[nir_op_fdot2] = {-1, DOT2ADDv},
		[nir_op_fdot3] = {-1, DOT3v},
		[nir_op_fdot4] = {-1, DOT4v},
		[nir_op_sge] = {-1, SETGTEv},
		[nir_op_slt] = {-1, SETGTv},
		[nir_op_sne] = {-1, SETNEv},
		[nir_op_seq] = {-1, SETEv},
		[nir_op_fcsel] = {-1, CNDEv},
		[nir_op_frsq] = {RECIPSQ_IEEE, -1},
		[nir_op_frcp] = {RECIP_IEEE, -1},
		[nir_op_flog2] = {LOG_IEEE, -1},
		[nir_op_fexp2] = {EXP_IEEE, -1},
		[nir_op_fsqrt] = {SQRT_IEEE, -1},
		[nir_op_fcos] = {COS, -1},
		[nir_op_fsin] = {SIN, -1},
		/* no fsat, fneg, fabs since source mods deal with those */

		/* some nir passes still generate nir_op_imov */
		[nir_op_imov] = {MAXs, MAXv},

		/* so we can use this function with non-nir op */
#define ir2_op_cube nir_num_opcodes
		[ir2_op_cube] = {-1, CUBEv},
	};

	struct ir2_opc op = nir_ir2_opc[opcode];
	assert(op.vector >= 0 || op.scalar >= 0);

	struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU);
	instr->alu.vector_opc = op.vector;
	instr->alu.scalar_opc = op.scalar;
	instr->alu.export = -1;
	instr->alu.write_mask = (1 << ncomp) - 1;
	instr->src_count = opcode == ir2_op_cube ? 2 :
		nir_op_infos[opcode].num_inputs;
	instr->ssa.ncomp = ncomp;
	return instr;
}

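/* create an ALU instruction that writes (part of) an ir2 register rather than
 * an SSA value, optionally sharing the register of a previous instruction so
 * multiple writes can build up a single vec4
 */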
static struct ir2_instr *
instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode,
		uint8_t write_mask, struct ir2_instr *share_reg)
{
	struct ir2_instr *instr;
	struct ir2_reg *reg;

	reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++];
	reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1);

	instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask));
	instr->alu.write_mask = write_mask;
	instr->reg = reg;
	instr->is_ssa = false;
	return instr;
}


static struct ir2_instr *
instr_create_alu_dest(struct ir2_context *ctx, nir_op opcode, nir_dest *dst)
{
	struct ir2_instr *instr;
	instr = instr_create_alu(ctx, opcode, nir_dest_num_components(*dst));
	set_index(ctx, dst, instr);
	return instr;
}

static struct ir2_instr *
ir2_instr_create_fetch(struct ir2_context *ctx, nir_dest *dst,
		instr_fetch_opc_t opc)
{
	struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH);
	instr->fetch.opc = opc;
	instr->src_count = 1;
	instr->ssa.ncomp = nir_dest_num_components(*dst);
	set_index(ctx, dst, instr);
	return instr;
}

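/* like make_src, but never returns a CONST src: constant values are first
 * copied through a mov so the caller always gets an SSA/register operand
 * (used by the texture fetch path)
 */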
static struct ir2_src
make_src_noconst(struct ir2_context *ctx, nir_src src)
{
	struct ir2_instr *instr;

	if (nir_src_as_const_value(src)) {
		assert(src.is_ssa);
		instr = instr_create_alu(ctx, nir_op_fmov, src.ssa->num_components);
		instr->src[0] = make_src(ctx, src);
		return ir2_src(instr->idx, 0, IR2_SRC_SSA);
	}

	return make_src(ctx, src);
}

static void
emit_alu(struct ir2_context *ctx, nir_alu_instr * alu)
{
	const nir_op_info *info = &nir_op_infos[alu->op];
	nir_dest *dst = &alu->dest.dest;
	struct ir2_instr *instr;
	struct ir2_src tmp;
	unsigned ncomp;

	/* get the number of dst components */
	if (dst->is_ssa) {
		ncomp = dst->ssa.num_components;
	} else {
		ncomp = 0;
		for (int i = 0; i < 4; i++)
			ncomp += !!(alu->dest.write_mask & 1 << i);
	}

	instr = instr_create_alu(ctx, alu->op, ncomp);
	set_index(ctx, dst, instr);
	instr->alu.saturate = alu->dest.saturate;
	instr->alu.write_mask = alu->dest.write_mask;

	for (int i = 0; i < info->num_inputs; i++) {
		nir_alu_src *src = &alu->src[i];

		/* compress swizzle with writemask when applicable */
		unsigned swiz = 0, j = 0;
		for (int i = 0; i < 4; i++) {
			if (!(alu->dest.write_mask & 1 << i) && !info->output_size)
				continue;
			swiz |= swiz_set(src->swizzle[i], j++);
		}

		instr->src[i] = make_src(ctx, src->src);
		instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz);
		instr->src[i].negate = src->negate;
		instr->src[i].abs = src->abs;
	}

	/* workarounds for NIR ops that don't map directly to a2xx ops */
	switch (alu->op) {
	case nir_op_slt:
		tmp = instr->src[0];
		instr->src[0] = instr->src[1];
		instr->src[1] = tmp;
		break;
	case nir_op_fcsel:
		tmp = instr->src[1];
		instr->src[1] = instr->src[2];
		instr->src[2] = tmp;
		break;
	case nir_op_fsub:
		instr->src[1].negate = !instr->src[1].negate;
		break;
	case nir_op_fdot2:
		instr->src_count = 3;
		instr->src[2] = ir2_zero(ctx);
		break;
	case nir_op_fsign: {
		/* we need an extra instruction to deal with the zero case */
		struct ir2_instr *tmp;

		/* tmp = x == 0 ? 0 : 1 */
		tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp);
		tmp->src[0] = instr->src[0];
		tmp->src[1] = ir2_zero(ctx);
		tmp->src[2] = load_const(ctx, (float[]) {1.0f}, 1);

		/* result = x >= 0 ? tmp : -tmp */
		instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
		instr->src[2] = instr->src[1];
		instr->src[2].negate = true;
		instr->src_count = 3;
	} break;
	default:
		break;
	}
}

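/* load a shader input: vertex inputs turn into vertex fetch instructions,
 * while fragment varyings are read from the input registers, with extra ALU
 * work for gl_PointCoord and gl_FragCoord
 */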
static void
load_input(struct ir2_context *ctx, nir_dest *dst, unsigned idx)
{
	struct ir2_instr *instr;
	int slot = -1;

	if (ctx->so->type == MESA_SHADER_VERTEX) {
		instr = ir2_instr_create_fetch(ctx, dst, 0);
		instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT);
		instr->fetch.vtx.const_idx = 20 + (idx / 3);
		instr->fetch.vtx.const_idx_sel = idx % 3;
		return;
	}

	/* get slot from idx */
	nir_foreach_variable(var, &ctx->nir->inputs) {
		if (var->data.driver_location == idx) {
			slot = var->data.location;
			break;
		}
	}
	assert(slot >= 0);

	switch (slot) {
	case VARYING_SLOT_PNTC:
		/* need to extract with abs and invert y */
		instr = instr_create_alu_dest(ctx, nir_op_ffma, dst);
		instr->src[0] = ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT);
		instr->src[0].abs = true;
		instr->src[1] = load_const(ctx, (float[]) {1.0f, -1.0f}, 2);
		instr->src[2] = load_const(ctx, (float[]) {0.0f, 1.0f}, 2);
		break;
	case VARYING_SLOT_POS:
		/* need to extract xy with abs and add tile offset on a20x
		 * zw from fragcoord input (w inverted in fragment shader)
		 * TODO: only components that are required by fragment shader
		 */
		instr = instr_create_alu_reg(ctx,
			ctx->so->is_a20x ? nir_op_fadd : nir_op_fmov, 3, NULL);
		instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
		instr->src[0].abs = true;
		/* on a20x, C64 contains the tile offset */
		instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST);

		instr = instr_create_alu_reg(ctx, nir_op_fmov, 4, instr);
		instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT);

		instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr);
		instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT);

		unsigned reg_idx = instr->reg - ctx->reg; /* XXX */
		instr = instr_create_alu_dest(ctx, nir_op_fmov, dst);
		instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
		break;
	default:
		instr = instr_create_alu_dest(ctx, nir_op_fmov, dst);
		instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT);
		break;
	}
}

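/* map a store_output intrinsic to its varying/FRAG_RESULT slot by matching
 * the intrinsic base against the output variables' driver_location
 */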
static unsigned
output_slot(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
	int slot = -1;
	unsigned idx = nir_intrinsic_base(intr);
	nir_foreach_variable(var, &ctx->nir->outputs) {
		if (var->data.driver_location == idx) {
			slot = var->data.location;
			break;
		}
	}
	assert(slot != -1);
	return slot;
}

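/* emit an export for an output value: vertex position and point size use the
 * fixed export indices 62/63, other vertex outputs are matched against the
 * linked fragment shader's inputs, and fragment shaders only export color
 */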
static void
store_output(struct ir2_context *ctx, nir_src src, unsigned slot, unsigned ncomp)
{
	struct ir2_instr *instr;
	unsigned idx = 0;

	if (ctx->so->type == MESA_SHADER_VERTEX) {
		switch (slot) {
		case VARYING_SLOT_POS:
			ctx->position = make_src(ctx, src);
			idx = 62;
			break;
		case VARYING_SLOT_PSIZ:
			ctx->so->writes_psize = true;
			idx = 63;
			break;
		default:
			/* find matching slot from fragment shader input */
			for (idx = 0; idx < ctx->f->inputs_count; idx++)
				if (ctx->f->inputs[idx].slot == slot)
					break;
			if (idx == ctx->f->inputs_count)
				return;
		}
	} else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) {
		/* only color output is implemented */
		return;
	}

	instr = instr_create_alu(ctx, nir_op_fmov, ncomp);
	instr->src[0] = make_src(ctx, src);
	instr->alu.export = idx;
}

static void
emit_intrinsic(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
	struct ir2_instr *instr;
	nir_const_value *const_offset;
	nir_deref_instr *deref;
	unsigned idx;

	switch (intr->intrinsic) {
	case nir_intrinsic_load_input:
		load_input(ctx, &intr->dest, nir_intrinsic_base(intr));
		break;
	case nir_intrinsic_store_output:
		store_output(ctx, intr->src[0], output_slot(ctx, intr), intr->num_components);
		break;
	case nir_intrinsic_load_deref:
		deref = nir_src_as_deref(intr->src[0]);
		assert(deref->deref_type == nir_deref_type_var);
		load_input(ctx, &intr->dest, deref->var->data.driver_location);
		break;
	case nir_intrinsic_store_deref:
		deref = nir_src_as_deref(intr->src[0]);
		assert(deref->deref_type == nir_deref_type_var);
		store_output(ctx, intr->src[1], deref->var->data.location, intr->num_components);
		break;
	case nir_intrinsic_load_uniform:
		const_offset = nir_src_as_const_value(intr->src[0]);
		assert(const_offset); /* TODO can be false in ES2? */
		idx = nir_intrinsic_base(intr);
		idx += (uint32_t) nir_src_as_const_value(intr->src[0])->f32[0];
		instr = instr_create_alu_dest(ctx, nir_op_fmov, &intr->dest);
		instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST);
		break;
	case nir_intrinsic_discard:
	case nir_intrinsic_discard_if:
		instr = ir2_instr_create(ctx, IR2_ALU);
		instr->alu.vector_opc = VECTOR_NONE;
		if (intr->intrinsic == nir_intrinsic_discard_if) {
			instr->alu.scalar_opc = KILLNEs;
			instr->src[0] = make_src(ctx, intr->src[0]);
		} else {
			instr->alu.scalar_opc = KILLEs;
			instr->src[0] = ir2_zero(ctx);
		}
		instr->alu.export = -1;
		instr->src_count = 1;
		ctx->so->has_kill = true;
		break;
	case nir_intrinsic_load_front_face:
		/* gl_FrontFacing is in the sign of param.x
		 * rcp required because otherwise we can't differentiate -0.0 and +0.0
		 */
		ctx->so->need_param = true;

		struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1);
		tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);

		instr = instr_create_alu_dest(ctx, nir_op_sge, &intr->dest);
		instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
		instr->src[1] = ir2_zero(ctx);
		break;
	default:
		compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic);
		break;
	}
}

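/* emit a texture fetch: cube map coordinates are lowered with the CUBE
 * instruction sequence below, and lod/bias is passed as an extra src for the
 * backend to deal with
 */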
static void
emit_tex(struct ir2_context *ctx, nir_tex_instr * tex)
{
	bool is_rect = false, is_cube = false;
	struct ir2_instr *instr;
	nir_src *coord, *lod_bias;

	coord = lod_bias = NULL;

	for (unsigned i = 0; i < tex->num_srcs; i++) {
		switch (tex->src[i].src_type) {
		case nir_tex_src_coord:
			coord = &tex->src[i].src;
			break;
		case nir_tex_src_bias:
		case nir_tex_src_lod:
			assert(!lod_bias);
			lod_bias = &tex->src[i].src;
			break;
		default:
			compile_error(ctx, "Unhandled NIR tex src type: %d\n",
				tex->src[i].src_type);
			return;
		}
	}

	switch (tex->op) {
	case nir_texop_tex:
	case nir_texop_txb:
	case nir_texop_txl:
		break;
	default:
		compile_error(ctx, "unimplemented texop %d\n", tex->op);
		return;
	}

	switch (tex->sampler_dim) {
	case GLSL_SAMPLER_DIM_2D:
		break;
	case GLSL_SAMPLER_DIM_RECT:
		is_rect = true;
		break;
	case GLSL_SAMPLER_DIM_CUBE:
		is_cube = true;
		break;
	default:
		compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim);
		return;
	}

	struct ir2_src src_coord = make_src_noconst(ctx, *coord);

	/* for cube maps
	 * tmp = cube(coord)
	 * tmp.xy = tmp.xy / |tmp.z| + 1.5
	 * coord = tmp.xyw
	 */
	if (is_cube) {
		struct ir2_instr *rcp, *coord_xy;
		unsigned reg_idx;

		instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL);
		instr->src[0] = src_coord;
		instr->src[0].swizzle = IR2_SWIZZLE_ZZXY;
		instr->src[1] = src_coord;
		instr->src[1].swizzle = IR2_SWIZZLE_YXZZ;

		reg_idx = instr->reg - ctx->reg; /* hacky */

		rcp = instr_create_alu(ctx, nir_op_frcp, 1);
		rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
		rcp->src[0].abs = true;

		coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
		coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
		coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
		coord_xy->src[2] = load_const(ctx, (float[]) {1.5f}, 1);

		src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG);
		/* TODO: lod/bias transformed by src_coord.z ? */
	}

	instr = ir2_instr_create_fetch(ctx, &tex->dest, TEX_FETCH);
	instr->src[0] = src_coord;
	instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_XYW : 0;
	instr->fetch.tex.is_cube = is_cube;
	instr->fetch.tex.is_rect = is_rect;
	instr->fetch.tex.samp_id = tex->sampler_index;

	/* for lod/bias, we insert an extra src for the backend to deal with */
	if (lod_bias) {
		instr->src[1] = make_src_noconst(ctx, *lod_bias);
		/* backend will use 2-3 components so apply swizzle */
		swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX);
		instr->src_count = 2;
	}
}

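/* gather fragment shader input (varying) info; vertex shader inputs are
 * handled later, and gl_PointCoord/gl_FragCoord also need the param register
 */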
static void
setup_input(struct ir2_context *ctx, nir_variable * in)
{
	struct fd2_shader_stateobj *so = ctx->so;
	unsigned array_len = MAX2(glsl_get_length(in->type), 1);
	unsigned n = in->data.driver_location;
	unsigned slot = in->data.location;

	assert(array_len == 1);

	/* handle later */
	if (ctx->so->type == MESA_SHADER_VERTEX)
		return;

	if (ctx->so->type != MESA_SHADER_FRAGMENT)
		compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);

	if (slot == VARYING_SLOT_PNTC) {
		so->need_param = true;
		return;
	}

	n = ctx->f->inputs_count++;

	/* half of fragcoord from param reg, half from a varying */
	if (slot == VARYING_SLOT_POS) {
		ctx->f->fragcoord = n;
		so->need_param = true;
	}

	ctx->f->inputs[n].slot = slot;
	ctx->f->inputs[n].ncomp = glsl_get_components(in->type);

	/* in->data.interpolation?
	 * opengl ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD
	 */
}

static void
emit_undef(struct ir2_context *ctx, nir_ssa_undef_instr * undef)
{
	/* TODO we don't want to emit anything for undefs */

	struct ir2_instr *instr;

	instr = instr_create_alu_dest(ctx, nir_op_fmov,
		&(nir_dest) {.ssa = undef->def,.is_ssa = true});
	instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST);
}

static void
emit_instr(struct ir2_context *ctx, nir_instr * instr)
{
	switch (instr->type) {
	case nir_instr_type_alu:
		emit_alu(ctx, nir_instr_as_alu(instr));
		break;
	case nir_instr_type_deref:
		/* ignored, handled as part of the intrinsic they are src to */
		break;
	case nir_instr_type_intrinsic:
		emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
		break;
	case nir_instr_type_load_const:
		/* dealt with when using nir_src */
		break;
	case nir_instr_type_tex:
		emit_tex(ctx, nir_instr_as_tex(instr));
		break;
	case nir_instr_type_jump:
		ctx->block_has_jump[ctx->block_idx] = true;
		break;
	case nir_instr_type_ssa_undef:
		emit_undef(ctx, nir_instr_as_ssa_undef(instr));
		break;
	default:
		break;
	}
}

/* fragcoord.zw and a20x hw binning outputs */
static void
extra_position_exports(struct ir2_context *ctx, bool binning)
{
	struct ir2_instr *instr, *rcp, *sc, *wincoord, *off;

	if (ctx->f->fragcoord < 0 && !binning)
		return;

	instr = instr_create_alu(ctx, nir_op_fmax, 1);
	instr->src[0] = ctx->position;
	instr->src[0].swizzle = IR2_SWIZZLE_W;
	instr->src[1] = ir2_zero(ctx);

	rcp = instr_create_alu(ctx, nir_op_frcp, 1);
	rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA);

	sc = instr_create_alu(ctx, nir_op_fmul, 4);
	sc->src[0] = ctx->position;
	sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);

	wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
	wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
	wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
	wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);

	/* fragcoord z/w */
	if (ctx->f->fragcoord >= 0 && !binning) {
		instr = instr_create_alu(ctx, nir_op_fmov, 1);
		instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA);
		instr->alu.export = ctx->f->fragcoord;

		instr = instr_create_alu(ctx, nir_op_fmov, 1);
		instr->src[0] = ctx->position;
		instr->src[0].swizzle = IR2_SWIZZLE_W;
		instr->alu.export = ctx->f->fragcoord;
		instr->alu.write_mask = 2;
	}

	if (!binning)
		return;

	off = instr_create_alu(ctx, nir_op_fadd, 1);
	off->src[0] = ir2_src(64, 0, IR2_SRC_CONST);
	off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT);

	/* 8 max set in freedreno_screen.. unneeded instrs patched out */
	for (int i = 0; i < 8; i++) {
		instr = instr_create_alu(ctx, nir_op_ffma, 4);
		instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
		instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
		instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
		instr->alu.export = 32;

		instr = instr_create_alu(ctx, nir_op_ffma, 4);
		instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
		instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
		instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);
		instr->alu.export = 33;
	}
}

static bool emit_cf_list(struct ir2_context *ctx, struct exec_list *list);

static bool
emit_block(struct ir2_context *ctx, nir_block * block)
{
	struct ir2_instr *instr;
	nir_block *succs = block->successors[0];

	ctx->block_idx = block->index;

	nir_foreach_instr(instr, block)
		emit_instr(ctx, instr);

	if (!succs || !succs->index)
		return false;

	/* ideally we would always emit a jump and let the backend clean it up,
	 * but we don't, so a jump is only needed in two cases:
	 *  loops (successor index is lower)
	 *  jumps (a jump instruction was seen in the block)
	 */
	if (succs->index > block->index && !ctx->block_has_jump[block->index])
		return false;

	assert(block->successors[1] == NULL);

	instr = ir2_instr_create(ctx, IR2_CF);
	instr->cf.block_idx = succs->index;
	/* XXX can't jump to a block with different predicate */
	return true;
}

static void
emit_if(struct ir2_context *ctx, nir_if * nif)
{
	unsigned pred = ctx->pred, pred_idx = ctx->pred_idx;
	struct ir2_instr *instr;

	/* XXX: blob seems to always use same register for condition */

	instr = ir2_instr_create(ctx, IR2_ALU);
	instr->src[0] = make_src(ctx, nif->condition);
	instr->src_count = 1;
	instr->ssa.ncomp = 1;
	instr->alu.vector_opc = VECTOR_NONE;
	instr->alu.scalar_opc = SCALAR_NONE;
	instr->alu.export = -1;
	instr->alu.write_mask = 1;
	instr->pred = 0;

	/* if nested, use PRED_SETNE_PUSHv */
	if (pred) {
		instr->alu.vector_opc = PRED_SETNE_PUSHv;
		instr->src[1] = instr->src[0];
		instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA);
		instr->src[0].swizzle = IR2_SWIZZLE_XXXX;
		instr->src[1].swizzle = IR2_SWIZZLE_XXXX;
		instr->src_count = 2;
	} else {
		instr->alu.scalar_opc = PRED_SETNEs;
	}

	ctx->pred_idx = instr->idx;
	ctx->pred = 3;

	emit_cf_list(ctx, &nif->then_list);

	/* TODO: if there is no else branch we don't need this
	 * and if the else branch is simple, can just flip ctx->pred instead
	 */
	instr = ir2_instr_create(ctx, IR2_ALU);
	instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
	instr->src_count = 1;
	instr->ssa.ncomp = 1;
	instr->alu.vector_opc = VECTOR_NONE;
	instr->alu.scalar_opc = PRED_SET_INVs;
	instr->alu.export = -1;
	instr->alu.write_mask = 1;
	instr->pred = 0;
	ctx->pred_idx = instr->idx;

	emit_cf_list(ctx, &nif->else_list);

	/* restore predicate for nested predicates */
	if (pred) {
		instr = ir2_instr_create(ctx, IR2_ALU);
		instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
		instr->src_count = 1;
		instr->ssa.ncomp = 1;
		instr->alu.vector_opc = VECTOR_NONE;
		instr->alu.scalar_opc = PRED_SET_POPs;
		instr->alu.export = -1;
		instr->alu.write_mask = 1;
		instr->pred = 0;
		ctx->pred_idx = instr->idx;
	}

	/* restore ctx->pred */
	ctx->pred = pred;
}

/* get the highest block idx in the loop, so we know when
 * we can free registers that are allocated outside the loop
 */
static unsigned
loop_last_block(struct exec_list *list)
{
	nir_cf_node *node =
		exec_node_data(nir_cf_node, exec_list_get_tail(list), node);
	switch (node->type) {
	case nir_cf_node_block:
		return nir_cf_node_as_block(node)->index;
	case nir_cf_node_if:
		assert(0); /* XXX could this ever happen? */
		return 0;
	case nir_cf_node_loop:
		return loop_last_block(&nir_cf_node_as_loop(node)->body);
	default:
		compile_error(ctx, "Not supported\n");
		return 0;
	}
}

static void
emit_loop(struct ir2_context *ctx, nir_loop *nloop)
{
	ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body);
	emit_cf_list(ctx, &nloop->body);
	ctx->loop_depth--;
}

static bool
emit_cf_list(struct ir2_context *ctx, struct exec_list *list)
{
	bool ret = false;
	foreach_list_typed(nir_cf_node, node, node, list) {
		ret = false;
		switch (node->type) {
		case nir_cf_node_block:
			ret = emit_block(ctx, nir_cf_node_as_block(node));
			break;
		case nir_cf_node_if:
			emit_if(ctx, nir_cf_node_as_if(node));
			break;
		case nir_cf_node_loop:
			emit_loop(ctx, nir_cf_node_as_loop(node));
			break;
		case nir_cf_node_function:
			compile_error(ctx, "Not supported\n");
			break;
		}
	}
	return ret;
}

static void cleanup_binning(struct ir2_context *ctx)
{
	assert(ctx->so->type == MESA_SHADER_VERTEX);

	/* kill non-position outputs for binning variant */
	nir_foreach_block(block, nir_shader_get_entrypoint(ctx->nir)) {
		nir_foreach_instr_safe(instr, block) {
			if (instr->type != nir_instr_type_intrinsic)
				continue;

			nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
			unsigned slot;
			switch (intr->intrinsic) {
			case nir_intrinsic_store_deref: {
				nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
				assert(deref->deref_type == nir_deref_type_var);
				slot = deref->var->data.location;
			} break;
			case nir_intrinsic_store_output:
				slot = output_slot(ctx, intr);
				break;
			default:
				continue;
			}

			if (slot != VARYING_SLOT_POS)
				nir_instr_remove(instr);
		}
	}

	ir2_optimize_nir(ctx->nir, false);
}

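/* main entry point: clone the shader, run the final lowering passes and emit
 * ir2 instructions for either the normal or the binning variant
 */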
void
ir2_nir_compile(struct ir2_context *ctx, bool binning)
{
	struct fd2_shader_stateobj *so = ctx->so;

	memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map));

	ctx->nir = nir_shader_clone(NULL, so->nir);

	if (binning)
		cleanup_binning(ctx);

	/* postprocess */
	OPT_V(ctx->nir, nir_opt_algebraic_late);

	OPT_V(ctx->nir, nir_lower_to_source_mods, nir_lower_all_source_mods);
	OPT_V(ctx->nir, nir_copy_prop);
	OPT_V(ctx->nir, nir_opt_dce);
	OPT_V(ctx->nir, nir_opt_move_comparisons);

	OPT_V(ctx->nir, nir_lower_bool_to_float);

	/* lower to scalar instructions that can only be scalar on a2xx */
	OPT_V(ctx->nir, ir2_nir_lower_scalar);

	OPT_V(ctx->nir, nir_lower_locals_to_regs);

	OPT_V(ctx->nir, nir_convert_from_ssa, true);

	OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
	OPT_V(ctx->nir, nir_lower_vec_to_movs);

	OPT_V(ctx->nir, nir_opt_dce);

	nir_sweep(ctx->nir);

	if (fd_mesa_debug & FD_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(ctx->nir, stdout);
		debug_printf("----------------------\n");
	}

	/* fd2_shader_stateobj init */
	if (so->type == MESA_SHADER_FRAGMENT) {
		ctx->f->fragcoord = -1;
		ctx->f->inputs_count = 0;
		memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs));
	}

	/* Setup inputs: */
	nir_foreach_variable(in, &ctx->nir->inputs)
		setup_input(ctx, in);

	if (so->type == MESA_SHADER_FRAGMENT) {
		unsigned idx;
		for (idx = 0; idx < ctx->f->inputs_count; idx++) {
			ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp;
			update_range(ctx, &ctx->input[idx]);
		}
		/* assume we have param input and kill it later if not */
		ctx->input[idx].ncomp = 4;
		update_range(ctx, &ctx->input[idx]);
	} else {
		ctx->input[0].ncomp = 1;
		ctx->input[2].ncomp = 1;
		update_range(ctx, &ctx->input[0]);
		update_range(ctx, &ctx->input[2]);
	}

	/* And emit the body: */
	nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir);

	nir_foreach_register(reg, &fxn->registers) {
		ctx->reg[reg->index].ncomp = reg->num_components;
		ctx->reg_count = MAX2(ctx->reg_count, reg->index + 1);
	}

	nir_metadata_require(fxn, nir_metadata_block_index);
	emit_cf_list(ctx, &fxn->body);
	/* TODO emit_block(ctx, fxn->end_block); */

	if (so->type == MESA_SHADER_VERTEX)
		extra_position_exports(ctx, binning);

	ralloc_free(ctx->nir);

	/* kill unused param input */
	if (so->type == MESA_SHADER_FRAGMENT && !so->need_param)
		ctx->input[ctx->f->inputs_count].initialized = false;
}