src/gallium/drivers/lima/ir/pp/nir.c

   1 /*
   2  * Copyright (c) 2017 Lima Project
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the
  12  * next paragraph) shall be included in all copies or substantial portions
  13  * of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include <string.h>
  26
  27 #include "util/hash_table.h"
  28 #include "util/ralloc.h"
  29 #include "util/bitscan.h"
  30 #include "compiler/nir/nir.h"
  31 #include "pipe/p_state.h"
  32
  33
  34 #include "ppir.h"
  35
  36 static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
  37 {
  38    ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
  39    if (!node)
  40       return NULL;
  41
  42    ppir_dest *dest = ppir_node_get_dest(node);
  43    dest->type = ppir_target_ssa;
  44    dest->ssa.num_components = ssa->num_components;
  45    dest->ssa.live_in = INT_MAX;
  46    dest->ssa.live_out = 0;
  47    dest->write_mask = u_bit_consecutive(0, ssa->num_components);
  48
  49    if (node->type == ppir_node_type_load ||
  50        node->type == ppir_node_type_store)
  51       dest->ssa.is_head = true;
  52
  53    return node;
  54 }
  55
  56 static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
  57                                   nir_register *reg, unsigned mask)
  58 {
  59    ppir_node *node = ppir_node_create(block, op, reg->index, mask);
  60    if (!node)
  61       return NULL;
  62
  63    ppir_dest *dest = ppir_node_get_dest(node);
  64
  65    list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
  66       if (r->index == reg->index) {
  67          dest->reg = r;
  68          break;
  69       }
  70    }
  71
  72    dest->type = ppir_target_register;
  73    dest->write_mask = mask;
  74
  75    if (node->type == ppir_node_type_load ||
  76        node->type == ppir_node_type_store)
  77       dest->reg->is_head = true;
  78
  79    return node;
  80 }
  81
  82 static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
  83                                    nir_dest *dest, unsigned mask)
  84 {
  85    unsigned index = -1;
  86
  87    if (dest) {
  88       if (dest->is_ssa)
  89          return ppir_node_create_ssa(block, op, &dest->ssa);
  90       else
  91          return ppir_node_create_reg(block, op, dest->reg.reg, mask);
  92    }
  93
  94    return ppir_node_create(block, op, index, 0);
  95 }
  96
  97 static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
  98                               ppir_src *ps, nir_src *ns, unsigned mask)
  99 {
 100    ppir_node *child = NULL;
 101
 102    if (ns->is_ssa) {
 103       child = comp->var_nodes[ns->ssa->index];
 104       /* Clone consts for each successor */
 105       switch (child->op) {
 106       case ppir_op_const:
 107          child = ppir_node_clone(node->block, child);
 108          break;
 109       case ppir_op_load_texture:
 110          /* Clone texture loads for each block */
 111          if (child->block != node->block) {
 112             child = ppir_node_clone(node->block, child);
 113             comp->var_nodes[ns->ssa->index] = child;
 114          }
 115          break;
 116       case ppir_op_load_varying:
 117          if ((node->op != ppir_op_load_texture)) {
 118             /* Clone varying loads for each block */
 119             if (child->block != node->block) {
 120                child = ppir_node_clone(node->block, child);
 121                comp->var_nodes[ns->ssa->index] = child;
 122             }
 123             break;
 124          }
 125          /* At least one successor is load_texture, promote it to load_coords
 126           * to ensure that is has exactly one successor */
 127          child->op = ppir_op_load_coords;
 128          /* Fallthrough */
 129       case ppir_op_load_uniform:
 130       case ppir_op_load_coords:
 131          /* Clone uniform and texture coord loads for each block.
 132           * Also ensure that each load has a single successor.
 133           * Let's do a fetch each time and hope for a cache hit instead
 134           * of increasing reg pressure.
 135           */
 136          if (child->block != node->block || !ppir_node_is_root(child)) {
 137             child = ppir_node_clone(node->block, child);
 138             comp->var_nodes[ns->ssa->index] = child;
 139          }
 140          break;
 141       default:
 142          break;
 143       }
 144
 145       if (child->op != ppir_op_undef)
 146          ppir_node_add_dep(node, child, ppir_dep_src);
 147    }
 148    else {
 149       nir_register *reg = ns->reg.reg;
 150       while (mask) {
 151          int swizzle = ps->swizzle[u_bit_scan(&mask)];
 152          child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
 153          /* Reg is read before it was written, create a dummy node for it */
 154          if (!child) {
 155             child = ppir_node_create_reg(node->block, ppir_op_undef, reg,
 156                u_bit_consecutive(0, 4));
 157             comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
 158          }
 159          /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
 160          if (child && node != child && child->op != ppir_op_undef)
 161             ppir_node_add_dep(node, child, ppir_dep_src);
 162       }
 163    }
 164
 165    ppir_node_target_assign(ps, child);
 166 }
 167
 168 static int nir_to_ppir_opcodes[nir_num_opcodes] = {
 169    /* not supported */
 170    [0 ... nir_last_opcode] = -1,
 171
 172    [nir_op_mov] = ppir_op_mov,
 173    [nir_op_fmul] = ppir_op_mul,
 174    [nir_op_fabs] = ppir_op_abs,
 175    [nir_op_fneg] = ppir_op_neg,
 176    [nir_op_fadd] = ppir_op_add,
 177    [nir_op_fsum3] = ppir_op_sum3,
 178    [nir_op_fsum4] = ppir_op_sum4,
 179    [nir_op_frsq] = ppir_op_rsqrt,
 180    [nir_op_flog2] = ppir_op_log2,
 181    [nir_op_fexp2] = ppir_op_exp2,
 182    [nir_op_fsqrt] = ppir_op_sqrt,
 183    [nir_op_fsin] = ppir_op_sin,
 184    [nir_op_fcos] = ppir_op_cos,
 185    [nir_op_fmax] = ppir_op_max,
 186    [nir_op_fmin] = ppir_op_min,
 187    [nir_op_frcp] = ppir_op_rcp,
 188    [nir_op_ffloor] = ppir_op_floor,
 189    [nir_op_fceil] = ppir_op_ceil,
 190    [nir_op_ffract] = ppir_op_fract,
 191    [nir_op_sge] = ppir_op_ge,
 192    [nir_op_slt] = ppir_op_lt,
 193    [nir_op_seq] = ppir_op_eq,
 194    [nir_op_sne] = ppir_op_ne,
 195    [nir_op_fcsel] = ppir_op_select,
 196    [nir_op_inot] = ppir_op_not,
 197    [nir_op_ftrunc] = ppir_op_trunc,
 198    [nir_op_fsat] = ppir_op_sat,
 199    [nir_op_fddx] = ppir_op_ddx,
 200    [nir_op_fddy] = ppir_op_ddy,
 201 };
 202
 203 static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
 204 {
 205    nir_alu_instr *instr = nir_instr_as_alu(ni);
 206    int op = nir_to_ppir_opcodes[instr->op];
 207
 208    if (op < 0) {
 209       ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
 210       return NULL;
 211    }
 212
 213    ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
 214                                                instr->dest.write_mask);
 215    if (!node)
 216       return NULL;
 217
 218    ppir_dest *pd = &node->dest;
 219    nir_alu_dest *nd = &instr->dest;
 220    if (nd->saturate)
 221       pd->modifier = ppir_outmod_clamp_fraction;
 222
 223    unsigned src_mask;
 224    switch (op) {
 225    case ppir_op_sum3:
 226       src_mask = 0b0111;
 227       break;
 228    case ppir_op_sum4:
 229       src_mask = 0b1111;
 230       break;
 231    default:
 232       src_mask = pd->write_mask;
 233       break;
 234    }
 235
 236    unsigned num_child = nir_op_infos[instr->op].num_inputs;
 237    node->num_src = num_child;
 238
 239    for (int i = 0; i < num_child; i++) {
 240       nir_alu_src *ns = instr->src + i;
 241       ppir_src *ps = node->src + i;
 242       memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
 243       ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
 244
 245       ps->absolute = ns->abs;
 246       ps->negate = ns->negate;
 247    }
 248
 249    return &node->node;
 250 }
 251
 252 static ppir_block *ppir_block_create(ppir_compiler *comp);
 253
 254 static bool ppir_emit_discard_block(ppir_compiler *comp)
 255 {
 256    ppir_block *block = ppir_block_create(comp);
 257    ppir_discard_node *discard;
 258    if (!block)
 259       return false;
 260
 261    comp->discard_block = block;
 262    block->comp  = comp;
 263
 264    discard = ppir_node_create(block, ppir_op_discard, -1, 0);
 265    if (discard)
 266       list_addtail(&discard->node.list, &block->node_list);
 267    else
 268       return false;
 269
 270    return true;
 271 }
 272
 273 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
 274 {
 275    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
 276    ppir_node *node;
 277    ppir_compiler *comp = block->comp;
 278    ppir_branch_node *branch;
 279
 280    if (!comp->discard_block && !ppir_emit_discard_block(comp))
 281       return NULL;
 282
 283    node = ppir_node_create(block, ppir_op_branch, -1, 0);
 284    if (!node)
 285       return NULL;
 286    branch = ppir_node_to_branch(node);
 287
 288    /* second src and condition will be updated during lowering */
 289    ppir_node_add_src(block->comp, node, &branch->src[0],
 290                      &instr->src[0], u_bit_consecutive(0, instr->num_components));
 291    branch->num_src = 1;
 292    branch->target = comp->discard_block;
 293
 294    return node;
 295 }
 296
 297 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
 298 {
 299    ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
 300
 301    return node;
 302 }
 303
 304 static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
 305 {
 306    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
 307    unsigned mask = 0;
 308    ppir_load_node *lnode;
 309    ppir_alu_node *alu_node;
 310
 311    switch (instr->intrinsic) {
 312    case nir_intrinsic_load_input:
 313       if (!instr->dest.is_ssa)
 314          mask = u_bit_consecutive(0, instr->num_components);
 315
 316       lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
 317       if (!lnode)
 318          return NULL;
 319
 320       lnode->num_components = instr->num_components;
 321       lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
 322       if (nir_src_is_const(instr->src[0]))
 323          lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
 324       else {
 325          lnode->num_src = 1;
 326          ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
 327       }
 328       return &lnode->node;
 329
 330    case nir_intrinsic_load_frag_coord:
 331    case nir_intrinsic_load_point_coord:
 332    case nir_intrinsic_load_front_face:
 333       if (!instr->dest.is_ssa)
 334          mask = u_bit_consecutive(0, instr->num_components);
 335
 336       ppir_op op;
 337       switch (instr->intrinsic) {
 338       case nir_intrinsic_load_frag_coord:
 339          op = ppir_op_load_fragcoord;
 340          break;
 341       case nir_intrinsic_load_point_coord:
 342          op = ppir_op_load_pointcoord;
 343          break;
 344       case nir_intrinsic_load_front_face:
 345          op = ppir_op_load_frontface;
 346          break;
 347       default:
 348          assert(0);
 349          break;
 350       }
 351
 352       lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
 353       if (!lnode)
 354          return NULL;
 355
 356       lnode->num_components = instr->num_components;
 357       return &lnode->node;
 358
 359    case nir_intrinsic_load_uniform:
 360       if (!instr->dest.is_ssa)
 361          mask = u_bit_consecutive(0, instr->num_components);
 362
 363       lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
 364       if (!lnode)
 365          return NULL;
 366
 367       lnode->num_components = instr->num_components;
 368       lnode->index = nir_intrinsic_base(instr);
 369       if (nir_src_is_const(instr->src[0]))
 370          lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
 371       else {
 372          lnode->num_src = 1;
 373          ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
 374       }
 375
 376       return &lnode->node;
 377
 378    case nir_intrinsic_store_output: {
 379       alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
 380       if (!alu_node)
 381          return NULL;
 382
 383       ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
 384       dest->type = ppir_target_ssa;
 385       dest->ssa.num_components = instr->num_components;
 386       dest->ssa.live_in = INT_MAX;
 387       dest->ssa.live_out = 0;
 388       dest->ssa.index = 0;
 389       dest->write_mask = u_bit_consecutive(0, instr->num_components);
 390
 391       alu_node->num_src = 1;
 392
 393       for (int i = 0; i < instr->num_components; i++)
 394          alu_node->src[0].swizzle[i] = i;
 395
 396       ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
 397                         u_bit_consecutive(0, instr->num_components));
 398
 399       return &alu_node->node;
 400    }
 401
 402    case nir_intrinsic_discard:
 403       return ppir_emit_discard(block, ni);
 404
 405    case nir_intrinsic_discard_if:
 406       return ppir_emit_discard_if(block, ni);
 407
 408    default:
 409       ppir_error("unsupported nir_intrinsic_instr %s\n",
 410                  nir_intrinsic_infos[instr->intrinsic].name);
 411       return NULL;
 412    }
 413 }
 414
 415 static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
 416 {
 417    nir_load_const_instr *instr = nir_instr_as_load_const(ni);
 418    ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
 419    if (!node)
 420       return NULL;
 421
 422    assert(instr->def.bit_size == 32);
 423
 424    for (int i = 0; i < instr->def.num_components; i++)
 425       node->constant.value[i].i = instr->value[i].i32;
 426    node->constant.num = instr->def.num_components;
 427
 428    return &node->node;
 429 }
 430
 431 static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
 432 {
 433    nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
 434    ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
 435    if (!node)
 436       return NULL;
 437    ppir_alu_node *alu = ppir_node_to_alu(node);
 438
 439    ppir_dest *dest = &alu->dest;
 440    dest->ssa.undef = true;
 441    ppir_reg *ssa = &dest->ssa;
 442
 443    list_add(&ssa->list, &block->comp->reg_list);
 444
 445    return node;
 446 }
 447
 448 static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
 449 {
 450    nir_tex_instr *instr = nir_instr_as_tex(ni);
 451    ppir_load_texture_node *node;
 452
 453    if (instr->op != nir_texop_tex) {
 454       ppir_error("unsupported texop %d\n", instr->op);
 455       return NULL;
 456    }
 457
 458    unsigned mask = 0;
 459    if (!instr->dest.is_ssa)
 460       mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));
 461
 462    node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
 463    if (!node)
 464       return NULL;
 465
 466    node->sampler = instr->texture_index;
 467
 468    switch (instr->sampler_dim) {
 469    case GLSL_SAMPLER_DIM_2D:
 470    case GLSL_SAMPLER_DIM_RECT:
 471    case GLSL_SAMPLER_DIM_EXTERNAL:
 472       break;
 473    default:
 474       ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
 475       return NULL;
 476    }
 477
 478    node->sampler_dim = instr->sampler_dim;
 479
 480    for (int i = 0; i < instr->coord_components; i++)
 481          node->src_coords.swizzle[i] = i;
 482
 483    for (int i = 0; i < instr->num_srcs; i++) {
 484       switch (instr->src[i].src_type) {
 485       case nir_tex_src_coord:
 486          ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
 487                            u_bit_consecutive(0, instr->coord_components));
 488          break;
 489       default:
 490          ppir_error("unsupported texture source type\n");
 491          assert(0);
 492          return NULL;
 493       }
 494    }
 495
 496    return &node->node;
 497 }
 498
 499 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
 500 {
 501    ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);
 502
 503    return block;
 504 }
 505
 506 static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
 507 {
 508    ppir_node *node;
 509    ppir_compiler *comp = block->comp;
 510    ppir_branch_node *branch;
 511    ppir_block *jump_block;
 512    nir_jump_instr *jump = nir_instr_as_jump(ni);
 513
 514    switch (jump->type) {
 515    case nir_jump_break: {
 516       assert(comp->current_block->successors[0]);
 517       assert(!comp->current_block->successors[1]);
 518       jump_block = comp->current_block->successors[0];
 519    }
 520    break;
 521    case nir_jump_continue:
 522       jump_block = comp->loop_cont_block;
 523    break;
 524    default:
 525       ppir_error("nir_jump_instr not support\n");
 526       return NULL;
 527    }
 528
 529    assert(jump_block != NULL);
 530
 531    node = ppir_node_create(block, ppir_op_branch, -1, 0);
 532    if (!node)
 533       return NULL;
 534    branch = ppir_node_to_branch(node);
 535
 536    /* Unconditional */
 537    branch->num_src = 0;
 538    branch->target = jump_block;
 539
 540    return node;
 541 }
 542
 543 static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
 544    [nir_instr_type_alu]        = ppir_emit_alu,
 545    [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
 546    [nir_instr_type_load_const] = ppir_emit_load_const,
 547    [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
 548    [nir_instr_type_tex]        = ppir_emit_tex,
 549    [nir_instr_type_jump]       = ppir_emit_jump,
 550 };
 551
 552 static ppir_block *ppir_block_create(ppir_compiler *comp)
 553 {
 554    ppir_block *block = rzalloc(comp, ppir_block);
 555    if (!block)
 556       return NULL;
 557
 558    list_inithead(&block->node_list);
 559    list_inithead(&block->instr_list);
 560
 561    block->comp = comp;
 562
 563    return block;
 564 }
 565
 566 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
 567 {
 568    ppir_block *block = ppir_get_block(comp, nblock);
 569
 570    comp->current_block = block;
 571
 572    list_addtail(&block->list, &comp->block_list);
 573
 574    nir_foreach_instr(instr, nblock) {
 575       assert(instr->type < nir_instr_type_phi);
 576       ppir_node *node = ppir_emit_instr[instr->type](block, instr);
 577       if (!node)
 578          return false;
 579
 580       list_addtail(&node->list, &block->node_list);
 581    }
 582
 583    return true;
 584 }
 585
 586 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
 587
 588 static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
 589 {
 590    ppir_node *node;
 591    ppir_branch_node *else_branch, *after_branch;
 592    nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
 593    bool empty_else_block =
 594       (nir_else_block == nir_if_last_else_block(if_stmt) &&
 595       exec_list_is_empty(&nir_else_block->instr_list));
 596    ppir_block *block = comp->current_block;
 597
 598    node = ppir_node_create(block, ppir_op_branch, -1, 0);
 599    if (!node)
 600       return false;
 601    else_branch = ppir_node_to_branch(node);
 602    ppir_node_add_src(block->comp, node, &else_branch->src[0],
 603                      &if_stmt->condition, 1);
 604    else_branch->num_src = 1;
 605    /* Negate condition to minimize branching. We're generating following:
 606     * current_block: { ...; if (!statement) branch else_block; }
 607     * then_block: { ...; branch after_block; }
 608     * else_block: { ... }
 609     * after_block: { ... }
 610     *
 611     * or if else list is empty:
 612     * block: { if (!statement) branch else_block; }
 613     * then_block: { ... }
 614     * else_block: after_block: { ... }
 615     */
 616    else_branch->negate = true;
 617    list_addtail(&else_branch->node.list, &block->node_list);
 618
 619    ppir_emit_cf_list(comp, &if_stmt->then_list);
 620    if (empty_else_block) {
 621       nir_block *nblock = nir_if_last_else_block(if_stmt);
 622       assert(nblock->successors[0]);
 623       assert(!nblock->successors[1]);
 624       else_branch->target = ppir_get_block(comp, nblock->successors[0]);
 625       /* Add empty else block to the list */
 626       list_addtail(&block->successors[1]->list, &comp->block_list);
 627       return true;
 628    }
 629
 630    else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));
 631
 632    nir_block *last_then_block = nir_if_last_then_block(if_stmt);
 633    assert(last_then_block->successors[0]);
 634    assert(!last_then_block->successors[1]);
 635    block = ppir_get_block(comp, last_then_block);
 636    node = ppir_node_create(block, ppir_op_branch, -1, 0);
 637    if (!node)
 638       return false;
 639    after_branch = ppir_node_to_branch(node);
 640    /* Unconditional */
 641    after_branch->num_src = 0;
 642    after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
 643    /* Target should be after_block, will fixup later */
 644    list_addtail(&after_branch->node.list, &block->node_list);
 645
 646    ppir_emit_cf_list(comp, &if_stmt->else_list);
 647
 648    return true;
 649 }
 650
 651 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
 652 {
 653    ppir_block *save_loop_cont_block = comp->loop_cont_block;
 654    ppir_block *block;
 655    ppir_branch_node *loop_branch;
 656    nir_block *loop_last_block;
 657    ppir_node *node;
 658
 659    comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
 660
 661    ppir_emit_cf_list(comp, &nloop->body);
 662
 663    loop_last_block = nir_loop_last_block(nloop);
 664    block = ppir_get_block(comp, loop_last_block);
 665    node = ppir_node_create(block, ppir_op_branch, -1, 0);
 666    if (!node)
 667       return false;
 668    loop_branch = ppir_node_to_branch(node);
 669    /* Unconditional */
 670    loop_branch->num_src = 0;
 671    loop_branch->target = comp->loop_cont_block;
 672    list_addtail(&loop_branch->node.list, &block->node_list);
 673
 674    comp->loop_cont_block = save_loop_cont_block;
 675
 676    comp->num_loops++;
 677
 678    return true;
 679 }
 680
 681 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
 682 {
 683    ppir_error("function nir_cf_node not support\n");
 684    return false;
 685 }
 686
 687 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
 688 {
 689    foreach_list_typed(nir_cf_node, node, node, list) {
 690       bool ret;
 691
 692       switch (node->type) {
 693       case nir_cf_node_block:
 694          ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
 695          break;
 696       case nir_cf_node_if:
 697          ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
 698          break;
 699       case nir_cf_node_loop:
 700          ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
 701          break;
 702       case nir_cf_node_function:
 703          ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
 704          break;
 705       default:
 706          ppir_error("unknown NIR node type %d\n", node->type);
 707          return false;
 708       }
 709
 710       if (!ret)
 711          return false;
 712    }
 713
 714    return true;
 715 }
 716
 717 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
 718 {
 719    ppir_compiler *comp = rzalloc_size(
 720       prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
 721    if (!comp)
 722       return NULL;
 723
 724    list_inithead(&comp->block_list);
 725    list_inithead(&comp->reg_list);
 726    comp->blocks = _mesa_hash_table_u64_create(prog);
 727
 728    comp->var_nodes = (ppir_node **)(comp + 1);
 729    comp->reg_base = num_ssa;
 730    comp->prog = prog;
 731    return comp;
 732 }
 733
 734 static void ppir_add_ordering_deps(ppir_compiler *comp)
 735 {
 736    /* Some intrinsics do not have explicit dependencies and thus depend
 737     * on instructions order. Consider discard_if and store_ouput as
 738     * example. If we don't add fake dependency of discard_if to store_output
 739     * scheduler may put store_output first and since store_output terminates
 740     * shader on Utgard PP, rest of it will never be executed.
 741     * Add fake dependencies for discard/branch/store to preserve
 742     * instruction order.
 743     *
 744     * TODO: scheduler should schedule discard_if as early as possible otherwise
 745     * we may end up with suboptimal code for cases like this:
 746     *
 747     * s3 = s1 < s2
 748     * discard_if s3
 749     * s4 = s1 + s2
 750     * store s4
 751     *
 752     * In this case store depends on discard_if and s4, but since dependencies can
 753     * be scheduled in any order it can result in code like this:
 754     *
 755     * instr1: s3 = s1 < s3
 756     * instr2: s4 = s1 + s2
 757     * instr3: discard_if s3
 758     * instr4: store s4
 759     */
 760    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 761       ppir_node *prev_node = NULL;
 762       list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
 763          if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
 764             ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
 765          }
 766          if (node->op == ppir_op_discard ||
 767              node->op == ppir_op_store_color ||
 768              node->op == ppir_op_store_temp ||
 769              node->op == ppir_op_branch) {
 770             prev_node = node;
 771          }
 772       }
 773    }
 774 }
 775
 776 static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
 777                                  struct pipe_debug_callback *debug)
 778 {
 779    const struct shader_info *info = &nir->info;
 780    char *shaderdb;
 781    int ret = asprintf(&shaderdb,
 782                       "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
 783                       gl_shader_stage_name(info->stage),
 784                       comp->cur_instr_index,
 785                       comp->num_loops,
 786                       comp->num_spills,
 787                       comp->num_fills);
 788    assert(ret >= 0);
 789
 790    if (lima_debug & LIMA_DEBUG_SHADERDB)
 791       fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
 792
 793    pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
 794    free(shaderdb);
 795 }
 796
 797 static void ppir_add_write_after_read_deps(ppir_compiler *comp)
 798 {
 799    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 800       list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
 801          ppir_node *write = NULL;
 802          list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
 803             for (int i = 0; i < ppir_node_get_src_num(node); i++) {
 804                ppir_src *src = ppir_node_get_src(node, i);
 805                if (src && src->type == ppir_target_register &&
 806                    src->reg == reg &&
 807                    write) {
 808                   ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
 809                   ppir_node_add_dep(write, node, ppir_dep_write_after_read);
 810                }
 811             }
 812             ppir_dest *dest = ppir_node_get_dest(node);
 813             if (dest && dest->type == ppir_target_register &&
 814                 dest->reg == reg)
 815                write = node;
 816          }
 817       }
 818    }
 819 }
 820
 821 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
 822                       struct ra_regs *ra,
 823                       struct pipe_debug_callback *debug)
 824 {
 825    nir_function_impl *func = nir_shader_get_entrypoint(nir);
 826    ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
 827    if (!comp)
 828       return false;
 829
 830    comp->ra = ra;
 831
 832    /* 1st pass: create ppir blocks */
 833    nir_foreach_function(function, nir) {
 834       if (!function->impl)
 835          continue;
 836
 837       nir_foreach_block(nblock, function->impl) {
 838          ppir_block *block = ppir_block_create(comp);
 839          if (!block)
 840             return false;
 841          block->index = nblock->index;
 842          _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
 843       }
 844    }
 845
 846    /* 2nd pass: populate successors */
 847    nir_foreach_function(function, nir) {
 848       if (!function->impl)
 849          continue;
 850
 851       nir_foreach_block(nblock, function->impl) {
 852          ppir_block *block = ppir_get_block(comp, nblock);
 853          assert(block);
 854
 855          for (int i = 0; i < 2; i++) {
 856             if (nblock->successors[i])
 857                block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
 858          }
 859       }
 860    }
 861
 862    /* Validate outputs, we support only gl_FragColor */
 863    nir_foreach_variable(var, &nir->outputs) {
 864       switch (var->data.location) {
 865       case FRAG_RESULT_COLOR:
 866       case FRAG_RESULT_DATA0:
 867          break;
 868       default:
 869          ppir_error("unsupported output type\n");
 870          goto err_out0;
 871          break;
 872       }
 873    }
 874
 875    foreach_list_typed(nir_register, reg, node, &func->registers) {
 876       ppir_reg *r = rzalloc(comp, ppir_reg);
 877       if (!r)
 878          return false;
 879
 880       r->index = reg->index;
 881       r->num_components = reg->num_components;
 882       r->live_in = INT_MAX;
 883       r->live_out = 0;
 884       r->is_head = false;
 885       list_addtail(&r->list, &comp->reg_list);
 886    }
 887
 888    if (!ppir_emit_cf_list(comp, &func->body))
 889       goto err_out0;
 890
 891    /* If we have discard block add it to the very end */
 892    if (comp->discard_block)
 893       list_addtail(&comp->discard_block->list, &comp->block_list);
 894
 895    ppir_node_print_prog(comp);
 896
 897    if (!ppir_lower_prog(comp))
 898       goto err_out0;
 899
 900    ppir_add_ordering_deps(comp);
 901    ppir_add_write_after_read_deps(comp);
 902
 903    ppir_node_print_prog(comp);
 904
 905    if (!ppir_node_to_instr(comp))
 906       goto err_out0;
 907
 908    if (!ppir_schedule_prog(comp))
 909       goto err_out0;
 910
 911    if (!ppir_regalloc_prog(comp))
 912       goto err_out0;
 913
 914    if (!ppir_codegen_prog(comp))
 915       goto err_out0;
 916
 917    ppir_print_shader_db(nir, comp, debug);
 918
 919    _mesa_hash_table_u64_destroy(comp->blocks, NULL);
 920    ralloc_free(comp);
 921    return true;
 922
 923 err_out0:
 924    _mesa_hash_table_u64_destroy(comp->blocks, NULL);
 925    ralloc_free(comp);
 926    return false;
 927 }
 928