src/gallium/drivers/lima/ir/pp/codegen.c

   1 /*
   2  * Copyright (c) 2017 Lima Project
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the
  12  * next paragraph) shall be included in all copies or substantial portions
  13  * of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include "util/ralloc.h"
  26 #include "util/u_half.h"
  27 #include "util/bitscan.h"
  28
  29 #include "ppir.h"
  30 #include "codegen.h"
  31 #include "lima_context.h"
  32
  33 static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
  34 {
  35    unsigned ret = 0;
  36    for (int i = 0; i < 4; i++)
  37       ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
  38    return ret;
  39 }
  40
  41 static int get_scl_reg_index(ppir_src *src, int component)
  42 {
  43    int ret = ppir_target_get_src_reg_index(src);
  44    ret += src->swizzle[component];
  45    return ret;
  46 }
  47
  48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
  49 {
  50    ppir_codegen_field_varying *f = code;
  51    ppir_load_node *load = ppir_node_to_load(node);
  52    ppir_dest *dest = &load->dest;
  53    int index = ppir_target_get_dest_reg_index(dest);
  54    int num_components = load->num_components;
  55
  56    if (num_components) {
  57       assert(node->op == ppir_op_load_varying ||
  58              node->op == ppir_op_load_coords ||
  59              node->op == ppir_op_load_fragcoord);
  60
  61       f->imm.dest = index >> 2;
  62       f->imm.mask = dest->write_mask << (index & 0x3);
  63
  64       int alignment = num_components == 3 ? 3 : num_components - 1;
  65       f->imm.alignment = alignment;
  66       f->imm.offset_vector = 0xf;
  67
  68       if (alignment == 3)
  69          f->imm.index = load->index >> 2;
  70       else
  71          f->imm.index = load->index >> alignment;
  72
  73       if (node->op == ppir_op_load_fragcoord) {
  74          f->imm.source_type = 2;
  75          f->imm.perspective = 3;
  76       }
  77    }
  78    else {
  79       assert(node->op == ppir_op_load_coords);
  80
  81       f->reg.dest = index >> 2;
  82       f->reg.mask = dest->write_mask << (index & 0x3);
  83
  84       f->reg.source_type = 1;
  85
  86       ppir_src *src = &load->src;
  87       index = ppir_target_get_src_reg_index(src);
  88       f->reg.source = index >> 2;
  89       f->reg.negate = src->negate;
  90       f->reg.absolute = src->absolute;
  91       f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
  92    }
  93 }
  94
  95 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
  96 {
  97    ppir_codegen_field_sampler *f = code;
  98    ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
  99
 100    f->index = ldtex->sampler;
 101    f->lod_bias_en = 0;
 102    f->type = ppir_codegen_sampler_type_2d;
 103    f->offset_en = 0;
 104    f->unknown_2 = 0x39001;
 105 }
 106
 107 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
 108 {
 109    ppir_codegen_field_uniform *f = code;
 110    ppir_load_node *load = ppir_node_to_load(node);
 111
 112    switch (node->op) {
 113       case ppir_op_load_uniform:
 114          f->source = ppir_codegen_uniform_src_uniform;
 115          break;
 116       case ppir_op_load_temp:
 117          f->source = ppir_codegen_uniform_src_temporary;
 118          break;
 119       default:
 120          assert(0);
 121    }
 122
 123    int num_components = load->num_components;
 124    int alignment = num_components == 4 ? 2 : num_components - 1;
 125
 126    f->alignment = alignment;
 127
 128    /* TODO: uniform can be also combined like varying */
 129    f->index = load->index << (2 - alignment);
 130 }
 131
 132 static unsigned shift_to_op(int shift)
 133 {
 134    assert(shift >= -3 && shift <= 3);
 135    return shift < 0 ? shift + 8 : shift;
 136 }
 137
 138 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
 139 {
 140    ppir_codegen_field_vec4_mul *f = code;
 141    ppir_alu_node *alu = ppir_node_to_alu(node);
 142
 143    ppir_dest *dest = &alu->dest;
 144    int dest_shift = 0;
 145    if (dest->type != ppir_target_pipeline) {
 146       int index = ppir_target_get_dest_reg_index(dest);
 147       dest_shift = index & 0x3;
 148       f->dest = index >> 2;
 149       f->mask = dest->write_mask << dest_shift;
 150    }
 151    f->dest_modifier = dest->modifier;
 152
 153    switch (node->op) {
 154    case ppir_op_mul:
 155       f->op = shift_to_op(alu->shift);
 156       break;
 157    case ppir_op_mov:
 158       f->op = ppir_codegen_vec4_mul_op_mov;
 159       break;
 160    case ppir_op_max:
 161       f->op = ppir_codegen_vec4_mul_op_max;
 162       break;
 163    case ppir_op_min:
 164       f->op = ppir_codegen_vec4_mul_op_min;
 165       break;
 166    case ppir_op_and:
 167       f->op = ppir_codegen_vec4_mul_op_and;
 168       break;
 169    case ppir_op_or:
 170       f->op = ppir_codegen_vec4_mul_op_or;
 171       break;
 172    case ppir_op_xor:
 173       f->op = ppir_codegen_vec4_mul_op_xor;
 174       break;
 175    case ppir_op_gt:
 176       f->op = ppir_codegen_vec4_mul_op_gt;
 177       break;
 178    case ppir_op_ge:
 179       f->op = ppir_codegen_vec4_mul_op_ge;
 180       break;
 181    case ppir_op_eq:
 182       f->op = ppir_codegen_vec4_mul_op_eq;
 183       break;
 184    case ppir_op_ne:
 185       f->op = ppir_codegen_vec4_mul_op_ne;
 186       break;
 187    case ppir_op_not:
 188       f->op = ppir_codegen_vec4_mul_op_not;
 189       break;
 190    default:
 191       break;
 192    }
 193
 194    ppir_src *src = alu->src;
 195    int index = ppir_target_get_src_reg_index(src);
 196    f->arg0_source = index >> 2;
 197    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 198    f->arg0_absolute = src->absolute;
 199    f->arg0_negate = src->negate;
 200
 201    if (alu->num_src == 2) {
 202       src = alu->src + 1;
 203       index = ppir_target_get_src_reg_index(src);
 204       f->arg1_source = index >> 2;
 205       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 206       f->arg1_absolute = src->absolute;
 207       f->arg1_negate = src->negate;
 208    }
 209 }
 210
 211 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
 212 {
 213    ppir_codegen_field_float_mul *f = code;
 214    ppir_alu_node *alu = ppir_node_to_alu(node);
 215
 216    ppir_dest *dest = &alu->dest;
 217    int dest_component = ffs(dest->write_mask) - 1;
 218    assert(dest_component >= 0);
 219
 220    if (dest->type != ppir_target_pipeline) {
 221       f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 222       f->output_en = true;
 223    }
 224    f->dest_modifier = dest->modifier;
 225
 226    switch (node->op) {
 227    case ppir_op_mul:
 228       f->op = shift_to_op(alu->shift);
 229       break;
 230    case ppir_op_mov:
 231       f->op = ppir_codegen_float_mul_op_mov;
 232       break;
 233    case ppir_op_max:
 234       f->op = ppir_codegen_float_mul_op_max;
 235       break;
 236    case ppir_op_min:
 237       f->op = ppir_codegen_float_mul_op_min;
 238       break;
 239    case ppir_op_and:
 240       f->op = ppir_codegen_float_mul_op_and;
 241       break;
 242    case ppir_op_or:
 243       f->op = ppir_codegen_float_mul_op_or;
 244       break;
 245    case ppir_op_xor:
 246       f->op = ppir_codegen_float_mul_op_xor;
 247       break;
 248    case ppir_op_gt:
 249       f->op = ppir_codegen_float_mul_op_gt;
 250       break;
 251    case ppir_op_ge:
 252       f->op = ppir_codegen_float_mul_op_ge;
 253       break;
 254    case ppir_op_eq:
 255       f->op = ppir_codegen_float_mul_op_eq;
 256       break;
 257    case ppir_op_ne:
 258       f->op = ppir_codegen_float_mul_op_ne;
 259       break;
 260    case ppir_op_not:
 261       f->op = ppir_codegen_float_mul_op_not;
 262       break;
 263    default:
 264       break;
 265    }
 266
 267    ppir_src *src = alu->src;
 268    f->arg0_source = get_scl_reg_index(src, dest_component);
 269    f->arg0_absolute = src->absolute;
 270    f->arg0_negate = src->negate;
 271
 272    if (alu->num_src == 2) {
 273       src = alu->src + 1;
 274       f->arg1_source = get_scl_reg_index(src, dest_component);
 275       f->arg1_absolute = src->absolute;
 276       f->arg1_negate = src->negate;
 277    }
 278 }
 279
 280 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
 281 {
 282    ppir_codegen_field_vec4_acc *f = code;
 283    ppir_alu_node *alu = ppir_node_to_alu(node);
 284
 285    ppir_dest *dest = &alu->dest;
 286    int index = ppir_target_get_dest_reg_index(dest);
 287    int dest_shift = index & 0x3;
 288    f->dest = index >> 2;
 289    f->mask = dest->write_mask << dest_shift;
 290    f->dest_modifier = dest->modifier;
 291
 292    switch (node->op) {
 293    case ppir_op_add:
 294       f->op = ppir_codegen_vec4_acc_op_add;
 295       break;
 296    case ppir_op_mov:
 297       f->op = ppir_codegen_vec4_acc_op_mov;
 298       break;
 299    case ppir_op_sum3:
 300       f->op = ppir_codegen_vec4_acc_op_sum3;
 301       dest_shift = 0;
 302       break;
 303    case ppir_op_sum4:
 304       f->op = ppir_codegen_vec4_acc_op_sum4;
 305       dest_shift = 0;
 306       break;
 307    case ppir_op_floor:
 308       f->op = ppir_codegen_vec4_acc_op_floor;
 309       break;
 310    case ppir_op_ceil:
 311       f->op = ppir_codegen_vec4_acc_op_ceil;
 312       break;
 313    case ppir_op_fract:
 314       f->op = ppir_codegen_vec4_acc_op_fract;
 315       break;
 316    case ppir_op_gt:
 317       f->op = ppir_codegen_vec4_acc_op_gt;
 318       break;
 319    case ppir_op_ge:
 320       f->op = ppir_codegen_vec4_acc_op_ge;
 321       break;
 322    case ppir_op_eq:
 323       f->op = ppir_codegen_vec4_acc_op_eq;
 324       break;
 325    case ppir_op_ne:
 326       f->op = ppir_codegen_vec4_acc_op_ne;
 327       break;
 328    case ppir_op_select:
 329       f->op = ppir_codegen_vec4_acc_op_sel;
 330       break;
 331    case ppir_op_max:
 332       f->op = ppir_codegen_vec4_acc_op_max;
 333       break;
 334    case ppir_op_min:
 335       f->op = ppir_codegen_vec4_acc_op_min;
 336       break;
 337    default:
 338       break;
 339    }
 340
 341    ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
 342    index = ppir_target_get_src_reg_index(src);
 343
 344    if (src->type == ppir_target_pipeline &&
 345        src->pipeline == ppir_pipeline_reg_vmul)
 346       f->mul_in = true;
 347    else
 348       f->arg0_source = index >> 2;
 349
 350    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 351    f->arg0_absolute = src->absolute;
 352    f->arg0_negate = src->negate;
 353
 354    if (++src < alu->src + alu->num_src) {
 355       index = ppir_target_get_src_reg_index(src);
 356       f->arg1_source = index >> 2;
 357       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 358       f->arg1_absolute = src->absolute;
 359       f->arg1_negate = src->negate;
 360    }
 361 }
 362
 363 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
 364 {
 365    ppir_codegen_field_float_acc *f = code;
 366    ppir_alu_node *alu = ppir_node_to_alu(node);
 367
 368    ppir_dest *dest = &alu->dest;
 369    int dest_component = ffs(dest->write_mask) - 1;
 370    assert(dest_component >= 0);
 371
 372    f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 373    f->output_en = true;
 374    f->dest_modifier = dest->modifier;
 375
 376    switch (node->op) {
 377    case ppir_op_add:
 378       f->op = shift_to_op(alu->shift);
 379       break;
 380    case ppir_op_mov:
 381       f->op = ppir_codegen_float_acc_op_mov;
 382       break;
 383    case ppir_op_max:
 384       f->op = ppir_codegen_float_acc_op_max;
 385       break;
 386    case ppir_op_min:
 387       f->op = ppir_codegen_float_acc_op_min;
 388       break;
 389    case ppir_op_floor:
 390       f->op = ppir_codegen_float_acc_op_floor;
 391       break;
 392    case ppir_op_ceil:
 393       f->op = ppir_codegen_float_acc_op_ceil;
 394       break;
 395    case ppir_op_fract:
 396       f->op = ppir_codegen_float_acc_op_fract;
 397       break;
 398    case ppir_op_gt:
 399       f->op = ppir_codegen_float_acc_op_gt;
 400       break;
 401    case ppir_op_ge:
 402       f->op = ppir_codegen_float_acc_op_ge;
 403       break;
 404    case ppir_op_eq:
 405       f->op = ppir_codegen_float_acc_op_eq;
 406       break;
 407    case ppir_op_ne:
 408       f->op = ppir_codegen_float_acc_op_ne;
 409       break;
 410    case ppir_op_select:
 411       f->op = ppir_codegen_float_acc_op_sel;
 412       break;
 413    default:
 414       break;
 415    }
 416
 417    ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
 418    if (src->type == ppir_target_pipeline &&
 419        src->pipeline == ppir_pipeline_reg_fmul)
 420       f->mul_in = true;
 421    else
 422       f->arg0_source = get_scl_reg_index(src, dest_component);
 423    f->arg0_absolute = src->absolute;
 424    f->arg0_negate = src->negate;
 425
 426    if (++src < alu->src + alu->num_src) {
 427       f->arg1_source = get_scl_reg_index(src, dest_component);
 428       f->arg1_absolute = src->absolute;
 429       f->arg1_negate = src->negate;
 430    }
 431 }
 432
 433 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
 434 {
 435    ppir_codegen_field_combine *f = code;
 436    ppir_alu_node *alu = ppir_node_to_alu(node);
 437
 438    switch (node->op) {
 439    case ppir_op_rsqrt:
 440    case ppir_op_log2:
 441    case ppir_op_exp2:
 442    case ppir_op_rcp:
 443    case ppir_op_sqrt:
 444    case ppir_op_sin:
 445    case ppir_op_cos:
 446    {
 447       f->scalar.dest_vec = false;
 448       f->scalar.arg1_en = false;
 449
 450       ppir_dest *dest = &alu->dest;
 451       int dest_component = ffs(dest->write_mask) - 1;
 452       assert(dest_component >= 0);
 453       f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 454       f->scalar.dest_modifier = dest->modifier;
 455
 456       ppir_src *src = alu->src;
 457       f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
 458       f->scalar.arg0_absolute = src->absolute;
 459       f->scalar.arg0_negate = src->negate;
 460
 461       switch (node->op) {
 462       case ppir_op_rsqrt:
 463          f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
 464          break;
 465       case ppir_op_log2:
 466          f->scalar.op = ppir_codegen_combine_scalar_op_log2;
 467          break;
 468       case ppir_op_exp2:
 469          f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
 470          break;
 471       case ppir_op_rcp:
 472          f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
 473          break;
 474       case ppir_op_sqrt:
 475          f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
 476          break;
 477       case ppir_op_sin:
 478          f->scalar.op = ppir_codegen_combine_scalar_op_sin;
 479          break;
 480       case ppir_op_cos:
 481          f->scalar.op = ppir_codegen_combine_scalar_op_cos;
 482          break;
 483       default:
 484          break;
 485       }
 486    }
 487    default:
 488       break;
 489    }
 490 }
 491
 492 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
 493 {
 494    assert(node->op == ppir_op_store_temp);
 495
 496    ppir_codegen_field_temp_write *f = code;
 497    ppir_store_node *snode = ppir_node_to_store(node);
 498    int num_components = snode->num_components;
 499
 500    f->temp_write.dest = 0x03; // 11 - temporary
 501    f->temp_write.source = snode->src.reg->index;
 502
 503    int alignment = num_components == 4 ? 2 : num_components - 1;
 504    f->temp_write.alignment = alignment;
 505    f->temp_write.index = snode->index << (2 - alignment);
 506
 507    f->temp_write.offset_reg = snode->index >> 2;
 508 }
 509
 510 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
 511 {
 512    for (int i = 0; i < constant->num; i++)
 513       code[i] = util_float_to_half(constant->value[i].f);
 514 }
 515
 516 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
 517 {
 518    ppir_codegen_field_branch *b = code;
 519    assert(node->op == ppir_op_discard);
 520
 521    b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
 522    b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
 523    b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
 524 }
 525
 526 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
 527 {
 528    ppir_codegen_field_branch *b = code;
 529    ppir_branch_node *branch;
 530    ppir_instr *target_instr;
 531    if (node->op == ppir_op_discard) {
 532       ppir_codegen_encode_discard(node, code);
 533       return;
 534    }
 535
 536    assert(node->op == ppir_op_branch);
 537    branch = ppir_node_to_branch(node);
 538
 539    b->branch.unknown_0 = 0x0;
 540    b->branch.arg0_source = ppir_target_get_src_reg_index(&branch->src[0]);
 541    b->branch.arg1_source = ppir_target_get_src_reg_index(&branch->src[1]);
 542    b->branch.cond_gt = branch->cond_gt;
 543    b->branch.cond_eq = branch->cond_eq;
 544    b->branch.cond_lt = branch->cond_lt;
 545    b->branch.unknown_1 = 0x0;
 546
 547    target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
 548    b->branch.target = target_instr->offset - node->instr->offset;
 549    b->branch.next_count = target_instr->encode_size;
 550 }
 551
 552 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
 553
 554 static const ppir_codegen_instr_slot_encode_func
 555 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
 556    [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
 557    [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
 558    [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
 559    [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
 560    [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
 561    [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
 562    [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
 563    [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
 564    [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
 565    [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
 566 };
 567
 568 static const int ppir_codegen_field_size[] = {
 569    34, 62, 41, 43, 30, 44, 31, 30, 41, 73
 570 };
 571
 572 static inline int align_to_word(int size)
 573 {
 574    return ((size + 0x1f) >> 5);
 575 }
 576
 577 static int get_instr_encode_size(ppir_instr *instr)
 578 {
 579    int size = 0;
 580
 581    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 582       if (instr->slots[i])
 583          size += ppir_codegen_field_size[i];
 584    }
 585
 586    for (int i = 0; i < 2; i++) {
 587       if (instr->constant[i].num)
 588          size += 64;
 589    }
 590
 591    return align_to_word(size) + 1;
 592 }
 593
 594 static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
 595 {
 596    int off1 = dst_offset & 0x1f;
 597    uint32_t *cpy_dst = dst, *cpy_src = src;
 598
 599    cpy_dst += (dst_offset >> 5);
 600
 601    if (off1) {
 602       int off2 = 32 - off1;
 603       int cpy_size = 0;
 604       while (1) {
 605          *cpy_dst |= *cpy_src << off1;
 606          cpy_dst++;
 607
 608          cpy_size += off2;
 609          if (cpy_size >= src_size)
 610             break;
 611
 612          *cpy_dst |= *cpy_src >> off2;
 613          cpy_src++;
 614
 615          cpy_size += off1;
 616          if (cpy_size >= src_size)
 617             break;
 618       }
 619    }
 620    else
 621       memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
 622 }
 623
 624 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
 625 {
 626    int size = 0;
 627    ppir_codegen_ctrl *ctrl = code;
 628
 629    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 630       if (instr->slots[i]) {
 631          /* max field size (73), align to dword */
 632          uint8_t output[12] = {0};
 633
 634          ppir_codegen_encode_slot[i](instr->slots[i], output);
 635          bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
 636
 637          size += ppir_codegen_field_size[i];
 638          ctrl->fields |= 1 << i;
 639       }
 640    }
 641
 642    if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
 643       ctrl->sync = true;
 644
 645    for (int i = 0; i < 2; i++) {
 646       if (instr->constant[i].num) {
 647          uint16_t output[4] = {0};
 648
 649          ppir_codegen_encode_const(instr->constant + i, output);
 650          bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
 651
 652          size += 64;
 653          ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
 654       }
 655    }
 656
 657    size = align_to_word(size) + 1;
 658
 659    ctrl->count = size;
 660    if (instr->is_end)
 661       ctrl->stop = true;
 662
 663    if (last_code) {
 664       ppir_codegen_ctrl *last_ctrl = last_code;
 665       last_ctrl->next_count = size;
 666       last_ctrl->prefetch = true;
 667    }
 668
 669    return size;
 670 }
 671
 672 static void ppir_codegen_print_prog(ppir_compiler *comp)
 673 {
 674    uint32_t *prog = comp->prog->shader;
 675    unsigned offset = 0;
 676
 677    printf("========ppir codegen========\n");
 678    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 679       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 680          printf("%03d (@%6d): ", instr->index, instr->offset);
 681          int n = prog[0] & 0x1f;
 682          for (int i = 0; i < n; i++) {
 683             if (i && i % 6 == 0)
 684                printf("\n    ");
 685             printf("%08x ", prog[i]);
 686          }
 687          printf("\n");
 688          ppir_disassemble_instr(prog, offset);
 689          prog += n;
 690          offset += n;
 691       }
 692    }
 693    printf("-----------------------\n");
 694 }
 695
 696 bool ppir_codegen_prog(ppir_compiler *comp)
 697 {
 698    int size = 0;
 699    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 700       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 701          instr->offset = size;
 702          instr->encode_size = get_instr_encode_size(instr);
 703          size += instr->encode_size;
 704       }
 705    }
 706
 707    uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
 708    if (!prog)
 709       return false;
 710
 711    uint32_t *code = prog, *last_code = NULL;
 712    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 713       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 714          int offset = encode_instr(instr, code, last_code);
 715          last_code = code;
 716          code += offset;
 717       }
 718    }
 719
 720    comp->prog->shader = prog;
 721    comp->prog->shader_size = size * sizeof(uint32_t);
 722
 723    if (lima_debug & LIMA_DEBUG_PP)
 724       ppir_codegen_print_prog(comp);
 725
 726    return true;
 727 }