src/gallium/drivers/lima/ir/pp/codegen.c

   1 /*
   2  * Copyright (c) 2017 Lima Project
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the
  12  * next paragraph) shall be included in all copies or substantial portions
  13  * of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include "util/ralloc.h"
  26 #include "util/u_half.h"
  27 #include "util/bitscan.h"
  28
  29 #include "ppir.h"
  30 #include "codegen.h"
  31 #include "lima_context.h"
  32
  33 static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
  34 {
  35    unsigned ret = 0;
  36    for (int i = 0; i < 4; i++)
  37       ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
  38    return ret;
  39 }
  40
  41 static int get_scl_reg_index(ppir_src *src, int component)
  42 {
  43    int ret = ppir_target_get_src_reg_index(src);
  44    ret += src->swizzle[component];
  45    return ret;
  46 }
  47
  48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
  49 {
  50    ppir_codegen_field_varying *f = code;
  51    ppir_load_node *load = ppir_node_to_load(node);
  52    ppir_dest *dest = &load->dest;
  53    int index = ppir_target_get_dest_reg_index(dest);
  54    int num_components = load->num_components;
  55
  56    if (num_components) {
  57       assert(node->op == ppir_op_load_varying ||
  58              node->op == ppir_op_load_coords ||
  59              node->op == ppir_op_load_fragcoord ||
  60              node->op == ppir_op_load_pointcoord);
  61
  62       f->imm.dest = index >> 2;
  63       f->imm.mask = dest->write_mask << (index & 0x3);
  64
  65       int alignment = num_components == 3 ? 3 : num_components - 1;
  66       f->imm.alignment = alignment;
  67       f->imm.offset_vector = 0xf;
  68
  69       if (alignment == 3)
  70          f->imm.index = load->index >> 2;
  71       else
  72          f->imm.index = load->index >> alignment;
  73
  74       switch (node->op) {
  75          case ppir_op_load_fragcoord:
  76             f->imm.source_type = 2;
  77             f->imm.perspective = 3;
  78             break;
  79          case ppir_op_load_pointcoord:
  80             f->imm.source_type = 3;
  81             break;
  82          default:
  83             break;
  84       }
  85    }
  86    else {
  87       assert(node->op == ppir_op_load_coords);
  88
  89       f->reg.dest = index >> 2;
  90       f->reg.mask = dest->write_mask << (index & 0x3);
  91
  92       f->reg.source_type = 1;
  93
  94       ppir_src *src = &load->src;
  95       index = ppir_target_get_src_reg_index(src);
  96       f->reg.source = index >> 2;
  97       f->reg.negate = src->negate;
  98       f->reg.absolute = src->absolute;
  99       f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
 100    }
 101 }
 102
 103 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
 104 {
 105    ppir_codegen_field_sampler *f = code;
 106    ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
 107
 108    f->index = ldtex->sampler;
 109    f->lod_bias_en = 0;
 110    f->type = ppir_codegen_sampler_type_2d;
 111    f->offset_en = 0;
 112    f->unknown_2 = 0x39001;
 113 }
 114
 115 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
 116 {
 117    ppir_codegen_field_uniform *f = code;
 118    ppir_load_node *load = ppir_node_to_load(node);
 119
 120    switch (node->op) {
 121       case ppir_op_load_uniform:
 122          f->source = ppir_codegen_uniform_src_uniform;
 123          break;
 124       case ppir_op_load_temp:
 125          f->source = ppir_codegen_uniform_src_temporary;
 126          break;
 127       default:
 128          assert(0);
 129    }
 130
 131    int num_components = load->num_components;
 132    int alignment = num_components == 4 ? 2 : num_components - 1;
 133
 134    f->alignment = alignment;
 135
 136    /* TODO: uniform can be also combined like varying */
 137    f->index = load->index << (2 - alignment);
 138 }
 139
 140 static unsigned shift_to_op(int shift)
 141 {
 142    assert(shift >= -3 && shift <= 3);
 143    return shift < 0 ? shift + 8 : shift;
 144 }
 145
 146 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
 147 {
 148    ppir_codegen_field_vec4_mul *f = code;
 149    ppir_alu_node *alu = ppir_node_to_alu(node);
 150
 151    ppir_dest *dest = &alu->dest;
 152    int dest_shift = 0;
 153    if (dest->type != ppir_target_pipeline) {
 154       int index = ppir_target_get_dest_reg_index(dest);
 155       dest_shift = index & 0x3;
 156       f->dest = index >> 2;
 157       f->mask = dest->write_mask << dest_shift;
 158    }
 159    f->dest_modifier = dest->modifier;
 160
 161    switch (node->op) {
 162    case ppir_op_mul:
 163       f->op = shift_to_op(alu->shift);
 164       break;
 165    case ppir_op_mov:
 166       f->op = ppir_codegen_vec4_mul_op_mov;
 167       break;
 168    case ppir_op_max:
 169       f->op = ppir_codegen_vec4_mul_op_max;
 170       break;
 171    case ppir_op_min:
 172       f->op = ppir_codegen_vec4_mul_op_min;
 173       break;
 174    case ppir_op_and:
 175       f->op = ppir_codegen_vec4_mul_op_and;
 176       break;
 177    case ppir_op_or:
 178       f->op = ppir_codegen_vec4_mul_op_or;
 179       break;
 180    case ppir_op_xor:
 181       f->op = ppir_codegen_vec4_mul_op_xor;
 182       break;
 183    case ppir_op_gt:
 184       f->op = ppir_codegen_vec4_mul_op_gt;
 185       break;
 186    case ppir_op_ge:
 187       f->op = ppir_codegen_vec4_mul_op_ge;
 188       break;
 189    case ppir_op_eq:
 190       f->op = ppir_codegen_vec4_mul_op_eq;
 191       break;
 192    case ppir_op_ne:
 193       f->op = ppir_codegen_vec4_mul_op_ne;
 194       break;
 195    case ppir_op_not:
 196       f->op = ppir_codegen_vec4_mul_op_not;
 197       break;
 198    default:
 199       break;
 200    }
 201
 202    ppir_src *src = alu->src;
 203    int index = ppir_target_get_src_reg_index(src);
 204    f->arg0_source = index >> 2;
 205    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 206    f->arg0_absolute = src->absolute;
 207    f->arg0_negate = src->negate;
 208
 209    if (alu->num_src == 2) {
 210       src = alu->src + 1;
 211       index = ppir_target_get_src_reg_index(src);
 212       f->arg1_source = index >> 2;
 213       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 214       f->arg1_absolute = src->absolute;
 215       f->arg1_negate = src->negate;
 216    }
 217 }
 218
 219 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
 220 {
 221    ppir_codegen_field_float_mul *f = code;
 222    ppir_alu_node *alu = ppir_node_to_alu(node);
 223
 224    ppir_dest *dest = &alu->dest;
 225    int dest_component = ffs(dest->write_mask) - 1;
 226    assert(dest_component >= 0);
 227
 228    if (dest->type != ppir_target_pipeline) {
 229       f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 230       f->output_en = true;
 231    }
 232    f->dest_modifier = dest->modifier;
 233
 234    switch (node->op) {
 235    case ppir_op_mul:
 236       f->op = shift_to_op(alu->shift);
 237       break;
 238    case ppir_op_mov:
 239       f->op = ppir_codegen_float_mul_op_mov;
 240       break;
 241    case ppir_op_max:
 242       f->op = ppir_codegen_float_mul_op_max;
 243       break;
 244    case ppir_op_min:
 245       f->op = ppir_codegen_float_mul_op_min;
 246       break;
 247    case ppir_op_and:
 248       f->op = ppir_codegen_float_mul_op_and;
 249       break;
 250    case ppir_op_or:
 251       f->op = ppir_codegen_float_mul_op_or;
 252       break;
 253    case ppir_op_xor:
 254       f->op = ppir_codegen_float_mul_op_xor;
 255       break;
 256    case ppir_op_gt:
 257       f->op = ppir_codegen_float_mul_op_gt;
 258       break;
 259    case ppir_op_ge:
 260       f->op = ppir_codegen_float_mul_op_ge;
 261       break;
 262    case ppir_op_eq:
 263       f->op = ppir_codegen_float_mul_op_eq;
 264       break;
 265    case ppir_op_ne:
 266       f->op = ppir_codegen_float_mul_op_ne;
 267       break;
 268    case ppir_op_not:
 269       f->op = ppir_codegen_float_mul_op_not;
 270       break;
 271    default:
 272       break;
 273    }
 274
 275    ppir_src *src = alu->src;
 276    f->arg0_source = get_scl_reg_index(src, dest_component);
 277    f->arg0_absolute = src->absolute;
 278    f->arg0_negate = src->negate;
 279
 280    if (alu->num_src == 2) {
 281       src = alu->src + 1;
 282       f->arg1_source = get_scl_reg_index(src, dest_component);
 283       f->arg1_absolute = src->absolute;
 284       f->arg1_negate = src->negate;
 285    }
 286 }
 287
 288 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
 289 {
 290    ppir_codegen_field_vec4_acc *f = code;
 291    ppir_alu_node *alu = ppir_node_to_alu(node);
 292
 293    ppir_dest *dest = &alu->dest;
 294    int index = ppir_target_get_dest_reg_index(dest);
 295    int dest_shift = index & 0x3;
 296    f->dest = index >> 2;
 297    f->mask = dest->write_mask << dest_shift;
 298    f->dest_modifier = dest->modifier;
 299
 300    switch (node->op) {
 301    case ppir_op_add:
 302       f->op = ppir_codegen_vec4_acc_op_add;
 303       break;
 304    case ppir_op_mov:
 305       f->op = ppir_codegen_vec4_acc_op_mov;
 306       break;
 307    case ppir_op_sum3:
 308       f->op = ppir_codegen_vec4_acc_op_sum3;
 309       dest_shift = 0;
 310       break;
 311    case ppir_op_sum4:
 312       f->op = ppir_codegen_vec4_acc_op_sum4;
 313       dest_shift = 0;
 314       break;
 315    case ppir_op_floor:
 316       f->op = ppir_codegen_vec4_acc_op_floor;
 317       break;
 318    case ppir_op_ceil:
 319       f->op = ppir_codegen_vec4_acc_op_ceil;
 320       break;
 321    case ppir_op_fract:
 322       f->op = ppir_codegen_vec4_acc_op_fract;
 323       break;
 324    case ppir_op_gt:
 325       f->op = ppir_codegen_vec4_acc_op_gt;
 326       break;
 327    case ppir_op_ge:
 328       f->op = ppir_codegen_vec4_acc_op_ge;
 329       break;
 330    case ppir_op_eq:
 331       f->op = ppir_codegen_vec4_acc_op_eq;
 332       break;
 333    case ppir_op_ne:
 334       f->op = ppir_codegen_vec4_acc_op_ne;
 335       break;
 336    case ppir_op_select:
 337       f->op = ppir_codegen_vec4_acc_op_sel;
 338       break;
 339    case ppir_op_max:
 340       f->op = ppir_codegen_vec4_acc_op_max;
 341       break;
 342    case ppir_op_min:
 343       f->op = ppir_codegen_vec4_acc_op_min;
 344       break;
 345    default:
 346       break;
 347    }
 348
 349    ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
 350    index = ppir_target_get_src_reg_index(src);
 351
 352    if (src->type == ppir_target_pipeline &&
 353        src->pipeline == ppir_pipeline_reg_vmul)
 354       f->mul_in = true;
 355    else
 356       f->arg0_source = index >> 2;
 357
 358    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 359    f->arg0_absolute = src->absolute;
 360    f->arg0_negate = src->negate;
 361
 362    if (++src < alu->src + alu->num_src) {
 363       index = ppir_target_get_src_reg_index(src);
 364       f->arg1_source = index >> 2;
 365       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 366       f->arg1_absolute = src->absolute;
 367       f->arg1_negate = src->negate;
 368    }
 369 }
 370
 371 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
 372 {
 373    ppir_codegen_field_float_acc *f = code;
 374    ppir_alu_node *alu = ppir_node_to_alu(node);
 375
 376    ppir_dest *dest = &alu->dest;
 377    int dest_component = ffs(dest->write_mask) - 1;
 378    assert(dest_component >= 0);
 379
 380    f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 381    f->output_en = true;
 382    f->dest_modifier = dest->modifier;
 383
 384    switch (node->op) {
 385    case ppir_op_add:
 386       f->op = shift_to_op(alu->shift);
 387       break;
 388    case ppir_op_mov:
 389       f->op = ppir_codegen_float_acc_op_mov;
 390       break;
 391    case ppir_op_max:
 392       f->op = ppir_codegen_float_acc_op_max;
 393       break;
 394    case ppir_op_min:
 395       f->op = ppir_codegen_float_acc_op_min;
 396       break;
 397    case ppir_op_floor:
 398       f->op = ppir_codegen_float_acc_op_floor;
 399       break;
 400    case ppir_op_ceil:
 401       f->op = ppir_codegen_float_acc_op_ceil;
 402       break;
 403    case ppir_op_fract:
 404       f->op = ppir_codegen_float_acc_op_fract;
 405       break;
 406    case ppir_op_gt:
 407       f->op = ppir_codegen_float_acc_op_gt;
 408       break;
 409    case ppir_op_ge:
 410       f->op = ppir_codegen_float_acc_op_ge;
 411       break;
 412    case ppir_op_eq:
 413       f->op = ppir_codegen_float_acc_op_eq;
 414       break;
 415    case ppir_op_ne:
 416       f->op = ppir_codegen_float_acc_op_ne;
 417       break;
 418    case ppir_op_select:
 419       f->op = ppir_codegen_float_acc_op_sel;
 420       break;
 421    default:
 422       break;
 423    }
 424
 425    ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
 426    if (src->type == ppir_target_pipeline &&
 427        src->pipeline == ppir_pipeline_reg_fmul)
 428       f->mul_in = true;
 429    else
 430       f->arg0_source = get_scl_reg_index(src, dest_component);
 431    f->arg0_absolute = src->absolute;
 432    f->arg0_negate = src->negate;
 433
 434    if (++src < alu->src + alu->num_src) {
 435       f->arg1_source = get_scl_reg_index(src, dest_component);
 436       f->arg1_absolute = src->absolute;
 437       f->arg1_negate = src->negate;
 438    }
 439 }
 440
 441 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
 442 {
 443    ppir_codegen_field_combine *f = code;
 444    ppir_alu_node *alu = ppir_node_to_alu(node);
 445
 446    switch (node->op) {
 447    case ppir_op_rsqrt:
 448    case ppir_op_log2:
 449    case ppir_op_exp2:
 450    case ppir_op_rcp:
 451    case ppir_op_sqrt:
 452    case ppir_op_sin:
 453    case ppir_op_cos:
 454    {
 455       f->scalar.dest_vec = false;
 456       f->scalar.arg1_en = false;
 457
 458       ppir_dest *dest = &alu->dest;
 459       int dest_component = ffs(dest->write_mask) - 1;
 460       assert(dest_component >= 0);
 461       f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 462       f->scalar.dest_modifier = dest->modifier;
 463
 464       ppir_src *src = alu->src;
 465       f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
 466       f->scalar.arg0_absolute = src->absolute;
 467       f->scalar.arg0_negate = src->negate;
 468
 469       switch (node->op) {
 470       case ppir_op_rsqrt:
 471          f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
 472          break;
 473       case ppir_op_log2:
 474          f->scalar.op = ppir_codegen_combine_scalar_op_log2;
 475          break;
 476       case ppir_op_exp2:
 477          f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
 478          break;
 479       case ppir_op_rcp:
 480          f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
 481          break;
 482       case ppir_op_sqrt:
 483          f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
 484          break;
 485       case ppir_op_sin:
 486          f->scalar.op = ppir_codegen_combine_scalar_op_sin;
 487          break;
 488       case ppir_op_cos:
 489          f->scalar.op = ppir_codegen_combine_scalar_op_cos;
 490          break;
 491       default:
 492          break;
 493       }
 494    }
 495    default:
 496       break;
 497    }
 498 }
 499
 500 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
 501 {
 502    assert(node->op == ppir_op_store_temp);
 503
 504    ppir_codegen_field_temp_write *f = code;
 505    ppir_store_node *snode = ppir_node_to_store(node);
 506    int num_components = snode->num_components;
 507
 508    f->temp_write.dest = 0x03; // 11 - temporary
 509    f->temp_write.source = snode->src.reg->index;
 510
 511    int alignment = num_components == 4 ? 2 : num_components - 1;
 512    f->temp_write.alignment = alignment;
 513    f->temp_write.index = snode->index << (2 - alignment);
 514
 515    f->temp_write.offset_reg = snode->index >> 2;
 516 }
 517
 518 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
 519 {
 520    for (int i = 0; i < constant->num; i++)
 521       code[i] = util_float_to_half(constant->value[i].f);
 522 }
 523
 524 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
 525 {
 526    ppir_codegen_field_branch *b = code;
 527    assert(node->op == ppir_op_discard);
 528
 529    b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
 530    b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
 531    b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
 532 }
 533
 534 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
 535 {
 536    ppir_codegen_field_branch *b = code;
 537    ppir_branch_node *branch;
 538    ppir_instr *target_instr;
 539    if (node->op == ppir_op_discard) {
 540       ppir_codegen_encode_discard(node, code);
 541       return;
 542    }
 543
 544    assert(node->op == ppir_op_branch);
 545    branch = ppir_node_to_branch(node);
 546
 547    b->branch.unknown_0 = 0x0;
 548    b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
 549    b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
 550    b->branch.cond_gt = branch->cond_gt;
 551    b->branch.cond_eq = branch->cond_eq;
 552    b->branch.cond_lt = branch->cond_lt;
 553    b->branch.unknown_1 = 0x0;
 554
 555    target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
 556    b->branch.target = target_instr->offset - node->instr->offset;
 557    b->branch.next_count = target_instr->encode_size;
 558 }
 559
 560 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
 561
 562 static const ppir_codegen_instr_slot_encode_func
 563 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
 564    [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
 565    [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
 566    [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
 567    [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
 568    [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
 569    [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
 570    [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
 571    [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
 572    [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
 573    [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
 574 };
 575
 576 static const int ppir_codegen_field_size[] = {
 577    34, 62, 41, 43, 30, 44, 31, 30, 41, 73
 578 };
 579
 580 static inline int align_to_word(int size)
 581 {
 582    return ((size + 0x1f) >> 5);
 583 }
 584
 585 static int get_instr_encode_size(ppir_instr *instr)
 586 {
 587    int size = 0;
 588
 589    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 590       if (instr->slots[i])
 591          size += ppir_codegen_field_size[i];
 592    }
 593
 594    for (int i = 0; i < 2; i++) {
 595       if (instr->constant[i].num)
 596          size += 64;
 597    }
 598
 599    return align_to_word(size) + 1;
 600 }
 601
 602 static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
 603 {
 604    int off1 = dst_offset & 0x1f;
 605    uint32_t *cpy_dst = dst, *cpy_src = src;
 606
 607    cpy_dst += (dst_offset >> 5);
 608
 609    if (off1) {
 610       int off2 = 32 - off1;
 611       int cpy_size = 0;
 612       while (1) {
 613          *cpy_dst |= *cpy_src << off1;
 614          cpy_dst++;
 615
 616          cpy_size += off2;
 617          if (cpy_size >= src_size)
 618             break;
 619
 620          *cpy_dst |= *cpy_src >> off2;
 621          cpy_src++;
 622
 623          cpy_size += off1;
 624          if (cpy_size >= src_size)
 625             break;
 626       }
 627    }
 628    else
 629       memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
 630 }
 631
 632 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
 633 {
 634    int size = 0;
 635    ppir_codegen_ctrl *ctrl = code;
 636
 637    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 638       if (instr->slots[i]) {
 639          /* max field size (73), align to dword */
 640          uint8_t output[12] = {0};
 641
 642          ppir_codegen_encode_slot[i](instr->slots[i], output);
 643          bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
 644
 645          size += ppir_codegen_field_size[i];
 646          ctrl->fields |= 1 << i;
 647       }
 648    }
 649
 650    if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
 651       ctrl->sync = true;
 652
 653    for (int i = 0; i < 2; i++) {
 654       if (instr->constant[i].num) {
 655          uint16_t output[4] = {0};
 656
 657          ppir_codegen_encode_const(instr->constant + i, output);
 658          bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
 659
 660          size += 64;
 661          ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
 662       }
 663    }
 664
 665    size = align_to_word(size) + 1;
 666
 667    ctrl->count = size;
 668    if (instr->is_end)
 669       ctrl->stop = true;
 670
 671    if (last_code) {
 672       ppir_codegen_ctrl *last_ctrl = last_code;
 673       last_ctrl->next_count = size;
 674       last_ctrl->prefetch = true;
 675    }
 676
 677    return size;
 678 }
 679
 680 static void ppir_codegen_print_prog(ppir_compiler *comp)
 681 {
 682    uint32_t *prog = comp->prog->shader;
 683    unsigned offset = 0;
 684
 685    printf("========ppir codegen========\n");
 686    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 687       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 688          printf("%03d (@%6d): ", instr->index, instr->offset);
 689          int n = prog[0] & 0x1f;
 690          for (int i = 0; i < n; i++) {
 691             if (i && i % 6 == 0)
 692                printf("\n    ");
 693             printf("%08x ", prog[i]);
 694          }
 695          printf("\n");
 696          ppir_disassemble_instr(prog, offset);
 697          prog += n;
 698          offset += n;
 699       }
 700    }
 701    printf("-----------------------\n");
 702 }
 703
 704 bool ppir_codegen_prog(ppir_compiler *comp)
 705 {
 706    int size = 0;
 707    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 708       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 709          instr->offset = size;
 710          instr->encode_size = get_instr_encode_size(instr);
 711          size += instr->encode_size;
 712       }
 713    }
 714
 715    uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
 716    if (!prog)
 717       return false;
 718
 719    uint32_t *code = prog, *last_code = NULL;
 720    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 721       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 722          int offset = encode_instr(instr, code, last_code);
 723          last_code = code;
 724          code += offset;
 725       }
 726    }
 727
 728    comp->prog->shader = prog;
 729    comp->prog->shader_size = size * sizeof(uint32_t);
 730
 731    if (lima_debug & LIMA_DEBUG_PP)
 732       ppir_codegen_print_prog(comp);
 733
 734    return true;
 735 }