src/gallium/drivers/lima/ir/pp/codegen.c

   1 /*
   2  * Copyright (c) 2017 Lima Project
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the
  12  * next paragraph) shall be included in all copies or substantial portions
  13  * of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include "util/ralloc.h"
  26 #include "util/u_half.h"
  27 #include "util/bitscan.h"
  28
  29 #include "ppir.h"
  30 #include "codegen.h"
  31 #include "lima_context.h"
  32
  33 static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
  34 {
  35    unsigned ret = 0;
  36    for (int i = 0; i < 4; i++)
  37       ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
  38    return ret;
  39 }
  40
  41 static int get_scl_reg_index(ppir_src *src, int component)
  42 {
  43    int ret = ppir_target_get_src_reg_index(src);
  44    ret += src->swizzle[component];
  45    return ret;
  46 }
  47
  48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
  49 {
  50    ppir_codegen_field_varying *f = code;
  51    ppir_load_node *load = ppir_node_to_load(node);
  52    ppir_dest *dest = &load->dest;
  53    int index = ppir_target_get_dest_reg_index(dest);
  54    int num_components = load->num_components;
  55
  56    if (num_components) {
  57       assert(node->op == ppir_op_load_varying ||
  58              node->op == ppir_op_load_coords ||
  59              node->op == ppir_op_load_fragcoord ||
  60              node->op == ppir_op_load_pointcoord ||
  61              node->op == ppir_op_load_frontface);
  62
  63       f->imm.dest = index >> 2;
  64       f->imm.mask = dest->write_mask << (index & 0x3);
  65
  66       int alignment = num_components == 3 ? 3 : num_components - 1;
  67       f->imm.alignment = alignment;
  68       f->imm.offset_vector = 0xf;
  69
  70       if (alignment == 3)
  71          f->imm.index = load->index >> 2;
  72       else
  73          f->imm.index = load->index >> alignment;
  74
  75       switch (node->op) {
  76          case ppir_op_load_fragcoord:
  77             f->imm.source_type = 2;
  78             f->imm.perspective = 3;
  79             break;
  80          case ppir_op_load_pointcoord:
  81             f->imm.source_type = 3;
  82             break;
  83          case ppir_op_load_frontface:
  84             f->imm.source_type = 3;
  85             f->imm.perspective = 1;
  86             break;
  87          default:
  88             break;
  89       }
  90    }
  91    else {
  92       assert(node->op == ppir_op_load_coords);
  93
  94       f->reg.dest = index >> 2;
  95       f->reg.mask = dest->write_mask << (index & 0x3);
  96
  97       f->reg.source_type = 1;
  98
  99       ppir_src *src = &load->src;
 100       index = ppir_target_get_src_reg_index(src);
 101       f->reg.source = index >> 2;
 102       f->reg.negate = src->negate;
 103       f->reg.absolute = src->absolute;
 104       f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
 105    }
 106 }
 107
 108 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
 109 {
 110    ppir_codegen_field_sampler *f = code;
 111    ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
 112
 113    f->index = ldtex->sampler;
 114    f->lod_bias_en = 0;
 115    f->type = ppir_codegen_sampler_type_2d;
 116    f->offset_en = 0;
 117    f->unknown_2 = 0x39001;
 118 }
 119
 120 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
 121 {
 122    ppir_codegen_field_uniform *f = code;
 123    ppir_load_node *load = ppir_node_to_load(node);
 124
 125    switch (node->op) {
 126       case ppir_op_load_uniform:
 127          f->source = ppir_codegen_uniform_src_uniform;
 128          break;
 129       case ppir_op_load_temp:
 130          f->source = ppir_codegen_uniform_src_temporary;
 131          break;
 132       default:
 133          assert(0);
 134    }
 135
 136    int num_components = load->num_components;
 137    int alignment = num_components == 4 ? 2 : num_components - 1;
 138
 139    f->alignment = alignment;
 140
 141    /* TODO: uniform can be also combined like varying */
 142    f->index = load->index << (2 - alignment);
 143 }
 144
 145 static unsigned shift_to_op(int shift)
 146 {
 147    assert(shift >= -3 && shift <= 3);
 148    return shift < 0 ? shift + 8 : shift;
 149 }
 150
 151 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
 152 {
 153    ppir_codegen_field_vec4_mul *f = code;
 154    ppir_alu_node *alu = ppir_node_to_alu(node);
 155
 156    ppir_dest *dest = &alu->dest;
 157    int dest_shift = 0;
 158    if (dest->type != ppir_target_pipeline) {
 159       int index = ppir_target_get_dest_reg_index(dest);
 160       dest_shift = index & 0x3;
 161       f->dest = index >> 2;
 162       f->mask = dest->write_mask << dest_shift;
 163    }
 164    f->dest_modifier = dest->modifier;
 165
 166    switch (node->op) {
 167    case ppir_op_mul:
 168       f->op = shift_to_op(alu->shift);
 169       break;
 170    case ppir_op_mov:
 171    case ppir_op_store_color:
 172       f->op = ppir_codegen_vec4_mul_op_mov;
 173       break;
 174    case ppir_op_max:
 175       f->op = ppir_codegen_vec4_mul_op_max;
 176       break;
 177    case ppir_op_min:
 178       f->op = ppir_codegen_vec4_mul_op_min;
 179       break;
 180    case ppir_op_and:
 181       f->op = ppir_codegen_vec4_mul_op_and;
 182       break;
 183    case ppir_op_or:
 184       f->op = ppir_codegen_vec4_mul_op_or;
 185       break;
 186    case ppir_op_xor:
 187       f->op = ppir_codegen_vec4_mul_op_xor;
 188       break;
 189    case ppir_op_gt:
 190       f->op = ppir_codegen_vec4_mul_op_gt;
 191       break;
 192    case ppir_op_ge:
 193       f->op = ppir_codegen_vec4_mul_op_ge;
 194       break;
 195    case ppir_op_eq:
 196       f->op = ppir_codegen_vec4_mul_op_eq;
 197       break;
 198    case ppir_op_ne:
 199       f->op = ppir_codegen_vec4_mul_op_ne;
 200       break;
 201    case ppir_op_not:
 202       f->op = ppir_codegen_vec4_mul_op_not;
 203       break;
 204    default:
 205       break;
 206    }
 207
 208    ppir_src *src = alu->src;
 209    int index = ppir_target_get_src_reg_index(src);
 210    f->arg0_source = index >> 2;
 211    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 212    f->arg0_absolute = src->absolute;
 213    f->arg0_negate = src->negate;
 214
 215    if (alu->num_src == 2) {
 216       src = alu->src + 1;
 217       index = ppir_target_get_src_reg_index(src);
 218       f->arg1_source = index >> 2;
 219       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 220       f->arg1_absolute = src->absolute;
 221       f->arg1_negate = src->negate;
 222    }
 223 }
 224
 225 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
 226 {
 227    ppir_codegen_field_float_mul *f = code;
 228    ppir_alu_node *alu = ppir_node_to_alu(node);
 229
 230    ppir_dest *dest = &alu->dest;
 231    int dest_component = ffs(dest->write_mask) - 1;
 232    assert(dest_component >= 0);
 233
 234    if (dest->type != ppir_target_pipeline) {
 235       f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 236       f->output_en = true;
 237    }
 238    f->dest_modifier = dest->modifier;
 239
 240    switch (node->op) {
 241    case ppir_op_mul:
 242       f->op = shift_to_op(alu->shift);
 243       break;
 244    case ppir_op_mov:
 245       f->op = ppir_codegen_float_mul_op_mov;
 246       break;
 247    case ppir_op_sel_cond:
 248       f->op = ppir_codegen_float_mul_op_mov;
 249       break;
 250    case ppir_op_max:
 251       f->op = ppir_codegen_float_mul_op_max;
 252       break;
 253    case ppir_op_min:
 254       f->op = ppir_codegen_float_mul_op_min;
 255       break;
 256    case ppir_op_and:
 257       f->op = ppir_codegen_float_mul_op_and;
 258       break;
 259    case ppir_op_or:
 260       f->op = ppir_codegen_float_mul_op_or;
 261       break;
 262    case ppir_op_xor:
 263       f->op = ppir_codegen_float_mul_op_xor;
 264       break;
 265    case ppir_op_gt:
 266       f->op = ppir_codegen_float_mul_op_gt;
 267       break;
 268    case ppir_op_ge:
 269       f->op = ppir_codegen_float_mul_op_ge;
 270       break;
 271    case ppir_op_eq:
 272       f->op = ppir_codegen_float_mul_op_eq;
 273       break;
 274    case ppir_op_ne:
 275       f->op = ppir_codegen_float_mul_op_ne;
 276       break;
 277    case ppir_op_not:
 278       f->op = ppir_codegen_float_mul_op_not;
 279       break;
 280    default:
 281       break;
 282    }
 283
 284    ppir_src *src = alu->src;
 285    f->arg0_source = get_scl_reg_index(src, dest_component);
 286    f->arg0_absolute = src->absolute;
 287    f->arg0_negate = src->negate;
 288
 289    if (alu->num_src == 2) {
 290       src = alu->src + 1;
 291       f->arg1_source = get_scl_reg_index(src, dest_component);
 292       f->arg1_absolute = src->absolute;
 293       f->arg1_negate = src->negate;
 294    }
 295 }
 296
 297 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
 298 {
 299    ppir_codegen_field_vec4_acc *f = code;
 300    ppir_alu_node *alu = ppir_node_to_alu(node);
 301
 302    ppir_dest *dest = &alu->dest;
 303    int index = ppir_target_get_dest_reg_index(dest);
 304    int dest_shift = index & 0x3;
 305    f->dest = index >> 2;
 306    f->mask = dest->write_mask << dest_shift;
 307    f->dest_modifier = dest->modifier;
 308
 309    switch (node->op) {
 310    case ppir_op_add:
 311       f->op = ppir_codegen_vec4_acc_op_add;
 312       break;
 313    case ppir_op_mov:
 314    case ppir_op_store_color:
 315       f->op = ppir_codegen_vec4_acc_op_mov;
 316       break;
 317    case ppir_op_sum3:
 318       f->op = ppir_codegen_vec4_acc_op_sum3;
 319       dest_shift = 0;
 320       break;
 321    case ppir_op_sum4:
 322       f->op = ppir_codegen_vec4_acc_op_sum4;
 323       dest_shift = 0;
 324       break;
 325    case ppir_op_floor:
 326       f->op = ppir_codegen_vec4_acc_op_floor;
 327       break;
 328    case ppir_op_ceil:
 329       f->op = ppir_codegen_vec4_acc_op_ceil;
 330       break;
 331    case ppir_op_fract:
 332       f->op = ppir_codegen_vec4_acc_op_fract;
 333       break;
 334    case ppir_op_gt:
 335       f->op = ppir_codegen_vec4_acc_op_gt;
 336       break;
 337    case ppir_op_ge:
 338       f->op = ppir_codegen_vec4_acc_op_ge;
 339       break;
 340    case ppir_op_eq:
 341       f->op = ppir_codegen_vec4_acc_op_eq;
 342       break;
 343    case ppir_op_ne:
 344       f->op = ppir_codegen_vec4_acc_op_ne;
 345       break;
 346    case ppir_op_select:
 347       f->op = ppir_codegen_vec4_acc_op_sel;
 348       break;
 349    case ppir_op_max:
 350       f->op = ppir_codegen_vec4_acc_op_max;
 351       break;
 352    case ppir_op_min:
 353       f->op = ppir_codegen_vec4_acc_op_min;
 354       break;
 355    case ppir_op_ddx:
 356       f->op = ppir_codegen_vec4_acc_op_dFdx;
 357       break;
 358    case ppir_op_ddy:
 359       f->op = ppir_codegen_vec4_acc_op_dFdy;
 360       break;
 361    default:
 362       break;
 363    }
 364
 365    ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
 366    index = ppir_target_get_src_reg_index(src);
 367
 368    if (src->type == ppir_target_pipeline &&
 369        src->pipeline == ppir_pipeline_reg_vmul)
 370       f->mul_in = true;
 371    else
 372       f->arg0_source = index >> 2;
 373
 374    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 375    f->arg0_absolute = src->absolute;
 376    f->arg0_negate = src->negate;
 377
 378    if (++src < alu->src + alu->num_src) {
 379       index = ppir_target_get_src_reg_index(src);
 380       f->arg1_source = index >> 2;
 381       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 382       f->arg1_absolute = src->absolute;
 383       f->arg1_negate = src->negate;
 384    }
 385 }
 386
 387 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
 388 {
 389    ppir_codegen_field_float_acc *f = code;
 390    ppir_alu_node *alu = ppir_node_to_alu(node);
 391
 392    ppir_dest *dest = &alu->dest;
 393    int dest_component = ffs(dest->write_mask) - 1;
 394    assert(dest_component >= 0);
 395
 396    f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 397    f->output_en = true;
 398    f->dest_modifier = dest->modifier;
 399
 400    switch (node->op) {
 401    case ppir_op_add:
 402       f->op = shift_to_op(alu->shift);
 403       break;
 404    case ppir_op_mov:
 405       f->op = ppir_codegen_float_acc_op_mov;
 406       break;
 407    case ppir_op_max:
 408       f->op = ppir_codegen_float_acc_op_max;
 409       break;
 410    case ppir_op_min:
 411       f->op = ppir_codegen_float_acc_op_min;
 412       break;
 413    case ppir_op_floor:
 414       f->op = ppir_codegen_float_acc_op_floor;
 415       break;
 416    case ppir_op_ceil:
 417       f->op = ppir_codegen_float_acc_op_ceil;
 418       break;
 419    case ppir_op_fract:
 420       f->op = ppir_codegen_float_acc_op_fract;
 421       break;
 422    case ppir_op_gt:
 423       f->op = ppir_codegen_float_acc_op_gt;
 424       break;
 425    case ppir_op_ge:
 426       f->op = ppir_codegen_float_acc_op_ge;
 427       break;
 428    case ppir_op_eq:
 429       f->op = ppir_codegen_float_acc_op_eq;
 430       break;
 431    case ppir_op_ne:
 432       f->op = ppir_codegen_float_acc_op_ne;
 433       break;
 434    case ppir_op_select:
 435       f->op = ppir_codegen_float_acc_op_sel;
 436       break;
 437    case ppir_op_ddx:
 438       f->op = ppir_codegen_float_acc_op_dFdx;
 439       break;
 440    case ppir_op_ddy:
 441       f->op = ppir_codegen_float_acc_op_dFdy;
 442       break;
 443    default:
 444       break;
 445    }
 446
 447    ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
 448    if (src->type == ppir_target_pipeline &&
 449        src->pipeline == ppir_pipeline_reg_fmul)
 450       f->mul_in = true;
 451    else
 452       f->arg0_source = get_scl_reg_index(src, dest_component);
 453    f->arg0_absolute = src->absolute;
 454    f->arg0_negate = src->negate;
 455
 456    if (++src < alu->src + alu->num_src) {
 457       f->arg1_source = get_scl_reg_index(src, dest_component);
 458       f->arg1_absolute = src->absolute;
 459       f->arg1_negate = src->negate;
 460    }
 461 }
 462
 463 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
 464 {
 465    ppir_codegen_field_combine *f = code;
 466    ppir_alu_node *alu = ppir_node_to_alu(node);
 467
 468    switch (node->op) {
 469    case ppir_op_rsqrt:
 470    case ppir_op_log2:
 471    case ppir_op_exp2:
 472    case ppir_op_rcp:
 473    case ppir_op_sqrt:
 474    case ppir_op_sin:
 475    case ppir_op_cos:
 476    {
 477       f->scalar.dest_vec = false;
 478       f->scalar.arg1_en = false;
 479
 480       ppir_dest *dest = &alu->dest;
 481       int dest_component = ffs(dest->write_mask) - 1;
 482       assert(dest_component >= 0);
 483       f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 484       f->scalar.dest_modifier = dest->modifier;
 485
 486       ppir_src *src = alu->src;
 487       f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
 488       f->scalar.arg0_absolute = src->absolute;
 489       f->scalar.arg0_negate = src->negate;
 490
 491       switch (node->op) {
 492       case ppir_op_rsqrt:
 493          f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
 494          break;
 495       case ppir_op_log2:
 496          f->scalar.op = ppir_codegen_combine_scalar_op_log2;
 497          break;
 498       case ppir_op_exp2:
 499          f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
 500          break;
 501       case ppir_op_rcp:
 502          f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
 503          break;
 504       case ppir_op_sqrt:
 505          f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
 506          break;
 507       case ppir_op_sin:
 508          f->scalar.op = ppir_codegen_combine_scalar_op_sin;
 509          break;
 510       case ppir_op_cos:
 511          f->scalar.op = ppir_codegen_combine_scalar_op_cos;
 512          break;
 513       default:
 514          break;
 515       }
 516    }
 517    default:
 518       break;
 519    }
 520 }
 521
 522 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
 523 {
 524    assert(node->op == ppir_op_store_temp);
 525
 526    ppir_codegen_field_temp_write *f = code;
 527    ppir_store_node *snode = ppir_node_to_store(node);
 528    int num_components = snode->num_components;
 529
 530    f->temp_write.dest = 0x03; // 11 - temporary
 531    f->temp_write.source = snode->src.reg->index;
 532
 533    int alignment = num_components == 4 ? 2 : num_components - 1;
 534    f->temp_write.alignment = alignment;
 535    f->temp_write.index = snode->index << (2 - alignment);
 536
 537    f->temp_write.offset_reg = snode->index >> 2;
 538 }
 539
 540 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
 541 {
 542    for (int i = 0; i < constant->num; i++)
 543       code[i] = util_float_to_half(constant->value[i].f);
 544 }
 545
 546 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
 547 {
 548    ppir_codegen_field_branch *b = code;
 549    assert(node->op == ppir_op_discard);
 550
 551    b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
 552    b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
 553    b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
 554 }
 555
 556 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
 557 {
 558    ppir_codegen_field_branch *b = code;
 559    ppir_branch_node *branch;
 560    ppir_instr *target_instr;
 561    if (node->op == ppir_op_discard) {
 562       ppir_codegen_encode_discard(node, code);
 563       return;
 564    }
 565
 566    assert(node->op == ppir_op_branch);
 567    branch = ppir_node_to_branch(node);
 568
 569    b->branch.unknown_0 = 0x0;
 570    b->branch.unknown_1 = 0x0;
 571
 572    if (branch->num_src == 2) {
 573       b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
 574       b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
 575       b->branch.cond_gt = branch->cond_gt;
 576       b->branch.cond_eq = branch->cond_eq;
 577       b->branch.cond_lt = branch->cond_lt;
 578    } else if (branch->num_src == 0) {
 579       /* Unconditional branch */
 580       b->branch.arg0_source = 0;
 581       b->branch.arg1_source = 0;
 582       b->branch.cond_gt = true;
 583       b->branch.cond_eq = true;
 584       b->branch.cond_lt = true;
 585    } else {
 586       assert(false);
 587    }
 588
 589    target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
 590    b->branch.target = target_instr->offset - node->instr->offset;
 591    b->branch.next_count = target_instr->encode_size;
 592 }
 593
 594 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
 595
 596 static const ppir_codegen_instr_slot_encode_func
 597 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
 598    [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
 599    [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
 600    [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
 601    [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
 602    [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
 603    [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
 604    [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
 605    [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
 606    [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
 607    [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
 608 };
 609
 610 static const int ppir_codegen_field_size[] = {
 611    34, 62, 41, 43, 30, 44, 31, 30, 41, 73
 612 };
 613
 614 static inline int align_to_word(int size)
 615 {
 616    return ((size + 0x1f) >> 5);
 617 }
 618
 619 static int get_instr_encode_size(ppir_instr *instr)
 620 {
 621    int size = 0;
 622
 623    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 624       if (instr->slots[i])
 625          size += ppir_codegen_field_size[i];
 626    }
 627
 628    for (int i = 0; i < 2; i++) {
 629       if (instr->constant[i].num)
 630          size += 64;
 631    }
 632
 633    return align_to_word(size) + 1;
 634 }
 635
 636 static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
 637 {
 638    int off1 = dst_offset & 0x1f;
 639    uint32_t *cpy_dst = dst, *cpy_src = src;
 640
 641    cpy_dst += (dst_offset >> 5);
 642
 643    if (off1) {
 644       int off2 = 32 - off1;
 645       int cpy_size = 0;
 646       while (1) {
 647          *cpy_dst |= *cpy_src << off1;
 648          cpy_dst++;
 649
 650          cpy_size += off2;
 651          if (cpy_size >= src_size)
 652             break;
 653
 654          *cpy_dst |= *cpy_src >> off2;
 655          cpy_src++;
 656
 657          cpy_size += off1;
 658          if (cpy_size >= src_size)
 659             break;
 660       }
 661    }
 662    else
 663       memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
 664 }
 665
 666 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
 667 {
 668    int size = 0;
 669    ppir_codegen_ctrl *ctrl = code;
 670
 671    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 672       if (instr->slots[i]) {
 673          /* max field size (73), align to dword */
 674          uint8_t output[12] = {0};
 675
 676          ppir_codegen_encode_slot[i](instr->slots[i], output);
 677          bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
 678
 679          size += ppir_codegen_field_size[i];
 680          ctrl->fields |= 1 << i;
 681       }
 682    }
 683
 684    if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
 685       ctrl->sync = true;
 686
 687    if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
 688       ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
 689       if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
 690          ctrl->sync = true;
 691    }
 692
 693    if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
 694       ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
 695       if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
 696          ctrl->sync = true;
 697    }
 698
 699    for (int i = 0; i < 2; i++) {
 700       if (instr->constant[i].num) {
 701          uint16_t output[4] = {0};
 702
 703          ppir_codegen_encode_const(instr->constant + i, output);
 704          bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
 705
 706          size += 64;
 707          ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
 708       }
 709    }
 710
 711    size = align_to_word(size) + 1;
 712
 713    ctrl->count = size;
 714    if (instr->is_end)
 715       ctrl->stop = true;
 716
 717    if (last_code) {
 718       ppir_codegen_ctrl *last_ctrl = last_code;
 719       last_ctrl->next_count = size;
 720       last_ctrl->prefetch = true;
 721    }
 722
 723    return size;
 724 }
 725
 726 static void ppir_codegen_print_prog(ppir_compiler *comp)
 727 {
 728    uint32_t *prog = comp->prog->shader;
 729    unsigned offset = 0;
 730
 731    printf("========ppir codegen========\n");
 732    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 733       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 734          printf("%03d (@%6d): ", instr->index, instr->offset);
 735          int n = prog[0] & 0x1f;
 736          for (int i = 0; i < n; i++) {
 737             if (i && i % 6 == 0)
 738                printf("\n    ");
 739             printf("%08x ", prog[i]);
 740          }
 741          printf("\n");
 742          ppir_disassemble_instr(prog, offset);
 743          prog += n;
 744          offset += n;
 745       }
 746    }
 747    printf("-----------------------\n");
 748 }
 749
 750 bool ppir_codegen_prog(ppir_compiler *comp)
 751 {
 752    int size = 0;
 753    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 754       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 755          instr->offset = size;
 756          instr->encode_size = get_instr_encode_size(instr);
 757          size += instr->encode_size;
 758       }
 759    }
 760
 761    uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
 762    if (!prog)
 763       return false;
 764
 765    uint32_t *code = prog, *last_code = NULL;
 766    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 767       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 768          int offset = encode_instr(instr, code, last_code);
 769          last_code = code;
 770          code += offset;
 771       }
 772    }
 773
 774    comp->prog->shader = prog;
 775    comp->prog->shader_size = size * sizeof(uint32_t);
 776
 777    if (lima_debug & LIMA_DEBUG_PP)
 778       ppir_codegen_print_prog(comp);
 779
 780    return true;
 781 }