src/gallium/drivers/lima/ir/pp/codegen.c

   1 /*
   2  * Copyright (c) 2017 Lima Project
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the
  12  * next paragraph) shall be included in all copies or substantial portions
  13  * of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include "util/ralloc.h"
  26 #include "util/u_half.h"
  27 #include "util/bitscan.h"
  28
  29 #include "ppir.h"
  30 #include "codegen.h"
  31 #include "lima_context.h"
  32
  33 static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
  34 {
  35    unsigned ret = 0;
  36    for (int i = 0; i < 4; i++)
  37       ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
  38    return ret;
  39 }
  40
  41 static int get_scl_reg_index(ppir_src *src, int component)
  42 {
  43    int ret = ppir_target_get_src_reg_index(src);
  44    ret += src->swizzle[component];
  45    return ret;
  46 }
  47
  48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
  49 {
  50    ppir_codegen_field_varying *f = code;
  51    ppir_load_node *load = ppir_node_to_load(node);
  52    ppir_dest *dest = &load->dest;
  53    int index = ppir_target_get_dest_reg_index(dest);
  54    int num_components = load->num_components;
  55
  56    if (num_components) {
  57       assert(node->op == ppir_op_load_varying ||
  58              node->op == ppir_op_load_coords ||
  59              node->op == ppir_op_load_fragcoord ||
  60              node->op == ppir_op_load_pointcoord ||
  61              node->op == ppir_op_load_frontface);
  62
  63       f->imm.dest = index >> 2;
  64       f->imm.mask = dest->write_mask << (index & 0x3);
  65
  66       int alignment = num_components == 3 ? 3 : num_components - 1;
  67       f->imm.alignment = alignment;
  68       f->imm.offset_vector = 0xf;
  69
  70       if (alignment == 3)
  71          f->imm.index = load->index >> 2;
  72       else
  73          f->imm.index = load->index >> alignment;
  74
  75       switch (node->op) {
  76          case ppir_op_load_fragcoord:
  77             f->imm.source_type = 2;
  78             f->imm.perspective = 3;
  79             break;
  80          case ppir_op_load_pointcoord:
  81             f->imm.source_type = 3;
  82             break;
  83          case ppir_op_load_frontface:
  84             f->imm.source_type = 3;
  85             f->imm.perspective = 1;
  86             break;
  87          default:
  88             break;
  89       }
  90    }
  91    else {
  92       assert(node->op == ppir_op_load_coords);
  93
  94       f->reg.dest = index >> 2;
  95       f->reg.mask = dest->write_mask << (index & 0x3);
  96
  97       f->reg.source_type = 1;
  98
  99       ppir_src *src = &load->src;
 100       index = ppir_target_get_src_reg_index(src);
 101       f->reg.source = index >> 2;
 102       f->reg.negate = src->negate;
 103       f->reg.absolute = src->absolute;
 104       f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
 105    }
 106 }
 107
 108 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
 109 {
 110    ppir_codegen_field_sampler *f = code;
 111    ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
 112
 113    f->index = ldtex->sampler;
 114    f->lod_bias_en = 0;
 115    f->type = ppir_codegen_sampler_type_2d;
 116    f->offset_en = 0;
 117    f->unknown_2 = 0x39001;
 118 }
 119
 120 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
 121 {
 122    ppir_codegen_field_uniform *f = code;
 123    ppir_load_node *load = ppir_node_to_load(node);
 124
 125    switch (node->op) {
 126       case ppir_op_load_uniform:
 127          f->source = ppir_codegen_uniform_src_uniform;
 128          break;
 129       case ppir_op_load_temp:
 130          f->source = ppir_codegen_uniform_src_temporary;
 131          break;
 132       default:
 133          assert(0);
 134    }
 135
 136    int num_components = load->num_components;
 137    int alignment = num_components == 4 ? 2 : num_components - 1;
 138
 139    f->alignment = alignment;
 140
 141    /* TODO: uniform can be also combined like varying */
 142    f->index = load->index << (2 - alignment);
 143 }
 144
 145 static unsigned shift_to_op(int shift)
 146 {
 147    assert(shift >= -3 && shift <= 3);
 148    return shift < 0 ? shift + 8 : shift;
 149 }
 150
 151 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
 152 {
 153    ppir_codegen_field_vec4_mul *f = code;
 154    ppir_alu_node *alu = ppir_node_to_alu(node);
 155
 156    ppir_dest *dest = &alu->dest;
 157    int dest_shift = 0;
 158    if (dest->type != ppir_target_pipeline) {
 159       int index = ppir_target_get_dest_reg_index(dest);
 160       dest_shift = index & 0x3;
 161       f->dest = index >> 2;
 162       f->mask = dest->write_mask << dest_shift;
 163    }
 164    f->dest_modifier = dest->modifier;
 165
 166    switch (node->op) {
 167    case ppir_op_mul:
 168       f->op = shift_to_op(alu->shift);
 169       break;
 170    case ppir_op_mov:
 171       f->op = ppir_codegen_vec4_mul_op_mov;
 172       break;
 173    case ppir_op_max:
 174       f->op = ppir_codegen_vec4_mul_op_max;
 175       break;
 176    case ppir_op_min:
 177       f->op = ppir_codegen_vec4_mul_op_min;
 178       break;
 179    case ppir_op_and:
 180       f->op = ppir_codegen_vec4_mul_op_and;
 181       break;
 182    case ppir_op_or:
 183       f->op = ppir_codegen_vec4_mul_op_or;
 184       break;
 185    case ppir_op_xor:
 186       f->op = ppir_codegen_vec4_mul_op_xor;
 187       break;
 188    case ppir_op_gt:
 189       f->op = ppir_codegen_vec4_mul_op_gt;
 190       break;
 191    case ppir_op_ge:
 192       f->op = ppir_codegen_vec4_mul_op_ge;
 193       break;
 194    case ppir_op_eq:
 195       f->op = ppir_codegen_vec4_mul_op_eq;
 196       break;
 197    case ppir_op_ne:
 198       f->op = ppir_codegen_vec4_mul_op_ne;
 199       break;
 200    case ppir_op_not:
 201       f->op = ppir_codegen_vec4_mul_op_not;
 202       break;
 203    default:
 204       break;
 205    }
 206
 207    ppir_src *src = alu->src;
 208    int index = ppir_target_get_src_reg_index(src);
 209    f->arg0_source = index >> 2;
 210    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 211    f->arg0_absolute = src->absolute;
 212    f->arg0_negate = src->negate;
 213
 214    if (alu->num_src == 2) {
 215       src = alu->src + 1;
 216       index = ppir_target_get_src_reg_index(src);
 217       f->arg1_source = index >> 2;
 218       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 219       f->arg1_absolute = src->absolute;
 220       f->arg1_negate = src->negate;
 221    }
 222 }
 223
 224 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
 225 {
 226    ppir_codegen_field_float_mul *f = code;
 227    ppir_alu_node *alu = ppir_node_to_alu(node);
 228
 229    ppir_dest *dest = &alu->dest;
 230    int dest_component = ffs(dest->write_mask) - 1;
 231    assert(dest_component >= 0);
 232
 233    if (dest->type != ppir_target_pipeline) {
 234       f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 235       f->output_en = true;
 236    }
 237    f->dest_modifier = dest->modifier;
 238
 239    switch (node->op) {
 240    case ppir_op_mul:
 241       f->op = shift_to_op(alu->shift);
 242       break;
 243    case ppir_op_mov:
 244       f->op = ppir_codegen_float_mul_op_mov;
 245       break;
 246    case ppir_op_sel_cond:
 247       f->op = ppir_codegen_float_mul_op_mov;
 248       break;
 249    case ppir_op_max:
 250       f->op = ppir_codegen_float_mul_op_max;
 251       break;
 252    case ppir_op_min:
 253       f->op = ppir_codegen_float_mul_op_min;
 254       break;
 255    case ppir_op_and:
 256       f->op = ppir_codegen_float_mul_op_and;
 257       break;
 258    case ppir_op_or:
 259       f->op = ppir_codegen_float_mul_op_or;
 260       break;
 261    case ppir_op_xor:
 262       f->op = ppir_codegen_float_mul_op_xor;
 263       break;
 264    case ppir_op_gt:
 265       f->op = ppir_codegen_float_mul_op_gt;
 266       break;
 267    case ppir_op_ge:
 268       f->op = ppir_codegen_float_mul_op_ge;
 269       break;
 270    case ppir_op_eq:
 271       f->op = ppir_codegen_float_mul_op_eq;
 272       break;
 273    case ppir_op_ne:
 274       f->op = ppir_codegen_float_mul_op_ne;
 275       break;
 276    case ppir_op_not:
 277       f->op = ppir_codegen_float_mul_op_not;
 278       break;
 279    default:
 280       break;
 281    }
 282
 283    ppir_src *src = alu->src;
 284    f->arg0_source = get_scl_reg_index(src, dest_component);
 285    f->arg0_absolute = src->absolute;
 286    f->arg0_negate = src->negate;
 287
 288    if (alu->num_src == 2) {
 289       src = alu->src + 1;
 290       f->arg1_source = get_scl_reg_index(src, dest_component);
 291       f->arg1_absolute = src->absolute;
 292       f->arg1_negate = src->negate;
 293    }
 294 }
 295
 296 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
 297 {
 298    ppir_codegen_field_vec4_acc *f = code;
 299    ppir_alu_node *alu = ppir_node_to_alu(node);
 300
 301    ppir_dest *dest = &alu->dest;
 302    int index = ppir_target_get_dest_reg_index(dest);
 303    int dest_shift = index & 0x3;
 304    f->dest = index >> 2;
 305    f->mask = dest->write_mask << dest_shift;
 306    f->dest_modifier = dest->modifier;
 307
 308    switch (node->op) {
 309    case ppir_op_add:
 310       f->op = ppir_codegen_vec4_acc_op_add;
 311       break;
 312    case ppir_op_mov:
 313       f->op = ppir_codegen_vec4_acc_op_mov;
 314       break;
 315    case ppir_op_sum3:
 316       f->op = ppir_codegen_vec4_acc_op_sum3;
 317       dest_shift = 0;
 318       break;
 319    case ppir_op_sum4:
 320       f->op = ppir_codegen_vec4_acc_op_sum4;
 321       dest_shift = 0;
 322       break;
 323    case ppir_op_floor:
 324       f->op = ppir_codegen_vec4_acc_op_floor;
 325       break;
 326    case ppir_op_ceil:
 327       f->op = ppir_codegen_vec4_acc_op_ceil;
 328       break;
 329    case ppir_op_fract:
 330       f->op = ppir_codegen_vec4_acc_op_fract;
 331       break;
 332    case ppir_op_gt:
 333       f->op = ppir_codegen_vec4_acc_op_gt;
 334       break;
 335    case ppir_op_ge:
 336       f->op = ppir_codegen_vec4_acc_op_ge;
 337       break;
 338    case ppir_op_eq:
 339       f->op = ppir_codegen_vec4_acc_op_eq;
 340       break;
 341    case ppir_op_ne:
 342       f->op = ppir_codegen_vec4_acc_op_ne;
 343       break;
 344    case ppir_op_select:
 345       f->op = ppir_codegen_vec4_acc_op_sel;
 346       break;
 347    case ppir_op_max:
 348       f->op = ppir_codegen_vec4_acc_op_max;
 349       break;
 350    case ppir_op_min:
 351       f->op = ppir_codegen_vec4_acc_op_min;
 352       break;
 353    default:
 354       break;
 355    }
 356
 357    ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
 358    index = ppir_target_get_src_reg_index(src);
 359
 360    if (src->type == ppir_target_pipeline &&
 361        src->pipeline == ppir_pipeline_reg_vmul)
 362       f->mul_in = true;
 363    else
 364       f->arg0_source = index >> 2;
 365
 366    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 367    f->arg0_absolute = src->absolute;
 368    f->arg0_negate = src->negate;
 369
 370    if (++src < alu->src + alu->num_src) {
 371       index = ppir_target_get_src_reg_index(src);
 372       f->arg1_source = index >> 2;
 373       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 374       f->arg1_absolute = src->absolute;
 375       f->arg1_negate = src->negate;
 376    }
 377 }
 378
 379 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
 380 {
 381    ppir_codegen_field_float_acc *f = code;
 382    ppir_alu_node *alu = ppir_node_to_alu(node);
 383
 384    ppir_dest *dest = &alu->dest;
 385    int dest_component = ffs(dest->write_mask) - 1;
 386    assert(dest_component >= 0);
 387
 388    f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 389    f->output_en = true;
 390    f->dest_modifier = dest->modifier;
 391
 392    switch (node->op) {
 393    case ppir_op_add:
 394       f->op = shift_to_op(alu->shift);
 395       break;
 396    case ppir_op_mov:
 397       f->op = ppir_codegen_float_acc_op_mov;
 398       break;
 399    case ppir_op_max:
 400       f->op = ppir_codegen_float_acc_op_max;
 401       break;
 402    case ppir_op_min:
 403       f->op = ppir_codegen_float_acc_op_min;
 404       break;
 405    case ppir_op_floor:
 406       f->op = ppir_codegen_float_acc_op_floor;
 407       break;
 408    case ppir_op_ceil:
 409       f->op = ppir_codegen_float_acc_op_ceil;
 410       break;
 411    case ppir_op_fract:
 412       f->op = ppir_codegen_float_acc_op_fract;
 413       break;
 414    case ppir_op_gt:
 415       f->op = ppir_codegen_float_acc_op_gt;
 416       break;
 417    case ppir_op_ge:
 418       f->op = ppir_codegen_float_acc_op_ge;
 419       break;
 420    case ppir_op_eq:
 421       f->op = ppir_codegen_float_acc_op_eq;
 422       break;
 423    case ppir_op_ne:
 424       f->op = ppir_codegen_float_acc_op_ne;
 425       break;
 426    case ppir_op_select:
 427       f->op = ppir_codegen_float_acc_op_sel;
 428       break;
 429    default:
 430       break;
 431    }
 432
 433    ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
 434    if (src->type == ppir_target_pipeline &&
 435        src->pipeline == ppir_pipeline_reg_fmul)
 436       f->mul_in = true;
 437    else
 438       f->arg0_source = get_scl_reg_index(src, dest_component);
 439    f->arg0_absolute = src->absolute;
 440    f->arg0_negate = src->negate;
 441
 442    if (++src < alu->src + alu->num_src) {
 443       f->arg1_source = get_scl_reg_index(src, dest_component);
 444       f->arg1_absolute = src->absolute;
 445       f->arg1_negate = src->negate;
 446    }
 447 }
 448
 449 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
 450 {
 451    ppir_codegen_field_combine *f = code;
 452    ppir_alu_node *alu = ppir_node_to_alu(node);
 453
 454    switch (node->op) {
 455    case ppir_op_rsqrt:
 456    case ppir_op_log2:
 457    case ppir_op_exp2:
 458    case ppir_op_rcp:
 459    case ppir_op_sqrt:
 460    case ppir_op_sin:
 461    case ppir_op_cos:
 462    {
 463       f->scalar.dest_vec = false;
 464       f->scalar.arg1_en = false;
 465
 466       ppir_dest *dest = &alu->dest;
 467       int dest_component = ffs(dest->write_mask) - 1;
 468       assert(dest_component >= 0);
 469       f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 470       f->scalar.dest_modifier = dest->modifier;
 471
 472       ppir_src *src = alu->src;
 473       f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
 474       f->scalar.arg0_absolute = src->absolute;
 475       f->scalar.arg0_negate = src->negate;
 476
 477       switch (node->op) {
 478       case ppir_op_rsqrt:
 479          f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
 480          break;
 481       case ppir_op_log2:
 482          f->scalar.op = ppir_codegen_combine_scalar_op_log2;
 483          break;
 484       case ppir_op_exp2:
 485          f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
 486          break;
 487       case ppir_op_rcp:
 488          f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
 489          break;
 490       case ppir_op_sqrt:
 491          f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
 492          break;
 493       case ppir_op_sin:
 494          f->scalar.op = ppir_codegen_combine_scalar_op_sin;
 495          break;
 496       case ppir_op_cos:
 497          f->scalar.op = ppir_codegen_combine_scalar_op_cos;
 498          break;
 499       default:
 500          break;
 501       }
 502    }
 503    default:
 504       break;
 505    }
 506 }
 507
 508 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
 509 {
 510    assert(node->op == ppir_op_store_temp);
 511
 512    ppir_codegen_field_temp_write *f = code;
 513    ppir_store_node *snode = ppir_node_to_store(node);
 514    int num_components = snode->num_components;
 515
 516    f->temp_write.dest = 0x03; // 11 - temporary
 517    f->temp_write.source = snode->src.reg->index;
 518
 519    int alignment = num_components == 4 ? 2 : num_components - 1;
 520    f->temp_write.alignment = alignment;
 521    f->temp_write.index = snode->index << (2 - alignment);
 522
 523    f->temp_write.offset_reg = snode->index >> 2;
 524 }
 525
 526 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
 527 {
 528    for (int i = 0; i < constant->num; i++)
 529       code[i] = util_float_to_half(constant->value[i].f);
 530 }
 531
 532 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
 533 {
 534    ppir_codegen_field_branch *b = code;
 535    assert(node->op == ppir_op_discard);
 536
 537    b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
 538    b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
 539    b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
 540 }
 541
 542 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
 543 {
 544    ppir_codegen_field_branch *b = code;
 545    ppir_branch_node *branch;
 546    ppir_instr *target_instr;
 547    if (node->op == ppir_op_discard) {
 548       ppir_codegen_encode_discard(node, code);
 549       return;
 550    }
 551
 552    assert(node->op == ppir_op_branch);
 553    branch = ppir_node_to_branch(node);
 554
 555    b->branch.unknown_0 = 0x0;
 556    b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
 557    b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
 558    b->branch.cond_gt = branch->cond_gt;
 559    b->branch.cond_eq = branch->cond_eq;
 560    b->branch.cond_lt = branch->cond_lt;
 561    b->branch.unknown_1 = 0x0;
 562
 563    target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
 564    b->branch.target = target_instr->offset - node->instr->offset;
 565    b->branch.next_count = target_instr->encode_size;
 566 }
 567
/* Signature shared by all per-slot encoders: the node occupying the slot
 * and the scratch buffer the encoded field is written to. */
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);

/* Encoder for each instruction slot, indexed by slot number. */
static const ppir_codegen_instr_slot_encode_func
ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
   [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
};
 583
/* Size in bits of each slot's encoded field, indexed by slot number.
 * NOTE(review): the ten entries presumably follow the slot order used by
 * ppir_codegen_encode_slot - confirm against the slot enum. */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};
 587
 588 static inline int align_to_word(int size)
 589 {
 590    return ((size + 0x1f) >> 5);
 591 }
 592
 593 static int get_instr_encode_size(ppir_instr *instr)
 594 {
 595    int size = 0;
 596
 597    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 598       if (instr->slots[i])
 599          size += ppir_codegen_field_size[i];
 600    }
 601
 602    for (int i = 0; i < 2; i++) {
 603       if (instr->constant[i].num)
 604          size += 64;
 605    }
 606
 607    return align_to_word(size) + 1;
 608 }
 609
 610 static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
 611 {
 612    int off1 = dst_offset & 0x1f;
 613    uint32_t *cpy_dst = dst, *cpy_src = src;
 614
 615    cpy_dst += (dst_offset >> 5);
 616
 617    if (off1) {
 618       int off2 = 32 - off1;
 619       int cpy_size = 0;
 620       while (1) {
 621          *cpy_dst |= *cpy_src << off1;
 622          cpy_dst++;
 623
 624          cpy_size += off2;
 625          if (cpy_size >= src_size)
 626             break;
 627
 628          *cpy_dst |= *cpy_src >> off2;
 629          cpy_src++;
 630
 631          cpy_size += off1;
 632          if (cpy_size >= src_size)
 633             break;
 634       }
 635    }
 636    else
 637       memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
 638 }
 639
 640 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
 641 {
 642    int size = 0;
 643    ppir_codegen_ctrl *ctrl = code;
 644
 645    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 646       if (instr->slots[i]) {
 647          /* max field size (73), align to dword */
 648          uint8_t output[12] = {0};
 649
 650          ppir_codegen_encode_slot[i](instr->slots[i], output);
 651          bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
 652
 653          size += ppir_codegen_field_size[i];
 654          ctrl->fields |= 1 << i;
 655       }
 656    }
 657
 658    if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
 659       ctrl->sync = true;
 660
 661    for (int i = 0; i < 2; i++) {
 662       if (instr->constant[i].num) {
 663          uint16_t output[4] = {0};
 664
 665          ppir_codegen_encode_const(instr->constant + i, output);
 666          bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
 667
 668          size += 64;
 669          ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
 670       }
 671    }
 672
 673    size = align_to_word(size) + 1;
 674
 675    ctrl->count = size;
 676    if (instr->is_end)
 677       ctrl->stop = true;
 678
 679    if (last_code) {
 680       ppir_codegen_ctrl *last_ctrl = last_code;
 681       last_ctrl->next_count = size;
 682       last_ctrl->prefetch = true;
 683    }
 684
 685    return size;
 686 }
 687
 688 static void ppir_codegen_print_prog(ppir_compiler *comp)
 689 {
 690    uint32_t *prog = comp->prog->shader;
 691    unsigned offset = 0;
 692
 693    printf("========ppir codegen========\n");
 694    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 695       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 696          printf("%03d (@%6d): ", instr->index, instr->offset);
 697          int n = prog[0] & 0x1f;
 698          for (int i = 0; i < n; i++) {
 699             if (i && i % 6 == 0)
 700                printf("\n    ");
 701             printf("%08x ", prog[i]);
 702          }
 703          printf("\n");
 704          ppir_disassemble_instr(prog, offset);
 705          prog += n;
 706          offset += n;
 707       }
 708    }
 709    printf("-----------------------\n");
 710 }
 711
 712 bool ppir_codegen_prog(ppir_compiler *comp)
 713 {
 714    int size = 0;
 715    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 716       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 717          instr->offset = size;
 718          instr->encode_size = get_instr_encode_size(instr);
 719          size += instr->encode_size;
 720       }
 721    }
 722
 723    uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
 724    if (!prog)
 725       return false;
 726
 727    uint32_t *code = prog, *last_code = NULL;
 728    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 729       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 730          int offset = encode_instr(instr, code, last_code);
 731          last_code = code;
 732          code += offset;
 733       }
 734    }
 735
 736    comp->prog->shader = prog;
 737    comp->prog->shader_size = size * sizeof(uint32_t);
 738
 739    if (lima_debug & LIMA_DEBUG_PP)
 740       ppir_codegen_print_prog(comp);
 741
 742    return true;
 743 }