src/gallium/drivers/lima/ir/pp/codegen.c

   1 /*
   2  * Copyright (c) 2017 Lima Project
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the
  12  * next paragraph) shall be included in all copies or substantial portions
  13  * of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include "util/ralloc.h"
  26 #include "util/u_half.h"
  27 #include "util/bitscan.h"
  28
  29 #include "ppir.h"
  30 #include "codegen.h"
  31 #include "lima_context.h"
  32
  33 static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
  34 {
  35    unsigned ret = 0;
  36    for (int i = 0; i < 4; i++)
  37       ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
  38    return ret;
  39 }
  40
  41 static int get_scl_reg_index(ppir_src *src, int component)
  42 {
  43    int ret = ppir_target_get_src_reg_index(src);
  44    ret += src->swizzle[component];
  45    return ret;
  46 }
  47
  48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
  49 {
  50    ppir_codegen_field_varying *f = code;
  51    ppir_load_node *load = ppir_node_to_load(node);
  52    ppir_dest *dest = &load->dest;
  53    int index = ppir_target_get_dest_reg_index(dest);
  54    int num_components = load->num_components;
  55
  56    if (node->op != ppir_op_load_coords_reg) {
  57       assert(node->op == ppir_op_load_varying ||
  58              node->op == ppir_op_load_coords ||
  59              node->op == ppir_op_load_fragcoord ||
  60              node->op == ppir_op_load_pointcoord ||
  61              node->op == ppir_op_load_frontface);
  62
  63       f->imm.dest = index >> 2;
  64       f->imm.mask = dest->write_mask << (index & 0x3);
  65
  66       int alignment = num_components == 3 ? 3 : num_components - 1;
  67       f->imm.alignment = alignment;
  68
  69       if (load->num_src) {
  70          index = ppir_target_get_src_reg_index(&load->src);
  71          f->imm.offset_vector = index >> 2;
  72          f->imm.offset_scalar = index & 0x3;
  73       } else
  74          f->imm.offset_vector = 0xf;
  75
  76       if (alignment == 3)
  77          f->imm.index = load->index >> 2;
  78       else
  79          f->imm.index = load->index >> alignment;
  80
  81       switch (node->op) {
  82          case ppir_op_load_fragcoord:
  83             f->imm.source_type = 2;
  84             f->imm.perspective = 3;
  85             break;
  86          case ppir_op_load_pointcoord:
  87             f->imm.source_type = 3;
  88             break;
  89          case ppir_op_load_frontface:
  90             f->imm.source_type = 3;
  91             f->imm.perspective = 1;
  92             break;
  93          case ppir_op_load_coords:
  94             /* num_components == 3 implies cubemap as we don't support 3D textures */
  95             f->imm.source_type = num_components == 3 ? 2 : 0;
  96             break;
  97          default:
  98             break;
  99       }
 100    }
 101    else {  /* node->op == ppir_op_load_coords_reg */
 102       f->reg.dest = index >> 2;
 103       f->reg.mask = dest->write_mask << (index & 0x3);
 104
 105       if (load->num_src) {
 106          /* num_components == 3 implies cubemap as we don't support 3D textures */
 107          if (num_components == 3) {
 108             f->reg.source_type = 2;
 109             f->reg.perspective = 1;
 110          } else {
 111             f->reg.source_type = 1;
 112          }
 113          ppir_src *src = &load->src;
 114          index = ppir_target_get_src_reg_index(src);
 115          f->reg.source = index >> 2;
 116          f->reg.negate = src->negate;
 117          f->reg.absolute = src->absolute;
 118          f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
 119       }
 120    }
 121 }
 122
 123 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
 124 {
 125    ppir_codegen_field_sampler *f = code;
 126    ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
 127
 128    f->index = ldtex->sampler;
 129
 130    f->lod_bias_en = ldtex->lod_bias_en;
 131    f->explicit_lod = ldtex->explicit_lod;
 132    if (ldtex->lod_bias_en)
 133       ppir_target_get_src_reg_index(&ldtex->src[1]);
 134
 135    switch (ldtex->sampler_dim) {
 136    case GLSL_SAMPLER_DIM_2D:
 137    case GLSL_SAMPLER_DIM_RECT:
 138    case GLSL_SAMPLER_DIM_EXTERNAL:
 139       f->type = ppir_codegen_sampler_type_2d;
 140       break;
 141    case GLSL_SAMPLER_DIM_CUBE:
 142       f->type = ppir_codegen_sampler_type_cube;
 143       break;
 144    default:
 145       break;
 146    }
 147
 148    f->offset_en = 0;
 149    f->unknown_2 = 0x39001;
 150 }
 151
 152 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
 153 {
 154    ppir_codegen_field_uniform *f = code;
 155    ppir_load_node *load = ppir_node_to_load(node);
 156
 157    switch (node->op) {
 158       case ppir_op_load_uniform:
 159          f->source = ppir_codegen_uniform_src_uniform;
 160          break;
 161       case ppir_op_load_temp:
 162          f->source = ppir_codegen_uniform_src_temporary;
 163          break;
 164       default:
 165          assert(0);
 166    }
 167
 168    /* Uniforms are always aligned to vec4 boundary */
 169    f->alignment = 2;
 170    f->index = load->index;
 171
 172    if (load->num_src) {
 173       f->offset_en = 1;
 174       f->offset_reg = ppir_target_get_src_reg_index(&load->src);
 175    }
 176 }
 177
 178 static unsigned shift_to_op(int shift)
 179 {
 180    assert(shift >= -3 && shift <= 3);
 181    return shift < 0 ? shift + 8 : shift;
 182 }
 183
 184 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
 185 {
 186    ppir_codegen_field_vec4_mul *f = code;
 187    ppir_alu_node *alu = ppir_node_to_alu(node);
 188
 189    ppir_dest *dest = &alu->dest;
 190    int dest_shift = 0;
 191    if (dest->type != ppir_target_pipeline) {
 192       int index = ppir_target_get_dest_reg_index(dest);
 193       dest_shift = index & 0x3;
 194       f->dest = index >> 2;
 195       f->mask = dest->write_mask << dest_shift;
 196    }
 197    f->dest_modifier = dest->modifier;
 198
 199    switch (node->op) {
 200    case ppir_op_mul:
 201       f->op = shift_to_op(alu->shift);
 202       break;
 203    case ppir_op_mov:
 204    case ppir_op_store_color:
 205       f->op = ppir_codegen_vec4_mul_op_mov;
 206       break;
 207    case ppir_op_max:
 208       f->op = ppir_codegen_vec4_mul_op_max;
 209       break;
 210    case ppir_op_min:
 211       f->op = ppir_codegen_vec4_mul_op_min;
 212       break;
 213    case ppir_op_and:
 214       f->op = ppir_codegen_vec4_mul_op_and;
 215       break;
 216    case ppir_op_or:
 217       f->op = ppir_codegen_vec4_mul_op_or;
 218       break;
 219    case ppir_op_xor:
 220       f->op = ppir_codegen_vec4_mul_op_xor;
 221       break;
 222    case ppir_op_gt:
 223       f->op = ppir_codegen_vec4_mul_op_gt;
 224       break;
 225    case ppir_op_ge:
 226       f->op = ppir_codegen_vec4_mul_op_ge;
 227       break;
 228    case ppir_op_eq:
 229       f->op = ppir_codegen_vec4_mul_op_eq;
 230       break;
 231    case ppir_op_ne:
 232       f->op = ppir_codegen_vec4_mul_op_ne;
 233       break;
 234    case ppir_op_not:
 235       f->op = ppir_codegen_vec4_mul_op_not;
 236       break;
 237    default:
 238       break;
 239    }
 240
 241    ppir_src *src = alu->src;
 242    int index = ppir_target_get_src_reg_index(src);
 243    f->arg0_source = index >> 2;
 244    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 245    f->arg0_absolute = src->absolute;
 246    f->arg0_negate = src->negate;
 247
 248    if (alu->num_src == 2) {
 249       src = alu->src + 1;
 250       index = ppir_target_get_src_reg_index(src);
 251       f->arg1_source = index >> 2;
 252       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 253       f->arg1_absolute = src->absolute;
 254       f->arg1_negate = src->negate;
 255    }
 256 }
 257
 258 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
 259 {
 260    ppir_codegen_field_float_mul *f = code;
 261    ppir_alu_node *alu = ppir_node_to_alu(node);
 262
 263    ppir_dest *dest = &alu->dest;
 264    int dest_component = ffs(dest->write_mask) - 1;
 265    assert(dest_component >= 0);
 266
 267    if (dest->type != ppir_target_pipeline) {
 268       f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 269       f->output_en = true;
 270    }
 271    f->dest_modifier = dest->modifier;
 272
 273    switch (node->op) {
 274    case ppir_op_mul:
 275       f->op = shift_to_op(alu->shift);
 276       break;
 277    case ppir_op_mov:
 278       f->op = ppir_codegen_float_mul_op_mov;
 279       break;
 280    case ppir_op_sel_cond:
 281       f->op = ppir_codegen_float_mul_op_mov;
 282       break;
 283    case ppir_op_max:
 284       f->op = ppir_codegen_float_mul_op_max;
 285       break;
 286    case ppir_op_min:
 287       f->op = ppir_codegen_float_mul_op_min;
 288       break;
 289    case ppir_op_and:
 290       f->op = ppir_codegen_float_mul_op_and;
 291       break;
 292    case ppir_op_or:
 293       f->op = ppir_codegen_float_mul_op_or;
 294       break;
 295    case ppir_op_xor:
 296       f->op = ppir_codegen_float_mul_op_xor;
 297       break;
 298    case ppir_op_gt:
 299       f->op = ppir_codegen_float_mul_op_gt;
 300       break;
 301    case ppir_op_ge:
 302       f->op = ppir_codegen_float_mul_op_ge;
 303       break;
 304    case ppir_op_eq:
 305       f->op = ppir_codegen_float_mul_op_eq;
 306       break;
 307    case ppir_op_ne:
 308       f->op = ppir_codegen_float_mul_op_ne;
 309       break;
 310    case ppir_op_not:
 311       f->op = ppir_codegen_float_mul_op_not;
 312       break;
 313    default:
 314       break;
 315    }
 316
 317    ppir_src *src = alu->src;
 318    f->arg0_source = get_scl_reg_index(src, dest_component);
 319    f->arg0_absolute = src->absolute;
 320    f->arg0_negate = src->negate;
 321
 322    if (alu->num_src == 2) {
 323       src = alu->src + 1;
 324       f->arg1_source = get_scl_reg_index(src, dest_component);
 325       f->arg1_absolute = src->absolute;
 326       f->arg1_negate = src->negate;
 327    }
 328 }
 329
 330 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
 331 {
 332    ppir_codegen_field_vec4_acc *f = code;
 333    ppir_alu_node *alu = ppir_node_to_alu(node);
 334
 335    ppir_dest *dest = &alu->dest;
 336    int index = ppir_target_get_dest_reg_index(dest);
 337    int dest_shift = index & 0x3;
 338    f->dest = index >> 2;
 339    f->mask = dest->write_mask << dest_shift;
 340    f->dest_modifier = dest->modifier;
 341
 342    switch (node->op) {
 343    case ppir_op_add:
 344       f->op = ppir_codegen_vec4_acc_op_add;
 345       break;
 346    case ppir_op_mov:
 347    case ppir_op_store_color:
 348       f->op = ppir_codegen_vec4_acc_op_mov;
 349       break;
 350    case ppir_op_sum3:
 351       f->op = ppir_codegen_vec4_acc_op_sum3;
 352       dest_shift = 0;
 353       break;
 354    case ppir_op_sum4:
 355       f->op = ppir_codegen_vec4_acc_op_sum4;
 356       dest_shift = 0;
 357       break;
 358    case ppir_op_floor:
 359       f->op = ppir_codegen_vec4_acc_op_floor;
 360       break;
 361    case ppir_op_ceil:
 362       f->op = ppir_codegen_vec4_acc_op_ceil;
 363       break;
 364    case ppir_op_fract:
 365       f->op = ppir_codegen_vec4_acc_op_fract;
 366       break;
 367    case ppir_op_gt:
 368       f->op = ppir_codegen_vec4_acc_op_gt;
 369       break;
 370    case ppir_op_ge:
 371       f->op = ppir_codegen_vec4_acc_op_ge;
 372       break;
 373    case ppir_op_eq:
 374       f->op = ppir_codegen_vec4_acc_op_eq;
 375       break;
 376    case ppir_op_ne:
 377       f->op = ppir_codegen_vec4_acc_op_ne;
 378       break;
 379    case ppir_op_select:
 380       f->op = ppir_codegen_vec4_acc_op_sel;
 381       break;
 382    case ppir_op_max:
 383       f->op = ppir_codegen_vec4_acc_op_max;
 384       break;
 385    case ppir_op_min:
 386       f->op = ppir_codegen_vec4_acc_op_min;
 387       break;
 388    case ppir_op_ddx:
 389       f->op = ppir_codegen_vec4_acc_op_dFdx;
 390       break;
 391    case ppir_op_ddy:
 392       f->op = ppir_codegen_vec4_acc_op_dFdy;
 393       break;
 394    default:
 395       break;
 396    }
 397
 398    ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
 399    index = ppir_target_get_src_reg_index(src);
 400
 401    if (src->type == ppir_target_pipeline &&
 402        src->pipeline == ppir_pipeline_reg_vmul)
 403       f->mul_in = true;
 404    else
 405       f->arg0_source = index >> 2;
 406
 407    f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 408    f->arg0_absolute = src->absolute;
 409    f->arg0_negate = src->negate;
 410
 411    if (++src < alu->src + alu->num_src) {
 412       index = ppir_target_get_src_reg_index(src);
 413       f->arg1_source = index >> 2;
 414       f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
 415       f->arg1_absolute = src->absolute;
 416       f->arg1_negate = src->negate;
 417    }
 418 }
 419
 420 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
 421 {
 422    ppir_codegen_field_float_acc *f = code;
 423    ppir_alu_node *alu = ppir_node_to_alu(node);
 424
 425    ppir_dest *dest = &alu->dest;
 426    int dest_component = ffs(dest->write_mask) - 1;
 427    assert(dest_component >= 0);
 428
 429    f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 430    f->output_en = true;
 431    f->dest_modifier = dest->modifier;
 432
 433    switch (node->op) {
 434    case ppir_op_add:
 435       f->op = shift_to_op(alu->shift);
 436       break;
 437    case ppir_op_mov:
 438       f->op = ppir_codegen_float_acc_op_mov;
 439       break;
 440    case ppir_op_max:
 441       f->op = ppir_codegen_float_acc_op_max;
 442       break;
 443    case ppir_op_min:
 444       f->op = ppir_codegen_float_acc_op_min;
 445       break;
 446    case ppir_op_floor:
 447       f->op = ppir_codegen_float_acc_op_floor;
 448       break;
 449    case ppir_op_ceil:
 450       f->op = ppir_codegen_float_acc_op_ceil;
 451       break;
 452    case ppir_op_fract:
 453       f->op = ppir_codegen_float_acc_op_fract;
 454       break;
 455    case ppir_op_gt:
 456       f->op = ppir_codegen_float_acc_op_gt;
 457       break;
 458    case ppir_op_ge:
 459       f->op = ppir_codegen_float_acc_op_ge;
 460       break;
 461    case ppir_op_eq:
 462       f->op = ppir_codegen_float_acc_op_eq;
 463       break;
 464    case ppir_op_ne:
 465       f->op = ppir_codegen_float_acc_op_ne;
 466       break;
 467    case ppir_op_select:
 468       f->op = ppir_codegen_float_acc_op_sel;
 469       break;
 470    case ppir_op_ddx:
 471       f->op = ppir_codegen_float_acc_op_dFdx;
 472       break;
 473    case ppir_op_ddy:
 474       f->op = ppir_codegen_float_acc_op_dFdy;
 475       break;
 476    default:
 477       break;
 478    }
 479
 480    ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
 481    if (src->type == ppir_target_pipeline &&
 482        src->pipeline == ppir_pipeline_reg_fmul)
 483       f->mul_in = true;
 484    else
 485       f->arg0_source = get_scl_reg_index(src, dest_component);
 486    f->arg0_absolute = src->absolute;
 487    f->arg0_negate = src->negate;
 488
 489    if (++src < alu->src + alu->num_src) {
 490       f->arg1_source = get_scl_reg_index(src, dest_component);
 491       f->arg1_absolute = src->absolute;
 492       f->arg1_negate = src->negate;
 493    }
 494 }
 495
 496 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
 497 {
 498    ppir_codegen_field_combine *f = code;
 499    ppir_alu_node *alu = ppir_node_to_alu(node);
 500
 501    switch (node->op) {
 502    case ppir_op_rsqrt:
 503    case ppir_op_log2:
 504    case ppir_op_exp2:
 505    case ppir_op_rcp:
 506    case ppir_op_sqrt:
 507    case ppir_op_sin:
 508    case ppir_op_cos:
 509    {
 510       f->scalar.dest_vec = false;
 511       f->scalar.arg1_en = false;
 512
 513       ppir_dest *dest = &alu->dest;
 514       int dest_component = ffs(dest->write_mask) - 1;
 515       assert(dest_component >= 0);
 516       f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
 517       f->scalar.dest_modifier = dest->modifier;
 518
 519       ppir_src *src = alu->src;
 520       f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
 521       f->scalar.arg0_absolute = src->absolute;
 522       f->scalar.arg0_negate = src->negate;
 523
 524       switch (node->op) {
 525       case ppir_op_rsqrt:
 526          f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
 527          break;
 528       case ppir_op_log2:
 529          f->scalar.op = ppir_codegen_combine_scalar_op_log2;
 530          break;
 531       case ppir_op_exp2:
 532          f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
 533          break;
 534       case ppir_op_rcp:
 535          f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
 536          break;
 537       case ppir_op_sqrt:
 538          f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
 539          break;
 540       case ppir_op_sin:
 541          f->scalar.op = ppir_codegen_combine_scalar_op_sin;
 542          break;
 543       case ppir_op_cos:
 544          f->scalar.op = ppir_codegen_combine_scalar_op_cos;
 545          break;
 546       default:
 547          break;
 548       }
 549    }
 550    default:
 551       break;
 552    }
 553 }
 554
 555 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
 556 {
 557    assert(node->op == ppir_op_store_temp);
 558
 559    ppir_codegen_field_temp_write *f = code;
 560    ppir_store_node *snode = ppir_node_to_store(node);
 561    int num_components = snode->num_components;
 562
 563    f->temp_write.dest = 0x03; // 11 - temporary
 564    f->temp_write.source = snode->src.reg->index;
 565
 566    int alignment = num_components == 4 ? 2 : num_components - 1;
 567    f->temp_write.alignment = alignment;
 568    f->temp_write.index = snode->index << (2 - alignment);
 569
 570    f->temp_write.offset_reg = snode->index >> 2;
 571 }
 572
 573 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
 574 {
 575    for (int i = 0; i < constant->num; i++)
 576       code[i] = util_float_to_half(constant->value[i].f);
 577 }
 578
 579 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
 580 {
 581    ppir_codegen_field_branch *b = code;
 582    assert(node->op == ppir_op_discard);
 583
 584    b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
 585    b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
 586    b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
 587 }
 588
 589 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
 590 {
 591    ppir_codegen_field_branch *b = code;
 592    ppir_branch_node *branch;
 593    ppir_instr *target_instr;
 594    ppir_block *target;
 595    if (node->op == ppir_op_discard) {
 596       ppir_codegen_encode_discard(node, code);
 597       return;
 598    }
 599
 600    assert(node->op == ppir_op_branch);
 601    branch = ppir_node_to_branch(node);
 602
 603    b->branch.unknown_0 = 0x0;
 604    b->branch.unknown_1 = 0x0;
 605
 606    if (branch->num_src == 2) {
 607       b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
 608       b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
 609       b->branch.cond_gt = branch->cond_gt;
 610       b->branch.cond_eq = branch->cond_eq;
 611       b->branch.cond_lt = branch->cond_lt;
 612    } else if (branch->num_src == 0) {
 613       /* Unconditional branch */
 614       b->branch.arg0_source = 0;
 615       b->branch.arg1_source = 0;
 616       b->branch.cond_gt = true;
 617       b->branch.cond_eq = true;
 618       b->branch.cond_lt = true;
 619    } else {
 620       assert(false);
 621    }
 622
 623    target = branch->target;
 624    while (list_is_empty(&target->instr_list)) {
 625       if (!target->list.next)
 626          break;
 627       target = LIST_ENTRY(ppir_block, target->list.next, list);
 628    }
 629
 630    assert(!list_is_empty(&target->instr_list));
 631
 632    target_instr = list_first_entry(&target->instr_list, ppir_instr, list);
 633    b->branch.target = target_instr->offset - node->instr->offset;
 634    b->branch.next_count = target_instr->encode_size;
 635 }
 636
/* Signature shared by every per-slot encoder: takes the node scheduled in
 * the slot and a pointer to the buffer the field is written into. */
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);

/* Dispatch table from instruction slot to its encoder. encode_instr() indexes
 * it with the slot number for every occupied slot. */
static const ppir_codegen_instr_slot_encode_func
ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
   [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
};
 652
/* Size in bits of the encoded field of each instruction slot, indexed by
 * slot number (the same index used for ppir_codegen_encode_slot). The
 * constant slots are not listed here; callers add 64 bits for each of them
 * separately. */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};
 656
 657 static inline int align_to_word(int size)
 658 {
 659    return ((size + 0x1f) >> 5);
 660 }
 661
 662 static int get_instr_encode_size(ppir_instr *instr)
 663 {
 664    int size = 0;
 665
 666    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 667       if (instr->slots[i])
 668          size += ppir_codegen_field_size[i];
 669    }
 670
 671    for (int i = 0; i < 2; i++) {
 672       if (instr->constant[i].num)
 673          size += 64;
 674    }
 675
 676    return align_to_word(size) + 1;
 677 }
 678
 679 static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
 680 {
 681    int off1 = dst_offset & 0x1f;
 682    uint32_t *cpy_dst = dst, *cpy_src = src;
 683
 684    cpy_dst += (dst_offset >> 5);
 685
 686    if (off1) {
 687       int off2 = 32 - off1;
 688       int cpy_size = 0;
 689       while (1) {
 690          *cpy_dst |= *cpy_src << off1;
 691          cpy_dst++;
 692
 693          cpy_size += off2;
 694          if (cpy_size >= src_size)
 695             break;
 696
 697          *cpy_dst |= *cpy_src >> off2;
 698          cpy_src++;
 699
 700          cpy_size += off1;
 701          if (cpy_size >= src_size)
 702             break;
 703       }
 704    }
 705    else
 706       memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
 707 }
 708
 709 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
 710 {
 711    int size = 0;
 712    ppir_codegen_ctrl *ctrl = code;
 713
 714    for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
 715       if (instr->slots[i]) {
 716          /* max field size (73), align to dword */
 717          uint8_t output[12] = {0};
 718
 719          ppir_codegen_encode_slot[i](instr->slots[i], output);
 720          bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
 721
 722          size += ppir_codegen_field_size[i];
 723          ctrl->fields |= 1 << i;
 724       }
 725    }
 726
 727    if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
 728       ctrl->sync = true;
 729
 730    if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
 731       ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
 732       if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
 733          ctrl->sync = true;
 734    }
 735
 736    if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
 737       ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
 738       if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
 739          ctrl->sync = true;
 740    }
 741
 742    for (int i = 0; i < 2; i++) {
 743       if (instr->constant[i].num) {
 744          uint16_t output[4] = {0};
 745
 746          ppir_codegen_encode_const(instr->constant + i, output);
 747          bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
 748
 749          size += 64;
 750          ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
 751       }
 752    }
 753
 754    size = align_to_word(size) + 1;
 755
 756    ctrl->count = size;
 757    if (instr->is_end)
 758       ctrl->stop = true;
 759
 760    if (last_code) {
 761       ppir_codegen_ctrl *last_ctrl = last_code;
 762       last_ctrl->next_count = size;
 763       last_ctrl->prefetch = true;
 764    }
 765
 766    return size;
 767 }
 768
 769 static void ppir_codegen_print_prog(ppir_compiler *comp)
 770 {
 771    uint32_t *prog = comp->prog->shader;
 772    unsigned offset = 0;
 773
 774    printf("========ppir codegen========\n");
 775    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 776       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 777          printf("%03d (@%6d): ", instr->index, instr->offset);
 778          int n = prog[0] & 0x1f;
 779          for (int i = 0; i < n; i++) {
 780             if (i && i % 6 == 0)
 781                printf("\n    ");
 782             printf("%08x ", prog[i]);
 783          }
 784          printf("\n");
 785          ppir_disassemble_instr(prog, offset);
 786          prog += n;
 787          offset += n;
 788       }
 789    }
 790    printf("-----------------------\n");
 791 }
 792
 793 bool ppir_codegen_prog(ppir_compiler *comp)
 794 {
 795    int size = 0;
 796    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 797       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 798          instr->offset = size;
 799          instr->encode_size = get_instr_encode_size(instr);
 800          size += instr->encode_size;
 801       }
 802    }
 803
 804    uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
 805    if (!prog)
 806       return false;
 807
 808    uint32_t *code = prog, *last_code = NULL;
 809    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
 810       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
 811          int offset = encode_instr(instr, code, last_code);
 812          last_code = code;
 813          code += offset;
 814       }
 815    }
 816
 817    comp->prog->shader = prog;
 818    comp->prog->shader_size = size * sizeof(uint32_t);
 819
 820    if (lima_debug & LIMA_DEBUG_PP)
 821       ppir_codegen_print_prog(comp);
 822
 823    return true;
 824 }