2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file brw_wm_channel_expressions.cpp
27 * Breaks vector operations down into operations on each component.
29 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
30 * channel of a vector is laid out as 1 or 2 8-float registers. Each
31 * ALU operation operates on one of those channel registers. As a
32 * result, there is no value to the 965 fragment shader in tracking
33 * "vector" expressions in the sense of GLSL fragment shaders, when
34 * doing a channel at a time may help in constant folding, algebraic
35 * simplification, and reducing the liveness of channel registers.
37 * The exception to the desire to break everything down to floats is
38 * texturing.  The texture sampler returns a writemasked
39 * 4/8-register sequence containing the texture values. We don't want
40 * to dispatch to the sampler separately for each channel we need, so
41 * we do retain the vector types in that case.
44 #include "brw_program.h"
45 #include "compiler/glsl/ir.h"
46 #include "compiler/glsl/ir_expression_flattening.h"
47 #include "compiler/glsl_types.h"
49 class ir_channel_expressions_visitor
: public ir_hierarchical_visitor
{
51 ir_channel_expressions_visitor()
53 this->progress
= false;
57 ir_visitor_status
visit_leave(ir_assignment
*);
59 ir_rvalue
*get_element(ir_variable
*var
, unsigned int element
);
60 void assign(ir_assignment
*ir
, int elem
, ir_rvalue
*val
);
67 channel_expressions_predicate(ir_instruction
*ir
)
69 ir_expression
*expr
= ir
->as_expression();
75 switch (expr
->operation
) {
76 case ir_unop_pack_half_2x16
:
77 case ir_unop_pack_snorm_2x16
:
78 case ir_unop_pack_snorm_4x8
:
79 case ir_unop_pack_unorm_2x16
:
80 case ir_unop_pack_unorm_4x8
:
83 /* these opcodes need to act on the whole vector,
84 * just like texturing.
86 case ir_unop_interpolate_at_centroid
:
87 case ir_binop_interpolate_at_offset
:
88 case ir_binop_interpolate_at_sample
:
89 case ir_unop_pack_double_2x32
:
90 case ir_unop_pack_int_2x32
:
91 case ir_unop_pack_uint_2x32
:
97 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
98 if (expr
->operands
[i
]->type
->is_vector())
106 brw_do_channel_expressions(exec_list
*instructions
)
108 ir_channel_expressions_visitor v
;
110 /* Pull out any matrix expression to a separate assignment to a
111 * temp. This will make our handling of the breakdown to
112 * operations on the matrix's vector components much easier.
114 do_expression_flattening(instructions
, channel_expressions_predicate
);
116 visit_list_elements(&v
, instructions
);
122 ir_channel_expressions_visitor::get_element(ir_variable
*var
, unsigned int elem
)
124 ir_dereference
*deref
;
126 if (var
->type
->is_scalar())
127 return new(mem_ctx
) ir_dereference_variable(var
);
129 assert(elem
< var
->type
->components());
130 deref
= new(mem_ctx
) ir_dereference_variable(var
);
131 return new(mem_ctx
) ir_swizzle(deref
, elem
, 0, 0, 0, 1);
135 ir_channel_expressions_visitor::assign(ir_assignment
*ir
, int elem
, ir_rvalue
*val
)
137 ir_dereference
*lhs
= ir
->lhs
->clone(mem_ctx
, NULL
);
138 ir_assignment
*assign
;
140 /* This assign-of-expression should have been generated by the
141 * expression flattening visitor (since we never short circit to
142 * not flatten, even for plain assignments of variables), so the
143 * writemask is always full.
145 assert(ir
->write_mask
== (1 << ir
->lhs
->type
->components()) - 1);
147 assign
= new(mem_ctx
) ir_assignment(lhs
, val
, NULL
, (1 << elem
));
148 ir
->insert_before(assign
);
152 ir_channel_expressions_visitor::visit_leave(ir_assignment
*ir
)
154 ir_expression
*expr
= ir
->rhs
->as_expression();
155 bool found_vector
= false;
156 unsigned int i
, vector_elements
= 1;
157 ir_variable
*op_var
[4];
160 return visit_continue
;
163 this->mem_ctx
= ralloc_parent(ir
);
165 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
166 if (expr
->operands
[i
]->type
->is_vector()) {
168 vector_elements
= expr
->operands
[i
]->type
->vector_elements
;
173 return visit_continue
;
175 switch (expr
->operation
) {
176 case ir_unop_pack_half_2x16
:
177 case ir_unop_pack_snorm_2x16
:
178 case ir_unop_pack_snorm_4x8
:
179 case ir_unop_pack_unorm_2x16
:
180 case ir_unop_pack_unorm_4x8
:
181 case ir_unop_interpolate_at_centroid
:
182 case ir_binop_interpolate_at_offset
:
183 case ir_binop_interpolate_at_sample
:
184 /* We scalarize these in NIR, so no need to do it here */
185 case ir_unop_pack_double_2x32
:
186 case ir_unop_pack_int_2x32
:
187 case ir_unop_pack_uint_2x32
:
188 return visit_continue
;
194 /* Store the expression operands in temps so we can use them
197 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
198 ir_assignment
*assign
;
199 ir_dereference
*deref
;
201 assert(!expr
->operands
[i
]->type
->is_matrix());
203 op_var
[i
] = new(mem_ctx
) ir_variable(expr
->operands
[i
]->type
,
204 "channel_expressions",
206 ir
->insert_before(op_var
[i
]);
208 deref
= new(mem_ctx
) ir_dereference_variable(op_var
[i
]);
209 assign
= new(mem_ctx
) ir_assignment(deref
,
212 ir
->insert_before(assign
);
215 const glsl_type
*element_type
= glsl_type::get_instance(ir
->lhs
->type
->base_type
,
218 /* OK, time to break down this vector operation. */
219 switch (expr
->operation
) {
220 case ir_unop_bit_not
:
221 case ir_unop_logic_not
:
232 case ir_unop_bitcast_i2f
:
233 case ir_unop_bitcast_f2i
:
234 case ir_unop_bitcast_f2u
:
235 case ir_unop_bitcast_u2f
:
236 case ir_unop_bitcast_u642d
:
237 case ir_unop_bitcast_i642d
:
238 case ir_unop_bitcast_d2u64
:
239 case ir_unop_bitcast_d2i64
:
275 case ir_unop_u642i64
:
276 case ir_unop_i642u64
:
281 case ir_unop_round_even
:
285 case ir_unop_dFdx_coarse
:
286 case ir_unop_dFdx_fine
:
288 case ir_unop_dFdy_coarse
:
289 case ir_unop_dFdy_fine
:
290 case ir_unop_bitfield_reverse
:
291 case ir_unop_bit_count
:
292 case ir_unop_find_msb
:
293 case ir_unop_find_lsb
:
294 case ir_unop_saturate
:
295 case ir_unop_subroutine_to_int
:
296 for (i
= 0; i
< vector_elements
; i
++) {
297 ir_rvalue
*op0
= get_element(op_var
[0], i
);
299 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
309 case ir_binop_imul_high
:
312 case ir_binop_borrow
:
317 case ir_binop_lshift
:
318 case ir_binop_rshift
:
319 case ir_binop_bit_and
:
320 case ir_binop_bit_xor
:
321 case ir_binop_bit_or
:
322 case ir_binop_logic_and
:
323 case ir_binop_logic_xor
:
324 case ir_binop_logic_or
:
326 case ir_binop_greater
:
327 case ir_binop_lequal
:
328 case ir_binop_gequal
:
330 case ir_binop_nequal
:
332 for (i
= 0; i
< vector_elements
; i
++) {
333 ir_rvalue
*op0
= get_element(op_var
[0], i
);
334 ir_rvalue
*op1
= get_element(op_var
[1], i
);
336 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
344 ir_expression
*last
= NULL
;
345 for (i
= 0; i
< vector_elements
; i
++) {
346 ir_rvalue
*op0
= get_element(op_var
[0], i
);
347 ir_rvalue
*op1
= get_element(op_var
[1], i
);
350 temp
= new(mem_ctx
) ir_expression(ir_binop_mul
,
355 last
= new(mem_ctx
) ir_expression(ir_binop_add
,
367 case ir_binop_all_equal
:
368 case ir_binop_any_nequal
: {
369 ir_expression
*last
= NULL
;
370 for (i
= 0; i
< vector_elements
; i
++) {
371 ir_rvalue
*op0
= get_element(op_var
[0], i
);
372 ir_rvalue
*op1
= get_element(op_var
[1], i
);
374 ir_expression_operation join
;
376 if (expr
->operation
== ir_binop_all_equal
)
377 join
= ir_binop_logic_and
;
379 join
= ir_binop_logic_or
;
381 temp
= new(mem_ctx
) ir_expression(expr
->operation
,
386 last
= new(mem_ctx
) ir_expression(join
,
398 unreachable("noise should have been broken down to function call");
400 case ir_binop_ubo_load
:
401 case ir_unop_get_buffer_size
:
402 unreachable("not yet supported");
407 case ir_triop_bitfield_extract
:
408 for (i
= 0; i
< vector_elements
; i
++) {
409 ir_rvalue
*op0
= get_element(op_var
[0], i
);
410 ir_rvalue
*op1
= get_element(op_var
[1], i
);
411 ir_rvalue
*op2
= get_element(op_var
[2], i
);
413 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
421 case ir_quadop_bitfield_insert
:
422 for (i
= 0; i
< vector_elements
; i
++) {
423 ir_rvalue
*op0
= get_element(op_var
[0], i
);
424 ir_rvalue
*op1
= get_element(op_var
[1], i
);
425 ir_rvalue
*op2
= get_element(op_var
[2], i
);
426 ir_rvalue
*op3
= get_element(op_var
[3], i
);
428 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
437 case ir_unop_pack_snorm_2x16
:
438 case ir_unop_pack_snorm_4x8
:
439 case ir_unop_pack_unorm_2x16
:
440 case ir_unop_pack_unorm_4x8
:
441 case ir_unop_pack_half_2x16
:
442 case ir_unop_unpack_snorm_2x16
:
443 case ir_unop_unpack_snorm_4x8
:
444 case ir_unop_unpack_unorm_2x16
:
445 case ir_unop_unpack_unorm_4x8
:
446 case ir_unop_unpack_half_2x16
:
447 case ir_binop_vector_extract
:
448 case ir_triop_vector_insert
:
449 case ir_quadop_vector
:
450 case ir_unop_ssbo_unsized_array_length
:
451 unreachable("should have been lowered");
453 case ir_unop_interpolate_at_centroid
:
454 case ir_binop_interpolate_at_offset
:
455 case ir_binop_interpolate_at_sample
:
456 case ir_unop_unpack_double_2x32
:
457 unreachable("not reached: expression operates on scalars only");
459 case ir_unop_pack_double_2x32
:
460 case ir_unop_pack_int_2x32
:
461 case ir_unop_pack_uint_2x32
:
462 unreachable("not reached: to be lowered in NIR, should've been skipped");
464 case ir_unop_frexp_sig
:
465 case ir_unop_frexp_exp
:
466 unreachable("should have been lowered by lower_instructions");
468 case ir_unop_vote_any
:
469 case ir_unop_vote_all
:
470 case ir_unop_vote_eq
:
471 case ir_unop_unpack_int_2x32
:
472 case ir_unop_unpack_uint_2x32
:
473 unreachable("unsupported");
477 this->progress
= true;
479 return visit_continue
;