src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file brw_fs_channel_expressions.cpp
  26  *
  27  * Breaks vector operations down into operations on each component.
  28  *
  29  * The 965 fragment shader receives 8 or 16 pixels at a time, so each
  30  * channel of a vector is laid out as 1 or 2 8-float registers.  Each
  31  * ALU operation operates on one of those channel registers.  As a
  32  * result, there is no value to the 965 fragment shader in tracking
  33  * "vector" expressions in the sense of GLSL fragment shaders, when
  34  * doing a channel at a time may help in constant folding, algebraic
  35  * simplification, and reducing the liveness of channel registers.
  36  *
  37  * The exception to the desire to break everything down to floats is
  38  * texturing.  The texture sampler returns a writemasked
  39  * 4/8-register sequence containing the texture values.  We don't want
  40  * to dispatch to the sampler separately for each channel we need, so
  41  * we do retain the vector types in that case.
  42  */
  43
  44 #include "main/core.h"
  45 #include "brw_wm.h"
  46 #include "glsl/ir.h"
  47 #include "glsl/ir_expression_flattening.h"
  48 #include "glsl/nir/glsl_types.h"
  49
  50 class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
  51 public:
  52    ir_channel_expressions_visitor()
  53    {
  54       this->progress = false;
  55       this->mem_ctx = NULL;
  56    }
  57
  58    ir_visitor_status visit_leave(ir_assignment *);
  59
  60    ir_rvalue *get_element(ir_variable *var, unsigned int element);
  61    void assign(ir_assignment *ir, int elem, ir_rvalue *val);
  62
  63    bool progress;
  64    void *mem_ctx;
  65 };
  66
  67 static bool
  68 channel_expressions_predicate(ir_instruction *ir)
  69 {
  70    ir_expression *expr = ir->as_expression();
  71    unsigned int i;
  72
  73    if (!expr)
  74       return false;
  75
  76    switch (expr->operation) {
  77       /* these opcodes need to act on the whole vector,
  78        * just like texturing.
  79        */
  80       case ir_unop_interpolate_at_centroid:
  81       case ir_binop_interpolate_at_offset:
  82       case ir_binop_interpolate_at_sample:
  83          return false;
  84       default:
  85          break;
  86    }
  87
  88    for (i = 0; i < expr->get_num_operands(); i++) {
  89       if (expr->operands[i]->type->is_vector())
  90          return true;
  91    }
  92
  93    return false;
  94 }
  95
  96 bool
  97 brw_do_channel_expressions(exec_list *instructions)
  98 {
  99    ir_channel_expressions_visitor v;
 100
 101    /* Pull out any matrix expression to a separate assignment to a
 102     * temp.  This will make our handling of the breakdown to
 103     * operations on the matrix's vector components much easier.
 104     */
 105    do_expression_flattening(instructions, channel_expressions_predicate);
 106
 107    visit_list_elements(&v, instructions);
 108
 109    return v.progress;
 110 }
 111
 112 ir_rvalue *
 113 ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
 114 {
 115    ir_dereference *deref;
 116
 117    if (var->type->is_scalar())
 118       return new(mem_ctx) ir_dereference_variable(var);
 119
 120    assert(elem < var->type->components());
 121    deref = new(mem_ctx) ir_dereference_variable(var);
 122    return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
 123 }
 124
 125 void
 126 ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
 127 {
 128    ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
 129    ir_assignment *assign;
 130
 131    /* This assign-of-expression should have been generated by the
 132     * expression flattening visitor (since we never short circit to
 133     * not flatten, even for plain assignments of variables), so the
 134     * writemask is always full.
 135     */
 136    assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
 137
 138    assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
 139    ir->insert_before(assign);
 140 }
 141
 142 ir_visitor_status
 143 ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
 144 {
 145    ir_expression *expr = ir->rhs->as_expression();
 146    bool found_vector = false;
 147    unsigned int i, vector_elements = 1;
 148    ir_variable *op_var[3];
 149
 150    if (!expr)
 151       return visit_continue;
 152
 153    if (!this->mem_ctx)
 154       this->mem_ctx = ralloc_parent(ir);
 155
 156    for (i = 0; i < expr->get_num_operands(); i++) {
 157       if (expr->operands[i]->type->is_vector()) {
 158          found_vector = true;
 159          vector_elements = expr->operands[i]->type->vector_elements;
 160          break;
 161       }
 162    }
 163    if (!found_vector)
 164       return visit_continue;
 165
 166    switch (expr->operation) {
 167       case ir_unop_interpolate_at_centroid:
 168       case ir_binop_interpolate_at_offset:
 169       case ir_binop_interpolate_at_sample:
 170          return visit_continue;
 171
 172       default:
 173          break;
 174    }
 175
 176    /* Store the expression operands in temps so we can use them
 177     * multiple times.
 178     */
 179    for (i = 0; i < expr->get_num_operands(); i++) {
 180       ir_assignment *assign;
 181       ir_dereference *deref;
 182
 183       assert(!expr->operands[i]->type->is_matrix());
 184
 185       op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
 186                                            "channel_expressions",
 187                                            ir_var_temporary);
 188       ir->insert_before(op_var[i]);
 189
 190       deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
 191       assign = new(mem_ctx) ir_assignment(deref,
 192                                           expr->operands[i],
 193                                           NULL);
 194       ir->insert_before(assign);
 195    }
 196
 197    const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
 198                                                            1, 1);
 199
 200    /* OK, time to break down this vector operation. */
 201    switch (expr->operation) {
 202    case ir_unop_bit_not:
 203    case ir_unop_logic_not:
 204    case ir_unop_neg:
 205    case ir_unop_abs:
 206    case ir_unop_sign:
 207    case ir_unop_rcp:
 208    case ir_unop_rsq:
 209    case ir_unop_sqrt:
 210    case ir_unop_exp:
 211    case ir_unop_log:
 212    case ir_unop_exp2:
 213    case ir_unop_log2:
 214    case ir_unop_bitcast_i2f:
 215    case ir_unop_bitcast_f2i:
 216    case ir_unop_bitcast_f2u:
 217    case ir_unop_bitcast_u2f:
 218    case ir_unop_i2u:
 219    case ir_unop_u2i:
 220    case ir_unop_f2i:
 221    case ir_unop_f2u:
 222    case ir_unop_i2f:
 223    case ir_unop_f2b:
 224    case ir_unop_b2f:
 225    case ir_unop_i2b:
 226    case ir_unop_b2i:
 227    case ir_unop_u2f:
 228    case ir_unop_trunc:
 229    case ir_unop_ceil:
 230    case ir_unop_floor:
 231    case ir_unop_fract:
 232    case ir_unop_round_even:
 233    case ir_unop_sin:
 234    case ir_unop_cos:
 235    case ir_unop_dFdx:
 236    case ir_unop_dFdx_coarse:
 237    case ir_unop_dFdx_fine:
 238    case ir_unop_dFdy:
 239    case ir_unop_dFdy_coarse:
 240    case ir_unop_dFdy_fine:
 241    case ir_unop_bitfield_reverse:
 242    case ir_unop_bit_count:
 243    case ir_unop_find_msb:
 244    case ir_unop_find_lsb:
 245    case ir_unop_saturate:
 246    case ir_unop_subroutine_to_int:
 247       for (i = 0; i < vector_elements; i++) {
 248          ir_rvalue *op0 = get_element(op_var[0], i);
 249
 250          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
 251                                                   element_type,
 252                                                   op0,
 253                                                   NULL));
 254       }
 255       break;
 256
 257    case ir_binop_add:
 258    case ir_binop_sub:
 259    case ir_binop_mul:
 260    case ir_binop_imul_high:
 261    case ir_binop_div:
 262    case ir_binop_carry:
 263    case ir_binop_borrow:
 264    case ir_binop_mod:
 265    case ir_binop_min:
 266    case ir_binop_max:
 267    case ir_binop_pow:
 268    case ir_binop_lshift:
 269    case ir_binop_rshift:
 270    case ir_binop_bit_and:
 271    case ir_binop_bit_xor:
 272    case ir_binop_bit_or:
 273    case ir_binop_logic_and:
 274    case ir_binop_logic_xor:
 275    case ir_binop_logic_or:
 276    case ir_binop_less:
 277    case ir_binop_greater:
 278    case ir_binop_lequal:
 279    case ir_binop_gequal:
 280    case ir_binop_equal:
 281    case ir_binop_nequal:
 282       for (i = 0; i < vector_elements; i++) {
 283          ir_rvalue *op0 = get_element(op_var[0], i);
 284          ir_rvalue *op1 = get_element(op_var[1], i);
 285
 286          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
 287                                                   element_type,
 288                                                   op0,
 289                                                   op1));
 290       }
 291       break;
 292
 293    case ir_unop_any: {
 294       ir_expression *temp;
 295       temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
 296                                         element_type,
 297                                         get_element(op_var[0], 0),
 298                                         get_element(op_var[0], 1));
 299
 300       for (i = 2; i < vector_elements; i++) {
 301          temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
 302                                            element_type,
 303                                            get_element(op_var[0], i),
 304                                            temp);
 305       }
 306       assign(ir, 0, temp);
 307       break;
 308    }
 309
 310    case ir_binop_dot: {
 311       ir_expression *last = NULL;
 312       for (i = 0; i < vector_elements; i++) {
 313          ir_rvalue *op0 = get_element(op_var[0], i);
 314          ir_rvalue *op1 = get_element(op_var[1], i);
 315          ir_expression *temp;
 316
 317          temp = new(mem_ctx) ir_expression(ir_binop_mul,
 318                                            element_type,
 319                                            op0,
 320                                            op1);
 321          if (last) {
 322             last = new(mem_ctx) ir_expression(ir_binop_add,
 323                                               element_type,
 324                                               temp,
 325                                               last);
 326          } else {
 327             last = temp;
 328          }
 329       }
 330       assign(ir, 0, last);
 331       break;
 332    }
 333
 334    case ir_binop_all_equal:
 335    case ir_binop_any_nequal: {
 336       ir_expression *last = NULL;
 337       for (i = 0; i < vector_elements; i++) {
 338          ir_rvalue *op0 = get_element(op_var[0], i);
 339          ir_rvalue *op1 = get_element(op_var[1], i);
 340          ir_expression *temp;
 341          ir_expression_operation join;
 342
 343          if (expr->operation == ir_binop_all_equal)
 344             join = ir_binop_logic_and;
 345          else
 346             join = ir_binop_logic_or;
 347
 348          temp = new(mem_ctx) ir_expression(expr->operation,
 349                                            element_type,
 350                                            op0,
 351                                            op1);
 352          if (last) {
 353             last = new(mem_ctx) ir_expression(join,
 354                                               element_type,
 355                                               temp,
 356                                               last);
 357          } else {
 358             last = temp;
 359          }
 360       }
 361       assign(ir, 0, last);
 362       break;
 363    }
 364    case ir_unop_noise:
 365       unreachable("noise should have been broken down to function call");
 366
 367    case ir_binop_bfm: {
 368       /* Does not need to be scalarized, since its result will be identical
 369        * for all channels.
 370        */
 371       ir_rvalue *op0 = get_element(op_var[0], 0);
 372       ir_rvalue *op1 = get_element(op_var[1], 0);
 373
 374       assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
 375                                                element_type,
 376                                                op0,
 377                                                op1));
 378       break;
 379    }
 380
 381    case ir_binop_ubo_load:
 382    case ir_unop_get_buffer_size:
 383       unreachable("not yet supported");
 384
 385    case ir_triop_fma:
 386    case ir_triop_lrp:
 387    case ir_triop_csel:
 388    case ir_triop_bitfield_extract:
 389       for (i = 0; i < vector_elements; i++) {
 390          ir_rvalue *op0 = get_element(op_var[0], i);
 391          ir_rvalue *op1 = get_element(op_var[1], i);
 392          ir_rvalue *op2 = get_element(op_var[2], i);
 393
 394          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
 395                                                   element_type,
 396                                                   op0,
 397                                                   op1,
 398                                                   op2));
 399       }
 400       break;
 401
 402    case ir_triop_bfi: {
 403       /* Only a single BFM is needed for multiple BFIs. */
 404       ir_rvalue *op0 = get_element(op_var[0], 0);
 405
 406       for (i = 0; i < vector_elements; i++) {
 407          ir_rvalue *op1 = get_element(op_var[1], i);
 408          ir_rvalue *op2 = get_element(op_var[2], i);
 409
 410          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
 411                                                   element_type,
 412                                                   op0->clone(mem_ctx, NULL),
 413                                                   op1,
 414                                                   op2));
 415       }
 416       break;
 417    }
 418
 419    case ir_unop_pack_snorm_2x16:
 420    case ir_unop_pack_snorm_4x8:
 421    case ir_unop_pack_unorm_2x16:
 422    case ir_unop_pack_unorm_4x8:
 423    case ir_unop_pack_half_2x16:
 424    case ir_unop_unpack_snorm_2x16:
 425    case ir_unop_unpack_snorm_4x8:
 426    case ir_unop_unpack_unorm_2x16:
 427    case ir_unop_unpack_unorm_4x8:
 428    case ir_unop_unpack_half_2x16:
 429    case ir_binop_ldexp:
 430    case ir_binop_vector_extract:
 431    case ir_triop_vector_insert:
 432    case ir_quadop_bitfield_insert:
 433    case ir_quadop_vector:
 434    case ir_unop_ssbo_unsized_array_length:
 435       unreachable("should have been lowered");
 436
 437    case ir_unop_unpack_half_2x16_split_x:
 438    case ir_unop_unpack_half_2x16_split_y:
 439    case ir_binop_pack_half_2x16_split:
 440    case ir_unop_interpolate_at_centroid:
 441    case ir_binop_interpolate_at_offset:
 442    case ir_binop_interpolate_at_sample:
 443       unreachable("not reached: expression operates on scalars only");
 444
 445    case ir_unop_pack_double_2x32:
 446    case ir_unop_unpack_double_2x32:
 447    case ir_unop_frexp_sig:
 448    case ir_unop_frexp_exp:
 449    case ir_unop_d2f:
 450    case ir_unop_f2d:
 451    case ir_unop_d2i:
 452    case ir_unop_i2d:
 453    case ir_unop_d2u:
 454    case ir_unop_u2d:
 455    case ir_unop_d2b:
 456       unreachable("no fp64 support yet");
 457    }
 458
 459    ir->remove();
 460    this->progress = true;
 461
 462    return visit_continue;
 463 }