/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_fs_channel_expressions.cpp
 *
 * Breaks vector operations down into operations on each component.
 *
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
 * channel of a vector is laid out as 1 or 2 8-float registers. Each
 * ALU operation operates on one of those channel registers. As a
 * result, the 965 fragment shader gains nothing from tracking "vector"
 * expressions in the sense of GLSL fragment shaders, while working a
 * channel at a time helps constant folding, algebraic simplification,
 * and reducing the liveness of channel registers.
 *
 * The exception to the desire to break everything down to floats is
 * texturing. The texture sampler returns a writemasked 4/8-register
 * sequence containing the texture values. We don't want to dispatch
 * to the sampler separately for each channel we need, so we do retain
 * the vector types in that case.
 */
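
/*
 * As a rough GLSL-level sketch (the pass actually rewrites flattened IR
 * assignments, not source code), a vector assignment such as
 *
 *    result = a + b;            // vec4 result, a, b
 *
 * is broken into one writemasked scalar assignment per channel:
 *
 *    result.x = a.x + b.x;
 *    result.y = a.y + b.y;
 *    result.z = a.z + b.z;
 *    result.w = a.w + b.w;
 */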

extern "C" {
#include "main/core.h"
#include "brw_wm.h"
}
#include "glsl/ir.h"
#include "glsl/ir_expression_flattening.h"
#include "glsl/glsl_types.h"

class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
   ir_channel_expressions_visitor()
   {
      this->progress = false;
      this->mem_ctx = NULL;
   }

   ir_visitor_status visit_leave(ir_assignment *);

   ir_rvalue *get_element(ir_variable *var, unsigned int element);
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);

   bool progress;
   void *mem_ctx;
};

static bool
channel_expressions_predicate(ir_instruction *ir)
{
   ir_expression *expr = ir->as_expression();
   unsigned int i;

   if (!expr)
      return false;

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector())
         return true;
   }

   return false;
}

bool
brw_do_channel_expressions(exec_list *instructions)
{
   ir_channel_expressions_visitor v;

   /* Pull out any matrix expression to a separate assignment to a
    * temp. This will make our handling of the breakdown to
    * operations on the matrix's vector components much easier.
    */
   do_expression_flattening(instructions, channel_expressions_predicate);

   visit_list_elements(&v, instructions);

   return v.progress;
}

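/* Get an rvalue for one channel of a variable: the variable itself if it
 * is scalar, otherwise a single-component swizzle selecting channel "elem".
 */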
ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
   ir_dereference *deref;

   if (var->type->is_scalar())
      return new(mem_ctx) ir_dereference_variable(var);

   assert(elem < var->type->components());
   deref = new(mem_ctx) ir_dereference_variable(var);
   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}

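/* Emit, before the original assignment, a copy of "val" into just channel
 * "elem" of ir's LHS, using a one-bit writemask.
 */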
void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
   ir_assignment *assign;

   /* This assign-of-expression should have been generated by the
    * expression flattening visitor (since we never short-circuit to
    * not flatten, even for plain assignments of variables), so the
    * writemask is always full.
    */
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);

   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
   ir->insert_before(assign);
}

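/* Scalarize one flattened vector assignment: copy each operand into a
 * temporary so it can be dereferenced once per channel, emit a writemasked
 * scalar assignment per channel, and finally remove the original.
 */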
ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   ir_variable *op_var[3];

   if (!expr)
      return visit_continue;

   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
         found_vector = true;
         vector_elements = expr->operands[i]->type->vector_elements;
         break;
      }
   }
   if (!found_vector)
      return visit_continue;

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
                                           "channel_expressions",
                                           ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
                                          expr->operands[i],
                                          NULL);
      ir->insert_before(assign);
   }

   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
                                                           1, 1);

   /* OK, time to break down this vector operation. */
   switch (expr->operation) {
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_sin_reduced:
   case ir_unop_cos_reduced:
   case ir_unop_dFdx:
   case ir_unop_dFdy:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  NULL));
      }
      break;

   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_imul_high:
   case ir_binop_div:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1));
      }
      break;

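   /* any(v) is reduced with a chain of logical ORs, e.g. for a bvec3 the
    * result is (v.z || (v.x || v.y)).
    */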
   case ir_unop_any: {
      ir_expression *temp;
      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                        element_type,
                                        get_element(op_var[0], 0),
                                        get_element(op_var[0], 1));

      for (i = 2; i < vector_elements; i++) {
         temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                           element_type,
                                           get_element(op_var[0], i),
                                           temp);
      }
      assign(ir, 0, temp);
      break;
   }

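   /* dot(a, b) becomes one multiply per channel, summed by a chain of adds,
    * e.g. for vec3: (a.z * b.z + (a.y * b.y + a.x * b.x)).
    */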
   case ir_binop_dot: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;

         temp = new(mem_ctx) ir_expression(ir_binop_mul,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(ir_binop_add,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
      ir->fprint(stderr);
      fprintf(stderr, "\n");
      assert(!"not reached: expression operates on scalars only");
      break;
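
   /* all_equal(a, b) ANDs the per-channel comparisons together, while
    * any_nequal(a, b) ORs them.
    */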
   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;
         ir_expression_operation join;

         if (expr->operation == ir_binop_all_equal)
            join = ir_binop_logic_and;
         else
            join = ir_binop_logic_or;

         temp = new(mem_ctx) ir_expression(expr->operation,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(join,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_unop_noise:
      assert(!"noise should have been broken down to function call");
      break;

   case ir_binop_bfm: {
      /* Does not need to be scalarized, since its result will be identical
       * for all channels.
       */
      ir_rvalue *op0 = get_element(op_var[0], 0);
      ir_rvalue *op1 = get_element(op_var[1], 0);

      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
                                               element_type,
                                               op0,
                                               op1));
      break;
   }

   case ir_binop_ubo_load:
      assert(!"not yet supported");
      break;

   case ir_triop_fma:
   case ir_triop_lrp:
   case ir_triop_csel:
   case ir_triop_bitfield_extract:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2));
      }
      break;

   case ir_triop_bfi: {
      /* Only a single BFM is needed for multiple BFIs. */
      ir_rvalue *op0 = get_element(op_var[0], 0);

      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0->clone(mem_ctx, NULL),
                                                  op1,
                                                  op2));
      }
      break;
   }

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_binop_ldexp:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_quadop_bitfield_insert:
   case ir_quadop_vector:
      assert(!"should have been lowered");
      break;

   case ir_unop_unpack_half_2x16_split_x:
   case ir_unop_unpack_half_2x16_split_y:
   case ir_binop_pack_half_2x16_split:
      assert(!"not reached: expression operates on scalars only");
      break;
   }

   ir->remove();
   this->progress = true;

   return visit_continue;
}