glsl/lower_int64: only set progress when something is lowered.
[mesa.git] / src / compiler / glsl / lower_int64.cpp
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered
 * to a uvec2. For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results. An operation like
 *
 *     uint64_t(x) * uint64_t(y)
 *
 * becomes
 *
 *     packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
 */
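
/* For signed 64-bit operations the same pattern is used with ivec2 values and
 * the int-typed pack/unpack opcodes; for example,
 *
 *     int64_t(x) / int64_t(y)
 *
 * becomes a call to __builtin_idiv64 on the unpacked ivec2 halves of x and y,
 * with the ivec2 result packed back into an int64_t.
 */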

#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/hash_table.h"
#include "builtin_functions.h"

typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                      builtin_available_predicate avail);

using namespace ir_builder;

namespace lower_64bit {
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_expression *ir,
                                     ir_function_signature *callee);
};

using namespace lower_64bit;

namespace {

class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower), instructions(instructions),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_key_hash_string,
                                          _mesa_key_string_equal);

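      /* Seed the function table with any 64-bit lowering built-ins that are
       * already present in the IR so that handle_op reuses them instead of
       * generating duplicates.
       */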
      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
            continue;

         add_function(f);
      }
   }

   ~lower_64bit_visitor()
   {
      _mesa_hash_table_destroy(functions, NULL);
   }

   void handle_rvalue(ir_rvalue **rvalue);

   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }

   ir_function *find_function(const char *name)
   {
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;
   }

   bool progress;

private:
   unsigned lower; /** Bitfield of which operations to lower */

   exec_list *instructions;

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   exec_list function_list;

private:
   ir_factory added_functions;

   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};

} /* anonymous namespace */

/**
 * Determine if a particular type of lowering should occur
 */
#define lowering(x) (this->lower & x)

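/**
 * Lower 64-bit integer operations in an instruction stream
 *
 * Any lowering functions generated while visiting the instructions are
 * spliced onto the head of \c instructions so that they appear before any
 * code that calls them.
 *
 * \return true if any 64-bit expression was lowered, false otherwise.
 */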
bool
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
{
   if (instructions->is_empty())
      return false;

   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the incoming
       * instruction list.
       */
      exec_node *const after = instructions->head_sentinel.next;
      exec_node *const before = &instructions->head_sentinel;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;

      before->next = head;
      head->prev = before;

      after->prev = tail;
      tail->next = after;
   }

   return v.progress;
}


/**
 * Expand individual 64-bit values to uvec2 values
 *
 * Each operation is in one of a few forms.
 *
 *     vector op vector
 *     vector op scalar
 *     scalar op vector
 *     scalar op scalar
 *
 * In the 'vector op vector' case, the two vectors must have the same size.
 * In a way, the 'scalar op scalar' form is a special case of the 'vector op
 * vector' form.
 *
 * This function generates a new set of uvec2 values for each element of a
 * single operand. If the operand is a scalar, the uvec2 is replicated
 * multiple times. A value like
 *
 *     u64vec3(a) + u64vec3(b)
 *
 * becomes
 *
 *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
 *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
 *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
 *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
 *
 * and the returned operands array contains ir_variable pointers to
 *
 *     { tmp1, tmp2, tmp3, tmp1 }
 */
void
lower_64bit::expand_source(ir_factory &body,
                           ir_rvalue *val,
                           ir_variable **expanded_src)
{
   assert(val->type->is_integer_64());

   ir_variable *const temp = body.make_temp(val->type, "tmp");

   body.emit(assign(temp, val));

   const ir_expression_operation unpack_opcode =
      val->type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;

   const glsl_type *const type =
      val->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   unsigned i;
   for (i = 0; i < val->type->vector_elements; i++) {
      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");

      body.emit(assign(expanded_src[i],
                       expr(unpack_opcode, swizzle(temp, i, 1))));
   }

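   /* Fill the remaining slots with the first expanded element so that scalar
    * operands can be indexed per component by the caller, just like vector
    * operands.
    */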
   for (/* empty */; i < 4; i++)
      expanded_src[i] = expanded_src[0];
}

/**
 * Convert a series of uvec2 results into a single 64-bit integer vector
 */
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
                                 const glsl_type *type,
                                 ir_variable *result[4])
{
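   /* At the GLSL level, the code emitted below for an unsigned
    * three-component destination is roughly
    *
    *     compacted.x = packUint2x32(result[0]);
    *     compacted.y = packUint2x32(result[1]);
    *     compacted.z = packUint2x32(result[2]);
    *
    * where 'compacted' stands for the compacted_64bit_result temporary.
    */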
   const ir_expression_operation pack_opcode =
      type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;

   ir_variable *const compacted_result =
      body.make_temp(type, "compacted_64bit_result");

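   /* The third argument to assign() is a write mask, so each iteration packs
    * one uvec2 (or ivec2) result into a single component of the destination.
    */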
   for (unsigned i = 0; i < type->vector_elements; i++) {
      body.emit(assign(compacted_result,
                       expr(pack_opcode, result[i]),
                       1U << i));
   }

   void *const mem_ctx = ralloc_parent(compacted_result);
   return new(mem_ctx) ir_dereference_variable(compacted_result);
}

ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->get_num_operands();
   ir_variable *src[4][4];
   ir_variable *dst[4];
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

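   /* Call the lowering function once for each component of the widest source
    * operand.  Scalar operands were replicated by expand_source, so src[j][i]
    * is valid for every component index.
    */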
   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Splice all of the nodes from the local instruction list into the IR
    * stream immediately before base_ir.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}

ir_rvalue *
lower_64bit_visitor::handle_op(ir_expression *ir,
                               const char *function_name,
                               function_generator generator)
{
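   /* Only lower expressions whose operands are all 64-bit integers. */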
   for (unsigned i = 0; i < ir->get_num_operands(); i++)
      if (!ir->operands[i]->type->is_integer_64())
         return ir;

   /* Get a handle to the correct ir_function_signature for the core
    * operation.
    */
   ir_function_signature *callee = NULL;
   ir_function *f = find_function(function_name);

   if (f != NULL) {
      callee = (ir_function_signature *) f->signatures.get_head();
      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
   } else {
      f = new(base_ir) ir_function(function_name);
      callee = generator(base_ir, NULL);

      f->add_signature(callee);

      add_function(f);

      /* Queue the new function on function_list so that
       * lower_64bit_integer_instructions splices it into the IR stream.
       */
      added_functions.emit(f);
   }

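   /* Progress is flagged only here, once the expression is actually being
    * replaced with a call to a lowering function.
    */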
   this->progress = true;
   return lower_op_to_function_call(this->base_ir, ir, callee);
}

void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
      return;

   ir_expression *const ir = (*rvalue)->as_expression();
   assert(ir != NULL);

   switch (ir->operation) {
   case ir_unop_sign:
      if (lowering(SIGN64)) {
         *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
      }
      break;

   case ir_binop_div:
      if (lowering(DIV64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
         } else {
            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
         }
      }
      break;

   case ir_binop_mod:
      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
         } else {
            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
         }
      }
      break;

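   /* Signed and unsigned multiplication produce the same low 64 bits of the
    * product, so a single unsigned helper covers both 64-bit types.
    */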
   case ir_binop_mul:
      if (lowering(MUL64)) {
         *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
      }
      break;

   default:
      break;
   }
}