/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered
 * to a uvec2. For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results. An operation like
 *
 *    uint64_t(x) * uint64_t(y)
 *
 * becomes
 *
 *    packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
 */
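
/* The signed case follows the same scheme; as a sketch (not literal output
 * of this pass), an int64_t division such as
 *
 *    int64_t(x) / int64_t(y)
 *
 * would become
 *
 *    packInt2x32(__builtin_idiv64(unpackInt2x32(x), unpackInt2x32(y)));
 *
 * using ivec2 temporaries instead of uvec2.
 */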
#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/hash_table.h"
#include "builtin_functions.h"
typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                     builtin_available_predicate avail);

using namespace ir_builder;
namespace lower_64bit {
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_expression *ir,
                                     ir_function_signature *callee);
} /* namespace lower_64bit */

using namespace lower_64bit;
namespace {

class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower), instructions(instructions),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_key_hash_string,
                                          _mesa_key_string_equal);

      /* Index any __builtin_ functions already present in the IR so that
       * find_function() can reuse them instead of generating them again.
       */
      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
            continue;

         add_function(f);
      }
   }
   ~lower_64bit_visitor()
   {
      _mesa_hash_table_destroy(functions, NULL);
   }
   void handle_rvalue(ir_rvalue **rvalue);

   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }
   ir_function *find_function(const char *name)
   {
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;
   }
   bool progress;

private:
   unsigned lower; /** Bitfield of which operations to lower */

   exec_list *instructions;

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   /** List of functions generated so far by this pass */
   exec_list function_list;

private:
   /** Factory that emits generated functions into function_list */
   ir_factory added_functions;

   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};
} /* anonymous namespace */

/**
 * Determine if a particular type of lowering should occur
 */
#define lowering(x) (this->lower & x)
bool
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
{
   if (instructions->is_empty())
      return false;

   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the
       * incoming instruction list.
       */
      exec_node *const after = &instructions->head_sentinel;
      exec_node *const before = instructions->head_sentinel.next;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;

      after->next = head;
      head->prev = after;

      before->prev = tail;
      tail->next = before;
   }

   return v.progress;
}
/**
 * Expand individual 64-bit values to uvec2 values
 *
 * Each operation is in one of a few forms.
 *
 *     vector op vector
 *     vector op scalar
 *     scalar op vector
 *     scalar op scalar
 *
 * In the 'vector op vector' case, the two vectors must have the same size.
 * In a way, the 'scalar op scalar' form is a special case of the 'vector op
 * vector' form.
 *
 * This function generates a new set of uvec2 values for each element of a
 * single operand. If the operand is a scalar, the uvec2 is replicated
 * multiple times. A value like
 *
 *     u64vec3(a) + u64vec3(b)
 *
 * becomes
 *
 *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
 *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
 *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
 *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
 *
 * and the returned operands array contains ir_variable pointers to
 *
 *     { tmp1, tmp2, tmp3, tmp1 }
 */
void
lower_64bit::expand_source(ir_factory &body,
                           ir_rvalue *val,
                           ir_variable **expanded_src)
{
   assert(val->type->is_integer_64());

   ir_variable *const temp = body.make_temp(val->type, "tmp");

   body.emit(assign(temp, val));

   const ir_expression_operation unpack_opcode =
      val->type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;

   const glsl_type *const type =
      val->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   unsigned i;
   for (i = 0; i < val->type->vector_elements; i++) {
      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");

      body.emit(assign(expanded_src[i],
                       expr(unpack_opcode, swizzle(temp, i, 1))));
   }

   /* Replicate the first element into any remaining slots so that a scalar
    * source is broadcast across all components of a vector operation.
    */
   for (/* empty */; i < 4; i++)
      expanded_src[i] = expanded_src[0];
}
/**
 * Convert a series of uvec2 results into a single 64-bit integer vector
 */
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
                                 const glsl_type *type,
                                 ir_variable *result[4])
{
   const ir_expression_operation pack_opcode =
      type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;

   ir_variable *const compacted_result =
      body.make_temp(type, "compacted_64bit_result");

   for (unsigned i = 0; i < type->vector_elements; i++) {
      body.emit(assign(compacted_result,
                       expr(pack_opcode, result[i]),
                       1U << i));
   }

   void *const mem_ctx = ralloc_parent(compacted_result);
   return new(mem_ctx) ir_dereference_variable(compacted_result);
}
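
/* Roughly, for a u64vec2 destination the loop above emits (a sketch, not
 * literal IR):
 *
 *    compacted_64bit_result.x = packUint2x32(result[0]);
 *    compacted_64bit_result.y = packUint2x32(result[1]);
 *
 * with the write mask 1U << i limiting each assignment to one component.
 */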
ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->get_num_operands();
   ir_variable *src[4][4];
   ir_variable *dst[4];
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

   /* Emit one call to the builtin per component of the result. */
   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Move all of the nodes from 'instructions' to the spot between base_ir
    * and the instruction before it.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}
ir_rvalue *
lower_64bit_visitor::handle_op(ir_expression *ir,
                               const char *function_name,
                               function_generator generator)
{
   for (unsigned i = 0; i < ir->get_num_operands(); i++)
      if (!ir->operands[i]->type->is_integer_64())
         return ir;

   /* Get a handle to the correct ir_function_signature for the core
    * operation, generating it if it does not already exist.
    */
   ir_function_signature *callee = NULL;
   ir_function *f = find_function(function_name);
   if (f != NULL) {
      callee = (ir_function_signature *) f->signatures.get_head();
      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
   } else {
      f = new(base_ir) ir_function(function_name);
      callee = generator(base_ir, NULL);

      f->add_signature(callee);

      add_function(f);
      added_functions.emit(f);
   }

   return lower_op_to_function_call(this->base_ir, ir, callee);
}
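
/* Generation is cached: the first expression lowered to, e.g.,
 * __builtin_umul64 builds the helper through its generator and records the
 * ir_function; every later multiply reuses the recorded signature, so each
 * helper appears at most once in the final instruction stream.
 */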
void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
      return;

   ir_expression *const ir = (*rvalue)->as_expression();
   assert(ir != NULL);

   switch (ir->operation) {
   case ir_unop_sign:
      if (lowering(SIGN64)) {
         *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
         this->progress = true;
      }
      break;

   case ir_binop_div:
      if (lowering(DIV64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
         } else {
            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
         }
         this->progress = true;
      }
      break;

   case ir_binop_mod:
      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
         } else {
            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
         }
         this->progress = true;
      }
      break;

   case ir_binop_mul:
      /* The low 64 bits of a multiply are the same for signed and unsigned
       * operands, so a single unsigned helper covers both types.
       */
      if (lowering(MUL64)) {
         *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
         this->progress = true;
      }
      break;

   default:
      break;
   }
}