/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
 * to a uvec2.  For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results.  An operation like
 *
 *     uint64_t(x) * uint64_t(y)
 *
 * becomes
 *
 *     packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
 */
39 #include "main/macros.h"
40 #include "compiler/glsl_types.h"
42 #include "ir_rvalue_visitor.h"
43 #include "ir_builder.h"
44 #include "ir_optimization.h"
45 #include "util/hash_table.h"
46 #include "builtin_functions.h"
48 typedef ir_function_signature
*(*function_generator
)(void *mem_ctx
,
49 builtin_available_predicate avail
);
51 using namespace ir_builder
;
/* Forward declarations of the lowering primitives defined later in this
 * file.
 */
namespace lower_64bit {
/* Expand the 64-bit rvalue val into per-component uvec2/ivec2 temporaries
 * stored in expanded_src (unused slots alias element 0).
 */
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

/* Combine up to four per-component uvec2/ivec2 results back into a single
 * value of the given 64-bit type.
 */
ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

/* NOTE(review): this declaration appears truncated in this extraction — the
 * definition below also uses the ir_expression being lowered, and the
 * closing brace of namespace lower_64bit is not visible here.
 */
ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_function_signature *callee);

using namespace lower_64bit;
/* NOTE(review): the anonymous-namespace opener, the class's access
 * specifiers, several braces, a `continue;`/`add_function(f);` style loop
 * body, and the declaration of the `progress` member (initialized in the
 * constructor and read by the entry point below) are not visible in this
 * extraction.
 */
class lower_64bit_visitor : public ir_rvalue_visitor {
   /* Build the name -> ir_function hash table up front so that later
    * lookups of "__builtin_" helpers can reuse functions already present
    * in the IR.
    */
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower),
        function_list(), added_functions(&function_list, mem_ctx)
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_key_hash_string,
                                          _mesa_key_string_equal);

      /* Only functions whose name starts with "__builtin_" are of
       * interest; everything else is skipped.
       */
      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)

   ~lower_64bit_visitor()
      /* Keys/values are owned by the IR's ralloc context, so no value
       * destructor is needed.
       */
      _mesa_hash_table_destroy(functions, NULL);

   void handle_rvalue(ir_rvalue **rvalue);

   /* Record a __builtin_* function for later lookup by find_function. */
   void add_function(ir_function *f)
      _mesa_hash_table_insert(functions, f->name, f);

   /* Return the known ir_function with the given name, or NULL. */
   ir_function *find_function(const char *name)
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;

   unsigned lower; /** Bitfield of which operations to lower */

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

   /* Freshly generated helper functions, later spliced into the shader by
    * lower_64bit_integer_instructions.
    */
   exec_list function_list;

   /* Factory that emits the generated helpers into function_list. */
   ir_factory added_functions;

   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);

} /* anonymous namespace */
/**
 * Determine if a particular type of lowering should occur
 *
 * Tests one of the MUL64/DIV64/MOD64/SIGN64/… bits against the mask given
 * to the visitor's constructor.
 */
#define lowering(x) (this->lower & x)
/* Entry point of the pass: lower the 64-bit integer operations selected by
 * what_to_lower throughout the instruction stream.
 *
 * NOTE(review): the return-type line, an early return for the empty list,
 * several braces, and the tail of this function (the splice of
 * v.function_list into the shader and the final return) are not visible in
 * this extraction.
 */
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
   if (instructions->is_empty())

   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   /* Allocate everything from the same ralloc context as the existing IR. */
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the
       * incoming instruction stream.
       */
      exec_node *const after = &instructions->head_sentinel;
      exec_node *const before = instructions->head_sentinel.next;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;
/**
 * Expand individual 64-bit values to uvec2 values
 *
 * Each operation is in one of a few forms (vector op vector, vector op
 * scalar, scalar op vector, scalar op scalar).
 *
 * In the 'vector op vector' case, the two vectors must have the same size.
 * In a way, the 'scalar op scalar' form is special case of the 'vector op
 * vector' form.
 *
 * This method generates a new set of uvec2 values for each element of a
 * single operand.  If the operand is a scalar, the uvec2 is replicated
 * multiple times.  A value like
 *
 *     u64vec3(a) + u64vec3(b)
 *
 * will generate, for one operand,
 *
 *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
 *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
 *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
 *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
 *
 * and the returned operands array contains ir_variable pointers to
 *
 *     { tmp1, tmp2, tmp3, tmp1 }
 *
 * NOTE(review): the return-type line, the `ir_rvalue *val` parameter line,
 * the declaration of the loop counter `i`, and several braces are not
 * visible in this extraction.
 */
lower_64bit::expand_source(ir_factory &body,
                           ir_variable **expanded_src)
   assert(val->type->is_integer_64());

   /* Capture the value once so each component swizzle reads the same temp. */
   ir_variable *const temp = body.make_temp(val->type, "tmp");

   body.emit(assign(temp, val));

   /* Pick the unpack opcode and 32-bit pair type matching signedness. */
   const ir_expression_operation unpack_opcode =
      val->type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;

   const glsl_type *const type =
      val->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   for (i = 0; i < val->type->vector_elements; i++) {
      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");

      body.emit(assign(expanded_src[i],
                       expr(unpack_opcode, swizzle(temp, i, 1))));

   /* Replicate: any slots beyond vector_elements alias element 0. */
   for (/* empty */; i < 4; i++)
      expanded_src[i] = expanded_src[0];
/**
 * Convert a series of uvec2 results into a single 64-bit integer vector
 *
 * NOTE(review): the opening brace, the write-mask argument of the assign()
 * in the loop (the call is left with a trailing comma here), and the
 * closing braces are not visible in this extraction.
 */
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
                                 const glsl_type *type,
                                 ir_variable *result[4])
   /* Pick the pack opcode matching the signedness of the result type. */
   const ir_expression_operation pack_opcode =
      type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;

   ir_variable *const compacted_result =
      body.make_temp(type, "compacted_64bit_result");

   /* Pack each per-component uvec2/ivec2 partial result into one component
    * of the final 64-bit vector.
    */
   for (unsigned i = 0; i < type->vector_elements; i++) {
      body.emit(assign(compacted_result,
                       expr(pack_opcode, result[i]),

   void *const mem_ctx = ralloc_parent(compacted_result);
   return new(mem_ctx) ir_dereference_variable(compacted_result);
/* Rewrite one 64-bit expression as per-component calls to the lowering
 * helper callee: expand each operand to uvec2/ivec2 temporaries, call the
 * helper once per vector component, then compact the partial results back
 * into a single 64-bit value.
 *
 * NOTE(review): the return-type line, the `ir_expression *ir` parameter
 * line, the declaration of `dst`, the argument list of the ir_call
 * constructor, several braces, and the tail of the function (splicing the
 * new instructions before base_ir and returning rv) are not visible in this
 * extraction.
 */
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_function_signature *callee)
   const unsigned num_operands = ir->num_operands;
   ir_variable *src[4][4];

   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   /* The helpers operate on 32-bit pairs; pick signed/unsigned to match. */
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   /* Expand every operand, and track the widest operand so scalar operands
    * (replicated by expand_source) are consumed component-wise alongside
    * vector operands.
    */
   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;

   /* Emit one helper call per component of the widest operand. */
   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Move all of the nodes from instructions between base_ir and the
    * instruction before it.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;
/* Common lowering path for one operation: bail unless every operand is a
 * 64-bit integer, find or generate the named __builtin_* helper, then
 * rewrite the expression as calls to it.
 *
 * NOTE(review): the return-type line, the early `return ir;`, the if/else
 * braces around the find_function result, and the closing brace are not
 * visible in this extraction.
 */
lower_64bit_visitor::handle_op(ir_expression *ir,
                               const char *function_name,
                               function_generator generator)
   /* Only lower when every operand is a 64-bit integer. */
   for (unsigned i = 0; i < ir->num_operands; i++)
      if (!ir->operands[i]->type->is_integer_64())

   /* Get a handle to the correct ir_function_signature for the core
    * operation.
    */
   ir_function_signature *callee = NULL;
   ir_function *f = find_function(function_name);

      /* Reuse a helper already present in (or added to) the IR. */
      callee = (ir_function_signature *) f->signatures.get_head();
      assert(callee != NULL && callee->ir_type == ir_type_function_signature);

      /* Otherwise generate the helper now and register it for reuse. */
      f = new(base_ir) ir_function(function_name);
      callee = generator(base_ir, NULL);

      f->add_signature(callee);

   this->progress = true;
   return lower_op_to_function_call(this->base_ir, ir, callee);
/* ir_rvalue_visitor hook: dispatch each 64-bit expression to handle_op
 * according to the bits in this->lower (see the lowering() macro).
 *
 * NOTE(review): the case labels, break statements, and closing braces of
 * this switch are not visible in this extraction, and the function
 * continues past the end of the visible chunk.
 */
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)

   ir_expression *const ir = (*rvalue)->as_expression();

   switch (ir->operation) {
      if (lowering(SIGN64)) {
         *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);

      if (lowering(DIV64)) {
         /* Unsigned and signed division use different helpers. */
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);

      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);

      if (lowering(MUL64)) {
         *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);