glsl/lower_int64: only set progress when something is lowered.
[mesa.git] / src / compiler / glsl / lower_int64.cpp
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered
 * to a uvec2. For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results. An operation like
 *
 *     uint64_t(x) * uint64_t(y)
 *
 * becomes
 *
 *     packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
 */
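
/* For signed 64-bit operations the same pattern is used with ivec2 values and
 * the int-typed pack/unpack opcodes; for example,
 *
 *     int64_t(x) / int64_t(y)
 *
 * becomes a call to __builtin_idiv64 on the unpacked ivec2 halves of x and y,
 * with the ivec2 result packed back into an int64_t.
 */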

#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/hash_table.h"
#include "builtin_functions.h"

typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                      builtin_available_predicate avail);

using namespace ir_builder;

namespace lower_64bit {
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_expression *ir,
                                     ir_function_signature *callee);
};

using namespace lower_64bit;

namespace {

class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower), instructions(instructions),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_key_hash_string,
                                          _mesa_key_string_equal);

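      /* Seed the function table with any 64-bit lowering built-ins that are
       * already present in the IR so that handle_op reuses them instead of
       * generating duplicates.
       */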
      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
            continue;

         add_function(f);
      }
   }

   ~lower_64bit_visitor()
   {
      _mesa_hash_table_destroy(functions, NULL);
   }

   void handle_rvalue(ir_rvalue **rvalue);

   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }

   ir_function *find_function(const char *name)
   {
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;
   }

   bool progress;

private:
   unsigned lower; /** Bitfield of which operations to lower */

   exec_list *instructions;

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   exec_list function_list;

private:
   ir_factory added_functions;

   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};

} /* anonymous namespace */

/**
 * Determine if a particular type of lowering should occur
 */
#define lowering(x) (this->lower & x)

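/**
 * Lower 64-bit integer operations in an instruction stream
 *
 * Any lowering functions generated while visiting the instructions are
 * spliced onto the head of \c instructions so that they appear before any
 * code that calls them.
 *
 * \return true if any 64-bit expression was lowered, false otherwise.
 */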
bool
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
{
   if (instructions->is_empty())
      return false;

   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the incoming
       * instruction list.
       */
      exec_node *const after = instructions->head_sentinel.next;
      exec_node *const before = &instructions->head_sentinel;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;

      before->next = head;
      head->prev = before;

      after->prev = tail;
      tail->next = after;
   }

   return v.progress;
}


/**
 * Expand individual 64-bit values to uvec2 values
 *
 * Each operation is in one of a few forms.
 *
 *     vector op vector
 *     vector op scalar
 *     scalar op vector
 *     scalar op scalar
 *
 * In the 'vector op vector' case, the two vectors must have the same size.
 * In a way, the 'scalar op scalar' form is a special case of the 'vector op
 * vector' form.
 *
 * This function generates a new set of uvec2 values for each element of a
 * single operand. If the operand is a scalar, the uvec2 is replicated
 * multiple times. A value like
 *
 *     u64vec3(a) + u64vec3(b)
 *
 * becomes
 *
 *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
 *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
 *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
 *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
 *
 * and the returned operands array contains ir_variable pointers to
 *
 *     { tmp1, tmp2, tmp3, tmp1 }
 */
void
lower_64bit::expand_source(ir_factory &body,
                           ir_rvalue *val,
                           ir_variable **expanded_src)
{
   assert(val->type->is_integer_64());

   ir_variable *const temp = body.make_temp(val->type, "tmp");

   body.emit(assign(temp, val));

   const ir_expression_operation unpack_opcode =
      val->type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;

   const glsl_type *const type =
      val->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   unsigned i;
   for (i = 0; i < val->type->vector_elements; i++) {
      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");

      body.emit(assign(expanded_src[i],
                       expr(unpack_opcode, swizzle(temp, i, 1))));
   }

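   /* Fill the remaining slots with the first expanded element so that scalar
    * operands can be indexed per component by the caller, just like vector
    * operands.
    */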
   for (/* empty */; i < 4; i++)
      expanded_src[i] = expanded_src[0];
}

/**
 * Convert a series of uvec2 results into a single 64-bit integer vector
 */
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
                                 const glsl_type *type,
                                 ir_variable *result[4])
{
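   /* At the GLSL level, the code emitted below for an unsigned
    * three-component destination is roughly
    *
    *     compacted.x = packUint2x32(result[0]);
    *     compacted.y = packUint2x32(result[1]);
    *     compacted.z = packUint2x32(result[2]);
    *
    * where 'compacted' stands for the compacted_64bit_result temporary.
    */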
   const ir_expression_operation pack_opcode =
      type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;

   ir_variable *const compacted_result =
      body.make_temp(type, "compacted_64bit_result");

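   /* The third argument to assign() is a write mask, so each iteration packs
    * one uvec2 (or ivec2) result into a single component of the destination.
    */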
   for (unsigned i = 0; i < type->vector_elements; i++) {
      body.emit(assign(compacted_result,
                       expr(pack_opcode, result[i]),
                       1U << i));
   }

   void *const mem_ctx = ralloc_parent(compacted_result);
   return new(mem_ctx) ir_dereference_variable(compacted_result);
}

ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->get_num_operands();
   ir_variable *src[4][4];
   ir_variable *dst[4];
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

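   /* Call the lowering function once for each component of the widest source
    * operand.  Scalar operands were replicated by expand_source, so src[j][i]
    * is valid for every component index.
    */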
   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Splice all of the nodes from the local instruction list into the IR
    * stream immediately before base_ir.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}

ir_rvalue *
lower_64bit_visitor::handle_op(ir_expression *ir,
                               const char *function_name,
                               function_generator generator)
{
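   /* Only lower expressions whose operands are all 64-bit integers. */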
   for (unsigned i = 0; i < ir->get_num_operands(); i++)
      if (!ir->operands[i]->type->is_integer_64())
         return ir;

   /* Get a handle to the correct ir_function_signature for the core
    * operation.
    */
   ir_function_signature *callee = NULL;
   ir_function *f = find_function(function_name);

   if (f != NULL) {
      callee = (ir_function_signature *) f->signatures.get_head();
      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
   } else {
      f = new(base_ir) ir_function(function_name);
      callee = generator(base_ir, NULL);

      f->add_signature(callee);

      add_function(f);

      /* Queue the new function on function_list so that
       * lower_64bit_integer_instructions splices it into the IR stream.
       */
      added_functions.emit(f);
   }

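   /* Progress is flagged only here, once the expression is actually being
    * replaced with a call to a lowering function.
    */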
   this->progress = true;
   return lower_op_to_function_call(this->base_ir, ir, callee);
}

void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
      return;

   ir_expression *const ir = (*rvalue)->as_expression();
   assert(ir != NULL);

   switch (ir->operation) {
   case ir_unop_sign:
      if (lowering(SIGN64)) {
         *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
      }
      break;

   case ir_binop_div:
      if (lowering(DIV64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
         } else {
            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
         }
      }
      break;

   case ir_binop_mod:
      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
         } else {
            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
         }
      }
      break;

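   /* Signed and unsigned multiplication produce the same low 64 bits of the
    * product, so a single unsigned helper covers both 64-bit types.
    */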
   case ir_binop_mul:
      if (lowering(MUL64)) {
         *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
      }
      break;

   default:
      break;
   }
}