nir: Don't bail too early in lower_mem_constant_vars
[mesa.git] / src / compiler / glsl / lower_variable_index_to_cond_assign.cpp
1 /*
2 * Copyright © 2010 Luca Barbieri
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_variable_index_to_cond_assign.cpp
26 *
27 * Turns non-constant indexing into array types to a series of
28 * conditional moves of each element into a temporary.
29 *
30 * Pre-DX10 GPUs often don't have a native way to do this operation,
31 * and this works around that.
32 *
33 * The lowering process proceeds as follows. Each non-constant index
34 * found in an r-value is converted to a canonical form \c array[i]. Each
35 * element of the array is conditionally assigned to a temporary by comparing
36 * \c i to a constant index. This is done by cloning the canonical form and
37 * replacing all occurances of \c i with a constant. Each remaining occurance
38 * of the canonical form in the IR is replaced with a dereference of the
39 * temporary variable.
40 *
41 * L-values with non-constant indices are handled similarly. In this case,
42 * the RHS of the assignment is assigned to a temporary. The non-constant
43 * index is replace with the canonical form (just like for r-values). The
44 * temporary is conditionally assigned to each element of the canonical form
45 * by comparing \c i with each index. The same clone-and-replace scheme is
46 * used.
47 */
48
49 #include "ir.h"
50 #include "ir_rvalue_visitor.h"
51 #include "ir_optimization.h"
52 #include "compiler/glsl_types.h"
53 #include "main/macros.h"
54 #include "program/prog_instruction.h" /* For SWIZZLE_XXXX */
55 #include "ir_builder.h"
56
57 using namespace ir_builder;
58
59 /**
60 * Generate a comparison value for a block of indices
61 *
62 * Lowering passes for non-constant indexing of arrays, matrices, or vectors
63 * can use this to generate blocks of index comparison values.
64 *
65 * \param instructions List where new instructions will be appended
66 * \param index \c ir_variable containing the desired index
67 * \param base Base value for this block of comparisons
68 * \param components Number of unique index values to compare. This must
69 * be on the range [1, 4].
70 * \param mem_ctx ralloc memory context to be used for all allocations.
71 *
72 * \returns
73 * An \c ir_variable containing the per-component comparison results. This
74 * must be dereferenced per use.
75 */
76 ir_variable *
77 compare_index_block(ir_factory &body, ir_variable *index,
78 unsigned base, unsigned components)
79 {
80 assert(index->type->is_scalar());
81 assert(index->type->base_type == GLSL_TYPE_INT ||
82 index->type->base_type == GLSL_TYPE_UINT);
83 assert(components >= 1 && components <= 4);
84
85 ir_rvalue *const broadcast_index = components > 1
86 ? swizzle(index, SWIZZLE_XXXX, components)
87 : operand(index).val;
88
89 /* Compare the desired index value with the next block of four indices.
90 */
91 ir_constant_data test_indices_data;
92 memset(&test_indices_data, 0, sizeof(test_indices_data));
93 test_indices_data.i[0] = base;
94 test_indices_data.i[1] = base + 1;
95 test_indices_data.i[2] = base + 2;
96 test_indices_data.i[3] = base + 3;
97
98 ir_constant *const test_indices =
99 new(body.mem_ctx) ir_constant(broadcast_index->type, &test_indices_data);
100
101 ir_rvalue *const condition_val = equal(broadcast_index, test_indices);
102
103 ir_variable *const condition = body.make_temp(condition_val->type,
104 "dereference_condition");
105
106 body.emit(assign(condition, condition_val));
107
108 return condition;
109 }
110
111 static inline bool
112 is_array_or_matrix(const ir_rvalue *ir)
113 {
114 return (ir->type->is_array() || ir->type->is_matrix());
115 }
116
117 namespace {
118 /**
119 * Replace a dereference of a variable with a specified r-value
120 *
121 * Each time a dereference of the specified value is replaced, the r-value
122 * tree is cloned.
123 */
124 class deref_replacer : public ir_rvalue_visitor {
125 public:
126 deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
127 : variable_to_replace(variable_to_replace), value(value),
128 progress(false)
129 {
130 assert(this->variable_to_replace != NULL);
131 assert(this->value != NULL);
132 }
133
134 virtual void handle_rvalue(ir_rvalue **rvalue)
135 {
136 ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable();
137
138 if (dv != NULL && dv->var == this->variable_to_replace) {
139 this->progress = true;
140 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
141 }
142 }
143
144 const ir_variable *variable_to_replace;
145 ir_rvalue *value;
146 bool progress;
147 };
148
149 /**
150 * Find a variable index dereference of an array in an rvalue tree
151 */
152 class find_variable_index : public ir_hierarchical_visitor {
153 public:
154 find_variable_index()
155 : deref(NULL)
156 {
157 /* empty */
158 }
159
160 virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
161 {
162 if (is_array_or_matrix(ir->array) &&
163 ir->array_index->as_constant() == NULL) {
164 this->deref = ir;
165 return visit_stop;
166 }
167
168 return visit_continue;
169 }
170
171 /**
172 * First array dereference found in the tree that has a non-constant index.
173 */
174 ir_dereference_array *deref;
175 };
176
177 struct assignment_generator
178 {
179 ir_instruction* base_ir;
180 ir_dereference *rvalue;
181 ir_variable *old_index;
182 bool is_write;
183 unsigned int write_mask;
184 ir_variable* var;
185
186 assignment_generator()
187 : base_ir(NULL),
188 rvalue(NULL),
189 old_index(NULL),
190 is_write(false),
191 write_mask(0),
192 var(NULL)
193 {
194 }
195
196 void generate(unsigned i, ir_rvalue* condition, ir_factory &body) const
197 {
198 /* Clone the old r-value in its entirety. Then replace any occurances of
199 * the old variable index with the new constant index.
200 */
201 ir_dereference *element = this->rvalue->clone(body.mem_ctx, NULL);
202 ir_constant *const index = body.constant(i);
203 deref_replacer r(this->old_index, index);
204 element->accept(&r);
205 assert(r.progress);
206
207 /* Generate a conditional assignment to (or from) the constant indexed
208 * array dereference.
209 */
210 ir_assignment *const assignment = (is_write)
211 ? assign(element, this->var, condition, write_mask)
212 : assign(this->var, element, condition);
213
214 body.emit(assignment);
215 }
216 };
217
218 struct switch_generator
219 {
220 /* make TFunction a template parameter if you need to use other generators */
221 typedef assignment_generator TFunction;
222 const TFunction& generator;
223
224 ir_variable* index;
225 unsigned linear_sequence_max_length;
226 unsigned condition_components;
227
228 void *mem_ctx;
229
230 switch_generator(const TFunction& generator, ir_variable *index,
231 unsigned linear_sequence_max_length,
232 unsigned condition_components)
233 : generator(generator), index(index),
234 linear_sequence_max_length(linear_sequence_max_length),
235 condition_components(condition_components)
236 {
237 this->mem_ctx = ralloc_parent(index);
238 }
239
240 void linear_sequence(unsigned begin, unsigned end, ir_factory &body)
241 {
242 if (begin == end)
243 return;
244
245 /* If the array access is a read, read the first element of this subregion
246 * unconditionally. The remaining tests will possibly overwrite this
247 * value with one of the other array elements.
248 *
249 * This optimization cannot be done for writes because it will cause the
250 * first element of the subregion to be written possibly *in addition* to
251 * one of the other elements.
252 */
253 unsigned first;
254 if (!this->generator.is_write) {
255 this->generator.generate(begin, 0, body);
256 first = begin + 1;
257 } else {
258 first = begin;
259 }
260
261 for (unsigned i = first; i < end; i += 4) {
262 const unsigned comps = MIN2(condition_components, end - i);
263 ir_variable *const cond = compare_index_block(body, index, i, comps);
264
265 if (comps == 1) {
266 this->generator.generate(i,
267 operand(cond).val,
268 body);
269 } else {
270 for (unsigned j = 0; j < comps; j++) {
271 this->generator.generate(i + j,
272 swizzle(cond, j, 1),
273 body);
274 }
275 }
276 }
277 }
278
279 void bisect(unsigned begin, unsigned end, ir_factory &body)
280 {
281 unsigned middle = (begin + end) >> 1;
282
283 assert(index->type->is_integer_32());
284
285 ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT)
286 ? new(body.mem_ctx) ir_constant((unsigned)middle)
287 : new(body.mem_ctx) ir_constant((int)middle);
288
289 ir_if *if_less = new(body.mem_ctx) ir_if(less(this->index, middle_c));
290
291 ir_factory then_body(&if_less->then_instructions, body.mem_ctx);
292 ir_factory else_body(&if_less->else_instructions, body.mem_ctx);
293 generate(begin, middle, then_body);
294 generate(middle, end, else_body);
295
296 body.emit(if_less);
297 }
298
299 void generate(unsigned begin, unsigned end, ir_factory &body)
300 {
301 unsigned length = end - begin;
302 if (length <= this->linear_sequence_max_length)
303 return linear_sequence(begin, end, body);
304 else
305 return bisect(begin, end, body);
306 }
307 };
308
309 /**
310 * Visitor class for replacing expressions with ir_constant values.
311 */
312
313 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
314 public:
315 variable_index_to_cond_assign_visitor(gl_shader_stage stage,
316 bool lower_input,
317 bool lower_output,
318 bool lower_temp,
319 bool lower_uniform)
320 : progress(false), stage(stage), lower_inputs(lower_input),
321 lower_outputs(lower_output), lower_temps(lower_temp),
322 lower_uniforms(lower_uniform)
323 {
324 /* empty */
325 }
326
327 bool progress;
328
329 gl_shader_stage stage;
330 bool lower_inputs;
331 bool lower_outputs;
332 bool lower_temps;
333 bool lower_uniforms;
334
335 bool storage_type_needs_lowering(ir_dereference_array *deref) const
336 {
337 /* If a variable isn't eventually the target of this dereference, then
338 * it must be a constant or some sort of anonymous temporary storage.
339 *
340 * FINISHME: Is this correct? Most drivers treat arrays of constants as
341 * FINISHME: uniforms. It seems like this should do the same.
342 */
343 const ir_variable *const var = deref->array->variable_referenced();
344 if (var == NULL)
345 return this->lower_temps;
346
347 switch (var->data.mode) {
348 case ir_var_auto:
349 case ir_var_temporary:
350 return this->lower_temps;
351
352 case ir_var_uniform:
353 case ir_var_shader_storage:
354 return this->lower_uniforms;
355
356 case ir_var_shader_shared:
357 return false;
358
359 case ir_var_function_in:
360 case ir_var_const_in:
361 return this->lower_temps;
362
363 case ir_var_system_value:
364 /* There are only a few system values that have array types:
365 *
366 * gl_TessLevelInner[]
367 * gl_TessLevelOuter[]
368 * gl_SampleMaskIn[]
369 *
370 * The tessellation factor arrays are lowered to vec4/vec2s
371 * by lower_tess_level() before this pass occurs, so we'll
372 * never see them here.
373 *
374 * The only remaining case is gl_SampleMaskIn[], which has
375 * a length of ceil(ctx->Const.MaxSamples / 32). Most hardware
376 * supports no more than 32 samples, in which case our lowering
377 * produces a single read of gl_SampleMaskIn[0]. Even with 64x
378 * MSAA, the array length is only 2, so the lowering is fairly
379 * efficient. Therefore, lower unconditionally.
380 */
381 return true;
382
383 case ir_var_shader_in:
384 /* The input array size is unknown at compiler time for non-patch
385 * inputs in TCS and TES. The arrays are sized to
386 * the implementation-dependent limit "gl_MaxPatchVertices", but
387 * the real size is stored in the "gl_PatchVerticesIn" built-in
388 * uniform.
389 *
390 * The TCS input array size is specified by
391 * glPatchParameteri(GL_PATCH_VERTICES).
392 *
393 * The TES input array size is specified by the "vertices" output
394 * layout qualifier in TCS.
395 */
396 if ((stage == MESA_SHADER_TESS_CTRL ||
397 stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
398 return false;
399 return this->lower_inputs;
400
401 case ir_var_function_out:
402 /* TCS non-patch outputs can only be indexed with "gl_InvocationID".
403 * Other expressions are not allowed.
404 */
405 if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
406 return false;
407 return this->lower_temps;
408
409 case ir_var_shader_out:
410 return this->lower_outputs;
411
412 case ir_var_function_inout:
413 return this->lower_temps;
414 }
415
416 assert(!"Should not get here.");
417 return false;
418 }
419
420 bool needs_lowering(ir_dereference_array *deref) const
421 {
422 if (deref == NULL || deref->array_index->as_constant() ||
423 !is_array_or_matrix(deref->array))
424 return false;
425
426 return this->storage_type_needs_lowering(deref);
427 }
428
429 ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
430 ir_assignment* orig_assign,
431 ir_dereference *orig_base)
432 {
433 void *const mem_ctx = ralloc_parent(base_ir);
434 exec_list list;
435 ir_factory body(&list, mem_ctx);
436
437 assert(is_array_or_matrix(orig_deref->array));
438
439 const unsigned length = (orig_deref->array->type->is_array())
440 ? orig_deref->array->type->length
441 : orig_deref->array->type->matrix_columns;
442
443 /* Temporary storage for either the result of the dereference of
444 * the array, or the RHS that's being assigned into the
445 * dereference of the array.
446 */
447 ir_variable *var;
448
449 if (orig_assign) {
450 var = body.make_temp(orig_assign->rhs->type,
451 "dereference_array_value");
452
453 body.emit(assign(var, orig_assign->rhs));
454 } else {
455 var = body.make_temp(orig_deref->type,
456 "dereference_array_value");
457 }
458
459 /* Store the index to a temporary to avoid reusing its tree. */
460 ir_variable *index = body.make_temp(orig_deref->array_index->type,
461 "dereference_array_index");
462
463 body.emit(assign(index, orig_deref->array_index));
464
465 orig_deref->array_index = deref(index).val;
466
467 assignment_generator ag;
468 ag.rvalue = orig_base;
469 ag.base_ir = base_ir;
470 ag.old_index = index;
471 ag.var = var;
472 if (orig_assign) {
473 ag.is_write = true;
474 ag.write_mask = orig_assign->write_mask;
475 } else {
476 ag.is_write = false;
477 }
478
479 switch_generator sg(ag, index, 4, 4);
480
481 /* If the original assignment has a condition, respect that original
482 * condition! This is acomplished by wrapping the new conditional
483 * assignments in an if-statement that uses the original condition.
484 */
485 if (orig_assign != NULL && orig_assign->condition != NULL) {
486 /* No need to clone the condition because the IR that it hangs on is
487 * going to be removed from the instruction sequence.
488 */
489 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
490 ir_factory then_body(&if_stmt->then_instructions, body.mem_ctx);
491
492 sg.generate(0, length, then_body);
493 body.emit(if_stmt);
494 } else {
495 sg.generate(0, length, body);
496 }
497
498 base_ir->insert_before(&list);
499 return var;
500 }
501
502 virtual void handle_rvalue(ir_rvalue **pir)
503 {
504 if (this->in_assignee)
505 return;
506
507 if (!*pir)
508 return;
509
510 ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
511 if (needs_lowering(orig_deref)) {
512 ir_variable *var =
513 convert_dereference_array(orig_deref, NULL, orig_deref);
514 assert(var);
515 *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
516 this->progress = true;
517 }
518 }
519
520 ir_visitor_status
521 visit_leave(ir_assignment *ir)
522 {
523 ir_rvalue_visitor::visit_leave(ir);
524
525 find_variable_index f;
526 ir->lhs->accept(&f);
527
528 if (f.deref != NULL && storage_type_needs_lowering(f.deref)) {
529 convert_dereference_array(f.deref, ir, ir->lhs);
530 ir->remove();
531 this->progress = true;
532 }
533
534 return visit_continue;
535 }
536 };
537
538 } /* anonymous namespace */
539
540 bool
541 lower_variable_index_to_cond_assign(gl_shader_stage stage,
542 exec_list *instructions,
543 bool lower_input,
544 bool lower_output,
545 bool lower_temp,
546 bool lower_uniform)
547 {
548 variable_index_to_cond_assign_visitor v(stage,
549 lower_input,
550 lower_output,
551 lower_temp,
552 lower_uniform);
553
554 /* Continue lowering until no progress is made. If there are multiple
555 * levels of indirection (e.g., non-constant indexing of array elements and
556 * matrix columns of an array of matrix), each pass will only lower one
557 * level of indirection.
558 */
559 bool progress_ever = false;
560 do {
561 v.progress = false;
562 visit_list_elements(&v, instructions);
563 progress_ever = v.progress || progress_ever;
564 } while (v.progress);
565
566 return progress_ever;
567 }