/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 */
29 * This lowering pass converts references to variables with loads/stores to
30 * registers or inputs/outputs. We assume that structure splitting has already
31 * been run, or else structures with indirect references can't be split. We
32 * also assume that this pass will be consumed by a scalar backend, so we pack
33 * things more tightly.
39 type_size(const struct glsl_type
*type
)
43 switch (glsl_get_base_type(type
)) {
48 return glsl_get_components(type
);
50 return type_size(glsl_get_array_element(type
)) * glsl_get_length(type
);
51 case GLSL_TYPE_STRUCT
:
53 for (i
= 0; i
< glsl_get_length(type
); i
++) {
54 size
+= type_size(glsl_get_struct_elem_type(type
, i
));
57 case GLSL_TYPE_SAMPLER
:
59 case GLSL_TYPE_ATOMIC_UINT
:
61 case GLSL_TYPE_INTERFACE
:
67 unreachable("not reached");
74 * for inputs, outputs, and uniforms, assigns starting locations for variables
78 assign_var_locations(struct hash_table
*ht
)
80 unsigned location
= 0;
82 struct hash_entry
*entry
;
83 hash_table_foreach(ht
, entry
) {
84 nir_variable
*var
= (nir_variable
*) entry
->data
;
87 * UBO's have their own address spaces, so don't count them towards the
88 * number of global uniforms
90 if (var
->data
.mode
== nir_var_uniform
&& var
->interface_type
!= NULL
)
93 var
->data
.driver_location
= location
;
94 location
+= type_size(var
->type
);
99 assign_var_locations_shader(nir_shader
*shader
)
101 assign_var_locations(shader
->inputs
);
102 assign_var_locations(shader
->outputs
);
103 assign_var_locations(shader
->uniforms
);
107 init_reg(nir_variable
*var
, nir_register
*reg
, struct hash_table
*ht
,
110 if (!glsl_type_is_scalar(var
->type
) &&
111 !glsl_type_is_vector(var
->type
)) {
112 reg
->is_packed
= true;
113 reg
->num_components
= 1;
114 reg
->num_array_elems
= type_size(var
->type
);
116 reg
->num_components
= glsl_get_components(var
->type
);
119 reg
->name
= ralloc_strdup(reg
, var
->name
);
120 _mesa_hash_table_insert(ht
, var
, reg
);
123 static struct hash_table
*
124 init_var_ht(nir_shader
*shader
, bool lower_globals
, bool lower_io
,
127 struct hash_table
*ht
= _mesa_hash_table_create(NULL
,
129 _mesa_key_pointer_equal
);
132 foreach_list_typed(nir_variable
, var
, node
, &shader
->globals
) {
133 nir_register
*reg
= nir_global_reg_create(shader
);
134 init_reg(var
, reg
, ht
, add_names
);
139 struct hash_entry
*entry
;
140 hash_table_foreach(shader
->outputs
, entry
) {
141 nir_variable
*var
= (nir_variable
*) entry
->data
;
142 nir_register
*reg
= nir_global_reg_create(shader
);
143 init_reg(var
, reg
, ht
, add_names
);
147 nir_foreach_overload(shader
, overload
) {
148 if (overload
->impl
) {
149 nir_function_impl
*impl
= overload
->impl
;
151 foreach_list_typed(nir_variable
, var
, node
, &impl
->locals
) {
152 nir_register
*reg
= nir_local_reg_create(impl
);
153 init_reg(var
, reg
, ht
, add_names
);
162 deref_has_indirect(nir_deref_var
*deref_var
)
164 nir_deref
*deref
= &deref_var
->deref
;
166 while (deref
->child
!= NULL
) {
167 deref
= deref
->child
;
168 if (deref
->deref_type
== nir_deref_type_array
) {
169 nir_deref_array
*deref_array
= nir_deref_as_array(deref
);
170 if (deref_array
->has_indirect
)
179 get_deref_offset(nir_deref_var
*deref_var
, nir_instr
*instr
,
180 nir_function_impl
*impl
, bool native_integers
,
183 void *mem_ctx
= ralloc_parent(instr
);
185 bool first_indirect
= true;
187 unsigned base_offset
= 0;
188 nir_deref
*deref
= &deref_var
->deref
;
189 while (deref
->child
!= NULL
) {
190 const struct glsl_type
*parent_type
= deref
->type
;
191 deref
= deref
->child
;
193 if (deref
->deref_type
== nir_deref_type_array
) {
194 nir_deref_array
*deref_array
= nir_deref_as_array(deref
);
195 unsigned size
= type_size(deref
->type
);
197 base_offset
+= size
* deref_array
->base_offset
;
199 if (deref_array
->has_indirect
) {
202 src
= deref_array
->indirect
;
204 /* temp1 = size * deref_array->indirect */
206 nir_register
*const_reg
= nir_local_reg_create(impl
);
207 const_reg
->num_components
= 1;
209 nir_load_const_instr
*load_const
=
210 nir_load_const_instr_create(mem_ctx
);
211 load_const
->dest
.reg
.reg
= const_reg
;
212 load_const
->num_components
= 1;
213 load_const
->value
.u
[0] = size
;
214 nir_instr_insert_before(instr
, &load_const
->instr
);
216 nir_register
*reg
= nir_local_reg_create(impl
);
217 reg
->num_components
= 1;
224 nir_alu_instr
*mul_instr
= nir_alu_instr_create(mem_ctx
, op
);
225 mul_instr
->dest
.write_mask
= 1;
226 mul_instr
->dest
.dest
.reg
.reg
= reg
;
227 mul_instr
->src
[0].src
= deref_array
->indirect
;
228 mul_instr
->src
[1].src
.reg
.reg
= const_reg
;
229 nir_instr_insert_before(instr
, &mul_instr
->instr
);
233 src
.reg
.base_offset
= 0;
234 src
.reg
.indirect
= NULL
;
237 if (!first_indirect
) {
238 /* temp2 = indirect + temp1 */
240 nir_register
*reg
= nir_local_reg_create(impl
);
241 reg
->num_components
= 1;
248 nir_alu_instr
*add_instr
= nir_alu_instr_create(mem_ctx
, op
);
249 add_instr
->dest
.write_mask
= 1;
250 add_instr
->dest
.dest
.reg
.reg
= reg
;
251 add_instr
->src
[0].src
= *indirect
;
252 add_instr
->src
[1].src
= src
;
253 nir_instr_insert_before(instr
, &add_instr
->instr
);
257 src
.reg
.base_offset
= 0;
258 src
.reg
.indirect
= NULL
;
261 /* indirect = tempX */
263 first_indirect
= false;
266 nir_deref_struct
*deref_struct
= nir_deref_as_struct(deref
);
269 while(strcmp(glsl_get_struct_elem_name(parent_type
, i
),
270 deref_struct
->elem
) != 0) {
271 base_offset
+= type_size(glsl_get_struct_elem_type(parent_type
, i
));
281 * We cannot convert variables used in calls, so remove them from the hash
286 remove_call_vars_cb(nir_block
*block
, void *state
)
288 struct hash_table
*ht
= (struct hash_table
*) state
;
290 nir_foreach_instr(block
, instr
) {
291 if (instr
->type
== nir_instr_type_call
) {
292 nir_call_instr
*call
= nir_instr_as_call(instr
);
293 if (call
->return_deref
) {
294 struct hash_entry
*entry
=
295 _mesa_hash_table_search(ht
, call
->return_deref
->var
);
297 _mesa_hash_table_remove(ht
, entry
);
300 for (unsigned i
= 0; i
< call
->num_params
; i
++) {
301 struct hash_entry
*entry
=
302 _mesa_hash_table_search(ht
, call
->params
[i
]->var
);
304 _mesa_hash_table_remove(ht
, entry
);
313 remove_local_vars(nir_function_impl
*impl
, struct hash_table
*ht
)
315 if (impl
->return_var
) {
316 struct hash_entry
*entry
=
317 _mesa_hash_table_search(ht
, impl
->return_var
);
320 _mesa_hash_table_remove(ht
, entry
);
323 for (unsigned i
= 0; i
< impl
->num_params
; i
++) {
324 struct hash_entry
*entry
=
325 _mesa_hash_table_search(ht
, impl
->params
[i
]);
327 _mesa_hash_table_remove(ht
, entry
);
330 nir_foreach_block(impl
, remove_call_vars_cb
, ht
);
334 remove_local_vars_shader(nir_shader
*shader
, struct hash_table
*ht
)
336 nir_foreach_overload(shader
, overload
) {
338 remove_local_vars(overload
->impl
, ht
);
343 get_deref_tail(nir_deref
*deref
)
345 while (deref
->child
!= NULL
)
346 deref
= deref
->child
;
350 /* helper for reg_const_load which emits a single instruction */
352 reg_const_load_single_instr(nir_reg_dest reg
, nir_constant
*constant
,
353 unsigned num_components
, unsigned offset
,
354 nir_function_impl
*impl
, void *mem_ctx
)
356 nir_load_const_instr
*instr
= nir_load_const_instr_create(mem_ctx
);
357 instr
->num_components
= num_components
;
358 for (unsigned i
= 0; i
< num_components
; i
++) {
359 instr
->value
.u
[i
] = constant
->value
.u
[i
+ offset
];
361 instr
->dest
.reg
= reg
;
362 instr
->dest
.reg
.base_offset
+= offset
;
364 nir_instr_insert_before_cf_list(&impl
->body
, &instr
->instr
);
367 /* loads a constant value into a register */
369 reg_const_load(nir_reg_dest reg
, nir_constant
*constant
,
370 const struct glsl_type
*type
, nir_function_impl
*impl
,
374 const struct glsl_type
*subtype
;
375 unsigned subtype_size
;
377 switch (glsl_get_base_type(type
)) {
378 case GLSL_TYPE_FLOAT
:
382 if (glsl_type_is_matrix(type
)) {
383 for (unsigned i
= 0; i
< glsl_get_matrix_columns(type
); i
++) {
384 reg_const_load_single_instr(reg
, constant
,
385 glsl_get_vector_elements(type
),
386 i
* glsl_get_vector_elements(type
),
390 reg_const_load_single_instr(reg
, constant
,
391 glsl_get_vector_elements(type
), 0,
396 case GLSL_TYPE_STRUCT
:
397 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
398 const struct glsl_type
*field
= glsl_get_struct_elem_type(type
, i
);
399 nir_reg_dest new_reg
= reg
;
400 new_reg
.base_offset
+= offset
;
401 reg_const_load(new_reg
, constant
->elements
[i
], field
, impl
,
403 offset
+= type_size(field
);
407 case GLSL_TYPE_ARRAY
:
408 subtype
= glsl_get_array_element(type
);
409 subtype_size
= type_size(subtype
);
410 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
411 nir_reg_dest new_reg
= reg
;
412 new_reg
.base_offset
+= subtype_size
* i
;
413 reg_const_load(new_reg
, constant
->elements
[i
], subtype
, impl
,
424 /* recursively emits a register <-> dereference block copy */
426 var_reg_block_copy_impl(nir_reg_src reg
, nir_deref_var
*deref_head
,
427 nir_src
*predicate
, const struct glsl_type
*type
,
428 nir_instr
*after
, bool var_dest
, void *mem_ctx
)
432 switch (glsl_get_base_type(type
)) {
433 case GLSL_TYPE_FLOAT
:
437 if (glsl_type_is_matrix(type
)) {
438 for (unsigned i
= 0; i
< glsl_get_matrix_columns(type
); i
++) {
439 nir_deref_array
*deref_array
= nir_deref_array_create(mem_ctx
);
440 deref_array
->base_offset
= i
;
441 deref_array
->deref
.type
= glsl_get_column_type(type
);
443 nir_deref_var
*new_deref_head
=
444 nir_deref_as_var(nir_copy_deref(mem_ctx
, &deref_head
->deref
));
445 get_deref_tail(&new_deref_head
->deref
)->child
=
448 nir_reg_src new_reg
= reg
;
449 new_reg
.base_offset
+= i
* glsl_get_vector_elements(type
);
451 var_reg_block_copy_impl(new_reg
, new_deref_head
, predicate
,
452 glsl_get_column_type(type
), after
,
458 switch (glsl_get_vector_elements(type
)) {
459 case 1: op
= nir_intrinsic_store_var_vec1
; break;
460 case 2: op
= nir_intrinsic_store_var_vec2
; break;
461 case 3: op
= nir_intrinsic_store_var_vec3
; break;
462 case 4: op
= nir_intrinsic_store_var_vec4
; break;
463 default: assert(0); break;
466 nir_intrinsic_instr
*store
=
467 nir_intrinsic_instr_create(mem_ctx
, op
);
468 store
->variables
[0] = deref_head
;
469 store
->src
[0].reg
.reg
= reg
.reg
;
470 store
->src
[0].reg
.base_offset
= reg
.base_offset
;
472 store
->src
[0].reg
.indirect
= ralloc(mem_ctx
, nir_src
);
473 *store
->src
[0].reg
.indirect
= *reg
.indirect
;
477 store
->has_predicate
= true;
478 store
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
481 nir_instr_insert_before(after
, &store
->instr
);
484 switch (glsl_get_vector_elements(type
)) {
485 case 1: op
= nir_intrinsic_load_var_vec1
; break;
486 case 2: op
= nir_intrinsic_load_var_vec2
; break;
487 case 3: op
= nir_intrinsic_load_var_vec3
; break;
488 case 4: op
= nir_intrinsic_load_var_vec4
; break;
489 default: assert(0); break;
492 nir_intrinsic_instr
*load
=
493 nir_intrinsic_instr_create(mem_ctx
, op
);
494 load
->variables
[0] = deref_head
;
495 load
->dest
.reg
.reg
= reg
.reg
;
496 load
->dest
.reg
.base_offset
= reg
.base_offset
;
498 load
->dest
.reg
.indirect
= ralloc(mem_ctx
, nir_src
);
499 *load
->dest
.reg
.indirect
= *reg
.indirect
;
503 load
->has_predicate
= true;
504 load
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
507 nir_instr_insert_before(after
, &load
->instr
);
512 case GLSL_TYPE_STRUCT
:
514 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
515 const struct glsl_type
*field_type
=
516 glsl_get_struct_elem_type(type
, i
);
517 const char *field_name
= glsl_get_struct_elem_name(type
, i
);
519 nir_deref_struct
*deref_struct
=
520 nir_deref_struct_create(mem_ctx
, field_name
);
521 deref_struct
->deref
.type
= field_type
;
522 deref_struct
->elem
= field_name
;
524 nir_deref_var
*new_deref_head
=
525 nir_deref_as_var(nir_copy_deref(mem_ctx
, &deref_head
->deref
));
526 get_deref_tail(&new_deref_head
->deref
)->child
=
527 &deref_struct
->deref
;
529 nir_reg_src new_reg
= reg
;
530 new_reg
.base_offset
+= offset
;
532 var_reg_block_copy_impl(new_reg
, new_deref_head
, predicate
,
533 field_type
, after
, var_dest
, mem_ctx
);
535 offset
+= type_size(field_type
);
539 case GLSL_TYPE_ARRAY
:
540 for (unsigned i
= 0; i
< glsl_get_length(type
);
542 const struct glsl_type
*elem_type
= glsl_get_array_element(type
);
544 nir_deref_array
*deref_array
= nir_deref_array_create(mem_ctx
);
545 deref_array
->base_offset
= i
;
546 deref_array
->deref
.type
= elem_type
;
548 nir_deref_var
*new_deref_head
=
549 nir_deref_as_var(nir_copy_deref(mem_ctx
, &deref_head
->deref
));
550 get_deref_tail(&new_deref_head
->deref
)->child
=
553 nir_reg_src new_reg
= reg
;
554 new_reg
.base_offset
+= i
* type_size(elem_type
);
556 var_reg_block_copy_impl(new_reg
, new_deref_head
, predicate
,
557 elem_type
, after
, var_dest
, mem_ctx
);
566 static nir_intrinsic_op
567 get_load_op(nir_variable_mode mode
, bool indirect
, unsigned num_components
)
571 case nir_var_shader_in
:
572 switch (num_components
) {
573 case 1: return nir_intrinsic_load_input_vec1_indirect
;
574 case 2: return nir_intrinsic_load_input_vec2_indirect
;
575 case 3: return nir_intrinsic_load_input_vec3_indirect
;
576 case 4: return nir_intrinsic_load_input_vec4_indirect
;
577 default: assert(0); break;
581 case nir_var_uniform
:
582 switch (num_components
) {
583 case 1: return nir_intrinsic_load_uniform_vec1_indirect
;
584 case 2: return nir_intrinsic_load_uniform_vec2_indirect
;
585 case 3: return nir_intrinsic_load_uniform_vec3_indirect
;
586 case 4: return nir_intrinsic_load_uniform_vec4_indirect
;
587 default: assert(0); break;
597 case nir_var_shader_in
:
598 switch (num_components
) {
599 case 1: return nir_intrinsic_load_input_vec1
;
600 case 2: return nir_intrinsic_load_input_vec2
;
601 case 3: return nir_intrinsic_load_input_vec3
;
602 case 4: return nir_intrinsic_load_input_vec4
;
603 default: assert(0); break;
607 case nir_var_uniform
:
608 switch (num_components
) {
609 case 1: return nir_intrinsic_load_uniform_vec1
;
610 case 2: return nir_intrinsic_load_uniform_vec2
;
611 case 3: return nir_intrinsic_load_uniform_vec3
;
612 case 4: return nir_intrinsic_load_uniform_vec4
;
613 default: assert(0); break;
623 return nir_intrinsic_load_input_vec1
;
626 /* emits an input -> reg block copy */
629 reg_input_block_copy(nir_reg_dest dest
, unsigned src_index
, nir_src
*indirect
,
630 nir_src
*predicate
, unsigned size
,
631 unsigned num_components
, nir_variable_mode mode
,
632 nir_instr
*after
, void *mem_ctx
)
634 nir_intrinsic_op op
= get_load_op(mode
, indirect
!= NULL
, num_components
);
636 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(mem_ctx
, op
);
637 load
->const_index
[0] = src_index
;
638 load
->const_index
[1] = size
;
640 load
->src
[0] = *indirect
;
642 load
->has_predicate
= true;
643 load
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
645 load
->dest
.reg
= dest
;
646 nir_instr_insert_before(after
, &load
->instr
);
649 /* emits a variable/input -> register block copy */
652 var_reg_block_copy(nir_deref_var
*src
, nir_reg_dest dest
, nir_src
*predicate
,
653 bool lower_io
, nir_instr
*after
, nir_function_impl
*impl
,
654 bool native_integers
, void *mem_ctx
)
656 const struct glsl_type
*src_type
= get_deref_tail(&src
->deref
)->type
;
658 if (lower_io
&& (src
->var
->data
.mode
== nir_var_shader_in
||
659 src
->var
->data
.mode
== nir_var_uniform
)) {
660 unsigned size
, num_components
;
661 if (glsl_type_is_scalar(src_type
) || glsl_type_is_vector(src_type
)) {
662 num_components
= glsl_get_vector_elements(src_type
);
666 size
= type_size(src_type
);
668 bool has_indirect
= deref_has_indirect(src
);
670 nir_src
*indirect_ptr
= has_indirect
? &indirect
: NULL
;
671 unsigned offset
= get_deref_offset(src
, after
, impl
, native_integers
,
673 offset
+= src
->var
->data
.driver_location
;
675 reg_input_block_copy(dest
, offset
, indirect_ptr
, predicate
, size
,
676 num_components
, src
->var
->data
.mode
, after
,
681 reg
.base_offset
= dest
.base_offset
;
682 reg
.indirect
= dest
.indirect
;
684 var_reg_block_copy_impl(reg
, src
, predicate
, src_type
, after
, false,
689 /* emits a register -> variable copy */
691 reg_var_block_copy(nir_reg_src src
, nir_deref_var
*dest
, nir_src
*predicate
,
692 nir_instr
*after
, void *mem_ctx
)
694 const struct glsl_type
*dest_type
= get_deref_tail(&dest
->deref
)->type
;
696 var_reg_block_copy_impl(src
, dest
, predicate
, dest_type
, after
, true,
701 * emits an input -> variable block copy using an intermediate register
704 var_var_block_copy(nir_deref_var
*src
, nir_deref_var
*dest
, nir_src
*predicate
,
705 nir_instr
*after
, nir_function_impl
*impl
,
706 bool native_integers
, void *mem_ctx
)
708 const struct glsl_type
*type
= get_deref_tail(&dest
->deref
)->type
;
709 nir_register
*reg
= nir_local_reg_create(impl
);
710 if (glsl_type_is_scalar(type
) || glsl_type_is_vector(type
)) {
711 reg
->num_components
= glsl_get_vector_elements(type
);
713 reg
->is_packed
= true;
714 reg
->num_components
= 1;
715 reg
->num_array_elems
= type_size(type
);
719 reg_src
.base_offset
= 0;
720 reg_src
.indirect
= NULL
;
723 nir_reg_dest reg_dest
;
724 reg_dest
.base_offset
= 0;
725 reg_dest
.indirect
= NULL
;
728 var_reg_block_copy(src
, reg_dest
, predicate
, true, after
, impl
,
729 native_integers
, mem_ctx
);
730 reg_var_block_copy(reg_src
, dest
, predicate
, after
, mem_ctx
);
733 /* emits a register -> register block copy */
735 reg_reg_block_copy(nir_reg_dest dest
, nir_reg_src src
, nir_src
*predicate
,
736 const struct glsl_type
*type
, nir_instr
*after
,
739 if (!dest
.reg
->is_packed
&& !src
.reg
->is_packed
)
740 assert(dest
.reg
->num_components
== src
.reg
->num_components
);
742 unsigned size
, num_components
;
743 if (dest
.reg
->is_packed
&& src
.reg
->is_packed
) {
744 size
= type_size(type
);
748 if (dest
.reg
->is_packed
)
749 num_components
= src
.reg
->num_components
;
751 num_components
= dest
.reg
->num_components
;
754 for (unsigned i
= 0; i
< size
; i
++) {
755 nir_alu_instr
*move
= nir_alu_instr_create(mem_ctx
, nir_op_imov
);
756 move
->dest
.write_mask
= (1 << num_components
) - 1;
758 move
->dest
.dest
.reg
.reg
= dest
.reg
;
759 move
->dest
.dest
.reg
.base_offset
= dest
.base_offset
+ i
;
760 if (dest
.indirect
!= NULL
) {
761 move
->dest
.dest
.reg
.indirect
= ralloc(mem_ctx
, nir_src
);
762 *move
->dest
.dest
.reg
.indirect
= *dest
.indirect
;
766 move
->has_predicate
= true;
767 move
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
770 move
->src
[0].src
.reg
= src
;
771 move
->src
[0].src
.reg
.base_offset
+= i
;
773 nir_instr_insert_before(after
, &move
->instr
);
778 create_dest(nir_deref_var
*deref
, nir_instr
*instr
, nir_register
*reg
,
779 nir_function_impl
*impl
, bool native_integers
, void *mem_ctx
)
782 if (deref_has_indirect(deref
)) {
783 dest
.indirect
= ralloc(mem_ctx
, nir_src
);
784 dest
.indirect
->is_ssa
= false;
785 dest
.base_offset
= get_deref_offset(deref
, instr
,
786 impl
, native_integers
,
789 dest
.base_offset
= get_deref_offset(deref
, instr
,
790 impl
, native_integers
, NULL
);
791 dest
.indirect
= NULL
;
799 create_src(nir_deref_var
*deref
, nir_instr
*instr
, nir_register
*reg
,
800 nir_function_impl
*impl
, bool native_integers
, void *mem_ctx
)
803 if (deref_has_indirect(deref
)) {
804 src
.indirect
= ralloc(mem_ctx
, nir_src
);
805 src
.indirect
->is_ssa
= false;
806 src
.base_offset
= get_deref_offset(deref
, instr
,
807 impl
, native_integers
,
810 src
.base_offset
= get_deref_offset(deref
, instr
,
811 impl
, native_integers
, NULL
);
820 handle_var_copy(nir_intrinsic_instr
*instr
, nir_function_impl
*impl
,
821 bool native_integers
, bool lower_io
, struct hash_table
*ht
)
823 void *mem_ctx
= ralloc_parent(instr
);
825 struct hash_entry
*entry
;
827 nir_variable
*dest_var
= instr
->variables
[0]->var
;
828 nir_variable
*src_var
= instr
->variables
[1]->var
;
830 const struct glsl_type
*type
=
831 get_deref_tail(&instr
->variables
[0]->deref
)->type
;
833 nir_src
*predicate
= instr
->has_predicate
? &instr
->predicate
: NULL
;
836 * The source can be either:
837 * 1. a variable we're lowering to a register
838 * 2. an input or uniform we're lowering to loads from an index
839 * 3. a variable we can't lower yet
841 * and similarly, the destination can be either:
842 * 1. a variable we're lowering to a register
843 * 2. a variable we can't lower yet
845 * meaning that there are six cases, including the trivial one (where
846 * source and destination are #3 and #2 respectively) where we can't do
850 entry
= _mesa_hash_table_search(ht
, dest_var
);
852 nir_reg_dest dest
= create_dest(instr
->variables
[0], &instr
->instr
,
853 (nir_register
*) entry
->data
, impl
,
854 native_integers
, mem_ctx
);
856 entry
= _mesa_hash_table_search(ht
, src_var
);
858 nir_reg_src src
= create_src(instr
->variables
[1], &instr
->instr
,
859 (nir_register
*) entry
->data
, impl
,
860 native_integers
, mem_ctx
);
862 reg_reg_block_copy(dest
, src
, predicate
, type
, &instr
->instr
, mem_ctx
);
864 var_reg_block_copy(instr
->variables
[1], dest
, predicate
, lower_io
,
865 &instr
->instr
, impl
, native_integers
, mem_ctx
);
868 entry
= _mesa_hash_table_search(ht
, src_var
);
870 nir_reg_src src
= create_src(instr
->variables
[1], &instr
->instr
,
871 (nir_register
*) entry
->data
, impl
,
872 native_integers
, mem_ctx
);
874 reg_var_block_copy(src
, instr
->variables
[0], predicate
, &instr
->instr
,
877 if (!lower_io
|| (src_var
->data
.mode
!= nir_var_shader_in
&&
878 src_var
->data
.mode
!= nir_var_uniform
)) {
879 /* nothing to do here */
883 var_var_block_copy(instr
->variables
[1], instr
->variables
[0], predicate
,
884 &instr
->instr
, impl
, native_integers
, mem_ctx
);
888 nir_instr_remove(&instr
->instr
);
892 handle_var_load(nir_intrinsic_instr
*instr
, nir_function_impl
*impl
,
893 bool native_integers
, bool lower_io
, struct hash_table
*ht
)
895 void *mem_ctx
= ralloc_parent(instr
);
897 struct hash_entry
*entry
=
898 _mesa_hash_table_search(ht
, instr
->variables
[0]->var
);
901 nir_variable
*src_var
= instr
->variables
[0]->var
;
903 if (lower_io
&& (src_var
->data
.mode
== nir_var_shader_in
||
904 src_var
->data
.mode
== nir_var_uniform
)) {
905 bool has_indirect
= deref_has_indirect(instr
->variables
[0]);
906 unsigned num_components
=
907 nir_intrinsic_infos
[instr
->intrinsic
].dest_components
;
909 unsigned offset
= get_deref_offset(instr
->variables
[0], &instr
->instr
,
910 impl
, native_integers
, &indirect
);
911 offset
+= src_var
->data
.driver_location
;
913 nir_intrinsic_op op
= get_load_op(src_var
->data
.mode
, has_indirect
,
915 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(mem_ctx
, op
);
916 load
->dest
= instr
->dest
;
917 load
->const_index
[0] = (int) offset
;
918 load
->const_index
[1] = 1;
920 load
->src
[0] = indirect
;
922 if (instr
->has_predicate
) {
923 load
->has_predicate
= true;
924 load
->predicate
= nir_src_copy(instr
->predicate
, mem_ctx
);
927 nir_instr_insert_before(&instr
->instr
, &load
->instr
);
932 nir_register
*reg
= (nir_register
*) entry
->data
;
934 nir_alu_instr
*move
= nir_alu_instr_create(mem_ctx
, nir_op_imov
);
935 unsigned dest_components
=
936 nir_intrinsic_infos
[instr
->intrinsic
].dest_components
;
937 move
->dest
.dest
= instr
->dest
;
938 move
->dest
.write_mask
= (1 << dest_components
) - 1;
939 move
->src
[0].src
.reg
= create_src(instr
->variables
[0], &instr
->instr
,
940 reg
, impl
, native_integers
, mem_ctx
);
941 if (instr
->has_predicate
) {
942 move
->has_predicate
= true;
943 move
->predicate
= nir_src_copy(instr
->predicate
, mem_ctx
);
945 nir_instr_insert_before(&instr
->instr
, &move
->instr
);
948 nir_instr_remove(&instr
->instr
);
952 handle_var_store(nir_intrinsic_instr
*instr
, nir_function_impl
*impl
,
953 bool native_integers
, bool lower_io
, struct hash_table
*ht
)
955 void *mem_ctx
= ralloc_parent(instr
);
957 struct hash_entry
*entry
=
958 _mesa_hash_table_search(ht
, instr
->variables
[0]->var
);
962 nir_register
*reg
= (nir_register
*) entry
->data
;
964 nir_alu_instr
*move
= nir_alu_instr_create(mem_ctx
, nir_op_imov
);
965 unsigned src_components
=
966 nir_intrinsic_infos
[instr
->intrinsic
].src_components
[0];
967 move
->dest
.dest
.reg
= create_dest(instr
->variables
[0], &instr
->instr
,
968 reg
, impl
, native_integers
, mem_ctx
);
969 move
->dest
.write_mask
= (1 << src_components
) - 1;
970 move
->src
[0].src
= instr
->src
[0];
971 if (instr
->has_predicate
) {
972 move
->has_predicate
= true;
973 move
->predicate
= nir_src_copy(instr
->predicate
, mem_ctx
);
975 nir_instr_insert_before(&instr
->instr
, &move
->instr
);
976 nir_instr_remove(&instr
->instr
);
980 struct hash_table
*ht
;
981 bool native_integers
, lower_io
;
982 nir_function_impl
*impl
;
986 rewrite_block_cb(nir_block
*block
, void *_state
)
988 rewrite_state
*state
= (rewrite_state
*) _state
;
990 nir_foreach_instr_safe(block
, instr
) {
991 if (instr
->type
== nir_instr_type_intrinsic
) {
992 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
993 switch (intrin
->intrinsic
) {
994 case nir_intrinsic_load_var_vec1
:
995 case nir_intrinsic_load_var_vec2
:
996 case nir_intrinsic_load_var_vec3
:
997 case nir_intrinsic_load_var_vec4
:
998 handle_var_load(intrin
, state
->impl
, state
->native_integers
,
999 state
->lower_io
, state
->ht
);
1002 case nir_intrinsic_store_var_vec1
:
1003 case nir_intrinsic_store_var_vec2
:
1004 case nir_intrinsic_store_var_vec3
:
1005 case nir_intrinsic_store_var_vec4
:
1006 handle_var_store(intrin
, state
->impl
, state
->native_integers
,
1007 state
->lower_io
, state
->ht
);
1010 case nir_intrinsic_copy_var
:
1011 handle_var_copy(intrin
, state
->impl
, state
->native_integers
,
1012 state
->lower_io
, state
->ht
);
1025 rewrite_impl(nir_function_impl
*impl
, struct hash_table
*ht
,
1026 bool native_integers
, bool lower_io
)
1028 rewrite_state state
;
1030 state
.native_integers
= native_integers
;
1031 state
.lower_io
= lower_io
;
1034 nir_foreach_block(impl
, rewrite_block_cb
, &state
);
1038 insert_load_const_impl(nir_function_impl
*impl
, struct exec_list
*vars
,
1039 struct hash_table
*ht
)
1041 void *mem_ctx
= ralloc_parent(impl
);
1043 foreach_list_typed(nir_variable
, var
, node
, vars
) {
1044 if (var
->constant_initializer
== NULL
)
1047 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
1049 nir_register
*reg
= (nir_register
*) entry
->data
;
1052 dest
.base_offset
= 0;
1053 dest
.indirect
= NULL
;
1054 reg_const_load(dest
, var
->constant_initializer
, var
->type
, impl
,
1060 static nir_intrinsic_op
1061 get_store_op(bool indirect
, unsigned num_components
)
1064 switch (num_components
) {
1065 case 1: return nir_intrinsic_store_output_vec1_indirect
;
1066 case 2: return nir_intrinsic_store_output_vec2_indirect
;
1067 case 3: return nir_intrinsic_store_output_vec3_indirect
;
1068 case 4: return nir_intrinsic_store_output_vec4_indirect
;
1069 default: assert(0); break;
1072 switch (num_components
) {
1073 case 1: return nir_intrinsic_store_output_vec1
;
1074 case 2: return nir_intrinsic_store_output_vec2
;
1075 case 3: return nir_intrinsic_store_output_vec3
;
1076 case 4: return nir_intrinsic_store_output_vec4
;
1077 default: assert(0); break;
1081 return nir_intrinsic_store_output_vec1
;
1084 /* emits a reg -> output block copy after a block */
1086 reg_output_block_copy_block(nir_reg_src src
, unsigned dest_index
,
1087 unsigned num_components
, unsigned size
,
1088 nir_block
*block
, void *mem_ctx
)
1090 nir_intrinsic_op op
= get_store_op(false, num_components
);
1092 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(mem_ctx
, op
);
1093 store
->const_index
[0] = dest_index
;
1094 store
->const_index
[1] = (size
== 0) ? 1 : size
;
1095 store
->src
[0].reg
= src
;
1096 nir_instr_insert_after_block(block
, &store
->instr
);
1099 /* emits a reg -> output copy after an instruction */
1101 reg_output_block_copy_instr(nir_reg_src src
, unsigned dest_index
,
1102 unsigned num_components
, unsigned size
,
1103 nir_instr
*after
, void *mem_ctx
)
1105 nir_intrinsic_op op
= get_store_op(false, num_components
);
1107 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(mem_ctx
, op
);
1108 store
->const_index
[0] = dest_index
;
1109 store
->const_index
[1] = (size
== 0) ? 1 : size
;
1110 store
->src
[0].reg
= src
;
1111 nir_instr_insert_before(after
, &store
->instr
);
1114 static nir_function_impl
*
1115 find_main(nir_shader
*shader
)
1117 foreach_list_typed(nir_function
, func
, node
, &shader
->functions
) {
1118 if (strcmp(func
->name
, "main") == 0) {
1119 assert(exec_list_length(&func
->overload_list
) == 1);
1120 nir_function_overload
*overload
= nir_function_first_overload(func
);
1121 return overload
->impl
;
1130 insert_output_reg_copies(nir_shader
*shader
, nir_block
*block
,
1131 nir_instr
*after
, struct hash_table
*ht
)
1133 struct hash_entry
*entry
;
1134 hash_table_foreach(shader
->outputs
, entry
) {
1135 nir_variable
*var
= (nir_variable
*) entry
->data
;
1137 struct hash_entry
*entry2
;
1138 entry2
= _mesa_hash_table_search(ht
, var
);
1140 nir_register
*reg
= (nir_register
*) entry2
->data
;
1143 src
.base_offset
= 0;
1144 src
.indirect
= NULL
;
1147 reg_output_block_copy_instr(src
, var
->data
.driver_location
,
1148 reg
->num_components
,
1149 reg
->num_array_elems
,
1152 reg_output_block_copy_block(src
, var
->data
.driver_location
,
1153 reg
->num_components
,
1154 reg
->num_array_elems
,
1162 struct hash_table
*ht
;
1164 bool found_emit_vertex
;
1168 insert_output_reg_copies_emit_vertex(nir_block
*block
, void *_state
)
1170 reg_output_state
*state
= (reg_output_state
*) _state
;
1172 nir_foreach_instr(block
, instr
) {
1173 if (instr
->type
== nir_instr_type_intrinsic
) {
1174 nir_intrinsic_instr
*intrin_instr
= nir_instr_as_intrinsic(instr
);
1175 if (intrin_instr
->intrinsic
== nir_intrinsic_emit_vertex
) {
1176 insert_output_reg_copies(state
->shader
, NULL
, instr
, state
->ht
);
1177 state
->found_emit_vertex
= true;
1186 insert_output_reg_copies_shader(nir_shader
*shader
, struct hash_table
*ht
)
1188 nir_function_impl
*main_impl
= find_main(shader
);
1190 reg_output_state state
;
1191 state
.shader
= shader
;
1193 state
.found_emit_vertex
= false;
1194 nir_foreach_block(main_impl
, insert_output_reg_copies_emit_vertex
, &state
);
1196 if (!state
.found_emit_vertex
) {
1197 struct set_entry
*entry
;
1198 set_foreach(main_impl
->end_block
->predecessors
, entry
) {
1199 nir_block
*block
= (nir_block
*) entry
->key
;
1200 insert_output_reg_copies(shader
, block
, NULL
, ht
);
1206 rewrite_shader(nir_shader
*shader
, struct hash_table
*ht
, bool native_integers
,
1207 bool lower_globals
, bool lower_io
)
1209 nir_foreach_overload(shader
, overload
) {
1210 if (overload
->impl
) {
1211 insert_load_const_impl(overload
->impl
, &overload
->impl
->locals
, ht
);
1212 if (lower_globals
&& strcmp(overload
->function
->name
, "main") == 0)
1213 insert_load_const_impl(overload
->impl
, &shader
->globals
, ht
);
1214 rewrite_impl(overload
->impl
, ht
, native_integers
, lower_io
);
1220 nir_lower_variables_scalar(nir_shader
*shader
, bool lower_globals
,
1221 bool lower_io
, bool add_names
, bool native_integers
)
1224 assign_var_locations_shader(shader
);
1225 struct hash_table
*ht
= init_var_ht(shader
, lower_globals
, lower_io
,
1227 remove_local_vars_shader(shader
, ht
);
1228 rewrite_shader(shader
, ht
, native_integers
, lower_globals
, lower_io
);
1230 insert_output_reg_copies_shader(shader
, ht
);
1231 _mesa_hash_table_destroy(ht
, NULL
);