2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Connor Abbott (cwabbott0@gmail.com)
29 * This lowering pass converts references to variables with loads/stores to
30 * registers or inputs/outputs. We assume that structure splitting has already
31 * been run, or else structures with indirect references can't be split. We
32 * also assume that this pass will be consumed by a scalar backend, so we pack
33 * things more tightly.
/*
 * type_size: computes a size for a GLSL type, dispatching on
 * glsl_get_base_type(type).
 *
 *  - one branch returns glsl_get_components(type);
 *  - one branch returns the element size (recursive call on
 *    glsl_get_array_element) multiplied by glsl_get_length(type);
 *  - GLSL_TYPE_STRUCT accumulates the recursive size of every member;
 *  - GLSL_TYPE_SAMPLER, GLSL_TYPE_ATOMIC_UINT and GLSL_TYPE_INTERFACE have
 *    their own case labels;
 *  - the switch is followed by unreachable("not reached").
 *
 * NOTE(review): this listing has dropped lines (the embedded line numbers
 * skip). The case labels for the first two branches, the declarations of
 * `size`/`i`, and the values returned for the sampler/atomic/interface cases
 * are not visible here - confirm against the original file.
 */
39 type_size(const struct glsl_type
*type
)
43 switch (glsl_get_base_type(type
)) {
48 return glsl_get_components(type
);
50 return type_size(glsl_get_array_element(type
)) * glsl_get_length(type
);
51 case GLSL_TYPE_STRUCT
:
53 for (i
= 0; i
< glsl_get_length(type
); i
++) {
54 size
+= type_size(glsl_get_struct_elem_type(type
, i
));
57 case GLSL_TYPE_SAMPLER
:
59 case GLSL_TYPE_ATOMIC_UINT
:
61 case GLSL_TYPE_INTERFACE
:
67 unreachable("not reached");
74 * for inputs, outputs, and uniforms, assigns starting locations for variables
/*
 * assign_var_locations: iterates over every entry of the hash table `ht`
 * (each entry's data is a nir_variable) and assigns consecutive locations:
 * the variable's data.driver_location is set to the running `location`,
 * which is then advanced by type_size(var->type).
 *
 * Variables whose mode is nir_var_uniform and whose interface_type is
 * non-NULL are tested for explicitly; per the in-line comment these are UBO
 * members, which have their own address space and should not count towards
 * the number of global uniforms.
 *
 * NOTE(review): the statement executed when that test succeeds and the
 * write to `*size` are not visible in this listing (lines are missing) -
 * presumably the final `location` is stored through `size`; confirm.
 */
78 assign_var_locations(struct hash_table
*ht
, unsigned *size
)
80 unsigned location
= 0;
82 struct hash_entry
*entry
;
83 hash_table_foreach(ht
, entry
) {
84 nir_variable
*var
= (nir_variable
*) entry
->data
;
87 * UBO's have their own address spaces, so don't count them towards the
88 * number of global uniforms
90 if (var
->data
.mode
== nir_var_uniform
&& var
->interface_type
!= NULL
)
93 var
->data
.driver_location
= location
;
94 location
+= type_size(var
->type
);
/*
 * assign_var_locations_shader: runs assign_var_locations() once for each of
 * the shader's three variable tables - inputs, outputs and uniforms - and
 * passes the address of the matching counter (num_inputs, num_outputs,
 * num_uniforms) as the `size` argument.
 */
101 assign_var_locations_shader(nir_shader
*shader
)
103 assign_var_locations(shader
->inputs
, &shader
->num_inputs
);
104 assign_var_locations(shader
->outputs
, &shader
->num_outputs
);
105 assign_var_locations(shader
->uniforms
, &shader
->num_uniforms
);
/*
 * init_reg: sets up `reg` as the register that will stand in for `var` and
 * records the var -> reg mapping in `ht` (key: the variable pointer).
 *
 *  - If the variable's type is neither a scalar nor a vector, the register
 *    is marked packed, given a single component, and sized to
 *    type_size(var->type) array elements.
 *  - The other visible assignment sets num_components to
 *    glsl_get_components(var->type).
 *  - reg->name receives a ralloc_strdup() copy of var->name, allocated with
 *    the register as ralloc context.
 *
 * NOTE(review): the remaining parameter(s) after `ht` and any condition
 * guarding the name copy are not visible in this listing. Callers pass an
 * `add_names` argument in that position, so the copy is presumably
 * conditional on it - confirm.
 */
109 init_reg(nir_variable
*var
, nir_register
*reg
, struct hash_table
*ht
,
112 if (!glsl_type_is_scalar(var
->type
) &&
113 !glsl_type_is_vector(var
->type
)) {
114 reg
->is_packed
= true;
115 reg
->num_components
= 1;
116 reg
->num_array_elems
= type_size(var
->type
);
118 reg
->num_components
= glsl_get_components(var
->type
);
121 reg
->name
= ralloc_strdup(reg
, var
->name
);
122 _mesa_hash_table_insert(ht
, var
, reg
);
/*
 * init_var_ht: creates the variable -> register hash table used by the rest
 * of the pass (pointer-equality keys) and fills it with a fresh register
 * for:
 *
 *  - every variable in shader->globals (global register),
 *  - every variable in the shader->outputs table (global register),
 *  - every local variable of each overload that has an impl (local
 *    register created on that impl).
 *
 * Each register is configured and inserted into the table by init_reg().
 *
 * NOTE(review): the listing has dropped lines. The last parameter
 * (`add_names` is used in the body), the conditions under which the globals
 * and outputs loops run (presumably lower_globals / lower_io), and the
 * return statement are not visible - confirm against the original file.
 */
125 static struct hash_table
*
126 init_var_ht(nir_shader
*shader
, bool lower_globals
, bool lower_io
,
129 struct hash_table
*ht
= _mesa_hash_table_create(NULL
,
131 _mesa_key_pointer_equal
);
134 foreach_list_typed(nir_variable
, var
, node
, &shader
->globals
) {
135 nir_register
*reg
= nir_global_reg_create(shader
);
136 init_reg(var
, reg
, ht
, add_names
);
141 struct hash_entry
*entry
;
142 hash_table_foreach(shader
->outputs
, entry
) {
143 nir_variable
*var
= (nir_variable
*) entry
->data
;
144 nir_register
*reg
= nir_global_reg_create(shader
);
145 init_reg(var
, reg
, ht
, add_names
);
149 nir_foreach_overload(shader
, overload
) {
150 if (overload
->impl
) {
151 nir_function_impl
*impl
= overload
->impl
;
153 foreach_list_typed(nir_variable
, var
, node
, &impl
->locals
) {
154 nir_register
*reg
= nir_local_reg_create(impl
);
155 init_reg(var
, reg
, ht
, add_names
);
/*
 * deref_has_indirect: walks the deref chain hanging off `deref_var` (child
 * by child) and, for every link of type nir_deref_type_array, tests that
 * array deref's has_indirect flag.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * callers use the result as a boolean, so it presumably reports whether any
 * array link is indirect - confirm.
 */
164 deref_has_indirect(nir_deref_var
*deref_var
)
166 nir_deref
*deref
= &deref_var
->deref
;
168 while (deref
->child
!= NULL
) {
169 deref
= deref
->child
;
170 if (deref
->deref_type
== nir_deref_type_array
) {
171 nir_deref_array
*deref_array
= nir_deref_as_array(deref
);
172 if (deref_array
->has_indirect
)
/*
 * get_deref_offset: walks the deref chain of `deref_var` and accumulates
 * the constant part of the access offset in `base_offset`.
 *
 *  - Array links add type_size(element type) * deref_array->base_offset.
 *  - Struct links add the type_size() of every member of the parent type
 *    that precedes the member named deref_struct->elem (members are matched
 *    by strcmp on the member name).
 *
 * For array links with has_indirect set, instructions are emitted before
 * `instr` (allocated from ralloc_parent(instr)):
 *
 *  - either the indirect source is used directly, or a load_const holding
 *    the element size plus an ALU instruction ("temp1 = size * indirect")
 *    are emitted into fresh single-component local registers;
 *  - when this is not the first indirect link, a further ALU instruction
 *    combines the previous `*indirect` with the new term
 *    ("temp2 = indirect + temp1").
 *
 * NOTE(review): this listing has dropped lines. Not visible here: the final
 * parameter (the body dereferences a pointer named `indirect`), the
 * declarations of `src`, `op` and `i`, how `op` is chosen (presumably from
 * native_integers), the condition that selects the direct-use path, the
 * store back through `indirect`, the increment of `i`, and the return
 * statement. Confirm all of these against the original file.
 */
181 get_deref_offset(nir_deref_var
*deref_var
, nir_instr
*instr
,
182 nir_function_impl
*impl
, bool native_integers
,
185 void *mem_ctx
= ralloc_parent(instr
);
187 bool first_indirect
= true;
189 unsigned base_offset
= 0;
190 nir_deref
*deref
= &deref_var
->deref
;
191 while (deref
->child
!= NULL
) {
192 const struct glsl_type
*parent_type
= deref
->type
;
193 deref
= deref
->child
;
195 if (deref
->deref_type
== nir_deref_type_array
) {
196 nir_deref_array
*deref_array
= nir_deref_as_array(deref
);
197 unsigned size
= type_size(deref
->type
);
199 base_offset
+= size
* deref_array
->base_offset
;
201 if (deref_array
->has_indirect
) {
204 src
= deref_array
->indirect
;
206 /* temp1 = size * deref_array->indirect */
208 nir_register
*const_reg
= nir_local_reg_create(impl
);
209 const_reg
->num_components
= 1;
211 nir_load_const_instr
*load_const
=
212 nir_load_const_instr_create(mem_ctx
);
213 load_const
->dest
.reg
.reg
= const_reg
;
214 load_const
->num_components
= 1;
215 load_const
->value
.u
[0] = size
;
216 nir_instr_insert_before(instr
, &load_const
->instr
);
218 nir_register
*reg
= nir_local_reg_create(impl
);
219 reg
->num_components
= 1;
226 nir_alu_instr
*mul_instr
= nir_alu_instr_create(mem_ctx
, op
);
227 mul_instr
->dest
.write_mask
= 1;
228 mul_instr
->dest
.dest
.reg
.reg
= reg
;
229 mul_instr
->src
[0].src
= deref_array
->indirect
;
230 mul_instr
->src
[1].src
.reg
.reg
= const_reg
;
231 nir_instr_insert_before(instr
, &mul_instr
->instr
);
235 src
.reg
.base_offset
= 0;
236 src
.reg
.indirect
= NULL
;
239 if (!first_indirect
) {
240 /* temp2 = indirect + temp1 */
242 nir_register
*reg
= nir_local_reg_create(impl
);
243 reg
->num_components
= 1;
250 nir_alu_instr
*add_instr
= nir_alu_instr_create(mem_ctx
, op
);
251 add_instr
->dest
.write_mask
= 1;
252 add_instr
->dest
.dest
.reg
.reg
= reg
;
253 add_instr
->src
[0].src
= *indirect
;
254 add_instr
->src
[1].src
= src
;
255 nir_instr_insert_before(instr
, &add_instr
->instr
);
259 src
.reg
.base_offset
= 0;
260 src
.reg
.indirect
= NULL
;
263 /* indirect = tempX */
265 first_indirect
= false;
268 nir_deref_struct
*deref_struct
= nir_deref_as_struct(deref
);
271 while(strcmp(glsl_get_struct_elem_name(parent_type
, i
),
272 deref_struct
->elem
) != 0) {
273 base_offset
+= type_size(glsl_get_struct_elem_type(parent_type
, i
));
283 * We cannot convert variables used in calls, so remove them from the hash
/*
 * remove_call_vars_cb: per-block callback. `state` is the variable ->
 * register hash table. For every call instruction in `block` it looks up
 *
 *  - the variable behind call->return_deref (when a return deref exists),
 *  - the variable behind each of the call's params,
 *
 * and removes the corresponding entries from the table, so that variables
 * used in calls are not lowered to registers.
 *
 * NOTE(review): lines are missing between each search and remove
 * (presumably a NULL check on the looked-up entry) and the return statement
 * is not visible - confirm against the original file.
 */
288 remove_call_vars_cb(nir_block
*block
, void *state
)
290 struct hash_table
*ht
= (struct hash_table
*) state
;
292 nir_foreach_instr(block
, instr
) {
293 if (instr
->type
== nir_instr_type_call
) {
294 nir_call_instr
*call
= nir_instr_as_call(instr
);
295 if (call
->return_deref
) {
296 struct hash_entry
*entry
=
297 _mesa_hash_table_search(ht
, call
->return_deref
->var
);
299 _mesa_hash_table_remove(ht
, entry
);
302 for (unsigned i
= 0; i
< call
->num_params
; i
++) {
303 struct hash_entry
*entry
=
304 _mesa_hash_table_search(ht
, call
->params
[i
]->var
);
306 _mesa_hash_table_remove(ht
, entry
);
/*
 * remove_local_vars: drops from `ht` the variables of `impl` that take part
 * in the function-call interface:
 *
 *  - impl->return_var, when set,
 *  - every entry of impl->params,
 *
 * and then runs remove_call_vars_cb over every block of the impl to drop
 * variables referenced by call instructions.
 *
 * NOTE(review): lines are missing between each search and remove
 * (presumably a NULL check on the looked-up entry) - confirm.
 */
315 remove_local_vars(nir_function_impl
*impl
, struct hash_table
*ht
)
317 if (impl
->return_var
) {
318 struct hash_entry
*entry
=
319 _mesa_hash_table_search(ht
, impl
->return_var
);
322 _mesa_hash_table_remove(ht
, entry
);
325 for (unsigned i
= 0; i
< impl
->num_params
; i
++) {
326 struct hash_entry
*entry
=
327 _mesa_hash_table_search(ht
, impl
->params
[i
]);
329 _mesa_hash_table_remove(ht
, entry
);
332 nir_foreach_block(impl
, remove_call_vars_cb
, ht
);
/*
 * remove_local_vars_shader: applies remove_local_vars() to the impl of the
 * overloads visited by nir_foreach_overload(shader, ...).
 *
 * NOTE(review): one line is missing before the call (presumably a check
 * that overload->impl is non-NULL) - confirm.
 */
336 remove_local_vars_shader(nir_shader
*shader
, struct hash_table
*ht
)
338 nir_foreach_overload(shader
, overload
) {
340 remove_local_vars(overload
->impl
, ht
);
/*
 * get_deref_tail: follows the `child` links starting at `deref` until it
 * reaches the link whose child is NULL.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * callers dereference the result (->type, ->child), so it presumably
 * returns that last link - confirm.
 */
345 get_deref_tail(nir_deref
*deref
)
347 while (deref
->child
!= NULL
)
348 deref
= deref
->child
;
352 /* helper for reg_const_load which emits a single instruction */
/*
 * reg_const_load_single_instr: builds one load_const instruction with
 * `num_components` components.
 *
 * For each component i the value is taken from constant->value.u[i + offset]:
 * one visible branch copies the raw value, another maps it to
 * NIR_TRUE / NIR_FALSE depending on whether it is non-zero; anything else
 * hits unreachable("Invalid immediate type").
 *
 * The destination is `reg` with its base_offset advanced by `offset`, and
 * the instruction is inserted with nir_instr_insert_before_cf_list() on
 * impl->body.
 *
 * NOTE(review): the switch statement and most case labels are missing from
 * this listing (only GLSL_TYPE_FLOAT is visible), so which base types take
 * which branch cannot be confirmed here.
 */
354 reg_const_load_single_instr(nir_reg_dest reg
, nir_constant
*constant
,
355 enum glsl_base_type base_type
,
356 unsigned num_components
, unsigned offset
,
357 nir_function_impl
*impl
, void *mem_ctx
)
359 nir_load_const_instr
*instr
= nir_load_const_instr_create(mem_ctx
);
360 instr
->num_components
= num_components
;
361 for (unsigned i
= 0; i
< num_components
; i
++) {
363 case GLSL_TYPE_FLOAT
:
366 instr
->value
.u
[i
] = constant
->value
.u
[i
+ offset
];
369 instr
->value
.u
[i
] = constant
->value
.u
[i
+ offset
] ?
370 NIR_TRUE
: NIR_FALSE
;
373 unreachable("Invalid immediate type");
376 instr
->dest
.reg
= reg
;
377 instr
->dest
.reg
.base_offset
+= offset
;
379 nir_instr_insert_before_cf_list(&impl
->body
, &instr
->instr
);
382 /* loads a constant value into a register */
/*
 * reg_const_load: recursively loads the constant `constant` of GLSL type
 * `type` into the register destination `reg`.
 *
 *  - Branch starting at GLSL_TYPE_FLOAT: a matrix is loaded one column at a
 *    time (one reg_const_load_single_instr per column, with offset
 *    i * glsl_get_vector_elements(type)); otherwise a single instruction
 *    with offset 0 loads the whole value.
 *  - GLSL_TYPE_STRUCT: each field is loaded recursively from
 *    constant->elements[i] into a copy of `reg` whose base_offset is
 *    advanced by the running sum of the preceding fields' type_size().
 *  - GLSL_TYPE_ARRAY: element i is loaded recursively into a copy of `reg`
 *    whose base_offset is advanced by type_size(element type) * i.
 *
 * NOTE(review): this listing has dropped lines - the trailing parameter(s),
 * the declaration of `offset`, the switch statement, further case labels
 * and the tails of the recursive calls are not visible.
 */
384 reg_const_load(nir_reg_dest reg
, nir_constant
*constant
,
385 const struct glsl_type
*type
, nir_function_impl
*impl
,
389 const struct glsl_type
*subtype
;
390 unsigned subtype_size
;
392 enum glsl_base_type base_type
= glsl_get_base_type(type
);
394 case GLSL_TYPE_FLOAT
:
398 if (glsl_type_is_matrix(type
)) {
399 for (unsigned i
= 0; i
< glsl_get_matrix_columns(type
); i
++) {
400 reg_const_load_single_instr(reg
, constant
, base_type
,
401 glsl_get_vector_elements(type
),
402 i
* glsl_get_vector_elements(type
),
406 reg_const_load_single_instr(reg
, constant
, base_type
,
407 glsl_get_vector_elements(type
), 0,
412 case GLSL_TYPE_STRUCT
:
413 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
414 const struct glsl_type
*field
= glsl_get_struct_elem_type(type
, i
);
415 nir_reg_dest new_reg
= reg
;
416 new_reg
.base_offset
+= offset
;
417 reg_const_load(new_reg
, constant
->elements
[i
], field
, impl
,
419 offset
+= type_size(field
);
423 case GLSL_TYPE_ARRAY
:
424 subtype
= glsl_get_array_element(type
);
425 subtype_size
= type_size(subtype
);
426 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
427 nir_reg_dest new_reg
= reg
;
428 new_reg
.base_offset
+= subtype_size
* i
;
429 reg_const_load(new_reg
, constant
->elements
[i
], subtype
, impl
,
440 /* recursively emits a register <-> dereference block copy */
/*
 * var_reg_block_copy_impl: recursively emits the instructions that copy
 * between a register (`reg`) and the variable location described by the
 * deref chain `deref_head`, for a value of GLSL type `type`. All emitted
 * instructions are inserted before `after`.
 *
 *  - Matrix: for each column, a copy of the deref chain is made, an array
 *    deref with base_offset = column index and the column type is created
 *    for it, and the function recurses with the register base_offset
 *    advanced by i * glsl_get_vector_elements(type).
 *  - Scalar / vector leaf: either a store_var_vecN intrinsic (register is
 *    the source) or a load_var_vecN intrinsic (register is the destination)
 *    is created, N being glsl_get_vector_elements(type). The register,
 *    its base_offset and a ralloc'd copy of its indirect source are copied
 *    over, and the predicate is copied with nir_src_copy().
 *  - Struct: for each field, a struct deref (field name and type) is
 *    appended to a copy of the chain and the function recurses with the
 *    register base_offset advanced by the running sum of preceding field
 *    sizes.
 *  - Array: for each element, an array deref with base_offset = i is
 *    created and the function recurses with the register base_offset
 *    advanced by i * type_size(element type).
 *
 * NOTE(review): this listing has dropped lines. Not visible: the
 * declarations of `offset` and `op`, remaining case labels, the test that
 * picks store vs. load (presumably `var_dest`), the guards around the
 * indirect and predicate copies (presumably NULL checks), and the right-hand
 * side of the child-link assignments for the matrix and array cases.
 * Confirm against the original file.
 */
442 var_reg_block_copy_impl(nir_reg_src reg
, nir_deref_var
*deref_head
,
443 nir_src
*predicate
, const struct glsl_type
*type
,
444 nir_instr
*after
, bool var_dest
, void *mem_ctx
)
448 switch (glsl_get_base_type(type
)) {
449 case GLSL_TYPE_FLOAT
:
453 if (glsl_type_is_matrix(type
)) {
454 for (unsigned i
= 0; i
< glsl_get_matrix_columns(type
); i
++) {
455 nir_deref_array
*deref_array
= nir_deref_array_create(mem_ctx
);
456 deref_array
->base_offset
= i
;
457 deref_array
->deref
.type
= glsl_get_column_type(type
);
459 nir_deref_var
*new_deref_head
=
460 nir_deref_as_var(nir_copy_deref(mem_ctx
, &deref_head
->deref
));
461 get_deref_tail(&new_deref_head
->deref
)->child
=
464 nir_reg_src new_reg
= reg
;
465 new_reg
.base_offset
+= i
* glsl_get_vector_elements(type
);
467 var_reg_block_copy_impl(new_reg
, new_deref_head
, predicate
,
468 glsl_get_column_type(type
), after
,
474 switch (glsl_get_vector_elements(type
)) {
475 case 1: op
= nir_intrinsic_store_var_vec1
; break;
476 case 2: op
= nir_intrinsic_store_var_vec2
; break;
477 case 3: op
= nir_intrinsic_store_var_vec3
; break;
478 case 4: op
= nir_intrinsic_store_var_vec4
; break;
479 default: assert(0); break;
482 nir_intrinsic_instr
*store
=
483 nir_intrinsic_instr_create(mem_ctx
, op
);
484 store
->variables
[0] = deref_head
;
485 store
->src
[0].reg
.reg
= reg
.reg
;
486 store
->src
[0].reg
.base_offset
= reg
.base_offset
;
488 store
->src
[0].reg
.indirect
= ralloc(mem_ctx
, nir_src
);
489 *store
->src
[0].reg
.indirect
= *reg
.indirect
;
493 store
->has_predicate
= true;
494 store
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
497 nir_instr_insert_before(after
, &store
->instr
);
500 switch (glsl_get_vector_elements(type
)) {
501 case 1: op
= nir_intrinsic_load_var_vec1
; break;
502 case 2: op
= nir_intrinsic_load_var_vec2
; break;
503 case 3: op
= nir_intrinsic_load_var_vec3
; break;
504 case 4: op
= nir_intrinsic_load_var_vec4
; break;
505 default: assert(0); break;
508 nir_intrinsic_instr
*load
=
509 nir_intrinsic_instr_create(mem_ctx
, op
);
510 load
->variables
[0] = deref_head
;
511 load
->dest
.reg
.reg
= reg
.reg
;
512 load
->dest
.reg
.base_offset
= reg
.base_offset
;
514 load
->dest
.reg
.indirect
= ralloc(mem_ctx
, nir_src
);
515 *load
->dest
.reg
.indirect
= *reg
.indirect
;
519 load
->has_predicate
= true;
520 load
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
523 nir_instr_insert_before(after
, &load
->instr
);
528 case GLSL_TYPE_STRUCT
:
530 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
531 const struct glsl_type
*field_type
=
532 glsl_get_struct_elem_type(type
, i
);
533 const char *field_name
= glsl_get_struct_elem_name(type
, i
);
535 nir_deref_struct
*deref_struct
=
536 nir_deref_struct_create(mem_ctx
, field_name
);
537 deref_struct
->deref
.type
= field_type
;
538 deref_struct
->elem
= field_name
;
540 nir_deref_var
*new_deref_head
=
541 nir_deref_as_var(nir_copy_deref(mem_ctx
, &deref_head
->deref
));
542 get_deref_tail(&new_deref_head
->deref
)->child
=
543 &deref_struct
->deref
;
545 nir_reg_src new_reg
= reg
;
546 new_reg
.base_offset
+= offset
;
548 var_reg_block_copy_impl(new_reg
, new_deref_head
, predicate
,
549 field_type
, after
, var_dest
, mem_ctx
);
551 offset
+= type_size(field_type
);
555 case GLSL_TYPE_ARRAY
:
556 for (unsigned i
= 0; i
< glsl_get_length(type
);
558 const struct glsl_type
*elem_type
= glsl_get_array_element(type
);
560 nir_deref_array
*deref_array
= nir_deref_array_create(mem_ctx
);
561 deref_array
->base_offset
= i
;
562 deref_array
->deref
.type
= elem_type
;
564 nir_deref_var
*new_deref_head
=
565 nir_deref_as_var(nir_copy_deref(mem_ctx
, &deref_head
->deref
));
566 get_deref_tail(&new_deref_head
->deref
)->child
=
569 nir_reg_src new_reg
= reg
;
570 new_reg
.base_offset
+= i
* type_size(elem_type
);
572 var_reg_block_copy_impl(new_reg
, new_deref_head
, predicate
,
573 elem_type
, after
, var_dest
, mem_ctx
);
/*
 * get_load_op: maps (variable mode, indirect or not, component count 1-4)
 * to the matching load intrinsic opcode:
 *
 *   nir_var_shader_in -> nir_intrinsic_load_input_vecN[_indirect]
 *   nir_var_uniform   -> nir_intrinsic_load_uniform_vecN[_indirect]
 *
 * A component count outside 1-4 trips assert(0). The last visible statement
 * is a fallback return of nir_intrinsic_load_input_vec1.
 *
 * NOTE(review): the `if (indirect)` / `switch (mode)` scaffolding and any
 * default cases are missing from this listing; the grouping above is
 * inferred from the order of the visible opcode tables - confirm.
 */
582 static nir_intrinsic_op
583 get_load_op(nir_variable_mode mode
, bool indirect
, unsigned num_components
)
587 case nir_var_shader_in
:
588 switch (num_components
) {
589 case 1: return nir_intrinsic_load_input_vec1_indirect
;
590 case 2: return nir_intrinsic_load_input_vec2_indirect
;
591 case 3: return nir_intrinsic_load_input_vec3_indirect
;
592 case 4: return nir_intrinsic_load_input_vec4_indirect
;
593 default: assert(0); break;
597 case nir_var_uniform
:
598 switch (num_components
) {
599 case 1: return nir_intrinsic_load_uniform_vec1_indirect
;
600 case 2: return nir_intrinsic_load_uniform_vec2_indirect
;
601 case 3: return nir_intrinsic_load_uniform_vec3_indirect
;
602 case 4: return nir_intrinsic_load_uniform_vec4_indirect
;
603 default: assert(0); break;
613 case nir_var_shader_in
:
614 switch (num_components
) {
615 case 1: return nir_intrinsic_load_input_vec1
;
616 case 2: return nir_intrinsic_load_input_vec2
;
617 case 3: return nir_intrinsic_load_input_vec3
;
618 case 4: return nir_intrinsic_load_input_vec4
;
619 default: assert(0); break;
623 case nir_var_uniform
:
624 switch (num_components
) {
625 case 1: return nir_intrinsic_load_uniform_vec1
;
626 case 2: return nir_intrinsic_load_uniform_vec2
;
627 case 3: return nir_intrinsic_load_uniform_vec3
;
628 case 4: return nir_intrinsic_load_uniform_vec4
;
629 default: assert(0); break;
639 return nir_intrinsic_load_input_vec1
;
642 /* emits an input -> reg block copy */
/*
 * reg_input_block_copy: emits one load intrinsic that reads an input or
 * uniform into the register destination `dest`.
 *
 * The opcode comes from get_load_op(mode, indirect != NULL, num_components).
 * const_index[0] receives `src_index` and const_index[1] receives `size`;
 * src[0] is set from `*indirect`, the predicate is copied with
 * nir_src_copy(), and the instruction is inserted before `after`.
 *
 * NOTE(review): one line is missing before each of the `*indirect` and
 * predicate uses (presumably NULL checks, since `indirect` is compared
 * against NULL above) - confirm.
 */
645 reg_input_block_copy(nir_reg_dest dest
, unsigned src_index
, nir_src
*indirect
,
646 nir_src
*predicate
, unsigned size
,
647 unsigned num_components
, nir_variable_mode mode
,
648 nir_instr
*after
, void *mem_ctx
)
650 nir_intrinsic_op op
= get_load_op(mode
, indirect
!= NULL
, num_components
);
652 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(mem_ctx
, op
);
653 load
->const_index
[0] = src_index
;
654 load
->const_index
[1] = size
;
656 load
->src
[0] = *indirect
;
658 load
->has_predicate
= true;
659 load
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
661 load
->dest
.reg
= dest
;
662 nir_instr_insert_before(after
, &load
->instr
);
665 /* emits a variable/input -> register block copy */
/*
 * var_reg_block_copy: copies the value referenced by the deref `src` into
 * the register destination `dest`, inserting instructions before `after`.
 *
 *  - When lower_io is set and the source variable's mode is
 *    nir_var_shader_in or nir_var_uniform, the copy becomes a single load
 *    via reg_input_block_copy(): the index is get_deref_offset() plus the
 *    variable's data.driver_location, and an indirect source is passed only
 *    when deref_has_indirect(src) is true.
 *  - The other visible path builds a register source from `dest`
 *    (base_offset and indirect are copied) and defers to
 *    var_reg_block_copy_impl() with var_dest = false, which emits variable
 *    loads.
 *
 * NOTE(review): this listing has dropped lines. Not visible: how `size` and
 * `num_components` are set for non-scalar/vector types (and for the
 * scalar/vector `size`), the declarations of `indirect` and `reg`, and the
 * tails of several calls. Confirm against the original file.
 */
668 var_reg_block_copy(nir_deref_var
*src
, nir_reg_dest dest
, nir_src
*predicate
,
669 bool lower_io
, nir_instr
*after
, nir_function_impl
*impl
,
670 bool native_integers
, void *mem_ctx
)
672 const struct glsl_type
*src_type
= get_deref_tail(&src
->deref
)->type
;
674 if (lower_io
&& (src
->var
->data
.mode
== nir_var_shader_in
||
675 src
->var
->data
.mode
== nir_var_uniform
)) {
676 unsigned size
, num_components
;
677 if (glsl_type_is_scalar(src_type
) || glsl_type_is_vector(src_type
)) {
678 num_components
= glsl_get_vector_elements(src_type
);
682 size
= type_size(src_type
);
684 bool has_indirect
= deref_has_indirect(src
);
686 nir_src
*indirect_ptr
= has_indirect
? &indirect
: NULL
;
687 unsigned offset
= get_deref_offset(src
, after
, impl
, native_integers
,
689 offset
+= src
->var
->data
.driver_location
;
691 reg_input_block_copy(dest
, offset
, indirect_ptr
, predicate
, size
,
692 num_components
, src
->var
->data
.mode
, after
,
697 reg
.base_offset
= dest
.base_offset
;
698 reg
.indirect
= dest
.indirect
;
700 var_reg_block_copy_impl(reg
, src
, predicate
, src_type
, after
, false,
705 /* emits a register -> variable copy */
/*
 * reg_var_block_copy: copies the register source `src` into the variable
 * location described by the deref `dest`. The type copied is the type of
 * the last link of the dest deref chain, and the work is delegated to
 * var_reg_block_copy_impl() with var_dest = true. Instructions are inserted
 * relative to `after` by that helper.
 */
707 reg_var_block_copy(nir_reg_src src
, nir_deref_var
*dest
, nir_src
*predicate
,
708 nir_instr
*after
, void *mem_ctx
)
710 const struct glsl_type
*dest_type
= get_deref_tail(&dest
->deref
)->type
;
712 var_reg_block_copy_impl(src
, dest
, predicate
, dest_type
, after
, true,
717 * emits an input -> variable block copy using an intermediate register
/*
 * var_var_block_copy: copies from the deref `src` to the deref `dest` by
 * way of a freshly created local register.
 *
 * The temporary register is shaped after the destination's tail type: a
 * scalar or vector gets glsl_get_vector_elements() components; the other
 * visible assignments make the register packed, single-component, with
 * type_size(type) array elements. The copy is then done in two steps:
 * var_reg_block_copy() (called with lower_io = true) fills the register
 * from `src`, and reg_var_block_copy() writes the register to `dest`.
 *
 * NOTE(review): the declaration of `reg_src` and the assignments that point
 * reg_src / reg_dest at the new register are missing from this listing -
 * confirm against the original file.
 */
720 var_var_block_copy(nir_deref_var
*src
, nir_deref_var
*dest
, nir_src
*predicate
,
721 nir_instr
*after
, nir_function_impl
*impl
,
722 bool native_integers
, void *mem_ctx
)
724 const struct glsl_type
*type
= get_deref_tail(&dest
->deref
)->type
;
725 nir_register
*reg
= nir_local_reg_create(impl
);
726 if (glsl_type_is_scalar(type
) || glsl_type_is_vector(type
)) {
727 reg
->num_components
= glsl_get_vector_elements(type
);
729 reg
->is_packed
= true;
730 reg
->num_components
= 1;
731 reg
->num_array_elems
= type_size(type
);
735 reg_src
.base_offset
= 0;
736 reg_src
.indirect
= NULL
;
739 nir_reg_dest reg_dest
;
740 reg_dest
.base_offset
= 0;
741 reg_dest
.indirect
= NULL
;
744 var_reg_block_copy(src
, reg_dest
, predicate
, true, after
, impl
,
745 native_integers
, mem_ctx
);
746 reg_var_block_copy(reg_src
, dest
, predicate
, after
, mem_ctx
);
749 /* emits a register -> register block copy */
/*
 * reg_reg_block_copy: copies between two registers by emitting `size`
 * nir_op_imov instructions before `after`.
 *
 * When neither register is packed, their component counts are asserted
 * equal. When both are packed, `size` is type_size(type). The component
 * count used for the write mask comes from the source register if the
 * destination is packed, and from the destination register otherwise.
 *
 * Move number i writes dest.base_offset + i and reads the source with its
 * base_offset advanced by i. The destination's indirect source is
 * duplicated into a ralloc'd nir_src when it is non-NULL, and the predicate
 * is copied with nir_src_copy().
 *
 * NOTE(review): this listing has dropped lines. Not visible: the trailing
 * parameter (`mem_ctx` is used in the body), the values `size` and
 * `num_components` take in the branches not shown, and the guard before the
 * predicate copy (presumably a NULL check). Confirm against the original.
 */
751 reg_reg_block_copy(nir_reg_dest dest
, nir_reg_src src
, nir_src
*predicate
,
752 const struct glsl_type
*type
, nir_instr
*after
,
755 if (!dest
.reg
->is_packed
&& !src
.reg
->is_packed
)
756 assert(dest
.reg
->num_components
== src
.reg
->num_components
);
758 unsigned size
, num_components
;
759 if (dest
.reg
->is_packed
&& src
.reg
->is_packed
) {
760 size
= type_size(type
);
764 if (dest
.reg
->is_packed
)
765 num_components
= src
.reg
->num_components
;
767 num_components
= dest
.reg
->num_components
;
770 for (unsigned i
= 0; i
< size
; i
++) {
771 nir_alu_instr
*move
= nir_alu_instr_create(mem_ctx
, nir_op_imov
);
772 move
->dest
.write_mask
= (1 << num_components
) - 1;
774 move
->dest
.dest
.reg
.reg
= dest
.reg
;
775 move
->dest
.dest
.reg
.base_offset
= dest
.base_offset
+ i
;
776 if (dest
.indirect
!= NULL
) {
777 move
->dest
.dest
.reg
.indirect
= ralloc(mem_ctx
, nir_src
);
778 *move
->dest
.dest
.reg
.indirect
= *dest
.indirect
;
782 move
->has_predicate
= true;
783 move
->predicate
= nir_src_copy(*predicate
, mem_ctx
);
786 move
->src
[0].src
.reg
= src
;
787 move
->src
[0].src
.reg
.base_offset
+= i
;
789 nir_instr_insert_before(after
, &move
->instr
);
/*
 * create_dest: builds a register destination for the access described by
 * `deref`.
 *
 *  - If the deref chain has an indirect array index, a nir_src is ralloc'd
 *    for dest.indirect (marked non-SSA) and base_offset is the result of
 *    get_deref_offset(), which may emit offset arithmetic before `instr`.
 *  - Otherwise base_offset comes from get_deref_offset() called with a NULL
 *    indirect pointer, and dest.indirect is NULL.
 *
 * NOTE(review): the declaration of `dest`, the assignment of `reg` into it,
 * the last argument of the first get_deref_offset() call (presumably
 * dest.indirect) and the return statement are missing from this listing.
 */
794 create_dest(nir_deref_var
*deref
, nir_instr
*instr
, nir_register
*reg
,
795 nir_function_impl
*impl
, bool native_integers
, void *mem_ctx
)
798 if (deref_has_indirect(deref
)) {
799 dest
.indirect
= ralloc(mem_ctx
, nir_src
);
800 dest
.indirect
->is_ssa
= false;
801 dest
.base_offset
= get_deref_offset(deref
, instr
,
802 impl
, native_integers
,
805 dest
.base_offset
= get_deref_offset(deref
, instr
,
806 impl
, native_integers
, NULL
);
807 dest
.indirect
= NULL
;
/*
 * create_src: source-side counterpart of create_dest(). Builds a register
 * source for the access described by `deref`.
 *
 *  - If the deref chain has an indirect array index, a nir_src is ralloc'd
 *    for src.indirect (marked non-SSA) and base_offset is the result of
 *    get_deref_offset(), which may emit offset arithmetic before `instr`.
 *  - Otherwise base_offset comes from get_deref_offset() called with a NULL
 *    indirect pointer.
 *
 * NOTE(review): the declaration of `src`, the assignment of `reg` into it,
 * the last argument of the first get_deref_offset() call (presumably
 * src.indirect), the NULL assignment to src.indirect in the second branch
 * and the return statement are missing from this listing.
 */
815 create_src(nir_deref_var
*deref
, nir_instr
*instr
, nir_register
*reg
,
816 nir_function_impl
*impl
, bool native_integers
, void *mem_ctx
)
819 if (deref_has_indirect(deref
)) {
820 src
.indirect
= ralloc(mem_ctx
, nir_src
);
821 src
.indirect
->is_ssa
= false;
822 src
.base_offset
= get_deref_offset(deref
, instr
,
823 impl
, native_integers
,
826 src
.base_offset
= get_deref_offset(deref
, instr
,
827 impl
, native_integers
, NULL
);
/*
 * handle_var_copy: lowers a copy_var intrinsic. variables[0] is the
 * destination deref and variables[1] the source deref; the copied type is
 * the type of the destination chain's last link. Both variables are looked
 * up in `ht`.
 *
 * Visible code paths:
 *  - register -> register: create_dest() + create_src() feed
 *    reg_reg_block_copy();
 *  - variable/input -> register: var_reg_block_copy();
 *  - register -> variable: reg_var_block_copy();
 *  - variable -> variable: nothing is done unless lower_io is set and the
 *    source mode is nir_var_shader_in or nir_var_uniform, in which case
 *    var_var_block_copy() goes through a temporary register.
 *
 * New instructions are inserted before the original, and the final visible
 * statement removes an instruction with nir_instr_remove().
 *
 * NOTE(review): this listing has dropped lines - the tests on the lookup
 * results that select between these paths (presumably NULL checks on
 * `entry`), the early exit after "nothing to do here", and therefore
 * whether the removal is unconditional are not visible. Confirm against the
 * original file.
 */
836 handle_var_copy(nir_intrinsic_instr
*instr
, nir_function_impl
*impl
,
837 bool native_integers
, bool lower_io
, struct hash_table
*ht
)
839 void *mem_ctx
= ralloc_parent(instr
);
841 struct hash_entry
*entry
;
843 nir_variable
*dest_var
= instr
->variables
[0]->var
;
844 nir_variable
*src_var
= instr
->variables
[1]->var
;
846 const struct glsl_type
*type
=
847 get_deref_tail(&instr
->variables
[0]->deref
)->type
;
849 nir_src
*predicate
= instr
->has_predicate
? &instr
->predicate
: NULL
;
852 * The source can be either:
853 * 1. a variable we're lowering to a register
854 * 2. an input or uniform we're lowering to loads from an index
855 * 3. a variable we can't lower yet
857 * and similarly, the destination can be either:
858 * 1. a variable we're lowering to a register
859 * 2. a variable we can't lower yet
861 * meaning that there are six cases, including the trivial one (where
862 * source and destination are #3 and #2 respectively) where we can't do
866 entry
= _mesa_hash_table_search(ht
, dest_var
);
868 nir_reg_dest dest
= create_dest(instr
->variables
[0], &instr
->instr
,
869 (nir_register
*) entry
->data
, impl
,
870 native_integers
, mem_ctx
);
872 entry
= _mesa_hash_table_search(ht
, src_var
);
874 nir_reg_src src
= create_src(instr
->variables
[1], &instr
->instr
,
875 (nir_register
*) entry
->data
, impl
,
876 native_integers
, mem_ctx
);
878 reg_reg_block_copy(dest
, src
, predicate
, type
, &instr
->instr
, mem_ctx
);
880 var_reg_block_copy(instr
->variables
[1], dest
, predicate
, lower_io
,
881 &instr
->instr
, impl
, native_integers
, mem_ctx
);
884 entry
= _mesa_hash_table_search(ht
, src_var
);
886 nir_reg_src src
= create_src(instr
->variables
[1], &instr
->instr
,
887 (nir_register
*) entry
->data
, impl
,
888 native_integers
, mem_ctx
);
890 reg_var_block_copy(src
, instr
->variables
[0], predicate
, &instr
->instr
,
893 if (!lower_io
|| (src_var
->data
.mode
!= nir_var_shader_in
&&
894 src_var
->data
.mode
!= nir_var_uniform
)) {
895 /* nothing to do here */
899 var_var_block_copy(instr
->variables
[1], instr
->variables
[0], predicate
,
900 &instr
->instr
, impl
, native_integers
, mem_ctx
);
904 nir_instr_remove(&instr
->instr
);
/*
 * handle_var_load: lowers a load_var_vecN intrinsic. The loaded variable
 * (variables[0]->var) is looked up in `ht`.
 *
 *  - Input/uniform path (lower_io set and the variable's mode is
 *    nir_var_shader_in or nir_var_uniform): a load intrinsic chosen by
 *    get_load_op() replaces the original. Its destination is the original
 *    destination, const_index[0] is get_deref_offset() plus the variable's
 *    data.driver_location, const_index[1] is 1, src[0] is the computed
 *    indirect source, and the predicate is copied when present.
 *  - Register path: an imov is emitted from the register source built by
 *    create_src() into the original destination, with a write mask covering
 *    the intrinsic's dest_components; the predicate is copied when present.
 *
 * New instructions are inserted before the original, and the final visible
 * statement removes an instruction with nir_instr_remove().
 *
 * NOTE(review): this listing has dropped lines - the declaration of
 * `indirect`, the guard before `load->src[0] = indirect` (presumably
 * has_indirect), the tests on `entry` that select the register path or bail
 * out, and the last argument of get_load_op() are not visible. Confirm
 * against the original file.
 */
908 handle_var_load(nir_intrinsic_instr
*instr
, nir_function_impl
*impl
,
909 bool native_integers
, bool lower_io
, struct hash_table
*ht
)
911 void *mem_ctx
= ralloc_parent(instr
);
913 struct hash_entry
*entry
=
914 _mesa_hash_table_search(ht
, instr
->variables
[0]->var
);
917 nir_variable
*src_var
= instr
->variables
[0]->var
;
919 if (lower_io
&& (src_var
->data
.mode
== nir_var_shader_in
||
920 src_var
->data
.mode
== nir_var_uniform
)) {
921 bool has_indirect
= deref_has_indirect(instr
->variables
[0]);
922 unsigned num_components
=
923 nir_intrinsic_infos
[instr
->intrinsic
].dest_components
;
925 unsigned offset
= get_deref_offset(instr
->variables
[0], &instr
->instr
,
926 impl
, native_integers
, &indirect
);
927 offset
+= src_var
->data
.driver_location
;
929 nir_intrinsic_op op
= get_load_op(src_var
->data
.mode
, has_indirect
,
931 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(mem_ctx
, op
);
932 load
->dest
= instr
->dest
;
933 load
->const_index
[0] = (int) offset
;
934 load
->const_index
[1] = 1;
936 load
->src
[0] = indirect
;
938 if (instr
->has_predicate
) {
939 load
->has_predicate
= true;
940 load
->predicate
= nir_src_copy(instr
->predicate
, mem_ctx
);
943 nir_instr_insert_before(&instr
->instr
, &load
->instr
);
948 nir_register
*reg
= (nir_register
*) entry
->data
;
950 nir_alu_instr
*move
= nir_alu_instr_create(mem_ctx
, nir_op_imov
);
951 unsigned dest_components
=
952 nir_intrinsic_infos
[instr
->intrinsic
].dest_components
;
953 move
->dest
.dest
= instr
->dest
;
954 move
->dest
.write_mask
= (1 << dest_components
) - 1;
955 move
->src
[0].src
.reg
= create_src(instr
->variables
[0], &instr
->instr
,
956 reg
, impl
, native_integers
, mem_ctx
);
957 if (instr
->has_predicate
) {
958 move
->has_predicate
= true;
959 move
->predicate
= nir_src_copy(instr
->predicate
, mem_ctx
);
961 nir_instr_insert_before(&instr
->instr
, &move
->instr
);
964 nir_instr_remove(&instr
->instr
);
/*
 * handle_var_store: lowers a store_var_vecN intrinsic whose variable
 * (variables[0]->var) is looked up in `ht`.
 *
 * An imov is emitted before the original instruction: its destination is
 * the register destination built by create_dest(), its write mask covers
 * the intrinsic's src_components[0], its source is the original src[0], and
 * the predicate is copied when present. The final visible statement removes
 * an instruction with nir_instr_remove().
 *
 * NOTE(review): lines are missing after the hash lookup (presumably an
 * early exit when the variable has no register) - confirm.
 */
968 handle_var_store(nir_intrinsic_instr
*instr
, nir_function_impl
*impl
,
969 bool native_integers
, bool lower_io
, struct hash_table
*ht
)
971 void *mem_ctx
= ralloc_parent(instr
);
973 struct hash_entry
*entry
=
974 _mesa_hash_table_search(ht
, instr
->variables
[0]->var
);
978 nir_register
*reg
= (nir_register
*) entry
->data
;
980 nir_alu_instr
*move
= nir_alu_instr_create(mem_ctx
, nir_op_imov
);
981 unsigned src_components
=
982 nir_intrinsic_infos
[instr
->intrinsic
].src_components
[0];
983 move
->dest
.dest
.reg
= create_dest(instr
->variables
[0], &instr
->instr
,
984 reg
, impl
, native_integers
, mem_ctx
);
985 move
->dest
.write_mask
= (1 << src_components
) - 1;
986 move
->src
[0].src
= instr
->src
[0];
987 if (instr
->has_predicate
) {
988 move
->has_predicate
= true;
989 move
->predicate
= nir_src_copy(instr
->predicate
, mem_ctx
);
991 nir_instr_insert_before(&instr
->instr
, &move
->instr
);
992 nir_instr_remove(&instr
->instr
);
/*
 * Fields of the per-impl rewrite state that rewrite_block_cb() receives
 * through its void pointer: the variable -> register table, the two
 * lowering flags, and the function impl being rewritten.
 *
 * NOTE(review): the enclosing struct/typedef lines are missing from this
 * listing; the type is referred to as `rewrite_state` elsewhere in the file.
 */
996 struct hash_table
*ht
;
997 bool native_integers
, lower_io
;
998 nir_function_impl
*impl
;
/*
 * rewrite_block_cb: per-block callback; `_state` is a rewrite_state. Walks
 * the block's instructions with the _safe iterator (the handlers call
 * nir_instr_remove()) and dispatches variable intrinsics:
 *
 *   load_var_vec1..4   -> handle_var_load()
 *   store_var_vec1..4  -> handle_var_store()
 *   copy_var           -> handle_var_copy()
 *
 * NOTE(review): the break/default lines of the switch and the return
 * statement are missing from this listing.
 */
1002 rewrite_block_cb(nir_block
*block
, void *_state
)
1004 rewrite_state
*state
= (rewrite_state
*) _state
;
1006 nir_foreach_instr_safe(block
, instr
) {
1007 if (instr
->type
== nir_instr_type_intrinsic
) {
1008 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
1009 switch (intrin
->intrinsic
) {
1010 case nir_intrinsic_load_var_vec1
:
1011 case nir_intrinsic_load_var_vec2
:
1012 case nir_intrinsic_load_var_vec3
:
1013 case nir_intrinsic_load_var_vec4
:
1014 handle_var_load(intrin
, state
->impl
, state
->native_integers
,
1015 state
->lower_io
, state
->ht
);
1018 case nir_intrinsic_store_var_vec1
:
1019 case nir_intrinsic_store_var_vec2
:
1020 case nir_intrinsic_store_var_vec3
:
1021 case nir_intrinsic_store_var_vec4
:
1022 handle_var_store(intrin
, state
->impl
, state
->native_integers
,
1023 state
->lower_io
, state
->ht
);
1026 case nir_intrinsic_copy_var
:
1027 handle_var_copy(intrin
, state
->impl
, state
->native_integers
,
1028 state
->lower_io
, state
->ht
);
/*
 * rewrite_impl: packs the lowering flags into a rewrite_state and runs
 * rewrite_block_cb() over every block of `impl`.
 *
 * NOTE(review): the assignments of state.ht and state.impl are missing from
 * this listing (rewrite_block_cb reads both) - confirm they are present in
 * the original file.
 */
1041 rewrite_impl(nir_function_impl
*impl
, struct hash_table
*ht
,
1042 bool native_integers
, bool lower_io
)
1044 rewrite_state state
;
1046 state
.native_integers
= native_integers
;
1047 state
.lower_io
= lower_io
;
1050 nir_foreach_block(impl
, rewrite_block_cb
, &state
);
/*
 * insert_load_const_impl: for the variables in the list `vars`, looks up
 * the register assigned to the variable in `ht` and emits the loads of its
 * constant_initializer into that register through reg_const_load(), using a
 * destination with base_offset 0 and no indirect.
 *
 * NOTE(review): this listing has dropped lines - the statement taken when
 * constant_initializer is NULL (presumably `continue`), any check of the
 * lookup result, the declaration of `dest` and the assignment of `reg` into
 * it, and the last argument of reg_const_load() are not visible.
 */
1054 insert_load_const_impl(nir_function_impl
*impl
, struct exec_list
*vars
,
1055 struct hash_table
*ht
)
1057 void *mem_ctx
= ralloc_parent(impl
);
1059 foreach_list_typed(nir_variable
, var
, node
, vars
) {
1060 if (var
->constant_initializer
== NULL
)
1063 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
1065 nir_register
*reg
= (nir_register
*) entry
->data
;
1068 dest
.base_offset
= 0;
1069 dest
.indirect
= NULL
;
1070 reg_const_load(dest
, var
->constant_initializer
, var
->type
, impl
,
/*
 * get_store_op: maps (indirect or not, component count 1-4) to the matching
 * output store opcode, nir_intrinsic_store_output_vecN[_indirect]. A
 * component count outside 1-4 trips assert(0); the last visible statement
 * is a fallback return of nir_intrinsic_store_output_vec1.
 *
 * NOTE(review): the `if (indirect)` / `else` scaffolding is missing from
 * this listing; the grouping is inferred from the order of the two opcode
 * tables - confirm.
 */
1076 static nir_intrinsic_op
1077 get_store_op(bool indirect
, unsigned num_components
)
1080 switch (num_components
) {
1081 case 1: return nir_intrinsic_store_output_vec1_indirect
;
1082 case 2: return nir_intrinsic_store_output_vec2_indirect
;
1083 case 3: return nir_intrinsic_store_output_vec3_indirect
;
1084 case 4: return nir_intrinsic_store_output_vec4_indirect
;
1085 default: assert(0); break;
1088 switch (num_components
) {
1089 case 1: return nir_intrinsic_store_output_vec1
;
1090 case 2: return nir_intrinsic_store_output_vec2
;
1091 case 3: return nir_intrinsic_store_output_vec3
;
1092 case 4: return nir_intrinsic_store_output_vec4
;
1093 default: assert(0); break;
1097 return nir_intrinsic_store_output_vec1
;
1100 /* emits a reg -> output block copy after a block */
/*
 * reg_output_block_copy_block: emits one non-indirect output store
 * (opcode from get_store_op(false, num_components)) and places it with
 * nir_instr_insert_after_block() on `block`.
 *
 * const_index[0] is the output index `dest_index`; const_index[1] is `size`,
 * except that a size of 0 is stored as 1. The stored value is the register
 * source `src`.
 */
1102 reg_output_block_copy_block(nir_reg_src src
, unsigned dest_index
,
1103 unsigned num_components
, unsigned size
,
1104 nir_block
*block
, void *mem_ctx
)
1106 nir_intrinsic_op op
= get_store_op(false, num_components
);
1108 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(mem_ctx
, op
);
1109 store
->const_index
[0] = dest_index
;
1110 store
->const_index
[1] = (size
== 0) ? 1 : size
;
1111 store
->src
[0].reg
= src
;
1112 nir_instr_insert_after_block(block
, &store
->instr
);
1115 /* emits a reg -> output copy immediately before the instruction `after` */
/*
 * reg_output_block_copy_instr: same store as reg_output_block_copy_block(),
 * but positioned relative to an instruction. Despite the parameter name,
 * the store is inserted BEFORE `after` (nir_instr_insert_before).
 *
 * const_index[0] is the output index `dest_index`; const_index[1] is `size`,
 * except that a size of 0 is stored as 1. The stored value is the register
 * source `src`; the opcode is the non-indirect store for `num_components`.
 */
1117 reg_output_block_copy_instr(nir_reg_src src
, unsigned dest_index
,
1118 unsigned num_components
, unsigned size
,
1119 nir_instr
*after
, void *mem_ctx
)
1121 nir_intrinsic_op op
= get_store_op(false, num_components
);
1123 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(mem_ctx
, op
);
1124 store
->const_index
[0] = dest_index
;
1125 store
->const_index
[1] = (size
== 0) ? 1 : size
;
1126 store
->src
[0].reg
= src
;
1127 nir_instr_insert_before(after
, &store
->instr
);
/*
 * find_main: scans shader->functions for the function named "main",
 * asserts that it has exactly one overload, and returns that overload's
 * impl.
 *
 * NOTE(review): what happens when no function is named "main" is not
 * visible in this listing (trailing lines are missing).
 */
1130 static nir_function_impl
*
1131 find_main(nir_shader
*shader
)
1133 foreach_list_typed(nir_function
, func
, node
, &shader
->functions
) {
1134 if (strcmp(func
->name
, "main") == 0) {
1135 assert(exec_list_length(&func
->overload_list
) == 1);
1136 nir_function_overload
*overload
= nir_function_first_overload(func
);
1137 return overload
->impl
;
/*
 * insert_output_reg_copies: for every variable in the shader's outputs
 * table, looks up its register in `ht` and emits a register -> output store
 * at the variable's data.driver_location, passing the register's
 * num_components and num_array_elems. The register is read from offset 0
 * with no indirect.
 *
 * Two placements are visible: reg_output_block_copy_instr() (relative to an
 * instruction) and reg_output_block_copy_block() (after a block).
 *
 * NOTE(review): this listing has dropped lines - the declaration of `src`
 * and the assignment of `reg` into it, the test that chooses between the
 * two placements (callers pass either `block` or `after` as NULL, so
 * presumably a NULL check), and the trailing arguments of both calls are
 * not visible.
 */
1146 insert_output_reg_copies(nir_shader
*shader
, nir_block
*block
,
1147 nir_instr
*after
, struct hash_table
*ht
)
1149 struct hash_entry
*entry
;
1150 hash_table_foreach(shader
->outputs
, entry
) {
1151 nir_variable
*var
= (nir_variable
*) entry
->data
;
1153 struct hash_entry
*entry2
;
1154 entry2
= _mesa_hash_table_search(ht
, var
);
1156 nir_register
*reg
= (nir_register
*) entry2
->data
;
1159 src
.base_offset
= 0;
1160 src
.indirect
= NULL
;
1163 reg_output_block_copy_instr(src
, var
->data
.driver_location
,
1164 reg
->num_components
,
1165 reg
->num_array_elems
,
1168 reg_output_block_copy_block(src
, var
->data
.driver_location
,
1169 reg
->num_components
,
1170 reg
->num_array_elems
,
/*
 * Fields of the state handed to insert_output_reg_copies_emit_vertex(): the
 * variable -> register table and a flag recording whether any emit_vertex
 * intrinsic was seen.
 *
 * NOTE(review): the enclosing struct/typedef lines and a `shader` member
 * (read as state->shader by the callback) are missing from this listing;
 * the type is referred to as `reg_output_state` elsewhere in the file.
 */
1178 struct hash_table
*ht
;
1180 bool found_emit_vertex
;
/*
 * insert_output_reg_copies_emit_vertex: per-block callback; `_state` is a
 * reg_output_state. For every nir_intrinsic_emit_vertex instruction in the
 * block it calls insert_output_reg_copies() with that instruction as the
 * insertion point (block argument NULL) and sets state->found_emit_vertex.
 *
 * NOTE(review): the return statement is missing from this listing.
 */
1184 insert_output_reg_copies_emit_vertex(nir_block
*block
, void *_state
)
1186 reg_output_state
*state
= (reg_output_state
*) _state
;
1188 nir_foreach_instr(block
, instr
) {
1189 if (instr
->type
== nir_instr_type_intrinsic
) {
1190 nir_intrinsic_instr
*intrin_instr
= nir_instr_as_intrinsic(instr
);
1191 if (intrin_instr
->intrinsic
== nir_intrinsic_emit_vertex
) {
1192 insert_output_reg_copies(state
->shader
, NULL
, instr
, state
->ht
);
1193 state
->found_emit_vertex
= true;
/*
 * insert_output_reg_copies_shader: adds the register -> output stores to
 * the shader's main function (located with find_main()).
 *
 * It first runs insert_output_reg_copies_emit_vertex() over every block of
 * main, which places stores at each emit_vertex intrinsic. If no
 * emit_vertex was found, stores are instead added for every predecessor
 * block of main's end_block, i.e. on every path that leaves the function.
 *
 * NOTE(review): the assignment of state.ht is missing from this listing
 * (the callback reads state->ht) - confirm it is present in the original.
 */
1202 insert_output_reg_copies_shader(nir_shader
*shader
, struct hash_table
*ht
)
1204 nir_function_impl
*main_impl
= find_main(shader
);
1206 reg_output_state state
;
1207 state
.shader
= shader
;
1209 state
.found_emit_vertex
= false;
1210 nir_foreach_block(main_impl
, insert_output_reg_copies_emit_vertex
, &state
);
1212 if (!state
.found_emit_vertex
) {
1213 struct set_entry
*entry
;
1214 set_foreach(main_impl
->end_block
->predecessors
, entry
) {
1215 nir_block
*block
= (nir_block
*) entry
->key
;
1216 insert_output_reg_copies(shader
, block
, NULL
, ht
);
/*
 * rewrite_shader: for every overload that has an impl:
 *
 *  1. emits the constant-initializer loads for the impl's local variables;
 *  2. if lower_globals is set and the function is named "main", also emits
 *     the constant-initializer loads for the shader's global variables;
 *  3. rewrites the impl's variable loads/stores/copies via rewrite_impl().
 */
1222 rewrite_shader(nir_shader
*shader
, struct hash_table
*ht
, bool native_integers
,
1223 bool lower_globals
, bool lower_io
)
1225 nir_foreach_overload(shader
, overload
) {
1226 if (overload
->impl
) {
1227 insert_load_const_impl(overload
->impl
, &overload
->impl
->locals
, ht
);
1228 if (lower_globals
&& strcmp(overload
->function
->name
, "main") == 0)
1229 insert_load_const_impl(overload
->impl
, &shader
->globals
, ht
);
1230 rewrite_impl(overload
->impl
, ht
, native_integers
, lower_io
);
/*
 * nir_lower_variables_scalar: entry point of the pass. Visible steps, in
 * order:
 *
 *  1. assign_var_locations_shader() - give inputs/outputs/uniforms their
 *     driver locations;
 *  2. init_var_ht() - create a register for each variable to be lowered;
 *  3. remove_local_vars_shader() - exclude variables used by calls,
 *     parameters and return values;
 *  4. rewrite_shader() - replace variable loads/stores/copies;
 *  5. insert_output_reg_copies_shader() - store output registers to the
 *     real outputs;
 *  6. destroy the hash table.
 *
 * NOTE(review): lines are missing around steps 1 and 5 (presumably
 * `if (lower_io)` guards) and the last argument of init_var_ht() is not
 * visible - confirm against the original file.
 */
1236 nir_lower_variables_scalar(nir_shader
*shader
, bool lower_globals
,
1237 bool lower_io
, bool add_names
, bool native_integers
)
1240 assign_var_locations_shader(shader
);
1241 struct hash_table
*ht
= init_var_ht(shader
, lower_globals
, lower_io
,
1243 remove_local_vars_shader(shader
, ht
);
1244 rewrite_shader(shader
, ht
, native_integers
, lower_globals
, lower_io
);
1246 insert_output_reg_copies_shader(shader
, ht
);
1247 _mesa_hash_table_destroy(ht
, NULL
);