3 * Copyright (c) 2019 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "sfn_nir_lower_fs_out_to_vector.h"
29 #include "nir_builder.h"
30 #include "nir_deref.h"
31 #include "util/u_math.h"
44 struct nir_intrinsic_instr_less
{
45 bool operator () (const nir_intrinsic_instr
*lhs
, const nir_intrinsic_instr
*rhs
) const
47 nir_variable
*vlhs
= nir_deref_instr_get_variable(nir_src_as_deref(lhs
->src
[0]));
48 nir_variable
*vrhs
= nir_deref_instr_get_variable(nir_src_as_deref(rhs
->src
[0]));
50 auto ltype
= glsl_get_base_type(vlhs
->type
);
51 auto rtype
= glsl_get_base_type(vrhs
->type
);
55 return vlhs
->data
.location
< vrhs
->data
.location
;
59 class NirLowerIOToVector
{
61 NirLowerIOToVector(int base_slot
);
62 bool run(nir_function_impl
*shader
);
65 bool var_can_merge(const nir_variable
*lhs
, const nir_variable
*rhs
);
66 bool var_can_rewrite(nir_variable
*var
) const;
67 void create_new_io_vars(nir_shader
*shader
);
68 void create_new_io_var(nir_shader
*shader
, unsigned location
, unsigned comps
);
70 nir_deref_instr
*clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
71 const nir_deref_instr
*src_head
);
73 bool vectorize_block(nir_builder
*b
, nir_block
*block
);
74 bool instr_can_rewrite(nir_instr
*instr
);
75 bool vec_instr_set_remove(nir_builder
*b
,nir_instr
*instr
);
77 using InstrSet
= multiset
<nir_intrinsic_instr
*, nir_intrinsic_instr_less
>;
78 using InstrSubSet
= std::pair
<InstrSet::iterator
, InstrSet::iterator
>;
80 bool vec_instr_stack_pop(nir_builder
*b
, InstrSubSet
& ir_set
,
81 nir_intrinsic_instr
*instr
);
83 array
<array
<nir_variable
*, 4>, 16> m_vars
;
87 virtual nir_variable_mode
get_io_mode(nir_shader
*shader
) const = 0;
88 virtual bool instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const = 0;
89 virtual bool var_can_rewrite_slot(nir_variable
*var
) const = 0;
90 virtual void create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
91 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
) = 0;
96 class NirLowerFSOutToVector
: public NirLowerIOToVector
{
98 NirLowerFSOutToVector();
101 nir_variable_mode
get_io_mode(nir_shader
*shader
) const override
;
102 bool var_can_rewrite_slot(nir_variable
*var
) const override
;
103 void create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
104 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
) override
;
105 bool instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const override
;
107 nir_ssa_def
*create_combined_vector(nir_builder
*b
, nir_ssa_def
**srcs
,
108 int first_comp
, int num_comp
);
111 bool r600_lower_fs_out_to_vector(nir_shader
*shader
)
113 NirLowerFSOutToVector processor
;
115 assert(shader
->info
.stage
== MESA_SHADER_FRAGMENT
);
116 bool progress
= false;
118 nir_foreach_function(function
, shader
) {
120 progress
|= processor
.run(function
->impl
);
125 NirLowerIOToVector::NirLowerIOToVector(int base_slot
):
127 m_base_slot(base_slot
)
129 for(auto& a
: m_vars
)
134 bool NirLowerIOToVector::run(nir_function_impl
*impl
)
137 nir_builder_init(&b
, impl
);
139 nir_metadata_require(impl
, nir_metadata_dominance
);
140 create_new_io_vars(impl
->function
->shader
);
142 bool progress
= vectorize_block(&b
, nir_start_block(impl
));
144 nir_metadata_preserve(impl
, nir_metadata_block_index
| nir_metadata_dominance
);
149 void NirLowerIOToVector::create_new_io_vars(nir_shader
*shader
)
151 nir_variable_mode mode
= get_io_mode(shader
);
153 bool can_rewrite_vars
= false;
154 nir_foreach_variable_with_modes(var
, shader
, mode
) {
155 if (var_can_rewrite(var
)) {
156 can_rewrite_vars
= true;
157 unsigned loc
= var
->data
.location
- m_base_slot
;
158 m_vars
[loc
][var
->data
.location_frac
] = var
;
162 if (!can_rewrite_vars
)
165 /* We don't handle combining vars of different type e.g. different array
168 for (unsigned i
= 0; i
< 16; i
++) {
171 for (unsigned j
= 0; j
< 3; j
++) {
175 for (unsigned k
= j
+ 1; k
< 4; k
++) {
179 if (!var_can_merge(m_vars
[i
][j
], m_vars
[i
][k
]))
183 for (unsigned n
= 0; n
< glsl_get_components(m_vars
[i
][j
]->type
); ++n
)
184 comps
|= 1 << (m_vars
[i
][j
]->data
.location_frac
+ n
);
186 for (unsigned n
= 0; n
< glsl_get_components(m_vars
[i
][k
]->type
); ++n
)
187 comps
|= 1 << (m_vars
[i
][k
]->data
.location_frac
+ n
);
192 create_new_io_var(shader
, i
, comps
);
197 NirLowerIOToVector::var_can_merge(const nir_variable
*lhs
,
198 const nir_variable
*rhs
)
200 return (glsl_get_base_type(lhs
->type
) == glsl_get_base_type(rhs
->type
));
204 NirLowerIOToVector::create_new_io_var(nir_shader
*shader
,
205 unsigned location
, unsigned comps
)
207 unsigned num_comps
= util_bitcount(comps
);
208 assert(num_comps
> 1);
210 /* Note: u_bit_scan() strips a component of the comps bitfield here */
211 unsigned first_comp
= u_bit_scan(&comps
);
213 nir_variable
*var
= nir_variable_clone(m_vars
[location
][first_comp
], shader
);
214 var
->data
.location_frac
= first_comp
;
215 var
->type
= glsl_replace_vector_type(var
->type
, num_comps
);
217 nir_shader_add_variable(shader
, var
);
219 m_vars
[location
][first_comp
] = var
;
222 const int comp
= u_bit_scan(&comps
);
223 if (m_vars
[location
][comp
]) {
224 m_vars
[location
][comp
] = var
;
229 bool NirLowerIOToVector::var_can_rewrite(nir_variable
*var
) const
231 /* Skip complex types we don't split in the first place */
232 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
)))
235 if (glsl_get_bit_size(glsl_without_array(var
->type
)) != 32)
238 return var_can_rewrite_slot(var
);
242 NirLowerIOToVector::vectorize_block(nir_builder
*b
, nir_block
*block
)
244 bool progress
= false;
246 nir_foreach_instr_safe(instr
, block
) {
247 if (instr_can_rewrite(instr
)) {
248 instr
->index
= m_next_index
++;
249 nir_intrinsic_instr
*ir
= nir_instr_as_intrinsic(instr
);
250 m_block_io
.insert(ir
);
254 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++) {
255 nir_block
*child
= block
->dom_children
[i
];
256 progress
|= vectorize_block(b
, child
);
259 nir_foreach_instr_reverse_safe(instr
, block
) {
260 progress
|= vec_instr_set_remove(b
, instr
);
267 bool NirLowerIOToVector::instr_can_rewrite(nir_instr
*instr
)
269 if (instr
->type
!= nir_instr_type_intrinsic
)
272 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
274 if (intr
->num_components
> 3)
277 return instr_can_rewrite_type(intr
);
280 bool NirLowerIOToVector::vec_instr_set_remove(nir_builder
*b
,nir_instr
*instr
)
282 if (!instr_can_rewrite(instr
))
285 nir_intrinsic_instr
*ir
= nir_instr_as_intrinsic(instr
);
286 auto entry
= m_block_io
.equal_range(ir
);
287 if (entry
.first
!= m_block_io
.end()) {
288 vec_instr_stack_pop(b
, entry
, ir
);
294 NirLowerIOToVector::clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
295 const nir_deref_instr
*src_head
)
297 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
302 assert(src_head
->deref_type
== nir_deref_type_array
);
304 dst_tail
= clone_deref_array(b
, dst_tail
, parent
);
306 return nir_build_deref_array(b
, dst_tail
,
307 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
310 NirLowerFSOutToVector::NirLowerFSOutToVector():
311 NirLowerIOToVector(FRAG_RESULT_COLOR
)
316 bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable
*var
) const
318 return ((var
->data
.mode
== nir_var_shader_out
) &&
319 ((var
->data
.location
== FRAG_RESULT_COLOR
) ||
320 ((var
->data
.location
>= FRAG_RESULT_DATA0
) &&
321 (var
->data
.location
<= FRAG_RESULT_DATA7
))));
324 bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder
*b
, InstrSubSet
&ir_set
,
325 nir_intrinsic_instr
*instr
)
327 vector
< nir_intrinsic_instr
*> ir_sorted_set(ir_set
.first
, ir_set
.second
);
328 std::sort(ir_sorted_set
.begin(), ir_sorted_set
.end(),
329 [](const nir_intrinsic_instr
*lhs
, const nir_intrinsic_instr
*rhs
) {
330 return lhs
->instr
.index
> rhs
->instr
.index
;
334 nir_intrinsic_instr
*intr
= *ir_sorted_set
.begin();
335 nir_variable
*var
= nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
337 unsigned loc
= var
->data
.location
- m_base_slot
;
339 nir_variable
*new_var
= m_vars
[loc
][var
->data
.location_frac
];
340 unsigned num_comps
= glsl_get_vector_elements(glsl_without_array(new_var
->type
));
341 unsigned old_num_comps
= glsl_get_vector_elements(glsl_without_array(var
->type
));
343 /* Don't bother walking the stack if this component can't be vectorised. */
344 if (old_num_comps
> 3) {
348 if (new_var
== var
) {
352 b
->cursor
= nir_after_instr(&intr
->instr
);
353 nir_ssa_undef_instr
*instr_undef
=
354 nir_ssa_undef_instr_create(b
->shader
, 1, 32);
355 nir_builder_instr_insert(b
, &instr_undef
->instr
);
357 nir_ssa_def
*srcs
[4];
358 for (int i
= 0; i
< 4; i
++) {
359 srcs
[i
] = &instr_undef
->def
;
361 srcs
[var
->data
.location_frac
] = intr
->src
[1].ssa
;
363 for (auto k
= ir_sorted_set
.begin() + 1; k
!= ir_sorted_set
.end(); ++k
) {
364 nir_intrinsic_instr
*intr2
= *k
;
366 nir_deref_instr_get_variable(nir_src_as_deref(intr2
->src
[0]));
367 unsigned loc2
= var
->data
.location
- m_base_slot
;
369 if (m_vars
[loc
][var
->data
.location_frac
] !=
370 m_vars
[loc2
][var2
->data
.location_frac
]) {
374 assert(glsl_get_vector_elements(glsl_without_array(var2
->type
)) < 4);
376 if (srcs
[var2
->data
.location_frac
] == &instr_undef
->def
) {
377 assert(intr2
->src
[1].is_ssa
);
378 assert(intr2
->src
[1].ssa
);
379 srcs
[var2
->data
.location_frac
] = intr2
->src
[1].ssa
;
381 nir_instr_remove(&intr2
->instr
);
384 create_new_io(b
, intr
, new_var
, srcs
, new_var
->data
.location_frac
,
389 nir_variable_mode
NirLowerFSOutToVector::get_io_mode(nir_shader
*shader
) const
391 return nir_var_shader_out
;
395 NirLowerFSOutToVector::create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
396 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
)
398 b
->cursor
= nir_before_instr(&intr
->instr
);
400 nir_intrinsic_instr
*new_intr
=
401 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
402 new_intr
->num_components
= num_comps
;
404 nir_intrinsic_set_write_mask(new_intr
, (1 << num_comps
) - 1);
406 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
407 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
409 new_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
410 new_intr
->src
[1] = nir_src_for_ssa(create_combined_vector(b
, srcs
, first_comp
, num_comps
));
412 nir_builder_instr_insert(b
, &new_intr
->instr
);
414 /* Remove the old store intrinsic */
415 nir_instr_remove(&intr
->instr
);
418 bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const
420 if (intr
->intrinsic
!= nir_intrinsic_store_deref
)
423 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
424 if (deref
->mode
!= nir_var_shader_out
)
427 return var_can_rewrite(nir_deref_instr_get_variable(deref
));
430 nir_ssa_def
*NirLowerFSOutToVector::create_combined_vector(nir_builder
*b
, nir_ssa_def
**srcs
,
431 int first_comp
, int num_comp
)
435 case 2: op
= nir_op_vec2
; break;
436 case 3: op
= nir_op_vec3
; break;
437 case 4: op
= nir_op_vec4
; break;
439 assert(0 && "combined vector must have 2 to 4 components");
442 nir_alu_instr
* instr
= nir_alu_instr_create(b
->shader
, op
);
443 instr
->exact
= b
->exact
;
447 while (i
< num_comp
) {
448 nir_ssa_def
*s
= srcs
[first_comp
+ k
];
449 for(uint8_t kk
= 0; kk
< s
->num_components
&& i
< num_comp
; ++kk
) {
450 instr
->src
[i
].src
= nir_src_for_ssa(s
);
451 instr
->src
[i
].swizzle
[0] = kk
;
454 k
+= s
->num_components
;
457 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
, num_comp
, 32, NULL
);
458 instr
->dest
.write_mask
= (1 << num_comp
) - 1;
459 nir_builder_instr_insert(b
, &instr
->instr
);
460 return &instr
->dest
.dest
.ssa
;