3 * Copyright (c) 2019 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "sfn_nir_lower_fs_out_to_vector.h"
29 #include "nir_builder.h"
30 #include "nir_deref.h"
31 #include "util/u_math.h"
44 struct nir_intrinsic_instr_less
{
45 bool operator () (const nir_intrinsic_instr
*lhs
, const nir_intrinsic_instr
*rhs
) const
47 nir_variable
*vlhs
= nir_deref_instr_get_variable(nir_src_as_deref(lhs
->src
[0]));
48 nir_variable
*vrhs
= nir_deref_instr_get_variable(nir_src_as_deref(rhs
->src
[0]));
50 auto ltype
= glsl_get_base_type(vlhs
->type
);
51 auto rtype
= glsl_get_base_type(vrhs
->type
);
55 return vlhs
->data
.location
< vrhs
->data
.location
;
/* Base class of a pass that merges IO variables sharing a location into one
 * vector variable and combines the corresponding store instructions.
 * The IO specific parts are supplied by a derived class through the pure
 * virtual methods declared at the end.
 *
 * NOTE(review): the access specifiers, the declarations of m_base_slot,
 * m_block_io and m_next_index (all used by the member functions) and the
 * closing brace are not visible in this excerpt - confirm against the
 * complete file.
 */
59 class NirLowerIOToVector
{
/* base_slot: the location that is mapped to row 0 of m_vars */
61 NirLowerIOToVector(int base_slot
);
/* Run the pass on one function implementation */
62 bool run(nir_function_impl
*shader
);
/* True if both variables have the same GLSL base type */
65 bool var_can_merge(const nir_variable
*lhs
, const nir_variable
*rhs
);
/* True for 32 bit scalar/vector variables (arrays stripped) that the
 * derived class accepts via var_can_rewrite_slot() */
66 bool var_can_rewrite(nir_variable
*var
) const;
/* Fill m_vars and create the merged variables */
67 void create_new_io_vars(nir_shader
*shader
);
/* Create one merged variable for the component bit mask comps at location */
68 void create_new_io_var(nir_shader
*shader
, unsigned location
, unsigned comps
);
/* Rebuild the array deref chain of src_head on top of dst_tail */
70 nir_deref_instr
*clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
71 const nir_deref_instr
*src_head
);
/* Process a block and, recursively, its dominance tree children */
73 bool vectorize_block(nir_builder
*b
, nir_block
*block
);
/* True for intrinsics with at most three components that the derived
 * class accepts via instr_can_rewrite_type() */
74 bool instr_can_rewrite(nir_instr
*instr
);
75 bool vec_instr_set_remove(nir_builder
*b
,nir_instr
*instr
);
/* Collected IO instructions, grouped by nir_intrinsic_instr_less */
77 using InstrSet
= multiset
<nir_intrinsic_instr
*, nir_intrinsic_instr_less
>;
/* Iterator pair as returned by InstrSet::equal_range() */
78 using InstrSubSet
= std::pair
<InstrSet::iterator
, InstrSet::iterator
>;
/* Merge the instructions of one equal_range() into a single access */
80 bool vec_instr_stack_pop(nir_builder
*b
, InstrSubSet
& ir_set
,
81 nir_intrinsic_instr
*instr
);
/* IO variables indexed by [location - m_base_slot][location_frac] */
83 array
<array
<nir_variable
*, 4>, 16> m_vars
;
/* Interface to be implemented for the IO kind that is handled */
87 virtual nir_variable_mode
get_io_mode(nir_shader
*shader
) const = 0;
88 virtual bool instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const = 0;
89 virtual bool var_can_rewrite_slot(nir_variable
*var
) const = 0;
/* Emit the combined access for var in place of intr; srcs holds the
 * per-component source values */
90 virtual void create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
91 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
) = 0;
/* Specialization of NirLowerIOToVector for shader outputs: it handles
 * store_deref intrinsics on nir_var_shader_out variables located at
 * FRAG_RESULT_COLOR or FRAG_RESULT_DATA0..FRAG_RESULT_DATA7.
 *
 * NOTE(review): access specifiers and the closing brace are not visible in
 * this excerpt - confirm against the complete file.
 */
96 class NirLowerFSOutToVector
: public NirLowerIOToVector
{
98 NirLowerFSOutToVector();
/* Always nir_var_shader_out */
101 nir_variable_mode
get_io_mode(nir_shader
*shader
) const override
;
/* Accepts only the color and data output locations */
102 bool var_can_rewrite_slot(nir_variable
*var
) const override
;
/* Emits the combined store before intr and removes intr */
103 void create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
104 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
) override
;
/* Accepts store_deref on shader outputs that var_can_rewrite() accepts */
105 bool instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const override
;
/* Build a vec2/vec3/vec4 from num_comp components of srcs, starting at
 * first_comp */
107 nir_ssa_def
*create_combined_vector(nir_builder
*b
, nir_ssa_def
**srcs
,
108 int first_comp
, int num_comp
);
111 bool r600_lower_fs_out_to_vector(nir_shader
*shader
)
113 NirLowerFSOutToVector processor
;
115 assert(shader
->info
.stage
== MESA_SHADER_FRAGMENT
);
116 bool progress
= false;
118 nir_foreach_function(function
, shader
) {
120 progress
|= processor
.run(function
->impl
);
125 NirLowerIOToVector::NirLowerIOToVector(int base_slot
):
127 m_base_slot(base_slot
)
129 for(auto& a
: m_vars
)
134 bool NirLowerIOToVector::run(nir_function_impl
*impl
)
137 nir_builder_init(&b
, impl
);
139 nir_metadata_require(impl
, nir_metadata_dominance
);
140 create_new_io_vars(impl
->function
->shader
);
142 bool progress
= vectorize_block(&b
, nir_start_block(impl
));
144 nir_metadata_preserve(impl
, (nir_metadata
)
145 (nir_metadata_block_index
|
146 nir_metadata_dominance
));
151 void NirLowerIOToVector::create_new_io_vars(nir_shader
*shader
)
153 nir_variable_mode mode
= get_io_mode(shader
);
155 bool can_rewrite_vars
= false;
156 nir_foreach_variable_with_modes(var
, shader
, mode
) {
157 if (var_can_rewrite(var
)) {
158 can_rewrite_vars
= true;
159 unsigned loc
= var
->data
.location
- m_base_slot
;
160 m_vars
[loc
][var
->data
.location_frac
] = var
;
164 if (!can_rewrite_vars
)
167 /* We don't handle combining vars of different type e.g. different array
170 for (unsigned i
= 0; i
< 16; i
++) {
173 for (unsigned j
= 0; j
< 3; j
++) {
177 for (unsigned k
= j
+ 1; k
< 4; k
++) {
181 if (!var_can_merge(m_vars
[i
][j
], m_vars
[i
][k
]))
185 for (unsigned n
= 0; n
< glsl_get_components(m_vars
[i
][j
]->type
); ++n
)
186 comps
|= 1 << (m_vars
[i
][j
]->data
.location_frac
+ n
);
188 for (unsigned n
= 0; n
< glsl_get_components(m_vars
[i
][k
]->type
); ++n
)
189 comps
|= 1 << (m_vars
[i
][k
]->data
.location_frac
+ n
);
194 create_new_io_var(shader
, i
, comps
);
199 NirLowerIOToVector::var_can_merge(const nir_variable
*lhs
,
200 const nir_variable
*rhs
)
202 return (glsl_get_base_type(lhs
->type
) == glsl_get_base_type(rhs
->type
));
206 NirLowerIOToVector::create_new_io_var(nir_shader
*shader
,
207 unsigned location
, unsigned comps
)
209 unsigned num_comps
= util_bitcount(comps
);
210 assert(num_comps
> 1);
212 /* Note: u_bit_scan() strips a component of the comps bitfield here */
213 unsigned first_comp
= u_bit_scan(&comps
);
215 nir_variable
*var
= nir_variable_clone(m_vars
[location
][first_comp
], shader
);
216 var
->data
.location_frac
= first_comp
;
217 var
->type
= glsl_replace_vector_type(var
->type
, num_comps
);
219 nir_shader_add_variable(shader
, var
);
221 m_vars
[location
][first_comp
] = var
;
224 const int comp
= u_bit_scan(&comps
);
225 if (m_vars
[location
][comp
]) {
226 m_vars
[location
][comp
] = var
;
231 bool NirLowerIOToVector::var_can_rewrite(nir_variable
*var
) const
233 /* Skip complex types we don't split in the first place */
234 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
)))
237 if (glsl_get_bit_size(glsl_without_array(var
->type
)) != 32)
240 return var_can_rewrite_slot(var
);
/* Process one block: register its rewritable IO instructions, recurse into
 * the blocks listed in dom_children, then walk the block backwards and try
 * to merge the registered instructions. Progress of the children and of
 * this block is accumulated in the local progress flag.
 *
 * NOTE(review): the end of the function (whatever follows the reverse
 * loop, including the return statement) is not visible in this excerpt.
 */
244 NirLowerIOToVector::vectorize_block(nir_builder
*b
, nir_block
*block
)
246 bool progress
= false;
/* Pass 1: number the candidates in program order and collect them;
 * vec_instr_stack_pop() sorts by this index */
248 nir_foreach_instr_safe(instr
, block
) {
249 if (instr_can_rewrite(instr
)) {
250 instr
->index
= m_next_index
++;
251 nir_intrinsic_instr
*ir
= nir_instr_as_intrinsic(instr
);
252 m_block_io
.insert(ir
);
/* Children are handled before the instructions of this block are merged */
256 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++) {
257 nir_block
*child
= block
->dom_children
[i
];
258 progress
|= vectorize_block(b
, child
);
/* Pass 2: reverse walk; the _safe variant is used because merging removes
 * instructions */
261 nir_foreach_instr_reverse_safe(instr
, block
) {
262 progress
|= vec_instr_set_remove(b
, instr
);
269 bool NirLowerIOToVector::instr_can_rewrite(nir_instr
*instr
)
271 if (instr
->type
!= nir_instr_type_intrinsic
)
274 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
276 if (intr
->num_components
> 3)
279 return instr_can_rewrite_type(intr
);
282 bool NirLowerIOToVector::vec_instr_set_remove(nir_builder
*b
,nir_instr
*instr
)
284 if (!instr_can_rewrite(instr
))
287 nir_intrinsic_instr
*ir
= nir_instr_as_intrinsic(instr
);
288 auto entry
= m_block_io
.equal_range(ir
);
289 if (entry
.first
!= m_block_io
.end()) {
290 vec_instr_stack_pop(b
, entry
, ir
);
296 NirLowerIOToVector::clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
297 const nir_deref_instr
*src_head
)
299 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
304 assert(src_head
->deref_type
== nir_deref_type_array
);
306 dst_tail
= clone_deref_array(b
, dst_tail
, parent
);
308 return nir_build_deref_array(b
, dst_tail
,
309 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
312 NirLowerFSOutToVector::NirLowerFSOutToVector():
313 NirLowerIOToVector(FRAG_RESULT_COLOR
)
318 bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable
*var
) const
320 return ((var
->data
.mode
== nir_var_shader_out
) &&
321 ((var
->data
.location
== FRAG_RESULT_COLOR
) ||
322 ((var
->data
.location
>= FRAG_RESULT_DATA0
) &&
323 (var
->data
.location
<= FRAG_RESULT_DATA7
))));
326 bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder
*b
, InstrSubSet
&ir_set
,
327 nir_intrinsic_instr
*instr
)
329 vector
< nir_intrinsic_instr
*> ir_sorted_set(ir_set
.first
, ir_set
.second
);
330 std::sort(ir_sorted_set
.begin(), ir_sorted_set
.end(),
331 [](const nir_intrinsic_instr
*lhs
, const nir_intrinsic_instr
*rhs
) {
332 return lhs
->instr
.index
> rhs
->instr
.index
;
336 nir_intrinsic_instr
*intr
= *ir_sorted_set
.begin();
337 nir_variable
*var
= nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
339 unsigned loc
= var
->data
.location
- m_base_slot
;
341 nir_variable
*new_var
= m_vars
[loc
][var
->data
.location_frac
];
342 unsigned num_comps
= glsl_get_vector_elements(glsl_without_array(new_var
->type
));
343 unsigned old_num_comps
= glsl_get_vector_elements(glsl_without_array(var
->type
));
345 /* Don't bother walking the stack if this component can't be vectorised. */
346 if (old_num_comps
> 3) {
350 if (new_var
== var
) {
354 b
->cursor
= nir_after_instr(&intr
->instr
);
355 nir_ssa_undef_instr
*instr_undef
=
356 nir_ssa_undef_instr_create(b
->shader
, 1, 32);
357 nir_builder_instr_insert(b
, &instr_undef
->instr
);
359 nir_ssa_def
*srcs
[4];
360 for (int i
= 0; i
< 4; i
++) {
361 srcs
[i
] = &instr_undef
->def
;
363 srcs
[var
->data
.location_frac
] = intr
->src
[1].ssa
;
365 for (auto k
= ir_sorted_set
.begin() + 1; k
!= ir_sorted_set
.end(); ++k
) {
366 nir_intrinsic_instr
*intr2
= *k
;
368 nir_deref_instr_get_variable(nir_src_as_deref(intr2
->src
[0]));
369 unsigned loc2
= var
->data
.location
- m_base_slot
;
371 if (m_vars
[loc
][var
->data
.location_frac
] !=
372 m_vars
[loc2
][var2
->data
.location_frac
]) {
376 assert(glsl_get_vector_elements(glsl_without_array(var2
->type
)) < 4);
378 if (srcs
[var2
->data
.location_frac
] == &instr_undef
->def
) {
379 assert(intr2
->src
[1].is_ssa
);
380 assert(intr2
->src
[1].ssa
);
381 srcs
[var2
->data
.location_frac
] = intr2
->src
[1].ssa
;
383 nir_instr_remove(&intr2
->instr
);
386 create_new_io(b
, intr
, new_var
, srcs
, new_var
->data
.location_frac
,
391 nir_variable_mode
NirLowerFSOutToVector::get_io_mode(nir_shader
*shader
) const
393 return nir_var_shader_out
;
397 NirLowerFSOutToVector::create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
398 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
)
400 b
->cursor
= nir_before_instr(&intr
->instr
);
402 nir_intrinsic_instr
*new_intr
=
403 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
404 new_intr
->num_components
= num_comps
;
406 nir_intrinsic_set_write_mask(new_intr
, (1 << num_comps
) - 1);
408 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
409 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
411 new_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
412 new_intr
->src
[1] = nir_src_for_ssa(create_combined_vector(b
, srcs
, first_comp
, num_comps
));
414 nir_builder_instr_insert(b
, &new_intr
->instr
);
416 /* Remove the old store intrinsic */
417 nir_instr_remove(&intr
->instr
);
420 bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const
422 if (intr
->intrinsic
!= nir_intrinsic_store_deref
)
425 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
426 if (deref
->mode
!= nir_var_shader_out
)
429 return var_can_rewrite(nir_deref_instr_get_variable(deref
));
432 nir_ssa_def
*NirLowerFSOutToVector::create_combined_vector(nir_builder
*b
, nir_ssa_def
**srcs
,
433 int first_comp
, int num_comp
)
437 case 2: op
= nir_op_vec2
; break;
438 case 3: op
= nir_op_vec3
; break;
439 case 4: op
= nir_op_vec4
; break;
441 assert(0 && "combined vector must have 2 to 4 components");
444 nir_alu_instr
* instr
= nir_alu_instr_create(b
->shader
, op
);
445 instr
->exact
= b
->exact
;
449 while (i
< num_comp
) {
450 nir_ssa_def
*s
= srcs
[first_comp
+ k
];
451 for(uint8_t kk
= 0; kk
< s
->num_components
&& i
< num_comp
; ++kk
) {
452 instr
->src
[i
].src
= nir_src_for_ssa(s
);
453 instr
->src
[i
].swizzle
[0] = kk
;
456 k
+= s
->num_components
;
459 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
, num_comp
, 32, NULL
);
460 instr
->dest
.write_mask
= (1 << num_comp
) - 1;
461 nir_builder_instr_insert(b
, &instr
->instr
);
462 return &instr
->dest
.dest
.ssa
;