3 * Copyright (c) 2019 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "sfn_nir_lower_fs_out_to_vector.h"
29 #include "nir_builder.h"
30 #include "nir_deref.h"
31 #include "util/u_math.h"
// Comparison functor used as the ordering predicate of the InstrSet
// multiset. Each operand is resolved to the nir_variable behind the deref
// in src[0]; the visible return statement orders by data.location.
// NOTE(review): ltype/rtype (the variables' GLSL base types) are computed
// but no use of them is visible in this extract -- presumably the base
// type also takes part in the ordering; confirm against the complete file.
44 struct nir_intrinsic_instr_less
{
45 bool operator () (const nir_intrinsic_instr
*lhs
, const nir_intrinsic_instr
*rhs
) const
// Resolve both store instructions to the variables they write.
47 nir_variable
*vlhs
= nir_deref_instr_get_variable(nir_src_as_deref(lhs
->src
[0]));
48 nir_variable
*vrhs
= nir_deref_instr_get_variable(nir_src_as_deref(rhs
->src
[0]));
50 auto ltype
= glsl_get_base_type(vlhs
->type
);
51 auto rtype
= glsl_get_base_type(vrhs
->type
);
// Order by the location of the written variable.
55 return vlhs
->data
.location
< vrhs
->data
.location
;
// Base class of the pass that merges IO variables sharing a location into
// one vector variable and rewrites the accessing intrinsics accordingly.
// The IO-kind specific parts are the pure virtual hooks declared at the end
// of the class.
// NOTE(review): access specifiers and some data members used by the
// definitions (m_block_io, m_next_index, m_base_slot) are not visible in
// this extract.
59 class NirLowerIOToVector
{
// base_slot is stored in m_base_slot by the constructor and subtracted from
// variable locations to index m_vars.
61 NirLowerIOToVector(int base_slot
);
// Run the pass on one function implementation.
62 bool run(nir_function_impl
*shader
);
// True if both variables have the same GLSL base type.
65 bool var_can_merge(const nir_variable
*lhs
, const nir_variable
*rhs
);
// Type-level checks followed by the slot check of the derived class.
66 bool var_can_rewrite(nir_variable
*var
) const;
// Collect rewritable variables and create the merged vector variables.
67 void create_new_io_vars(nir_shader
*shader
);
// Create one merged variable for `location`; `comps` is a component bitmask.
68 void create_new_io_var(nir_shader
*shader
, unsigned location
, unsigned comps
);
// Rebuild the array-deref chain of src_head on top of dst_tail.
70 nir_deref_instr
*clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
71 const nir_deref_instr
*src_head
);
// Process one block and, recursively, its dominator-tree children.
73 bool vectorize_block(nir_builder
*b
, nir_block
*block
);
74 bool instr_can_rewrite(nir_instr
*instr
);
75 bool vec_instr_set_remove(nir_builder
*b
,nir_instr
*instr
);
// Candidate intrinsics ordered by nir_intrinsic_instr_less; InstrSubSet is
// the iterator pair as returned by InstrSet::equal_range().
77 using InstrSet
= multiset
<nir_intrinsic_instr
*, nir_intrinsic_instr_less
>;
78 using InstrSubSet
= std::pair
<InstrSet::iterator
, InstrSet::iterator
>;
80 bool vec_instr_stack_pop(nir_builder
*b
, InstrSubSet
& ir_set
,
81 nir_intrinsic_instr
*instr
);
// 16 x 4 table of variable pointers; the definitions index it as
// m_vars[location - m_base_slot][location_frac].
83 array
<array
<nir_variable
*, 4>, 16> m_vars
;
// Hooks implemented by the derived class for a concrete kind of IO.
87 virtual exec_list
*get_io_list(nir_shader
*shader
) const = 0;
88 virtual bool instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const = 0;
89 virtual bool var_can_rewrite_slot(nir_variable
*var
) const = 0;
90 virtual void create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
91 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
) = 0;
// Specialisation of NirLowerIOToVector for fragment shader outputs: it
// overrides the four IO hooks of the base class and adds a helper that
// builds the combined vector value.
// NOTE(review): access specifiers are not visible in this extract.
96 class NirLowerFSOutToVector
: public NirLowerIOToVector
{
98 NirLowerFSOutToVector();
// Returns the address of shader->outputs.
101 exec_list
*get_io_list(nir_shader
*shader
) const override
;
// Accepts shader outputs at FRAG_RESULT_COLOR or FRAG_RESULT_DATA0..DATA7.
102 bool var_can_rewrite_slot(nir_variable
*var
) const override
;
// Emits the merged store in front of `intr` and removes `intr`.
103 void create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
104 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
) override
;
105 bool instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const override
;
// Builds a vecN ALU instruction from the per-component sources in `srcs`.
107 nir_ssa_def
*create_combined_vector(nir_builder
*b
, nir_ssa_def
**srcs
,
108 int first_comp
, int num_comp
);
// Entry point of the pass: asserts that `shader` is a fragment shader and
// runs a NirLowerFSOutToVector over the functions of the shader, OR-ing
// the per-function results into `progress`.
// NOTE(review): the return statement and any guard around the run() call
// are not visible in this extract; presumably `progress` is returned.
111 bool r600_lower_fs_out_to_vector(nir_shader
*shader
)
113 NirLowerFSOutToVector processor
;
115 assert(shader
->info
.stage
== MESA_SHADER_FRAGMENT
);
116 bool progress
= false;
118 nir_foreach_function(function
, shader
) {
120 progress
|= processor
.run(function
->impl
);
// Stores base_slot in m_base_slot and then iterates over the rows of
// m_vars.
// NOTE(review): the loop body is not visible in this extract; presumably
// it clears each row -- confirm against the complete file.
125 NirLowerIOToVector::NirLowerIOToVector(int base_slot
):
127 m_base_slot(base_slot
)
129 for(auto& a
: m_vars
)
// Runs the pass on one function implementation: initialises a builder for
// `impl`, requires dominance metadata, creates the merged IO variables for
// the owning shader, vectorises starting at the start block, and finally
// marks block-index and dominance metadata as preserved.
// NOTE(review): the declaration of `b` and the return statement are not
// visible in this extract; presumably `progress` is returned.
134 bool NirLowerIOToVector::run(nir_function_impl
*impl
)
137 nir_builder_init(&b
, impl
);
// vectorize_block() walks block->dom_children, hence the dominance request.
139 nir_metadata_require(impl
, nir_metadata_dominance
);
140 create_new_io_vars(impl
->function
->shader
);
142 bool progress
= vectorize_block(&b
, nir_start_block(impl
));
144 nir_metadata_preserve(impl
, (nir_metadata
)
145 (nir_metadata_block_index
|
146 nir_metadata_dominance
));
// Fills m_vars with the rewritable variables of the IO list and then, per
// slot, looks for pairs of components (j < k) whose variables can be
// merged; the components covered by both variables are accumulated in the
// bitmask `comps`, which is handed to create_new_io_var().
// NOTE(review): several statements are not visible in this extract (the
// action taken for an empty list, the declaration/reset of `comps`, null
// checks and the condition guarding create_new_io_var) -- confirm against
// the complete file.
151 void NirLowerIOToVector::create_new_io_vars(nir_shader
*shader
)
153 struct exec_list
*io_list
= get_io_list(shader
);
154 if (exec_list_is_empty(io_list
))
// Record every rewritable variable at [location - base slot][location_frac].
157 nir_foreach_variable(var
, io_list
) {
158 if (var_can_rewrite(var
)) {
159 unsigned loc
= var
->data
.location
- m_base_slot
;
160 m_vars
[loc
][var
->data
.location_frac
] = var
;
// NOTE(review): the block comment on the next line is cut off in this
// extract (its closing delimiter is not visible).
164 /* We don't handle combining vars of different type e.g. different array
// 16 slots, 4 components each: the dimensions of m_vars.
167 for (unsigned i
= 0; i
< 16; i
++) {
170 for (unsigned j
= 0; j
< 3; j
++) {
174 for (unsigned k
= j
+ 1; k
< 4; k
++) {
178 if (!var_can_merge(m_vars
[i
][j
], m_vars
[i
][k
]))
// Set one bit per component occupied by the variable at [i][j] ...
182 for (unsigned n
= 0; n
< glsl_get_components(m_vars
[i
][j
]->type
); ++n
)
183 comps
|= 1 << (m_vars
[i
][j
]->data
.location_frac
+ n
);
// ... and by the variable at [i][k].
185 for (unsigned n
= 0; n
< glsl_get_components(m_vars
[i
][k
]->type
); ++n
)
186 comps
|= 1 << (m_vars
[i
][k
]->data
.location_frac
+ n
);
191 create_new_io_var(shader
, i
, comps
);
196 NirLowerIOToVector::var_can_merge(const nir_variable
*lhs
,
197 const nir_variable
*rhs
)
199 return (glsl_get_base_type(lhs
->type
) == glsl_get_base_type(rhs
->type
));
// Creates the merged variable for slot `location`. `comps` is a bitmask of
// the components to merge; at least two bits must be set (asserted). The
// variable stored at the first component is cloned, given a vector type
// with num_comps elements, added to the shader, and entered into m_vars
// for the first component and for every further component of `comps` that
// already holds a variable.
// NOTE(review): the return type and the loop header around the second
// u_bit_scan() are not visible in this extract.
203 NirLowerIOToVector::create_new_io_var(nir_shader
*shader
,
204 unsigned location
, unsigned comps
)
206 unsigned num_comps
= util_bitcount(comps
);
207 assert(num_comps
> 1);
209 /* Note: u_bit_scan() strips a component of the comps bitfield here */
210 unsigned first_comp
= u_bit_scan(&comps
);
// The clone inherits everything from the variable at the first component;
// only location_frac and the type are overwritten.
212 nir_variable
*var
= nir_variable_clone(m_vars
[location
][first_comp
], shader
);
213 var
->data
.location_frac
= first_comp
;
214 var
->type
= glsl_replace_vector_type(var
->type
, num_comps
);
216 nir_shader_add_variable(shader
, var
);
218 m_vars
[location
][first_comp
] = var
;
// Redirect the remaining, already occupied components to the new variable.
221 const int comp
= u_bit_scan(&comps
);
222 if (m_vars
[location
][comp
]) {
223 m_vars
[location
][comp
] = var
;
// Checks whether `var` is a candidate for merging: its type (with arrays
// stripped) is tested for being a vector or scalar and for a bit size of
// 32, and the final answer is delegated to var_can_rewrite_slot().
// NOTE(review): the statements executed when the two checks fail are not
// visible in this extract; presumably both reject the variable.
228 bool NirLowerIOToVector::var_can_rewrite(nir_variable
*var
) const
230 /* Skip complex types we don't split in the first place */
231 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
)))
234 if (glsl_get_bit_size(glsl_without_array(var
->type
)) != 32)
237 return var_can_rewrite_slot(var
);
// Processes one block in three steps: (1) every rewritable instruction
// gets a running index (m_next_index) and is inserted into m_block_io,
// (2) the dominator-tree children are processed recursively, (3) the block
// is walked in reverse and each instruction is passed to
// vec_instr_set_remove(). Results are OR-ed into `progress`.
// NOTE(review): the return type and the statements after the reverse walk
// (including the return) are not visible in this extract.
241 NirLowerIOToVector::vectorize_block(nir_builder
*b
, nir_block
*block
)
243 bool progress
= false;
245 nir_foreach_instr_safe(instr
, block
) {
246 if (instr_can_rewrite(instr
)) {
// The index is what vec_instr_stack_pop() later sorts by.
247 instr
->index
= m_next_index
++;
248 nir_intrinsic_instr
*ir
= nir_instr_as_intrinsic(instr
);
249 m_block_io
.insert(ir
);
253 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++) {
254 nir_block
*child
= block
->dom_children
[i
];
255 progress
|= vectorize_block(b
, child
);
258 nir_foreach_instr_reverse_safe(instr
, block
) {
259 progress
|= vec_instr_set_remove(b
, instr
);
// Decides whether `instr` is a candidate for vectorisation: the type is
// tested for being an intrinsic, its num_components is tested against 3,
// and the final answer comes from the derived class via
// instr_can_rewrite_type().
// NOTE(review): the statements executed when the two checks fail are not
// visible in this extract; presumably both reject the instruction.
266 bool NirLowerIOToVector::instr_can_rewrite(nir_instr
*instr
)
268 if (instr
->type
!= nir_instr_type_intrinsic
)
271 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
273 if (intr
->num_components
> 3)
276 return instr_can_rewrite_type(intr
);
// For a rewritable instruction, looks up the range of m_block_io entries
// that compare equivalent to it and hands that range to
// vec_instr_stack_pop().
// NOTE(review): the return statements are not visible in this extract.
279 bool NirLowerIOToVector::vec_instr_set_remove(nir_builder
*b
,nir_instr
*instr
)
281 if (!instr_can_rewrite(instr
))
284 nir_intrinsic_instr
*ir
= nir_instr_as_intrinsic(instr
);
285 auto entry
= m_block_io
.equal_range(ir
);
// NOTE(review): equal_range() reports "no equivalent element" as
// first == second, which is not necessarily end(); this test only rules
// out the end() case -- confirm that an empty range cannot reach
// vec_instr_stack_pop(), which dereferences the first element.
286 if (entry
.first
!= m_block_io
.end()) {
287 vec_instr_stack_pop(b
, entry
, ir
);
// Recursively re-creates the array-deref chain that ends in src_head on
// top of dst_tail: the parent chain is cloned first, then an array deref
// with src_head's index is appended to the result.
// NOTE(review): the return type and the statements between fetching
// `parent` and the assert are not visible in this extract; presumably they
// end the recursion when src_head has no parent -- confirm against the
// complete file.
293 NirLowerIOToVector::clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
294 const nir_deref_instr
*src_head
)
296 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
// Only array derefs are expected along the chain.
301 assert(src_head
->deref_type
== nir_deref_type_array
);
303 dst_tail
= clone_deref_array(b
, dst_tail
, parent
);
305 return nir_build_deref_array(b
, dst_tail
,
306 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
309 NirLowerFSOutToVector::NirLowerFSOutToVector():
310 NirLowerIOToVector(FRAG_RESULT_COLOR
)
315 bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable
*var
) const
317 return ((var
->data
.mode
== nir_var_shader_out
) &&
318 ((var
->data
.location
== FRAG_RESULT_COLOR
) ||
319 ((var
->data
.location
>= FRAG_RESULT_DATA0
) &&
320 (var
->data
.location
<= FRAG_RESULT_DATA7
))));
// Merges the stores in `ir_set` (a range of m_block_io entries) into one
// vector store. The range is copied into a vector and sorted by descending
// instr.index, so the first element is the store that received the highest
// index in vectorize_block(). Its value seeds the source array `srcs`
// (initialised to a 1-component, 32-bit undef); the remaining stores that
// map to the same merged variable fill in their component if it is still
// undef and are removed. The merged store is emitted by create_new_io().
// NOTE(review): many statements are not visible in this extract (the
// bodies of the early-out checks, the declaration of var2, the `continue`
// in the inner check, the last argument of create_new_io and the return
// statements) -- confirm against the complete file.
323 bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder
*b
, InstrSubSet
&ir_set
,
324 nir_intrinsic_instr
*instr
)
326 vector
< nir_intrinsic_instr
*> ir_sorted_set(ir_set
.first
, ir_set
.second
);
// Highest index first.
327 std::sort(ir_sorted_set
.begin(), ir_sorted_set
.end(),
328 [](const nir_intrinsic_instr
*lhs
, const nir_intrinsic_instr
*rhs
) {
329 return lhs
->instr
.index
> rhs
->instr
.index
;
333 nir_intrinsic_instr
*intr
= *ir_sorted_set
.begin();
334 nir_variable
*var
= nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
336 unsigned loc
= var
->data
.location
- m_base_slot
;
// new_var is the entry of m_vars for this component; it equals `var` when
// no merged variable replaced it (checked below).
338 nir_variable
*new_var
= m_vars
[loc
][var
->data
.location_frac
];
339 unsigned num_comps
= glsl_get_vector_elements(glsl_without_array(new_var
->type
));
340 unsigned old_num_comps
= glsl_get_vector_elements(glsl_without_array(var
->type
));
342 /* Don't bother walking the stack if this component can't be vectorised. */
343 if (old_num_comps
> 3) {
347 if (new_var
== var
) {
// Insert an undef right after the first store; it marks unfilled components.
351 b
->cursor
= nir_after_instr(&intr
->instr
);
352 nir_ssa_undef_instr
*instr_undef
=
353 nir_ssa_undef_instr_create(b
->shader
, 1, 32);
354 nir_builder_instr_insert(b
, &instr_undef
->instr
);
356 nir_ssa_def
*srcs
[4];
357 for (int i
= 0; i
< 4; i
++) {
358 srcs
[i
] = &instr_undef
->def
;
360 srcs
[var
->data
.location_frac
] = intr
->src
[1].ssa
;
// Walk the remaining stores of the range.
362 for (auto k
= ir_sorted_set
.begin() + 1; k
!= ir_sorted_set
.end(); ++k
) {
363 nir_intrinsic_instr
*intr2
= *k
;
365 nir_deref_instr_get_variable(nir_src_as_deref(intr2
->src
[0]));
// NOTE(review): loc2 is computed from `var`, not from `var2`, so it always
// equals `loc`. The visible part of nir_intrinsic_instr_less orders by
// location, so entries of one equal_range would share it anyway -- confirm
// whether var2 was intended.
366 unsigned loc2
= var
->data
.location
- m_base_slot
;
368 if (m_vars
[loc
][var
->data
.location_frac
] !=
369 m_vars
[loc2
][var2
->data
.location_frac
]) {
373 assert(glsl_get_vector_elements(glsl_without_array(var2
->type
)) < 4);
// Only fill a component that has not been provided yet; because of the
// sort order the store with the higher index wins.
375 if (srcs
[var2
->data
.location_frac
] == &instr_undef
->def
) {
376 assert(intr2
->src
[1].is_ssa
);
377 assert(intr2
->src
[1].ssa
);
378 srcs
[var2
->data
.location_frac
] = intr2
->src
[1].ssa
;
380 nir_instr_remove(&intr2
->instr
);
383 create_new_io(b
, intr
, new_var
, srcs
, new_var
->data
.location_frac
,
388 exec_list
*NirLowerFSOutToVector::get_io_list(nir_shader
*shader
) const
390 return &shader
->outputs
;
394 NirLowerFSOutToVector::create_new_io(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
395 nir_ssa_def
**srcs
, unsigned first_comp
, unsigned num_comps
)
397 b
->cursor
= nir_before_instr(&intr
->instr
);
399 nir_intrinsic_instr
*new_intr
=
400 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
401 new_intr
->num_components
= num_comps
;
403 nir_intrinsic_set_write_mask(new_intr
, (1 << num_comps
) - 1);
405 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
406 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
408 new_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
409 new_intr
->src
[1] = nir_src_for_ssa(create_combined_vector(b
, srcs
, first_comp
, num_comps
));
411 nir_builder_instr_insert(b
, &new_intr
->instr
);
413 /* Remove the old store intrinsic */
414 nir_instr_remove(&intr
->instr
);
// Intrinsic-level filter of the fragment-output variant: the intrinsic is
// tested for being a store_deref whose destination deref has shader-out
// mode; the final answer comes from var_can_rewrite() on the variable
// behind the deref.
// NOTE(review): the statements executed when the two checks fail are not
// visible in this extract; presumably both reject the intrinsic.
417 bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr
*intr
) const
419 if (intr
->intrinsic
!= nir_intrinsic_store_deref
)
422 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
423 if (deref
->mode
!= nir_var_shader_out
)
426 return var_can_rewrite(nir_deref_instr_get_variable(deref
));
// Builds one vec2/vec3/vec4 ALU instruction whose num_comp sources are
// taken from `srcs`, starting at index first_comp. A source with several
// components contributes one ALU source per component (selected through
// swizzle[0]) and advances the read position `k` by its component count.
// The result is a 32-bit SSA value with num_comp components and a full
// write mask.
// NOTE(review): the declarations of `op`, `i` and `k`, the switch header
// and the increment of `i` are not visible in this extract. As far as
// visible, a num_comp outside 2..4 only hits the assert, so `op` may be
// left unset in builds without assertions -- confirm against the complete
// file.
429 nir_ssa_def
*NirLowerFSOutToVector::create_combined_vector(nir_builder
*b
, nir_ssa_def
**srcs
,
430 int first_comp
, int num_comp
)
// Pick the vecN opcode matching the component count.
434 case 2: op
= nir_op_vec2
; break;
435 case 3: op
= nir_op_vec3
; break;
436 case 4: op
= nir_op_vec4
; break;
438 assert(0 && "combined vector must have 2 to 4 components");
441 nir_alu_instr
* instr
= nir_alu_instr_create(b
->shader
, op
);
442 instr
->exact
= b
->exact
;
// Fill the ALU sources; the inner loop also stops once num_comp sources
// have been assigned.
446 while (i
< num_comp
) {
447 nir_ssa_def
*s
= srcs
[first_comp
+ k
];
448 for(uint8_t kk
= 0; kk
< s
->num_components
&& i
< num_comp
; ++kk
) {
449 instr
->src
[i
].src
= nir_src_for_ssa(s
);
450 instr
->src
[i
].swizzle
[0] = kk
;
453 k
+= s
->num_components
;
456 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
, num_comp
, 32, NULL
);
457 instr
->dest
.write_mask
= (1 << num_comp
) - 1;
458 nir_builder_instr_insert(b
, &instr
->instr
);
459 return &instr
->dest
.dest
.ssa
;