/*
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Rhys Perry (pendingchaos02@gmail.com)
 *
 */
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <vector>

#include "aco_builder.h"
/* Bookkeeping carried between the lane-mask phis of one block while they are
 * being lowered. The driver loop resets checked_preds_for_uniform and
 * needs_init for every block; everything else is (re)computed lazily by
 * lower_divergent_bool_phi(). */
struct ssa_state {
   /* true once all_preds_uniform has been computed for the current block */
   bool checked_preds_for_uniform = false;
   /* cached answer: the block is not a merge block and every logical
    * predecessor is uniform */
   bool all_preds_uniform = false;

   /* forces the per-block fields below to be recomputed on the next phi */
   bool needs_init = true;
   /* bit i is set when operand i of the most recently lowered phi was undefined */
   uint64_t cur_undef_operands = 0;

   /* index of the block that owns the phi currently being lowered */
   unsigned phi_block_idx = 0;
   /* loop nesting depth get_ssa() compares visited blocks against */
   unsigned loop_nest_depth = 0;
   /* block index -> id of the temporary written in that block for the current phi */
   std::map<unsigned, unsigned> writes;
   /* block index -> temporary id memoized by get_ssa(); 0 means "not computed yet" */
   std::vector<unsigned> latest;
};
50 Operand
get_ssa(Program
*program
, unsigned block_idx
, ssa_state
*state
, bool before_write
)
53 auto it
= state
->writes
.find(block_idx
);
54 if (it
!= state
->writes
.end())
55 return Operand(Temp(it
->second
, program
->lane_mask
));
56 if (state
->latest
[block_idx
])
57 return Operand(Temp(state
->latest
[block_idx
], program
->lane_mask
));
60 Block
& block
= program
->blocks
[block_idx
];
61 size_t pred
= block
.linear_preds
.size();
62 if (pred
== 0 || block
.loop_nest_depth
< state
->loop_nest_depth
) {
63 return Operand(program
->lane_mask
);
64 } else if (block
.loop_nest_depth
> state
->loop_nest_depth
) {
65 Operand op
= get_ssa(program
, block_idx
- 1, state
, false);
66 assert(!state
->latest
[block_idx
]);
67 state
->latest
[block_idx
] = op
.tempId();
69 } else if (pred
== 1 || block
.kind
& block_kind_loop_exit
) {
70 Operand op
= get_ssa(program
, block
.linear_preds
[0], state
, false);
71 assert(!state
->latest
[block_idx
]);
72 state
->latest
[block_idx
] = op
.tempId();
74 } else if (block
.kind
& block_kind_loop_header
&&
75 !(program
->blocks
[state
->phi_block_idx
].kind
& block_kind_loop_exit
)) {
76 return Operand(program
->lane_mask
);
78 unsigned res
= program
->allocateId();
79 assert(!state
->latest
[block_idx
]);
80 state
->latest
[block_idx
] = res
;
82 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, pred
, 1)};
83 for (unsigned i
= 0; i
< pred
; i
++)
84 phi
->operands
[i
] = get_ssa(program
, block
.linear_preds
[i
], state
, false);
85 phi
->definitions
[0] = Definition(Temp
{res
, program
->lane_mask
});
86 block
.instructions
.emplace(block
.instructions
.begin(), std::move(phi
));
88 return Operand(Temp(res
, program
->lane_mask
));
92 void insert_before_logical_end(Block
*block
, aco_ptr
<Instruction
> instr
)
94 auto IsLogicalEnd
= [] (const aco_ptr
<Instruction
>& instr
) -> bool {
95 return instr
->opcode
== aco_opcode::p_logical_end
;
97 auto it
= std::find_if(block
->instructions
.crbegin(), block
->instructions
.crend(), IsLogicalEnd
);
99 if (it
== block
->instructions
.crend()) {
100 assert(block
->instructions
.back()->format
== Format::PSEUDO_BRANCH
);
101 block
->instructions
.insert(std::prev(block
->instructions
.end()), std::move(instr
));
103 block
->instructions
.insert(std::prev(it
.base()), std::move(instr
));
107 void build_merge_code(Program
*program
, Block
*block
, Definition dst
, Operand prev
, Operand cur
)
109 Builder
bld(program
);
111 auto IsLogicalEnd
= [] (const aco_ptr
<Instruction
>& instr
) -> bool {
112 return instr
->opcode
== aco_opcode::p_logical_end
;
114 auto it
= std::find_if(block
->instructions
.rbegin(), block
->instructions
.rend(), IsLogicalEnd
);
115 assert(it
!= block
->instructions
.rend());
116 bld
.reset(&block
->instructions
, std::prev(it
.base()));
118 if (prev
.isUndefined()) {
119 bld
.sop1(Builder::s_mov
, dst
, cur
);
123 bool prev_is_constant
= prev
.isConstant() && prev
.constantValue64(true) + 1u < 2u;
124 bool cur_is_constant
= cur
.isConstant() && cur
.constantValue64(true) + 1u < 2u;
126 if (!prev_is_constant
) {
127 if (!cur_is_constant
) {
128 Temp tmp1
= bld
.tmp(bld
.lm
), tmp2
= bld
.tmp(bld
.lm
);
129 bld
.sop2(Builder::s_andn2
, Definition(tmp1
), bld
.def(s1
, scc
), prev
, Operand(exec
, bld
.lm
));
130 bld
.sop2(Builder::s_and
, Definition(tmp2
), bld
.def(s1
, scc
), cur
, Operand(exec
, bld
.lm
));
131 bld
.sop2(Builder::s_or
, dst
, bld
.def(s1
, scc
), tmp1
, tmp2
);
132 } else if (cur
.constantValue64(true)) {
133 bld
.sop2(Builder::s_or
, dst
, bld
.def(s1
, scc
), prev
, Operand(exec
, bld
.lm
));
135 bld
.sop2(Builder::s_andn2
, dst
, bld
.def(s1
, scc
), prev
, Operand(exec
, bld
.lm
));
137 } else if (prev
.constantValue64(true)) {
138 if (!cur_is_constant
)
139 bld
.sop2(Builder::s_orn2
, dst
, bld
.def(s1
, scc
), cur
, Operand(exec
, bld
.lm
));
140 else if (cur
.constantValue64(true))
141 bld
.sop1(Builder::s_mov
, dst
, program
->wave_size
== 64 ? Operand(UINT64_MAX
) : Operand(UINT32_MAX
));
143 bld
.sop1(Builder::s_not
, dst
, bld
.def(s1
, scc
), Operand(exec
, bld
.lm
));
145 if (!cur_is_constant
)
146 bld
.sop2(Builder::s_and
, dst
, bld
.def(s1
, scc
), cur
, Operand(exec
, bld
.lm
));
147 else if (cur
.constantValue64(true))
148 bld
.sop1(Builder::s_mov
, dst
, Operand(exec
, bld
.lm
));
150 bld
.sop1(Builder::s_mov
, dst
, program
->wave_size
== 64 ? Operand((uint64_t)0u) : Operand(0u));
154 void lower_divergent_bool_phi(Program
*program
, ssa_state
*state
, Block
*block
, aco_ptr
<Instruction
>& phi
)
156 Builder
bld(program
);
158 if (!state
->checked_preds_for_uniform
) {
159 state
->all_preds_uniform
= !(block
->kind
& block_kind_merge
);
160 for (unsigned pred
: block
->logical_preds
)
161 state
->all_preds_uniform
= state
->all_preds_uniform
&& (program
->blocks
[pred
].kind
& block_kind_uniform
);
162 state
->checked_preds_for_uniform
= true;
165 if (state
->all_preds_uniform
) {
166 assert(block
->logical_preds
.size() == block
->linear_preds
.size());
167 phi
->opcode
= aco_opcode::p_linear_phi
;
171 state
->latest
.resize(program
->blocks
.size());
173 uint64_t undef_operands
= 0;
174 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++)
175 undef_operands
|= phi
->operands
[i
].isUndefined() << i
;
177 if (state
->needs_init
|| undef_operands
!= state
->cur_undef_operands
||
178 block
->logical_preds
.size() > 64) {
179 /* this only has to be done once per block unless the set of predecessors
180 * which are undefined changes */
181 state
->cur_undef_operands
= undef_operands
;
182 state
->phi_block_idx
= block
->index
;
183 state
->loop_nest_depth
= block
->loop_nest_depth
;
184 if (block
->kind
& block_kind_loop_exit
) {
185 state
->loop_nest_depth
+= 1;
187 state
->writes
.clear();
188 state
->needs_init
= false;
190 std::fill(state
->latest
.begin(), state
->latest
.end(), 0);
192 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
193 if (phi
->operands
[i
].isUndefined())
196 state
->writes
[block
->logical_preds
[i
]] = program
->allocateId();
199 bool uniform_merge
= block
->kind
& block_kind_loop_header
;
201 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
202 Block
*pred
= &program
->blocks
[block
->logical_preds
[i
]];
204 bool need_get_ssa
= !uniform_merge
;
205 if (block
->kind
& block_kind_loop_header
&& !(pred
->kind
& block_kind_uniform
))
206 uniform_merge
= false;
208 if (phi
->operands
[i
].isUndefined())
213 cur
= get_ssa(program
, pred
->index
, state
, true);
214 assert(cur
.regClass() == bld
.lm
);
216 Temp new_cur
= {state
->writes
.at(pred
->index
), program
->lane_mask
};
217 assert(new_cur
.regClass() == bld
.lm
);
219 if (i
== 1 && (block
->kind
& block_kind_merge
) && phi
->operands
[0].isConstant())
220 cur
= phi
->operands
[0];
221 build_merge_code(program
, pred
, Definition(new_cur
), cur
, phi
->operands
[i
]);
224 unsigned num_preds
= block
->linear_preds
.size();
225 if (phi
->operands
.size() != num_preds
) {
226 Pseudo_instruction
* new_phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, num_preds
, 1)};
227 new_phi
->definitions
[0] = phi
->definitions
[0];
230 phi
->opcode
= aco_opcode::p_linear_phi
;
232 assert(phi
->operands
.size() == num_preds
);
234 for (unsigned i
= 0; i
< num_preds
; i
++)
235 phi
->operands
[i
] = get_ssa(program
, block
->linear_preds
[i
], state
, false);
240 void lower_subdword_phis(Program
*program
, Block
*block
, aco_ptr
<Instruction
>& phi
)
242 Builder
bld(program
);
243 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
244 if (phi
->operands
[i
].isUndefined())
246 if (phi
->operands
[i
].regClass() == phi
->definitions
[0].regClass())
249 assert(phi
->operands
[i
].isTemp());
250 Block
*pred
= &program
->blocks
[block
->logical_preds
[i
]];
251 Temp phi_src
= phi
->operands
[i
].getTemp();
253 assert(phi_src
.regClass().type() == RegType::sgpr
);
254 Temp tmp
= bld
.tmp(RegClass(RegType::vgpr
, phi_src
.size()));
255 insert_before_logical_end(pred
, bld
.pseudo(aco_opcode::p_create_vector
, Definition(tmp
), phi_src
).get_ptr());
256 Temp new_phi_src
= bld
.tmp(phi
->definitions
[0].regClass());
257 insert_before_logical_end(pred
, bld
.pseudo(aco_opcode::p_extract_vector
, Definition(new_phi_src
), tmp
, Operand(0u)).get_ptr());
259 phi
->operands
[i
].setTemp(new_phi_src
);
264 void lower_phis(Program
* program
)
268 for (Block
& block
: program
->blocks
) {
269 state
.checked_preds_for_uniform
= false;
270 state
.needs_init
= true;
271 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
272 if (phi
->opcode
== aco_opcode::p_phi
) {
273 assert(program
->wave_size
== 64 ? phi
->definitions
[0].regClass() != s1
: phi
->definitions
[0].regClass() != s2
);
274 if (phi
->definitions
[0].regClass() == program
->lane_mask
)
275 lower_divergent_bool_phi(program
, &state
, &block
, phi
);
276 else if (phi
->definitions
[0].regClass().is_subdword())
277 lower_subdword_phis(program
, &block
, phi
);
278 } else if (!is_phi(phi
)) {