2 * Copyright © 2019 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Rhys Perry (pendingchaos02@gmail.com)
31 #include "aco_builder.h"
/* Bitmask of the phi operands that were undefined when the cached state was
 * last (re)initialised: bit i corresponds to operand i. Compared against the
 * mask of the phi currently being lowered to decide whether to re-initialise. */
39 uint64_t cur_undef_operands
;
/* Index of the block containing the phi that is being lowered. */
41 unsigned phi_block_idx
;
/* Loop nest depth of that block, incremented by one when the block is a loop
 * exit. get_ssa() compares the depth of the blocks it visits against this. */
42 unsigned loop_nest_depth
;
/* Logical predecessor block index -> temp id allocated for the lane mask that
 * is written at the end of that predecessor. */
43 std::map
<unsigned, unsigned> writes
;
/* Per-block temp id already resolved by get_ssa(), indexed by block index.
 * 0 means "not resolved yet"; the vector is zero-filled for every phi. */
44 std::vector
<unsigned> latest
;
47 Operand
get_ssa(Program
*program
, unsigned block_idx
, ssa_state
*state
, bool before_write
)
50 auto it
= state
->writes
.find(block_idx
);
51 if (it
!= state
->writes
.end())
52 return Operand(Temp(it
->second
, program
->lane_mask
));
53 if (state
->latest
[block_idx
])
54 return Operand(Temp(state
->latest
[block_idx
], program
->lane_mask
));
57 Block
& block
= program
->blocks
[block_idx
];
58 size_t pred
= block
.linear_preds
.size();
59 if (pred
== 0 || block
.loop_nest_depth
< state
->loop_nest_depth
) {
60 return Operand(program
->lane_mask
);
61 } else if (block
.loop_nest_depth
> state
->loop_nest_depth
) {
62 Operand op
= get_ssa(program
, block_idx
- 1, state
, false);
63 assert(!state
->latest
[block_idx
]);
64 state
->latest
[block_idx
] = op
.tempId();
66 } else if (pred
== 1 || block
.kind
& block_kind_loop_exit
) {
67 Operand op
= get_ssa(program
, block
.linear_preds
[0], state
, false);
68 assert(!state
->latest
[block_idx
]);
69 state
->latest
[block_idx
] = op
.tempId();
71 } else if (block
.kind
& block_kind_loop_header
&&
72 !(program
->blocks
[state
->phi_block_idx
].kind
& block_kind_loop_exit
)) {
73 return Operand(program
->lane_mask
);
75 unsigned res
= program
->allocateId();
76 assert(!state
->latest
[block_idx
]);
77 state
->latest
[block_idx
] = res
;
79 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, pred
, 1)};
80 for (unsigned i
= 0; i
< pred
; i
++)
81 phi
->operands
[i
] = get_ssa(program
, block
.linear_preds
[i
], state
, false);
82 phi
->definitions
[0] = Definition(Temp
{res
, program
->lane_mask
});
83 block
.instructions
.emplace(block
.instructions
.begin(), std::move(phi
));
85 return Operand(Temp(res
, program
->lane_mask
));
89 void insert_before_logical_end(Block
*block
, aco_ptr
<Instruction
> instr
)
91 auto IsLogicalEnd
= [] (const aco_ptr
<Instruction
>& instr
) -> bool {
92 return instr
->opcode
== aco_opcode::p_logical_end
;
94 auto it
= std::find_if(block
->instructions
.crbegin(), block
->instructions
.crend(), IsLogicalEnd
);
96 if (it
== block
->instructions
.crend()) {
97 assert(block
->instructions
.back()->format
== Format::PSEUDO_BRANCH
);
98 block
->instructions
.insert(std::prev(block
->instructions
.end()), std::move(instr
));
101 block
->instructions
.insert(std::prev(it
.base()), std::move(instr
));
104 void lower_divergent_bool_phi(Program
*program
, ssa_state
*state
, Block
*block
, aco_ptr
<Instruction
>& phi
)
106 Builder
bld(program
);
108 state
->latest
.resize(program
->blocks
.size());
110 uint64_t undef_operands
= 0;
111 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++)
112 undef_operands
|= phi
->operands
[i
].isUndefined() << i
;
114 if (state
->needs_init
|| undef_operands
!= state
->cur_undef_operands
||
115 block
->logical_preds
.size() > 64) {
116 /* this only has to be done once per block unless the set of predecessors
117 * which are undefined changes */
118 state
->cur_undef_operands
= undef_operands
;
119 state
->phi_block_idx
= block
->index
;
120 state
->loop_nest_depth
= block
->loop_nest_depth
;
121 if (block
->kind
& block_kind_loop_exit
) {
122 state
->loop_nest_depth
+= 1;
124 state
->writes
.clear();
125 state
->needs_init
= false;
127 std::fill(state
->latest
.begin(), state
->latest
.end(), 0);
129 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
130 if (phi
->operands
[i
].isUndefined())
133 state
->writes
[block
->logical_preds
[i
]] = program
->allocateId();
136 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
137 Block
*pred
= &program
->blocks
[block
->logical_preds
[i
]];
139 if (phi
->operands
[i
].isUndefined())
142 Operand cur
= get_ssa(program
, pred
->index
, state
, true);
143 assert(cur
.regClass() == bld
.lm
);
144 Temp new_cur
= {state
->writes
.at(pred
->index
), program
->lane_mask
};
145 assert(new_cur
.regClass() == bld
.lm
);
147 if (cur
.isUndefined()) {
148 insert_before_logical_end(pred
, bld
.sop1(aco_opcode::s_mov_b64
, Definition(new_cur
), phi
->operands
[i
]).get_ptr());
150 Temp tmp1
= bld
.tmp(bld
.lm
), tmp2
= bld
.tmp(bld
.lm
);
151 insert_before_logical_end(pred
,
152 bld
.sop2(Builder::s_andn2
, Definition(tmp1
), bld
.def(s1
, scc
),
153 cur
, Operand(exec
, bld
.lm
)).get_ptr());
154 insert_before_logical_end(pred
,
155 bld
.sop2(Builder::s_and
, Definition(tmp2
), bld
.def(s1
, scc
),
156 phi
->operands
[i
].getTemp(), Operand(exec
, bld
.lm
)).get_ptr());
157 insert_before_logical_end(pred
,
158 bld
.sop2(Builder::s_or
, Definition(new_cur
), bld
.def(s1
, scc
),
159 tmp1
, tmp2
).get_ptr());
163 unsigned num_preds
= block
->linear_preds
.size();
164 if (phi
->operands
.size() != num_preds
) {
165 Pseudo_instruction
* new_phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, num_preds
, 1)};
166 new_phi
->definitions
[0] = phi
->definitions
[0];
169 phi
->opcode
= aco_opcode::p_linear_phi
;
171 assert(phi
->operands
.size() == num_preds
);
173 for (unsigned i
= 0; i
< num_preds
; i
++)
174 phi
->operands
[i
] = get_ssa(program
, block
->linear_preds
[i
], state
, false);
179 void lower_subdword_phis(Program
*program
, Block
*block
, aco_ptr
<Instruction
>& phi
)
181 Builder
bld(program
);
182 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
183 if (phi
->operands
[i
].isUndefined())
185 if (phi
->operands
[i
].regClass() == phi
->definitions
[0].regClass())
188 assert(phi
->operands
[i
].isTemp());
189 Block
*pred
= &program
->blocks
[block
->logical_preds
[i
]];
190 Temp phi_src
= phi
->operands
[i
].getTemp();
192 assert(phi_src
.regClass().type() == RegType::sgpr
);
193 Temp tmp
= bld
.tmp(RegClass(RegType::vgpr
, phi_src
.size()));
194 insert_before_logical_end(pred
, bld
.pseudo(aco_opcode::p_create_vector
, Definition(tmp
), phi_src
).get_ptr());
195 Temp new_phi_src
= bld
.tmp(phi
->definitions
[0].regClass());
196 insert_before_logical_end(pred
, bld
.pseudo(aco_opcode::p_extract_vector
, Definition(new_phi_src
), tmp
, Operand(0u)).get_ptr());
198 phi
->operands
[i
].setTemp(new_phi_src
);
203 void lower_phis(Program
* program
)
207 for (Block
& block
: program
->blocks
) {
208 state
.needs_init
= true;
209 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
210 if (phi
->opcode
== aco_opcode::p_phi
) {
211 assert(program
->wave_size
== 64 ? phi
->definitions
[0].regClass() != s1
: phi
->definitions
[0].regClass() != s2
);
212 if (phi
->definitions
[0].regClass() == program
->lane_mask
)
213 lower_divergent_bool_phi(program
, &state
, &block
, phi
);
214 else if (phi
->definitions
[0].regClass().is_subdword())
215 lower_subdword_phis(program
, &block
, phi
);
216 } else if (!is_phi(phi
)) {