2 * Copyright © 2018 Valve Corporation
3 * Copyright © 2018 Google
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * Daniel Schürmann (daniel.schuermann@campus.tu-berlin.de)
26 * Bas Nieuwenhuizen (bas@basnieuwenhuizen.nl)
35 #include "vulkan/radv_shader.h"
/* Processes one block of the liveness analysis, walking its instructions
 * backwards.
 *
 * program      - only used here to print the whole program in the diagnostic at the end
 * lives        - analysis result; lives.live_out and lives.register_demand are indexed by block index
 * block        - the block to process; its register_demand and instruction kill flags are updated
 * worklist     - block indices that still need processing; predecessors are inserted into it below
 * phi_sgpr_ops - per-block counter, indexed by block index, that accumulates the
 *                size of sgpr operands of logical phis (see the phi handling below)
 */
40 void process_live_temps_per_block(Program
*program
, live
& lives
, Block
* block
,
41 std::set
<unsigned>& worklist
, std::vector
<uint16_t>& phi_sgpr_ops
)
/* per-instruction demand table of this block, stored inside the analysis result */
43 std::vector
<RegisterDemand
>& register_demand
= lives
.register_demand
[block
->index
];
44 RegisterDemand new_demand
;
/* one demand entry per instruction; the block-wide demand is reset and rebuilt below */
46 register_demand
.resize(block
->instructions
.size());
47 block
->register_demand
= RegisterDemand();
/* live temporaries are tracked in two sets: temps for which is_linear() is
 * true go into live_sgprs, all others into live_vgprs */
49 std::set
<Temp
> live_sgprs
;
50 std::set
<Temp
> live_vgprs
;
52 /* add the live_out_exec to live */
/* exec_live controls whether 2 sgprs are subtracted from the demand recorded per instruction */
53 bool exec_live
= false;
54 if (block
->live_out_exec
!= Temp()) {
55 live_sgprs
.insert(block
->live_out_exec
);
60 /* split the live-outs from this block into the temporary sets */
61 std::vector
<std::set
<Temp
>>& live_temps
= lives
.live_out
;
62 for (const Temp temp
: live_temps
[block
->index
]) {
/* 'inserted' is true only if the temp was not already in the chosen set */
63 const bool inserted
= temp
.is_linear()
64 ? live_sgprs
.insert(temp
).second
65 : live_vgprs
.insert(temp
).second
;
/* take out the sgpr count accumulated for this block in phi_sgpr_ops;
 * the same amount is added back when p_logical_end is reached */
70 new_demand
.sgpr
-= phi_sgpr_ops
[block
->index
];
72 /* traverse the instructions backwards */
73 for (int idx
= block
->instructions
.size() -1; idx
>= 0; idx
--)
75 /* subtract the 2 sgprs from exec */
77 assert(new_demand
.sgpr
>= 2);
/* demand recorded for this instruction: current vgpr demand, and current
 * sgpr demand minus 2 when exec_live is set */
78 register_demand
[idx
] = RegisterDemand(new_demand
.vgpr
, new_demand
.sgpr
- (exec_live
? 2 : 0));
80 Instruction
*insn
= block
->instructions
[idx
].get();
/* definitions: defined temps are erased from the live sets */
82 for (Definition
& definition
: insn
->definitions
) {
83 if (!definition
.isTemp()) {
87 const Temp temp
= definition
.getTemp();
/* n receives the result of std::set::erase, i.e. the number of elements removed */
90 n
= live_sgprs
.erase(temp
);
92 n
= live_vgprs
.erase(temp
);
96 definition
.setKill(false);
/* the temp is added to this instruction's recorded demand and the definition is flagged as killed */
98 register_demand
[idx
] += temp
;
99 definition
.setKill(true);
102 if (definition
.isFixed() && definition
.physReg() == exec
)
/* phi operands are not added to this block's live sets; they go straight
 * into the live-out set of the matching predecessor */
107 if (insn
->opcode
== aco_opcode::p_phi
||
108 insn
->opcode
== aco_opcode::p_linear_phi
) {
109 /* directly insert into the predecessors live-out set */
/* p_phi pairs its operands with the logical predecessors, p_linear_phi with the linear ones */
110 std::vector
<unsigned>& preds
= insn
->opcode
== aco_opcode::p_phi
111 ? block
->logical_preds
112 : block
->linear_preds
;
/* operand i corresponds to predecessor preds[i] */
113 for (unsigned i
= 0; i
< preds
.size(); ++i
)
115 Operand
&operand
= insn
->operands
[i
];
116 if (!operand
.isTemp()) {
119 /* check if we changed an already processed block */
120 const bool inserted
= live_temps
[preds
[i
]].insert(operand
.getTemp()).second
;
122 operand
.setFirstKill(true);
/* queue the predecessor so that it is processed (again) */
123 worklist
.insert(preds
[i
]);
/* sgpr operands of logical phis are counted per predecessor in phi_sgpr_ops */
124 if (insn
->opcode
== aco_opcode::p_phi
&& operand
.getTemp().type() == RegType::sgpr
)
125 phi_sgpr_ops
[preds
[i
]] += operand
.size();
128 } else if (insn
->opcode
== aco_opcode::p_logical_end
) {
/* add back the amount that was subtracted from new_demand.sgpr before the loop */
129 new_demand
.sgpr
+= phi_sgpr_ops
[block
->index
];
/* operands: used temps are inserted into the live sets and their kill flags are set */
131 for (unsigned i
= 0; i
< insn
->operands
.size(); ++i
)
133 Operand
& operand
= insn
->operands
[i
];
134 if (!operand
.isTemp()) {
137 const Temp temp
= operand
.getTemp();
/* 'inserted' is true only if the temp was not live before this instruction */
138 const bool inserted
= temp
.is_linear()
139 ? live_sgprs
.insert(temp
).second
140 : live_vgprs
.insert(temp
).second
;
142 operand
.setFirstKill(true);
/* later operands of the same instruction that use the same temp id are
 * flagged as kill, but not as first kill */
143 for (unsigned j
= i
+ 1; j
< insn
->operands
.size(); ++j
) {
144 if (insn
->operands
[j
].isTemp() && insn
->operands
[j
].tempId() == operand
.tempId()) {
145 insn
->operands
[j
].setFirstKill(false);
146 insn
->operands
[j
].setKill(true);
151 operand
.setKill(false);
154 if (operand
.isFixed() && operand
.physReg() == exec
)
/* fold this instruction's recorded demand into the block-wide demand */
159 block
->register_demand
.update(register_demand
[idx
]);
162 /* now, we have the live-in sets and need to merge them into the live-out sets */
/* temps in live_vgprs are propagated to the logical predecessors */
163 for (unsigned pred_idx
: block
->logical_preds
) {
164 for (Temp vgpr
: live_vgprs
) {
165 auto it
= live_temps
[pred_idx
].insert(vgpr
);
167 worklist
.insert(pred_idx
);
/* temps in live_sgprs are propagated to the linear predecessors */
171 for (unsigned pred_idx
: block
->linear_preds
) {
172 for (Temp sgpr
: live_sgprs
) {
173 auto it
= live_temps
[pred_idx
].insert(sgpr
);
175 worklist
.insert(pred_idx
);
/* the condition is equivalent to: block index is 0 and at least one of the
 * live sets is non-empty; in that case the program and the leftover temps
 * are printed to stderr */
179 if (!(block
->index
!= 0 || (live_vgprs
.empty() && live_sgprs
.empty()))) {
180 aco_print_program(program
, stderr
);
181 fprintf(stderr
, "These temporaries are never defined or are defined after use:\n");
182 for (Temp vgpr
: live_vgprs
)
183 fprintf(stderr
, "%%%d\n", vgpr
.id());
184 for (Temp sgpr
: live_sgprs
)
185 fprintf(stderr
, "%%%d\n", sgpr
.id());
/* for block index 0, new_demand must compare equal to a default-constructed RegisterDemand */
189 assert(block
->index
!= 0 || new_demand
== RegisterDemand());
191 } /* end namespace */
/* Derives program->num_waves and program->max_reg_demand from a register demand.
 *
 * program    - read: chip_class, sgpr_limit; written: num_waves, max_reg_demand
 * new_demand - the vgpr/sgpr demand to evaluate
 */
193 void update_vgpr_sgpr_demand(Program
* program
, const RegisterDemand new_demand
)
195 // TODO: also take shared mem into account
/* sgpr total used for the wave computation: 800 for GFX8 and newer, 512 otherwise */
196 const int16_t total_sgpr_regs
= program
->chip_class
>= GFX8
? 800 : 512;
197 const int16_t max_addressible_sgpr
= program
->sgpr_limit
;
198 /* VGPRs are allocated in chunks of 4 */
/* rounds the vgpr demand up to a multiple of 4, with a minimum of 4 */
199 const int16_t rounded_vgpr_demand
= std::max
<int16_t>(4, (new_demand
.vgpr
+ 3) & ~3);
200 /* SGPRs are allocated in chunks of 16 between 8 and 104. VCC occupies the last 2 registers */
/* NOTE(review): the expression below adds 2 to the sgpr demand, rounds up to a
 * multiple of 8 (not 16), takes at least 8 and caps the result at
 * program->sgpr_limit -- confirm which granularity is intended */
201 const int16_t rounded_sgpr_demand
= std::min(std::max
<int16_t>(8, (new_demand
.sgpr
+ 2 + 7) & ~7), max_addressible_sgpr
);
202 /* this won't compile, register pressure reduction necessary */
/* a demand above 256 vgprs or above the sgpr limit yields num_waves = 0, and
 * max_reg_demand is set to the demand itself */
203 if (new_demand
.vgpr
> 256 || new_demand
.sgpr
> max_addressible_sgpr
) {
204 program
->num_waves
= 0;
205 program
->max_reg_demand
= new_demand
;
/* number of waves: the smaller of what the vgpr and sgpr budgets allow, capped at 10 */
207 program
->num_waves
= std::min
<uint16_t>(10,
208 std::min
<uint16_t>(256 / rounded_vgpr_demand
,
209 total_sgpr_regs
/ rounded_sgpr_demand
));
/* per-wave budget at that wave count: vgprs rounded down to a multiple of 4;
 * sgprs rounded down to a multiple of 8, minus 2, and capped at the sgpr limit */
211 program
->max_reg_demand
= { int16_t((256 / program
->num_waves
) & ~3), std::min
<int16_t>(((total_sgpr_regs
/ program
->num_waves
) & ~7) - 2, max_addressible_sgpr
)};
/* Runs process_live_temps_per_block over the blocks of the program using a
 * worklist, then derives the program-wide register demand and wave count.
 *
 * program - the program to analyse; its blocks are processed and
 *           update_vgpr_sgpr_demand() is applied to it at the end
 * options - not referenced by the statements shown below
 */
215 live
live_var_analysis(Program
* program
,
216 const struct radv_nir_compiler_options
*options
)
/* one live-out set and one per-instruction demand table per block */
219 result
.live_out
.resize(program
->blocks
.size());
220 result
.register_demand
.resize(program
->blocks
.size());
221 std::set
<unsigned> worklist
;
/* per-block counter filled by process_live_temps_per_block; value-initialised to 0 */
222 std::vector
<uint16_t> phi_sgpr_ops(program
->blocks
.size());
/* running maximum demand over all processed blocks */
223 RegisterDemand new_demand
;
225 /* this implementation assumes that the block idx corresponds to the block's position in program->blocks vector */
/* every block starts out on the worklist */
226 for (Block
& block
: program
->blocks
)
227 worklist
.insert(block
.index
);
228 while (!worklist
.empty()) {
/* rbegin() of the ordered set picks the highest pending block index */
229 std::set
<unsigned>::reverse_iterator b_it
= worklist
.rbegin();
230 unsigned block_idx
= *b_it
;
/* remove the block before processing it; processing may insert block indices into the worklist again */
231 worklist
.erase(block_idx
);
232 process_live_temps_per_block(program
, result
, &program
->blocks
[block_idx
], worklist
, phi_sgpr_ops
);
233 new_demand
.update(program
->blocks
[block_idx
].register_demand
);
236 /* calculate the program's register demand and number of waves */
237 update_vgpr_sgpr_demand(program
, new_demand
);