2 * Copyright © 2019 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 #include "aco_builder.h"
30 * Implements an algorithm to lower to Concentional SSA Form (CSSA).
31 * After "Revisiting Out-of-SSA Translation for Correctness, CodeQuality, and Efficiency"
32 * by B. Boissinot, A. Darte, F. Rastello, B. Dupont de Dinechin, C. Guillon,
34 * By lowering the IR to CSSA, the insertion of parallelcopies is separated from
35 * the register coalescing problem. Additionally, correctness is ensured w.r.t. spilling.
36 * The algorithm tries to find beneficial insertion points by checking if a basic block
37 * is empty and if the variable already has a new definition in a dominating block.
44 typedef std::map
<uint32_t, std::vector
<std::pair
<Definition
, Operand
>>> phi_info
;
49 phi_info logical_phi_info
;
50 phi_info linear_phi_info
;
52 cssa_ctx(Program
* program
, live
& live_vars
) : program(program
), live_vars(live_vars
) {}
55 unsigned get_lca(cssa_ctx
& ctx
, unsigned x
, unsigned y
, bool is_logical
)
59 x
= is_logical
? ctx
.program
->blocks
[x
].logical_idom
: ctx
.program
->blocks
[x
].linear_idom
;
61 y
= is_logical
? ctx
.program
->blocks
[y
].logical_idom
: ctx
.program
->blocks
[y
].linear_idom
;
67 bool collect_phi_info(cssa_ctx
& ctx
)
69 bool progress
= false;
70 for (Block
& block
: ctx
.program
->blocks
) {
71 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
73 if (phi
->opcode
== aco_opcode::p_phi
)
75 else if (phi
->opcode
== aco_opcode::p_linear_phi
)
80 /* no CSSA for the exec mask as we don't spill it anyway */
81 if (phi
->definitions
[0].isFixed() && phi
->definitions
[0].physReg() == exec
)
83 std::vector
<unsigned>& preds
= is_logical
? block
.logical_preds
: block
.linear_preds
;
85 /* collect definition's block per Operand */
86 std::vector
<unsigned> def_points(phi
->operands
.size());
87 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
88 Operand
& op
= phi
->operands
[i
];
89 if (op
.isUndefined()) {
90 def_points
[i
] = preds
[i
];
91 } else if (op
.isConstant()) {
92 /* in theory, we could insert the definition there... */
96 unsigned pred
= preds
[i
];
100 ctx
.program
->blocks
[pred
].logical_idom
:
101 ctx
.program
->blocks
[pred
].linear_idom
;
102 } while (def_points
[i
] != pred
&&
103 ctx
.live_vars
.live_out
[pred
].find(op
.getTemp()) != ctx
.live_vars
.live_out
[pred
].end());
107 /* check live-range intersections */
108 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
109 Operand op
= phi
->operands
[i
];
110 if (op
.isUndefined())
112 /* check if the operand comes from the exec mask of a predecessor */
113 if (op
.isTemp() && op
.getTemp() == ctx
.program
->blocks
[preds
[i
]].live_out_exec
)
116 /* calculate the earliest block where we can insert a copy if needed */
117 bool intersects
= false;
118 unsigned upper_bound
= preds
[i
];
119 while (def_points
[i
] < upper_bound
) {
120 unsigned new_ub
= is_logical
?
121 ctx
.program
->blocks
[upper_bound
].logical_idom
:
122 ctx
.program
->blocks
[upper_bound
].linear_idom
;
124 for (unsigned j
= 0; j
< phi
->operands
.size(); j
++) {
125 /* same operands cannot intersect */
126 if (phi
->operands
[j
].isTemp() && op
.getTemp() == phi
->operands
[j
].getTemp())
128 /* live-ranges intersect if any other definition point is dominated by the current definition */
129 intersects
|= def_points
[j
] == new_ub
;
134 upper_bound
= new_ub
;
138 /* constants and live-through definitions can get a copy
139 * at their definition point if there is no other intersection */
140 if (op
.isConstant() || !op
.isKill() || op
.regClass().type() != phi
->definitions
[0].regClass().type()) {
141 upper_bound
= def_points
[i
];
148 unsigned insert_block
= preds
[i
];
150 /* if the predecessor block is empty, try to insert at the dominator */
151 bool is_empty
= (is_logical
&& ctx
.program
->blocks
[insert_block
].instructions
.size() == 3) ||
152 ctx
.program
->blocks
[insert_block
].instructions
.size() == 1;
154 unsigned idom
= is_logical
?
155 ctx
.program
->blocks
[insert_block
].logical_idom
:
156 ctx
.program
->blocks
[insert_block
].linear_idom
;
157 if (idom
> upper_bound
)
161 /* if other operands share the same value, try to insert at LCA */
162 std::vector
<unsigned> indices
= {i
};
163 for (unsigned j
= i
+ 1; j
< phi
->operands
.size(); j
++) {
164 if ((phi
->operands
[j
].isTemp() && op
.isTemp() && phi
->operands
[j
].getTemp() == op
.getTemp()) ||
165 (phi
->operands
[j
].isConstant() && op
.isConstant() && phi
->operands
[j
].constantValue() == op
.constantValue())) {
166 unsigned lca
= get_lca(ctx
, insert_block
, preds
[j
], is_logical
);
167 if (lca
> upper_bound
) {
169 indices
.push_back(j
);
174 /* create new temporary and rename operands */
175 Temp new_tmp
= Temp
{ctx
.program
->allocateId(), phi
->definitions
[0].regClass()};
177 ctx
.logical_phi_info
[insert_block
].emplace_back(Definition(new_tmp
), op
);
179 ctx
.linear_phi_info
[insert_block
].emplace_back(Definition(new_tmp
), op
);
180 for (unsigned index
: indices
) {
181 phi
->operands
[index
] = Operand(new_tmp
);
182 phi
->operands
[index
].setKill(true);
183 def_points
[index
] = insert_block
;
191 void insert_parallelcopies(cssa_ctx
& ctx
)
193 /* insert the parallelcopies from logical phis before p_logical_end */
194 for (auto&& entry
: ctx
.logical_phi_info
) {
195 Block
& block
= ctx
.program
->blocks
[entry
.first
];
196 unsigned idx
= block
.instructions
.size() - 1;
197 while (block
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
) {
202 Builder
bld(ctx
.program
);
203 bld
.reset(&block
.instructions
, std::next(block
.instructions
.begin(), idx
));
204 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
205 bld
.pseudo(aco_opcode::p_parallelcopy
, pair
.first
, pair
.second
);
208 /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
209 for (auto&& entry
: ctx
.linear_phi_info
) {
210 Block
& block
= ctx
.program
->blocks
[entry
.first
];
211 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.end();
213 assert((*it
)->format
== Format::PSEUDO_BRANCH
);
215 Builder
bld(ctx
.program
);
216 bld
.reset(&block
.instructions
, it
);
217 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
218 bld
.pseudo(aco_opcode::p_parallelcopy
, pair
.first
, pair
.second
);
222 } /* end namespace */
225 void lower_to_cssa(Program
* program
, live
& live_vars
, const struct radv_nir_compiler_options
*options
)
227 cssa_ctx ctx
= {program
, live_vars
};
228 /* collect information about all interfering phi operands */
229 bool progress
= collect_phi_info(ctx
);
234 insert_parallelcopies(ctx
);
236 /* update live variable information */
237 live_vars
= live_var_analysis(program
, options
);