2 * Copyright © 2019 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 #include "aco_builder.h"
30 * Implements an algorithm to lower to Concentional SSA Form (CSSA).
31 * After "Revisiting Out-of-SSA Translation for Correctness, CodeQuality, and Efficiency"
32 * by B. Boissinot, A. Darte, F. Rastello, B. Dupont de Dinechin, C. Guillon,
34 * By lowering the IR to CSSA, the insertion of parallelcopies is separated from
35 * the register coalescing problem. Additionally, correctness is ensured w.r.t. spilling.
36 * The algorithm tries to find beneficial insertion points by checking if a basic block
37 * is empty and if the variable already has a new definition in a dominating block.
44 typedef std::map
<uint32_t, std::vector
<std::pair
<Definition
, Operand
>>> phi_info
;
49 phi_info logical_phi_info
;
50 phi_info linear_phi_info
;
52 cssa_ctx(Program
* program
, live
& live_vars
) : program(program
), live_vars(live_vars
) {}
55 bool collect_phi_info(cssa_ctx
& ctx
)
57 bool progress
= false;
58 for (Block
& block
: ctx
.program
->blocks
) {
59 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
61 if (phi
->opcode
== aco_opcode::p_phi
)
63 else if (phi
->opcode
== aco_opcode::p_linear_phi
)
68 /* no CSSA for the exec mask as we don't spill it anyway */
69 if (phi
->definitions
[0].isFixed() && phi
->definitions
[0].physReg() == exec
)
71 std::vector
<unsigned>& preds
= is_logical
? block
.logical_preds
: block
.linear_preds
;
73 /* collect definition's block per Operand */
74 std::vector
<unsigned> def_points(phi
->operands
.size());
75 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
76 Operand
& op
= phi
->operands
[i
];
77 if (op
.isUndefined()) {
78 def_points
[i
] = preds
[i
];
79 } else if (op
.isConstant()) {
80 /* in theory, we could insert the definition there... */
84 unsigned pred
= preds
[i
];
88 ctx
.program
->blocks
[pred
].logical_idom
:
89 ctx
.program
->blocks
[pred
].linear_idom
;
90 } while (def_points
[i
] != pred
&&
91 ctx
.live_vars
.live_out
[pred
].find(op
.getTemp()) != ctx
.live_vars
.live_out
[pred
].end());
95 /* check live-range intersections */
96 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
97 Operand op
= phi
->operands
[i
];
100 /* check if the operand comes from the exec mask of a predecessor */
101 if (op
.isTemp() && op
.getTemp() == ctx
.program
->blocks
[preds
[i
]].live_out_exec
)
104 bool interferes
= false;
105 unsigned idom
= is_logical
?
106 ctx
.program
->blocks
[def_points
[i
]].logical_idom
:
107 ctx
.program
->blocks
[def_points
[i
]].linear_idom
;
108 /* live-through operands definitely interfere */
109 if (op
.isTemp() && !op
.isKill()) {
111 /* create copies for constants to ease spilling */
112 } else if (op
.isConstant()) {
114 /* create copies for SGPR -> VGPR moves */
115 } else if (op
.regClass() != phi
->definitions
[0].regClass()) {
117 /* operand might interfere with any phi-def*/
118 } else if (def_points
[i
] == block
.index
) {
120 /* operand might interfere with phi-def */
121 } else if (ctx
.live_vars
.live_out
[idom
].count(phi
->definitions
[0].getTemp())) {
123 /* else check for interferences with other operands */
125 for (unsigned j
= 0; !interferes
&& j
< phi
->operands
.size(); j
++) {
126 /* don't care about other register classes */
127 if (!phi
->operands
[j
].isTemp() || phi
->operands
[j
].regClass() != phi
->definitions
[0].regClass())
129 /* same operands cannot interfere */
130 if (op
.getTemp() == phi
->operands
[j
].getTemp())
132 /* if def_points[i] dominates any other def_point, assume they interfere.
133 * As live-through operands are checked above, only test up the current block. */
134 unsigned other_def_point
= def_points
[j
];
135 while (def_points
[i
] < other_def_point
&& other_def_point
!= block
.index
)
136 other_def_point
= is_logical
?
137 ctx
.program
->blocks
[other_def_point
].logical_idom
:
138 ctx
.program
->blocks
[other_def_point
].linear_idom
;
139 interferes
= def_points
[i
] == other_def_point
;
148 /* create new temporary and rename operands */
149 Temp new_tmp
= Temp
{ctx
.program
->allocateId(), phi
->definitions
[0].regClass()};
151 ctx
.logical_phi_info
[preds
[i
]].emplace_back(Definition(new_tmp
), op
);
153 ctx
.linear_phi_info
[preds
[i
]].emplace_back(Definition(new_tmp
), op
);
154 phi
->operands
[i
] = Operand(new_tmp
);
155 phi
->operands
[i
].setKill(true);
156 def_points
[i
] = preds
[i
];
163 void insert_parallelcopies(cssa_ctx
& ctx
)
165 /* insert the parallelcopies from logical phis before p_logical_end */
166 for (auto&& entry
: ctx
.logical_phi_info
) {
167 Block
& block
= ctx
.program
->blocks
[entry
.first
];
168 unsigned idx
= block
.instructions
.size() - 1;
169 while (block
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
) {
174 Builder
bld(ctx
.program
);
175 bld
.reset(&block
.instructions
, std::next(block
.instructions
.begin(), idx
));
176 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
177 bld
.pseudo(aco_opcode::p_parallelcopy
, pair
.first
, pair
.second
);
180 /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
181 for (auto&& entry
: ctx
.linear_phi_info
) {
182 Block
& block
= ctx
.program
->blocks
[entry
.first
];
183 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.end();
185 assert((*it
)->format
== Format::PSEUDO_BRANCH
);
187 Builder
bld(ctx
.program
);
188 bld
.reset(&block
.instructions
, it
);
189 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
190 bld
.pseudo(aco_opcode::p_parallelcopy
, pair
.first
, pair
.second
);
194 } /* end namespace */
197 void lower_to_cssa(Program
* program
, live
& live_vars
, const struct radv_nir_compiler_options
*options
)
199 cssa_ctx ctx
= {program
, live_vars
};
200 /* collect information about all interfering phi operands */
201 bool progress
= collect_phi_info(ctx
);
206 insert_parallelcopies(ctx
);
208 /* update live variable information */
209 live_vars
= live_var_analysis(program
, options
);