de518ec8430b9dfc5707b5c9746bf4bfcd418150
[gem5.git] / src / gpu-compute / kernel_cfg.cc
1 /*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36 #include "gpu-compute/kernel_cfg.hh"
37
38 #include <algorithm>
39 #include <cassert>
40 #include <cstdio>
41 #include <cstring>
42 #include <iostream>
43 #include <iterator>
44 #include <map>
45 #include <string>
46
47 #include "gpu-compute/gpu_static_inst.hh"
48
49 void
50 ControlFlowInfo::assignImmediatePostDominators(
51 const std::vector<GPUStaticInst*>& instructions)
52 {
53 ControlFlowInfo cfg(instructions);
54 cfg.findImmediatePostDominators();
55 }
56
57
58 ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
59 instructions(insts)
60 {
61 createBasicBlocks();
62 connectBasicBlocks();
63 }
64
65 BasicBlock*
66 ControlFlowInfo::basicBlock(int inst_addr) const {
67 for (auto& block: basicBlocks) {
68 int first_block_addr = block->firstInstruction->instAddr();
69 if (inst_addr >= first_block_addr && inst_addr <
70 first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
71 return block.get();
72 }
73 }
74 return nullptr;
75 }
76
77
78 GPUStaticInst*
79 ControlFlowInfo::lastInstruction(const BasicBlock* block) const
80 {
81 if (block->isExit()) {
82 return nullptr;
83 }
84
85 return instructions.at(block->firstInstruction->instNum() +
86 block->size - 1);
87 }
88
89 BasicBlock*
90 ControlFlowInfo::postDominator(const BasicBlock* block) const
91 {
92 if (block->isExit()) {
93 return nullptr;
94 }
95 return basicBlock(lastInstruction(block)->ipdInstNum());
96 }
97
98 void
99 ControlFlowInfo::createBasicBlocks()
100 {
101 assert(!instructions.empty());
102 std::set<int> leaders;
103 // first instruction is a leader
104 leaders.insert(0);
105 for (const auto &instruction : instructions) {
106 if (instruction->isBranch()) {
107 const int target_pc = instruction->getTargetPc();
108 leaders.insert(target_pc);
109 leaders.insert(instruction->nextInstAddr());
110 }
111 }
112
113 size_t block_size = 0;
114 for (const auto &instruction : instructions) {
115 if (leaders.find(instruction->instAddr()) != leaders.end()) {
116 uint32_t id = basicBlocks.size();
117 if (id > 0) {
118 basicBlocks.back()->size = block_size;
119 }
120 block_size = 0;
121 basicBlocks.emplace_back(new BasicBlock(id, instruction));
122 }
123 block_size++;
124 }
125 basicBlocks.back()->size = block_size;
126 // exit basic block
127 basicBlocks.emplace_back(new BasicBlock(basicBlocks.size(), nullptr));
128 }
129
130 void
131 ControlFlowInfo::connectBasicBlocks()
132 {
133 BasicBlock* exit_bb = basicBlocks.back().get();
134 for (auto& bb : basicBlocks) {
135 if (bb->isExit()) {
136 break;
137 }
138 GPUStaticInst* last = lastInstruction(bb.get());
139 if (last->isReturn()) {
140 bb->successorIds.insert(exit_bb->id);
141 continue;
142 }
143 if (last->isBranch()) {
144 const uint32_t target_pc = last->getTargetPc();
145 BasicBlock* target_bb = basicBlock(target_pc);
146 bb->successorIds.insert(target_bb->id);
147 }
148
149 // Unconditional jump instructions have a unique successor
150 if (!last->isUnconditionalJump()) {
151 BasicBlock* next_bb = basicBlock(last->nextInstAddr());
152 bb->successorIds.insert(next_bb->id);
153 }
154 }
155 }
156
157
158 // In-place set intersection
159 static void
160 intersect(std::set<uint32_t>& a, const std::set<uint32_t>& b)
161 {
162 std::set<uint32_t>::iterator it = a.begin();
163 while (it != a.end()) {
164 it = b.find(*it) != b.end() ? ++it : a.erase(it);
165 }
166 }
167
168
169 void
170 ControlFlowInfo::findPostDominators()
171 {
172 // the only postdominator of the exit block is itself
173 basicBlocks.back()->postDominatorIds.insert(basicBlocks.back()->id);
174 //copy all basic blocks to all postdominator lists except for exit block
175 for (auto& block : basicBlocks) {
176 if (!block->isExit()) {
177 for (uint32_t i = 0; i < basicBlocks.size(); i++) {
178 block->postDominatorIds.insert(i);
179 }
180 }
181 }
182
183 bool change = true;
184 while (change) {
185 change = false;
186 for (int h = basicBlocks.size() - 2; h >= 0; --h) {
187 size_t num_postdominators =
188 basicBlocks[h]->postDominatorIds.size();
189 for (int s : basicBlocks[h]->successorIds) {
190 intersect(basicBlocks[h]->postDominatorIds,
191 basicBlocks[s]->postDominatorIds);
192 }
193 basicBlocks[h]->postDominatorIds.insert(h);
194 change |= (num_postdominators
195 != basicBlocks[h]->postDominatorIds.size());
196 }
197 }
198 }
199
200
201 // In-place set difference
202 static void
203 setDifference(std::set<uint32_t>&a,
204 const std::set<uint32_t>& b, uint32_t exception)
205 {
206 for (uint32_t b_elem : b) {
207 if (b_elem != exception) {
208 a.erase(b_elem);
209 }
210 }
211 }
212
213 void
214 ControlFlowInfo::findImmediatePostDominators()
215 {
216 assert(basicBlocks.size() > 1); // Entry and exit blocks must be present
217
218 findPostDominators();
219
220 for (auto& basicBlock : basicBlocks) {
221 if (basicBlock->isExit()) {
222 continue;
223 }
224 std::set<uint32_t> candidates = basicBlock->postDominatorIds;
225 candidates.erase(basicBlock->id);
226 for (uint32_t postDominatorId : basicBlock->postDominatorIds) {
227 if (postDominatorId != basicBlock->id) {
228 setDifference(candidates,
229 basicBlocks[postDominatorId]->postDominatorIds,
230 postDominatorId);
231 }
232 }
233 assert(candidates.size() == 1);
234 GPUStaticInst* last_instruction = lastInstruction(basicBlock.get());
235 BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
236 if (!ipd_block->isExit()) {
237 GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
238 last_instruction->ipdInstNum(ipd_first_inst->instAddr());
239 } else {
240 last_instruction->ipdInstNum(last_instruction->nextInstAddr());
241 }
242 }
243 }
244
245 void
246 ControlFlowInfo::printPostDominators() const
247 {
248 for (auto& block : basicBlocks) {
249 std::cout << "PD(" << block->id << ") = {";
250 std::copy(block->postDominatorIds.begin(),
251 block->postDominatorIds.end(),
252 std::ostream_iterator<uint32_t>(std::cout, ", "));
253 std::cout << "}" << std::endl;
254 }
255 }
256
257 void
258 ControlFlowInfo::printImmediatePostDominators() const
259 {
260 for (const auto& block : basicBlocks) {
261 if (block->isExit()) {
262 continue;
263 }
264 std::cout << "IPD(" << block->id << ") = ";
265 std::cout << postDominator(block.get())->id << ", ";
266 }
267 std::cout << std::endl;
268 }
269 void
270 ControlFlowInfo::printBasicBlocks() const
271 {
272 for (GPUStaticInst* inst : instructions) {
273 int inst_addr = inst->instAddr();
274 std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
275 << "]: " << inst->disassemble();
276 if (inst->isBranch()) {
277 std::cout << ", PC = " << inst->getTargetPc();
278 }
279 std::cout << std::endl;
280 }
281 }
282
283 void
284 ControlFlowInfo::printBasicBlockDot() const
285 {
286 printf("digraph {\n");
287 for (const auto& basic_block : basicBlocks) {
288 printf("\t");
289 for (uint32_t successorId : basic_block->successorIds) {
290 printf("%d -> %d; ", basic_block->id, successorId);
291 }
292 printf("\n");
293 }
294 printf("}\n");
295 }