src/gallium/drivers/lima/ir/gp/reduce_scheduler.c
/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <limits.h>

#include "gpir.h"

/* Register-sensitive scheduling algorithm from the paper:
 * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"
 * Authors: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons
 */

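/* Overview of how this file applies the algorithm:
 *
 * For every block, schedule_calc_sched_info() first computes, per node,
 * an earliest start time (est, the length of the longest chain of
 * predecessors feeding the node) and a Sethi-Ullman-style register
 * pressure estimate (reg_pressure). schedule_ready_list() then emits one
 * node at a time from a priority-sorted ready list; a node becomes ready
 * once all of its successors have been scheduled, so nodes are emitted
 * from the roots down to the leaves. Because each emitted node is
 * prepended to block->node_list, the final list ends up in forward
 * order, with every node placed before all of its successors.
 */
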
static void schedule_calc_sched_info(gpir_node *node)
{
   int n = 0;
   float extra_reg = 1.0f;

   /* update all children's sched info */
   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;

      if (pred->rsched.reg_pressure < 0)
         schedule_calc_sched_info(pred);

      int est = pred->rsched.est + 1;
      if (node->rsched.est < est)
         node->rsched.est = est;

      float reg_weight = 1.0f - 1.0f / list_length(&pred->succ_list);
      if (extra_reg > reg_weight)
         extra_reg = reg_weight;

      n++;
   }

   /* leaf instr */
   if (!n) {
      node->rsched.reg_pressure = 0;
      return;
   }

   int i = 0;
   float reg[n];
   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;
      reg[i++] = pred->rsched.reg_pressure;
   }

   /* sort child register pressures into ascending order */
   for (i = 0; i < n - 1; i++) {
      for (int j = 0; j < n - i - 1; j++) {
         if (reg[j] > reg[j + 1]) {
            float tmp = reg[j + 1];
            reg[j + 1] = reg[j];
            reg[j] = tmp;
         }
      }
   }

   /* Max register pressure, assuming children are evaluated in decreasing
    * order of their own pressure: while child i (in ascending order) is
    * being computed, the n - (i + 1) higher-pressure children already hold
    * live results.
    */
   for (i = 0; i < n; i++) {
      float pressure = reg[i] + n - (i + 1);
      if (pressure > node->rsched.reg_pressure)
         node->rsched.reg_pressure = pressure;
   }

   /* If all children of this node have multiple parents, then this
    * node needs an extra reg to store its result. For example, it
    * would be unfair for a parent to get the same reg pressure as its
    * child when n == 1 and the child has more than one successor,
    * because two regs are needed in that case.
    *
    * But we can't add a full reg to reg_pressure, because the last
    * parent of a multi-successor child doesn't need an extra reg. For
    * example, a node with a single multi-successor child should have
    * less reg pressure than a node with two single-successor children.
    *
    * extra_reg = min over all children of (1.0 - 1.0 / num successors)
    */
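   /* Worked example of the two formulas above (illustration only):
    *
    *   - A node with two single-successor leaf children: n == 2,
    *     reg[] == {0, 0}, so reg_pressure == max(0 + 1, 0 + 0) == 1,
    *     and extra_reg == 1.0 - 1.0 / 1 == 0, giving 1.0 in total.
    *
    *   - A node with one leaf child that has two successors: n == 1,
    *     reg[] == {0}, so reg_pressure == 0, and
    *     extra_reg == 1.0 - 1.0 / 2 == 0.5, giving 0.5 in total.
    *
    * So the single multi-successor child case does end up cheaper than
    * the two single-successor children case, as intended.
    */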
   node->rsched.reg_pressure += extra_reg;
}

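/* Keep the ready list sorted by scheduling priority: ascending
 * parent_index first (i.e. the node whose consumer was scheduled most
 * recently comes first), then ascending reg_pressure, then descending
 * est. The head of the list is the next node to be scheduled.
 */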
static void schedule_insert_ready_list(struct list_head *ready_list,
                                       gpir_node *insert_node)
{
   struct list_head *insert_pos = ready_list;

   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (insert_node->rsched.parent_index < node->rsched.parent_index ||
          (insert_node->rsched.parent_index == node->rsched.parent_index &&
           (insert_node->rsched.reg_pressure < node->rsched.reg_pressure ||
            (insert_node->rsched.reg_pressure == node->rsched.reg_pressure &&
             (insert_node->rsched.est >= node->rsched.est))))) {
         insert_pos = &node->list;
         break;
      }
   }

   list_del(&insert_node->list);
   list_addtail(&insert_node->list, insert_pos);
}

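/* Pop the highest-priority node off the ready list, schedule it into
 * block->node_list, then insert into the ready list each predecessor
 * whose successors have now all been scheduled. Recurses until the ready
 * list is empty, scheduling one node per call.
 */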
static void schedule_ready_list(gpir_block *block, struct list_head *ready_list)
{
   if (list_empty(ready_list))
      return;

   gpir_node *node = list_first_entry(ready_list, gpir_node, list);
   list_del(&node->list);

   /* schedule the node by prepending it to the block node list */
   list_add(&node->list, &block->node_list);
   node->rsched.scheduled = true;
   block->rsched.node_index--;

   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;
      pred->rsched.parent_index = block->rsched.node_index;

      bool ready = true;
      gpir_node_foreach_succ(pred, dep) {
         gpir_node *succ = dep->succ;
         if (!succ->rsched.scheduled) {
            ready = false;
            break;
         }
      }
      /* all successors have been scheduled */
      if (ready)
         schedule_insert_ready_list(ready_list, pred);
   }

   schedule_ready_list(block, ready_list);
}

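/* Schedule a single block; the step numbers in the comments below
 * presumably refer to the corresponding steps of the sequencing
 * algorithm in the paper cited at the top of this file.
 */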
static void schedule_block(gpir_block *block)
{
   /* move all nodes to a local node_list; block->node_list will
    * receive the schedule result */
   struct list_head node_list;
   list_replace(&block->node_list, &node_list);
   list_inithead(&block->node_list);

   /* step 2 & 3 */
   list_for_each_entry(gpir_node, node, &node_list, list) {
      if (gpir_node_is_root(node))
         schedule_calc_sched_info(node);
      block->rsched.node_index++;
   }

   /* step 4 */
   struct list_head ready_list;
   list_inithead(&ready_list);

   /* step 5 */
   list_for_each_entry_safe(gpir_node, node, &node_list, list) {
      if (gpir_node_is_root(node)) {
         node->rsched.parent_index = INT_MAX;
         schedule_insert_ready_list(&ready_list, node);
      }
   }

   /* step 6 */
   schedule_ready_list(block, &ready_list);
}

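/* Pass entry point: reorder the nodes of every block to reduce register
 * pressure. Always returns true; there is currently no failure path.
 */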
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp)
{
   /* No need to build physical reg load/store dependencies here,
    * because we have just left SSA form: there should be at most one
    * load/store pair for a physical reg within a block, and the store
    * must come after the load, consuming the load's output (possibly
    * after some calculation), so the data dependency already keeps them
    * in order. So we don't need to insert extra write-after-read or
    * read-after-write dependencies for load/store nodes to maintain the
    * right sequence before scheduling.
    *
    * Also no need to handle SSA def/use in different blocks, because
    * we'll load/store an SSA value through a physical reg if its def
    * and use are not in the same block.
    */

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      block->rsched.node_index = 0;
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         /* reg_pressure < 0 marks the node as not yet visited by
          * schedule_calc_sched_info() */
         node->rsched.reg_pressure = -1;
         node->rsched.est = 0;
         node->rsched.scheduled = false;
      }
   }

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      schedule_block(block);
   }

   gpir_debug("after reduce scheduler\n");
   gpir_node_print_prog_seq(comp);
   return true;
}