c3be6acfe8f08ff1fcdb4cf9ebe3652bdc25a9a6
[mesa.git] / src / gallium / drivers / lima / ir / pp / lower.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
27
28 #include "ppir.h"
29
30 static bool ppir_lower_const(ppir_block *block, ppir_node *node)
31 {
32 if (ppir_node_is_root(node)) {
33 ppir_node_delete(node);
34 return true;
35 }
36
37 ppir_node *move = NULL;
38 ppir_dest *dest = ppir_node_get_dest(node);
39
40 /* const (register) can only be used in alu node, create a move
41 * node for other types of node */
42 ppir_node_foreach_succ_safe(node, dep) {
43 ppir_node *succ = dep->succ;
44
45 if (succ->type != ppir_node_type_alu) {
46 if (!move) {
47 move = ppir_node_create(block, ppir_op_mov, -1, 0);
48 if (unlikely(!move))
49 return false;
50
51 ppir_debug("lower const create move %d for %d\n",
52 move->index, node->index);
53
54 ppir_alu_node *alu = ppir_node_to_alu(move);
55 alu->dest = *dest;
56 alu->num_src = 1;
57 ppir_node_target_assign(alu->src, dest);
58 for (int i = 0; i < 4; i++)
59 alu->src->swizzle[i] = i;
60 }
61
62 ppir_node_replace_pred(dep, move);
63 ppir_node_replace_child(succ, node, move);
64 }
65 }
66
67 if (move) {
68 ppir_node_add_dep(move, node);
69 list_addtail(&move->list, &node->list);
70 }
71
72 return true;
73 }
74
75 static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
76 {
77 /* swapped op must be the next op */
78 node->op++;
79
80 assert(node->type == ppir_node_type_alu);
81 ppir_alu_node *alu = ppir_node_to_alu(node);
82 assert(alu->num_src == 2);
83
84 ppir_src tmp = alu->src[0];
85 alu->src[0] = alu->src[1];
86 alu->src[1] = tmp;
87 return true;
88 }
89
90 static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
91 {
92 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
93
94 /* Create load_coords node */
95 ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
96 if (!load)
97 return false;
98 list_addtail(&load->node.list, &node->list);
99
100 ppir_debug("%s create load_coords node %d for %d\n",
101 __FUNCTION__, load->node.index, node->index);
102
103 load->dest.type = ppir_target_pipeline;
104 load->dest.pipeline = ppir_pipeline_reg_discard;
105
106 load->src = load_tex->src_coords;
107
108 ppir_node_foreach_pred_safe(node, dep) {
109 ppir_node *pred = dep->pred;
110 ppir_node_remove_dep(dep);
111 ppir_node_add_dep(&load->node, pred);
112 }
113
114 ppir_node_add_dep(node, &load->node);
115
116 /* Create move node */
117 ppir_node *move = ppir_node_create(block, ppir_op_mov, -1 , 0);
118 if (unlikely(!move))
119 return false;
120
121 ppir_alu_node *alu = ppir_node_to_alu(move);
122
123 ppir_dest *dest = ppir_node_get_dest(node);
124 alu->dest = *dest;
125
126 ppir_node_replace_all_succ(move, node);
127
128 dest->type = ppir_target_pipeline;
129 dest->pipeline = ppir_pipeline_reg_sampler;
130
131 alu->num_src = 1;
132 ppir_node_target_assign(&alu->src[0], dest);
133 for (int i = 0; i < 4; i++)
134 alu->src->swizzle[i] = i;
135
136 ppir_node_add_dep(move, node);
137 list_addtail(&move->list, &node->list);
138
139 return true;
140 }
141
142 static bool ppir_lower_sin_cos(ppir_block *block, ppir_node *node)
143 {
144 ppir_alu_node *alu = ppir_node_to_alu(node);
145
146 ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0);
147 if (!inv_2pi_node)
148 return false;
149 list_addtail(&inv_2pi_node->list, &node->list);
150
151 /* For sin and cos, the input has to multiplied by the constant
152 * 1/(2*pi), presumably to simplify the hardware. */
153 ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node);
154 inv_2pi_const->constant.num = 1;
155 inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI));
156
157 inv_2pi_const->dest.type = ppir_target_ssa;
158 inv_2pi_const->dest.ssa.num_components = 1;
159 inv_2pi_const->dest.ssa.live_in = INT_MAX;
160 inv_2pi_const->dest.ssa.live_out = 0;
161 inv_2pi_const->dest.write_mask = 0x01;
162
163 ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0);
164 if (!mul_node)
165 return false;
166 list_addtail(&mul_node->list, &node->list);
167
168 ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node);
169 mul_alu->num_src = 2;
170 mul_alu->src[0] = alu->src[0];
171 mul_alu->src[1].type = ppir_target_ssa;
172 mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa;
173
174 int num_components = alu->src[0].ssa->num_components;
175 mul_alu->dest.type = ppir_target_ssa;
176 mul_alu->dest.ssa.num_components = num_components;
177 mul_alu->dest.ssa.live_in = INT_MAX;
178 mul_alu->dest.ssa.live_out = 0;
179 mul_alu->dest.write_mask = u_bit_consecutive(0, num_components);
180
181 alu->src[0].type = ppir_target_ssa;
182 alu->src[0].ssa = &mul_alu->dest.ssa;
183 for (int i = 0; i < 4; i++)
184 alu->src->swizzle[i] = i;
185
186 ppir_node_foreach_pred_safe(node, dep) {
187 ppir_node *pred = dep->pred;
188 ppir_node_remove_dep(dep);
189 ppir_node_add_dep(mul_node, pred);
190 }
191 ppir_node_add_dep(node, mul_node);
192 ppir_node_add_dep(mul_node, inv_2pi_node);
193
194 return true;
195 }
196
197 /* insert a move as the select condition to make sure it can
198 * be inserted to select instr float mul slot
199 */
200 static bool ppir_lower_select(ppir_block *block, ppir_node *node)
201 {
202 ppir_alu_node *alu = ppir_node_to_alu(node);
203
204 ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
205 if (!move)
206 return false;
207 list_addtail(&move->list, &node->list);
208
209 ppir_alu_node *move_alu = ppir_node_to_alu(move);
210 ppir_src *move_src = move_alu->src, *src = alu->src;
211 move_src->type = src->type;
212 move_src->ssa = src->ssa;
213 move_src->swizzle[0] = src->swizzle[0];
214 move_alu->num_src = 1;
215
216 ppir_dest *move_dest = &move_alu->dest;
217 move_dest->type = ppir_target_ssa;
218 move_dest->ssa.num_components = 1;
219 move_dest->ssa.live_in = INT_MAX;
220 move_dest->ssa.live_out = 0;
221 move_dest->write_mask = 1;
222
223 ppir_node_foreach_pred(node, dep) {
224 ppir_node *pred = dep->pred;
225 ppir_dest *dest = ppir_node_get_dest(pred);
226 if (ppir_node_target_equal(alu->src, dest)) {
227 ppir_node_replace_pred(dep, move);
228 ppir_node_add_dep(move, pred);
229 }
230 }
231
232 /* move must be the first pred of select node which make sure
233 * the float mul slot is free when node to instr
234 */
235 assert(ppir_node_first_pred(node) == move);
236
237 src->swizzle[0] = 0;
238 ppir_node_target_assign(alu->src, move_dest);
239 return true;
240 }
241
242 static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
243 {
244 /* Turn it into a mov with a round to integer output modifier */
245 ppir_alu_node *alu = ppir_node_to_alu(node);
246 ppir_dest *move_dest = &alu->dest;
247 move_dest->modifier = ppir_outmod_round;
248 node->op = ppir_op_mov;
249
250 return true;
251 }
252
253 static bool ppir_lower_abs(ppir_block *block, ppir_node *node)
254 {
255 /* Turn it into a mov and set the absolute modifier */
256 ppir_alu_node *alu = ppir_node_to_alu(node);
257
258 assert(alu->num_src == 1);
259
260 alu->src[0].absolute = true;
261 alu->src[0].negate = false;
262 node->op = ppir_op_mov;
263
264 return true;
265 }
266
267 static bool ppir_lower_neg(ppir_block *block, ppir_node *node)
268 {
269 /* Turn it into a mov and set the negate modifier */
270 ppir_alu_node *alu = ppir_node_to_alu(node);
271
272 assert(alu->num_src == 1);
273
274 alu->src[0].negate = !alu->src[0].negate;
275 node->op = ppir_op_mov;
276
277 return true;
278 }
279
280 static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
281 {
282 /* Turn it into a mov with the saturate output modifier */
283 ppir_alu_node *alu = ppir_node_to_alu(node);
284
285 assert(alu->num_src == 1);
286
287 ppir_dest *move_dest = &alu->dest;
288 move_dest->modifier = ppir_outmod_clamp_fraction;
289 node->op = ppir_op_mov;
290
291 return true;
292 }
293
294 static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
295 {
296 ppir_branch_node *branch = ppir_node_to_branch(node);
297 ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);
298
299 if (!zero)
300 return false;
301
302 list_addtail(&zero->node.list, &node->list);
303
304 zero->constant.value[0].f = 0;
305 zero->constant.num = 1;
306 zero->dest.type = ppir_target_ssa;
307 zero->dest.ssa.num_components = 1;
308 zero->dest.ssa.live_in = INT_MAX;
309 zero->dest.ssa.live_out = 0;
310 zero->dest.write_mask = 0x01;
311
312 /* For now we're just comparing branch condition with 0,
313 * in future we should look whether it's possible to move
314 * comparision node into branch itself and use current
315 * way as a fallback for complex conditions.
316 */
317 branch->src[1].type = ppir_target_ssa;
318 branch->src[1].ssa = &zero->dest.ssa;
319
320 branch->cond_gt = true;
321 branch->cond_lt = true;
322
323 ppir_node_add_dep(&branch->node, &zero->node);
324
325 return true;
326 }
327
328 static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
329 [ppir_op_abs] = ppir_lower_abs,
330 [ppir_op_neg] = ppir_lower_neg,
331 [ppir_op_const] = ppir_lower_const,
332 [ppir_op_sin] = ppir_lower_sin_cos,
333 [ppir_op_cos] = ppir_lower_sin_cos,
334 [ppir_op_lt] = ppir_lower_swap_args,
335 [ppir_op_le] = ppir_lower_swap_args,
336 [ppir_op_load_texture] = ppir_lower_texture,
337 [ppir_op_select] = ppir_lower_select,
338 [ppir_op_trunc] = ppir_lower_trunc,
339 [ppir_op_sat] = ppir_lower_sat,
340 [ppir_op_branch] = ppir_lower_branch,
341 };
342
343 bool ppir_lower_prog(ppir_compiler *comp)
344 {
345 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
346 list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
347 if (ppir_lower_funcs[node->op] &&
348 !ppir_lower_funcs[node->op](block, node))
349 return false;
350 }
351 }
352
353 ppir_node_print_prog(comp);
354 return true;
355 }