lima/ppir: refactor texture code to simplify scheduler
[mesa.git] / src / gallium / drivers / lima / ir / pp / lower.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
27
28 #include "ppir.h"
29
30 static bool ppir_lower_const(ppir_block *block, ppir_node *node)
31 {
32 if (ppir_node_is_root(node)) {
33 ppir_node_delete(node);
34 return true;
35 }
36
37 ppir_node *move = NULL;
38 ppir_dest *dest = ppir_node_get_dest(node);
39
40 /* const (register) can only be used in alu node, create a move
41 * node for other types of node */
42 ppir_node_foreach_succ_safe(node, dep) {
43 ppir_node *succ = dep->succ;
44
45 if (succ->type != ppir_node_type_alu) {
46 if (!move) {
47 move = ppir_node_create(block, ppir_op_mov, -1, 0);
48 if (unlikely(!move))
49 return false;
50
51 ppir_debug("lower const create move %d for %d\n",
52 move->index, node->index);
53
54 ppir_alu_node *alu = ppir_node_to_alu(move);
55 alu->dest = *dest;
56 alu->num_src = 1;
57 ppir_node_target_assign(alu->src, dest);
58 for (int i = 0; i < 4; i++)
59 alu->src->swizzle[i] = i;
60 }
61
62 ppir_node_replace_pred(dep, move);
63 ppir_node_replace_child(succ, node, move);
64 }
65 }
66
67 if (move) {
68 ppir_node_add_dep(move, node);
69 list_addtail(&move->list, &node->list);
70 }
71
72 return true;
73 }
74
75 /* lower dot to mul+sum */
76 static bool ppir_lower_dot(ppir_block *block, ppir_node *node)
77 {
78 ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, 0);
79 if (!mul)
80 return false;
81 list_addtail(&mul->node.list, &node->list);
82
83 ppir_alu_node *dot = ppir_node_to_alu(node);
84 mul->src[0] = dot->src[0];
85 mul->src[1] = dot->src[1];
86 mul->num_src = 2;
87
88 int num_components = node->op - ppir_op_dot2 + 2;
89 ppir_dest *dest = &mul->dest;
90 dest->type = ppir_target_ssa;
91 dest->ssa.num_components = num_components;
92 dest->ssa.live_in = INT_MAX;
93 dest->ssa.live_out = 0;
94 dest->write_mask = u_bit_consecutive(0, num_components);
95
96 ppir_node_foreach_pred_safe(node, dep) {
97 ppir_node *pred = dep->pred;
98 ppir_node_remove_dep(dep);
99 ppir_node_add_dep(&mul->node, pred);
100 }
101 ppir_node_add_dep(node, &mul->node);
102
103 if (node->op == ppir_op_dot2) {
104 node->op = ppir_op_add;
105
106 ppir_node_target_assign(dot->src, dest);
107 dot->src[0].swizzle[0] = 0;
108 dot->src[0].absolute = false;
109 dot->src[0].negate = false;
110
111 ppir_node_target_assign(dot->src + 1, dest);
112 dot->src[1].swizzle[0] = 1;
113 dot->src[1].absolute = false;
114 dot->src[1].negate = false;
115 }
116 else {
117 node->op = node->op == ppir_op_dot3 ? ppir_op_sum3 : ppir_op_sum4;
118
119 ppir_node_target_assign(dot->src, dest);
120 for (int i = 0; i < 4; i++)
121 dot->src[0].swizzle[i] = i;
122 dot->src[0].absolute = false;
123 dot->src[0].negate = false;
124
125 dot->num_src = 1;
126 }
127
128 return true;
129 }
130
131 static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
132 {
133 ppir_reg *r = rzalloc(comp, ppir_reg);
134 if (!r)
135 return NULL;
136
137 r->num_components = num_components;
138 r->live_in = INT_MAX;
139 r->live_out = 0;
140 r->is_head = false;
141 list_addtail(&r->list, &comp->reg_list);
142
143 return r;
144 }
145
146 /* lower vector alu node to multi scalar nodes */
147 static bool ppir_lower_vec_to_scalar(ppir_block *block, ppir_node *node)
148 {
149 ppir_alu_node *alu = ppir_node_to_alu(node);
150 ppir_dest *dest = &alu->dest;
151
152 int n = 0;
153 int index[4];
154
155 unsigned mask = dest->write_mask;
156 while (mask)
157 index[n++] = u_bit_scan(&mask);
158
159 if (n == 1)
160 return true;
161
162 ppir_reg *r;
163 /* we need a reg for scalar nodes to store output */
164 if (dest->type == ppir_target_register)
165 r = dest->reg;
166 else {
167 r = create_reg(block->comp, n);
168 if (!r)
169 return false;
170
171 /* change all successors to use reg r */
172 ppir_node_foreach_succ(node, dep) {
173 ppir_node *succ = dep->succ;
174 if (succ->type == ppir_node_type_alu) {
175 ppir_alu_node *sa = ppir_node_to_alu(succ);
176 for (int i = 0; i < sa->num_src; i++) {
177 ppir_src *src = sa->src + i;
178 if (ppir_node_target_equal(src, dest)) {
179 src->type = ppir_target_register;
180 src->reg = r;
181 }
182 }
183 }
184 else {
185 assert(succ->type == ppir_node_type_store);
186 ppir_store_node *ss = ppir_node_to_store(succ);
187 ppir_src *src = &ss->src;
188 src->type = ppir_target_register;
189 src->reg = r;
190 }
191 }
192 }
193
194 /* create each component's scalar node */
195 for (int i = 0; i < n; i++) {
196 ppir_node *s = ppir_node_create(block, node->op, -1, 0);
197 if (!s)
198 return false;
199 list_addtail(&s->list, &node->list);
200
201 ppir_alu_node *sa = ppir_node_to_alu(s);
202 ppir_dest *sd = &sa->dest;
203 sd->type = ppir_target_register;
204 sd->reg = r;
205 sd->modifier = dest->modifier;
206 sd->write_mask = 1 << index[i];
207
208 for (int j = 0; j < alu->num_src; j++)
209 sa->src[j] = alu->src[j];
210 sa->num_src = alu->num_src;
211
212 /* TODO: need per reg component dependancy */
213 ppir_node_foreach_succ(node, dep) {
214 ppir_node_add_dep(dep->succ, s);
215 }
216
217 ppir_node_foreach_pred(node, dep) {
218 ppir_node_add_dep(s, dep->pred);
219 }
220 }
221
222 ppir_node_delete(node);
223 return true;
224 }
225
226 static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
227 {
228 /* swapped op must be the next op */
229 node->op++;
230
231 assert(node->type == ppir_node_type_alu);
232 ppir_alu_node *alu = ppir_node_to_alu(node);
233 assert(alu->num_src == 2);
234
235 ppir_src tmp = alu->src[0];
236 alu->src[0] = alu->src[1];
237 alu->src[1] = tmp;
238 return true;
239 }
240
241 static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
242 {
243 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
244
245 /* Create load_coords node */
246 ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
247 if (!load)
248 return false;
249 list_addtail(&load->node.list, &node->list);
250
251 ppir_debug("%s create load_coords node %d for %d\n",
252 __FUNCTION__, load->node.index, node->index);
253
254 load->dest.type = ppir_target_pipeline;
255 load->dest.pipeline = ppir_pipeline_reg_discard;
256
257 load->src = load_tex->src_coords;
258
259 ppir_node_foreach_pred_safe(node, dep) {
260 ppir_node *pred = dep->pred;
261 ppir_node_remove_dep(dep);
262 ppir_node_add_dep(&load->node, pred);
263 }
264
265 ppir_node_add_dep(node, &load->node);
266 return true;
267 }
268
269 /* Prepare for sin and cos and then lower vector alu node to multi
270 * scalar nodes */
271 static bool ppir_lower_sin_cos_vec_to_scalar(ppir_block *block, ppir_node *node)
272 {
273 ppir_alu_node *alu = ppir_node_to_alu(node);
274
275 ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0);
276 if (!inv_2pi_node)
277 return false;
278 list_addtail(&inv_2pi_node->list, &node->list);
279
280 /* For sin and cos, the input has to multiplied by the constant
281 * 1/(2*pi), presumably to simplify the hardware. */
282 ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node);
283 inv_2pi_const->constant.num = 1;
284 inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI));
285
286 inv_2pi_const->dest.type = ppir_target_ssa;
287 inv_2pi_const->dest.ssa.num_components = 1;
288 inv_2pi_const->dest.ssa.live_in = INT_MAX;
289 inv_2pi_const->dest.ssa.live_out = 0;
290 inv_2pi_const->dest.write_mask = 0x01;
291
292 ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0);
293 if (!mul_node)
294 return false;
295 list_addtail(&mul_node->list, &node->list);
296
297 ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node);
298 mul_alu->num_src = 2;
299 mul_alu->src[0] = alu->src[0];
300 mul_alu->src[1].type = ppir_target_ssa;
301 mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa;
302
303 int num_components = alu->src[0].ssa->num_components;
304 mul_alu->dest.type = ppir_target_ssa;
305 mul_alu->dest.ssa.num_components = num_components;
306 mul_alu->dest.ssa.live_in = INT_MAX;
307 mul_alu->dest.ssa.live_out = 0;
308 mul_alu->dest.write_mask = u_bit_consecutive(0, num_components);
309
310 alu->src[0].type = ppir_target_ssa;
311 alu->src[0].ssa = &mul_alu->dest.ssa;
312 for (int i = 0; i < 4; i++)
313 alu->src->swizzle[i] = i;
314
315 ppir_node_foreach_pred_safe(node, dep) {
316 ppir_node *pred = dep->pred;
317 ppir_node_remove_dep(dep);
318 ppir_node_add_dep(mul_node, pred);
319 }
320 ppir_node_add_dep(node, mul_node);
321 ppir_node_add_dep(mul_node, inv_2pi_node);
322
323 return ppir_lower_vec_to_scalar(block, node);
324 }
325
326 /* insert a move as the select condition to make sure it can
327 * be inserted to select instr float mul slot
328 */
329 static bool ppir_lower_select(ppir_block *block, ppir_node *node)
330 {
331 ppir_alu_node *alu = ppir_node_to_alu(node);
332
333 ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
334 if (!move)
335 return false;
336 list_addtail(&move->list, &node->list);
337
338 ppir_alu_node *move_alu = ppir_node_to_alu(move);
339 ppir_src *move_src = move_alu->src, *src = alu->src;
340 move_src->type = src->type;
341 move_src->ssa = src->ssa;
342 move_src->swizzle[0] = src->swizzle[0];
343 move_alu->num_src = 1;
344
345 ppir_dest *move_dest = &move_alu->dest;
346 move_dest->type = ppir_target_ssa;
347 move_dest->ssa.num_components = 1;
348 move_dest->ssa.live_in = INT_MAX;
349 move_dest->ssa.live_out = 0;
350 move_dest->write_mask = 1;
351
352 ppir_node_foreach_pred(node, dep) {
353 ppir_node *pred = dep->pred;
354 ppir_dest *dest = ppir_node_get_dest(pred);
355 if (ppir_node_target_equal(alu->src, dest)) {
356 ppir_node_replace_pred(dep, move);
357 ppir_node_add_dep(move, pred);
358 }
359 }
360
361 /* move must be the first pred of select node which make sure
362 * the float mul slot is free when node to instr
363 */
364 assert(ppir_node_first_pred(node) == move);
365
366 src->swizzle[0] = 0;
367 ppir_node_target_assign(alu->src, move_dest);
368 return true;
369 }
370
371 static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
372 {
373 /* Turn it into a mov with a round to integer output modifier */
374 ppir_alu_node *alu = ppir_node_to_alu(node);
375 ppir_dest *move_dest = &alu->dest;
376 move_dest->modifier = ppir_outmod_round;
377 node->op = ppir_op_mov;
378
379 return true;
380 }
381
382 static bool ppir_lower_abs(ppir_block *block, ppir_node *node)
383 {
384 /* Turn it into a mov and set the absolute modifier */
385 ppir_alu_node *alu = ppir_node_to_alu(node);
386
387 assert(alu->num_src == 1);
388
389 alu->src[0].absolute = true;
390 alu->src[0].negate = false;
391 node->op = ppir_op_mov;
392
393 return true;
394 }
395
396 static bool ppir_lower_neg(ppir_block *block, ppir_node *node)
397 {
398 /* Turn it into a mov and set the negate modifier */
399 ppir_alu_node *alu = ppir_node_to_alu(node);
400
401 assert(alu->num_src == 1);
402
403 alu->src[0].negate = !alu->src[0].negate;
404 node->op = ppir_op_mov;
405
406 return true;
407 }
408
409 static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
410 {
411 /* Turn it into a mov with the saturate output modifier */
412 ppir_alu_node *alu = ppir_node_to_alu(node);
413
414 assert(alu->num_src == 1);
415
416 ppir_dest *move_dest = &alu->dest;
417 move_dest->modifier = ppir_outmod_clamp_fraction;
418 node->op = ppir_op_mov;
419
420 return true;
421 }
422
423 static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
424 {
425 ppir_branch_node *branch = ppir_node_to_branch(node);
426 ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);
427
428 if (!zero)
429 return false;
430
431 list_addtail(&zero->node.list, &node->list);
432
433 zero->constant.value[0].f = 0;
434 zero->constant.num = 1;
435 zero->dest.type = ppir_target_ssa;
436 zero->dest.ssa.num_components = 1;
437 zero->dest.ssa.live_in = INT_MAX;
438 zero->dest.ssa.live_out = 0;
439 zero->dest.write_mask = 0x01;
440
441 /* For now we're just comparing branch condition with 0,
442 * in future we should look whether it's possible to move
443 * comparision node into branch itself and use current
444 * way as a fallback for complex conditions.
445 */
446 branch->src[1].type = ppir_target_ssa;
447 branch->src[1].ssa = &zero->dest.ssa;
448
449 branch->cond_gt = true;
450 branch->cond_lt = true;
451
452 ppir_node_add_dep(&branch->node, &zero->node);
453
454 return true;
455 }
456
457 static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
458 [ppir_op_abs] = ppir_lower_abs,
459 [ppir_op_neg] = ppir_lower_neg,
460 [ppir_op_const] = ppir_lower_const,
461 [ppir_op_dot2] = ppir_lower_dot,
462 [ppir_op_dot3] = ppir_lower_dot,
463 [ppir_op_dot4] = ppir_lower_dot,
464 [ppir_op_rcp] = ppir_lower_vec_to_scalar,
465 [ppir_op_rsqrt] = ppir_lower_vec_to_scalar,
466 [ppir_op_log2] = ppir_lower_vec_to_scalar,
467 [ppir_op_exp2] = ppir_lower_vec_to_scalar,
468 [ppir_op_sqrt] = ppir_lower_vec_to_scalar,
469 [ppir_op_sin] = ppir_lower_sin_cos_vec_to_scalar,
470 [ppir_op_cos] = ppir_lower_sin_cos_vec_to_scalar,
471 [ppir_op_lt] = ppir_lower_swap_args,
472 [ppir_op_le] = ppir_lower_swap_args,
473 [ppir_op_load_texture] = ppir_lower_texture,
474 [ppir_op_select] = ppir_lower_select,
475 [ppir_op_trunc] = ppir_lower_trunc,
476 [ppir_op_sat] = ppir_lower_sat,
477 [ppir_op_branch] = ppir_lower_branch,
478 };
479
480 bool ppir_lower_prog(ppir_compiler *comp)
481 {
482 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
483 list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
484 if (ppir_lower_funcs[node->op] &&
485 !ppir_lower_funcs[node->op](block, node))
486 return false;
487 }
488 }
489
490 ppir_node_print_prog(comp);
491 return true;
492 }