gallium: add lima driver
[mesa.git] / src / gallium / drivers / lima / ir / pp / lower.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
27
28 #include "ppir.h"
29
30 static bool ppir_lower_const(ppir_block *block, ppir_node *node)
31 {
32 if (ppir_node_is_root(node)) {
33 ppir_node_delete(node);
34 return true;
35 }
36
37 ppir_node *move = NULL;
38 ppir_dest *dest = ppir_node_get_dest(node);
39
40 /* const (register) can only be used in alu node, create a move
41 * node for other types of node */
42 ppir_node_foreach_succ_safe(node, dep) {
43 ppir_node *succ = dep->succ;
44
45 if (succ->type != ppir_node_type_alu) {
46 if (!move) {
47 move = ppir_node_create(block, ppir_op_mov, -1, 0);
48 if (unlikely(!move))
49 return false;
50
51 ppir_debug("lower const create move %d for %d\n",
52 move->index, node->index);
53
54 ppir_alu_node *alu = ppir_node_to_alu(move);
55 alu->dest = *dest;
56 alu->num_src = 1;
57 ppir_node_target_assign(alu->src, dest);
58 for (int i = 0; i < 4; i++)
59 alu->src->swizzle[i] = i;
60 }
61
62 ppir_node_replace_pred(dep, move);
63 ppir_node_replace_child(succ, node, move);
64 }
65 }
66
67 if (move) {
68 ppir_node_add_dep(move, node);
69 list_addtail(&move->list, &node->list);
70 }
71
72 return true;
73 }
74
75 /* lower dot to mul+sum */
76 static bool ppir_lower_dot(ppir_block *block, ppir_node *node)
77 {
78 ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, 0);
79 if (!mul)
80 return false;
81 list_addtail(&mul->node.list, &node->list);
82
83 ppir_alu_node *dot = ppir_node_to_alu(node);
84 mul->src[0] = dot->src[0];
85 mul->src[1] = dot->src[1];
86 mul->num_src = 2;
87
88 int num_components = node->op - ppir_op_dot2 + 2;
89 ppir_dest *dest = &mul->dest;
90 dest->type = ppir_target_ssa;
91 dest->ssa.num_components = num_components;
92 dest->ssa.live_in = INT_MAX;
93 dest->ssa.live_out = 0;
94 dest->write_mask = u_bit_consecutive(0, num_components);
95
96 ppir_node_foreach_pred_safe(node, dep) {
97 ppir_node_remove_dep(dep);
98 ppir_node_add_dep(&mul->node, dep->pred);
99 }
100 ppir_node_add_dep(node, &mul->node);
101
102 if (node->op == ppir_op_dot2) {
103 node->op = ppir_op_add;
104
105 ppir_node_target_assign(dot->src, dest);
106 dot->src[0].swizzle[0] = 0;
107 dot->src[0].absolute = false;
108 dot->src[0].negate = false;
109
110 ppir_node_target_assign(dot->src + 1, dest);
111 dot->src[1].swizzle[0] = 1;
112 dot->src[1].absolute = false;
113 dot->src[1].negate = false;
114 }
115 else {
116 node->op = node->op == ppir_op_dot3 ? ppir_op_sum3 : ppir_op_sum4;
117
118 ppir_node_target_assign(dot->src, dest);
119 for (int i = 0; i < 4; i++)
120 dot->src[0].swizzle[i] = i;
121 dot->src[0].absolute = false;
122 dot->src[0].negate = false;
123
124 dot->num_src = 1;
125 }
126
127 return true;
128 }
129
130 static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
131 {
132 ppir_reg *r = rzalloc(comp, ppir_reg);
133 if (!r)
134 return NULL;
135
136 r->num_components = num_components;
137 r->live_in = INT_MAX;
138 r->live_out = 0;
139 r->is_head = false;
140 list_addtail(&r->list, &comp->reg_list);
141
142 return r;
143 }
144
145 /* lower vector alu node to multi scalar nodes */
146 static bool ppir_lower_vec_to_scalar(ppir_block *block, ppir_node *node)
147 {
148 ppir_alu_node *alu = ppir_node_to_alu(node);
149 ppir_dest *dest = &alu->dest;
150
151 int n = 0;
152 int index[4];
153
154 unsigned mask = dest->write_mask;
155 while (mask)
156 index[n++] = u_bit_scan(&mask);
157
158 if (n == 1)
159 return true;
160
161 ppir_reg *r;
162 /* we need a reg for scalar nodes to store output */
163 if (dest->type == ppir_target_register)
164 r = dest->reg;
165 else {
166 r = create_reg(block->comp, n);
167 if (!r)
168 return false;
169
170 /* change all successors to use reg r */
171 ppir_node_foreach_succ(node, dep) {
172 ppir_node *succ = dep->succ;
173 if (succ->type == ppir_node_type_alu) {
174 ppir_alu_node *sa = ppir_node_to_alu(succ);
175 for (int i = 0; i < sa->num_src; i++) {
176 ppir_src *src = sa->src + i;
177 if (ppir_node_target_equal(src, dest)) {
178 src->type = ppir_target_register;
179 src->reg = r;
180 }
181 }
182 }
183 else {
184 assert(succ->type == ppir_node_type_store);
185 ppir_store_node *ss = ppir_node_to_store(succ);
186 ppir_src *src = &ss->src;
187 src->type = ppir_target_register;
188 src->reg = r;
189 }
190 }
191 }
192
193 /* create each component's scalar node */
194 for (int i = 0; i < n; i++) {
195 ppir_node *s = ppir_node_create(block, node->op, -1, 0);
196 if (!s)
197 return false;
198 list_addtail(&s->list, &node->list);
199
200 ppir_alu_node *sa = ppir_node_to_alu(s);
201 ppir_dest *sd = &sa->dest;
202 sd->type = ppir_target_register;
203 sd->reg = r;
204 sd->modifier = dest->modifier;
205 sd->write_mask = 1 << index[i];
206
207 for (int j = 0; j < alu->num_src; j++)
208 sa->src[j] = alu->src[j];
209 sa->num_src = alu->num_src;
210
211 /* TODO: need per reg component dependancy */
212 ppir_node_foreach_succ(node, dep) {
213 ppir_node_add_dep(dep->succ, s);
214 }
215
216 ppir_node_foreach_pred(node, dep) {
217 ppir_node_add_dep(s, dep->pred);
218 }
219 }
220
221 ppir_node_delete(node);
222 return true;
223 }
224
225 static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
226 {
227 /* swapped op must be the next op */
228 node->op++;
229
230 assert(node->type == ppir_node_type_alu);
231 ppir_alu_node *alu = ppir_node_to_alu(node);
232 assert(alu->num_src == 2);
233
234 ppir_src tmp = alu->src[0];
235 alu->src[0] = alu->src[1];
236 alu->src[1] = tmp;
237 return true;
238 }
239
240 static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
241 {
242 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
243
244 if (ppir_node_has_single_pred(node)) {
245 ppir_node *pred = ppir_node_first_pred(node);
246 if (pred->op == ppir_op_load_varying) {
247 /* If ldtex is the only successor of load_varying node
248 * we're good. Just change load_varying op type to load_coords.
249 */
250 if (ppir_node_has_single_succ(pred)) {
251 pred->op = ppir_op_load_coords;
252 return true;
253 }
254 }
255 }
256
257 /* Otherwise we need to create load_coords node */
258 ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
259 if (!load)
260 return false;
261 list_addtail(&load->node.list, &node->list);
262
263 ppir_debug("%s create load_coords node %d for %d\n",
264 __FUNCTION__, load->node.index, node->index);
265
266 ppir_dest *dest = &load->dest;
267 dest->type = ppir_target_ssa;
268 dest->ssa.num_components = load_tex->src_coords.ssa->num_components;
269 dest->ssa.live_in = INT_MAX;
270 dest->ssa.live_out = 0;
271 dest->write_mask = u_bit_consecutive(0, dest->ssa.num_components);
272
273 load->src = load_tex->src_coords;
274
275 ppir_src *src = &load_tex->src_coords;
276 src->type = ppir_target_ssa;
277 src->ssa = &dest->ssa;
278
279 ppir_node_foreach_pred_safe(node, dep) {
280 ppir_node *pred = dep->pred;
281 ppir_node_remove_dep(dep);
282 ppir_node_add_dep(&load->node, pred);
283 }
284
285 ppir_node_add_dep(node, &load->node);
286 return true;
287 }
288
289 /* Prepare for sin and cos and then lower vector alu node to multi
290 * scalar nodes */
291 static bool ppir_lower_sin_cos_vec_to_scalar(ppir_block *block, ppir_node *node)
292 {
293 ppir_alu_node *alu = ppir_node_to_alu(node);
294
295 ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0);
296 if (!inv_2pi_node)
297 return false;
298 list_addtail(&inv_2pi_node->list, &node->list);
299
300 /* For sin and cos, the input has to multiplied by the constant
301 * 1/(2*pi), presumably to simplify the hardware. */
302 ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node);
303 inv_2pi_const->constant.num = 1;
304 inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI));
305
306 inv_2pi_const->dest.type = ppir_target_ssa;
307 inv_2pi_const->dest.ssa.num_components = 1;
308 inv_2pi_const->dest.ssa.live_in = INT_MAX;
309 inv_2pi_const->dest.ssa.live_out = 0;
310 inv_2pi_const->dest.write_mask = 0x01;
311
312 ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0);
313 if (!mul_node)
314 return false;
315 list_addtail(&mul_node->list, &node->list);
316
317 ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node);
318 mul_alu->num_src = 2;
319 mul_alu->src[0] = alu->src[0];
320 mul_alu->src[1].type = ppir_target_ssa;
321 mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa;
322
323 int num_components = alu->src[0].ssa->num_components;
324 mul_alu->dest.type = ppir_target_ssa;
325 mul_alu->dest.ssa.num_components = num_components;
326 mul_alu->dest.ssa.live_in = INT_MAX;
327 mul_alu->dest.ssa.live_out = 0;
328 mul_alu->dest.write_mask = u_bit_consecutive(0, num_components);
329
330 alu->src[0].type = ppir_target_ssa;
331 alu->src[0].ssa = &mul_alu->dest.ssa;
332 for (int i = 0; i < 4; i++)
333 alu->src->swizzle[i] = i;
334
335 ppir_node_foreach_pred_safe(node, dep) {
336 ppir_node *pred = dep->pred;
337 ppir_node_remove_dep(dep);
338 ppir_node_add_dep(mul_node, pred);
339 }
340 ppir_node_add_dep(node, mul_node);
341 ppir_node_add_dep(mul_node, inv_2pi_node);
342
343 return ppir_lower_vec_to_scalar(block, node);
344 }
345
346 /* insert a move as the select condition to make sure it can
347 * be inserted to select instr float mul slot
348 */
349 static bool ppir_lower_select(ppir_block *block, ppir_node *node)
350 {
351 ppir_alu_node *alu = ppir_node_to_alu(node);
352
353 ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
354 if (!move)
355 return false;
356 list_addtail(&move->list, &node->list);
357
358 ppir_alu_node *move_alu = ppir_node_to_alu(move);
359 ppir_src *move_src = move_alu->src, *src = alu->src;
360 move_src->type = src->type;
361 move_src->ssa = src->ssa;
362 move_src->swizzle[0] = src->swizzle[0];
363 move_alu->num_src = 1;
364
365 ppir_dest *move_dest = &move_alu->dest;
366 move_dest->type = ppir_target_ssa;
367 move_dest->ssa.num_components = 1;
368 move_dest->ssa.live_in = INT_MAX;
369 move_dest->ssa.live_out = 0;
370 move_dest->write_mask = 1;
371
372 ppir_node_foreach_pred(node, dep) {
373 ppir_node *pred = dep->pred;
374 ppir_dest *dest = ppir_node_get_dest(pred);
375 if (ppir_node_target_equal(alu->src, dest)) {
376 ppir_node_replace_pred(dep, move);
377 ppir_node_add_dep(move, pred);
378 }
379 }
380
381 /* move must be the first pred of select node which make sure
382 * the float mul slot is free when node to instr
383 */
384 assert(ppir_node_first_pred(node) == move);
385
386 src->swizzle[0] = 0;
387 ppir_node_target_assign(alu->src, move_dest);
388 return true;
389 }
390
391 static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
392 [ppir_op_const] = ppir_lower_const,
393 [ppir_op_dot2] = ppir_lower_dot,
394 [ppir_op_dot3] = ppir_lower_dot,
395 [ppir_op_dot4] = ppir_lower_dot,
396 [ppir_op_rcp] = ppir_lower_vec_to_scalar,
397 [ppir_op_rsqrt] = ppir_lower_vec_to_scalar,
398 [ppir_op_log2] = ppir_lower_vec_to_scalar,
399 [ppir_op_exp2] = ppir_lower_vec_to_scalar,
400 [ppir_op_sqrt] = ppir_lower_vec_to_scalar,
401 [ppir_op_sin] = ppir_lower_sin_cos_vec_to_scalar,
402 [ppir_op_cos] = ppir_lower_sin_cos_vec_to_scalar,
403 [ppir_op_lt] = ppir_lower_swap_args,
404 [ppir_op_le] = ppir_lower_swap_args,
405 [ppir_op_load_texture] = ppir_lower_texture,
406 [ppir_op_select] = ppir_lower_select,
407 };
408
409 bool ppir_lower_prog(ppir_compiler *comp)
410 {
411 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
412 list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
413 if (ppir_lower_funcs[node->op] &&
414 !ppir_lower_funcs[node->op](block, node))
415 return false;
416 }
417 }
418
419 ppir_node_print_prog(comp);
420 return true;
421 }