lima/ppir: fix pointer referenced after a free
[mesa.git] / src / gallium / drivers / lima / ir / pp / lower.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
27
28 #include "ppir.h"
29
30 static bool ppir_lower_const(ppir_block *block, ppir_node *node)
31 {
32 if (ppir_node_is_root(node)) {
33 ppir_node_delete(node);
34 return true;
35 }
36
37 ppir_node *move = NULL;
38 ppir_dest *dest = ppir_node_get_dest(node);
39
40 /* const (register) can only be used in alu node, create a move
41 * node for other types of node */
42 ppir_node_foreach_succ_safe(node, dep) {
43 ppir_node *succ = dep->succ;
44
45 if (succ->type != ppir_node_type_alu) {
46 if (!move) {
47 move = ppir_node_create(block, ppir_op_mov, -1, 0);
48 if (unlikely(!move))
49 return false;
50
51 ppir_debug("lower const create move %d for %d\n",
52 move->index, node->index);
53
54 ppir_alu_node *alu = ppir_node_to_alu(move);
55 alu->dest = *dest;
56 alu->num_src = 1;
57 ppir_node_target_assign(alu->src, dest);
58 for (int i = 0; i < 4; i++)
59 alu->src->swizzle[i] = i;
60 }
61
62 ppir_node_replace_pred(dep, move);
63 ppir_node_replace_child(succ, node, move);
64 }
65 }
66
67 if (move) {
68 ppir_node_add_dep(move, node);
69 list_addtail(&move->list, &node->list);
70 }
71
72 return true;
73 }
74
75 /* lower dot to mul+sum */
76 static bool ppir_lower_dot(ppir_block *block, ppir_node *node)
77 {
78 ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, 0);
79 if (!mul)
80 return false;
81 list_addtail(&mul->node.list, &node->list);
82
83 ppir_alu_node *dot = ppir_node_to_alu(node);
84 mul->src[0] = dot->src[0];
85 mul->src[1] = dot->src[1];
86 mul->num_src = 2;
87
88 int num_components = node->op - ppir_op_dot2 + 2;
89 ppir_dest *dest = &mul->dest;
90 dest->type = ppir_target_ssa;
91 dest->ssa.num_components = num_components;
92 dest->ssa.live_in = INT_MAX;
93 dest->ssa.live_out = 0;
94 dest->write_mask = u_bit_consecutive(0, num_components);
95
96 ppir_node_foreach_pred_safe(node, dep) {
97 ppir_node *pred = dep->pred;
98 ppir_node_remove_dep(dep);
99 ppir_node_add_dep(&mul->node, pred);
100 }
101 ppir_node_add_dep(node, &mul->node);
102
103 if (node->op == ppir_op_dot2) {
104 node->op = ppir_op_add;
105
106 ppir_node_target_assign(dot->src, dest);
107 dot->src[0].swizzle[0] = 0;
108 dot->src[0].absolute = false;
109 dot->src[0].negate = false;
110
111 ppir_node_target_assign(dot->src + 1, dest);
112 dot->src[1].swizzle[0] = 1;
113 dot->src[1].absolute = false;
114 dot->src[1].negate = false;
115 }
116 else {
117 node->op = node->op == ppir_op_dot3 ? ppir_op_sum3 : ppir_op_sum4;
118
119 ppir_node_target_assign(dot->src, dest);
120 for (int i = 0; i < 4; i++)
121 dot->src[0].swizzle[i] = i;
122 dot->src[0].absolute = false;
123 dot->src[0].negate = false;
124
125 dot->num_src = 1;
126 }
127
128 return true;
129 }
130
131 static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
132 {
133 ppir_reg *r = rzalloc(comp, ppir_reg);
134 if (!r)
135 return NULL;
136
137 r->num_components = num_components;
138 r->live_in = INT_MAX;
139 r->live_out = 0;
140 r->is_head = false;
141 list_addtail(&r->list, &comp->reg_list);
142
143 return r;
144 }
145
146 /* lower vector alu node to multi scalar nodes */
147 static bool ppir_lower_vec_to_scalar(ppir_block *block, ppir_node *node)
148 {
149 ppir_alu_node *alu = ppir_node_to_alu(node);
150 ppir_dest *dest = &alu->dest;
151
152 int n = 0;
153 int index[4];
154
155 unsigned mask = dest->write_mask;
156 while (mask)
157 index[n++] = u_bit_scan(&mask);
158
159 if (n == 1)
160 return true;
161
162 ppir_reg *r;
163 /* we need a reg for scalar nodes to store output */
164 if (dest->type == ppir_target_register)
165 r = dest->reg;
166 else {
167 r = create_reg(block->comp, n);
168 if (!r)
169 return false;
170
171 /* change all successors to use reg r */
172 ppir_node_foreach_succ(node, dep) {
173 ppir_node *succ = dep->succ;
174 if (succ->type == ppir_node_type_alu) {
175 ppir_alu_node *sa = ppir_node_to_alu(succ);
176 for (int i = 0; i < sa->num_src; i++) {
177 ppir_src *src = sa->src + i;
178 if (ppir_node_target_equal(src, dest)) {
179 src->type = ppir_target_register;
180 src->reg = r;
181 }
182 }
183 }
184 else {
185 assert(succ->type == ppir_node_type_store);
186 ppir_store_node *ss = ppir_node_to_store(succ);
187 ppir_src *src = &ss->src;
188 src->type = ppir_target_register;
189 src->reg = r;
190 }
191 }
192 }
193
194 /* create each component's scalar node */
195 for (int i = 0; i < n; i++) {
196 ppir_node *s = ppir_node_create(block, node->op, -1, 0);
197 if (!s)
198 return false;
199 list_addtail(&s->list, &node->list);
200
201 ppir_alu_node *sa = ppir_node_to_alu(s);
202 ppir_dest *sd = &sa->dest;
203 sd->type = ppir_target_register;
204 sd->reg = r;
205 sd->modifier = dest->modifier;
206 sd->write_mask = 1 << index[i];
207
208 for (int j = 0; j < alu->num_src; j++)
209 sa->src[j] = alu->src[j];
210 sa->num_src = alu->num_src;
211
212 /* TODO: need per reg component dependancy */
213 ppir_node_foreach_succ(node, dep) {
214 ppir_node_add_dep(dep->succ, s);
215 }
216
217 ppir_node_foreach_pred(node, dep) {
218 ppir_node_add_dep(s, dep->pred);
219 }
220 }
221
222 ppir_node_delete(node);
223 return true;
224 }
225
226 static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
227 {
228 /* swapped op must be the next op */
229 node->op++;
230
231 assert(node->type == ppir_node_type_alu);
232 ppir_alu_node *alu = ppir_node_to_alu(node);
233 assert(alu->num_src == 2);
234
235 ppir_src tmp = alu->src[0];
236 alu->src[0] = alu->src[1];
237 alu->src[1] = tmp;
238 return true;
239 }
240
241 static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
242 {
243 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
244
245 if (ppir_node_has_single_pred(node)) {
246 ppir_node *pred = ppir_node_first_pred(node);
247 if (pred->op == ppir_op_load_varying) {
248 /* If ldtex is the only successor of load_varying node
249 * we're good. Just change load_varying op type to load_coords.
250 */
251 if (ppir_node_has_single_succ(pred)) {
252 pred->op = ppir_op_load_coords;
253 return true;
254 }
255 }
256 }
257
258 /* Otherwise we need to create load_coords node */
259 ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
260 if (!load)
261 return false;
262 list_addtail(&load->node.list, &node->list);
263
264 ppir_debug("%s create load_coords node %d for %d\n",
265 __FUNCTION__, load->node.index, node->index);
266
267 ppir_dest *dest = &load->dest;
268 dest->type = ppir_target_ssa;
269 dest->ssa.num_components = load_tex->src_coords.ssa->num_components;
270 dest->ssa.live_in = INT_MAX;
271 dest->ssa.live_out = 0;
272 dest->write_mask = u_bit_consecutive(0, dest->ssa.num_components);
273
274 load->src = load_tex->src_coords;
275
276 ppir_src *src = &load_tex->src_coords;
277 src->type = ppir_target_ssa;
278 src->ssa = &dest->ssa;
279
280 ppir_node_foreach_pred_safe(node, dep) {
281 ppir_node *pred = dep->pred;
282 ppir_node_remove_dep(dep);
283 ppir_node_add_dep(&load->node, pred);
284 }
285
286 ppir_node_add_dep(node, &load->node);
287 return true;
288 }
289
290 /* Prepare for sin and cos and then lower vector alu node to multi
291 * scalar nodes */
292 static bool ppir_lower_sin_cos_vec_to_scalar(ppir_block *block, ppir_node *node)
293 {
294 ppir_alu_node *alu = ppir_node_to_alu(node);
295
296 ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0);
297 if (!inv_2pi_node)
298 return false;
299 list_addtail(&inv_2pi_node->list, &node->list);
300
301 /* For sin and cos, the input has to multiplied by the constant
302 * 1/(2*pi), presumably to simplify the hardware. */
303 ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node);
304 inv_2pi_const->constant.num = 1;
305 inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI));
306
307 inv_2pi_const->dest.type = ppir_target_ssa;
308 inv_2pi_const->dest.ssa.num_components = 1;
309 inv_2pi_const->dest.ssa.live_in = INT_MAX;
310 inv_2pi_const->dest.ssa.live_out = 0;
311 inv_2pi_const->dest.write_mask = 0x01;
312
313 ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0);
314 if (!mul_node)
315 return false;
316 list_addtail(&mul_node->list, &node->list);
317
318 ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node);
319 mul_alu->num_src = 2;
320 mul_alu->src[0] = alu->src[0];
321 mul_alu->src[1].type = ppir_target_ssa;
322 mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa;
323
324 int num_components = alu->src[0].ssa->num_components;
325 mul_alu->dest.type = ppir_target_ssa;
326 mul_alu->dest.ssa.num_components = num_components;
327 mul_alu->dest.ssa.live_in = INT_MAX;
328 mul_alu->dest.ssa.live_out = 0;
329 mul_alu->dest.write_mask = u_bit_consecutive(0, num_components);
330
331 alu->src[0].type = ppir_target_ssa;
332 alu->src[0].ssa = &mul_alu->dest.ssa;
333 for (int i = 0; i < 4; i++)
334 alu->src->swizzle[i] = i;
335
336 ppir_node_foreach_pred_safe(node, dep) {
337 ppir_node *pred = dep->pred;
338 ppir_node_remove_dep(dep);
339 ppir_node_add_dep(mul_node, pred);
340 }
341 ppir_node_add_dep(node, mul_node);
342 ppir_node_add_dep(mul_node, inv_2pi_node);
343
344 return ppir_lower_vec_to_scalar(block, node);
345 }
346
347 /* insert a move as the select condition to make sure it can
348 * be inserted to select instr float mul slot
349 */
350 static bool ppir_lower_select(ppir_block *block, ppir_node *node)
351 {
352 ppir_alu_node *alu = ppir_node_to_alu(node);
353
354 ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
355 if (!move)
356 return false;
357 list_addtail(&move->list, &node->list);
358
359 ppir_alu_node *move_alu = ppir_node_to_alu(move);
360 ppir_src *move_src = move_alu->src, *src = alu->src;
361 move_src->type = src->type;
362 move_src->ssa = src->ssa;
363 move_src->swizzle[0] = src->swizzle[0];
364 move_alu->num_src = 1;
365
366 ppir_dest *move_dest = &move_alu->dest;
367 move_dest->type = ppir_target_ssa;
368 move_dest->ssa.num_components = 1;
369 move_dest->ssa.live_in = INT_MAX;
370 move_dest->ssa.live_out = 0;
371 move_dest->write_mask = 1;
372
373 ppir_node_foreach_pred(node, dep) {
374 ppir_node *pred = dep->pred;
375 ppir_dest *dest = ppir_node_get_dest(pred);
376 if (ppir_node_target_equal(alu->src, dest)) {
377 ppir_node_replace_pred(dep, move);
378 ppir_node_add_dep(move, pred);
379 }
380 }
381
382 /* move must be the first pred of select node which make sure
383 * the float mul slot is free when node to instr
384 */
385 assert(ppir_node_first_pred(node) == move);
386
387 src->swizzle[0] = 0;
388 ppir_node_target_assign(alu->src, move_dest);
389 return true;
390 }
391
392 static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
393 [ppir_op_const] = ppir_lower_const,
394 [ppir_op_dot2] = ppir_lower_dot,
395 [ppir_op_dot3] = ppir_lower_dot,
396 [ppir_op_dot4] = ppir_lower_dot,
397 [ppir_op_rcp] = ppir_lower_vec_to_scalar,
398 [ppir_op_rsqrt] = ppir_lower_vec_to_scalar,
399 [ppir_op_log2] = ppir_lower_vec_to_scalar,
400 [ppir_op_exp2] = ppir_lower_vec_to_scalar,
401 [ppir_op_sqrt] = ppir_lower_vec_to_scalar,
402 [ppir_op_sin] = ppir_lower_sin_cos_vec_to_scalar,
403 [ppir_op_cos] = ppir_lower_sin_cos_vec_to_scalar,
404 [ppir_op_lt] = ppir_lower_swap_args,
405 [ppir_op_le] = ppir_lower_swap_args,
406 [ppir_op_load_texture] = ppir_lower_texture,
407 [ppir_op_select] = ppir_lower_select,
408 };
409
410 bool ppir_lower_prog(ppir_compiler *comp)
411 {
412 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
413 list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
414 if (ppir_lower_funcs[node->op] &&
415 !ppir_lower_funcs[node->op](block, node))
416 return false;
417 }
418 }
419
420 ppir_node_print_prog(comp);
421 return true;
422 }