lima/ppir: move alu vec to scalar lowering into NIR
[mesa.git] / src / gallium / drivers / lima / ir / pp / lower.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
27
28 #include "ppir.h"
29
30 static bool ppir_lower_const(ppir_block *block, ppir_node *node)
31 {
32 if (ppir_node_is_root(node)) {
33 ppir_node_delete(node);
34 return true;
35 }
36
37 ppir_node *move = NULL;
38 ppir_dest *dest = ppir_node_get_dest(node);
39
40 /* const (register) can only be used in alu node, create a move
41 * node for other types of node */
42 ppir_node_foreach_succ_safe(node, dep) {
43 ppir_node *succ = dep->succ;
44
45 if (succ->type != ppir_node_type_alu) {
46 if (!move) {
47 move = ppir_node_create(block, ppir_op_mov, -1, 0);
48 if (unlikely(!move))
49 return false;
50
51 ppir_debug("lower const create move %d for %d\n",
52 move->index, node->index);
53
54 ppir_alu_node *alu = ppir_node_to_alu(move);
55 alu->dest = *dest;
56 alu->num_src = 1;
57 ppir_node_target_assign(alu->src, dest);
58 for (int i = 0; i < 4; i++)
59 alu->src->swizzle[i] = i;
60 }
61
62 ppir_node_replace_pred(dep, move);
63 ppir_node_replace_child(succ, node, move);
64 }
65 }
66
67 if (move) {
68 ppir_node_add_dep(move, node);
69 list_addtail(&move->list, &node->list);
70 }
71
72 return true;
73 }
74
75 static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
76 {
77 /* swapped op must be the next op */
78 node->op++;
79
80 assert(node->type == ppir_node_type_alu);
81 ppir_alu_node *alu = ppir_node_to_alu(node);
82 assert(alu->num_src == 2);
83
84 ppir_src tmp = alu->src[0];
85 alu->src[0] = alu->src[1];
86 alu->src[1] = tmp;
87 return true;
88 }
89
90 static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
91 {
92 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
93
94 /* Create load_coords node */
95 ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
96 if (!load)
97 return false;
98 list_addtail(&load->node.list, &node->list);
99
100 ppir_debug("%s create load_coords node %d for %d\n",
101 __FUNCTION__, load->node.index, node->index);
102
103 load->dest.type = ppir_target_pipeline;
104 load->dest.pipeline = ppir_pipeline_reg_discard;
105
106 load->src = load_tex->src_coords;
107
108 ppir_node_foreach_pred_safe(node, dep) {
109 ppir_node *pred = dep->pred;
110 ppir_node_remove_dep(dep);
111 ppir_node_add_dep(&load->node, pred);
112 }
113
114 ppir_node_add_dep(node, &load->node);
115 return true;
116 }
117
118 static bool ppir_lower_sin_cos(ppir_block *block, ppir_node *node)
119 {
120 ppir_alu_node *alu = ppir_node_to_alu(node);
121
122 ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0);
123 if (!inv_2pi_node)
124 return false;
125 list_addtail(&inv_2pi_node->list, &node->list);
126
127 /* For sin and cos, the input has to multiplied by the constant
128 * 1/(2*pi), presumably to simplify the hardware. */
129 ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node);
130 inv_2pi_const->constant.num = 1;
131 inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI));
132
133 inv_2pi_const->dest.type = ppir_target_ssa;
134 inv_2pi_const->dest.ssa.num_components = 1;
135 inv_2pi_const->dest.ssa.live_in = INT_MAX;
136 inv_2pi_const->dest.ssa.live_out = 0;
137 inv_2pi_const->dest.write_mask = 0x01;
138
139 ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0);
140 if (!mul_node)
141 return false;
142 list_addtail(&mul_node->list, &node->list);
143
144 ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node);
145 mul_alu->num_src = 2;
146 mul_alu->src[0] = alu->src[0];
147 mul_alu->src[1].type = ppir_target_ssa;
148 mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa;
149
150 int num_components = alu->src[0].ssa->num_components;
151 mul_alu->dest.type = ppir_target_ssa;
152 mul_alu->dest.ssa.num_components = num_components;
153 mul_alu->dest.ssa.live_in = INT_MAX;
154 mul_alu->dest.ssa.live_out = 0;
155 mul_alu->dest.write_mask = u_bit_consecutive(0, num_components);
156
157 alu->src[0].type = ppir_target_ssa;
158 alu->src[0].ssa = &mul_alu->dest.ssa;
159 for (int i = 0; i < 4; i++)
160 alu->src->swizzle[i] = i;
161
162 ppir_node_foreach_pred_safe(node, dep) {
163 ppir_node *pred = dep->pred;
164 ppir_node_remove_dep(dep);
165 ppir_node_add_dep(mul_node, pred);
166 }
167 ppir_node_add_dep(node, mul_node);
168 ppir_node_add_dep(mul_node, inv_2pi_node);
169
170 return true;
171 }
172
173 /* insert a move as the select condition to make sure it can
174 * be inserted to select instr float mul slot
175 */
176 static bool ppir_lower_select(ppir_block *block, ppir_node *node)
177 {
178 ppir_alu_node *alu = ppir_node_to_alu(node);
179
180 ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
181 if (!move)
182 return false;
183 list_addtail(&move->list, &node->list);
184
185 ppir_alu_node *move_alu = ppir_node_to_alu(move);
186 ppir_src *move_src = move_alu->src, *src = alu->src;
187 move_src->type = src->type;
188 move_src->ssa = src->ssa;
189 move_src->swizzle[0] = src->swizzle[0];
190 move_alu->num_src = 1;
191
192 ppir_dest *move_dest = &move_alu->dest;
193 move_dest->type = ppir_target_ssa;
194 move_dest->ssa.num_components = 1;
195 move_dest->ssa.live_in = INT_MAX;
196 move_dest->ssa.live_out = 0;
197 move_dest->write_mask = 1;
198
199 ppir_node_foreach_pred(node, dep) {
200 ppir_node *pred = dep->pred;
201 ppir_dest *dest = ppir_node_get_dest(pred);
202 if (ppir_node_target_equal(alu->src, dest)) {
203 ppir_node_replace_pred(dep, move);
204 ppir_node_add_dep(move, pred);
205 }
206 }
207
208 /* move must be the first pred of select node which make sure
209 * the float mul slot is free when node to instr
210 */
211 assert(ppir_node_first_pred(node) == move);
212
213 src->swizzle[0] = 0;
214 ppir_node_target_assign(alu->src, move_dest);
215 return true;
216 }
217
218 static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
219 {
220 /* Turn it into a mov with a round to integer output modifier */
221 ppir_alu_node *alu = ppir_node_to_alu(node);
222 ppir_dest *move_dest = &alu->dest;
223 move_dest->modifier = ppir_outmod_round;
224 node->op = ppir_op_mov;
225
226 return true;
227 }
228
229 static bool ppir_lower_abs(ppir_block *block, ppir_node *node)
230 {
231 /* Turn it into a mov and set the absolute modifier */
232 ppir_alu_node *alu = ppir_node_to_alu(node);
233
234 assert(alu->num_src == 1);
235
236 alu->src[0].absolute = true;
237 alu->src[0].negate = false;
238 node->op = ppir_op_mov;
239
240 return true;
241 }
242
243 static bool ppir_lower_neg(ppir_block *block, ppir_node *node)
244 {
245 /* Turn it into a mov and set the negate modifier */
246 ppir_alu_node *alu = ppir_node_to_alu(node);
247
248 assert(alu->num_src == 1);
249
250 alu->src[0].negate = !alu->src[0].negate;
251 node->op = ppir_op_mov;
252
253 return true;
254 }
255
256 static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
257 {
258 /* Turn it into a mov with the saturate output modifier */
259 ppir_alu_node *alu = ppir_node_to_alu(node);
260
261 assert(alu->num_src == 1);
262
263 ppir_dest *move_dest = &alu->dest;
264 move_dest->modifier = ppir_outmod_clamp_fraction;
265 node->op = ppir_op_mov;
266
267 return true;
268 }
269
270 static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
271 {
272 ppir_branch_node *branch = ppir_node_to_branch(node);
273 ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);
274
275 if (!zero)
276 return false;
277
278 list_addtail(&zero->node.list, &node->list);
279
280 zero->constant.value[0].f = 0;
281 zero->constant.num = 1;
282 zero->dest.type = ppir_target_ssa;
283 zero->dest.ssa.num_components = 1;
284 zero->dest.ssa.live_in = INT_MAX;
285 zero->dest.ssa.live_out = 0;
286 zero->dest.write_mask = 0x01;
287
288 /* For now we're just comparing branch condition with 0,
289 * in future we should look whether it's possible to move
290 * comparision node into branch itself and use current
291 * way as a fallback for complex conditions.
292 */
293 branch->src[1].type = ppir_target_ssa;
294 branch->src[1].ssa = &zero->dest.ssa;
295
296 branch->cond_gt = true;
297 branch->cond_lt = true;
298
299 ppir_node_add_dep(&branch->node, &zero->node);
300
301 return true;
302 }
303
304 static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
305 [ppir_op_abs] = ppir_lower_abs,
306 [ppir_op_neg] = ppir_lower_neg,
307 [ppir_op_const] = ppir_lower_const,
308 [ppir_op_sin] = ppir_lower_sin_cos,
309 [ppir_op_cos] = ppir_lower_sin_cos,
310 [ppir_op_lt] = ppir_lower_swap_args,
311 [ppir_op_le] = ppir_lower_swap_args,
312 [ppir_op_load_texture] = ppir_lower_texture,
313 [ppir_op_select] = ppir_lower_select,
314 [ppir_op_trunc] = ppir_lower_trunc,
315 [ppir_op_sat] = ppir_lower_sat,
316 [ppir_op_branch] = ppir_lower_branch,
317 };
318
319 bool ppir_lower_prog(ppir_compiler *comp)
320 {
321 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
322 list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
323 if (ppir_lower_funcs[node->op] &&
324 !ppir_lower_funcs[node->op](block, node))
325 return false;
326 }
327 }
328
329 ppir_node_print_prog(comp);
330 return true;
331 }