i915g: Reduce state emission by using a index bias
[mesa.git] / src / gallium / drivers / r600 / r600_compiler.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include "r600_compiler.h"
29
30 struct c_vector *c_vector_new(void)
31 {
32 struct c_vector *v = calloc(1, sizeof(struct c_vector));
33
34 if (v == NULL) {
35 return NULL;
36 }
37 c_list_init(v);
38 return v;
39 }
40
41 static unsigned c_opcode_is_alu(unsigned opcode)
42 {
43 switch (opcode) {
44 case C_OPCODE_MOV:
45 case C_OPCODE_MUL:
46 case C_OPCODE_MAD:
47 case C_OPCODE_ARL:
48 case C_OPCODE_LIT:
49 case C_OPCODE_RCP:
50 case C_OPCODE_RSQ:
51 case C_OPCODE_EXP:
52 case C_OPCODE_LOG:
53 case C_OPCODE_ADD:
54 case C_OPCODE_DP3:
55 case C_OPCODE_DP4:
56 case C_OPCODE_DST:
57 case C_OPCODE_MIN:
58 case C_OPCODE_MAX:
59 case C_OPCODE_SLT:
60 case C_OPCODE_SGE:
61 case C_OPCODE_SUB:
62 case C_OPCODE_LRP:
63 case C_OPCODE_CND:
64 case C_OPCODE_DP2A:
65 case C_OPCODE_FRC:
66 case C_OPCODE_CLAMP:
67 case C_OPCODE_FLR:
68 case C_OPCODE_ROUND:
69 case C_OPCODE_EX2:
70 case C_OPCODE_LG2:
71 case C_OPCODE_POW:
72 case C_OPCODE_XPD:
73 case C_OPCODE_ABS:
74 case C_OPCODE_RCC:
75 case C_OPCODE_DPH:
76 case C_OPCODE_COS:
77 case C_OPCODE_DDX:
78 case C_OPCODE_DDY:
79 case C_OPCODE_PK2H:
80 case C_OPCODE_PK2US:
81 case C_OPCODE_PK4B:
82 case C_OPCODE_PK4UB:
83 case C_OPCODE_RFL:
84 case C_OPCODE_SEQ:
85 case C_OPCODE_SFL:
86 case C_OPCODE_SGT:
87 case C_OPCODE_SIN:
88 case C_OPCODE_SLE:
89 case C_OPCODE_SNE:
90 case C_OPCODE_STR:
91 case C_OPCODE_UP2H:
92 case C_OPCODE_UP2US:
93 case C_OPCODE_UP4B:
94 case C_OPCODE_UP4UB:
95 case C_OPCODE_X2D:
96 case C_OPCODE_ARA:
97 case C_OPCODE_ARR:
98 case C_OPCODE_BRA:
99 case C_OPCODE_SSG:
100 case C_OPCODE_CMP:
101 case C_OPCODE_SCS:
102 case C_OPCODE_NRM:
103 case C_OPCODE_DIV:
104 case C_OPCODE_DP2:
105 case C_OPCODE_CEIL:
106 case C_OPCODE_I2F:
107 case C_OPCODE_NOT:
108 case C_OPCODE_TRUNC:
109 case C_OPCODE_SHL:
110 case C_OPCODE_AND:
111 case C_OPCODE_OR:
112 case C_OPCODE_MOD:
113 case C_OPCODE_XOR:
114 case C_OPCODE_SAD:
115 case C_OPCODE_NRM4:
116 case C_OPCODE_F2I:
117 case C_OPCODE_IDIV:
118 case C_OPCODE_IMAX:
119 case C_OPCODE_IMIN:
120 case C_OPCODE_INEG:
121 case C_OPCODE_ISGE:
122 case C_OPCODE_ISHR:
123 case C_OPCODE_ISLT:
124 case C_OPCODE_F2U:
125 case C_OPCODE_U2F:
126 case C_OPCODE_UADD:
127 case C_OPCODE_UDIV:
128 case C_OPCODE_UMAD:
129 case C_OPCODE_UMAX:
130 case C_OPCODE_UMIN:
131 case C_OPCODE_UMOD:
132 case C_OPCODE_UMUL:
133 case C_OPCODE_USEQ:
134 case C_OPCODE_USGE:
135 case C_OPCODE_USHR:
136 case C_OPCODE_USLT:
137 case C_OPCODE_USNE:
138 return 1;
139 case C_OPCODE_END:
140 case C_OPCODE_VFETCH:
141 case C_OPCODE_KILP:
142 case C_OPCODE_CAL:
143 case C_OPCODE_RET:
144 case C_OPCODE_TXB:
145 case C_OPCODE_TXL:
146 case C_OPCODE_BRK:
147 case C_OPCODE_IF:
148 case C_OPCODE_BGNFOR:
149 case C_OPCODE_REP:
150 case C_OPCODE_ELSE:
151 case C_OPCODE_ENDIF:
152 case C_OPCODE_ENDFOR:
153 case C_OPCODE_ENDREP:
154 case C_OPCODE_PUSHA:
155 case C_OPCODE_POPA:
156 case C_OPCODE_TXF:
157 case C_OPCODE_TXQ:
158 case C_OPCODE_CONT:
159 case C_OPCODE_EMIT:
160 case C_OPCODE_ENDPRIM:
161 case C_OPCODE_BGNLOOP:
162 case C_OPCODE_BGNSUB:
163 case C_OPCODE_ENDLOOP:
164 case C_OPCODE_ENDSUB:
165 case C_OPCODE_NOP:
166 case C_OPCODE_CALLNZ:
167 case C_OPCODE_IFC:
168 case C_OPCODE_BREAKC:
169 case C_OPCODE_KIL:
170 case C_OPCODE_TEX:
171 case C_OPCODE_TXD:
172 case C_OPCODE_TXP:
173 case C_OPCODE_SWITCH:
174 case C_OPCODE_CASE:
175 case C_OPCODE_DEFAULT:
176 case C_OPCODE_ENDSWITCH:
177 default:
178 return 0;
179 }
180 }
181
182
183 /* NEW */
184 void c_node_init(struct c_node *node)
185 {
186 memset(node, 0, sizeof(struct c_node));
187 c_list_init(&node->predecessors);
188 c_list_init(&node->successors);
189 c_list_init(&node->childs);
190 c_list_init(&node->insts);
191 node->parent = NULL;
192 }
193
194 static struct c_node_link *c_node_link_new(struct c_node *node)
195 {
196 struct c_node_link *link;
197
198 link = calloc(1, sizeof(struct c_node_link));
199 if (link == NULL)
200 return NULL;
201 c_list_init(link);
202 link->node = node;
203 return link;
204 }
205
206 int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor)
207 {
208 struct c_node_link *pedge, *sedge;
209
210 pedge = c_node_link_new(successor);
211 sedge = c_node_link_new(predecessor);
212 if (sedge == NULL || pedge == NULL) {
213 free(sedge);
214 free(pedge);
215 return -ENOMEM;
216 }
217 c_list_add_tail(pedge, &predecessor->successors);
218 c_list_add_tail(sedge, &successor->predecessors);
219 return 0;
220 }
221
222 int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction)
223 {
224 struct c_instruction *inst = calloc(1, sizeof(struct c_instruction));
225
226 if (inst == NULL)
227 return -ENOMEM;
228 memcpy(inst, instruction, sizeof(struct c_instruction));
229 c_list_add(inst, &node->insts);
230 return 0;
231 }
232
233 int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction)
234 {
235 struct c_instruction *inst = calloc(1, sizeof(struct c_instruction));
236
237 if (inst == NULL)
238 return -ENOMEM;
239 memcpy(inst, instruction, sizeof(struct c_instruction));
240 c_list_add_tail(inst, &node->insts);
241 return 0;
242 }
243
244 struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor)
245 {
246 struct c_node *node = calloc(1, sizeof(struct c_node));
247
248 if (node == NULL)
249 return NULL;
250 c_node_init(node);
251 if (c_node_cfg_link(predecessor, node)) {
252 free(node);
253 return NULL;
254 }
255 c_list_add_tail(node, &shader->nodes);
256 return node;
257 }
258
259 int c_shader_init(struct c_shader *shader, unsigned type)
260 {
261 unsigned i;
262 int r;
263
264 shader->type = type;
265 for (i = 0; i < C_FILE_COUNT; i++) {
266 shader->files[i].nvectors = 0;
267 c_list_init(&shader->files[i].vectors);
268 }
269 c_list_init(&shader->nodes);
270 c_node_init(&shader->entry);
271 c_node_init(&shader->end);
272 shader->entry.opcode = C_OPCODE_ENTRY;
273 shader->end.opcode = C_OPCODE_END;
274 r = c_node_cfg_link(&shader->entry, &shader->end);
275 if (r)
276 return r;
277 return 0;
278 }
279
280 struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid)
281 {
282 struct c_vector *v = calloc(1, sizeof(struct c_vector));
283 int i;
284
285 if (v == NULL) {
286 return NULL;
287 }
288 for (i = 0; i < 4; i++) {
289 v->channel[i] = calloc(1, sizeof(struct c_channel));
290 if (v->channel[i] == NULL)
291 goto out_err;
292 v->channel[i]->vindex = i;
293 v->channel[i]->vector = v;
294 }
295 v->file = file;
296 v->name = name;
297 v->sid = sid;
298 shader->files[v->file].nvectors++;
299 v->id = shader->nvectors++;
300 c_list_add_tail(v, &shader->files[v->file].vectors);
301 return v;
302 out_err:
303 for (i = 0; i < 4; i++) {
304 free(v->channel[i]);
305 }
306 free(v);
307 return NULL;
308 }
309
310 static void c_node_remove_link(struct c_node_link *head, struct c_node *node)
311 {
312 struct c_node_link *link, *tmp;
313
314 c_list_for_each_safe(link, tmp, head) {
315 if (link->node == node) {
316 c_list_del(link);
317 free(link);
318 }
319 }
320 }
321
322 static void c_node_destroy(struct c_node *node)
323 {
324 struct c_instruction *i, *ni;
325 struct c_node_link *link, *tmp;
326
327 c_list_for_each_safe(i, ni, &node->insts) {
328 c_list_del(i);
329 free(i);
330 }
331 if (node->parent)
332 c_node_remove_link(&node->parent->childs, node);
333 node->parent = NULL;
334 c_list_for_each_safe(link, tmp, &node->predecessors) {
335 c_node_remove_link(&link->node->successors, node);
336 c_list_del(link);
337 free(link);
338 }
339 c_list_for_each_safe(link, tmp, &node->successors) {
340 c_node_remove_link(&link->node->predecessors, node);
341 c_list_del(link);
342 free(link);
343 }
344 c_list_for_each_safe(link, tmp, &node->childs) {
345 link->node->parent = NULL;
346 c_list_del(link);
347 free(link);
348 }
349 }
350
351 void c_shader_destroy(struct c_shader *shader)
352 {
353 struct c_node *n, *nn;
354 struct c_vector *v, *nv;
355 unsigned i;
356
357 for (i = 0; i < C_FILE_COUNT; i++) {
358 shader->files[i].nvectors = 0;
359 c_list_for_each_safe(v, nv, &shader->files[i].vectors) {
360 c_list_del(v);
361 free(v->channel[0]);
362 free(v->channel[1]);
363 free(v->channel[2]);
364 free(v->channel[3]);
365 free(v);
366 }
367 }
368 c_list_for_each_safe(n, nn, &shader->nodes) {
369 c_list_del(n);
370 c_node_destroy(n);
371 }
372 memset(shader, 0, sizeof(struct c_shader));
373 }
374
375 static void c_shader_dfs_without_rec(struct c_node *entry, struct c_node *node)
376 {
377 struct c_node_link *link;
378
379 if (entry == node || entry->visited)
380 return;
381 entry->visited = 1;
382 c_list_for_each(link, &entry->successors) {
383 c_shader_dfs_without_rec(link->node, node);
384 }
385 }
386
387 static void c_shader_dfs_without(struct c_shader *shader, struct c_node *node)
388 {
389 struct c_node *n;
390
391 shader->entry.visited = 0;
392 shader->end.visited = 0;
393 c_list_for_each(n, &shader->nodes) {
394 n->visited = 0;
395 }
396 c_shader_dfs_without_rec(&shader->entry, node);
397 }
398
399 static int c_shader_build_dominator_tree_rec(struct c_shader *shader, struct c_node *node)
400 {
401 struct c_node_link *link, *nlink;
402 unsigned found = 0;
403 int r;
404
405 if (node->done)
406 return 0;
407 node->done = 1;
408 c_list_for_each(link, &node->predecessors) {
409 /* if we remove this predecessor can we reach the current node ? */
410 c_shader_dfs_without(shader, link->node);
411 if (node->visited == 0) {
412 /* we were unable to visit current node thus current
413 * predecessor is the immediate dominator of node, as
414 * their can be only one immediate dominator we break
415 */
416 node->parent = link->node;
417 nlink = c_node_link_new(node);
418 if (nlink == NULL)
419 return -ENOMEM;
420 c_list_add_tail(nlink, &link->node->childs);
421 found = 1;
422 break;
423 }
424 }
425 /* this shouldn't happen there should at least be 1 denominator for each node */
426 if (!found && node->opcode != C_OPCODE_ENTRY) {
427 fprintf(stderr, "invalid flow control graph node %p (%d) has no immediate dominator\n",
428 node, node->opcode);
429 return -EINVAL;
430 }
431 c_list_for_each(link, &node->predecessors) {
432 r = c_shader_build_dominator_tree_rec(shader, link->node);
433 if (r)
434 return r;
435 }
436 return 0;
437 }
438
439 int c_shader_build_dominator_tree(struct c_shader *shader)
440 {
441 struct c_node *node;
442 c_list_for_each(node, &shader->nodes) {
443 node->done = 0;
444 }
445 return c_shader_build_dominator_tree_rec(shader, &shader->end);
446 }