ed14c343faa48b616a2fef3051738868d029cee2
[mesa.git] / src / freedreno / ir3 / ir3.c
1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ir3.h"
25
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <assert.h>
30 #include <stdbool.h>
31 #include <errno.h>
32
33 #include "util/bitscan.h"
34 #include "util/ralloc.h"
35 #include "util/u_math.h"
36
37 #include "instr-a3xx.h"
38
/* Simple allocator: every allocation is made with the shader as its
 * ralloc context, so the whole lot can be released in one shot by
 * freeing the shader.  Memory comes from rzalloc_size().
 */
void * ir3_alloc(struct ir3 *shader, int sz)
{
	void *mem = rzalloc_size(shader, sz);  /* TODO: don't use rzalloc */

	return mem;
}
46
47 struct ir3 * ir3_create(struct ir3_compiler *compiler,
48 unsigned nin, unsigned nout)
49 {
50 struct ir3 *shader = rzalloc(compiler, struct ir3);
51
52 shader->compiler = compiler;
53 shader->ninputs = nin;
54 shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin);
55
56 shader->noutputs = nout;
57 shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
58
59 list_inithead(&shader->block_list);
60 list_inithead(&shader->array_list);
61
62 return shader;
63 }
64
/* Release a shader by handing it to ralloc_free(). */
void ir3_destroy(struct ir3 *shader)
{
	ralloc_free(shader);
}
69
/* Encoder sanity check: when 'cond' does not hold, trip debug_assert()
 * and make the *enclosing function* return -1.  Only usable inside
 * functions returning int.
 */
#define iassert(cond) do {                                  \
		if (!(cond)) {                                      \
			debug_assert(cond);                             \
			return -1;                                      \
		} } while (0)

/* Check a register's precision: with 'full' set the register must not
 * carry IR3_REG_HALF, otherwise it must carry it.
 *
 * NOTE: unlike iassert(), this expansion already ends in a semicolon, so
 * it is not safe as the unbraced body of an if that has an else.
 */
#define iassert_type(reg, full) do {                        \
		if ((full)) {                                       \
			iassert(!((reg)->flags & IR3_REG_HALF));        \
		} else {                                            \
			iassert((reg)->flags & IR3_REG_HALF);           \
		} } while (0);
82
83 static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
84 uint32_t repeat, uint32_t valid_flags)
85 {
86 reg_t val = { .dummy32 = 0 };
87
88 if (reg->flags & ~valid_flags) {
89 debug_printf("INVALID FLAGS: %x vs %x\n",
90 reg->flags, valid_flags);
91 }
92
93 if (!(reg->flags & IR3_REG_R))
94 repeat = 0;
95
96 if (reg->flags & IR3_REG_IMMED) {
97 val.iim_val = reg->iim_val;
98 } else {
99 unsigned components;
100 int16_t max;
101
102 if (reg->flags & IR3_REG_RELATIV) {
103 components = reg->size;
104 val.idummy10 = reg->array.offset;
105 max = (reg->array.offset + repeat + components - 1) >> 2;
106 } else {
107 components = util_last_bit(reg->wrmask);
108 val.comp = reg->num & 0x3;
109 val.num = reg->num >> 2;
110 max = (reg->num + repeat + components - 1) >> 2;
111 }
112
113 if (reg->flags & IR3_REG_CONST) {
114 info->max_const = MAX2(info->max_const, max);
115 } else if (val.num == 63) {
116 /* ignore writes to dummy register r63.x */
117 } else if (max < 48) {
118 if (reg->flags & IR3_REG_HALF) {
119 if (info->gpu_id >= 600) {
120 /* starting w/ a6xx, half regs conflict with full regs: */
121 info->max_reg = MAX2(info->max_reg, (max+1)/2);
122 } else {
123 info->max_half_reg = MAX2(info->max_half_reg, max);
124 }
125 } else {
126 info->max_reg = MAX2(info->max_reg, max);
127 }
128 }
129 }
130
131 return val.dummy32;
132 }
133
134 static int emit_cat0(struct ir3_instruction *instr, void *ptr,
135 struct ir3_info *info)
136 {
137 instr_cat0_t *cat0 = ptr;
138
139 if (info->gpu_id >= 500) {
140 cat0->a5xx.immed = instr->cat0.immed;
141 } else if (info->gpu_id >= 400) {
142 cat0->a4xx.immed = instr->cat0.immed;
143 } else {
144 cat0->a3xx.immed = instr->cat0.immed;
145 }
146 cat0->repeat = instr->repeat;
147 cat0->ss = !!(instr->flags & IR3_INSTR_SS);
148 cat0->inv = instr->cat0.inv;
149 cat0->comp = instr->cat0.comp;
150 cat0->opc = instr->opc;
151 cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
152 cat0->sync = !!(instr->flags & IR3_INSTR_SY);
153 cat0->opc_cat = 0;
154
155 return 0;
156 }
157
158 static int emit_cat1(struct ir3_instruction *instr, void *ptr,
159 struct ir3_info *info)
160 {
161 struct ir3_register *dst = instr->regs[0];
162 struct ir3_register *src = instr->regs[1];
163 instr_cat1_t *cat1 = ptr;
164
165 iassert(instr->regs_count == 2);
166 iassert_type(dst, type_size(instr->cat1.dst_type) == 32);
167 if (!(src->flags & IR3_REG_IMMED))
168 iassert_type(src, type_size(instr->cat1.src_type) == 32);
169
170 if (src->flags & IR3_REG_IMMED) {
171 cat1->iim_val = src->iim_val;
172 cat1->src_im = 1;
173 } else if (src->flags & IR3_REG_RELATIV) {
174 cat1->off = reg(src, info, instr->repeat,
175 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV);
176 cat1->src_rel = 1;
177 cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
178 } else {
179 cat1->src = reg(src, info, instr->repeat,
180 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF);
181 cat1->src_c = !!(src->flags & IR3_REG_CONST);
182 }
183
184 cat1->dst = reg(dst, info, instr->repeat,
185 IR3_REG_RELATIV | IR3_REG_EVEN |
186 IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
187 cat1->repeat = instr->repeat;
188 cat1->src_r = !!(src->flags & IR3_REG_R);
189 cat1->ss = !!(instr->flags & IR3_INSTR_SS);
190 cat1->ul = !!(instr->flags & IR3_INSTR_UL);
191 cat1->dst_type = instr->cat1.dst_type;
192 cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV);
193 cat1->src_type = instr->cat1.src_type;
194 cat1->even = !!(dst->flags & IR3_REG_EVEN);
195 cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF);
196 cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
197 cat1->sync = !!(instr->flags & IR3_INSTR_SY);
198 cat1->opc_cat = 1;
199
200 return 0;
201 }
202
203 static int emit_cat2(struct ir3_instruction *instr, void *ptr,
204 struct ir3_info *info)
205 {
206 struct ir3_register *dst = instr->regs[0];
207 struct ir3_register *src1 = instr->regs[1];
208 struct ir3_register *src2 = instr->regs[2];
209 instr_cat2_t *cat2 = ptr;
210 unsigned absneg = ir3_cat2_absneg(instr->opc);
211
212 iassert((instr->regs_count == 2) || (instr->regs_count == 3));
213
214 if (instr->nop) {
215 iassert(!instr->repeat);
216 iassert(instr->nop <= 3);
217
218 cat2->src1_r = instr->nop & 0x1;
219 cat2->src2_r = (instr->nop >> 1) & 0x1;
220 } else {
221 cat2->src1_r = !!(src1->flags & IR3_REG_R);
222 if (src2)
223 cat2->src2_r = !!(src2->flags & IR3_REG_R);
224 }
225
226 if (src1->flags & IR3_REG_RELATIV) {
227 iassert(src1->array.offset < (1 << 10));
228 cat2->rel1.src1 = reg(src1, info, instr->repeat,
229 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
230 IR3_REG_HALF | absneg);
231 cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST);
232 cat2->rel1.src1_rel = 1;
233 } else if (src1->flags & IR3_REG_CONST) {
234 iassert(src1->num < (1 << 12));
235 cat2->c1.src1 = reg(src1, info, instr->repeat,
236 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
237 cat2->c1.src1_c = 1;
238 } else {
239 iassert(src1->num < (1 << 11));
240 cat2->src1 = reg(src1, info, instr->repeat,
241 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
242 absneg);
243 }
244 cat2->src1_im = !!(src1->flags & IR3_REG_IMMED);
245 cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
246 cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
247
248 if (src2) {
249 iassert((src2->flags & IR3_REG_IMMED) ||
250 !((src1->flags ^ src2->flags) & IR3_REG_HALF));
251
252 if (src2->flags & IR3_REG_RELATIV) {
253 iassert(src2->array.offset < (1 << 10));
254 cat2->rel2.src2 = reg(src2, info, instr->repeat,
255 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
256 IR3_REG_HALF | absneg);
257 cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST);
258 cat2->rel2.src2_rel = 1;
259 } else if (src2->flags & IR3_REG_CONST) {
260 iassert(src2->num < (1 << 12));
261 cat2->c2.src2 = reg(src2, info, instr->repeat,
262 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
263 cat2->c2.src2_c = 1;
264 } else {
265 iassert(src2->num < (1 << 11));
266 cat2->src2 = reg(src2, info, instr->repeat,
267 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
268 absneg);
269 }
270
271 cat2->src2_im = !!(src2->flags & IR3_REG_IMMED);
272 cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
273 cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
274 }
275
276 cat2->dst = reg(dst, info, instr->repeat,
277 IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
278 cat2->repeat = instr->repeat;
279 cat2->sat = !!(instr->flags & IR3_INSTR_SAT);
280 cat2->ss = !!(instr->flags & IR3_INSTR_SS);
281 cat2->ul = !!(instr->flags & IR3_INSTR_UL);
282 cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
283 cat2->ei = !!(dst->flags & IR3_REG_EI);
284 cat2->cond = instr->cat2.condition;
285 cat2->full = ! (src1->flags & IR3_REG_HALF);
286 cat2->opc = instr->opc;
287 cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
288 cat2->sync = !!(instr->flags & IR3_INSTR_SY);
289 cat2->opc_cat = 2;
290
291 return 0;
292 }
293
294 static int emit_cat3(struct ir3_instruction *instr, void *ptr,
295 struct ir3_info *info)
296 {
297 struct ir3_register *dst = instr->regs[0];
298 struct ir3_register *src1 = instr->regs[1];
299 struct ir3_register *src2 = instr->regs[2];
300 struct ir3_register *src3 = instr->regs[3];
301 unsigned absneg = ir3_cat3_absneg(instr->opc);
302 instr_cat3_t *cat3 = ptr;
303 uint32_t src_flags = 0;
304
305 switch (instr->opc) {
306 case OPC_MAD_F16:
307 case OPC_MAD_U16:
308 case OPC_MAD_S16:
309 case OPC_SEL_B16:
310 case OPC_SEL_S16:
311 case OPC_SEL_F16:
312 case OPC_SAD_S16:
313 case OPC_SAD_S32: // really??
314 src_flags |= IR3_REG_HALF;
315 break;
316 default:
317 break;
318 }
319
320 iassert(instr->regs_count == 4);
321 iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
322 iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
323 iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
324
325 if (instr->nop) {
326 iassert(!instr->repeat);
327 iassert(instr->nop <= 3);
328
329 cat3->src1_r = instr->nop & 0x1;
330 cat3->src2_r = (instr->nop >> 1) & 0x1;
331 } else {
332 cat3->src1_r = !!(src1->flags & IR3_REG_R);
333 cat3->src2_r = !!(src2->flags & IR3_REG_R);
334 }
335
336 if (src1->flags & IR3_REG_RELATIV) {
337 iassert(src1->array.offset < (1 << 10));
338 cat3->rel1.src1 = reg(src1, info, instr->repeat,
339 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
340 IR3_REG_HALF | absneg);
341 cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST);
342 cat3->rel1.src1_rel = 1;
343 } else if (src1->flags & IR3_REG_CONST) {
344 iassert(src1->num < (1 << 12));
345 cat3->c1.src1 = reg(src1, info, instr->repeat,
346 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
347 cat3->c1.src1_c = 1;
348 } else {
349 iassert(src1->num < (1 << 11));
350 cat3->src1 = reg(src1, info, instr->repeat,
351 IR3_REG_R | IR3_REG_HALF | absneg);
352 }
353
354 cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
355
356 cat3->src2 = reg(src2, info, instr->repeat,
357 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
358 cat3->src2_c = !!(src2->flags & IR3_REG_CONST);
359 cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
360
361 if (src3->flags & IR3_REG_RELATIV) {
362 iassert(src3->array.offset < (1 << 10));
363 cat3->rel2.src3 = reg(src3, info, instr->repeat,
364 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
365 IR3_REG_HALF | absneg);
366 cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST);
367 cat3->rel2.src3_rel = 1;
368 } else if (src3->flags & IR3_REG_CONST) {
369 iassert(src3->num < (1 << 12));
370 cat3->c2.src3 = reg(src3, info, instr->repeat,
371 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
372 cat3->c2.src3_c = 1;
373 } else {
374 iassert(src3->num < (1 << 11));
375 cat3->src3 = reg(src3, info, instr->repeat,
376 IR3_REG_R | IR3_REG_HALF | absneg);
377 }
378
379 cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
380 cat3->src3_r = !!(src3->flags & IR3_REG_R);
381
382 cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
383 cat3->repeat = instr->repeat;
384 cat3->sat = !!(instr->flags & IR3_INSTR_SAT);
385 cat3->ss = !!(instr->flags & IR3_INSTR_SS);
386 cat3->ul = !!(instr->flags & IR3_INSTR_UL);
387 cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
388 cat3->opc = instr->opc;
389 cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
390 cat3->sync = !!(instr->flags & IR3_INSTR_SY);
391 cat3->opc_cat = 3;
392
393 return 0;
394 }
395
396 static int emit_cat4(struct ir3_instruction *instr, void *ptr,
397 struct ir3_info *info)
398 {
399 struct ir3_register *dst = instr->regs[0];
400 struct ir3_register *src = instr->regs[1];
401 instr_cat4_t *cat4 = ptr;
402
403 iassert(instr->regs_count == 2);
404
405 if (src->flags & IR3_REG_RELATIV) {
406 iassert(src->array.offset < (1 << 10));
407 cat4->rel.src = reg(src, info, instr->repeat,
408 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
409 IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
410 cat4->rel.src_c = !!(src->flags & IR3_REG_CONST);
411 cat4->rel.src_rel = 1;
412 } else if (src->flags & IR3_REG_CONST) {
413 iassert(src->num < (1 << 12));
414 cat4->c.src = reg(src, info, instr->repeat,
415 IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS |
416 IR3_REG_R | IR3_REG_HALF);
417 cat4->c.src_c = 1;
418 } else {
419 iassert(src->num < (1 << 11));
420 cat4->src = reg(src, info, instr->repeat,
421 IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
422 IR3_REG_R | IR3_REG_HALF);
423 }
424
425 cat4->src_im = !!(src->flags & IR3_REG_IMMED);
426 cat4->src_neg = !!(src->flags & IR3_REG_FNEG);
427 cat4->src_abs = !!(src->flags & IR3_REG_FABS);
428 cat4->src_r = !!(src->flags & IR3_REG_R);
429
430 cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
431 cat4->repeat = instr->repeat;
432 cat4->sat = !!(instr->flags & IR3_INSTR_SAT);
433 cat4->ss = !!(instr->flags & IR3_INSTR_SS);
434 cat4->ul = !!(instr->flags & IR3_INSTR_UL);
435 cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
436 cat4->full = ! (src->flags & IR3_REG_HALF);
437 cat4->opc = instr->opc;
438 cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
439 cat4->sync = !!(instr->flags & IR3_INSTR_SY);
440 cat4->opc_cat = 4;
441
442 return 0;
443 }
444
445 static int emit_cat5(struct ir3_instruction *instr, void *ptr,
446 struct ir3_info *info)
447 {
448 struct ir3_register *dst = instr->regs[0];
449 struct ir3_register *src1 = instr->regs[1];
450 struct ir3_register *src2 = instr->regs[2];
451 struct ir3_register *src3 = instr->regs[3];
452 instr_cat5_t *cat5 = ptr;
453
454 iassert_type(dst, type_size(instr->cat5.type) == 32)
455
456 assume(src1 || !src2);
457 assume(src2 || !src3);
458
459 if (src1) {
460 cat5->full = ! (src1->flags & IR3_REG_HALF);
461 cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
462 }
463
464 if (instr->flags & IR3_INSTR_S2EN) {
465 if (src2) {
466 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
467 cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
468 }
469 if (src3) {
470 iassert(src3->flags & IR3_REG_HALF);
471 cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF);
472 }
473 iassert(!(instr->cat5.samp | instr->cat5.tex));
474 } else {
475 iassert(!src3);
476 if (src2) {
477 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
478 cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
479 }
480 cat5->norm.samp = instr->cat5.samp;
481 cat5->norm.tex = instr->cat5.tex;
482 }
483
484 cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
485 cat5->wrmask = dst->wrmask;
486 cat5->type = instr->cat5.type;
487 cat5->is_3d = !!(instr->flags & IR3_INSTR_3D);
488 cat5->is_a = !!(instr->flags & IR3_INSTR_A);
489 cat5->is_s = !!(instr->flags & IR3_INSTR_S);
490 cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN);
491 cat5->is_o = !!(instr->flags & IR3_INSTR_O);
492 cat5->is_p = !!(instr->flags & IR3_INSTR_P);
493 cat5->opc = instr->opc;
494 cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
495 cat5->sync = !!(instr->flags & IR3_INSTR_SY);
496 cat5->opc_cat = 5;
497
498 return 0;
499 }
500
501 static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
502 struct ir3_info *info)
503 {
504 struct ir3_register *src1, *src2;
505 instr_cat6_a6xx_t *cat6 = ptr;
506 bool has_dest = (instr->opc == OPC_LDIB);
507
508 /* first reg should be SSBO binding point: */
509 iassert(instr->regs[1]->flags & IR3_REG_IMMED);
510
511 src1 = instr->regs[2];
512
513 if (has_dest) {
514 /* the src2 field in the instruction is actually the destination
515 * register for load instructions:
516 */
517 src2 = instr->regs[0];
518 } else {
519 src2 = instr->regs[3];
520 }
521
522 cat6->type = instr->cat6.type;
523 cat6->d = instr->cat6.d - 1;
524 cat6->typed = instr->cat6.typed;
525 cat6->type_size = instr->cat6.iim_val - 1;
526 cat6->opc = instr->opc;
527 cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
528 cat6->sync = !!(instr->flags & IR3_INSTR_SY);
529 cat6->opc_cat = 6;
530
531 cat6->src1 = reg(src1, info, instr->repeat, 0);
532 cat6->src2 = reg(src2, info, instr->repeat, 0);
533 cat6->ssbo = instr->regs[1]->iim_val;
534
535 switch (instr->opc) {
536 case OPC_ATOMIC_ADD:
537 case OPC_ATOMIC_SUB:
538 case OPC_ATOMIC_XCHG:
539 case OPC_ATOMIC_INC:
540 case OPC_ATOMIC_DEC:
541 case OPC_ATOMIC_CMPXCHG:
542 case OPC_ATOMIC_MIN:
543 case OPC_ATOMIC_MAX:
544 case OPC_ATOMIC_AND:
545 case OPC_ATOMIC_OR:
546 case OPC_ATOMIC_XOR:
547 cat6->pad1 = 0x1;
548 cat6->pad2 = 0xc;
549 cat6->pad3 = 0x0;
550 cat6->pad4 = 0x3;
551 break;
552 case OPC_STIB:
553 cat6->pad1 = 0x0;
554 cat6->pad2 = 0xc;
555 cat6->pad3 = 0x0;
556 cat6->pad4 = 0x2;
557 break;
558 case OPC_LDIB:
559 cat6->pad1 = 0x1;
560 cat6->pad2 = 0xc;
561 cat6->pad3 = 0x0;
562 cat6->pad4 = 0x2;
563 break;
564 case OPC_LDC:
565 cat6->pad1 = 0x0;
566 cat6->pad2 = 0x8;
567 cat6->pad3 = 0x0;
568 cat6->pad4 = 0x2;
569 break;
570 default:
571 iassert(0);
572 }
573
574 return 0;
575 }
576
577 static int emit_cat6(struct ir3_instruction *instr, void *ptr,
578 struct ir3_info *info)
579 {
580 struct ir3_register *dst, *src1, *src2;
581 instr_cat6_t *cat6 = ptr;
582
583 /* In a6xx we start using a new instruction encoding for some of
584 * these instructions:
585 */
586 if (info->gpu_id >= 600) {
587 switch (instr->opc) {
588 case OPC_ATOMIC_ADD:
589 case OPC_ATOMIC_SUB:
590 case OPC_ATOMIC_XCHG:
591 case OPC_ATOMIC_INC:
592 case OPC_ATOMIC_DEC:
593 case OPC_ATOMIC_CMPXCHG:
594 case OPC_ATOMIC_MIN:
595 case OPC_ATOMIC_MAX:
596 case OPC_ATOMIC_AND:
597 case OPC_ATOMIC_OR:
598 case OPC_ATOMIC_XOR:
599 /* The shared variants of these still use the old encoding: */
600 if (!(instr->flags & IR3_INSTR_G))
601 break;
602 /* fallthrough */
603 case OPC_STIB:
604 case OPC_LDIB:
605 case OPC_LDC:
606 return emit_cat6_a6xx(instr, ptr, info);
607 default:
608 break;
609 }
610 }
611
612 bool type_full = type_size(instr->cat6.type) == 32;
613
614 cat6->type = instr->cat6.type;
615 cat6->opc = instr->opc;
616 cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
617 cat6->sync = !!(instr->flags & IR3_INSTR_SY);
618 cat6->g = !!(instr->flags & IR3_INSTR_G);
619 cat6->opc_cat = 6;
620
621 switch (instr->opc) {
622 case OPC_RESINFO:
623 case OPC_RESFMT:
624 iassert_type(instr->regs[0], type_full); /* dst */
625 iassert_type(instr->regs[1], type_full); /* src1 */
626 break;
627 case OPC_L2G:
628 case OPC_G2L:
629 iassert_type(instr->regs[0], true); /* dst */
630 iassert_type(instr->regs[1], true); /* src1 */
631 break;
632 case OPC_STG:
633 case OPC_STL:
634 case OPC_STP:
635 case OPC_STLW:
636 case OPC_STIB:
637 /* no dst, so regs[0] is dummy */
638 iassert_type(instr->regs[1], true); /* dst */
639 iassert_type(instr->regs[2], type_full); /* src1 */
640 iassert_type(instr->regs[3], true); /* src2 */
641 break;
642 default:
643 iassert_type(instr->regs[0], type_full); /* dst */
644 iassert_type(instr->regs[1], true); /* src1 */
645 if (instr->regs_count > 2)
646 iassert_type(instr->regs[2], true); /* src1 */
647 break;
648 }
649
650 /* the "dst" for a store instruction is (from the perspective
651 * of data flow in the shader, ie. register use/def, etc) in
652 * fact a register that is read by the instruction, rather
653 * than written:
654 */
655 if (is_store(instr)) {
656 iassert(instr->regs_count >= 3);
657
658 dst = instr->regs[1];
659 src1 = instr->regs[2];
660 src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
661 } else {
662 iassert(instr->regs_count >= 2);
663
664 dst = instr->regs[0];
665 src1 = instr->regs[1];
666 src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
667 }
668
669 /* TODO we need a more comprehensive list about which instructions
670 * can be encoded which way. Or possibly use IR3_INSTR_0 flag to
671 * indicate to use the src_off encoding even if offset is zero
672 * (but then what to do about dst_off?)
673 */
674 if (is_atomic(instr->opc)) {
675 instr_cat6ldgb_t *ldgb = ptr;
676
677 /* maybe these two bits both determine the instruction encoding? */
678 cat6->src_off = false;
679
680 ldgb->d = instr->cat6.d - 1;
681 ldgb->typed = instr->cat6.typed;
682 ldgb->type_size = instr->cat6.iim_val - 1;
683
684 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
685
686 if (ldgb->g) {
687 struct ir3_register *src3 = instr->regs[3];
688 struct ir3_register *src4 = instr->regs[4];
689
690 /* first src is src_ssbo: */
691 iassert(src1->flags & IR3_REG_IMMED);
692 ldgb->src_ssbo = src1->uim_val;
693
694 ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
695 ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
696 ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
697 ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
698
699 ldgb->src3 = reg(src4, info, instr->repeat, 0);
700 ldgb->pad0 = 0x1;
701 ldgb->pad3 = 0x1;
702 } else {
703 ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
704 ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED);
705 ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
706 ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
707 ldgb->pad0 = 0x1;
708 ldgb->pad3 = 0x0;
709 }
710
711 return 0;
712 } else if (instr->opc == OPC_LDGB) {
713 struct ir3_register *src3 = instr->regs[3];
714 instr_cat6ldgb_t *ldgb = ptr;
715
716 /* maybe these two bits both determine the instruction encoding? */
717 cat6->src_off = false;
718
719 ldgb->d = instr->cat6.d - 1;
720 ldgb->typed = instr->cat6.typed;
721 ldgb->type_size = instr->cat6.iim_val - 1;
722
723 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
724
725 /* first src is src_ssbo: */
726 iassert(src1->flags & IR3_REG_IMMED);
727 ldgb->src_ssbo = src1->uim_val;
728
729 /* then next two are src1/src2: */
730 ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
731 ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
732 ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
733 ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
734
735 ldgb->pad0 = 0x0;
736 ldgb->pad3 = 0x1;
737
738 return 0;
739 } else if (instr->opc == OPC_RESINFO) {
740 instr_cat6ldgb_t *ldgb = ptr;
741
742 ldgb->d = instr->cat6.d - 1;
743
744 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
745
746 /* first src is src_ssbo: */
747 iassert(src1->flags & IR3_REG_IMMED);
748 ldgb->src_ssbo = src1->uim_val;
749
750 return 0;
751 } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) {
752 struct ir3_register *src3 = instr->regs[4];
753 instr_cat6stgb_t *stgb = ptr;
754
755 /* maybe these two bits both determine the instruction encoding? */
756 cat6->src_off = true;
757 stgb->pad3 = 0x2;
758
759 stgb->d = instr->cat6.d - 1;
760 stgb->typed = instr->cat6.typed;
761 stgb->type_size = instr->cat6.iim_val - 1;
762
763 /* first src is dst_ssbo: */
764 iassert(dst->flags & IR3_REG_IMMED);
765 stgb->dst_ssbo = dst->uim_val;
766
767 /* then src1/src2/src3: */
768 stgb->src1 = reg(src1, info, instr->repeat, 0);
769 stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
770 stgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
771 stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
772 stgb->src3_im = !!(src3->flags & IR3_REG_IMMED);
773
774 return 0;
775 } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) ||
776 (instr->opc == OPC_LDL)) {
777 instr_cat6a_t *cat6a = ptr;
778
779 cat6->src_off = true;
780
781 cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
782 cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
783 if (src2) {
784 cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
785 cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
786 }
787 cat6a->off = instr->cat6.src_offset;
788 } else {
789 instr_cat6b_t *cat6b = ptr;
790
791 cat6->src_off = false;
792
793 cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF);
794 cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
795 if (src2) {
796 cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
797 cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED);
798 }
799 }
800
801 if (instr->cat6.dst_offset || (instr->opc == OPC_STG) ||
802 (instr->opc == OPC_STL)) {
803 instr_cat6c_t *cat6c = ptr;
804 cat6->dst_off = true;
805 cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
806 cat6c->off = instr->cat6.dst_offset;
807 } else {
808 instr_cat6d_t *cat6d = ptr;
809 cat6->dst_off = false;
810 cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
811 }
812
813 return 0;
814 }
815
816 static int emit_cat7(struct ir3_instruction *instr, void *ptr,
817 struct ir3_info *info)
818 {
819 instr_cat7_t *cat7 = ptr;
820
821 cat7->ss = !!(instr->flags & IR3_INSTR_SS);
822 cat7->w = instr->cat7.w;
823 cat7->r = instr->cat7.r;
824 cat7->l = instr->cat7.l;
825 cat7->g = instr->cat7.g;
826 cat7->opc = instr->opc;
827 cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
828 cat7->sync = !!(instr->flags & IR3_INSTR_SY);
829 cat7->opc_cat = 7;
830
831 return 0;
832 }
833
834 static int (*emit[])(struct ir3_instruction *instr, void *ptr,
835 struct ir3_info *info) = {
836 emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
837 emit_cat7,
838 };
839
840 void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
841 uint32_t gpu_id)
842 {
843 uint32_t *ptr, *dwords;
844
845 info->gpu_id = gpu_id;
846 info->max_reg = -1;
847 info->max_half_reg = -1;
848 info->max_const = -1;
849 info->instrs_count = 0;
850 info->sizedwords = 0;
851 info->ss = info->sy = 0;
852
853 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
854 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
855 info->sizedwords += 2;
856 }
857 }
858
859 /* need an integer number of instruction "groups" (sets of 16
860 * instructions on a4xx or sets of 4 instructions on a3xx),
861 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
862 */
863 if (gpu_id >= 400) {
864 info->sizedwords = align(info->sizedwords, 16 * 2);
865 } else {
866 info->sizedwords = align(info->sizedwords, 4 * 2);
867 }
868
869 ptr = dwords = calloc(4, info->sizedwords);
870
871 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
872 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
873 int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
874 if (ret)
875 goto fail;
876 info->instrs_count += 1 + instr->repeat;
877 dwords += 2;
878
879 if (instr->flags & IR3_INSTR_SS)
880 info->ss++;
881
882 if (instr->flags & IR3_INSTR_SY)
883 info->sy++;
884 }
885 }
886
887 return ptr;
888
889 fail:
890 free(ptr);
891 return NULL;
892 }
893
894 static struct ir3_register * reg_create(struct ir3 *shader,
895 int num, int flags)
896 {
897 struct ir3_register *reg =
898 ir3_alloc(shader, sizeof(struct ir3_register));
899 reg->wrmask = 1;
900 reg->flags = flags;
901 reg->num = num;
902 return reg;
903 }
904
905 static void insert_instr(struct ir3_block *block,
906 struct ir3_instruction *instr)
907 {
908 struct ir3 *shader = block->shader;
909 #ifdef DEBUG
910 instr->serialno = ++shader->instr_count;
911 #endif
912 list_addtail(&instr->node, &block->instr_list);
913
914 if (is_input(instr))
915 array_insert(shader, shader->baryfs, instr);
916 }
917
918 struct ir3_block * ir3_block_create(struct ir3 *shader)
919 {
920 struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
921 #ifdef DEBUG
922 block->serialno = ++shader->block_count;
923 #endif
924 block->shader = shader;
925 list_inithead(&block->node);
926 list_inithead(&block->instr_list);
927 return block;
928 }
929
930 static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg)
931 {
932 struct ir3_instruction *instr;
933 unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0]));
934 char *ptr = ir3_alloc(block->shader, sz);
935
936 instr = (struct ir3_instruction *)ptr;
937 ptr += sizeof(*instr);
938 instr->regs = (struct ir3_register **)ptr;
939
940 #ifdef DEBUG
941 instr->regs_max = nreg;
942 #endif
943
944 return instr;
945 }
946
947 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
948 opc_t opc, int nreg)
949 {
950 struct ir3_instruction *instr = instr_create(block, nreg);
951 instr->block = block;
952 instr->opc = opc;
953 insert_instr(block, instr);
954 return instr;
955 }
956
957 struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc)
958 {
959 /* NOTE: we could be slightly more clever, at least for non-meta,
960 * and choose # of regs based on category.
961 */
962 return ir3_instr_create2(block, opc, 4);
963 }
964
965 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
966 {
967 struct ir3_instruction *new_instr = instr_create(instr->block,
968 instr->regs_count);
969 struct ir3_register **regs;
970 unsigned i;
971
972 regs = new_instr->regs;
973 *new_instr = *instr;
974 new_instr->regs = regs;
975
976 insert_instr(instr->block, new_instr);
977
978 /* clone registers: */
979 new_instr->regs_count = 0;
980 for (i = 0; i < instr->regs_count; i++) {
981 struct ir3_register *reg = instr->regs[i];
982 struct ir3_register *new_reg =
983 ir3_reg_create(new_instr, reg->num, reg->flags);
984 *new_reg = *reg;
985 }
986
987 return new_instr;
988 }
989
990 /* Add a false dependency to instruction, to ensure it is scheduled first: */
991 void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
992 {
993 array_insert(instr, instr->deps, dep);
994 }
995
996 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
997 int num, int flags)
998 {
999 struct ir3 *shader = instr->block->shader;
1000 struct ir3_register *reg = reg_create(shader, num, flags);
1001 #ifdef DEBUG
1002 debug_assert(instr->regs_count < instr->regs_max);
1003 #endif
1004 instr->regs[instr->regs_count++] = reg;
1005 return reg;
1006 }
1007
1008 struct ir3_register * ir3_reg_clone(struct ir3 *shader,
1009 struct ir3_register *reg)
1010 {
1011 struct ir3_register *new_reg = reg_create(shader, 0, 0);
1012 *new_reg = *reg;
1013 return new_reg;
1014 }
1015
1016 void
1017 ir3_instr_set_address(struct ir3_instruction *instr,
1018 struct ir3_instruction *addr)
1019 {
1020 if (instr->address != addr) {
1021 struct ir3 *ir = instr->block->shader;
1022 instr->address = addr;
1023 array_insert(ir, ir->indirects, instr);
1024 }
1025 }
1026
1027 void
1028 ir3_block_clear_mark(struct ir3_block *block)
1029 {
1030 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
1031 instr->flags &= ~IR3_INSTR_MARK;
1032 }
1033
1034 void
1035 ir3_clear_mark(struct ir3 *ir)
1036 {
1037 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
1038 ir3_block_clear_mark(block);
1039 }
1040 }
1041
1042 /* note: this will destroy instr->depth, don't do it until after sched! */
1043 unsigned
1044 ir3_count_instructions(struct ir3 *ir)
1045 {
1046 unsigned cnt = 0;
1047 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
1048 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
1049 instr->ip = cnt++;
1050 }
1051 block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
1052 block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
1053 }
1054 return cnt;
1055 }
1056
1057 struct ir3_array *
1058 ir3_lookup_array(struct ir3 *ir, unsigned id)
1059 {
1060 list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
1061 if (arr->id == id)
1062 return arr;
1063 return NULL;
1064 }