freedreno/ir3: add image/ssbo <-> ibo/tex mapping
[mesa.git] / src / freedreno / ir3 / ir3_context.c
/*
 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_math.h"

#include "ir3_compiler.h"
#include "ir3_context.h"
#include "ir3_image.h"
#include "ir3_shader.h"
#include "ir3_nir.h"

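/* Construct the per-variant compile context: clone the variant's NIR (running
 * the shader-key lowering passes when the key requires them), run the final
 * pre-translation NIR passes, set up the image/ibo mapping, and lay out the
 * constant space (UBO/SSBO/image/driver-param/immediate sections) for this
 * variant.
 */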
struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler,
        struct ir3_shader_variant *so)
{
    struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

    if (compiler->gpu_id >= 400) {
        if (so->type == MESA_SHADER_VERTEX) {
            ctx->astc_srgb = so->key.vastc_srgb;
        } else if (so->type == MESA_SHADER_FRAGMENT) {
            ctx->astc_srgb = so->key.fastc_srgb;
        }

    } else {
        if (so->type == MESA_SHADER_VERTEX) {
            ctx->samples = so->key.vsamples;
        } else if (so->type == MESA_SHADER_FRAGMENT) {
            ctx->samples = so->key.fsamples;
        }
    }

    if (compiler->gpu_id >= 400) {
        ctx->funcs = &ir3_a4xx_funcs;
    }

    ctx->compiler = compiler;
    ctx->so = so;
    ctx->def_ht = _mesa_hash_table_create(ctx,
            _mesa_hash_pointer, _mesa_key_pointer_equal);
    ctx->block_ht = _mesa_hash_table_create(ctx,
            _mesa_hash_pointer, _mesa_key_pointer_equal);

    /* TODO: maybe generate some sort of bitmask of what the key lowers
     * vs what the shader actually contains (e.g. no need to run texture
     * clamp lowering if there are no texture sample instructions)..
     * although that should be done further up the stack to avoid
     * creating duplicate variants..
     */


    if (ir3_key_lowers_nir(&so->key)) {
        nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
        ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
    } else {
        /* fast-path for shader key that lowers nothing in NIR: */
        ctx->s = nir_shader_clone(ctx, so->shader->nir);
    }

    /* this needs to be the last pass run, so do this here instead of
     * in ir3_optimize_nir():
     */
    NIR_PASS_V(ctx->s, nir_lower_bool_to_int32);
    NIR_PASS_V(ctx->s, nir_lower_locals_to_regs);
    NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);

    if (ir3_shader_debug & IR3_DBG_DISASM) {
        DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}",
            so->shader->id, so->id, so->type,
            so->key.color_two_side, so->key.half_precision);
        nir_print_shader(ctx->s, stdout);
    }

    if (shader_debug_enabled(so->type)) {
        fprintf(stderr, "NIR (final form) for %s shader:\n",
            _mesa_shader_stage_to_string(so->type));
        nir_print_shader(ctx->s, stderr);
    }

    ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);

    so->num_uniforms = ctx->s->num_uniforms;
    so->num_ubos = ctx->s->info.num_ubos;

    ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

    /* Layout of constant registers, each section aligned to vec4.  Note
     * that pointer size (ubo, etc) changes depending on generation.
     *
     *    user consts
     *    UBO addresses
     *    SSBO sizes
     *    if (vertex shader) {
     *        driver params (IR3_DP_*)
     *        if (stream_output.num_outputs > 0)
     *           stream-out addresses
     *    }
     *    immediates
     *
     * Immediates go last mostly because they are inserted in the CP pass
     * after the nir -> ir3 frontend.
     */
    unsigned constoff = align(ctx->s->num_uniforms, 4);
    unsigned ptrsz = ir3_pointer_size(ctx);

    memset(&so->constbase, ~0, sizeof(so->constbase));

    if (so->num_ubos > 0) {
        so->constbase.ubo = constoff;
        constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
    }

    if (so->const_layout.ssbo_size.count > 0) {
        unsigned cnt = so->const_layout.ssbo_size.count;
        so->constbase.ssbo_sizes = constoff;
        constoff += align(cnt, 4) / 4;
    }

    if (so->const_layout.image_dims.count > 0) {
        unsigned cnt = so->const_layout.image_dims.count;
        so->constbase.image_dims = constoff;
        constoff += align(cnt, 4) / 4;
    }

    unsigned num_driver_params = 0;
    if (so->type == MESA_SHADER_VERTEX) {
        num_driver_params = IR3_DP_VS_COUNT;
    } else if (so->type == MESA_SHADER_COMPUTE) {
        num_driver_params = IR3_DP_CS_COUNT;
    }

    so->constbase.driver_param = constoff;
    constoff += align(num_driver_params, 4) / 4;

    if ((so->type == MESA_SHADER_VERTEX) &&
            (compiler->gpu_id < 500) &&
            so->shader->stream_output.num_outputs > 0) {
        so->constbase.tfbo = constoff;
        constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
    }

    so->constbase.immediate = constoff;

    return ctx;
}

void
ir3_context_free(struct ir3_context *ctx)
{
    ralloc_free(ctx);
}

/*
 * Misc helpers
 */

/* allocate an n element value array (to be populated by caller) and
 * insert in def_ht
 */
struct ir3_instruction **
ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
{
    struct ir3_instruction **value =
        ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
    _mesa_hash_table_insert(ctx->def_ht, dst, value);
    return value;
}

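/* Allocate the destination value array for a nir_dest.  For SSA dests the
 * array is registered in def_ht; for reg dests it is a temporary that
 * put_dst() later stores back into the backing ir3_array.
 */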
struct ir3_instruction **
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
    struct ir3_instruction **value;

    if (dst->is_ssa) {
        value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
    } else {
        value = ralloc_array(ctx, struct ir3_instruction *, n);
    }

    /* NOTE: in non-ssa case, we don't really need to store last_dst
     * but this helps us catch cases where a put_dst() call is forgotten
     */
    compile_assert(ctx, !ctx->last_dst);
    ctx->last_dst = value;
    ctx->last_dst_n = n;

    return value;
}

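/* Fetch the ir3 values for a nir_src.  SSA sources are looked up in def_ht;
 * reg sources are loaded from the backing ir3_array, using an address
 * register when the access is indirect.
 */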
struct ir3_instruction * const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
    if (src->is_ssa) {
        struct hash_entry *entry;
        entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
        compile_assert(ctx, entry);
        return entry->data;
    } else {
        nir_register *reg = src->reg.reg;
        struct ir3_array *arr = ir3_get_array(ctx, reg);
        unsigned num_components = arr->r->num_components;
        struct ir3_instruction *addr = NULL;
        struct ir3_instruction **value =
            ralloc_array(ctx, struct ir3_instruction *, num_components);

        if (src->reg.indirect)
            addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
                    reg->num_components);

        for (unsigned i = 0; i < num_components; i++) {
            unsigned n = src->reg.base_offset * reg->num_components + i;
            compile_assert(ctx, n < arr->length);
            value[i] = ir3_create_array_load(ctx, arr, n, addr);
        }

        return value;
    }
}

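/* Finish off a preceding ir3_get_dst(): mark the results as half-precision
 * when the nir dest is narrower than 32 bits, and for reg dests store the
 * values back into the backing ir3_array.
 */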
void
put_dst(struct ir3_context *ctx, nir_dest *dst)
{
    unsigned bit_size = nir_dest_bit_size(*dst);

    if (bit_size < 32) {
        for (unsigned i = 0; i < ctx->last_dst_n; i++) {
            struct ir3_instruction *dst = ctx->last_dst[i];
            dst->regs[0]->flags |= IR3_REG_HALF;
            if (ctx->last_dst[i]->opc == OPC_META_FO)
                dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
        }
    }

    if (!dst->is_ssa) {
        nir_register *reg = dst->reg.reg;
        struct ir3_array *arr = ir3_get_array(ctx, reg);
        unsigned num_components = ctx->last_dst_n;
        struct ir3_instruction *addr = NULL;

        if (dst->reg.indirect)
            addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
                    reg->num_components);

        for (unsigned i = 0; i < num_components; i++) {
            unsigned n = dst->reg.base_offset * reg->num_components + i;
            compile_assert(ctx, n < arr->length);
            if (!ctx->last_dst[i])
                continue;
            ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
        }

        ralloc_free(ctx->last_dst);
    }
    ctx->last_dst = NULL;
    ctx->last_dst_n = 0;
}

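/* Gather 'arrsz' scalar ir3 values into a single collect (OPC_META_FI) meta
 * instruction, so they end up in consecutive scalar registers as needed by
 * instructions that consume a vector source.
 */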
struct ir3_instruction *
ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
        unsigned arrsz)
{
    struct ir3_block *block = ctx->block;
    struct ir3_instruction *collect;

    if (arrsz == 0)
        return NULL;

    unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF;

    collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
    ir3_reg_create(collect, 0, flags);     /* dst */
    for (unsigned i = 0; i < arrsz; i++) {
        struct ir3_instruction *elem = arr[i];

        /* Since arrays are pre-colored in RA, we can't assume that
         * things will end up in the right place.  (I.e. if a collect
         * joins elements from two different arrays.)  So insert an
         * extra mov.
         *
         * We could possibly skip this if all the collected elements
         * are contiguous elements in a single array..  not sure how
         * likely that is to happen.
         *
         * Fixes a problem with glamor shaders, that in effect do
         * something like:
         *
         *    if (foo)
         *       texcoord = ..
         *    else
         *       texcoord = ..
         *    color = texture2D(tex, texcoord);
         *
         * In this case, texcoord will end up as nir registers (which
         * translate to ir3 arrays of length 1), and we can't assume
         * the two (or more) arrays will get allocated in consecutive
         * scalar registers.
         */
        if (elem->regs[0]->flags & IR3_REG_ARRAY) {
            type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
            elem = ir3_MOV(block, elem, type);
        }

        compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags);
        ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem;
    }

    return collect;
}

/* helper for instructions that produce multiple consecutive scalar
 * outputs, which need to have a split/fanout meta instruction inserted
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
        struct ir3_instruction *src, unsigned base, unsigned n)
{
    struct ir3_instruction *prev = NULL;

    if ((n == 1) && (src->regs[0]->wrmask == 0x1)) {
        dst[0] = src;
        return;
    }

    for (int i = 0, j = 0; i < n; i++) {
        struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
        ir3_reg_create(split, 0, IR3_REG_SSA);
        ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src;
        split->fo.off = i + base;

        if (prev) {
            split->cp.left = prev;
            split->cp.left_cnt++;
            prev->cp.right = split;
            prev->cp.right_cnt++;
        }
        prev = split;

        if (src->regs[0]->wrmask & (1 << (i + base)))
            dst[j++] = split;
    }
}

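/* Report a compile error.  If we know which instruction triggered it, dump
 * the NIR annotated with the message at the offending instruction; otherwise
 * just print the message.  The context is flagged as errored either way.
 */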
void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
    struct hash_table *errors = NULL;
    va_list ap;
    va_start(ap, format);
    if (ctx->cur_instr) {
        errors = _mesa_hash_table_create(NULL,
                _mesa_hash_pointer,
                _mesa_key_pointer_equal);
        char *msg = ralloc_vasprintf(errors, format, ap);
        _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
    } else {
        _debug_vprintf(format, ap);
    }
    va_end(ap);
    nir_print_shader_annotated(ctx->s, stdout, errors);
    ralloc_free(errors);
    ctx->error = true;
    debug_assert(0);
}

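/* Build the cov/shl/mova sequence that loads a scaled index into the a0.x
 * address register: convert the 32-bit src to a 16-bit value, multiply it by
 * 'align' (the element stride, in components), and move the result into a0.x.
 */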
static struct ir3_instruction *
create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
{
    struct ir3_instruction *instr, *immed;

    /* TODO in at least some cases, the backend could probably be
     * made clever enough to propagate IR3_REG_HALF..
     */
    instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);
    instr->regs[0]->flags |= IR3_REG_HALF;

    switch (align) {
    case 1:
        /* src *= 1: */
        break;
    case 2:
        /* src *= 2 => src <<= 1: */
        immed = create_immed(block, 1);
        immed->regs[0]->flags |= IR3_REG_HALF;

        instr = ir3_SHL_B(block, instr, 0, immed, 0);
        instr->regs[0]->flags |= IR3_REG_HALF;
        instr->regs[1]->flags |= IR3_REG_HALF;
        break;
    case 3:
        /* src *= 3: */
        immed = create_immed(block, 3);
        immed->regs[0]->flags |= IR3_REG_HALF;

        instr = ir3_MULL_U(block, instr, 0, immed, 0);
        instr->regs[0]->flags |= IR3_REG_HALF;
        instr->regs[1]->flags |= IR3_REG_HALF;
        break;
    case 4:
        /* src *= 4 => src <<= 2: */
        immed = create_immed(block, 2);
        immed->regs[0]->flags |= IR3_REG_HALF;

        instr = ir3_SHL_B(block, instr, 0, immed, 0);
        instr->regs[0]->flags |= IR3_REG_HALF;
        instr->regs[1]->flags |= IR3_REG_HALF;
        break;
    default:
        unreachable("bad align");
        return NULL;
    }

    instr = ir3_MOV(block, instr, TYPE_S16);
    instr->regs[0]->num = regid(REG_A0, 0);
    instr->regs[0]->flags |= IR3_REG_HALF;
    instr->regs[1]->flags |= IR3_REG_HALF;

    return instr;
}

/* caches addr values to avoid generating multiple cov/shl/mova
 * sequences for each use of a given NIR-level src as address
 */
struct ir3_instruction *
ir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
    struct ir3_instruction *addr;
    unsigned idx = align - 1;

    compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht));

    if (!ctx->addr_ht[idx]) {
        ctx->addr_ht[idx] = _mesa_hash_table_create(ctx,
                _mesa_hash_pointer, _mesa_key_pointer_equal);
    } else {
        struct hash_entry *entry;
        entry = _mesa_hash_table_search(ctx->addr_ht[idx], src);
        if (entry)
            return entry->data;
    }

    addr = create_addr(ctx->block, src, align);
    _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr);

    return addr;
}

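/* Convert a boolean-valued src into a predicate: compare it against zero and
 * write the result into the p0.x predicate register.
 */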
struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
    struct ir3_block *b = ctx->block;
    struct ir3_instruction *cond;

    /* NOTE: only cmps.*.* can write p0.x: */
    cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
    cond->cat2.condition = IR3_COND_NE;

    /* condition always goes in predicate register: */
    cond->regs[0]->num = regid(REG_P0, 0);

    return cond;
}

/*
 * Array helpers
 */

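/* Create the ir3_array backing a nir_register, give it a unique id, and add
 * it to the shader's array list.  Registers that aren't really arrays are
 * handled too; see the note below.
 */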
void
ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
{
    struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
    arr->id = ++ctx->num_arrays;
    /* NOTE: sometimes we get non-array regs, for example for arrays of
     * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
     * treat a non-array as if it were an array of length 1.
     *
     * It would be nice if there were a nir pass to convert arrays of
     * length 1 to ssa.
     */
    arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
    compile_assert(ctx, arr->length > 0);
    arr->r = reg;
    list_addtail(&arr->node, &ctx->ir->array_list);
}

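/* Look up the ir3_array previously declared for a given nir_register. */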
struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
    list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
        if (arr->r == reg)
            return arr;
    }
    ir3_context_error(ctx, "bogus reg: %s\n", reg->name);
    return NULL;
}

/* relative (indirect) if address!=NULL */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
        struct ir3_instruction *address)
{
    struct ir3_block *block = ctx->block;
    struct ir3_instruction *mov;
    struct ir3_register *src;

    mov = ir3_instr_create(block, OPC_MOV);
    mov->cat1.src_type = TYPE_U32;
    mov->cat1.dst_type = TYPE_U32;
    mov->barrier_class = IR3_BARRIER_ARRAY_R;
    mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
    ir3_reg_create(mov, 0, 0);
    src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
            COND(address, IR3_REG_RELATIV));
    src->instr = arr->last_write;
    src->size = arr->length;
    src->array.id = arr->id;
    src->array.offset = n;

    if (address)
        ir3_instr_set_address(mov, address);

    return mov;
}

/* relative (indirect) if address!=NULL */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
        struct ir3_instruction *src, struct ir3_instruction *address)
{
    struct ir3_block *block = ctx->block;
    struct ir3_instruction *mov;
    struct ir3_register *dst;

    /* if not a relative store, don't create an extra mov, since that
     * ends up being difficult for cp to remove.
     */
    if (!address) {
        dst = src->regs[0];

        src->barrier_class |= IR3_BARRIER_ARRAY_W;
        src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

        dst->flags |= IR3_REG_ARRAY;
        dst->instr = arr->last_write;
        dst->size = arr->length;
        dst->array.id = arr->id;
        dst->array.offset = n;

        arr->last_write = src;

        array_insert(block, block->keeps, src);

        return;
    }

    mov = ir3_instr_create(block, OPC_MOV);
    mov->cat1.src_type = TYPE_U32;
    mov->cat1.dst_type = TYPE_U32;
    mov->barrier_class = IR3_BARRIER_ARRAY_W;
    mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
    dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
            COND(address, IR3_REG_RELATIV));
    dst->instr = arr->last_write;
    dst->size = arr->length;
    dst->array.id = arr->id;
    dst->array.offset = n;
    ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;

    if (address)
        ir3_instr_set_address(mov, address);

    arr->last_write = mov;

    /* the array store may only matter to something in an earlier
     * block (ie. loops), but since arrays are not in SSA, the depth
     * pass won't know this..  so keep all array stores:
     */
    array_insert(block, block->keeps, mov);
}