vc4: Fix leak of the compiled shader programs in the cache.
[mesa.git] src/gallium/drivers/vc4/vc4_program.c
1 /*
2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include <inttypes.h>
26 #include "pipe/p_state.h"
27 #include "util/u_format.h"
28 #include "util/u_hash.h"
29 #include "util/u_memory.h"
30 #include "util/u_pack_color.h"
31 #include "util/format_srgb.h"
32 #include "util/ralloc.h"
33 #include "util/hash_table.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_lowering.h"
37
38 #include "vc4_context.h"
39 #include "vc4_qpu.h"
40 #include "vc4_qir.h"
41 #ifdef USE_VC4_SIMULATOR
42 #include "simpenrose/simpenrose.h"
43 #endif
44
45 struct vc4_key {
46 struct vc4_uncompiled_shader *shader_state;
47 struct {
48 enum pipe_format format;
49 unsigned compare_mode:1;
50 unsigned compare_func:3;
51 unsigned wrap_s:3;
52 unsigned wrap_t:3;
53 uint8_t swizzle[4];
54 } tex[VC4_MAX_TEXTURE_SAMPLERS];
55 uint8_t ucp_enables;
56 };
57
58 struct vc4_fs_key {
59 struct vc4_key base;
60 enum pipe_format color_format;
61 bool depth_enabled;
62 bool stencil_enabled;
63 bool stencil_twoside;
64 bool stencil_full_writemasks;
65 bool is_points;
66 bool is_lines;
67 bool alpha_test;
68 bool point_coord_upper_left;
69 bool light_twoside;
70 uint8_t alpha_test_func;
71 uint8_t logicop_func;
72 uint32_t point_sprite_mask;
73
74 struct pipe_rt_blend_state blend;
75 };
76
77 struct vc4_vs_key {
78 struct vc4_key base;
79
80 /**
81 * This is a proxy for the array of FS input semantics, which is
82 * larger than we would want to put in the key.
83 */
84 uint64_t compiled_fs_id;
85
86 enum pipe_format attr_formats[8];
87 bool is_coord;
88 bool per_vertex_point_size;
89 };
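
/* These key structs capture all of the non-shader state that affects code
 * generation. Each distinct key value that gets compiled becomes a separate
 * shader variant, and (per the commit message above) the key is also what
 * the cache of compiled variants is looked up by, so every field here
 * should be something that genuinely requires different generated code.
 */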
90
91 static void
92 resize_qreg_array(struct vc4_compile *c,
93 struct qreg **regs,
94 uint32_t *size,
95 uint32_t decl_size)
96 {
97 if (*size >= decl_size)
98 return;
99
100 uint32_t old_size = *size;
101 *size = MAX2(*size * 2, decl_size);
102 *regs = reralloc(c, *regs, struct qreg, *size);
103 if (!*regs) {
104 fprintf(stderr, "Malloc failure\n");
105 abort();
106 }
107
108 for (uint32_t i = old_size; i < *size; i++)
109 (*regs)[i] = c->undef;
110 }
111
112 static struct qreg
113 add_uniform(struct vc4_compile *c,
114 enum quniform_contents contents,
115 uint32_t data)
116 {
117 for (int i = 0; i < c->num_uniforms; i++) {
118 if (c->uniform_contents[i] == contents &&
119 c->uniform_data[i] == data) {
120 return (struct qreg) { QFILE_UNIF, i };
121 }
122 }
123
124 uint32_t uniform = c->num_uniforms++;
125 struct qreg u = { QFILE_UNIF, uniform };
126
127 if (uniform >= c->uniform_array_size) {
128 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
129 c->uniform_array_size * 2);
130
131 c->uniform_data = reralloc(c, c->uniform_data,
132 uint32_t,
133 c->uniform_array_size);
134 c->uniform_contents = reralloc(c, c->uniform_contents,
135 enum quniform_contents,
136 c->uniform_array_size);
137 }
138
139 c->uniform_contents[uniform] = contents;
140 c->uniform_data[uniform] = data;
141
142 return u;
143 }
144
145 static struct qreg
146 get_temp_for_uniform(struct vc4_compile *c, enum quniform_contents contents,
147 uint32_t data)
148 {
149 struct qreg u = add_uniform(c, contents, data);
150 struct qreg t = qir_MOV(c, u);
151 return t;
152 }
153
154 static struct qreg
155 qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
156 {
157 return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui);
158 }
159
160 static struct qreg
161 qir_uniform_f(struct vc4_compile *c, float f)
162 {
163 return qir_uniform_ui(c, fui(f));
164 }
165
166 static struct qreg
167 indirect_uniform_load(struct vc4_compile *c,
168 struct tgsi_full_src_register *src, int swiz)
169 {
170 struct tgsi_ind_register *indirect = &src->Indirect;
171 struct vc4_compiler_ubo_range *range = &c->ubo_ranges[indirect->ArrayID];
172 if (!range->used) {
173 range->used = true;
174 range->dst_offset = c->next_ubo_dst_offset;
175 c->next_ubo_dst_offset += range->size;
176 c->num_ubo_ranges++;
177 }
178
179 assert(src->Register.Indirect);
180 assert(indirect->File == TGSI_FILE_ADDRESS);
181
182 struct qreg addr_val = c->addr[indirect->Swizzle];
183 struct qreg indirect_offset =
184 qir_ADD(c, addr_val, qir_uniform_ui(c,
185 range->dst_offset +
186 (src->Register.Index * 16) +
187 swiz * 4));
188 indirect_offset = qir_MIN(c, indirect_offset,
189 qir_uniform_ui(c, range->dst_offset + range->size - 4));
190
191 qir_TEX_DIRECT(c, indirect_offset, add_uniform(c, QUNIFORM_UBO_ADDR, 0));
192 struct qreg r4 = qir_TEX_RESULT(c);
193 c->num_texture_samples++;
194 return qir_MOV(c, r4);
195 }
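
/* Worked example of the addressing math above, for illustration: assume an
 * array with ArrayID 1 declared starting at CONST[0] and 16 vec4s long, so
 * range->size is 256 bytes and dst_offset came out as 0. Reading
 * CONST[ADDR[0].x + 2].y with ADDR[0].x == 3 (stored by ARL as the byte
 * offset 48) computes 0 + 48 + 2*16 + 1*4 == 84, and the qir_MIN() clamps
 * any out-of-range index to dst_offset + range->size - 4 == 252, the last
 * in-bounds dword of the uploaded range.
 */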
196
197 static struct qreg
198 get_src(struct vc4_compile *c, unsigned tgsi_op,
199 struct tgsi_full_src_register *full_src, int i)
200 {
201 struct tgsi_src_register *src = &full_src->Register;
202 struct qreg r = c->undef;
203
204 uint32_t s = i;
205 switch (i) {
206 case TGSI_SWIZZLE_X:
207 s = src->SwizzleX;
208 break;
209 case TGSI_SWIZZLE_Y:
210 s = src->SwizzleY;
211 break;
212 case TGSI_SWIZZLE_Z:
213 s = src->SwizzleZ;
214 break;
215 case TGSI_SWIZZLE_W:
216 s = src->SwizzleW;
217 break;
218 default:
219 abort();
220 }
221
222 switch (src->File) {
223 case TGSI_FILE_NULL:
224 return r;
225 case TGSI_FILE_TEMPORARY:
226 r = c->temps[src->Index * 4 + s];
227 break;
228 case TGSI_FILE_IMMEDIATE:
229 r = c->consts[src->Index * 4 + s];
230 break;
231 case TGSI_FILE_CONSTANT:
232 if (src->Indirect) {
233 r = indirect_uniform_load(c, full_src, s);
234 } else {
235 r = get_temp_for_uniform(c, QUNIFORM_UNIFORM,
236 src->Index * 4 + s);
237 }
238 break;
239 case TGSI_FILE_INPUT:
240 r = c->inputs[src->Index * 4 + s];
241 break;
242 case TGSI_FILE_SAMPLER:
243 case TGSI_FILE_SAMPLER_VIEW:
244 r = c->undef;
245 break;
246 default:
247 fprintf(stderr, "unknown src file %d\n", src->File);
248 abort();
249 }
250
251 if (src->Absolute)
252 r = qir_FMAXABS(c, r, r);
253
254 if (src->Negate) {
255 switch (tgsi_opcode_infer_src_type(tgsi_op)) {
256 case TGSI_TYPE_SIGNED:
257 case TGSI_TYPE_UNSIGNED:
258 r = qir_SUB(c, qir_uniform_ui(c, 0), r);
259 break;
260 default:
261 r = qir_FSUB(c, qir_uniform_f(c, 0.0), r);
262 break;
263 }
264 }
265
266 return r;
267 }
268
269
270 static void
271 update_dst(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst,
272 int i, struct qreg val)
273 {
274 struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;
275
276 assert(!tgsi_dst->Indirect);
277
278 switch (tgsi_dst->File) {
279 case TGSI_FILE_TEMPORARY:
280 c->temps[tgsi_dst->Index * 4 + i] = val;
281 break;
282 case TGSI_FILE_OUTPUT:
283 c->outputs[tgsi_dst->Index * 4 + i] = val;
284 c->num_outputs = MAX2(c->num_outputs,
285 tgsi_dst->Index * 4 + i + 1);
286 break;
287 case TGSI_FILE_ADDRESS:
288 assert(tgsi_dst->Index == 0);
289 c->addr[i] = val;
290 break;
291 default:
292 fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
293 abort();
294 }
295 }
296
297 static struct qreg
298 get_swizzled_channel(struct vc4_compile *c,
299 struct qreg *srcs, int swiz)
300 {
301 switch (swiz) {
302 default:
303 case UTIL_FORMAT_SWIZZLE_NONE:
304 fprintf(stderr, "warning: unknown swizzle\n");
305 /* FALLTHROUGH */
306 case UTIL_FORMAT_SWIZZLE_0:
307 return qir_uniform_f(c, 0.0);
308 case UTIL_FORMAT_SWIZZLE_1:
309 return qir_uniform_f(c, 1.0);
310 case UTIL_FORMAT_SWIZZLE_X:
311 case UTIL_FORMAT_SWIZZLE_Y:
312 case UTIL_FORMAT_SWIZZLE_Z:
313 case UTIL_FORMAT_SWIZZLE_W:
314 return srcs[swiz];
315 }
316 }
317
318 static struct qreg
319 tgsi_to_qir_alu(struct vc4_compile *c,
320 struct tgsi_full_instruction *tgsi_inst,
321 enum qop op, struct qreg *src, int i)
322 {
323 struct qreg dst = qir_get_temp(c);
324 qir_emit(c, qir_inst4(op, dst,
325 src[0 * 4 + i],
326 src[1 * 4 + i],
327 src[2 * 4 + i],
328 c->undef));
329 return dst;
330 }
331
332 static struct qreg
333 tgsi_to_qir_scalar(struct vc4_compile *c,
334 struct tgsi_full_instruction *tgsi_inst,
335 enum qop op, struct qreg *src, int i)
336 {
337 struct qreg dst = qir_get_temp(c);
338 qir_emit(c, qir_inst(op, dst,
339 src[0 * 4 + 0],
340 c->undef));
341 return dst;
342 }
343
344 static struct qreg
345 tgsi_to_qir_rcp(struct vc4_compile *c,
346 struct tgsi_full_instruction *tgsi_inst,
347 enum qop op, struct qreg *src, int i)
348 {
349 struct qreg x = src[0 * 4 + 0];
350 struct qreg r = qir_RCP(c, x);
351
352 /* Apply a Newton-Raphson step to improve the accuracy. */
353 r = qir_FMUL(c, r, qir_FSUB(c,
354 qir_uniform_f(c, 2.0),
355 qir_FMUL(c, x, r)));
356
357 return r;
358 }
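
/* The Newton-Raphson step above comes from f(r) = 1/r - x, whose update
 * r' = r - f(r)/f'(r) simplifies to r' = r * (2 - x*r); one step roughly
 * doubles the number of correct bits in the hardware's RCP estimate.
 * E.g. for x = 3 with estimate r = 0.34:
 *   r' = 0.34 * (2 - 3*0.34) = 0.34 * 0.98 = 0.3332   (exact: 0.3333...)
 */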
359
360 static struct qreg
361 tgsi_to_qir_rsq(struct vc4_compile *c,
362 struct tgsi_full_instruction *tgsi_inst,
363 enum qop op, struct qreg *src, int i)
364 {
365 struct qreg x = src[0 * 4 + 0];
366 struct qreg r = qir_RSQ(c, x);
367
368 /* Apply a Newton-Raphson step to improve the accuracy. */
369 r = qir_FMUL(c, r, qir_FSUB(c,
370 qir_uniform_f(c, 1.5),
371 qir_FMUL(c,
372 qir_uniform_f(c, 0.5),
373 qir_FMUL(c, x,
374 qir_FMUL(c, r, r)))));
375
376 return r;
377 }
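
/* Same idea for RSQ: with f(r) = 1/r^2 - x, the Newton-Raphson update
 * simplifies to r' = r * (1.5 - 0.5*x*r*r), which is exactly the
 * expression built above.
 */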
378
379 static struct qreg
380 qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
381 {
382 struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
383 struct qreg high = qir_POW(c,
384 qir_FMUL(c,
385 qir_FADD(c,
386 srgb,
387 qir_uniform_f(c, 0.055)),
388 qir_uniform_f(c, 1.0 / 1.055)),
389 qir_uniform_f(c, 2.4));
390
391 qir_SF(c, qir_FSUB(c, srgb, qir_uniform_f(c, 0.04045)));
392 return qir_SEL_X_Y_NS(c, low, high);
393 }
394
395 static struct qreg
396 qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
397 {
398 struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
399 struct qreg high = qir_FSUB(c,
400 qir_FMUL(c,
401 qir_uniform_f(c, 1.055),
402 qir_POW(c,
403 linear,
404 qir_uniform_f(c, 0.41666))),
405 qir_uniform_f(c, 0.055));
406
407 qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
408 return qir_SEL_X_Y_NS(c, low, high);
409 }
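
/* Both helpers implement the standard piecewise sRGB transfer function,
 * using SF on the distance from the breakpoint plus SEL_X_Y_NS to pick
 * between the linear toe and the power-law segment:
 *
 *   decode: linear = srgb <= 0.04045 ? srgb / 12.92
 *                                    : ((srgb + 0.055) / 1.055)^2.4
 *   encode: srgb = linear <= 0.0031308 ? linear * 12.92
 *                                      : 1.055 * linear^(1/2.4) - 0.055
 */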
410
411 static struct qreg
412 tgsi_to_qir_umul(struct vc4_compile *c,
413 struct tgsi_full_instruction *tgsi_inst,
414 enum qop op, struct qreg *src, int i)
415 {
416 struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
417 qir_uniform_ui(c, 16));
418 struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
419 qir_uniform_ui(c, 0xffff));
420 struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
421 qir_uniform_ui(c, 16));
422 struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
423 qir_uniform_ui(c, 0xffff));
424
425 struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
426 struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
427 struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
428
429 return qir_ADD(c, lolo, qir_SHL(c,
430 qir_ADD(c, hilo, lohi),
431 qir_uniform_ui(c, 16)));
432 }
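
/* The QPU multiplier only does 24x24 -> 32 bit multiplies (MUL24), so the
 * 32-bit UMUL above splits each operand into 16-bit halves:
 *
 *   a * b mod 2^32 = (ah*2^16 + al) * (bh*2^16 + bl) mod 2^32
 *                  = al*bl + ((ah*bl + al*bh) << 16)
 *
 * The ah*bh term shifts entirely past bit 31 and is dropped, and every
 * partial product fits within MUL24's operand range.
 */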
433
434 static struct qreg
435 tgsi_to_qir_idiv(struct vc4_compile *c,
436 struct tgsi_full_instruction *tgsi_inst,
437 enum qop op, struct qreg *src, int i)
438 {
439 return qir_FTOI(c, qir_FMUL(c,
440 qir_ITOF(c, src[0 * 4 + i]),
441 qir_RCP(c, qir_ITOF(c, src[1 * 4 + i]))));
442 }
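
/* Note that routing integer division through float RCP like this is only
 * exact while the operands and quotient survive the round trip through a
 * float's 24-bit mantissa; very large values can come back off by one.
 */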
443
444 static struct qreg
445 tgsi_to_qir_ineg(struct vc4_compile *c,
446 struct tgsi_full_instruction *tgsi_inst,
447 enum qop op, struct qreg *src, int i)
448 {
449 return qir_SUB(c, qir_uniform_ui(c, 0), src[0 * 4 + i]);
450 }
451
452 static struct qreg
453 tgsi_to_qir_seq(struct vc4_compile *c,
454 struct tgsi_full_instruction *tgsi_inst,
455 enum qop op, struct qreg *src, int i)
456 {
457 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
458 return qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
459 }
460
461 static struct qreg
462 tgsi_to_qir_sne(struct vc4_compile *c,
463 struct tgsi_full_instruction *tgsi_inst,
464 enum qop op, struct qreg *src, int i)
465 {
466 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
467 return qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
468 }
469
470 static struct qreg
471 tgsi_to_qir_slt(struct vc4_compile *c,
472 struct tgsi_full_instruction *tgsi_inst,
473 enum qop op, struct qreg *src, int i)
474 {
475 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
476 return qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
477 }
478
479 static struct qreg
480 tgsi_to_qir_sge(struct vc4_compile *c,
481 struct tgsi_full_instruction *tgsi_inst,
482 enum qop op, struct qreg *src, int i)
483 {
484 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
485 return qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
486 }
487
488 static struct qreg
489 tgsi_to_qir_fseq(struct vc4_compile *c,
490 struct tgsi_full_instruction *tgsi_inst,
491 enum qop op, struct qreg *src, int i)
492 {
493 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
494 return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
495 }
496
497 static struct qreg
498 tgsi_to_qir_fsne(struct vc4_compile *c,
499 struct tgsi_full_instruction *tgsi_inst,
500 enum qop op, struct qreg *src, int i)
501 {
502 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
503 return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
504 }
505
506 static struct qreg
507 tgsi_to_qir_fslt(struct vc4_compile *c,
508 struct tgsi_full_instruction *tgsi_inst,
509 enum qop op, struct qreg *src, int i)
510 {
511 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
512 return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
513 }
514
515 static struct qreg
516 tgsi_to_qir_fsge(struct vc4_compile *c,
517 struct tgsi_full_instruction *tgsi_inst,
518 enum qop op, struct qreg *src, int i)
519 {
520 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
521 return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
522 }
523
524 static struct qreg
525 tgsi_to_qir_useq(struct vc4_compile *c,
526 struct tgsi_full_instruction *tgsi_inst,
527 enum qop op, struct qreg *src, int i)
528 {
529 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
530 return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
531 }
532
533 static struct qreg
534 tgsi_to_qir_usne(struct vc4_compile *c,
535 struct tgsi_full_instruction *tgsi_inst,
536 enum qop op, struct qreg *src, int i)
537 {
538 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
539 return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
540 }
541
542 static struct qreg
543 tgsi_to_qir_islt(struct vc4_compile *c,
544 struct tgsi_full_instruction *tgsi_inst,
545 enum qop op, struct qreg *src, int i)
546 {
547 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
548 return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
549 }
550
551 static struct qreg
552 tgsi_to_qir_isge(struct vc4_compile *c,
553 struct tgsi_full_instruction *tgsi_inst,
554 enum qop op, struct qreg *src, int i)
555 {
556 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
557 return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
558 }
559
560 static struct qreg
561 tgsi_to_qir_cmp(struct vc4_compile *c,
562 struct tgsi_full_instruction *tgsi_inst,
563 enum qop op, struct qreg *src, int i)
564 {
565 qir_SF(c, src[0 * 4 + i]);
566 return qir_SEL_X_Y_NS(c,
567 src[1 * 4 + i],
568 src[2 * 4 + i]);
569 }
570
571 static struct qreg
572 tgsi_to_qir_ucmp(struct vc4_compile *c,
573 struct tgsi_full_instruction *tgsi_inst,
574 enum qop op, struct qreg *src, int i)
575 {
576 qir_SF(c, src[0 * 4 + i]);
577 return qir_SEL_X_Y_ZC(c,
578 src[1 * 4 + i],
579 src[2 * 4 + i]);
580 }
581
582 static struct qreg
583 tgsi_to_qir_mad(struct vc4_compile *c,
584 struct tgsi_full_instruction *tgsi_inst,
585 enum qop op, struct qreg *src, int i)
586 {
587 return qir_FADD(c,
588 qir_FMUL(c,
589 src[0 * 4 + i],
590 src[1 * 4 + i]),
591 src[2 * 4 + i]);
592 }
593
594 static struct qreg
595 tgsi_to_qir_lrp(struct vc4_compile *c,
596 struct tgsi_full_instruction *tgsi_inst,
597 enum qop op, struct qreg *src, int i)
598 {
599 struct qreg src0 = src[0 * 4 + i];
600 struct qreg src1 = src[1 * 4 + i];
601 struct qreg src2 = src[2 * 4 + i];
602
603 /* LRP is:
604 * src0 * src1 + (1 - src0) * src2.
605 * -> src0 * src1 + src2 - src0 * src2
606 * -> src2 + src0 * (src1 - src2)
607 */
608 return qir_FADD(c, src2, qir_FMUL(c, src0, qir_FSUB(c, src1, src2)));
609
610 }
611
612 static void
613 tgsi_to_qir_tex(struct vc4_compile *c,
614 struct tgsi_full_instruction *tgsi_inst,
615 enum qop op, struct qreg *src)
616 {
617 assert(!tgsi_inst->Instruction.Saturate);
618
619 struct qreg s = src[0 * 4 + 0];
620 struct qreg t = src[0 * 4 + 1];
621 struct qreg r = src[0 * 4 + 2];
622 uint32_t unit = tgsi_inst->Src[1].Register.Index;
623 bool is_txl = tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL;
624
625 struct qreg proj = c->undef;
626 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
627 proj = qir_RCP(c, src[0 * 4 + 3]);
628 s = qir_FMUL(c, s, proj);
629 t = qir_FMUL(c, t, proj);
630 }
631
632 struct qreg texture_u[] = {
633 add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
634 add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
635 add_uniform(c, QUNIFORM_CONSTANT, 0),
636 add_uniform(c, QUNIFORM_CONSTANT, 0),
637 };
638 uint32_t next_texture_u = 0;
639
640 /* There is no native support for GL texture rectangle coordinates, so
641 * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
642 * 1]).
643 */
644 if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT ||
645 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
646 s = qir_FMUL(c, s,
647 get_temp_for_uniform(c,
648 QUNIFORM_TEXRECT_SCALE_X,
649 unit));
650 t = qir_FMUL(c, t,
651 get_temp_for_uniform(c,
652 QUNIFORM_TEXRECT_SCALE_Y,
653 unit));
654 }
655
656 if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
657 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
658 is_txl) {
659 texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2,
660 unit | (is_txl << 16));
661 }
662
663 if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
664 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
665 struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r);
666 struct qreg rcp_ma = qir_RCP(c, ma);
667 s = qir_FMUL(c, s, rcp_ma);
668 t = qir_FMUL(c, t, rcp_ma);
669 r = qir_FMUL(c, r, rcp_ma);
670
671 qir_TEX_R(c, r, texture_u[next_texture_u++]);
672 } else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
673 c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
674 c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
675 c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
676 qir_TEX_R(c, get_temp_for_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
677 texture_u[next_texture_u++]);
678 }
679
680 if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
681 s = qir_FMIN(c, qir_FMAX(c, s, qir_uniform_f(c, 0.0)),
682 qir_uniform_f(c, 1.0));
683 }
684
685 if (c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
686 t = qir_FMIN(c, qir_FMAX(c, t, qir_uniform_f(c, 0.0)),
687 qir_uniform_f(c, 1.0));
688 }
689
690 qir_TEX_T(c, t, texture_u[next_texture_u++]);
691
692 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
693 tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL)
694 qir_TEX_B(c, src[0 * 4 + 3], texture_u[next_texture_u++]);
695
696 qir_TEX_S(c, s, texture_u[next_texture_u++]);
697
698 c->num_texture_samples++;
699 struct qreg r4 = qir_TEX_RESULT(c);
700
701 enum pipe_format format = c->key->tex[unit].format;
702
703 struct qreg unpacked[4];
704 if (util_format_is_depth_or_stencil(format)) {
705 struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
706 qir_uniform_ui(c, 8)));
707 struct qreg normalized = qir_FMUL(c, depthf,
708 qir_uniform_f(c, 1.0f/0xffffff));
709
710 struct qreg depth_output;
711
712 struct qreg one = qir_uniform_f(c, 1.0f);
713 if (c->key->tex[unit].compare_mode) {
714 struct qreg compare = src[0 * 4 + 2];
715
716 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP)
717 compare = qir_FMUL(c, compare, proj);
718
719 switch (c->key->tex[unit].compare_func) {
720 case PIPE_FUNC_NEVER:
721 depth_output = qir_uniform_f(c, 0.0f);
722 break;
723 case PIPE_FUNC_ALWAYS:
724 depth_output = one;
725 break;
726 case PIPE_FUNC_EQUAL:
727 qir_SF(c, qir_FSUB(c, compare, normalized));
728 depth_output = qir_SEL_X_0_ZS(c, one);
729 break;
730 case PIPE_FUNC_NOTEQUAL:
731 qir_SF(c, qir_FSUB(c, compare, normalized));
732 depth_output = qir_SEL_X_0_ZC(c, one);
733 break;
734 case PIPE_FUNC_GREATER:
735 qir_SF(c, qir_FSUB(c, compare, normalized));
736 depth_output = qir_SEL_X_0_NC(c, one);
737 break;
738 case PIPE_FUNC_GEQUAL:
739 qir_SF(c, qir_FSUB(c, normalized, compare));
740 depth_output = qir_SEL_X_0_NS(c, one);
741 break;
742 case PIPE_FUNC_LESS:
743 qir_SF(c, qir_FSUB(c, compare, normalized));
744 depth_output = qir_SEL_X_0_NS(c, one);
745 break;
746 case PIPE_FUNC_LEQUAL:
747 qir_SF(c, qir_FSUB(c, normalized, compare));
748 depth_output = qir_SEL_X_0_NC(c, one);
749 break;
750 }
751 } else {
752 depth_output = normalized;
753 }
754
755 for (int i = 0; i < 4; i++)
756 unpacked[i] = depth_output;
757 } else {
758 for (int i = 0; i < 4; i++)
759 unpacked[i] = qir_R4_UNPACK(c, r4, i);
760 }
761
762 const uint8_t *format_swiz = vc4_get_format_swizzle(format);
763 struct qreg texture_output[4];
764 for (int i = 0; i < 4; i++) {
765 texture_output[i] = get_swizzled_channel(c, unpacked,
766 format_swiz[i]);
767 }
768
769 if (util_format_is_srgb(format)) {
770 for (int i = 0; i < 3; i++)
771 texture_output[i] = qir_srgb_decode(c,
772 texture_output[i]);
773 }
774
775 for (int i = 0; i < 4; i++) {
776 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
777 continue;
778
779 update_dst(c, tgsi_inst, i,
780 get_swizzled_channel(c, texture_output,
781 c->key->tex[unit].swizzle[i]));
782 }
783 }
784
785 static struct qreg
786 tgsi_to_qir_trunc(struct vc4_compile *c,
787 struct tgsi_full_instruction *tgsi_inst,
788 enum qop op, struct qreg *src, int i)
789 {
790 return qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
791 }
792
793 /**
794 * Computes x - floor(x), which is tricky because our FTOI truncates (rounds
795 * to zero).
796 */
797 static struct qreg
798 tgsi_to_qir_frc(struct vc4_compile *c,
799 struct tgsi_full_instruction *tgsi_inst,
800 enum qop op, struct qreg *src, int i)
801 {
802 struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
803 struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
804 qir_SF(c, diff);
805 return qir_SEL_X_Y_NS(c,
806 qir_FADD(c, diff, qir_uniform_f(c, 1.0)),
807 diff);
808 }
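
/* Worked example: for x = -1.25, trunc = -1.0 and diff = -0.25, so the
 * negative flag selects diff + 1.0 = 0.75, which is the correct
 * x - floor(x) = -1.25 - (-2.0).
 */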
809
810 /**
811 * Computes floor(x), which is tricky because our FTOI truncates (rounds to
812 * zero).
813 */
814 static struct qreg
815 tgsi_to_qir_flr(struct vc4_compile *c,
816 struct tgsi_full_instruction *tgsi_inst,
817 enum qop op, struct qreg *src, int i)
818 {
819 struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
820
821 /* This will be < 0 if we truncated and the truncation was of a value
822 * that was < 0 in the first place.
823 */
824 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));
825
826 return qir_SEL_X_Y_NS(c,
827 qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)),
828 trunc);
829 }
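
/* Worked example: for x = -1.25, trunc = -1.0 and x - trunc = -0.25 sets
 * the negative flag, giving trunc - 1.0 = -2.0 = floor(-1.25). For
 * x = 1.25 the difference is +0.25, so trunc = 1.0 is returned as-is.
 */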
830
831 /**
832 * Computes ceil(x), which is tricky because our FTOI truncates (rounds to
833 * zero).
834 */
835 static struct qreg
836 tgsi_to_qir_ceil(struct vc4_compile *c,
837 struct tgsi_full_instruction *tgsi_inst,
838 enum qop op, struct qreg *src, int i)
839 {
840 struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
841
842 /* This will be < 0 if we truncated and the truncation was of a value
843 * that was > 0 in the first place.
844 */
845 qir_SF(c, qir_FSUB(c, trunc, src[0 * 4 + i]));
846
847 return qir_SEL_X_Y_NS(c,
848 qir_FADD(c, trunc, qir_uniform_f(c, 1.0)),
849 trunc);
850 }
851
852 static struct qreg
853 tgsi_to_qir_abs(struct vc4_compile *c,
854 struct tgsi_full_instruction *tgsi_inst,
855 enum qop op, struct qreg *src, int i)
856 {
857 struct qreg arg = src[0 * 4 + i];
858 return qir_FMAXABS(c, arg, arg);
859 }
860
861 /* Note that this instruction replicates its result from the x channel */
862 static struct qreg
863 tgsi_to_qir_sin(struct vc4_compile *c,
864 struct tgsi_full_instruction *tgsi_inst,
865 enum qop op, struct qreg *src, int i)
866 {
867 float coeff[] = {
868 -2.0 * M_PI,
869 pow(2.0 * M_PI, 3) / (3 * 2 * 1),
870 -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
871 pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
872 -pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
873 };
874
875 struct qreg scaled_x =
876 qir_FMUL(c,
877 src[0 * 4 + 0],
878 qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
879
880 struct qreg x = qir_FADD(c,
881 tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0),
882 qir_uniform_f(c, -0.5));
883 struct qreg x2 = qir_FMUL(c, x, x);
884 struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
885 for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
886 x = qir_FMUL(c, x, x2);
887 sum = qir_FADD(c,
888 sum,
889 qir_FMUL(c,
890 x,
891 qir_uniform_f(c, coeff[i])));
892 }
893 return sum;
894 }
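
/* The math above: scaled_x folds the input into whole turns, the FRC and
 * -0.5 bias put x in [-0.5, 0.5), and the odd-power series then evaluates
 *
 *   sin(2*pi*(x + 0.5)) = -sin(2*pi*x)
 *                       = -(2*pi)*x + (2*pi)^3/3! * x^3 - (2*pi)^5/5! * x^5 + ...
 *
 * which is what coeff[] encodes. tgsi_to_qir_cos() below is the same
 * construction using the even-power cosine series.
 */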
895
896 /* Note that this instruction replicates its result from the x channel */
897 static struct qreg
898 tgsi_to_qir_cos(struct vc4_compile *c,
899 struct tgsi_full_instruction *tgsi_inst,
900 enum qop op, struct qreg *src, int i)
901 {
902 float coeff[] = {
903 -1.0f,
904 pow(2.0 * M_PI, 2) / (2 * 1),
905 -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
906 pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
907 -pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
908 pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
909 };
910
911 struct qreg scaled_x =
912 qir_FMUL(c, src[0 * 4 + 0],
913 qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
914 struct qreg x_frac = qir_FADD(c,
915 tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0),
916 qir_uniform_f(c, -0.5));
917
918 struct qreg sum = qir_uniform_f(c, coeff[0]);
919 struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
920 struct qreg x = x2; /* Current x^2, x^4, or x^6 */
921 for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
922 if (i != 1)
923 x = qir_FMUL(c, x, x2);
924
925 struct qreg mul = qir_FMUL(c,
926 x,
927 qir_uniform_f(c, coeff[i]));
928 sum = qir_FADD(c, sum, mul);
932 }
933 return sum;
934 }
935
936 static struct qreg
937 tgsi_to_qir_clamp(struct vc4_compile *c,
938 struct tgsi_full_instruction *tgsi_inst,
939 enum qop op, struct qreg *src, int i)
940 {
941 return qir_FMAX(c, qir_FMIN(c,
942 src[0 * 4 + i],
943 src[2 * 4 + i]),
944 src[1 * 4 + i]);
945 }
946
947 static struct qreg
948 tgsi_to_qir_ssg(struct vc4_compile *c,
949 struct tgsi_full_instruction *tgsi_inst,
950 enum qop op, struct qreg *src, int i)
951 {
952 qir_SF(c, src[0 * 4 + i]);
953 return qir_SEL_X_Y_NC(c,
954 qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0)),
955 qir_uniform_f(c, -1.0));
956 }
957
958 /* Compare to tgsi_to_qir_flr() for the floor logic. */
959 static struct qreg
960 tgsi_to_qir_arl(struct vc4_compile *c,
961 struct tgsi_full_instruction *tgsi_inst,
962 enum qop op, struct qreg *src, int i)
963 {
964 struct qreg trunc = qir_FTOI(c, src[0 * 4 + i]);
965 struct qreg scaled = qir_SHL(c, trunc, qir_uniform_ui(c, 4));
966
967 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], qir_ITOF(c, trunc)));
968
969 return qir_SEL_X_Y_NS(c, qir_SUB(c, scaled, qir_uniform_ui(c, 4)),
970 scaled);
971 }
972
973 static struct qreg
974 tgsi_to_qir_uarl(struct vc4_compile *c,
975 struct tgsi_full_instruction *tgsi_inst,
976 enum qop op, struct qreg *src, int i)
977 {
978 return qir_SHL(c, src[0 * 4 + i], qir_uniform_ui(c, 4));
979 }
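
/* ARL and UARL both scale the index by 16 (<< 4): the address register
 * holds a byte offset, at 16 bytes per vec4 element, which lines up with
 * the "Index * 16" addressing math in indirect_uniform_load().
 */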
980
981 static void
982 emit_vertex_input(struct vc4_compile *c, int attr)
983 {
984 enum pipe_format format = c->vs_key->attr_formats[attr];
985 struct qreg vpm_reads[4];
986
987 /* Right now, we're setting the VPM offsets to be 16 bytes wide every
988 * time, so we always read 4 32-bit VPM entries.
989 */
990 for (int i = 0; i < 4; i++) {
991 vpm_reads[i] = qir_get_temp(c);
992 qir_emit(c, qir_inst(QOP_VPM_READ,
993 vpm_reads[i],
994 c->undef,
995 c->undef));
996 c->num_inputs++;
997 }
998
999 bool format_warned = false;
1000 const struct util_format_description *desc =
1001 util_format_description(format);
1002
1003 for (int i = 0; i < 4; i++) {
1004 uint8_t swiz = desc->swizzle[i];
1005 struct qreg result;
1006
1007 if (swiz > UTIL_FORMAT_SWIZZLE_W)
1008 result = get_swizzled_channel(c, vpm_reads, swiz);
1009 else if (desc->channel[swiz].size == 32 &&
1010 desc->channel[swiz].type == UTIL_FORMAT_TYPE_FLOAT) {
1011 result = get_swizzled_channel(c, vpm_reads, swiz);
1012 } else if (desc->channel[swiz].size == 8 &&
1013 (desc->channel[swiz].type == UTIL_FORMAT_TYPE_UNSIGNED ||
1014 desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) &&
1015 desc->channel[swiz].normalized) {
1016 struct qreg vpm = vpm_reads[0];
1017 if (desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED)
1018 vpm = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
1019 result = qir_UNPACK_8(c, vpm, swiz);
1020 } else {
1021 if (!format_warned) {
1022 fprintf(stderr,
1023 "vtx element %d unsupported type: %s\n",
1024 attr, util_format_name(format));
1025 format_warned = true;
1026 }
1027 result = qir_uniform_f(c, 0.0);
1028 }
1029
1030 if (desc->channel[swiz].normalized &&
1031 desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) {
1032 result = qir_FSUB(c,
1033 qir_FMUL(c,
1034 result,
1035 qir_uniform_f(c, 2.0)),
1036 qir_uniform_f(c, 1.0));
1037 }
1038
1039 c->inputs[attr * 4 + i] = result;
1040 }
1041 }
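
/* For the normalized signed-byte path above: the XOR with 0x80808080 turns
 * each signed byte into a biased unsigned one, UNPACK_8 maps it to [0, 1],
 * and the final *2 - 1 recenters to [-1, 1]. E.g. an input byte of 0x80
 * (-128) becomes 0x00 -> 0.0 -> -1.0, and 0x7f (+127) becomes
 * 0xff -> 1.0 -> +1.0.
 */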
1042
1043 static void
1044 tgsi_to_qir_kill_if(struct vc4_compile *c, struct qreg *src, int i)
1045 {
1046 if (c->discard.file == QFILE_NULL)
1047 c->discard = qir_uniform_f(c, 0.0);
1048 qir_SF(c, src[0 * 4 + i]);
1049 c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
1050 c->discard);
1051 }
1052
1053 static void
1054 emit_fragcoord_input(struct vc4_compile *c, int attr)
1055 {
1056 c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
1057 c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
1058 c->inputs[attr * 4 + 2] =
1059 qir_FMUL(c,
1060 qir_ITOF(c, qir_FRAG_Z(c)),
1061 qir_uniform_f(c, 1.0 / 0xffffff));
1062 c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
1063 }
1064
1065 static void
1066 emit_point_coord_input(struct vc4_compile *c, int attr)
1067 {
1068 if (c->point_x.file == QFILE_NULL) {
1069 c->point_x = qir_uniform_f(c, 0.0);
1070 c->point_y = qir_uniform_f(c, 0.0);
1071 }
1072
1073 c->inputs[attr * 4 + 0] = c->point_x;
1074 if (c->fs_key->point_coord_upper_left) {
1075 c->inputs[attr * 4 + 1] = qir_FSUB(c,
1076 qir_uniform_f(c, 1.0),
1077 c->point_y);
1078 } else {
1079 c->inputs[attr * 4 + 1] = c->point_y;
1080 }
1081 c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
1082 c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
1083 }
1084
1085 static struct qreg
1086 emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
1087 uint8_t index, uint8_t swizzle)
1088 {
1089 uint32_t i = c->num_input_semantics++;
1090 struct qreg vary = {
1091 QFILE_VARY,
1092 i
1093 };
1094
1095 if (c->num_input_semantics >= c->input_semantics_array_size) {
1096 c->input_semantics_array_size =
1097 MAX2(4, c->input_semantics_array_size * 2);
1098
1099 c->input_semantics = reralloc(c, c->input_semantics,
1100 struct vc4_varying_semantic,
1101 c->input_semantics_array_size);
1102 }
1103
1104 c->input_semantics[i].semantic = semantic;
1105 c->input_semantics[i].index = index;
1106 c->input_semantics[i].swizzle = swizzle;
1107
1108 return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
1109 }
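
/* A varying read therefore costs two instructions: the hardware hands the
 * shader a plane-equation term that still has to be scaled by the
 * fragment's W (FRAG_W) and offset by the per-varying C coefficient,
 * which is what the FMUL plus VARY_ADD_C pair above performs.
 */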
1110
1111 static void
1112 emit_fragment_input(struct vc4_compile *c, int attr,
1113 struct tgsi_full_declaration *decl)
1114 {
1115 for (int i = 0; i < 4; i++) {
1116 c->inputs[attr * 4 + i] =
1117 emit_fragment_varying(c,
1118 decl->Semantic.Name,
1119 decl->Semantic.Index,
1120 i);
1121 c->num_inputs++;
1122 }
1123 }
1124
1125 static void
1126 emit_face_input(struct vc4_compile *c, int attr)
1127 {
1128 c->inputs[attr * 4 + 0] = qir_FSUB(c,
1129 qir_uniform_f(c, 1.0),
1130 qir_FMUL(c,
1131 qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
1132 qir_uniform_f(c, 2.0)));
1133 c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
1134 c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
1135 c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
1136 }
1137
1138 static void
1139 add_output(struct vc4_compile *c,
1140 uint32_t decl_offset,
1141 uint8_t semantic_name,
1142 uint8_t semantic_index,
1143 uint8_t semantic_swizzle)
1144 {
1145 uint32_t old_array_size = c->outputs_array_size;
1146 resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
1147 decl_offset + 1);
1148
1149 if (old_array_size != c->outputs_array_size) {
1150 c->output_semantics = reralloc(c,
1151 c->output_semantics,
1152 struct vc4_varying_semantic,
1153 c->outputs_array_size);
1154 }
1155
1156 c->output_semantics[decl_offset].semantic = semantic_name;
1157 c->output_semantics[decl_offset].index = semantic_index;
1158 c->output_semantics[decl_offset].swizzle = semantic_swizzle;
1159 }
1160
1161 static void
1162 add_array_info(struct vc4_compile *c, uint32_t array_id,
1163 uint32_t start, uint32_t size)
1164 {
1165 if (array_id >= c->ubo_ranges_array_size) {
1166 c->ubo_ranges_array_size = MAX2(c->ubo_ranges_array_size * 2,
1167 array_id + 1);
1168 c->ubo_ranges = reralloc(c, c->ubo_ranges,
1169 struct vc4_compiler_ubo_range,
1170 c->ubo_ranges_array_size);
1171 }
1172
1173 c->ubo_ranges[array_id].dst_offset = 0;
1174 c->ubo_ranges[array_id].src_offset = start;
1175 c->ubo_ranges[array_id].size = size;
1176 c->ubo_ranges[array_id].used = false;
1177 }
1178
1179 static void
1180 emit_tgsi_declaration(struct vc4_compile *c,
1181 struct tgsi_full_declaration *decl)
1182 {
1183 switch (decl->Declaration.File) {
1184 case TGSI_FILE_TEMPORARY: {
1185 uint32_t old_size = c->temps_array_size;
1186 resize_qreg_array(c, &c->temps, &c->temps_array_size,
1187 (decl->Range.Last + 1) * 4);
1188
1189 for (int i = old_size; i < c->temps_array_size; i++)
1190 c->temps[i] = qir_uniform_ui(c, 0);
1191 break;
1192 }
1193
1194 case TGSI_FILE_INPUT:
1195 resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
1196 (decl->Range.Last + 1) * 4);
1197
1198 for (int i = decl->Range.First;
1199 i <= decl->Range.Last;
1200 i++) {
1201 if (c->stage == QSTAGE_FRAG) {
1202 if (decl->Semantic.Name ==
1203 TGSI_SEMANTIC_POSITION) {
1204 emit_fragcoord_input(c, i);
1205 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1206 emit_face_input(c, i);
1207 } else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
1208 (c->fs_key->point_sprite_mask &
1209 (1 << decl->Semantic.Index))) {
1210 emit_point_coord_input(c, i);
1211 } else {
1212 emit_fragment_input(c, i, decl);
1213 }
1214 } else {
1215 emit_vertex_input(c, i);
1216 }
1217 }
1218 break;
1219
1220 case TGSI_FILE_OUTPUT: {
1221 for (int i = 0; i < 4; i++) {
1222 add_output(c,
1223 decl->Range.First * 4 + i,
1224 decl->Semantic.Name,
1225 decl->Semantic.Index,
1226 i);
1227 }
1228
1229 switch (decl->Semantic.Name) {
1230 case TGSI_SEMANTIC_POSITION:
1231 c->output_position_index = decl->Range.First * 4;
1232 break;
1233 case TGSI_SEMANTIC_CLIPVERTEX:
1234 c->output_clipvertex_index = decl->Range.First * 4;
1235 break;
1236 case TGSI_SEMANTIC_COLOR:
1237 c->output_color_index = decl->Range.First * 4;
1238 break;
1239 case TGSI_SEMANTIC_PSIZE:
1240 c->output_point_size_index = decl->Range.First * 4;
1241 break;
1242 }
1243
1244 break;
1245 }
1246 case TGSI_FILE_CONSTANT:
1247 add_array_info(c,
1248 decl->Array.ArrayID,
1249 decl->Range.First * 16,
1250 (decl->Range.Last -
1251 decl->Range.First + 1) * 16);
1252 break;
1253 }
1254 }
1255 }
1256
1257 static void
1258 emit_tgsi_instruction(struct vc4_compile *c,
1259 struct tgsi_full_instruction *tgsi_inst)
1260 {
1261 static const struct {
1262 enum qop op;
1263 struct qreg (*func)(struct vc4_compile *c,
1264 struct tgsi_full_instruction *tgsi_inst,
1265 enum qop op,
1266 struct qreg *src, int i);
1267 } op_trans[] = {
1268 [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
1269 [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
1270 [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
1271 [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
1272 [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
1273 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
1274 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
1275 [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu },
1276 [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu },
1277 [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu },
1278 [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu },
1279 [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu },
1280 [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu },
1281 [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu },
1282 [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu },
1283 [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu },
1284 [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu },
1285 [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu },
1286 [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu },
1287
1288 [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul },
1289 [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
1290 [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },
1291
1292 [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
1293 [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
1294 [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
1295 [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
1296 [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq },
1297 [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne },
1298 [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge },
1299 [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt },
1300 [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq },
1301 [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne },
1302 [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge },
1303 [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },
1304
1305 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
1306 [TGSI_OPCODE_UCMP] = { 0, tgsi_to_qir_ucmp },
1307 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
1308 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
1309 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq },
1310 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
1311 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
1312 [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },
1313 [TGSI_OPCODE_TRUNC] = { 0, tgsi_to_qir_trunc },
1314 [TGSI_OPCODE_CEIL] = { 0, tgsi_to_qir_ceil },
1315 [TGSI_OPCODE_FRC] = { 0, tgsi_to_qir_frc },
1316 [TGSI_OPCODE_FLR] = { 0, tgsi_to_qir_flr },
1317 [TGSI_OPCODE_SIN] = { 0, tgsi_to_qir_sin },
1318 [TGSI_OPCODE_COS] = { 0, tgsi_to_qir_cos },
1319 [TGSI_OPCODE_CLAMP] = { 0, tgsi_to_qir_clamp },
1320 [TGSI_OPCODE_SSG] = { 0, tgsi_to_qir_ssg },
1321 [TGSI_OPCODE_ARL] = { 0, tgsi_to_qir_arl },
1322 [TGSI_OPCODE_UARL] = { 0, tgsi_to_qir_uarl },
1323 };
1324 static int asdf = 0;
1325 uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
1326
1327 if (tgsi_op == TGSI_OPCODE_END)
1328 return;
1329
1330 struct qreg src_regs[12];
1331 for (int s = 0; s < 3; s++) {
1332 for (int i = 0; i < 4; i++) {
1333 src_regs[4 * s + i] =
1334 get_src(c, tgsi_inst->Instruction.Opcode,
1335 &tgsi_inst->Src[s], i);
1336 }
1337 }
1338
1339 switch (tgsi_op) {
1340 case TGSI_OPCODE_TEX:
1341 case TGSI_OPCODE_TXP:
1342 case TGSI_OPCODE_TXB:
1343 case TGSI_OPCODE_TXL:
1344 tgsi_to_qir_tex(c, tgsi_inst,
1345 op_trans[tgsi_op].op, src_regs);
1346 return;
1347 case TGSI_OPCODE_KILL:
1348 c->discard = qir_uniform_f(c, 1.0);
1349 return;
1350 case TGSI_OPCODE_KILL_IF:
1351 for (int i = 0; i < 4; i++)
1352 tgsi_to_qir_kill_if(c, src_regs, i);
1353 return;
1354 default:
1355 break;
1356 }
1357
1358 if (tgsi_op >= ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
1359 fprintf(stderr, "unknown tgsi inst: ");
1360 tgsi_dump_instruction(tgsi_inst, asdf++);
1361 fprintf(stderr, "\n");
1362 abort();
1363 }
1364
1365 for (int i = 0; i < 4; i++) {
1366 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
1367 continue;
1368
1369 struct qreg result;
1370
1371 result = op_trans[tgsi_op].func(c, tgsi_inst,
1372 op_trans[tgsi_op].op,
1373 src_regs, i);
1374
1375 if (tgsi_inst->Instruction.Saturate) {
1376 float low = (tgsi_inst->Instruction.Saturate ==
1377 TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
1378 result = qir_FMAX(c,
1379 qir_FMIN(c,
1380 result,
1381 qir_uniform_f(c, 1.0)),
1382 qir_uniform_f(c, low));
1383 }
1384
1385 update_dst(c, tgsi_inst, i, result);
1386 }
1387 }
1388
1389 static void
1390 parse_tgsi_immediate(struct vc4_compile *c, struct tgsi_full_immediate *imm)
1391 {
1392 for (int i = 0; i < 4; i++) {
1393 unsigned n = c->num_consts++;
1394 resize_qreg_array(c, &c->consts, &c->consts_array_size, n + 1);
1395 c->consts[n] = qir_uniform_ui(c, imm->u[i].Uint);
1396 }
1397 }
1398
1399 static struct qreg
1400 vc4_blend_channel(struct vc4_compile *c,
1401 struct qreg *dst,
1402 struct qreg *src,
1403 struct qreg val,
1404 unsigned factor,
1405 int channel)
1406 {
1407 switch(factor) {
1408 case PIPE_BLENDFACTOR_ONE:
1409 return val;
1410 case PIPE_BLENDFACTOR_SRC_COLOR:
1411 return qir_FMUL(c, val, src[channel]);
1412 case PIPE_BLENDFACTOR_SRC_ALPHA:
1413 return qir_FMUL(c, val, src[3]);
1414 case PIPE_BLENDFACTOR_DST_ALPHA:
1415 return qir_FMUL(c, val, dst[3]);
1416 case PIPE_BLENDFACTOR_DST_COLOR:
1417 return qir_FMUL(c, val, dst[channel]);
1418 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1419 if (channel != 3) {
1420 return qir_FMUL(c,
1421 val,
1422 qir_FMIN(c,
1423 src[3],
1424 qir_FSUB(c,
1425 qir_uniform_f(c, 1.0),
1426 dst[3])));
1427 } else {
1428 return val;
1429 }
1430 case PIPE_BLENDFACTOR_CONST_COLOR:
1431 return qir_FMUL(c, val,
1432 get_temp_for_uniform(c,
1433 QUNIFORM_BLEND_CONST_COLOR,
1434 channel));
1435 case PIPE_BLENDFACTOR_CONST_ALPHA:
1436 return qir_FMUL(c, val,
1437 get_temp_for_uniform(c,
1438 QUNIFORM_BLEND_CONST_COLOR,
1439 3));
1440 case PIPE_BLENDFACTOR_ZERO:
1441 return qir_uniform_f(c, 0.0);
1442 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
1443 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1444 src[channel]));
1445 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
1446 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1447 src[3]));
1448 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
1449 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1450 dst[3]));
1451 case PIPE_BLENDFACTOR_INV_DST_COLOR:
1452 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1453 dst[channel]));
1454 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
1455 return qir_FMUL(c, val,
1456 qir_FSUB(c, qir_uniform_f(c, 1.0),
1457 get_temp_for_uniform(c,
1458 QUNIFORM_BLEND_CONST_COLOR,
1459 channel)));
1460 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
1461 return qir_FMUL(c, val,
1462 qir_FSUB(c, qir_uniform_f(c, 1.0),
1463 get_temp_for_uniform(c,
1464 QUNIFORM_BLEND_CONST_COLOR,
1465 3)));
1466
1467 default:
1468 case PIPE_BLENDFACTOR_SRC1_COLOR:
1469 case PIPE_BLENDFACTOR_SRC1_ALPHA:
1470 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
1471 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
1472 /* Unsupported. */
1473 fprintf(stderr, "Unknown blend factor %d\n", factor);
1474 return val;
1475 }
1476 }
1477
1478 static struct qreg
1479 vc4_blend_func(struct vc4_compile *c,
1480 struct qreg src, struct qreg dst,
1481 unsigned func)
1482 {
1483 switch (func) {
1484 case PIPE_BLEND_ADD:
1485 return qir_FADD(c, src, dst);
1486 case PIPE_BLEND_SUBTRACT:
1487 return qir_FSUB(c, src, dst);
1488 case PIPE_BLEND_REVERSE_SUBTRACT:
1489 return qir_FSUB(c, dst, src);
1490 case PIPE_BLEND_MIN:
1491 return qir_FMIN(c, src, dst);
1492 case PIPE_BLEND_MAX:
1493 return qir_FMAX(c, src, dst);
1494
1495 default:
1496 /* Unsupported. */
1497 fprintf(stderr, "Unknown blend func %d\n", func);
1498 return src;
1499
1500 }
1501 }
1502
1503 /**
1504 * Implements fixed function blending in shader code.
1505 *
1506 * VC4 doesn't have any hardware support for blending. Instead, you read the
1507 * current contents of the destination from the tile buffer after having
1508 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
1509 * math using your output color and that destination value, and update the
1510 * output color appropriately.
1511 */
1512 static void
1513 vc4_blend(struct vc4_compile *c, struct qreg *result,
1514 struct qreg *dst_color, struct qreg *src_color)
1515 {
1516 struct pipe_rt_blend_state *blend = &c->fs_key->blend;
1517
1518 if (!blend->blend_enable) {
1519 for (int i = 0; i < 4; i++)
1520 result[i] = src_color[i];
1521 return;
1522 }
1523
1524 struct qreg src_blend[4], dst_blend[4];
1525 for (int i = 0; i < 3; i++) {
1526 src_blend[i] = vc4_blend_channel(c,
1527 dst_color, src_color,
1528 src_color[i],
1529 blend->rgb_src_factor, i);
1530 dst_blend[i] = vc4_blend_channel(c,
1531 dst_color, src_color,
1532 dst_color[i],
1533 blend->rgb_dst_factor, i);
1534 }
1535 src_blend[3] = vc4_blend_channel(c,
1536 dst_color, src_color,
1537 src_color[3],
1538 blend->alpha_src_factor, 3);
1539 dst_blend[3] = vc4_blend_channel(c,
1540 dst_color, src_color,
1541 dst_color[3],
1542 blend->alpha_dst_factor, 3);
1543
1544 for (int i = 0; i < 3; i++) {
1545 result[i] = vc4_blend_func(c,
1546 src_blend[i], dst_blend[i],
1547 blend->rgb_func);
1548 }
1549 result[3] = vc4_blend_func(c,
1550 src_blend[3], dst_blend[3],
1551 blend->alpha_func);
1552 }
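
/* Example of what this expands to, for illustration: classic
 * non-premultiplied alpha blending (rgb_src_factor = SRC_ALPHA,
 * rgb_dst_factor = INV_SRC_ALPHA, rgb_func = ADD) reduces the code above
 * to
 *
 *   result[i] = src[i] * src[3] + dst[i] * (1.0 - src[3])
 *
 * built entirely from FMUL/FSUB/FADD on the unpacked channel values.
 */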
1553
1554 static void
1555 clip_distance_discard(struct vc4_compile *c)
1556 {
1557 for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) {
1558 if (!(c->key->ucp_enables & (1 << i)))
1559 continue;
1560
1561 struct qreg dist = emit_fragment_varying(c,
1562 TGSI_SEMANTIC_CLIPDIST,
1563 i,
1564 TGSI_SWIZZLE_X);
1565
1566 qir_SF(c, dist);
1567
1568 if (c->discard.file == QFILE_NULL)
1569 c->discard = qir_uniform_f(c, 0.0);
1570
1571 c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
1572 c->discard);
1573 }
1574 }
1575
1576 static void
1577 alpha_test_discard(struct vc4_compile *c)
1578 {
1579 struct qreg src_alpha;
1580 struct qreg alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0);
1581
1582 if (!c->fs_key->alpha_test)
1583 return;
1584
1585 if (c->output_color_index != -1)
1586 src_alpha = c->outputs[c->output_color_index + 3];
1587 else
1588 src_alpha = qir_uniform_f(c, 1.0);
1589
1590 if (c->discard.file == QFILE_NULL)
1591 c->discard = qir_uniform_f(c, 0.0);
1592
1593 switch (c->fs_key->alpha_test_func) {
1594 case PIPE_FUNC_NEVER:
1595 c->discard = qir_uniform_f(c, 1.0);
1596 break;
1597 case PIPE_FUNC_ALWAYS:
1598 break;
1599 case PIPE_FUNC_EQUAL:
1600 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1601 c->discard = qir_SEL_X_Y_ZS(c, c->discard,
1602 qir_uniform_f(c, 1.0));
1603 break;
1604 case PIPE_FUNC_NOTEQUAL:
1605 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1606 c->discard = qir_SEL_X_Y_ZC(c, c->discard,
1607 qir_uniform_f(c, 1.0));
1608 break;
1609 case PIPE_FUNC_GREATER:
1610 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1611 c->discard = qir_SEL_X_Y_NC(c, c->discard,
1612 qir_uniform_f(c, 1.0));
1613 break;
1614 case PIPE_FUNC_GEQUAL:
1615 qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
1616 c->discard = qir_SEL_X_Y_NS(c, c->discard,
1617 qir_uniform_f(c, 1.0));
1618 break;
1619 case PIPE_FUNC_LESS:
1620 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1621 c->discard = qir_SEL_X_Y_NS(c, c->discard,
1622 qir_uniform_f(c, 1.0));
1623 break;
1624 case PIPE_FUNC_LEQUAL:
1625 qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
1626 c->discard = qir_SEL_X_Y_NC(c, c->discard,
1627 qir_uniform_f(c, 1.0));
1628 break;
1629 }
1630 }
1631
1632 static struct qreg
1633 vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
1634 {
1635 switch (c->fs_key->logicop_func) {
1636 case PIPE_LOGICOP_CLEAR:
1637 return qir_uniform_f(c, 0.0);
1638 case PIPE_LOGICOP_NOR:
1639 return qir_NOT(c, qir_OR(c, src, dst));
1640 case PIPE_LOGICOP_AND_INVERTED:
1641 return qir_AND(c, qir_NOT(c, src), dst);
1642 case PIPE_LOGICOP_COPY_INVERTED:
1643 return qir_NOT(c, src);
1644 case PIPE_LOGICOP_AND_REVERSE:
1645 return qir_AND(c, src, qir_NOT(c, dst));
1646 case PIPE_LOGICOP_INVERT:
1647 return qir_NOT(c, dst);
1648 case PIPE_LOGICOP_XOR:
1649 return qir_XOR(c, src, dst);
1650 case PIPE_LOGICOP_NAND:
1651 return qir_NOT(c, qir_AND(c, src, dst));
1652 case PIPE_LOGICOP_AND:
1653 return qir_AND(c, src, dst);
1654 case PIPE_LOGICOP_EQUIV:
1655 return qir_NOT(c, qir_XOR(c, src, dst));
1656 case PIPE_LOGICOP_NOOP:
1657 return dst;
1658 case PIPE_LOGICOP_OR_INVERTED:
1659 return qir_OR(c, qir_NOT(c, src), dst);
1660 case PIPE_LOGICOP_OR_REVERSE:
1661 return qir_OR(c, src, qir_NOT(c, dst));
1662 case PIPE_LOGICOP_OR:
1663 return qir_OR(c, src, dst);
1664 case PIPE_LOGICOP_SET:
1665 return qir_uniform_ui(c, ~0);
1666 case PIPE_LOGICOP_COPY:
1667 default:
1668 return src;
1669 }
1670 }
1671
1672 static void
1673 emit_frag_end(struct vc4_compile *c)
1674 {
1675 clip_distance_discard(c);
1676 alpha_test_discard(c);
1677
1678 enum pipe_format color_format = c->fs_key->color_format;
1679 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
1680 struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
1681 struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
1682 struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
1683 struct qreg packed_dst_color = c->undef;
1684
1685 if (c->fs_key->blend.blend_enable ||
1686 c->fs_key->blend.colormask != 0xf ||
1687 c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
1688 struct qreg r4 = qir_TLB_COLOR_READ(c);
1689 for (int i = 0; i < 4; i++)
1690 tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
1691 for (int i = 0; i < 4; i++) {
1692 dst_color[i] = get_swizzled_channel(c,
1693 tlb_read_color,
1694 format_swiz[i]);
1695 if (util_format_is_srgb(color_format) && i != 3) {
1696 linear_dst_color[i] =
1697 qir_srgb_decode(c, dst_color[i]);
1698 } else {
1699 linear_dst_color[i] = dst_color[i];
1700 }
1701 }
1702
1703 /* Save the packed value for logic ops. Can't reuse r4
1704 * because other things might smash it (like sRGB).
1705 */
1706 packed_dst_color = qir_MOV(c, r4);
1707 }
1708
1709 struct qreg blend_color[4];
1710 struct qreg undef_array[4] = {
1711 c->undef, c->undef, c->undef, c->undef
1712 };
1713 vc4_blend(c, blend_color, linear_dst_color,
1714 (c->output_color_index != -1 ?
1715 c->outputs + c->output_color_index :
1716 undef_array));
1717
1718 if (util_format_is_srgb(color_format)) {
1719 for (int i = 0; i < 3; i++)
1720 blend_color[i] = qir_srgb_encode(c, blend_color[i]);
1721 }
1722
1723 /* If the bit isn't set in the color mask, then just return the
1724 * original dst color, instead.
1725 */
1726 for (int i = 0; i < 4; i++) {
1727 if (!(c->fs_key->blend.colormask & (1 << i))) {
1728 blend_color[i] = dst_color[i];
1729 }
1730 }
1731
1732 /* Debug: Sometimes you're getting a black output and just want to see
1733 * if the FS is getting executed at all. Spam magenta into the color
1734 * output.
1735 */
1736 if (0) {
1737 blend_color[0] = qir_uniform_f(c, 1.0);
1738 blend_color[1] = qir_uniform_f(c, 0.0);
1739 blend_color[2] = qir_uniform_f(c, 1.0);
1740 blend_color[3] = qir_uniform_f(c, 0.5);
1741 }
1742
1743 struct qreg swizzled_outputs[4];
1744 for (int i = 0; i < 4; i++) {
1745 swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
1746 format_swiz[i]);
1747 }
1748
1749 if (c->discard.file != QFILE_NULL)
1750 qir_TLB_DISCARD_SETUP(c, c->discard);
1751
1752 if (c->fs_key->stencil_enabled) {
1753 qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 0));
1754 if (c->fs_key->stencil_twoside) {
1755 qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 1));
1756 }
1757 if (c->fs_key->stencil_full_writemasks) {
1758 qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 2));
1759 }
1760 }
1761
1762 if (c->fs_key->depth_enabled) {
1763 struct qreg z;
1764 if (c->output_position_index != -1) {
1765 z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
1766 qir_uniform_f(c, 0xffffff)));
1767 } else {
1768 z = qir_FRAG_Z(c);
1769 }
1770 qir_TLB_Z_WRITE(c, z);
1771 }
1772
1773 bool color_written = false;
1774 for (int i = 0; i < 4; i++) {
1775 if (swizzled_outputs[i].file != QFILE_NULL)
1776 color_written = true;
1777 }
1778
1779 struct qreg packed_color;
1780 if (color_written) {
1781 /* Fill in any undefined colors. The simulator will fail an
1782 * assertion if we read something that wasn't written, and I don't
1783 * know what the hardware does.
1784 */
1785 for (int i = 0; i < 4; i++) {
1786 if (swizzled_outputs[i].file == QFILE_NULL)
1787 swizzled_outputs[i] = qir_uniform_f(c, 0.0);
1788 }
1789 packed_color = qir_get_temp(c);
1790 qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
1791 swizzled_outputs[0],
1792 swizzled_outputs[1],
1793 swizzled_outputs[2],
1794 swizzled_outputs[3]));
1795 } else {
1796 packed_color = qir_uniform_ui(c, 0);
1797 }
1798
1799
1800 if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
1801 packed_color = vc4_logicop(c, packed_color, packed_dst_color);
1802 }
1803
1804 qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
1805 packed_color, c->undef));
1806 }
1807
1808 static void
1809 emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
1810 {
1811 struct qreg xyi[2];
1812
1813 for (int i = 0; i < 2; i++) {
1814 struct qreg scale =
1815 add_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
1816
1817 xyi[i] = qir_FTOI(c, qir_FMUL(c,
1818 qir_FMUL(c,
1819 c->outputs[c->output_position_index + i],
1820 scale),
1821 rcp_w));
1822 }
1823
1824 qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
1825 }
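
/* This is the X/Y half of the viewport transform fused with the
 * perspective divide: screen = clip * scale * (1/W), truncated to the
 * signed 12.4 fixed-point subpixel coordinates that PACK_SCALED produces
 * (so the X/Y scale uniforms presumably bake in the 16x subpixel factor).
 * The viewport translate is not applied here; it appears as separate
 * viewport-offset hardware state.
 */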
1826
1827 static void
1828 emit_zs_write(struct vc4_compile *c, struct qreg rcp_w)
1829 {
1830 struct qreg zscale = add_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
1831 struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
1832
1833 qir_VPM_WRITE(c, qir_FMUL(c, qir_FADD(c, qir_FMUL(c,
1834 c->outputs[c->output_position_index + 2],
1835 zscale),
1836 zoffset),
1837 rcp_w));
1838 }
1839
1840 static void
1841 emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
1842 {
1843 qir_VPM_WRITE(c, rcp_w);
1844 }
1845
1846 static void
1847 emit_point_size_write(struct vc4_compile *c)
1848 {
1849 struct qreg point_size;
1850
1851 if (c->output_point_size_index != -1)
1852 point_size = c->outputs[c->output_point_size_index + 3];
1853 else
1854 point_size = qir_uniform_f(c, 1.0);
1855
1856 /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
1857 * BCM21553).
1858 */
1859 point_size = qir_FMAX(c, point_size, qir_uniform_f(c, .125));
1860
1861 qir_VPM_WRITE(c, point_size);
1862 }
1863
1864 /**
1865 * Emits a VPM read of the stub vertex attribute set up by vc4_draw.c.
1866 *
1867 * The simulator insists that there be at least one vertex attribute, so
1868 * vc4_draw.c will emit one if it wouldn't have otherwise. The simulator also
1869 * insists that all vertex attributes loaded get read by the VS/CS, so we have
1870 * to consume it here.
1871 */
1872 static void
1873 emit_stub_vpm_read(struct vc4_compile *c)
1874 {
1875 if (c->num_inputs)
1876 return;
1877
1878 for (int i = 0; i < 4; i++) {
1879 qir_emit(c, qir_inst(QOP_VPM_READ,
1880 qir_get_temp(c),
1881 c->undef,
1882 c->undef));
1883 c->num_inputs++;
1884 }
1885 }
1886
1887 static void
1888 emit_ucp_clipdistance(struct vc4_compile *c)
1889 {
1890 unsigned cv;
1891 if (c->output_clipvertex_index != -1)
1892 cv = c->output_clipvertex_index;
1893 else if (c->output_position_index != -1)
1894 cv = c->output_position_index;
1895 else
1896 return;
1897
1898 for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) {
1899 if (!(c->key->ucp_enables & (1 << plane)))
1900 continue;
1901
1902 /* Pick the next outputs[] that hasn't been written to, since
1903 * there are no other program writes left to be processed at
1904 * this point. If something had been declared but not written
1905 * (like a w component), we'll just smash over the top of it.
1906 */
1907 uint32_t output_index = c->num_outputs++;
1908 add_output(c, output_index,
1909 TGSI_SEMANTIC_CLIPDIST,
1910 plane,
1911 TGSI_SWIZZLE_X);
1912
1913
                struct qreg dist = qir_uniform_f(c, 0.0);
                for (int i = 0; i < 4; i++) {
                        struct qreg pos_chan = c->outputs[cv + i];
                        struct qreg ucp =
                                add_uniform(c, QUNIFORM_USER_CLIP_PLANE,
                                            plane * 4 + i);
                        dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp));
                }

                c->outputs[output_index] = dist;
        }
}

static void
emit_vert_end(struct vc4_compile *c,
              struct vc4_varying_semantic *fs_inputs,
              uint32_t num_fs_inputs)
{
        struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);

        emit_stub_vpm_read(c);
        emit_ucp_clipdistance(c);

        emit_scaled_viewport_write(c, rcp_w);
        emit_zs_write(c, rcp_w);
        emit_rcp_wc_write(c, rcp_w);
        if (c->vs_key->per_vertex_point_size)
                emit_point_size_write(c);

        for (int i = 0; i < num_fs_inputs; i++) {
                struct vc4_varying_semantic *input = &fs_inputs[i];
                int j;

                for (j = 0; j < c->num_outputs; j++) {
                        struct vc4_varying_semantic *output =
                                &c->output_semantics[j];

                        if (input->semantic == output->semantic &&
                            input->index == output->index &&
                            input->swizzle == output->swizzle) {
                                qir_VPM_WRITE(c, c->outputs[j]);
                                break;
                        }
                }
                /* Emit padding if we didn't find a declared VS output for
                 * this FS input.
                 */
                if (j == c->num_outputs)
                        qir_VPM_WRITE(c, qir_uniform_f(c, 0.0));
        }
}

static void
emit_coord_end(struct vc4_compile *c)
{
        struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);

        emit_stub_vpm_read(c);

        for (int i = 0; i < 4; i++)
                qir_VPM_WRITE(c, c->outputs[c->output_position_index + i]);

        emit_scaled_viewport_write(c, rcp_w);
        emit_zs_write(c, rcp_w);
        emit_rcp_wc_write(c, rcp_w);
        if (c->vs_key->per_vertex_point_size)
                emit_point_size_write(c);
}

static struct vc4_compile *
vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage,
                       struct vc4_key *key)
{
        struct vc4_compile *c = qir_compile_init();
        int ret;

        c->stage = stage;
        for (int i = 0; i < 4; i++)
                c->addr[i] = qir_uniform_f(c, 0.0);

        c->shader_state = &key->shader_state->base;
        c->program_id = key->shader_state->program_id;
        c->variant_id = key->shader_state->compiled_variant_count++;

        c->key = key;
        switch (stage) {
        case QSTAGE_FRAG:
                c->fs_key = (struct vc4_fs_key *)key;
                if (c->fs_key->is_points) {
                        c->point_x = emit_fragment_varying(c, ~0, ~0, 0);
                        c->point_y = emit_fragment_varying(c, ~0, ~0, 0);
                } else if (c->fs_key->is_lines) {
                        c->line_x = emit_fragment_varying(c, ~0, ~0, 0);
                }
                break;
        case QSTAGE_VERT:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        case QSTAGE_COORD:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        }

        const struct tgsi_token *tokens = key->shader_state->base.tokens;
        if (c->fs_key && c->fs_key->light_twoside) {
                if (!key->shader_state->twoside_tokens) {
                        const struct tgsi_lowering_config lowering_config = {
                                .color_two_side = true,
                        };
                        struct tgsi_shader_info info;
                        key->shader_state->twoside_tokens =
                                tgsi_transform_lowering(&lowering_config,
                                                        key->shader_state->base.tokens,
                                                        &info);

                        /* If no transformation occurred, then NULL is
                         * returned and we just use our original tokens.
                         */
                        if (!key->shader_state->twoside_tokens) {
                                key->shader_state->twoside_tokens =
                                        key->shader_state->base.tokens;
                        }
                }
                tokens = key->shader_state->twoside_tokens;
        }

        ret = tgsi_parse_init(&c->parser, tokens);
        assert(ret == TGSI_PARSE_OK);

        if (vc4_debug & VC4_DEBUG_TGSI) {
                fprintf(stderr, "%s prog %d/%d TGSI:\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id);
                tgsi_dump(tokens, 0);
        }

        while (!tgsi_parse_end_of_tokens(&c->parser)) {
                tgsi_parse_token(&c->parser);

                switch (c->parser.FullToken.Token.Type) {
                case TGSI_TOKEN_TYPE_DECLARATION:
                        emit_tgsi_declaration(c,
                                              &c->parser.FullToken.FullDeclaration);
                        break;

                case TGSI_TOKEN_TYPE_INSTRUCTION:
                        emit_tgsi_instruction(c,
                                              &c->parser.FullToken.FullInstruction);
                        break;

                case TGSI_TOKEN_TYPE_IMMEDIATE:
                        parse_tgsi_immediate(c,
                                             &c->parser.FullToken.FullImmediate);
                        break;
                }
        }

        switch (stage) {
        case QSTAGE_FRAG:
                emit_frag_end(c);
                break;
        case QSTAGE_VERT:
                emit_vert_end(c,
                              vc4->prog.fs->input_semantics,
                              vc4->prog.fs->num_inputs);
                break;
        case QSTAGE_COORD:
                emit_coord_end(c);
                break;
        }

        tgsi_parse_free(&c->parser);

        qir_optimize(c);

        if (vc4_debug & VC4_DEBUG_QIR) {
                fprintf(stderr, "%s prog %d/%d QIR:\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id);
                qir_dump(c);
        }
        qir_reorder_uniforms(c);
        vc4_generate_code(vc4, c);

        if (vc4_debug & VC4_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id,
                        c->qpu_inst_count);
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d uniforms\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id,
                        c->num_uniforms);
        }

        return c;
}

static void *
vc4_shader_state_create(struct pipe_context *pctx,
                        const struct pipe_shader_state *cso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_uncompiled_shader *so = CALLOC_STRUCT(vc4_uncompiled_shader);
        if (!so)
                return NULL;

        const struct tgsi_lowering_config lowering_config = {
                .lower_DST = true,
                .lower_XPD = true,
                .lower_SCS = true,
                .lower_POW = true,
                .lower_LIT = true,
                .lower_EXP = true,
                .lower_LOG = true,
                .lower_DP4 = true,
                .lower_DP3 = true,
                .lower_DPH = true,
                .lower_DP2 = true,
                .lower_DP2A = true,
        };

        struct tgsi_shader_info info;
        so->base.tokens = tgsi_transform_lowering(&lowering_config,
                                                  cso->tokens, &info);
        if (!so->base.tokens)
                so->base.tokens = tgsi_dup_tokens(cso->tokens);
        so->program_id = vc4->next_uncompiled_program_id++;

        return so;
}

static void
copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
                             struct vc4_compile *c)
{
        int count = c->num_uniforms;
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;

        uinfo->count = count;
        uinfo->data = ralloc_array(shader, uint32_t, count);
        memcpy(uinfo->data, c->uniform_data,
               count * sizeof(*uinfo->data));
        uinfo->contents = ralloc_array(shader, enum quniform_contents, count);
        memcpy(uinfo->contents, c->uniform_contents,
               count * sizeof(*uinfo->contents));
        uinfo->num_texture_samples = c->num_texture_samples;
}

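/* Returns the compiled variant of the shader for the given key, compiling
 * it through TGSI->QIR->QPU if it isn't in the per-stage cache yet.
 *
 * Cache entries own a malloc()ed copy of the key (so callers can build keys
 * on the stack), and are freed either when their uncompiled shader is
 * deleted (vc4_shader_state_delete()) or at context teardown
 * (vc4_program_fini()).
 */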
static struct vc4_compiled_shader *
vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
                        struct vc4_key *key)
{
        struct hash_table *ht;
        uint32_t key_size;
        if (stage == QSTAGE_FRAG) {
                ht = vc4->fs_cache;
                key_size = sizeof(struct vc4_fs_key);
        } else {
                ht = vc4->vs_cache;
                key_size = sizeof(struct vc4_vs_key);
        }

        struct vc4_compiled_shader *shader;
        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
        if (entry)
                return entry->data;

        struct vc4_compile *c = vc4_shader_tgsi_to_qir(vc4, stage, key);
        shader = rzalloc(NULL, struct vc4_compiled_shader);

        shader->program_id = vc4->next_compiled_program_id++;
        if (stage == QSTAGE_FRAG) {
                bool input_live[c->num_input_semantics];
                struct simple_node *node;

                memset(input_live, 0, sizeof(input_live));
                foreach(node, &c->instructions) {
                        struct qinst *inst = (struct qinst *)node;
                        for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                                if (inst->src[i].file == QFILE_VARY)
                                        input_live[inst->src[i].index] = true;
                        }
                }

                shader->input_semantics = ralloc_array(shader,
                                                       struct vc4_varying_semantic,
                                                       c->num_input_semantics);

                for (int i = 0; i < c->num_input_semantics; i++) {
                        struct vc4_varying_semantic *sem = &c->input_semantics[i];

                        if (!input_live[i])
                                continue;

                        /* Skip non-VS-output inputs. */
                        if (sem->semantic == (uint8_t)~0)
                                continue;

                        if (sem->semantic == TGSI_SEMANTIC_COLOR ||
                            sem->semantic == TGSI_SEMANTIC_BCOLOR) {
                                shader->color_inputs |= (1 << shader->num_inputs);
                        }

                        shader->input_semantics[shader->num_inputs] = *sem;
                        shader->num_inputs++;
                }
        } else {
                shader->num_inputs = c->num_inputs;
        }

        copy_uniform_state_to_shader(shader, c);
        shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
                                      c->qpu_inst_count * sizeof(uint64_t),
                                      "code");

        /* Copy the compiler UBO range state to the compiled shader, dropping
         * out arrays that were never referenced by an indirect load.
         *
         * (Note that QIR dead code elimination of an array access still
         * leaves that array alive, though.)
         */
        if (c->num_ubo_ranges) {
                shader->num_ubo_ranges = c->num_ubo_ranges;
                shader->ubo_ranges = ralloc_array(shader, struct vc4_ubo_range,
                                                  c->num_ubo_ranges);
                uint32_t j = 0;
                for (int i = 0; i < c->ubo_ranges_array_size; i++) {
                        struct vc4_compiler_ubo_range *range =
                                &c->ubo_ranges[i];
                        if (!range->used)
                                continue;

                        shader->ubo_ranges[j].dst_offset = range->dst_offset;
                        shader->ubo_ranges[j].src_offset = range->src_offset;
                        shader->ubo_ranges[j].size = range->size;
                        shader->ubo_size += range->size;
                        j++;
                }
        }

        qir_compile_destroy(c);

        struct vc4_key *dup_key;
        dup_key = malloc(key_size);
        memcpy(dup_key, key, key_size);
        _mesa_hash_table_insert(ht, dup_key, shader);

        return shader;
}

static void
vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
                     struct vc4_texture_stateobj *texstate)
{
        for (int i = 0; i < texstate->num_textures; i++) {
                struct pipe_sampler_view *sampler = texstate->textures[i];
                struct pipe_sampler_state *sampler_state =
                        texstate->samplers[i];

                if (sampler) {
                        key->tex[i].format = sampler->format;
                        key->tex[i].swizzle[0] = sampler->swizzle_r;
                        key->tex[i].swizzle[1] = sampler->swizzle_g;
                        key->tex[i].swizzle[2] = sampler->swizzle_b;
                        key->tex[i].swizzle[3] = sampler->swizzle_a;
                        key->tex[i].compare_mode = sampler_state->compare_mode;
                        key->tex[i].compare_func = sampler_state->compare_func;
                        key->tex[i].wrap_s = sampler_state->wrap_s;
                        key->tex[i].wrap_t = sampler_state->wrap_t;
                }
        }

        key->ucp_enables = vc4->rasterizer->base.clip_plane_enable;
}

static void
vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_fs_key local_key;
        struct vc4_fs_key *key = &local_key;

        if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
                            VC4_DIRTY_BLEND |
                            VC4_DIRTY_FRAMEBUFFER |
                            VC4_DIRTY_ZSA |
                            VC4_DIRTY_RASTERIZER |
                            VC4_DIRTY_FRAGTEX |
                            VC4_DIRTY_TEXSTATE |
                            VC4_DIRTY_PROG))) {
                return;
        }

        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(vc4, &key->base, &vc4->fragtex);
        key->base.shader_state = vc4->prog.bind_fs;
        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                         prim_mode <= PIPE_PRIM_LINE_STRIP);
        key->blend = vc4->blend->rt[0];
        if (vc4->blend->logicop_enable) {
                key->logicop_func = vc4->blend->logicop_func;
        } else {
                key->logicop_func = PIPE_LOGICOP_COPY;
        }
        if (vc4->framebuffer.cbufs[0])
                key->color_format = vc4->framebuffer.cbufs[0]->format;

        key->stencil_enabled = vc4->zsa->stencil_uniforms[0] != 0;
        key->stencil_twoside = vc4->zsa->stencil_uniforms[1] != 0;
        key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
        key->depth_enabled = (vc4->zsa->base.depth.enabled ||
                              key->stencil_enabled);
        if (vc4->zsa->base.alpha.enabled) {
                key->alpha_test = true;
                key->alpha_test_func = vc4->zsa->base.alpha.func;
        }

        if (key->is_points) {
                key->point_sprite_mask =
                        vc4->rasterizer->base.sprite_coord_enable;
                key->point_coord_upper_left =
                        (vc4->rasterizer->base.sprite_coord_mode ==
                         PIPE_SPRITE_COORD_UPPER_LEFT);
        }

        key->light_twoside = vc4->rasterizer->base.light_twoside;

        struct vc4_compiled_shader *old_fs = vc4->prog.fs;
        vc4->prog.fs = vc4_get_compiled_shader(vc4, QSTAGE_FRAG, &key->base);
        if (vc4->prog.fs == old_fs)
                return;

        if (vc4->rasterizer->base.flatshade &&
            old_fs && vc4->prog.fs->color_inputs != old_fs->color_inputs) {
                vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
        }
}

static void
vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_vs_key local_key;
        struct vc4_vs_key *key = &local_key;

        if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
                            VC4_DIRTY_RASTERIZER |
                            VC4_DIRTY_VERTTEX |
                            VC4_DIRTY_TEXSTATE |
                            VC4_DIRTY_VTXSTATE |
                            VC4_DIRTY_PROG))) {
                return;
        }

        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(vc4, &key->base, &vc4->verttex);
        key->base.shader_state = vc4->prog.bind_vs;
        key->compiled_fs_id = vc4->prog.fs->program_id;

        for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
                key->attr_formats[i] = vc4->vtx->pipe[i].src_format;

        key->per_vertex_point_size =
                (prim_mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex);

        vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
        key->is_coord = true;
        vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
}

void
vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
{
        vc4_update_compiled_fs(vc4, prim_mode);
        vc4_update_compiled_vs(vc4, prim_mode);
}

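/* The shader keys are hashed and compared as raw bytes, which is why
 * vc4_update_compiled_fs()/_vs() above memset() each key to 0 before filling
 * it in: uninitialized struct padding would otherwise cause spurious cache
 * misses.
 */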
static uint32_t
fs_cache_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(struct vc4_fs_key));
}

static uint32_t
vs_cache_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(struct vc4_vs_key));
}

static bool
fs_cache_compare(const void *key1, const void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_fs_key)) == 0;
}

static bool
vs_cache_compare(const void *key1, const void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_vs_key)) == 0;
}

static void
delete_from_cache_if_matches(struct hash_table *ht,
                             struct hash_entry *entry,
                             struct vc4_uncompiled_shader *so)
{
        const struct vc4_key *key = entry->key;

        if (key->shader_state == so) {
                struct vc4_compiled_shader *shader = entry->data;
                _mesa_hash_table_remove(ht, entry);
                vc4_bo_unreference(&shader->bo);
                ralloc_free(shader);
        }
}

static void
vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_uncompiled_shader *so = hwcso;

        struct hash_entry *entry;
        hash_table_foreach(vc4->fs_cache, entry)
                delete_from_cache_if_matches(vc4->fs_cache, entry, so);
        hash_table_foreach(vc4->vs_cache, entry)
                delete_from_cache_if_matches(vc4->vs_cache, entry, so);

        if (so->twoside_tokens != so->base.tokens)
                free((void *)so->twoside_tokens);
        free((void *)so->base.tokens);
        free(so);
}

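/* Translates a gallium wrap mode to the encoding used in the texture P1
 * WRAP_S/WRAP_T fields.  GL's legacy CLAMP mode has no exact equivalent in
 * the values below, so it falls back to edge clamping when any nearest
 * filtering is in use and border clamping otherwise.
 */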
static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
{
        switch (p_wrap) {
        case PIPE_TEX_WRAP_REPEAT:
                return 0;
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return 1;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return 2;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return 3;
        case PIPE_TEX_WRAP_CLAMP:
                return (using_nearest ? 1 : 3);
        default:
                fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
                assert(!"not reached");
                return 0;
        }
}

static void
write_texture_p0(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
                 VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
                 VC4_SET_FIELD(texture->u.tex.last_level -
                               texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
                 VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
                               VC4_TEX_P0_CMMODE) |
                 VC4_SET_FIELD(rsc->vc4_format & 7, VC4_TEX_P0_TYPE));
}

static void
write_texture_p1(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        static const uint8_t minfilter_map[6] = {
                VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
                VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
                VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
                VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
                VC4_TEX_P1_MINFILT_NEAREST,
                VC4_TEX_P1_MINFILT_LINEAR,
        };
        static const uint32_t magfilter_map[] = {
                [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
                [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
        };

        bool either_nearest =
                (sampler->mag_img_filter == PIPE_TEX_FILTER_NEAREST ||
                 sampler->min_img_filter == PIPE_TEX_FILTER_NEAREST);

        cl_u32(&vc4->uniforms,
               VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
               VC4_SET_FIELD(texture->texture->height0 & 2047,
                             VC4_TEX_P1_HEIGHT) |
               VC4_SET_FIELD(texture->texture->width0 & 2047,
                             VC4_TEX_P1_WIDTH) |
               VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter],
                             VC4_TEX_P1_MAGFILT) |
               VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 +
                                           sampler->min_img_filter],
                             VC4_TEX_P1_MINFILT) |
               VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest),
                             VC4_TEX_P1_WRAP_S) |
               VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest),
                             VC4_TEX_P1_WRAP_T));
}

static void
write_texture_p2(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t data)
{
        uint32_t unit = data & 0xffff;
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_u32(&vc4->uniforms,
               VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
                             VC4_TEX_P2_PTYPE) |
               VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
               VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
}

#define SWIZ(x,y,z,w) { \
        UTIL_FORMAT_SWIZZLE_##x, \
        UTIL_FORMAT_SWIZZLE_##y, \
        UTIL_FORMAT_SWIZZLE_##z, \
        UTIL_FORMAT_SWIZZLE_##w \
}

static void
write_texture_border_color(struct vc4_context *vc4,
                           struct vc4_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        float border_color[4];
        for (int i = 0; i < 4; i++)
                border_color[i] = sampler->border_color.f[i];
        if (util_format_is_srgb(texture->format)) {
                for (int i = 0; i < 3; i++)
                        border_color[i] =
                                util_format_linear_to_srgb_float(border_color[i]);
        }

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 border_color,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc4_format-sampled texture contents are
         * replaced with our border color, the vc4_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                switch (rsc->vc4_format) {
                default:
                case VC4_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC4_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_u32(&vc4->uniforms, uc.ui[0]);
}

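/* Returns the 1/dimension scale factor for a RECT texture's coordinates.
 *
 * RECT textures are sampled with unnormalized coordinates in gallium, so the
 * compiled code scales incoming coordinates by this uniform (1.0 / width or
 * height) before the lookup.
 */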
static uint32_t
get_texrect_scale(struct vc4_texture_stateobj *texstate,
                  enum quniform_contents contents,
                  uint32_t data)
{
        struct pipe_sampler_view *texture = texstate->textures[data];
        uint32_t dim;

        if (contents == QUNIFORM_TEXRECT_SCALE_X)
                dim = texture->texture->width0;
        else
                dim = texture->texture->height0;

        return fui(1.0f / dim);
}

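/* Uploads the uniform ranges that the shader accesses with indirect
 * addressing into a temporary BO for this draw, laid out according to the
 * shader's ubo_ranges[] table built in vc4_get_compiled_shader().
 */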
static struct vc4_bo *
vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
               const uint32_t *gallium_uniforms)
{
        if (!shader->ubo_size)
                return NULL;

        struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
        uint32_t *data = vc4_bo_map(ubo);
        for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
                memcpy(data + shader->ubo_ranges[i].dst_offset,
                       gallium_uniforms + shader->ubo_ranges[i].src_offset,
                       shader->ubo_ranges[i].size);
        }

        return ubo;
}

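/* Emits the uniform stream for one compiled shader, in the order the
 * compiler recorded it: plain constants and gallium uniforms are copied
 * through, while the special entries (viewport, clip planes, texture
 * config, stencil, etc.) are resolved against the current context state at
 * draw time.
 */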
void
vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                   struct vc4_constbuf_stateobj *cb,
                   struct vc4_texture_stateobj *texstate)
{
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
        struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);

        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);

        for (int i = 0; i < uinfo->count; i++) {
                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        cl_u32(&vc4->uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        cl_u32(&vc4->uniforms,
                               gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_f(&vc4->uniforms, vc4->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[2]);
                        break;

                case QUNIFORM_USER_CLIP_PLANE:
                        cl_f(&vc4->uniforms,
                             vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P0:
                        write_texture_p0(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P2:
                        write_texture_p2(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_UBO_ADDR:
                        cl_reloc(vc4, &vc4->uniforms, ubo, 0);
                        break;

                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        write_texture_border_color(vc4, texstate,
                                                   uinfo->data[i]);
                        break;

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_u32(&vc4->uniforms,
                               get_texrect_scale(texstate,
                                                 uinfo->contents[i],
                                                 uinfo->data[i]));
                        break;

                case QUNIFORM_BLEND_CONST_COLOR:
                        cl_f(&vc4->uniforms,
                             vc4->blend_color.color[uinfo->data[i]]);
                        break;

                case QUNIFORM_STENCIL:
                        cl_u32(&vc4->uniforms,
                               vc4->zsa->stencil_uniforms[uinfo->data[i]] |
                               (uinfo->data[i] <= 1 ?
                                (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_f(&vc4->uniforms, vc4->zsa->base.alpha.ref_value);
                        break;
                }
#if 0
                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
                fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
                        shader, i, written_val, uif(written_val));
#endif
        }
}

static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        vc4->prog.bind_fs = hwcso;
        vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
        vc4->dirty |= VC4_DIRTY_PROG;
}

static void
vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        vc4->prog.bind_vs = hwcso;
        vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
        vc4->dirty |= VC4_DIRTY_PROG;
}

void
vc4_program_init(struct pipe_context *pctx)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        pctx->create_vs_state = vc4_shader_state_create;
        pctx->delete_vs_state = vc4_shader_state_delete;

        pctx->create_fs_state = vc4_shader_state_create;
        pctx->delete_fs_state = vc4_shader_state_delete;

        pctx->bind_fs_state = vc4_fp_state_bind;
        pctx->bind_vs_state = vc4_vp_state_bind;

        vc4->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
                                                fs_cache_compare);
        vc4->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
                                                vs_cache_compare);
}

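/* Frees the compiled shaders still held by the caches (and their QPU code
 * BOs) at context teardown, evicting each entry as it goes.
 */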
void
vc4_program_fini(struct pipe_context *pctx)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        struct hash_entry *entry;
        hash_table_foreach(vc4->fs_cache, entry) {
                struct vc4_compiled_shader *shader = entry->data;
                vc4_bo_unreference(&shader->bo);
                ralloc_free(shader);
                _mesa_hash_table_remove(vc4->fs_cache, entry);
        }

        hash_table_foreach(vc4->vs_cache, entry) {
                struct vc4_compiled_shader *shader = entry->data;
                vc4_bo_unreference(&shader->bo);
                ralloc_free(shader);
                _mesa_hash_table_remove(vc4->vs_cache, entry);
        }
}