vc4: Make SF be a flag on the QIR instructions.
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
/*
 * Copyright (c) 2014 Scott Mansell
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <inttypes.h>
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_hash.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/format_srgb.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_lowering.h"

#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
#ifdef USE_VC4_SIMULATOR
#include "simpenrose/simpenrose.h"
#endif

struct vc4_key {
        struct vc4_uncompiled_shader *shader_state;
        struct {
                enum pipe_format format;
                unsigned compare_mode:1;
                unsigned compare_func:3;
                unsigned wrap_s:3;
                unsigned wrap_t:3;
                uint8_t swizzle[4];
        } tex[VC4_MAX_TEXTURE_SAMPLERS];
        uint8_t ucp_enables;
};

struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
        bool depth_enabled;
        bool stencil_enabled;
        bool stencil_twoside;
        bool stencil_full_writemasks;
        bool is_points;
        bool is_lines;
        bool alpha_test;
        bool point_coord_upper_left;
        bool light_twoside;
        uint8_t alpha_test_func;
        uint8_t logicop_func;
        uint32_t point_sprite_mask;

        struct pipe_rt_blend_state blend;
};

struct vc4_vs_key {
        struct vc4_key base;

        /**
         * This is a proxy for the array of FS input semantics, which is
         * larger than we would want to put in the key.
         */
        uint64_t compiled_fs_id;

        enum pipe_format attr_formats[8];
        bool is_coord;
        bool per_vertex_point_size;
};

static void
resize_qreg_array(struct vc4_compile *c,
                  struct qreg **regs,
                  uint32_t *size,
                  uint32_t decl_size)
{
        if (*size >= decl_size)
                return;

        uint32_t old_size = *size;
        *size = MAX2(*size * 2, decl_size);
        *regs = reralloc(c, *regs, struct qreg, *size);
        if (!*regs) {
                fprintf(stderr, "Malloc failure\n");
                abort();
        }

        for (uint32_t i = old_size; i < *size; i++)
                (*regs)[i] = c->undef;
}

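/**
 * Returns a QFILE_UNIF reference for the given contents/data pair, reusing
 * an existing uniform slot if an identical one has already been emitted for
 * this shader, and growing the uniform arrays as needed.
 */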
static struct qreg
add_uniform(struct vc4_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return (struct qreg) { QFILE_UNIF, i };
                }
        }

        uint32_t uniform = c->num_uniforms++;
        struct qreg u = { QFILE_UNIF, uniform };

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return u;
}

static struct qreg
get_temp_for_uniform(struct vc4_compile *c, enum quniform_contents contents,
                     uint32_t data)
{
        struct qreg u = add_uniform(c, contents, data);
        struct qreg t = qir_MOV(c, u);
        return t;
}

static struct qreg
qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
{
        return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui);
}

static struct qreg
qir_uniform_f(struct vc4_compile *c, float f)
{
        return qir_uniform_ui(c, fui(f));
}

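/**
 * Loads a value from a dynamically-indexed uniform (UBO) range.
 *
 * The offset is clamped to stay within the declared range, and the load
 * itself is performed as a direct texture lookup from the UBO's address.
 */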
static struct qreg
indirect_uniform_load(struct vc4_compile *c,
                      struct tgsi_full_src_register *src, int swiz)
{
        struct tgsi_ind_register *indirect = &src->Indirect;
        struct vc4_compiler_ubo_range *range = &c->ubo_ranges[indirect->ArrayID];
        if (!range->used) {
                range->used = true;
                range->dst_offset = c->next_ubo_dst_offset;
                c->next_ubo_dst_offset += range->size;
                c->num_ubo_ranges++;
        }

        assert(src->Register.Indirect);
        assert(indirect->File == TGSI_FILE_ADDRESS);

        struct qreg addr_val = c->addr[indirect->Swizzle];
        struct qreg indirect_offset =
                qir_ADD(c, addr_val, qir_uniform_ui(c,
                                                    range->dst_offset +
                                                    (src->Register.Index * 16) +
                                                    swiz * 4));
        indirect_offset = qir_MIN(c, indirect_offset,
                                  qir_uniform_ui(c, (range->dst_offset +
                                                     range->size - 4)));

        qir_TEX_DIRECT(c, indirect_offset, add_uniform(c, QUNIFORM_UBO_ADDR, 0));
        struct qreg r4 = qir_TEX_RESULT(c);
        c->num_texture_samples++;
        return qir_MOV(c, r4);
}

static struct qreg
get_src(struct vc4_compile *c, unsigned tgsi_op,
        struct tgsi_full_src_register *full_src, int i)
{
        struct tgsi_src_register *src = &full_src->Register;
        struct qreg r = c->undef;

        uint32_t s = i;
        switch (i) {
        case TGSI_SWIZZLE_X:
                s = src->SwizzleX;
                break;
        case TGSI_SWIZZLE_Y:
                s = src->SwizzleY;
                break;
        case TGSI_SWIZZLE_Z:
                s = src->SwizzleZ;
                break;
        case TGSI_SWIZZLE_W:
                s = src->SwizzleW;
                break;
        default:
                abort();
        }

        switch (src->File) {
        case TGSI_FILE_NULL:
                return r;
        case TGSI_FILE_TEMPORARY:
                r = c->temps[src->Index * 4 + s];
                break;
        case TGSI_FILE_IMMEDIATE:
                r = c->consts[src->Index * 4 + s];
                break;
        case TGSI_FILE_CONSTANT:
                if (src->Indirect) {
                        r = indirect_uniform_load(c, full_src, s);
                } else {
                        r = get_temp_for_uniform(c, QUNIFORM_UNIFORM,
                                                 src->Index * 4 + s);
                }
                break;
        case TGSI_FILE_INPUT:
                r = c->inputs[src->Index * 4 + s];
                break;
        case TGSI_FILE_SAMPLER:
        case TGSI_FILE_SAMPLER_VIEW:
                r = c->undef;
                break;
        default:
                fprintf(stderr, "unknown src file %d\n", src->File);
                abort();
        }

        if (src->Absolute)
                r = qir_FMAXABS(c, r, r);

        if (src->Negate) {
                switch (tgsi_opcode_infer_src_type(tgsi_op)) {
                case TGSI_TYPE_SIGNED:
                case TGSI_TYPE_UNSIGNED:
                        r = qir_SUB(c, qir_uniform_ui(c, 0), r);
                        break;
                default:
                        r = qir_FSUB(c, qir_uniform_f(c, 0.0), r);
                        break;
                }
        }

        return r;
}

static void
update_dst(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst,
           int i, struct qreg val)
{
        struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;

        assert(!tgsi_dst->Indirect);

        switch (tgsi_dst->File) {
        case TGSI_FILE_TEMPORARY:
                c->temps[tgsi_dst->Index * 4 + i] = val;
                break;
        case TGSI_FILE_OUTPUT:
                c->outputs[tgsi_dst->Index * 4 + i] = val;
                c->num_outputs = MAX2(c->num_outputs,
                                      tgsi_dst->Index * 4 + i + 1);
                break;
        case TGSI_FILE_ADDRESS:
                assert(tgsi_dst->Index == 0);
                c->addr[i] = val;
                break;
        default:
                fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
                abort();
        }
}

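/**
 * Resolves a format swizzle to a value: one of the source channels, or a
 * constant 0.0/1.0 for SWIZZLE_0/SWIZZLE_1.
 */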
static struct qreg
get_swizzled_channel(struct vc4_compile *c,
                     struct qreg *srcs, int swiz)
{
        switch (swiz) {
        default:
        case UTIL_FORMAT_SWIZZLE_NONE:
                fprintf(stderr, "warning: unknown swizzle\n");
                /* FALLTHROUGH */
        case UTIL_FORMAT_SWIZZLE_0:
                return qir_uniform_f(c, 0.0);
        case UTIL_FORMAT_SWIZZLE_1:
                return qir_uniform_f(c, 1.0);
        case UTIL_FORMAT_SWIZZLE_X:
        case UTIL_FORMAT_SWIZZLE_Y:
        case UTIL_FORMAT_SWIZZLE_Z:
        case UTIL_FORMAT_SWIZZLE_W:
                return srcs[swiz];
        }
}

static inline struct qreg
qir_SAT(struct vc4_compile *c, struct qreg val)
{
        return qir_FMAX(c,
                        qir_FMIN(c, val, qir_uniform_f(c, 1.0)),
                        qir_uniform_f(c, 0.0));
}

static struct qreg
tgsi_to_qir_alu(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg dst = qir_get_temp(c);
        qir_emit(c, qir_inst4(op, dst,
                              src[0 * 4 + i],
                              src[1 * 4 + i],
                              src[2 * 4 + i],
                              c->undef));
        return dst;
}

static struct qreg
tgsi_to_qir_scalar(struct vc4_compile *c,
                   struct tgsi_full_instruction *tgsi_inst,
                   enum qop op, struct qreg *src, int i)
{
        struct qreg dst = qir_get_temp(c);
        qir_emit(c, qir_inst(op, dst,
                             src[0 * 4 + 0],
                             c->undef));
        return dst;
}

static struct qreg
tgsi_to_qir_rcp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg x = src[0 * 4 + 0];
        struct qreg r = qir_RCP(c, x);

        /* Apply a Newton-Raphson step to improve the accuracy. */
        r = qir_FMUL(c, r, qir_FSUB(c,
                                    qir_uniform_f(c, 2.0),
                                    qir_FMUL(c, x, r)));

        return r;
}

static struct qreg
tgsi_to_qir_rsq(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg x = src[0 * 4 + 0];
        struct qreg r = qir_RSQ(c, x);

        /* Apply a Newton-Raphson step to improve the accuracy. */
        r = qir_FMUL(c, r, qir_FSUB(c,
                                    qir_uniform_f(c, 1.5),
                                    qir_FMUL(c,
                                             qir_uniform_f(c, 0.5),
                                             qir_FMUL(c, x,
                                                      qir_FMUL(c, r, r)))));

        return r;
}

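/* Piecewise sRGB transfer functions: a linear segment below the threshold
 * (0.04045 decoding, 0.0031308 encoding) and a power-law segment above it.
 * Both branches are computed unconditionally and SF/SEL picks the right one
 * per channel.
 */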
static struct qreg
qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
{
        struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
        struct qreg high = qir_POW(c,
                                   qir_FMUL(c,
                                            qir_FADD(c,
                                                     srgb,
                                                     qir_uniform_f(c, 0.055)),
                                            qir_uniform_f(c, 1.0 / 1.055)),
                                   qir_uniform_f(c, 2.4));

        qir_SF(c, qir_FSUB(c, srgb, qir_uniform_f(c, 0.04045)));
        return qir_SEL_X_Y_NS(c, low, high);
}

static struct qreg
qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
{
        struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
        struct qreg high = qir_FSUB(c,
                                    qir_FMUL(c,
                                             qir_uniform_f(c, 1.055),
                                             qir_POW(c,
                                                     linear,
                                                     qir_uniform_f(c, 0.41666))),
                                    qir_uniform_f(c, 0.055));

        qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
        return qir_SEL_X_Y_NS(c, low, high);
}

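/* The QPU's integer multiply (MUL24) only covers 24 bits, so a full 32-bit
 * UMUL is built out of 16-bit halves: lo*lo plus the two cross products
 * shifted up by 16 (the hi*hi term only affects bits above bit 32 and is
 * dropped).
 */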
static struct qreg
tgsi_to_qir_umul(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));
        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));

        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);

        return qir_ADD(c, lolo, qir_SHL(c,
                                        qir_ADD(c, hilo, lohi),
                                        qir_uniform_ui(c, 16)));
}

static struct qreg
tgsi_to_qir_umad(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_ADD(c, tgsi_to_qir_umul(c, NULL, 0, src, i), src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_idiv(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_FTOI(c, qir_FMUL(c,
                                    qir_ITOF(c, src[0 * 4 + i]),
                                    qir_RCP(c, qir_ITOF(c, src[1 * 4 + i]))));
}

static struct qreg
tgsi_to_qir_ineg(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_SUB(c, qir_uniform_ui(c, 0), src[0 * 4 + i]);
}

static struct qreg
tgsi_to_qir_seq(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_sne(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_slt(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_sge(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_fseq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fsne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fslt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fsge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_useq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_usne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_islt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_isge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_cmp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, src[0 * 4 + i]);
        return qir_SEL_X_Y_NS(c,
                              src[1 * 4 + i],
                              src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_ucmp(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, src[0 * 4 + i]);
        return qir_SEL_X_Y_ZC(c,
                              src[1 * 4 + i],
                              src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_mad(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return qir_FADD(c,
                        qir_FMUL(c,
                                 src[0 * 4 + i],
                                 src[1 * 4 + i]),
                        src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_lrp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg src0 = src[0 * 4 + i];
        struct qreg src1 = src[1 * 4 + i];
        struct qreg src2 = src[2 * 4 + i];

        /* LRP is:
         *    src0 * src1 + (1 - src0) * src2.
         * -> src0 * src1 + src2 - src0 * src2
         * -> src2 + src0 * (src1 - src2)
         */
        return qir_FADD(c, src2, qir_FMUL(c, src0, qir_FSUB(c, src1, src2)));
}

static void
tgsi_to_qir_tex(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src)
{
        assert(!tgsi_inst->Instruction.Saturate);

        struct qreg s = src[0 * 4 + 0];
        struct qreg t = src[0 * 4 + 1];
        struct qreg r = src[0 * 4 + 2];
        uint32_t unit = tgsi_inst->Src[1].Register.Index;
        bool is_txl = tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL;

        struct qreg proj = c->undef;
        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
                proj = qir_RCP(c, src[0 * 4 + 3]);
                s = qir_FMUL(c, s, proj);
                t = qir_FMUL(c, t, proj);
        }

        struct qreg texture_u[] = {
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
        };
        uint32_t next_texture_u = 0;

        /* There is no native support for GL texture rectangle coordinates, so
         * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
         * 1]).
         */
        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
                s = qir_FMUL(c, s,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_X,
                                                  unit));
                t = qir_FMUL(c, t,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_Y,
                                                  unit));
        }

        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
            is_txl) {
                texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2,
                                           unit | (is_txl << 16));
        }

        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
                struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r);
                struct qreg rcp_ma = qir_RCP(c, ma);
                s = qir_FMUL(c, s, rcp_ma);
                t = qir_FMUL(c, t, rcp_ma);
                r = qir_FMUL(c, r, rcp_ma);

                qir_TEX_R(c, r, texture_u[next_texture_u++]);
        } else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                qir_TEX_R(c, get_temp_for_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
                          texture_u[next_texture_u++]);
        }

        if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
                s = qir_SAT(c, s);
        }

        if (c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                t = qir_SAT(c, t);
        }

        qir_TEX_T(c, t, texture_u[next_texture_u++]);

        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
            tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL)
                qir_TEX_B(c, src[0 * 4 + 3], texture_u[next_texture_u++]);

        qir_TEX_S(c, s, texture_u[next_texture_u++]);

        c->num_texture_samples++;
        struct qreg r4 = qir_TEX_RESULT(c);

        enum pipe_format format = c->key->tex[unit].format;

        struct qreg unpacked[4];
        if (util_format_is_depth_or_stencil(format)) {
                struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
                                                         qir_uniform_ui(c, 8)));
                struct qreg normalized = qir_FMUL(c, depthf,
                                                  qir_uniform_f(c, 1.0f/0xffffff));

                struct qreg depth_output;

                struct qreg one = qir_uniform_f(c, 1.0f);
                if (c->key->tex[unit].compare_mode) {
                        struct qreg compare = src[0 * 4 + 2];

                        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP)
                                compare = qir_FMUL(c, compare, proj);

                        switch (c->key->tex[unit].compare_func) {
                        case PIPE_FUNC_NEVER:
                                depth_output = qir_uniform_f(c, 0.0f);
                                break;
                        case PIPE_FUNC_ALWAYS:
                                depth_output = one;
                                break;
                        case PIPE_FUNC_EQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZS(c, one);
                                break;
                        case PIPE_FUNC_NOTEQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZC(c, one);
                                break;
                        case PIPE_FUNC_GREATER:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        case PIPE_FUNC_GEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LESS:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        }
                } else {
                        depth_output = normalized;
                }

                for (int i = 0; i < 4; i++)
                        unpacked[i] = depth_output;
        } else {
                for (int i = 0; i < 4; i++)
                        unpacked[i] = qir_R4_UNPACK(c, r4, i);
        }

        const uint8_t *format_swiz = vc4_get_format_swizzle(format);
        struct qreg texture_output[4];
        for (int i = 0; i < 4; i++) {
                texture_output[i] = get_swizzled_channel(c, unpacked,
                                                         format_swiz[i]);
        }

        if (util_format_is_srgb(format)) {
                for (int i = 0; i < 3; i++)
                        texture_output[i] = qir_srgb_decode(c,
                                                            texture_output[i]);
        }

        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                update_dst(c, tgsi_inst, i,
                           get_swizzled_channel(c, texture_output,
                                                c->key->tex[unit].swizzle[i]));
        }
}

static struct qreg
tgsi_to_qir_trunc(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
{
        return qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
}

/**
 * Computes x - floor(x), which is tricky because our FTOI truncates (rounds
 * to zero).
 */
static struct qreg
tgsi_to_qir_frc(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
        struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
        qir_SF(c, diff);
        return qir_SEL_X_Y_NS(c,
                              qir_FADD(c, diff, qir_uniform_f(c, 1.0)),
                              diff);
}

/**
 * Computes floor(x), which is tricky because our FTOI truncates (rounds to
 * zero).
 */
static struct qreg
tgsi_to_qir_flr(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));

        /* This will be < 0 if we truncated and the truncation was of a value
         * that was < 0 in the first place.
         */
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));

        return qir_SEL_X_Y_NS(c,
                              qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)),
                              trunc);
}

/**
 * Computes ceil(x), which is tricky because our FTOI truncates (rounds to
 * zero).
 */
static struct qreg
tgsi_to_qir_ceil(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));

        /* This will be < 0 if we truncated and the truncation was of a value
         * that was > 0 in the first place.
         */
        qir_SF(c, qir_FSUB(c, trunc, src[0 * 4 + i]));

        return qir_SEL_X_Y_NS(c,
                              qir_FADD(c, trunc, qir_uniform_f(c, 1.0)),
                              trunc);
}

static struct qreg
tgsi_to_qir_abs(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg arg = src[0 * 4 + i];
        return qir_FMAXABS(c, arg, arg);
}

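/* sin() and cos() below evaluate a Taylor-style polynomial of the input
 * folded into one period and centered at zero (scaled_x = x / (2*pi), then
 * frc() - 0.5), so accuracy degrades gracefully instead of diverging for
 * large arguments.
 */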
/* Note that this instruction replicates its result from the x channel */
static struct qreg
tgsi_to_qir_sin(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        float coeff[] = {
                -2.0 * M_PI,
                pow(2.0 * M_PI, 3) / (3 * 2 * 1),
                -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
                pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
        };

        struct qreg scaled_x =
                qir_FMUL(c,
                         src[0 * 4 + 0],
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));

        struct qreg x = qir_FADD(c,
                                 tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0),
                                 qir_uniform_f(c, -0.5));
        struct qreg x2 = qir_FMUL(c, x, x);
        struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
        for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
                x = qir_FMUL(c, x, x2);
                sum = qir_FADD(c,
                               sum,
                               qir_FMUL(c,
                                        x,
                                        qir_uniform_f(c, coeff[i])));
        }
        return sum;
}

/* Note that this instruction replicates its result from the x channel */
static struct qreg
tgsi_to_qir_cos(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        float coeff[] = {
                -1.0f,
                pow(2.0 * M_PI, 2) / (2 * 1),
                -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
                pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
                pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
        };

        struct qreg scaled_x =
                qir_FMUL(c, src[0 * 4 + 0],
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
        struct qreg x_frac = qir_FADD(c,
                                      tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0),
                                      qir_uniform_f(c, -0.5));

        struct qreg sum = qir_uniform_f(c, coeff[0]);
        struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
        struct qreg x = x2; /* Current x^2, x^4, or x^6 */
        for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
                if (i != 1)
                        x = qir_FMUL(c, x, x2);

                struct qreg mul = qir_FMUL(c,
                                           x,
                                           qir_uniform_f(c, coeff[i]));
                if (i == 0)
                        sum = mul;
                else
                        sum = qir_FADD(c, sum, mul);
        }
        return sum;
}

static struct qreg
tgsi_to_qir_clamp(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
{
        return qir_FMAX(c, qir_FMIN(c,
                                    src[0 * 4 + i],
                                    src[2 * 4 + i]),
                        src[1 * 4 + i]);
}

static struct qreg
tgsi_to_qir_ssg(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, src[0 * 4 + i]);
        return qir_SEL_X_Y_NC(c,
                              qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0)),
                              qir_uniform_f(c, -1.0));
}

/* Compare to tgsi_to_qir_flr() for the floor logic. */
static struct qreg
tgsi_to_qir_arl(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_FTOI(c, src[0 * 4 + i]);
        struct qreg scaled = qir_SHL(c, trunc, qir_uniform_ui(c, 4));

        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], qir_ITOF(c, trunc)));

        return qir_SEL_X_Y_NS(c, qir_SUB(c, scaled, qir_uniform_ui(c, 4)),
                              scaled);
}

static struct qreg
tgsi_to_qir_uarl(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_SHL(c, src[0 * 4 + i], qir_uniform_ui(c, 4));
}

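/**
 * Converts one channel of a vertex attribute read from the VPM into a
 * 32-bit float, handling the supported 8-, 16-, and 32-bit signed/unsigned
 * (optionally normalized) channel types.  Returns c->undef for unsupported
 * formats so the caller can report them.
 */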
static struct qreg
get_channel_from_vpm(struct vc4_compile *c,
                     struct qreg *vpm_reads,
                     uint8_t swiz,
                     const struct util_format_description *desc)
{
        const struct util_format_channel_description *chan =
                &desc->channel[swiz];
        struct qreg temp;

        if (swiz > UTIL_FORMAT_SWIZZLE_W)
                return get_swizzled_channel(c, vpm_reads, swiz);
        else if (chan->size == 32 &&
                 chan->type == UTIL_FORMAT_TYPE_FLOAT) {
                return get_swizzled_channel(c, vpm_reads, swiz);
        } else if (chan->size == 32 &&
                   chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                if (chan->normalized) {
                        return qir_FMUL(c,
                                        qir_ITOF(c, vpm_reads[swiz]),
                                        qir_uniform_f(c,
                                                      1.0 / 0x7fffffff));
                } else {
                        return qir_ITOF(c, vpm_reads[swiz]);
                }
        } else if (chan->size == 8 &&
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
                struct qreg vpm = vpm_reads[0];
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        temp = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
                        if (chan->normalized) {
                                return qir_FSUB(c, qir_FMUL(c,
                                                            qir_UNPACK_8_F(c, temp, swiz),
                                                            qir_uniform_f(c, 2.0)),
                                                qir_uniform_f(c, 1.0));
                        } else {
                                return qir_FADD(c,
                                                qir_ITOF(c,
                                                         qir_UNPACK_8_I(c, temp,
                                                                        swiz)),
                                                qir_uniform_f(c, -128.0));
                        }
                } else {
                        if (chan->normalized) {
                                return qir_UNPACK_8_F(c, vpm, swiz);
                        } else {
                                return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz));
                        }
                }
        } else if (chan->size == 16 &&
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
                struct qreg vpm = vpm_reads[swiz / 2];

                /* Note that UNPACK_16F eats a half float, not ints, so we use
                 * UNPACK_16_I for all of these.
                 */
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2));
                        if (chan->normalized) {
                                return qir_FMUL(c, temp,
                                                qir_uniform_f(c, 1/32768.0f));
                        } else {
                                return temp;
                        }
                } else {
                        /* UNPACK_16I sign-extends, so we have to emit ANDs. */
                        temp = vpm;
                        if (swiz == 1 || swiz == 3)
                                temp = qir_UNPACK_16_I(c, temp, 1);
                        temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff));
                        temp = qir_ITOF(c, temp);

                        if (chan->normalized) {
                                return qir_FMUL(c, temp,
                                                qir_uniform_f(c, 1 / 65535.0));
                        } else {
                                return temp;
                        }
                }
        } else {
                return c->undef;
        }
}

static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
        enum pipe_format format = c->vs_key->attr_formats[attr];
        uint32_t attr_size = util_format_get_blocksize(format);
        struct qreg vpm_reads[4];

        c->vattr_sizes[attr] = align(attr_size, 4);
        for (int i = 0; i < align(attr_size, 4) / 4; i++) {
                struct qreg vpm = { QFILE_VPM, attr * 4 + i };
                vpm_reads[i] = qir_MOV(c, vpm);
                c->num_inputs++;
        }

        bool format_warned = false;
        const struct util_format_description *desc =
                util_format_description(format);

        for (int i = 0; i < 4; i++) {
                uint8_t swiz = desc->swizzle[i];
                struct qreg result = get_channel_from_vpm(c, vpm_reads,
                                                          swiz, desc);

                if (result.file == QFILE_NULL) {
                        if (!format_warned) {
                                fprintf(stderr,
                                        "vtx element %d unsupported type: %s\n",
                                        attr, util_format_name(format));
                                format_warned = true;
                        }
                        result = qir_uniform_f(c, 0.0);
                }
                c->inputs[attr * 4 + i] = result;
        }
}

static void
tgsi_to_qir_kill_if(struct vc4_compile *c, struct qreg *src, int i)
{
        if (c->discard.file == QFILE_NULL)
                c->discard = qir_uniform_f(c, 0.0);
        qir_SF(c, src[0 * 4 + i]);
        c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
                                    c->discard);
}

static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
        c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
        c->inputs[attr * 4 + 2] =
                qir_FMUL(c,
                         qir_ITOF(c, qir_FRAG_Z(c)),
                         qir_uniform_f(c, 1.0 / 0xffffff));
        c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}

static void
emit_point_coord_input(struct vc4_compile *c, int attr)
{
        if (c->point_x.file == QFILE_NULL) {
                c->point_x = qir_uniform_f(c, 0.0);
                c->point_y = qir_uniform_f(c, 0.0);
        }

        c->inputs[attr * 4 + 0] = c->point_x;
        if (c->fs_key->point_coord_upper_left) {
                c->inputs[attr * 4 + 1] = qir_FSUB(c,
                                                   qir_uniform_f(c, 1.0),
                                                   c->point_y);
        } else {
                c->inputs[attr * 4 + 1] = c->point_y;
        }
        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
}

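/**
 * Allocates an input semantics slot and emits the QIR to read one channel
 * of a varying: the raw interpolated value is multiplied by W and the C
 * coefficient added (VARY_ADD_C) to complete the hardware's
 * perspective-correct interpolation.
 */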
static struct qreg
emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
                      uint8_t index, uint8_t swizzle)
{
        uint32_t i = c->num_input_semantics++;
        struct qreg vary = {
                QFILE_VARY,
                i
        };

        if (c->num_input_semantics >= c->input_semantics_array_size) {
                c->input_semantics_array_size =
                        MAX2(4, c->input_semantics_array_size * 2);

                c->input_semantics = reralloc(c, c->input_semantics,
                                              struct vc4_varying_semantic,
                                              c->input_semantics_array_size);
        }

        c->input_semantics[i].semantic = semantic;
        c->input_semantics[i].index = index;
        c->input_semantics[i].swizzle = swizzle;

        return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
}

static void
emit_fragment_input(struct vc4_compile *c, int attr,
                    struct tgsi_full_declaration *decl)
{
        for (int i = 0; i < 4; i++) {
                c->inputs[attr * 4 + i] =
                        emit_fragment_varying(c,
                                              decl->Semantic.Name,
                                              decl->Semantic.Index,
                                              i);
                c->num_inputs++;
        }
}

static void
emit_face_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FSUB(c,
                                           qir_uniform_f(c, 1.0),
                                           qir_FMUL(c,
                                                    qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
                                                    qir_uniform_f(c, 2.0)));
        c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
}

static void
add_output(struct vc4_compile *c,
           uint32_t decl_offset,
           uint8_t semantic_name,
           uint8_t semantic_index,
           uint8_t semantic_swizzle)
{
        uint32_t old_array_size = c->outputs_array_size;
        resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
                          decl_offset + 1);

        if (old_array_size != c->outputs_array_size) {
                c->output_semantics = reralloc(c,
                                               c->output_semantics,
                                               struct vc4_varying_semantic,
                                               c->outputs_array_size);
        }

        c->output_semantics[decl_offset].semantic = semantic_name;
        c->output_semantics[decl_offset].index = semantic_index;
        c->output_semantics[decl_offset].swizzle = semantic_swizzle;
}

static void
add_array_info(struct vc4_compile *c, uint32_t array_id,
               uint32_t start, uint32_t size)
{
        if (array_id >= c->ubo_ranges_array_size) {
                c->ubo_ranges_array_size = MAX2(c->ubo_ranges_array_size * 2,
                                                array_id + 1);
                c->ubo_ranges = reralloc(c, c->ubo_ranges,
                                         struct vc4_compiler_ubo_range,
                                         c->ubo_ranges_array_size);
        }

        c->ubo_ranges[array_id].dst_offset = 0;
        c->ubo_ranges[array_id].src_offset = start;
        c->ubo_ranges[array_id].size = size;
        c->ubo_ranges[array_id].used = false;
}

static void
emit_tgsi_declaration(struct vc4_compile *c,
                      struct tgsi_full_declaration *decl)
{
        switch (decl->Declaration.File) {
        case TGSI_FILE_TEMPORARY: {
                uint32_t old_size = c->temps_array_size;
                resize_qreg_array(c, &c->temps, &c->temps_array_size,
                                  (decl->Range.Last + 1) * 4);

                for (int i = old_size; i < c->temps_array_size; i++)
                        c->temps[i] = qir_uniform_ui(c, 0);
                break;
        }

        case TGSI_FILE_INPUT:
                resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                  (decl->Range.Last + 1) * 4);

                for (int i = decl->Range.First;
                     i <= decl->Range.Last;
                     i++) {
                        if (c->stage == QSTAGE_FRAG) {
                                if (decl->Semantic.Name ==
                                    TGSI_SEMANTIC_POSITION) {
                                        emit_fragcoord_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
                                        emit_face_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
                                           (c->fs_key->point_sprite_mask &
                                            (1 << decl->Semantic.Index))) {
                                        emit_point_coord_input(c, i);
                                } else {
                                        emit_fragment_input(c, i, decl);
                                }
                        } else {
                                emit_vertex_input(c, i);
                        }
                }
                break;

        case TGSI_FILE_OUTPUT: {
                for (int i = 0; i < 4; i++) {
                        add_output(c,
                                   decl->Range.First * 4 + i,
                                   decl->Semantic.Name,
                                   decl->Semantic.Index,
                                   i);
                }

                switch (decl->Semantic.Name) {
                case TGSI_SEMANTIC_POSITION:
                        c->output_position_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_CLIPVERTEX:
                        c->output_clipvertex_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_COLOR:
                        c->output_color_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_PSIZE:
                        c->output_point_size_index = decl->Range.First * 4;
                        break;
                }

                break;
        }

        case TGSI_FILE_CONSTANT:
                add_array_info(c,
                               decl->Array.ArrayID,
                               decl->Range.First * 16,
                               (decl->Range.Last -
                                decl->Range.First + 1) * 16);
                break;
        }
}

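/**
 * Translates one TGSI instruction to QIR, using the op_trans table to map
 * simple per-channel ALU opcodes and dedicated helpers for everything else
 * (texturing, kills).
 */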
static void
emit_tgsi_instruction(struct vc4_compile *c,
                      struct tgsi_full_instruction *tgsi_inst)
{
        static const struct {
                enum qop op;
                struct qreg (*func)(struct vc4_compile *c,
                                    struct tgsi_full_instruction *tgsi_inst,
                                    enum qop op,
                                    struct qreg *src, int i);
        } op_trans[] = {
                [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
                [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
                [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
                [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
                [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
                [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
                [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
                [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu },
                [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu },
                [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu },
                [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu },
                [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu },
                [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu },
                [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu },
                [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu },
                [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu },
                [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu },
                [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu },
                [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu },

                [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul },
                [TGSI_OPCODE_UMAD] = { 0, tgsi_to_qir_umad },
                [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
                [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },

                [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
                [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
                [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
                [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
                [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq },
                [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne },
                [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge },
                [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt },
                [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq },
                [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne },
                [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge },
                [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },

                [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                [TGSI_OPCODE_UCMP] = { 0, tgsi_to_qir_ucmp },
                [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
                [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq },
                [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
                [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
                [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },
                [TGSI_OPCODE_TRUNC] = { 0, tgsi_to_qir_trunc },
                [TGSI_OPCODE_CEIL] = { 0, tgsi_to_qir_ceil },
                [TGSI_OPCODE_FRC] = { 0, tgsi_to_qir_frc },
                [TGSI_OPCODE_FLR] = { 0, tgsi_to_qir_flr },
                [TGSI_OPCODE_SIN] = { 0, tgsi_to_qir_sin },
                [TGSI_OPCODE_COS] = { 0, tgsi_to_qir_cos },
                [TGSI_OPCODE_CLAMP] = { 0, tgsi_to_qir_clamp },
                [TGSI_OPCODE_SSG] = { 0, tgsi_to_qir_ssg },
                [TGSI_OPCODE_ARL] = { 0, tgsi_to_qir_arl },
                [TGSI_OPCODE_UARL] = { 0, tgsi_to_qir_uarl },
        };
        static int asdf = 0;
        uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;

        if (tgsi_op == TGSI_OPCODE_END)
                return;

        struct qreg src_regs[12];
        for (int s = 0; s < 3; s++) {
                for (int i = 0; i < 4; i++) {
                        src_regs[4 * s + i] =
                                get_src(c, tgsi_inst->Instruction.Opcode,
                                        &tgsi_inst->Src[s], i);
                }
        }

        switch (tgsi_op) {
        case TGSI_OPCODE_TEX:
        case TGSI_OPCODE_TXP:
        case TGSI_OPCODE_TXB:
        case TGSI_OPCODE_TXL:
                tgsi_to_qir_tex(c, tgsi_inst,
                                op_trans[tgsi_op].op, src_regs);
                return;
        case TGSI_OPCODE_KILL:
                c->discard = qir_uniform_f(c, 1.0);
                return;
        case TGSI_OPCODE_KILL_IF:
                for (int i = 0; i < 4; i++)
                        tgsi_to_qir_kill_if(c, src_regs, i);
                return;
        default:
                break;
        }

        if (tgsi_op >= ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
                fprintf(stderr, "unknown tgsi inst: ");
                tgsi_dump_instruction(tgsi_inst, asdf++);
                fprintf(stderr, "\n");
                abort();
        }

        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                struct qreg result;

                result = op_trans[tgsi_op].func(c, tgsi_inst,
                                                op_trans[tgsi_op].op,
                                                src_regs, i);

                if (tgsi_inst->Instruction.Saturate) {
                        float low = (tgsi_inst->Instruction.Saturate ==
                                     TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
                        result = qir_FMAX(c,
                                          qir_FMIN(c,
                                                   result,
                                                   qir_uniform_f(c, 1.0)),
                                          qir_uniform_f(c, low));
                }

                update_dst(c, tgsi_inst, i, result);
        }
}

static void
parse_tgsi_immediate(struct vc4_compile *c, struct tgsi_full_immediate *imm)
{
        for (int i = 0; i < 4; i++) {
                unsigned n = c->num_consts++;
                resize_qreg_array(c, &c->consts, &c->consts_array_size, n + 1);
                c->consts[n] = qir_uniform_ui(c, imm->u[i].Uint);
        }
}

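/**
 * Computes val * factor for one color channel of the fixed-function blend
 * equation.  The dual-source SRC1 factors are unsupported and fall through
 * to the error path.
 */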
static struct qreg
vc4_blend_channel(struct vc4_compile *c,
                  struct qreg *dst,
                  struct qreg *src,
                  struct qreg val,
                  unsigned factor,
                  int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return val;
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return qir_FMUL(c, val, src[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return qir_FMUL(c, val, src[3]);
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return qir_FMUL(c, val, dst[3]);
        case PIPE_BLENDFACTOR_DST_COLOR:
                return qir_FMUL(c, val, dst[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                if (channel != 3) {
                        return qir_FMUL(c,
                                        val,
                                        qir_FMIN(c,
                                                 src[3],
                                                 qir_FSUB(c,
                                                          qir_uniform_f(c, 1.0),
                                                          dst[3])));
                } else {
                        return val;
                }
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     channel));
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     3));
        case PIPE_BLENDFACTOR_ZERO:
                return qir_uniform_f(c, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[channel]));
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[3]));
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[3]));
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[channel]));
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              channel)));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              3)));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return val;
        }
}

static struct qreg
vc4_blend_func(struct vc4_compile *c,
               struct qreg src, struct qreg dst,
               unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return qir_FADD(c, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return qir_FSUB(c, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return qir_FSUB(c, dst, src);
        case PIPE_BLEND_MIN:
                return qir_FMIN(c, src, dst);
        case PIPE_BLEND_MAX:
                return qir_FMAX(c, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

/**
 * Implements fixed function blending in shader code.
 *
 * VC4 doesn't have any hardware support for blending.  Instead, you read the
 * current contents of the destination from the tile buffer after having
 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
 * math using your output color and that destination value, and update the
 * output color appropriately.
 */
static void
vc4_blend(struct vc4_compile *c, struct qreg *result,
          struct qreg *dst_color, struct qreg *src_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        struct qreg clamped_src[4];
        struct qreg clamped_dst[4];
        for (int i = 0; i < 4; i++) {
                clamped_src[i] = qir_SAT(c, src_color[i]);
                clamped_dst[i] = qir_SAT(c, dst_color[i]);
        }
        src_color = clamped_src;
        dst_color = clamped_dst;

        struct qreg src_blend[4], dst_blend[4];
        for (int i = 0; i < 3; i++) {
                src_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 src_color[i],
                                                 blend->rgb_src_factor, i);
                dst_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 dst_color[i],
                                                 blend->rgb_dst_factor, i);
        }
        src_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         src_color[3],
                                         blend->alpha_src_factor, 3);
        dst_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         dst_color[3],
                                         blend->alpha_dst_factor, 3);

        for (int i = 0; i < 3; i++) {
                result[i] = vc4_blend_func(c,
                                           src_blend[i], dst_blend[i],
                                           blend->rgb_func);
        }
        result[3] = vc4_blend_func(c,
                                   src_blend[3], dst_blend[3],
                                   blend->alpha_func);
}

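/* Folds user clip plane distances into the discard condition: any pixel
 * with a negative interpolated clip distance gets its discard flag set.
 */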
static void
clip_distance_discard(struct vc4_compile *c)
{
        for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) {
                if (!(c->key->ucp_enables & (1 << i)))
                        continue;

                struct qreg dist = emit_fragment_varying(c,
                                                         TGSI_SEMANTIC_CLIPDIST,
                                                         i,
                                                         TGSI_SWIZZLE_X);

                qir_SF(c, dist);

                if (c->discard.file == QFILE_NULL)
                        c->discard = qir_uniform_f(c, 0.0);

                c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
                                            c->discard);
        }
}

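/* Implements glAlphaFunc() in the shader: compares the output alpha against
 * the alpha ref uniform and ORs a failure into the discard condition.
 */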
static void
alpha_test_discard(struct vc4_compile *c)
{
        struct qreg src_alpha;
        struct qreg alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0);

        if (!c->fs_key->alpha_test)
                return;

        if (c->output_color_index != -1)
                src_alpha = c->outputs[c->output_color_index + 3];
        else
                src_alpha = qir_uniform_f(c, 1.0);

        if (c->discard.file == QFILE_NULL)
                c->discard = qir_uniform_f(c, 0.0);

        switch (c->fs_key->alpha_test_func) {
        case PIPE_FUNC_NEVER:
                c->discard = qir_uniform_f(c, 1.0);
                break;
        case PIPE_FUNC_ALWAYS:
                break;
        case PIPE_FUNC_EQUAL:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_ZS(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_NOTEQUAL:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_ZC(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_GREATER:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_NC(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_GEQUAL:
                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
                c->discard = qir_SEL_X_Y_NS(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_LESS:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_NS(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_LEQUAL:
                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
                c->discard = qir_SEL_X_Y_NC(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        }
}

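/* Applies the framebuffer logic op as bitwise math on the packed 8888
 * source and destination colors.
 */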
static struct qreg
vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
{
        switch (c->fs_key->logicop_func) {
        case PIPE_LOGICOP_CLEAR:
                return qir_uniform_f(c, 0.0);
        case PIPE_LOGICOP_NOR:
                return qir_NOT(c, qir_OR(c, src, dst));
        case PIPE_LOGICOP_AND_INVERTED:
                return qir_AND(c, qir_NOT(c, src), dst);
        case PIPE_LOGICOP_COPY_INVERTED:
                return qir_NOT(c, src);
        case PIPE_LOGICOP_AND_REVERSE:
                return qir_AND(c, src, qir_NOT(c, dst));
        case PIPE_LOGICOP_INVERT:
                return qir_NOT(c, dst);
        case PIPE_LOGICOP_XOR:
                return qir_XOR(c, src, dst);
        case PIPE_LOGICOP_NAND:
                return qir_NOT(c, qir_AND(c, src, dst));
        case PIPE_LOGICOP_AND:
                return qir_AND(c, src, dst);
        case PIPE_LOGICOP_EQUIV:
                return qir_NOT(c, qir_XOR(c, src, dst));
        case PIPE_LOGICOP_NOOP:
                return dst;
        case PIPE_LOGICOP_OR_INVERTED:
                return qir_OR(c, qir_NOT(c, src), dst);
        case PIPE_LOGICOP_OR_REVERSE:
                return qir_OR(c, src, qir_NOT(c, dst));
        case PIPE_LOGICOP_OR:
                return qir_OR(c, src, dst);
        case PIPE_LOGICOP_SET:
                return qir_uniform_ui(c, ~0);
        case PIPE_LOGICOP_COPY:
        default:
                return src;
        }
}

static void
emit_frag_end(struct vc4_compile *c)
{
        clip_distance_discard(c);
        alpha_test_discard(c);

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg packed_dst_color = c->undef;

        if (c->fs_key->blend.blend_enable ||
            c->fs_key->blend.colormask != 0xf ||
            c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
                struct qreg r4 = qir_TLB_COLOR_READ(c);
                for (int i = 0; i < 4; i++)
                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
                for (int i = 0; i < 4; i++) {
                        dst_color[i] = get_swizzled_channel(c,
                                                            tlb_read_color,
                                                            format_swiz[i]);
                        if (util_format_is_srgb(color_format) && i != 3) {
                                linear_dst_color[i] =
                                        qir_srgb_decode(c, dst_color[i]);
                        } else {
                                linear_dst_color[i] = dst_color[i];
                        }
                }

                /* Save the packed value for logic ops.  Can't reuse r4
                 * because other things might smash it (like sRGB).
                 */
                packed_dst_color = qir_MOV(c, r4);
        }

        struct qreg blend_color[4];
        struct qreg undef_array[4] = {
                c->undef, c->undef, c->undef, c->undef
        };
        vc4_blend(c, blend_color, linear_dst_color,
                  (c->output_color_index != -1 ?
                   c->outputs + c->output_color_index :
                   undef_array));

        if (util_format_is_srgb(color_format)) {
                for (int i = 0; i < 3; i++)
                        blend_color[i] = qir_srgb_encode(c, blend_color[i]);
        }

        /* Debug: Sometimes you're getting a black output and just want to see
         * if the FS is getting executed at all.  Spam magenta into the color
         * output.
         */
        if (0) {
                blend_color[0] = qir_uniform_f(c, 1.0);
                blend_color[1] = qir_uniform_f(c, 0.0);
                blend_color[2] = qir_uniform_f(c, 1.0);
                blend_color[3] = qir_uniform_f(c, 0.5);
        }

        struct qreg swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
                                                           format_swiz[i]);
        }

        if (c->discard.file != QFILE_NULL)
                qir_TLB_DISCARD_SETUP(c, c->discard);

        if (c->fs_key->stencil_enabled) {
                qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 0));
                if (c->fs_key->stencil_twoside) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 1));
                }
                if (c->fs_key->stencil_full_writemasks) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 2));
                }
        }

        if (c->fs_key->depth_enabled) {
                struct qreg z;
                if (c->output_position_index != -1) {
                        z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
                                                 qir_uniform_f(c, 0xffffff)));
                } else {
                        z = qir_FRAG_Z(c);
                }
                qir_TLB_Z_WRITE(c, z);
        }

        struct qreg packed_color = c->undef;
        for (int i = 0; i < 4; i++) {
                if (swizzled_outputs[i].file == QFILE_NULL)
                        continue;
                if (packed_color.file == QFILE_NULL) {
                        packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
                } else {
                        packed_color = qir_PACK_8_F(c,
                                                    packed_color,
                                                    swizzled_outputs[i],
                                                    i);
                }
        }

        if (packed_color.file == QFILE_NULL)
                packed_color = qir_uniform_ui(c, 0);

        if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
                packed_color = vc4_logicop(c, packed_color, packed_dst_color);
        }

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        colormask &= ~(0xff << (i * 8));
                }
        }
        if (colormask != 0xffffffff) {
                packed_color = qir_OR(c,
                                      qir_AND(c, packed_color,
                                              qir_uniform_ui(c, colormask)),
                                      qir_AND(c, packed_dst_color,
                                              qir_uniform_ui(c, ~colormask)));
        }

        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                             packed_color, c->undef));
}

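/* Converts the clip-space X/Y outputs to screen coordinates: multiply by the
 * viewport scale uniforms and 1/W, convert to integer, and pack both axes
 * into one VPM write (the scale uniforms presumably include the hardware's
 * subpixel fixed-point factor, set up on the state-emit side).
 */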
static void
emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
{
        struct qreg xyi[2];

        for (int i = 0; i < 2; i++) {
                struct qreg scale =
                        add_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);

                xyi[i] = qir_FTOI(c, qir_FMUL(c,
                                              qir_FMUL(c,
                                                       c->outputs[c->output_position_index + i],
                                                       scale),
                                              rcp_w));
        }

        qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
}

static void
emit_zs_write(struct vc4_compile *c, struct qreg rcp_w)
{
        struct qreg zscale = add_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
        struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);

        qir_VPM_WRITE(c, qir_FADD(c, qir_FMUL(c, qir_FMUL(c,
                                                          c->outputs[c->output_position_index + 2],
                                                          zscale),
                                              rcp_w),
                                  zoffset));
}

static void
emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
{
        qir_VPM_WRITE(c, rcp_w);
}

static void
emit_point_size_write(struct vc4_compile *c)
{
        struct qreg point_size;

        if (c->output_point_size_index != -1)
                point_size = c->outputs[c->output_point_size_index + 3];
        else
                point_size = qir_uniform_f(c, 1.0);

        /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
         * BCM21553).
         */
        point_size = qir_FMAX(c, point_size, qir_uniform_f(c, .125));

        qir_VPM_WRITE(c, point_size);
}

/**
 * Emits a VPM read of the stub vertex attribute set up by vc4_draw.c.
 *
 * The simulator insists that there be at least one vertex attribute, so
 * vc4_draw.c will emit one if it wouldn't have otherwise.  The simulator also
 * insists that all vertex attributes loaded get read by the VS/CS, so we have
 * to consume it here.
 */
static void
emit_stub_vpm_read(struct vc4_compile *c)
{
        if (c->num_inputs)
                return;

        c->vattr_sizes[0] = 4;
        struct qreg vpm = { QFILE_VPM, 0 };
        (void)qir_MOV(c, vpm);
        c->num_inputs++;
}

1967 static void
1968 emit_ucp_clipdistance(struct vc4_compile *c)
1969 {
1970 unsigned cv;
1971 if (c->output_clipvertex_index != -1)
1972 cv = c->output_clipvertex_index;
1973 else if (c->output_position_index != -1)
1974 cv = c->output_position_index;
1975 else
1976 return;
1977
1978 for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) {
1979 if (!(c->key->ucp_enables & (1 << plane)))
1980 continue;
1981
1982 /* Pick the next outputs[] that hasn't been written to, since
1983 * there are no other program writes left to be processed at
1984 * this point. If something had been declared but not written
1985 * (like a w component), we'll just smash over the top of it.
1986 */
1987 uint32_t output_index = c->num_outputs++;
1988 add_output(c, output_index,
1989 TGSI_SEMANTIC_CLIPDIST,
1990 plane,
1991 TGSI_SWIZZLE_X);
1992
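/* dist is the standard user-clip-plane distance: the dot product of the
 * 4-component clip-space position with the plane coefficients,
 * dist = sum(pos[i] * plane[i]).
 */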
1994 struct qreg dist = qir_uniform_f(c, 0.0);
1995 for (int i = 0; i < 4; i++) {
1996 struct qreg pos_chan = c->outputs[cv + i];
1997 struct qreg ucp =
1998 add_uniform(c, QUNIFORM_USER_CLIP_PLANE,
1999 plane * 4 + i);
2000 dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp));
2001 }
2002
2003 c->outputs[output_index] = dist;
2004 }
2005 }
2006
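/* Note on output ordering: the VS must write its varyings to the VPM in
 * exactly the order the compiled FS will read them, so the matching loop
 * below walks the FS's input semantics and emits the corresponding VS
 * output (or a 0.0 pad) for each one.
 */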
2007 static void
2008 emit_vert_end(struct vc4_compile *c,
2009 struct vc4_varying_semantic *fs_inputs,
2010 uint32_t num_fs_inputs)
2011 {
2012 struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
2013
2014 emit_stub_vpm_read(c);
2015 emit_ucp_clipdistance(c);
2016
2017 emit_scaled_viewport_write(c, rcp_w);
2018 emit_zs_write(c, rcp_w);
2019 emit_rcp_wc_write(c, rcp_w);
2020 if (c->vs_key->per_vertex_point_size)
2021 emit_point_size_write(c);
2022
2023 for (int i = 0; i < num_fs_inputs; i++) {
2024 struct vc4_varying_semantic *input = &fs_inputs[i];
2025 int j;
2026
2027 for (j = 0; j < c->num_outputs; j++) {
2028 struct vc4_varying_semantic *output =
2029 &c->output_semantics[j];
2030
2031 if (input->semantic == output->semantic &&
2032 input->index == output->index &&
2033 input->swizzle == output->swizzle) {
2034 qir_VPM_WRITE(c, c->outputs[j]);
2035 break;
2036 }
2037 }
2038 /* Emit padding if we didn't find a declared VS output for
2039 * this FS input.
2040 */
2041 if (j == c->num_outputs)
2042 qir_VPM_WRITE(c, qir_uniform_f(c, 0.0));
2043 }
2044 }
2045
2046 static void
2047 emit_coord_end(struct vc4_compile *c)
2048 {
2049 struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
2050
2051 emit_stub_vpm_read(c);
2052
2053 for (int i = 0; i < 4; i++)
2054 qir_VPM_WRITE(c, c->outputs[c->output_position_index + i]);
2055
2056 emit_scaled_viewport_write(c, rcp_w);
2057 emit_zs_write(c, rcp_w);
2058 emit_rcp_wc_write(c, rcp_w);
2059 if (c->vs_key->per_vertex_point_size)
2060 emit_point_size_write(c);
2061 }
2062
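/* Overall compile pipeline for one shader variant: parse the (possibly
 * lowered) TGSI token stream, emit QIR for each declaration, instruction,
 * and immediate, append the stage-specific epilogue (emit_frag_end() /
 * emit_vert_end() / emit_coord_end()), run the QIR optimizer, reorder
 * uniforms, and finally generate QPU instructions.
 */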
2063 static struct vc4_compile *
2064 vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage,
2065 struct vc4_key *key)
2066 {
2067 struct vc4_compile *c = qir_compile_init();
2068 int ret;
2069
2070 c->stage = stage;
2071 for (int i = 0; i < 4; i++)
2072 c->addr[i] = qir_uniform_f(c, 0.0);
2073
2074 c->shader_state = &key->shader_state->base;
2075 c->program_id = key->shader_state->program_id;
2076 c->variant_id = key->shader_state->compiled_variant_count++;
2077
2078 c->key = key;
2079 switch (stage) {
2080 case QSTAGE_FRAG:
2081 c->fs_key = (struct vc4_fs_key *)key;
2082 if (c->fs_key->is_points) {
2083 c->point_x = emit_fragment_varying(c, ~0, ~0, 0);
2084 c->point_y = emit_fragment_varying(c, ~0, ~0, 0);
2085 } else if (c->fs_key->is_lines) {
2086 c->line_x = emit_fragment_varying(c, ~0, ~0, 0);
2087 }
2088 break;
2089 case QSTAGE_VERT:
2090 c->vs_key = (struct vc4_vs_key *)key;
2091 break;
2092 case QSTAGE_COORD:
2093 c->vs_key = (struct vc4_vs_key *)key;
2094 break;
2095 }
2096
2097 const struct tgsi_token *tokens = key->shader_state->base.tokens;
2098 if (c->fs_key && c->fs_key->light_twoside) {
2099 if (!key->shader_state->twoside_tokens) {
2100 const struct tgsi_lowering_config lowering_config = {
2101 .color_two_side = true,
2102 };
2103 struct tgsi_shader_info info;
2104 key->shader_state->twoside_tokens =
2105 tgsi_transform_lowering(&lowering_config,
2106 key->shader_state->base.tokens,
2107 &info);
2108
2109 /* If no transformation occurred, then NULL is
2110 * returned and we just use our original tokens.
2111 */
2112 if (!key->shader_state->twoside_tokens) {
2113 key->shader_state->twoside_tokens =
2114 key->shader_state->base.tokens;
2115 }
2116 }
2117 tokens = key->shader_state->twoside_tokens;
2118 }
2119
2120 ret = tgsi_parse_init(&c->parser, tokens);
2121 assert(ret == TGSI_PARSE_OK);
2122
2123 if (vc4_debug & VC4_DEBUG_TGSI) {
2124 fprintf(stderr, "%s prog %d/%d TGSI:\n",
2125 qir_get_stage_name(c->stage),
2126 c->program_id, c->variant_id);
2127 tgsi_dump(tokens, 0);
2128 }
2129
2130 while (!tgsi_parse_end_of_tokens(&c->parser)) {
2131 tgsi_parse_token(&c->parser);
2132
2133 switch (c->parser.FullToken.Token.Type) {
2134 case TGSI_TOKEN_TYPE_DECLARATION:
2135 emit_tgsi_declaration(c,
2136 &c->parser.FullToken.FullDeclaration);
2137 break;
2138
2139 case TGSI_TOKEN_TYPE_INSTRUCTION:
2140 emit_tgsi_instruction(c,
2141 &c->parser.FullToken.FullInstruction);
2142 break;
2143
2144 case TGSI_TOKEN_TYPE_IMMEDIATE:
2145 parse_tgsi_immediate(c,
2146 &c->parser.FullToken.FullImmediate);
2147 break;
2148 }
2149 }
2150
2151 switch (stage) {
2152 case QSTAGE_FRAG:
2153 emit_frag_end(c);
2154 break;
2155 case QSTAGE_VERT:
2156 emit_vert_end(c,
2157 vc4->prog.fs->input_semantics,
2158 vc4->prog.fs->num_inputs);
2159 break;
2160 case QSTAGE_COORD:
2161 emit_coord_end(c);
2162 break;
2163 }
2164
2165 tgsi_parse_free(&c->parser);
2166 if (vc4_debug & VC4_DEBUG_QIR) {
2167 fprintf(stderr, "%s prog %d/%d pre-opt QIR:\n",
2168 qir_get_stage_name(c->stage),
2169 c->program_id, c->variant_id);
2170 qir_dump(c);
2171 }
2172
2173 qir_optimize(c);
2174
2175 if (vc4_debug & VC4_DEBUG_QIR) {
2176 fprintf(stderr, "%s prog %d/%d QIR:\n",
2177 qir_get_stage_name(c->stage),
2178 c->program_id, c->variant_id);
2179 qir_dump(c);
2180 }
2181 qir_reorder_uniforms(c);
2182 vc4_generate_code(vc4, c);
2183
2184 if (vc4_debug & VC4_DEBUG_SHADERDB) {
2185 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n",
2186 qir_get_stage_name(c->stage),
2187 c->program_id, c->variant_id,
2188 c->qpu_inst_count);
2189 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d uniforms\n",
2190 qir_get_stage_name(c->stage),
2191 c->program_id, c->variant_id,
2192 c->num_uniforms);
2193 }
2194
2195 return c;
2196 }
2197
2198 static void *
2199 vc4_shader_state_create(struct pipe_context *pctx,
2200 const struct pipe_shader_state *cso)
2201 {
2202 struct vc4_context *vc4 = vc4_context(pctx);
2203 struct vc4_uncompiled_shader *so = CALLOC_STRUCT(vc4_uncompiled_shader);
2204 if (!so)
2205 return NULL;
2206
2207 const struct tgsi_lowering_config lowering_config = {
2208 .lower_DST = true,
2209 .lower_XPD = true,
2210 .lower_SCS = true,
2211 .lower_POW = true,
2212 .lower_LIT = true,
2213 .lower_EXP = true,
2214 .lower_LOG = true,
2215 .lower_DP4 = true,
2216 .lower_DP3 = true,
2217 .lower_DPH = true,
2218 .lower_DP2 = true,
2219 .lower_DP2A = true,
2220 };
2221
2222 struct tgsi_shader_info info;
2223 so->base.tokens = tgsi_transform_lowering(&lowering_config, cso->tokens, &info);
2224 if (!so->base.tokens)
2225 so->base.tokens = tgsi_dup_tokens(cso->tokens);
2226 so->program_id = vc4->next_uncompiled_program_id++;
2227
2228 return so;
2229 }
2230
2231 static void
2232 copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
2233 struct vc4_compile *c)
2234 {
2235 int count = c->num_uniforms;
2236 struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
2237
2238 uinfo->count = count;
2239 uinfo->data = ralloc_array(shader, uint32_t, count);
2240 memcpy(uinfo->data, c->uniform_data,
2241 count * sizeof(*uinfo->data));
2242 uinfo->contents = ralloc_array(shader, enum quniform_contents, count);
2243 memcpy(uinfo->contents, c->uniform_contents,
2244 count * sizeof(*uinfo->contents));
2245 uinfo->num_texture_samples = c->num_texture_samples;
2246 }
2247
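/* Variant cache lookup. Keys are hashed and compared as raw bytes (see
 * fs_cache_hash()/fs_cache_compare() below), which is why callers
 * memset() their key structs to zero first: any padding bytes have to
 * compare equal for the cache to hit.
 */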
2248 static struct vc4_compiled_shader *
2249 vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
2250 struct vc4_key *key)
2251 {
2252 struct hash_table *ht;
2253 uint32_t key_size;
2254 if (stage == QSTAGE_FRAG) {
2255 ht = vc4->fs_cache;
2256 key_size = sizeof(struct vc4_fs_key);
2257 } else {
2258 ht = vc4->vs_cache;
2259 key_size = sizeof(struct vc4_vs_key);
2260 }
2261
2262 struct vc4_compiled_shader *shader;
2263 struct hash_entry *entry = _mesa_hash_table_search(ht, key);
2264 if (entry)
2265 return entry->data;
2266
2267 struct vc4_compile *c = vc4_shader_tgsi_to_qir(vc4, stage, key);
2268 shader = rzalloc(NULL, struct vc4_compiled_shader);
2269
2270 shader->program_id = vc4->next_compiled_program_id++;
2271 if (stage == QSTAGE_FRAG) {
2272 bool input_live[c->num_input_semantics];
2273 struct simple_node *node;
2274
2275 memset(input_live, 0, sizeof(input_live));
2276 foreach(node, &c->instructions) {
2277 struct qinst *inst = (struct qinst *)node;
2278 for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
2279 if (inst->src[i].file == QFILE_VARY)
2280 input_live[inst->src[i].index] = true;
2281 }
2282 }
2283
2284 shader->input_semantics = ralloc_array(shader,
2285 struct vc4_varying_semantic,
2286 c->num_input_semantics);
2287
2288 for (int i = 0; i < c->num_input_semantics; i++) {
2289 struct vc4_varying_semantic *sem = &c->input_semantics[i];
2290
2291 if (!input_live[i])
2292 continue;
2293
2294 /* Skip non-VS-output inputs. */
2295 if (sem->semantic == (uint8_t)~0)
2296 continue;
2297
2298 if (sem->semantic == TGSI_SEMANTIC_COLOR ||
2299 sem->semantic == TGSI_SEMANTIC_BCOLOR) {
2300 shader->color_inputs |= (1 << shader->num_inputs);
2301 }
2302
2303 shader->input_semantics[shader->num_inputs] = *sem;
2304 shader->num_inputs++;
2305 }
2306 } else {
2307 shader->num_inputs = c->num_inputs;
2308
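/* vattr_offsets[] is a running prefix sum of the attribute sizes the
 * compiler recorded, giving each attribute's offset within a vertex's
 * VPM data; vattrs_live marks which of the 8 slots were referenced.
 */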
2309 shader->vattr_offsets[0] = 0;
2310 for (int i = 0; i < 8; i++) {
2311 shader->vattr_offsets[i + 1] =
2312 shader->vattr_offsets[i] + c->vattr_sizes[i];
2313
2314 if (c->vattr_sizes[i])
2315 shader->vattrs_live |= (1 << i);
2316 }
2317 }
2318
2319 copy_uniform_state_to_shader(shader, c);
2320 shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
2321 c->qpu_inst_count * sizeof(uint64_t),
2322 "code");
2323
2324 /* Copy the compiler UBO range state to the compiled shader, dropping
2325 * out arrays that were never referenced by an indirect load.
2326 *
2327 * (Note that QIR dead code elimination of an array access still
2328 * leaves that array alive, though)
2329 */
2330 if (c->num_ubo_ranges) {
2331 shader->num_ubo_ranges = c->num_ubo_ranges;
2332 shader->ubo_ranges = ralloc_array(shader, struct vc4_ubo_range,
2333 c->num_ubo_ranges);
2334 uint32_t j = 0;
2335 for (int i = 0; i < c->ubo_ranges_array_size; i++) {
2336 struct vc4_compiler_ubo_range *range =
2337 &c->ubo_ranges[i];
2338 if (!range->used)
2339 continue;
2340
2341 shader->ubo_ranges[j].dst_offset = range->dst_offset;
2342 shader->ubo_ranges[j].src_offset = range->src_offset;
2343 shader->ubo_ranges[j].size = range->size;
2344 shader->ubo_size += range->size;
2345 j++;
2346 }
2347 }
2348
2349 qir_compile_destroy(c);
2350
2351 struct vc4_key *dup_key;
2352 dup_key = ralloc_size(shader, key_size);
2353 memcpy(dup_key, key, key_size);
2354 _mesa_hash_table_insert(ht, dup_key, shader);
2355
2356 return shader;
2357 }
2358
2359 static void
2360 vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
2361 struct vc4_texture_stateobj *texstate)
2362 {
2363 for (int i = 0; i < texstate->num_textures; i++) {
2364 struct pipe_sampler_view *sampler = texstate->textures[i];
2365 struct pipe_sampler_state *sampler_state =
2366 texstate->samplers[i];
2367
2368 if (sampler) {
2369 key->tex[i].format = sampler->format;
2370 key->tex[i].swizzle[0] = sampler->swizzle_r;
2371 key->tex[i].swizzle[1] = sampler->swizzle_g;
2372 key->tex[i].swizzle[2] = sampler->swizzle_b;
2373 key->tex[i].swizzle[3] = sampler->swizzle_a;
2374 key->tex[i].compare_mode = sampler_state->compare_mode;
2375 key->tex[i].compare_func = sampler_state->compare_func;
2376 key->tex[i].wrap_s = sampler_state->wrap_s;
2377 key->tex[i].wrap_t = sampler_state->wrap_t;
2378 }
2379 }
2380
2381 key->ucp_enables = vc4->rasterizer->base.clip_plane_enable;
2382 }
2383
2384 static void
2385 vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
2386 {
2387 struct vc4_fs_key local_key;
2388 struct vc4_fs_key *key = &local_key;
2389
2390 if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
2391 VC4_DIRTY_BLEND |
2392 VC4_DIRTY_FRAMEBUFFER |
2393 VC4_DIRTY_ZSA |
2394 VC4_DIRTY_RASTERIZER |
2395 VC4_DIRTY_FRAGTEX |
2396 VC4_DIRTY_TEXSTATE |
2397 VC4_DIRTY_UNCOMPILED_FS))) {
2398 return;
2399 }
2400
2401 memset(key, 0, sizeof(*key));
2402 vc4_setup_shared_key(vc4, &key->base, &vc4->fragtex);
2403 key->base.shader_state = vc4->prog.bind_fs;
2404 key->is_points = (prim_mode == PIPE_PRIM_POINTS);
2405 key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
2406 prim_mode <= PIPE_PRIM_LINE_STRIP);
2407 key->blend = vc4->blend->rt[0];
2408 if (vc4->blend->logicop_enable) {
2409 key->logicop_func = vc4->blend->logicop_func;
2410 } else {
2411 key->logicop_func = PIPE_LOGICOP_COPY;
2412 }
2413 if (vc4->framebuffer.cbufs[0])
2414 key->color_format = vc4->framebuffer.cbufs[0]->format;
2415
2416 key->stencil_enabled = vc4->zsa->stencil_uniforms[0] != 0;
2417 key->stencil_twoside = vc4->zsa->stencil_uniforms[1] != 0;
2418 key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
2419 key->depth_enabled = (vc4->zsa->base.depth.enabled ||
2420 key->stencil_enabled);
2421 if (vc4->zsa->base.alpha.enabled) {
2422 key->alpha_test = true;
2423 key->alpha_test_func = vc4->zsa->base.alpha.func;
2424 }
2425
2426 if (key->is_points) {
2427 key->point_sprite_mask =
2428 vc4->rasterizer->base.sprite_coord_enable;
2429 key->point_coord_upper_left =
2430 (vc4->rasterizer->base.sprite_coord_mode ==
2431 PIPE_SPRITE_COORD_UPPER_LEFT);
2432 }
2433
2434 key->light_twoside = vc4->rasterizer->base.light_twoside;
2435
2436 struct vc4_compiled_shader *old_fs = vc4->prog.fs;
2437 vc4->prog.fs = vc4_get_compiled_shader(vc4, QSTAGE_FRAG, &key->base);
2438 if (vc4->prog.fs == old_fs)
2439 return;
2440
2441 vc4->dirty |= VC4_DIRTY_COMPILED_FS;
2442 if (vc4->rasterizer->base.flatshade &&
2443 old_fs && vc4->prog.fs->color_inputs != old_fs->color_inputs) {
2444 vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
2445 }
2446 }
2447
2448 static void
2449 vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
2450 {
2451 struct vc4_vs_key local_key;
2452 struct vc4_vs_key *key = &local_key;
2453
2454 if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
2455 VC4_DIRTY_RASTERIZER |
2456 VC4_DIRTY_VERTTEX |
2457 VC4_DIRTY_TEXSTATE |
2458 VC4_DIRTY_VTXSTATE |
2459 VC4_DIRTY_UNCOMPILED_VS |
2460 VC4_DIRTY_COMPILED_FS))) {
2461 return;
2462 }
2463
2464 memset(key, 0, sizeof(*key));
2465 vc4_setup_shared_key(vc4, &key->base, &vc4->verttex);
2466 key->base.shader_state = vc4->prog.bind_vs;
2467 key->compiled_fs_id = vc4->prog.fs->program_id;
2468
2469 for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
2470 key->attr_formats[i] = vc4->vtx->pipe[i].src_format;
2471
2472 key->per_vertex_point_size =
2473 (prim_mode == PIPE_PRIM_POINTS &&
2474 vc4->rasterizer->base.point_size_per_vertex);
2475
2476 vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
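/* The coordinate shader is the stripped-down variant of the same VS that
 * the binner runs, so reuse the key with just is_coord flipped; both
 * variants live in the same vs_cache and differ only in that field.
 */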
2477 key->is_coord = true;
2478 vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
2479 }
2480
2481 void
2482 vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
2483 {
2484 vc4_update_compiled_fs(vc4, prim_mode);
2485 vc4_update_compiled_vs(vc4, prim_mode);
2486 }
2487
2488 static uint32_t
2489 fs_cache_hash(const void *key)
2490 {
2491 return _mesa_hash_data(key, sizeof(struct vc4_fs_key));
2492 }
2493
2494 static uint32_t
2495 vs_cache_hash(const void *key)
2496 {
2497 return _mesa_hash_data(key, sizeof(struct vc4_vs_key));
2498 }
2499
2500 static bool
2501 fs_cache_compare(const void *key1, const void *key2)
2502 {
2503 return memcmp(key1, key2, sizeof(struct vc4_fs_key)) == 0;
2504 }
2505
2506 static bool
2507 vs_cache_compare(const void *key1, const void *key2)
2508 {
2509 return memcmp(key1, key2, sizeof(struct vc4_vs_key)) == 0;
2510 }
2511
2512 static void
2513 delete_from_cache_if_matches(struct hash_table *ht,
2514 struct hash_entry *entry,
2515 struct vc4_uncompiled_shader *so)
2516 {
2517 const struct vc4_key *key = entry->key;
2518
2519 if (key->shader_state == so) {
2520 struct vc4_compiled_shader *shader = entry->data;
2521 _mesa_hash_table_remove(ht, entry);
2522 vc4_bo_unreference(&shader->bo);
2523 ralloc_free(shader);
2524 }
2525 }
2526
2527 static void
2528 vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
2529 {
2530 struct vc4_context *vc4 = vc4_context(pctx);
2531 struct vc4_uncompiled_shader *so = hwcso;
2532
2533 struct hash_entry *entry;
2534 hash_table_foreach(vc4->fs_cache, entry)
2535 delete_from_cache_if_matches(vc4->fs_cache, entry, so);
2536 hash_table_foreach(vc4->vs_cache, entry)
2537 delete_from_cache_if_matches(vc4->vs_cache, entry, so);
2538
2539 if (so->twoside_tokens != so->base.tokens)
2540 free((void *)so->twoside_tokens);
2541 free((void *)so->base.tokens);
2542 free(so);
2543 }
2544
2545 static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
2546 {
2547 switch (p_wrap) {
2548 case PIPE_TEX_WRAP_REPEAT:
2549 return 0;
2550 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2551 return 1;
2552 case PIPE_TEX_WRAP_MIRROR_REPEAT:
2553 return 2;
2554 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2555 return 3;
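/* There is no true GL_CLAMP mode here, so approximate it: with nearest
 * filtering the border is never sampled and CLAMP_TO_EDGE matches, while
 * with linear filtering CLAMP_TO_BORDER is the closer approximation.
 */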
2556 case PIPE_TEX_WRAP_CLAMP:
2557 return (using_nearest ? 1 : 3);
2558 default:
2559 fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
2560 assert(!"not reached");
2561 return 0;
2562 }
2563 }
2564
2565 static void
2566 write_texture_p0(struct vc4_context *vc4,
2567 struct vc4_texture_stateobj *texstate,
2568 uint32_t unit)
2569 {
2570 struct pipe_sampler_view *texture = texstate->textures[unit];
2571 struct vc4_resource *rsc = vc4_resource(texture->texture);
2572
2573 cl_reloc(vc4, &vc4->uniforms, rsc->bo,
2574 VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
2575 VC4_SET_FIELD(texture->u.tex.last_level -
2576 texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
2577 VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
2578 VC4_TEX_P0_CMMODE) |
2579 VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE));
2580 }
2581
2582 static void
2583 write_texture_p1(struct vc4_context *vc4,
2584 struct vc4_texture_stateobj *texstate,
2585 uint32_t unit)
2586 {
2587 struct pipe_sampler_view *texture = texstate->textures[unit];
2588 struct vc4_resource *rsc = vc4_resource(texture->texture);
2589 struct pipe_sampler_state *sampler = texstate->samplers[unit];
2590 static const uint8_t minfilter_map[6] = {
2591 VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
2592 VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
2593 VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
2594 VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
2595 VC4_TEX_P1_MINFILT_NEAREST,
2596 VC4_TEX_P1_MINFILT_LINEAR,
2597 };
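/* The min filter lookup below indexes this map as
 * minfilter_map[min_mip_filter * 2 + min_img_filter], relying on
 * PIPE_TEX_MIPFILTER_{NEAREST,LINEAR,NONE} being 0,1,2 and
 * PIPE_TEX_FILTER_{NEAREST,LINEAR} being 0,1, so the non-mipmapped
 * cases land on the last two entries.
 */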
2598 static const uint32_t magfilter_map[] = {
2599 [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
2600 [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
2601 };
2602
2603 bool either_nearest =
2604 (sampler->mag_img_filter == PIPE_TEX_FILTER_NEAREST ||
2605 sampler->min_img_filter == PIPE_TEX_FILTER_NEAREST);
2606
2607 cl_aligned_u32(&vc4->uniforms,
2608 VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
2609 VC4_SET_FIELD(texture->texture->height0 & 2047,
2610 VC4_TEX_P1_HEIGHT) |
2611 VC4_SET_FIELD(texture->texture->width0 & 2047,
2612 VC4_TEX_P1_WIDTH) |
2613 VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter],
2614 VC4_TEX_P1_MAGFILT) |
2615 VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 +
2616 sampler->min_img_filter],
2617 VC4_TEX_P1_MINFILT) |
2618 VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest),
2619 VC4_TEX_P1_WRAP_S) |
2620 VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest),
2621 VC4_TEX_P1_WRAP_T));
2622 }
2623
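/* For P2 the compiler packs two fields into the uniform's data word, as
 * decoded below: the sampler unit in bits 0-15 and a flag in bit 16 that
 * feeds the BSLOD (bias / explicit-LOD) field.
 */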
2624 static void
2625 write_texture_p2(struct vc4_context *vc4,
2626 struct vc4_texture_stateobj *texstate,
2627 uint32_t data)
2628 {
2629 uint32_t unit = data & 0xffff;
2630 struct pipe_sampler_view *texture = texstate->textures[unit];
2631 struct vc4_resource *rsc = vc4_resource(texture->texture);
2632
2633 cl_aligned_u32(&vc4->uniforms,
2634 VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
2635 VC4_TEX_P2_PTYPE) |
2636 VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
2637 VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
2638 }
2639
2640
2641 #define SWIZ(x,y,z,w) { \
2642 UTIL_FORMAT_SWIZZLE_##x, \
2643 UTIL_FORMAT_SWIZZLE_##y, \
2644 UTIL_FORMAT_SWIZZLE_##z, \
2645 UTIL_FORMAT_SWIZZLE_##w \
2646 }
2647
2648 static void
2649 write_texture_border_color(struct vc4_context *vc4,
2650 struct vc4_texture_stateobj *texstate,
2651 uint32_t unit)
2652 {
2653 struct pipe_sampler_state *sampler = texstate->samplers[unit];
2654 struct pipe_sampler_view *texture = texstate->textures[unit];
2655 struct vc4_resource *rsc = vc4_resource(texture->texture);
2656 union util_color uc;
2657
2658 const struct util_format_description *tex_format_desc =
2659 util_format_description(texture->format);
2660
2661 float border_color[4];
2662 for (int i = 0; i < 4; i++)
2663 border_color[i] = sampler->border_color.f[i];
2664 if (util_format_is_srgb(texture->format)) {
2665 for (int i = 0; i < 3; i++)
2666 border_color[i] =
2667 util_format_linear_to_srgb_float(border_color[i]);
2668 }
2669
2670 /* Turn the border color into the layout of channels that it would
2671 * have when stored as texture contents.
2672 */
2673 float storage_color[4];
2674 util_format_unswizzle_4f(storage_color,
2675 border_color,
2676 tex_format_desc->swizzle);
2677
2678 /* Now, pack so that when the vc4_format-sampled texture contents are
2679 * replaced with our border color, the vc4_get_format_swizzle()
2680 * swizzling will get the right channels.
2681 */
2682 if (util_format_is_depth_or_stencil(texture->format)) {
2683 uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
2684 sampler->border_color.f[0]) << 8;
2685 } else {
2686 switch (rsc->vc4_format) {
2687 default:
2688 case VC4_TEXTURE_TYPE_RGBA8888:
2689 util_pack_color(storage_color,
2690 PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
2691 break;
2692 case VC4_TEXTURE_TYPE_RGBA4444:
2693 util_pack_color(storage_color,
2694 PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
2695 break;
2696 case VC4_TEXTURE_TYPE_RGB565:
2697 util_pack_color(storage_color,
2698 PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
2699 break;
2700 case VC4_TEXTURE_TYPE_ALPHA:
2701 uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
2702 break;
2703 case VC4_TEXTURE_TYPE_LUMALPHA:
2704 uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
2705 (float_to_ubyte(storage_color[0]) << 0));
2706 break;
2707 }
2708 }
2709
2710 cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
2711 }
2712
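/* RECT samplers take unnormalized texel coordinates, but the hardware
 * wants normalized ones, so the compiled shader multiplies its
 * coordinates by these 1.0/width0 and 1.0/height0 uniforms before the
 * lookup.
 */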
2713 static uint32_t
2714 get_texrect_scale(struct vc4_texture_stateobj *texstate,
2715 enum quniform_contents contents,
2716 uint32_t data)
2717 {
2718 struct pipe_sampler_view *texture = texstate->textures[data];
2719 uint32_t dim;
2720
2721 if (contents == QUNIFORM_TEXRECT_SCALE_X)
2722 dim = texture->texture->width0;
2723 else
2724 dim = texture->texture->height0;
2725
2726 return fui(1.0f / dim);
2727 }
2728
2729 static struct vc4_bo *
2730 vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
2731 const uint32_t *gallium_uniforms)
2732 {
2733 if (!shader->ubo_size)
2734 return NULL;
2735
2736 struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
2737 uint32_t *data = vc4_bo_map(ubo);
2738 for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
2739 memcpy(data + shader->ubo_ranges[i].dst_offset,
2740 gallium_uniforms + shader->ubo_ranges[i].src_offset,
2741 shader->ubo_ranges[i].size);
2742 }
2743
2744 return ubo;
2745 }
2746
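/* Streams out the per-draw uniform values in exactly the order the
 * compiled shader's uniform_contents[] recorded them, one 32-bit word
 * (or relocation) per entry, since the QPU consumes its uniforms
 * sequentially.
 */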
2747 void
2748 vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
2749 struct vc4_constbuf_stateobj *cb,
2750 struct vc4_texture_stateobj *texstate)
2751 {
2752 struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
2753 const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
2754 struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
2755
2756 cl_ensure_space(&vc4->uniforms, (uinfo->count +
2757 uinfo->num_texture_samples) * 4);
2758
2759 cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
2760
2761 for (int i = 0; i < uinfo->count; i++) {
2763 switch (uinfo->contents[i]) {
2764 case QUNIFORM_CONSTANT:
2765 cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
2766 break;
2767 case QUNIFORM_UNIFORM:
2768 cl_aligned_u32(&vc4->uniforms,
2769 gallium_uniforms[uinfo->data[i]]);
2770 break;
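/* The 16.0f factors below premultiply the viewport scale so that the
 * vertex shader's FTOI produces 12.4 fixed-point screen coordinates
 * (see emit_scaled_viewport_write()).
 */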
2771 case QUNIFORM_VIEWPORT_X_SCALE:
2772 cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
2773 break;
2774 case QUNIFORM_VIEWPORT_Y_SCALE:
2775 cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
2776 break;
2777
2778 case QUNIFORM_VIEWPORT_Z_OFFSET:
2779 cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
2780 break;
2781 case QUNIFORM_VIEWPORT_Z_SCALE:
2782 cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
2783 break;
2784
2785 case QUNIFORM_USER_CLIP_PLANE:
2786 cl_aligned_f(&vc4->uniforms,
2787 vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
2788 break;
2789
2790 case QUNIFORM_TEXTURE_CONFIG_P0:
2791 write_texture_p0(vc4, texstate, uinfo->data[i]);
2792 break;
2793
2794 case QUNIFORM_TEXTURE_CONFIG_P1:
2795 write_texture_p1(vc4, texstate, uinfo->data[i]);
2796 break;
2797
2798 case QUNIFORM_TEXTURE_CONFIG_P2:
2799 write_texture_p2(vc4, texstate, uinfo->data[i]);
2800 break;
2801
2802 case QUNIFORM_UBO_ADDR:
2803 cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
2804 break;
2805
2806 case QUNIFORM_TEXTURE_BORDER_COLOR:
2807 write_texture_border_color(vc4, texstate, uinfo->data[i]);
2808 break;
2809
2810 case QUNIFORM_TEXRECT_SCALE_X:
2811 case QUNIFORM_TEXRECT_SCALE_Y:
2812 cl_aligned_u32(&vc4->uniforms,
2813 get_texrect_scale(texstate,
2814 uinfo->contents[i],
2815 uinfo->data[i]));
2816 break;
2817
2818 case QUNIFORM_BLEND_CONST_COLOR:
2819 cl_aligned_f(&vc4->uniforms,
2820 CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
2821 break;
2822
2823 case QUNIFORM_STENCIL:
2824 cl_aligned_u32(&vc4->uniforms,
2825 vc4->zsa->stencil_uniforms[uinfo->data[i]] |
2826 (uinfo->data[i] <= 1 ?
2827 (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
2828 0));
2829 break;
2830
2831 case QUNIFORM_ALPHA_REF:
2832 cl_aligned_f(&vc4->uniforms,
2833 vc4->zsa->base.alpha.ref_value);
2834 break;
2835 }
2836 #if 0
2837 uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
2838 fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
2839 shader, i, written_val, uif(written_val));
2840 #endif
2841 }
2842 }
2843
2844 static void
2845 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
2846 {
2847 struct vc4_context *vc4 = vc4_context(pctx);
2848 vc4->prog.bind_fs = hwcso;
2849 vc4->dirty |= VC4_DIRTY_UNCOMPILED_FS;
2850 }
2851
2852 static void
2853 vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
2854 {
2855 struct vc4_context *vc4 = vc4_context(pctx);
2856 vc4->prog.bind_vs = hwcso;
2857 vc4->dirty |= VC4_DIRTY_UNCOMPILED_VS;
2858 }
2859
2860 void
2861 vc4_program_init(struct pipe_context *pctx)
2862 {
2863 struct vc4_context *vc4 = vc4_context(pctx);
2864
2865 pctx->create_vs_state = vc4_shader_state_create;
2866 pctx->delete_vs_state = vc4_shader_state_delete;
2867
2868 pctx->create_fs_state = vc4_shader_state_create;
2869 pctx->delete_fs_state = vc4_shader_state_delete;
2870
2871 pctx->bind_fs_state = vc4_fp_state_bind;
2872 pctx->bind_vs_state = vc4_vp_state_bind;
2873
2874 vc4->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
2875 fs_cache_compare);
2876 vc4->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
2877 vs_cache_compare);
2878 }
2879
2880 void
2881 vc4_program_fini(struct pipe_context *pctx)
2882 {
2883 struct vc4_context *vc4 = vc4_context(pctx);
2884
2885 struct hash_entry *entry;
2886 hash_table_foreach(vc4->fs_cache, entry) {
2887 struct vc4_compiled_shader *shader = entry->data;
2888 vc4_bo_unreference(&shader->bo);
2889 ralloc_free(shader);
2890 _mesa_hash_table_remove(vc4->fs_cache, entry);
2891 }
2892
2893 hash_table_foreach(vc4->vs_cache, entry) {
2894 struct vc4_compiled_shader *shader = entry->data;
2895 vc4_bo_unreference(&shader->bo);
2896 ralloc_free(shader);
2897 _mesa_hash_table_remove(vc4->vs_cache, entry);
2898 }
2899 }