vc4: Add support for shadow samplers.
src/gallium/drivers/vc4/vc4_program.c
/*
 * Copyright (c) 2014 Scott Mansell
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <inttypes.h>
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_hash_table.h"
#include "util/u_hash.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"

#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
#ifdef USE_VC4_SIMULATOR
#include "simpenrose/simpenrose.h"
#endif

struct vc4_key {
        struct pipe_shader_state *shader_state;
        struct {
                enum pipe_format format;
                unsigned compare_mode:1;
                unsigned compare_func:3;
                uint8_t swizzle[4];
        } tex[VC4_MAX_TEXTURE_SAMPLERS];
};

struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
        bool depth_enabled;
        bool is_points;
        bool is_lines;

        struct pipe_rt_blend_state blend;
};

struct vc4_vs_key {
        struct vc4_key base;
        enum pipe_format attr_formats[8];
};

static struct qreg
add_uniform(struct vc4_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        uint32_t uniform = c->num_uniforms++;
        struct qreg u = { QFILE_UNIF, uniform };

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return u;
}

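/* Returns a temporary containing the given uniform, reusing an existing one
 * if the same contents were already requested.  The MOV into a temp matters
 * because uniforms are consumed from a FIFO in emission order: copying the
 * value out lets any number of later instructions reuse it.
 */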
static struct qreg
get_temp_for_uniform(struct vc4_compile *c, enum quniform_contents contents,
                     uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data)
                        return c->uniforms[i];
        }

        struct qreg u = add_uniform(c, contents, data);
        struct qreg t = qir_MOV(c, u);

        c->uniforms[u.index] = t;
        return t;
}

static struct qreg
qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
{
        return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui);
}

static struct qreg
qir_uniform_f(struct vc4_compile *c, float f)
{
        return qir_uniform_ui(c, fui(f));
}

static struct qreg
get_src(struct vc4_compile *c, unsigned tgsi_op,
        struct tgsi_src_register *src, int i)
{
        struct qreg r = c->undef;

        uint32_t s = i;
        switch (i) {
        case TGSI_SWIZZLE_X:
                s = src->SwizzleX;
                break;
        case TGSI_SWIZZLE_Y:
                s = src->SwizzleY;
                break;
        case TGSI_SWIZZLE_Z:
                s = src->SwizzleZ;
                break;
        case TGSI_SWIZZLE_W:
                s = src->SwizzleW;
                break;
        default:
                abort();
        }

        assert(!src->Indirect);

        switch (src->File) {
        case TGSI_FILE_NULL:
                return r;
        case TGSI_FILE_TEMPORARY:
                r = c->temps[src->Index * 4 + s];
                break;
        case TGSI_FILE_IMMEDIATE:
                r = c->consts[src->Index * 4 + s];
                break;
        case TGSI_FILE_CONSTANT:
                r = get_temp_for_uniform(c, QUNIFORM_UNIFORM,
                                         src->Index * 4 + s);
                break;
        case TGSI_FILE_INPUT:
                r = c->inputs[src->Index * 4 + s];
                break;
        case TGSI_FILE_SAMPLER:
        case TGSI_FILE_SAMPLER_VIEW:
                r = c->undef;
                break;
        default:
                fprintf(stderr, "unknown src file %d\n", src->File);
                abort();
        }

        if (src->Absolute)
                r = qir_FMAXABS(c, r, r);

        if (src->Negate) {
                switch (tgsi_opcode_infer_src_type(tgsi_op)) {
                case TGSI_TYPE_SIGNED:
                case TGSI_TYPE_UNSIGNED:
                        r = qir_SUB(c, qir_uniform_ui(c, 0), r);
                        break;
                default:
                        r = qir_FSUB(c, qir_uniform_f(c, 0.0), r);
                        break;
                }
        }

        return r;
}

static void
update_dst(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst,
           int i, struct qreg val)
{
        struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;

        assert(!tgsi_dst->Indirect);

        switch (tgsi_dst->File) {
        case TGSI_FILE_TEMPORARY:
                c->temps[tgsi_dst->Index * 4 + i] = val;
                break;
        case TGSI_FILE_OUTPUT:
                c->outputs[tgsi_dst->Index * 4 + i] = val;
                c->num_outputs = MAX2(c->num_outputs,
                                      tgsi_dst->Index * 4 + i + 1);
                break;
        default:
                fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
                abort();
        }
}

static struct qreg
get_swizzled_channel(struct vc4_compile *c,
                     struct qreg *srcs, int swiz)
{
        switch (swiz) {
        default:
        case UTIL_FORMAT_SWIZZLE_NONE:
                fprintf(stderr, "warning: unknown swizzle\n");
                /* FALLTHROUGH */
        case UTIL_FORMAT_SWIZZLE_0:
                return qir_uniform_f(c, 0.0);
        case UTIL_FORMAT_SWIZZLE_1:
                return qir_uniform_f(c, 1.0);
        case UTIL_FORMAT_SWIZZLE_X:
        case UTIL_FORMAT_SWIZZLE_Y:
        case UTIL_FORMAT_SWIZZLE_Z:
        case UTIL_FORMAT_SWIZZLE_W:
                return srcs[swiz];
        }
}

static struct qreg
tgsi_to_qir_alu(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg dst = qir_get_temp(c);
        qir_emit(c, qir_inst4(op, dst,
                              src[0 * 4 + i],
                              src[1 * 4 + i],
                              src[2 * 4 + i],
                              c->undef));
        return dst;
}

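/* The QPU's only integer multiply is the 24-bit MUL24, so a full 32-bit
 * multiply is built out of 16-bit halves, whose products fit in 24 bits:
 *
 *     a * b = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 16)  (mod 2^32)
 *
 * The a_hi * b_hi term would only contribute above bit 31, so it's dropped.
 */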
static struct qreg
tgsi_to_qir_umul(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));
        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));

        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);

        return qir_ADD(c, lolo, qir_SHL(c,
                                        qir_ADD(c, hilo, lohi),
                                        qir_uniform_ui(c, 16)));
}

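/* Integer division is approximated in floating point, since the QPU has no
 * integer divide: convert to float, multiply by the reciprocal, and truncate
 * back.  This is inexact for values that don't fit in a float's 24-bit
 * mantissa.
 */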
static struct qreg
tgsi_to_qir_idiv(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_FTOI(c, qir_FMUL(c,
                                    qir_ITOF(c, src[0 * 4 + i]),
                                    qir_RCP(c, qir_ITOF(c, src[1 * 4 + i]))));
}

static struct qreg
tgsi_to_qir_ineg(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_SUB(c, qir_uniform_ui(c, 0), src[0 * 4 + i]);
}

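/* The SET-on-compare opcodes below all follow the same pattern: qir_SF()
 * updates the condition flags from a subtract of the two operands, then a
 * conditional select (SEL_X_0_ZS/ZC/NS/NC, for zero-set/zero-clear/
 * negative-set/negative-clear) picks either the "true" value or zero.
 * SEQ-family opcodes produce 1.0 on pass, FSEQ/USEQ-family ones produce ~0.
 */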
static struct qreg
tgsi_to_qir_seq(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_sne(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_slt(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_sge(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_fseq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fsne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fslt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fsge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_useq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_usne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_islt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_isge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_cmp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, src[0 * 4 + i]);
        return qir_SEL_X_Y_NS(c,
                              src[1 * 4 + i],
                              src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_mad(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return qir_FADD(c,
                        qir_FMUL(c,
                                 src[0 * 4 + i],
                                 src[1 * 4 + i]),
                        src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_lit(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg x = src[0 * 4 + 0];
        struct qreg y = src[0 * 4 + 1];
        struct qreg w = src[0 * 4 + 3];

        switch (i) {
        case 0:
        case 3:
                return qir_uniform_f(c, 1.0);
        case 1:
                return qir_FMAX(c, x, qir_uniform_f(c, 0.0));
        case 2: {
                struct qreg zero = qir_uniform_f(c, 0.0);

                qir_SF(c, x);
                /* XXX: Clamp w to -128..128 */
                return qir_SEL_X_0_NC(c,
                                      qir_EXP2(c, qir_FMUL(c,
                                                           w,
                                                           qir_LOG2(c,
                                                                    qir_FMAX(c,
                                                                             y,
                                                                             zero)))));
        }
        default:
                assert(!"not reached");
                return c->undef;
        }
}

static struct qreg
tgsi_to_qir_lrp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg src0 = src[0 * 4 + i];
        struct qreg src1 = src[1 * 4 + i];
        struct qreg src2 = src[2 * 4 + i];

        /* LRP is:
         *    src0 * src1 + (1 - src0) * src2.
         * -> src0 * src1 + src2 - src0 * src2
         * -> src2 + src0 * (src1 - src2)
         */
        return qir_FADD(c, src2, qir_FMUL(c, src0, qir_FSUB(c, src1, src2)));
}

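/* Emits the texture lookup sequence: coordinate writes to the texture unit
 * (with rectangle-coordinate scaling and projection applied in shader code
 * first, since the hardware wants normalized 2D coordinates), then a read of
 * the result back from r4.  For shadow samplers the depth comparison is
 * likewise done in shader code, on the depth value unpacked from r4.
 */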
static void
tgsi_to_qir_tex(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src)
{
        assert(!tgsi_inst->Instruction.Saturate);

        struct qreg s = src[0 * 4 + 0];
        struct qreg t = src[0 * 4 + 1];
        uint32_t unit = tgsi_inst->Src[1].Register.Index;

        struct qreg proj = c->undef;
        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
                proj = qir_RCP(c, src[0 * 4 + 3]);
                s = qir_FMUL(c, s, proj);
                t = qir_FMUL(c, t, proj);
        }

        /* There is no native support for GL texture rectangle coordinates,
         * so we have to rescale from ([0, width], [0, height]) to
         * ([0, 1], [0, 1]).
         */
        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
                s = qir_FMUL(c, s,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_X,
                                                  unit));
                t = qir_FMUL(c, t,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_Y,
                                                  unit));
        }

        qir_TEX_T(c, t, add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit));

        struct qreg sampler_p1 = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1,
                                             unit);
        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
                qir_TEX_B(c, src[0 * 4 + 3], sampler_p1);
                qir_TEX_S(c, s, add_uniform(c, QUNIFORM_CONSTANT, 0));
        } else {
                qir_TEX_S(c, s, sampler_p1);
        }

        c->num_texture_samples++;
        struct qreg r4 = qir_TEX_RESULT(c);

        enum pipe_format format = c->key->tex[unit].format;

        struct qreg unpacked[4];
        if (util_format_is_depth_or_stencil(format)) {
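                /* The depth value comes back in the high 24 bits of r4, so
                 * shift it down and rescale it to a [0, 1] float for the
                 * comparison against the shadow reference value below.
                 */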
                struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
                                                         qir_uniform_ui(c, 8)));
                struct qreg normalized = qir_FMUL(c, depthf,
                                                  qir_uniform_f(c, 1.0f / 0xffffff));

                struct qreg depth_output;

                struct qreg compare = src[0 * 4 + 2];

                if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP)
                        compare = qir_FMUL(c, compare, proj);

                struct qreg one = qir_uniform_f(c, 1.0f);
                if (c->key->tex[unit].compare_mode) {
                        switch (c->key->tex[unit].compare_func) {
                        case PIPE_FUNC_NEVER:
                                depth_output = qir_uniform_f(c, 0.0f);
                                break;
                        case PIPE_FUNC_ALWAYS:
                                depth_output = one;
                                break;
                        case PIPE_FUNC_EQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZS(c, one);
                                break;
                        case PIPE_FUNC_NOTEQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZC(c, one);
                                break;
                        case PIPE_FUNC_GREATER:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        case PIPE_FUNC_GEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LESS:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        }
                } else {
                        depth_output = normalized;
                }

                for (int i = 0; i < 4; i++)
                        unpacked[i] = depth_output;
        } else {
                for (int i = 0; i < 4; i++)
                        unpacked[i] = qir_R4_UNPACK(c, r4, i);
        }

        const uint8_t *format_swiz = vc4_get_format_swizzle(format);
        uint8_t swiz[4];
        util_format_compose_swizzles(format_swiz, c->key->tex[unit].swizzle, swiz);
        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                update_dst(c, tgsi_inst, i,
                           get_swizzled_channel(c, unpacked, swiz[i]));
        }
}

/* Note that this instruction replicates its result from the x channel. */
static struct qreg
tgsi_to_qir_pow(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return qir_EXP2(c, qir_FMUL(c,
                                    src[1 * 4 + 0],
                                    qir_LOG2(c, src[0 * 4 + 0])));
}

static struct qreg
tgsi_to_qir_trunc(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
{
        return qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
}

/**
 * Computes x - floor(x), which is tricky because our FTOI truncates (rounds
 * to zero).
 */
static struct qreg
tgsi_to_qir_frc(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
        struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
        qir_SF(c, diff);
        return qir_SEL_X_Y_NS(c,
                              qir_FADD(c, diff, qir_uniform_f(c, 1.0)),
                              diff);
}

/**
 * Computes floor(x), which is tricky because our FTOI truncates (rounds to
 * zero).
 */
static struct qreg
tgsi_to_qir_flr(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));

        /* This will be < 0 if we truncated and the truncation was of a value
         * that was < 0 in the first place.
         */
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));

        return qir_SEL_X_Y_NS(c,
                              qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)),
                              trunc);
}

static struct qreg
tgsi_to_qir_dp(struct vc4_compile *c,
               struct tgsi_full_instruction *tgsi_inst,
               int num, struct qreg *src, int i)
{
        struct qreg sum = qir_FMUL(c, src[0 * 4 + 0], src[1 * 4 + 0]);
        for (int j = 1; j < num; j++) {
                sum = qir_FADD(c, sum, qir_FMUL(c,
                                                src[0 * 4 + j],
                                                src[1 * 4 + j]));
        }
        return sum;
}

static struct qreg
tgsi_to_qir_dp2(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 2, src, i);
}

static struct qreg
tgsi_to_qir_dp3(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 3, src, i);
}

static struct qreg
tgsi_to_qir_dp4(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 4, src, i);
}

static struct qreg
tgsi_to_qir_abs(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg arg = src[0 * 4 + i];
        return qir_FMAXABS(c, arg, arg);
}

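/* sin and cos are computed as Taylor-series polynomials in x / (2*pi), with
 * the fractional part taken first so that every input reduces to a single
 * period:
 *
 *     sin(2*pi*x) = (2*pi)*x - (2*pi)^3/3! * x^3 + (2*pi)^5/5! * x^5 - ...
 *
 * The coeff[] arrays below hold exactly those (2*pi)^n / n! terms.
 */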
/* Note that this instruction replicates its result from the x channel */
static struct qreg
tgsi_to_qir_sin(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        float coeff[] = {
                2.0 * M_PI,
                -pow(2.0 * M_PI, 3) / (3 * 2 * 1),
                pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
        };

        struct qreg scaled_x =
                qir_FMUL(c,
                         src[0 * 4 + 0],
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));

        struct qreg x = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);
        struct qreg x2 = qir_FMUL(c, x, x);
        struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
        for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
                x = qir_FMUL(c, x, x2);
                sum = qir_FADD(c,
                               sum,
                               qir_FMUL(c,
                                        x,
                                        qir_uniform_f(c, coeff[i])));
        }
        return sum;
}

/* Note that this instruction replicates its result from the x channel */
static struct qreg
tgsi_to_qir_cos(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        float coeff[] = {
                1.0f,
                -pow(2.0 * M_PI, 2) / (2 * 1),
                pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
        };

        struct qreg scaled_x =
                qir_FMUL(c, src[0 * 4 + 0],
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
        struct qreg x_frac = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);

        struct qreg sum = qir_uniform_f(c, coeff[0]);
        struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
        struct qreg x = x2; /* Current x^2, x^4, or x^6 */
        for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
                if (i != 1)
                        x = qir_FMUL(c, x, x2);

                sum = qir_FADD(c, sum, qir_FMUL(c,
                                                x,
                                                qir_uniform_f(c, coeff[i])));
        }
        return sum;
}

static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
        enum pipe_format format = c->vs_key->attr_formats[attr];
        struct qreg vpm_reads[4];

        /* Right now, we're setting the VPM offsets to be 16 bytes wide every
         * time, so we always read 4 32-bit VPM entries.
         */
        for (int i = 0; i < 4; i++) {
                vpm_reads[i] = qir_get_temp(c);
                qir_emit(c, qir_inst(QOP_VPM_READ,
                                     vpm_reads[i],
                                     c->undef,
                                     c->undef));
                c->num_inputs++;
        }

        bool format_warned = false;
        const struct util_format_description *desc =
                util_format_description(format);

        for (int i = 0; i < 4; i++) {
                uint8_t swiz = desc->swizzle[i];

                if (swiz <= UTIL_FORMAT_SWIZZLE_W &&
                    !format_warned &&
                    (desc->channel[swiz].type != UTIL_FORMAT_TYPE_FLOAT ||
                     desc->channel[swiz].size != 32)) {
                        fprintf(stderr,
                                "vtx element %d unsupported type: %s\n",
                                attr, util_format_name(format));
                        format_warned = true;
                }

                c->inputs[attr * 4 + i] =
                        get_swizzled_channel(c, vpm_reads, swiz);
        }
}

static void
tgsi_to_qir_kill_if(struct vc4_compile *c, struct qreg *src, int i)
{
        if (c->discard.file == QFILE_NULL)
                c->discard = qir_uniform_f(c, 0.0);
        qir_SF(c, src[0 * 4 + i]);
        c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
                                    c->discard);
}

static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
        c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
        c->inputs[attr * 4 + 2] =
                qir_FMUL(c,
                         qir_FRAG_Z(c),
                         qir_uniform_f(c, 1.0 / 0xffffff));
        c->inputs[attr * 4 + 3] = qir_FRAG_RCP_W(c);
}

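/* A varying read gives back the fragment's partially interpolated value and
 * leaves the plane equation's C coefficient in r5, which VARY_ADD_C adds
 * back on.  Perspective correction (the multiply by W) is still missing
 * here, per the XXX below.
 */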
static struct qreg
emit_fragment_varying(struct vc4_compile *c, int index)
{
        struct qreg vary = {
                QFILE_VARY,
                index
        };

        /* XXX: multiply by W */
        return qir_VARY_ADD_C(c, qir_MOV(c, vary));
}

static void
emit_fragment_input(struct vc4_compile *c, int attr)
{
        for (int i = 0; i < 4; i++) {
                c->inputs[attr * 4 + i] =
                        emit_fragment_varying(c, attr * 4 + i);
                c->num_inputs++;
        }
}

static void
emit_tgsi_declaration(struct vc4_compile *c,
                      struct tgsi_full_declaration *decl)
{
        switch (decl->Declaration.File) {
        case TGSI_FILE_INPUT:
                for (int i = decl->Range.First;
                     i <= decl->Range.Last;
                     i++) {
                        if (c->stage == QSTAGE_FRAG) {
                                if (decl->Semantic.Name ==
                                    TGSI_SEMANTIC_POSITION) {
                                        emit_fragcoord_input(c, i);
                                } else {
                                        emit_fragment_input(c, i);
                                }
                        } else {
                                emit_vertex_input(c, i);
                        }
                }
                break;
        }
}

static void
emit_tgsi_instruction(struct vc4_compile *c,
                      struct tgsi_full_instruction *tgsi_inst)
{
        struct {
                enum qop op;
                struct qreg (*func)(struct vc4_compile *c,
                                    struct tgsi_full_instruction *tgsi_inst,
                                    enum qop op,
                                    struct qreg *src, int i);
        } op_trans[] = {
                [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
                [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
                [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
                [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
                [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
                [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
                [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
                [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu },
                [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu },
                [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu },
                [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu },
                [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu },
                [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu },
                [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu },
                [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu },
                [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu },
                [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu },
                [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu },
                [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu },

                [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul },
                [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
                [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },

                [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
                [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
                [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
                [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
                [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq },
                [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne },
                [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge },
                [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt },
                [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq },
                [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne },
                [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge },
                [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },

                [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
                [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
                [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
                [TGSI_OPCODE_DP4] = { 0, tgsi_to_qir_dp4 },
                [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_alu },
                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
                [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_alu },
                [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_alu },
                [TGSI_OPCODE_LIT] = { 0, tgsi_to_qir_lit },
                [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },
                [TGSI_OPCODE_POW] = { 0, tgsi_to_qir_pow },
                [TGSI_OPCODE_TRUNC] = { 0, tgsi_to_qir_trunc },
                [TGSI_OPCODE_FRC] = { 0, tgsi_to_qir_frc },
                [TGSI_OPCODE_FLR] = { 0, tgsi_to_qir_flr },
                [TGSI_OPCODE_SIN] = { 0, tgsi_to_qir_sin },
                [TGSI_OPCODE_COS] = { 0, tgsi_to_qir_cos },
        };
        static int instr_count = 0;
        uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;

        if (tgsi_op == TGSI_OPCODE_END)
                return;

        struct qreg src_regs[12];
        for (int s = 0; s < 3; s++) {
                for (int i = 0; i < 4; i++) {
                        src_regs[4 * s + i] =
                                get_src(c, tgsi_inst->Instruction.Opcode,
                                        &tgsi_inst->Src[s].Register, i);
                }
        }

        switch (tgsi_op) {
        case TGSI_OPCODE_TEX:
        case TGSI_OPCODE_TXP:
        case TGSI_OPCODE_TXB:
                tgsi_to_qir_tex(c, tgsi_inst,
                                op_trans[tgsi_op].op, src_regs);
                return;
        case TGSI_OPCODE_KILL:
                c->discard = qir_uniform_f(c, 1.0);
                return;
        case TGSI_OPCODE_KILL_IF:
                for (int i = 0; i < 4; i++)
                        tgsi_to_qir_kill_if(c, src_regs, i);
                return;
        default:
                break;
        }

        if (tgsi_op >= ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
                fprintf(stderr, "unknown tgsi inst: ");
                tgsi_dump_instruction(tgsi_inst, instr_count++);
                fprintf(stderr, "\n");
                abort();
        }

        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                struct qreg result;

                result = op_trans[tgsi_op].func(c, tgsi_inst,
                                                op_trans[tgsi_op].op,
                                                src_regs, i);

                if (tgsi_inst->Instruction.Saturate) {
                        float low = (tgsi_inst->Instruction.Saturate ==
                                     TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
                        result = qir_FMAX(c,
                                          qir_FMIN(c,
                                                   result,
                                                   qir_uniform_f(c, 1.0)),
                                          qir_uniform_f(c, low));
                }

                update_dst(c, tgsi_inst, i, result);
        }
}

static void
parse_tgsi_immediate(struct vc4_compile *c, struct tgsi_full_immediate *imm)
{
        for (int i = 0; i < 4; i++) {
                unsigned n = c->num_consts++;
                c->consts[n] = qir_uniform_ui(c, imm->u[i].Uint);
        }
}

static struct qreg
vc4_blend_channel(struct vc4_compile *c,
                  struct qreg *dst,
                  struct qreg *src,
                  struct qreg val,
                  unsigned factor,
                  int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return val;
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return qir_FMUL(c, val, src[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return qir_FMUL(c, val, src[3]);
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return qir_FMUL(c, val, dst[3]);
        case PIPE_BLENDFACTOR_DST_COLOR:
                return qir_FMUL(c, val, dst[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                return qir_FMIN(c, src[3], qir_FSUB(c,
                                                    qir_uniform_f(c, 1.0),
                                                    dst[3]));
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     channel));
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     3));
        case PIPE_BLENDFACTOR_ZERO:
                return qir_uniform_f(c, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[channel]));
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[3]));
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[3]));
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[channel]));
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              channel)));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              3)));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return val;
        }
}

static struct qreg
vc4_blend_func(struct vc4_compile *c,
               struct qreg src, struct qreg dst,
               unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return qir_FADD(c, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return qir_FSUB(c, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return qir_FSUB(c, dst, src);
        case PIPE_BLEND_MIN:
                return qir_FMIN(c, src, dst);
        case PIPE_BLEND_MAX:
                return qir_FMAX(c, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

/**
 * Implements fixed function blending in shader code.
 *
 * VC4 doesn't have any hardware support for blending.  Instead, you read the
 * current contents of the destination from the tile buffer after having
 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
 * math using your output color and that destination value, and update the
 * output color appropriately.
 */
static void
vc4_blend(struct vc4_compile *c, struct qreg *result,
          struct qreg *dst_color, struct qreg *src_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        struct qreg src_blend[4], dst_blend[4];
        for (int i = 0; i < 3; i++) {
                src_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 src_color[i],
                                                 blend->rgb_src_factor, i);
                dst_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 dst_color[i],
                                                 blend->rgb_dst_factor, i);
        }
        src_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         src_color[3],
                                         blend->alpha_src_factor, 3);
        dst_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         dst_color[3],
                                         blend->alpha_dst_factor, 3);

        for (int i = 0; i < 3; i++) {
                result[i] = vc4_blend_func(c,
                                           src_blend[i], dst_blend[i],
                                           blend->rgb_func);
        }
        result[3] = vc4_blend_func(c,
                                   src_blend[3], dst_blend[3],
                                   blend->alpha_func);
}

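/* Emits the fragment shader epilogue: reads back the destination color from
 * the tile buffer when blending or a partial colormask needs it, applies the
 * blend and colormask in shader code, and packs the result into the single
 * 32-bit TLB color write.
 */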
static void
emit_frag_end(struct vc4_compile *c)
{
        struct qreg src_color[4] = {
                c->outputs[0], c->outputs[1], c->outputs[2], c->outputs[3],
        };

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        if (c->fs_key->blend.blend_enable ||
            c->fs_key->blend.colormask != 0xf) {
                struct qreg r4 = qir_TLB_COLOR_READ(c);
                for (int i = 0; i < 4; i++)
                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
                for (int i = 0; i < 4; i++)
                        dst_color[i] = get_swizzled_channel(c,
                                                            tlb_read_color,
                                                            format_swiz[i]);
        }

        struct qreg blend_color[4];
        vc4_blend(c, blend_color, dst_color, src_color);

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        for (int i = 0; i < 4; i++) {
                if (!(c->fs_key->blend.colormask & (1 << i))) {
                        blend_color[i] = dst_color[i];
                }
        }

        /* Debug: Sometimes you're getting a black output and just want to
         * see if the FS is getting executed at all.  Spam magenta into the
         * color output.
         */
        if (0) {
                blend_color[0] = qir_uniform_f(c, 1.0);
                blend_color[1] = qir_uniform_f(c, 0.0);
                blend_color[2] = qir_uniform_f(c, 1.0);
                blend_color[3] = qir_uniform_f(c, 0.5);
        }

        struct qreg swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
                                                           format_swiz[i]);
        }

        if (c->discard.file != QFILE_NULL)
                qir_TLB_DISCARD_SETUP(c, c->discard);

        if (c->fs_key->depth_enabled) {
                qir_emit(c, qir_inst(QOP_TLB_PASSTHROUGH_Z_WRITE, c->undef,
                                     c->undef, c->undef));
        }

        bool color_written = false;
        for (int i = 0; i < 4; i++) {
                if (swizzled_outputs[i].file != QFILE_NULL)
                        color_written = true;
        }

        struct qreg packed_color;
        if (color_written) {
                /* Fill in any undefined colors.  The simulator will assert
                 * if we read a value that was never written, and I don't
                 * know what the hardware does.
                 */
                for (int i = 0; i < 4; i++) {
                        if (swizzled_outputs[i].file == QFILE_NULL)
                                swizzled_outputs[i] = qir_uniform_f(c, 0.0);
                }
                packed_color = qir_get_temp(c);
                qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
                                      swizzled_outputs[0],
                                      swizzled_outputs[1],
                                      swizzled_outputs[2],
                                      swizzled_outputs[3]));
        } else {
                packed_color = qir_uniform_ui(c, 0);
        }

        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                             packed_color, c->undef));
}

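/* Scales the clip-space X/Y coordinates to screen space and writes them to
 * the VPM as packed integers.  The viewport scale uniforms are premultiplied
 * by 16 in vc4_write_uniforms(), presumably because the hardware consumes
 * these coordinates in 12.4 fixed point.
 */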
static void
emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
{
        struct qreg xyi[2];

        for (int i = 0; i < 2; i++) {
                struct qreg scale =
                        add_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);

                xyi[i] = qir_FTOI(c, qir_FMUL(c,
                                              qir_FMUL(c,
                                                       c->outputs[i],
                                                       scale),
                                              rcp_w));
        }

        qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
}

static void
emit_zs_write(struct vc4_compile *c, struct qreg rcp_w)
{
        struct qreg zscale = add_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
        struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);

        qir_VPM_WRITE(c, qir_FMUL(c, qir_FADD(c, qir_FMUL(c,
                                                          c->outputs[2],
                                                          zscale),
                                              zoffset),
                                  rcp_w));
}

static void
emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
{
        qir_VPM_WRITE(c, rcp_w);
}

static void
emit_vert_end(struct vc4_compile *c)
{
        struct qreg rcp_w = qir_RCP(c, c->outputs[3]);

        emit_scaled_viewport_write(c, rcp_w);
        emit_zs_write(c, rcp_w);
        emit_rcp_wc_write(c, rcp_w);

        for (int i = 4; i < c->num_outputs; i++) {
                qir_VPM_WRITE(c, c->outputs[i]);
        }
}

static void
emit_coord_end(struct vc4_compile *c)
{
        struct qreg rcp_w = qir_RCP(c, c->outputs[3]);

        for (int i = 0; i < 4; i++)
                qir_VPM_WRITE(c, c->outputs[i]);

        emit_scaled_viewport_write(c, rcp_w);
        emit_zs_write(c, rcp_w);
        emit_rcp_wc_write(c, rcp_w);
}

static struct vc4_compile *
vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage,
                       struct vc4_key *key)
{
        struct vc4_compile *c = qir_compile_init();
        int ret;

        c->stage = stage;

        /* XXX sizing */
        c->temps = calloc(sizeof(struct qreg), 1024);
        c->inputs = calloc(sizeof(struct qreg), 8 * 4);
        c->outputs = calloc(sizeof(struct qreg), 1024);
        c->uniforms = calloc(sizeof(struct qreg), 1024);
        c->consts = calloc(sizeof(struct qreg), 1024);

        c->uniform_data = calloc(sizeof(uint32_t), 1024);
        c->uniform_contents = calloc(sizeof(enum quniform_contents), 1024);

        c->shader_state = key->shader_state;
        ret = tgsi_parse_init(&c->parser, c->shader_state->tokens);
        assert(ret == TGSI_PARSE_OK);

        if (vc4_debug & VC4_DEBUG_TGSI) {
                fprintf(stderr, "TGSI:\n");
                tgsi_dump(c->shader_state->tokens, 0);
        }

        c->key = key;
        switch (stage) {
        case QSTAGE_FRAG:
                c->fs_key = (struct vc4_fs_key *)key;
                if (c->fs_key->is_points) {
                        c->point_x = emit_fragment_varying(c, 0);
                        c->point_y = emit_fragment_varying(c, 0);
                } else if (c->fs_key->is_lines) {
                        c->line_x = emit_fragment_varying(c, 0);
                }
                break;
        case QSTAGE_VERT:
        case QSTAGE_COORD:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        }

        while (!tgsi_parse_end_of_tokens(&c->parser)) {
                tgsi_parse_token(&c->parser);

                switch (c->parser.FullToken.Token.Type) {
                case TGSI_TOKEN_TYPE_DECLARATION:
                        emit_tgsi_declaration(c,
                                              &c->parser.FullToken.FullDeclaration);
                        break;

                case TGSI_TOKEN_TYPE_INSTRUCTION:
                        emit_tgsi_instruction(c,
                                              &c->parser.FullToken.FullInstruction);
                        break;

                case TGSI_TOKEN_TYPE_IMMEDIATE:
                        parse_tgsi_immediate(c,
                                             &c->parser.FullToken.FullImmediate);
                        break;
                }
        }

        switch (stage) {
        case QSTAGE_FRAG:
                emit_frag_end(c);
                break;
        case QSTAGE_VERT:
                emit_vert_end(c);
                break;
        case QSTAGE_COORD:
                emit_coord_end(c);
                break;
        }

        tgsi_parse_free(&c->parser);
        free(c->temps);

        qir_optimize(c);

        if (vc4_debug & VC4_DEBUG_QIR) {
                fprintf(stderr, "QIR:\n");
                qir_dump(c);
        }
        vc4_generate_code(c);

        if (vc4_debug & VC4_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s: %d instructions\n",
                        qir_get_stage_name(c->stage), c->qpu_inst_count);
                fprintf(stderr, "SHADER-DB: %s: %d uniforms\n",
                        qir_get_stage_name(c->stage), c->num_uniforms);
        }

        return c;
}

static void *
vc4_shader_state_create(struct pipe_context *pctx,
                        const struct pipe_shader_state *cso)
{
        struct pipe_shader_state *so = CALLOC_STRUCT(pipe_shader_state);
        if (!so)
                return NULL;

        so->tokens = tgsi_dup_tokens(cso->tokens);

        return so;
}

static void
copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
                             int shader_index,
                             struct vc4_compile *c)
{
        int count = c->num_uniforms;
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];

        uinfo->count = count;
        uinfo->data = malloc(count * sizeof(*uinfo->data));
        memcpy(uinfo->data, c->uniform_data,
               count * sizeof(*uinfo->data));
        uinfo->contents = malloc(count * sizeof(*uinfo->contents));
        memcpy(uinfo->contents, c->uniform_contents,
               count * sizeof(*uinfo->contents));
        uinfo->num_texture_samples = c->num_texture_samples;
}

static void
vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
               struct vc4_fs_key *key)
{
        struct vc4_compile *c = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG,
                                                       &key->base);
        shader->num_inputs = c->num_inputs;
        copy_uniform_state_to_shader(shader, 0, c);
        shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
                                      c->qpu_inst_count * sizeof(uint64_t),
                                      "fs_code");

        qir_compile_destroy(c);
}

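/* Vertex shaders are compiled twice from the same TGSI: once as the vertex
 * shader proper, and once as the coordinate shader that the binner runs to
 * get clip-space positions.  Both programs share one BO, with the coordinate
 * shader stored at coord_shader_offset.
 */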
static void
vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
               struct vc4_vs_key *key)
{
        struct vc4_compile *vs_c = vc4_shader_tgsi_to_qir(shader,
                                                          QSTAGE_VERT,
                                                          &key->base);
        copy_uniform_state_to_shader(shader, 0, vs_c);

        struct vc4_compile *cs_c = vc4_shader_tgsi_to_qir(shader,
                                                          QSTAGE_COORD,
                                                          &key->base);
        copy_uniform_state_to_shader(shader, 1, cs_c);

        uint32_t vs_size = vs_c->qpu_inst_count * sizeof(uint64_t);
        uint32_t cs_size = cs_c->qpu_inst_count * sizeof(uint64_t);
        shader->coord_shader_offset = vs_size; /* XXX: alignment? */
        shader->bo = vc4_bo_alloc(vc4->screen,
                                  shader->coord_shader_offset + cs_size,
                                  "vs_code");

        void *map = vc4_bo_map(shader->bo);
        memcpy(map, vs_c->qpu_insts, vs_size);
        memcpy(map + shader->coord_shader_offset,
               cs_c->qpu_insts, cs_size);

        qir_compile_destroy(vs_c);
        qir_compile_destroy(cs_c);
}

static void
vc4_setup_shared_key(struct vc4_key *key, struct vc4_texture_stateobj *texstate)
{
        for (int i = 0; i < texstate->num_textures; i++) {
                struct pipe_sampler_view *sampler = texstate->textures[i];
                struct pipe_sampler_state *sampler_state =
                        texstate->samplers[i];

                if (sampler) {
                        struct pipe_resource *prsc = sampler->texture;
                        key->tex[i].format = prsc->format;
                        key->tex[i].swizzle[0] = sampler->swizzle_r;
                        key->tex[i].swizzle[1] = sampler->swizzle_g;
                        key->tex[i].swizzle[2] = sampler->swizzle_b;
                        key->tex[i].swizzle[3] = sampler->swizzle_a;
                        key->tex[i].compare_mode = sampler_state->compare_mode;
                        key->tex[i].compare_func = sampler_state->compare_func;
                }
        }
}

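/* Compiled shader variants are keyed on all the non-TGSI state that gets
 * baked into the generated code (texture formats and compare functions,
 * blend state, color format, primitive type, and so on), so each state
 * combination is compiled at most once and then found in the cache.
 */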
static void
vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_fs_key local_key;
        struct vc4_fs_key *key = &local_key;

        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(&key->base, &vc4->fragtex);
        key->base.shader_state = vc4->prog.bind_fs;
        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                         prim_mode <= PIPE_PRIM_LINE_STRIP);
        key->blend = vc4->blend->rt[0];

        if (vc4->framebuffer.cbufs[0])
                key->color_format = vc4->framebuffer.cbufs[0]->format;

        key->depth_enabled = vc4->zsa->base.depth.enabled;

        vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
        if (vc4->prog.fs)
                return;

        key = malloc(sizeof(*key));
        memcpy(key, &local_key, sizeof(*key));

        struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
        vc4_fs_compile(vc4, shader, key);
        util_hash_table_set(vc4->fs_cache, key, shader);

        vc4->prog.fs = shader;
}

static void
vc4_update_compiled_vs(struct vc4_context *vc4)
{
        struct vc4_vs_key local_key;
        struct vc4_vs_key *key = &local_key;

        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(&key->base, &vc4->verttex);
        key->base.shader_state = vc4->prog.bind_vs;

        for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
                key->attr_formats[i] = vc4->vtx->pipe[i].src_format;

        vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
        if (vc4->prog.vs)
                return;

        key = malloc(sizeof(*key));
        memcpy(key, &local_key, sizeof(*key));

        struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
        vc4_vs_compile(vc4, shader, key);
        util_hash_table_set(vc4->vs_cache, key, shader);

        vc4->prog.vs = shader;
}

void
vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
{
        vc4_update_compiled_fs(vc4, prim_mode);
        vc4_update_compiled_vs(vc4);
}

static unsigned
fs_cache_hash(void *key)
{
        return util_hash_crc32(key, sizeof(struct vc4_fs_key));
}

static unsigned
vs_cache_hash(void *key)
{
        return util_hash_crc32(key, sizeof(struct vc4_vs_key));
}

static int
fs_cache_compare(void *key1, void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_fs_key));
}

static int
vs_cache_compare(void *key1, void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_vs_key));
}

struct delete_state {
        struct vc4_context *vc4;
        struct pipe_shader_state *shader_state;
};

static enum pipe_error
fs_delete_from_cache(void *in_key, void *in_value, void *data)
{
        struct delete_state *del = data;
        struct vc4_fs_key *key = in_key;
        struct vc4_compiled_shader *shader = in_value;

        if (key->base.shader_state == del->shader_state) {
                util_hash_table_remove(del->vc4->fs_cache, key);
                vc4_bo_unreference(&shader->bo);
                free(shader);
        }

        return PIPE_OK;
}

static enum pipe_error
vs_delete_from_cache(void *in_key, void *in_value, void *data)
{
        struct delete_state *del = data;
        struct vc4_vs_key *key = in_key;
        struct vc4_compiled_shader *shader = in_value;

        if (key->base.shader_state == del->shader_state) {
                util_hash_table_remove(del->vc4->vs_cache, key);
                vc4_bo_unreference(&shader->bo);
                free(shader);
        }

        return PIPE_OK;
}

static void
vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct pipe_shader_state *so = hwcso;
        struct delete_state del;

        del.vc4 = vc4;
        del.shader_state = so;
        util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
        util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);

        free((void *)so->tokens);
        free(so);
}

static uint32_t
translate_wrap(uint32_t p_wrap)
{
        switch (p_wrap) {
        case PIPE_TEX_WRAP_REPEAT:
                return 0;
        case PIPE_TEX_WRAP_CLAMP:
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return 1;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return 2;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return 3;
        default:
                fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
                assert(!"not reached");
                return 0;
        }
}

static void
write_texture_p0(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
                 rsc->slices[0].offset | texture->u.tex.last_level |
                 ((rsc->vc4_format & 7) << 4));
}

static void
write_texture_p1(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        static const uint32_t mipfilter_map[] = {
                [PIPE_TEX_MIPFILTER_NEAREST] = 2,
                [PIPE_TEX_MIPFILTER_LINEAR] = 4,
                [PIPE_TEX_MIPFILTER_NONE] = 0
        };
        static const uint32_t imgfilter_map[] = {
                [PIPE_TEX_FILTER_NEAREST] = 1,
                [PIPE_TEX_FILTER_LINEAR] = 0,
        };

        cl_u32(&vc4->uniforms,
               ((rsc->vc4_format >> 4) << 31) |
               (texture->texture->height0 << 20) |
               (texture->texture->width0 << 8) |
               (imgfilter_map[sampler->mag_img_filter] << 7) |
               ((imgfilter_map[sampler->min_img_filter] +
                 mipfilter_map[sampler->min_mip_filter]) << 4) |
               (translate_wrap(sampler->wrap_t) << 2) |
               (translate_wrap(sampler->wrap_s) << 0));
}

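/* The shader normalizes RECT texture coordinates by multiplying by 1/width
 * and 1/height (see tgsi_to_qir_tex()); this returns those reciprocals for
 * the bound texture.
 */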
static uint32_t
get_texrect_scale(struct vc4_texture_stateobj *texstate,
                  enum quniform_contents contents,
                  uint32_t data)
{
        struct pipe_sampler_view *texture = texstate->textures[data];
        uint32_t dim;

        if (contents == QUNIFORM_TEXRECT_SCALE_X)
                dim = texture->texture->width0;
        else
                dim = texture->texture->height0;

        return fui(1.0f / dim);
}

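/* Writes the per-draw uniform stream in exactly the order the compiler
 * emitted the uniforms, since the QPU consumes them sequentially from its
 * uniform FIFO.  Texture config uniforms go through cl_reloc() so the BO
 * addresses can be patched in at submit time.
 */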
void
vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                   struct vc4_constbuf_stateobj *cb,
                   struct vc4_texture_stateobj *texstate,
                   int shader_index)
{
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;

        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);

        for (int i = 0; i < uinfo->count; i++) {
                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        cl_u32(&vc4->uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        cl_u32(&vc4->uniforms,
                               gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_f(&vc4->uniforms, vc4->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[2]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P0:
                        write_texture_p0(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_u32(&vc4->uniforms,
                               get_texrect_scale(texstate,
                                                 uinfo->contents[i],
                                                 uinfo->data[i]));
                        break;

                case QUNIFORM_BLEND_CONST_COLOR:
                        cl_f(&vc4->uniforms,
                             vc4->blend_color.color[uinfo->data[i]]);
                        break;
                }
#if 0
                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
                fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
                        shader, shader_index, i, written_val, uif(written_val));
#endif
        }
}

static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        vc4->prog.bind_fs = hwcso;
        vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
        vc4->dirty |= VC4_DIRTY_PROG;
}

static void
vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        vc4->prog.bind_vs = hwcso;
        vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
        vc4->dirty |= VC4_DIRTY_PROG;
}

void
vc4_program_init(struct pipe_context *pctx)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        pctx->create_vs_state = vc4_shader_state_create;
        pctx->delete_vs_state = vc4_shader_state_delete;

        pctx->create_fs_state = vc4_shader_state_create;
        pctx->delete_fs_state = vc4_shader_state_delete;

        pctx->bind_fs_state = vc4_fp_state_bind;
        pctx->bind_vs_state = vc4_vp_state_bind;

        vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
        vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
}