vc4: Add support for the SSG opcode.
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
1 /*
2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include <inttypes.h>
26 #include "pipe/p_state.h"
27 #include "util/u_format.h"
28 #include "util/u_hash_table.h"
29 #include "util/u_hash.h"
30 #include "util/u_memory.h"
31 #include "util/u_pack_color.h"
32 #include "util/format_srgb.h"
33 #include "util/ralloc.h"
34 #include "util/hash_table.h"
35 #include "tgsi/tgsi_dump.h"
36 #include "tgsi/tgsi_info.h"
37 #include "tgsi/tgsi_lowering.h"
38
39 #include "vc4_context.h"
40 #include "vc4_qpu.h"
41 #include "vc4_qir.h"
42 #ifdef USE_VC4_SIMULATOR
43 #include "simpenrose/simpenrose.h"
44 #endif
45
/* Common shader-compile key: the shader's source plus the per-sampler
 * state that gets baked into the generated code.
 */
struct vc4_key {
        struct vc4_uncompiled_shader *shader_state;
        struct {
                enum pipe_format format;
                unsigned compare_mode:1;   /* shadow comparison enabled */
                unsigned compare_func:3;   /* PIPE_FUNC_* */
                unsigned wrap_s:3;         /* PIPE_TEX_WRAP_* */
                unsigned wrap_t:3;         /* PIPE_TEX_WRAP_* */
                uint8_t swizzle[4];        /* sampler-view channel swizzle */
        } tex[VC4_MAX_TEXTURE_SAMPLERS];
};
57
/* Fragment-shader compile key: framebuffer, depth/stencil, blend and
 * rasterizer state that affects generated FS code.
 */
struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
        bool depth_enabled;
        bool stencil_enabled;
        bool stencil_twoside;
        bool stencil_full_writemasks;
        bool is_points;                 /* drawing point primitives */
        bool is_lines;                  /* drawing line primitives */
        bool alpha_test;
        bool point_coord_upper_left;    /* gl_PointCoord origin convention */
        bool light_twoside;
        uint8_t alpha_test_func;        /* PIPE_FUNC_* for alpha test */
        uint32_t point_sprite_mask;     /* GENERIC varyings replaced by point coords */

        struct pipe_rt_blend_state blend;
};
75
/* Vertex-shader compile key: vertex attribute formats and whether this is
 * the coordinate-shader variant.
 */
struct vc4_vs_key {
        struct vc4_key base;
        enum pipe_format attr_formats[8];
        bool is_coord;                 /* compiling the coordinate shader */
        bool per_vertex_point_size;    /* PSIZE written per vertex */
};
82
83 static void
84 resize_qreg_array(struct vc4_compile *c,
85 struct qreg **regs,
86 uint32_t *size,
87 uint32_t decl_size)
88 {
89 if (*size >= decl_size)
90 return;
91
92 uint32_t old_size = *size;
93 *size = MAX2(*size * 2, decl_size);
94 *regs = reralloc(c, *regs, struct qreg, *size);
95 if (!*regs) {
96 fprintf(stderr, "Malloc failure\n");
97 abort();
98 }
99
100 for (uint32_t i = old_size; i < *size; i++)
101 (*regs)[i] = c->undef;
102 }
103
104 static struct qreg
105 add_uniform(struct vc4_compile *c,
106 enum quniform_contents contents,
107 uint32_t data)
108 {
109 uint32_t uniform = c->num_uniforms++;
110 struct qreg u = { QFILE_UNIF, uniform };
111
112 if (uniform >= c->uniform_array_size) {
113 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
114 c->uniform_array_size * 2);
115
116 c->uniform_data = reralloc(c, c->uniform_data,
117 uint32_t,
118 c->uniform_array_size);
119 c->uniform_contents = reralloc(c, c->uniform_contents,
120 enum quniform_contents,
121 c->uniform_array_size);
122 }
123
124 c->uniform_contents[uniform] = contents;
125 c->uniform_data[uniform] = data;
126
127 return u;
128 }
129
130 static struct qreg
131 get_temp_for_uniform(struct vc4_compile *c, enum quniform_contents contents,
132 uint32_t data)
133 {
134 struct qreg u = add_uniform(c, contents, data);
135 struct qreg t = qir_MOV(c, u);
136 return t;
137 }
138
139 static struct qreg
140 qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
141 {
142 return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui);
143 }
144
145 static struct qreg
146 qir_uniform_f(struct vc4_compile *c, float f)
147 {
148 return qir_uniform_ui(c, fui(f));
149 }
150
151 static struct qreg
152 get_src(struct vc4_compile *c, unsigned tgsi_op,
153 struct tgsi_src_register *src, int i)
154 {
155 struct qreg r = c->undef;
156
157 uint32_t s = i;
158 switch (i) {
159 case TGSI_SWIZZLE_X:
160 s = src->SwizzleX;
161 break;
162 case TGSI_SWIZZLE_Y:
163 s = src->SwizzleY;
164 break;
165 case TGSI_SWIZZLE_Z:
166 s = src->SwizzleZ;
167 break;
168 case TGSI_SWIZZLE_W:
169 s = src->SwizzleW;
170 break;
171 default:
172 abort();
173 }
174
175 assert(!src->Indirect);
176
177 switch (src->File) {
178 case TGSI_FILE_NULL:
179 return r;
180 case TGSI_FILE_TEMPORARY:
181 r = c->temps[src->Index * 4 + s];
182 break;
183 case TGSI_FILE_IMMEDIATE:
184 r = c->consts[src->Index * 4 + s];
185 break;
186 case TGSI_FILE_CONSTANT:
187 r = get_temp_for_uniform(c, QUNIFORM_UNIFORM,
188 src->Index * 4 + s);
189 break;
190 case TGSI_FILE_INPUT:
191 r = c->inputs[src->Index * 4 + s];
192 break;
193 case TGSI_FILE_SAMPLER:
194 case TGSI_FILE_SAMPLER_VIEW:
195 r = c->undef;
196 break;
197 default:
198 fprintf(stderr, "unknown src file %d\n", src->File);
199 abort();
200 }
201
202 if (src->Absolute)
203 r = qir_FMAXABS(c, r, r);
204
205 if (src->Negate) {
206 switch (tgsi_opcode_infer_src_type(tgsi_op)) {
207 case TGSI_TYPE_SIGNED:
208 case TGSI_TYPE_UNSIGNED:
209 r = qir_SUB(c, qir_uniform_ui(c, 0), r);
210 break;
211 default:
212 r = qir_FSUB(c, qir_uniform_f(c, 0.0), r);
213 break;
214 }
215 }
216
217 return r;
218 };
219
220
221 static void
222 update_dst(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst,
223 int i, struct qreg val)
224 {
225 struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;
226
227 assert(!tgsi_dst->Indirect);
228
229 switch (tgsi_dst->File) {
230 case TGSI_FILE_TEMPORARY:
231 c->temps[tgsi_dst->Index * 4 + i] = val;
232 break;
233 case TGSI_FILE_OUTPUT:
234 c->outputs[tgsi_dst->Index * 4 + i] = val;
235 c->num_outputs = MAX2(c->num_outputs,
236 tgsi_dst->Index * 4 + i + 1);
237 break;
238 default:
239 fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
240 abort();
241 }
242 };
243
244 static struct qreg
245 get_swizzled_channel(struct vc4_compile *c,
246 struct qreg *srcs, int swiz)
247 {
248 switch (swiz) {
249 default:
250 case UTIL_FORMAT_SWIZZLE_NONE:
251 fprintf(stderr, "warning: unknown swizzle\n");
252 /* FALLTHROUGH */
253 case UTIL_FORMAT_SWIZZLE_0:
254 return qir_uniform_f(c, 0.0);
255 case UTIL_FORMAT_SWIZZLE_1:
256 return qir_uniform_f(c, 1.0);
257 case UTIL_FORMAT_SWIZZLE_X:
258 case UTIL_FORMAT_SWIZZLE_Y:
259 case UTIL_FORMAT_SWIZZLE_Z:
260 case UTIL_FORMAT_SWIZZLE_W:
261 return srcs[swiz];
262 }
263 }
264
265 static struct qreg
266 tgsi_to_qir_alu(struct vc4_compile *c,
267 struct tgsi_full_instruction *tgsi_inst,
268 enum qop op, struct qreg *src, int i)
269 {
270 struct qreg dst = qir_get_temp(c);
271 qir_emit(c, qir_inst4(op, dst,
272 src[0 * 4 + i],
273 src[1 * 4 + i],
274 src[2 * 4 + i],
275 c->undef));
276 return dst;
277 }
278
/* Emits a scalar QIR op (RCP/RSQ/EXP2/LOG2).  These always consume the x
 * channel of src0 and the same result is written to every enabled dest
 * channel, so the channel index i is intentionally unused here.
 */
static struct qreg
tgsi_to_qir_scalar(struct vc4_compile *c,
                   struct tgsi_full_instruction *tgsi_inst,
                   enum qop op, struct qreg *src, int i)
{
        struct qreg dst = qir_get_temp(c);
        qir_emit(c, qir_inst(op, dst,
                             src[0 * 4 + 0],
                             c->undef));
        return dst;
}
290
/*
 * sRGB-to-linear decode of one channel:
 *   srgb <= 0.04045:  linear = srgb / 12.92
 *   otherwise:        linear = ((srgb + 0.055) / 1.055) ^ 2.4
 * The branch is resolved with a negative-flag select on (srgb - 0.04045).
 */
static struct qreg
qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
{
        struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
        struct qreg high = qir_POW(c,
                                   qir_FMUL(c,
                                            qir_FADD(c,
                                                     srgb,
                                                     qir_uniform_f(c, 0.055)),
                                            qir_uniform_f(c, 1.0 / 1.055)),
                                   qir_uniform_f(c, 2.4));

        qir_SF(c, qir_FSUB(c, srgb, qir_uniform_f(c, 0.04045)));
        return qir_SEL_X_Y_NS(c, low, high);
}
306
/*
 * Linear-to-sRGB encode of one channel:
 *   linear <= 0.0031308:  srgb = linear * 12.92
 *   otherwise:            srgb = 1.055 * linear^(1/2.4) - 0.055
 * (0.41666 approximates 1/2.4.)  Branch resolved via negative-flag select.
 */
static struct qreg
qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
{
        struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
        struct qreg high = qir_FSUB(c,
                                    qir_FMUL(c,
                                             qir_uniform_f(c, 1.055),
                                             qir_POW(c,
                                                     linear,
                                                     qir_uniform_f(c, 0.41666))),
                                    qir_uniform_f(c, 0.055));

        qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
        return qir_SEL_X_Y_NS(c, low, high);
}
322
/*
 * 32-bit multiply built from the hardware's 24-bit multiplier: split each
 * operand into 16-bit halves so every partial product fits in 24x24 MUL24,
 * then recombine.  The hi*hi partial product only contributes above bit 32
 * and is dropped.
 */
static struct qreg
tgsi_to_qir_umul(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));
        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));

        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);

        /* result = lolo + ((hilo + lohi) << 16), all mod 2^32. */
        return qir_ADD(c, lolo, qir_SHL(c,
                                        qir_ADD(c, hilo, lohi),
                                        qir_uniform_ui(c, 16)));
}
345
/* Integer divide approximated in floating point: i2f, multiply by the
 * hardware RCP of the divisor, f2i.  NOTE(review): RCP is an approximation,
 * so results near exact multiples may be off by one; also inexact for
 * operands beyond float's 24-bit integer precision.
 */
static struct qreg
tgsi_to_qir_idiv(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_FTOI(c, qir_FMUL(c,
                                    qir_ITOF(c, src[0 * 4 + i]),
                                    qir_RCP(c, qir_ITOF(c, src[1 * 4 + i]))));
}
355
356 static struct qreg
357 tgsi_to_qir_ineg(struct vc4_compile *c,
358 struct tgsi_full_instruction *tgsi_inst,
359 enum qop op, struct qreg *src, int i)
360 {
361 return qir_SUB(c, qir_uniform_ui(c, 0), src[0 * 4 + i]);
362 }
363
/* SEQ: dst = (src0 == src1) ? 1.0 : 0.0 — float subtract, then select on
 * the zero flag being set.
 */
static struct qreg
tgsi_to_qir_seq(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
}
372
/* SNE: dst = (src0 != src1) ? 1.0 : 0.0 — select on zero flag clear. */
static struct qreg
tgsi_to_qir_sne(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
}
381
/* SLT: dst = (src0 < src1) ? 1.0 : 0.0 — select on negative flag set. */
static struct qreg
tgsi_to_qir_slt(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
}
390
/* SGE: dst = (src0 >= src1) ? 1.0 : 0.0 — select on negative flag clear. */
static struct qreg
tgsi_to_qir_sge(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
}
399
/* FSEQ: float ==, producing an all-ones integer true value (~0). */
static struct qreg
tgsi_to_qir_fseq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}
408
/* FSNE: float !=, producing an all-ones integer true value (~0). */
static struct qreg
tgsi_to_qir_fsne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}
417
/* FSLT: float <, producing an all-ones integer true value (~0). */
static struct qreg
tgsi_to_qir_fslt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}
426
/* FSGE: float >=, producing an all-ones integer true value (~0). */
static struct qreg
tgsi_to_qir_fsge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}
435
/* USEQ: integer ==, via integer subtract and zero-flag select (~0 true). */
static struct qreg
tgsi_to_qir_useq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}
444
/* USNE: integer !=, via integer subtract and zero-flag select (~0 true). */
static struct qreg
tgsi_to_qir_usne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}
453
/* ISLT: signed <, via subtract and negative-flag select (~0 true).
 * NOTE(review): the subtraction can wrap for operands of opposite sign
 * with large magnitude — confirm this matches the hardware contract.
 */
static struct qreg
tgsi_to_qir_islt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}
462
/* ISGE: signed >=, via subtract and negative-flag-clear select (~0 true). */
static struct qreg
tgsi_to_qir_isge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}
471
/* CMP: dst = (src0 < 0) ? src1 : src2, per channel, using the negative
 * flag from src0.
 */
static struct qreg
tgsi_to_qir_cmp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, src[0 * 4 + i]);
        return qir_SEL_X_Y_NS(c,
                              src[1 * 4 + i],
                              src[2 * 4 + i]);
}
482
483 static struct qreg
484 tgsi_to_qir_mad(struct vc4_compile *c,
485 struct tgsi_full_instruction *tgsi_inst,
486 enum qop op, struct qreg *src, int i)
487 {
488 return qir_FADD(c,
489 qir_FMUL(c,
490 src[0 * 4 + i],
491 src[1 * 4 + i]),
492 src[2 * 4 + i]);
493 }
494
/* LRP: linear interpolation between src2 and src1 by factor src0. */
static struct qreg
tgsi_to_qir_lrp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg src0 = src[0 * 4 + i];
        struct qreg src1 = src[1 * 4 + i];
        struct qreg src2 = src[2 * 4 + i];

        /* LRP is:
         *    src0 * src1 + (1 - src0) * src2.
         * -> src0 * src1 + src2 - src0 * src2
         * -> src2 + src0 * (src1 - src2)
         * which needs only one multiply.
         */
        return qir_FADD(c, src2, qir_FMUL(c, src0, qir_FSUB(c, src1, src2)));

}
512
/*
 * Emits the QIR texture-fetch sequence for TEX/TXP/TXB.
 *
 * The QPU texture unit is driven by writes to the R/T/B/S registers (S
 * last, which triggers the lookup), each paired with a uniform from
 * texture_u[] that configures the sampler.  The raw result comes back in
 * r4 and is unpacked, format-swizzled, optionally sRGB-decoded, and
 * finally swizzled by the sampler-view swizzle into the destination.
 */
static void
tgsi_to_qir_tex(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src)
{
        assert(!tgsi_inst->Instruction.Saturate);

        struct qreg s = src[0 * 4 + 0];
        struct qreg t = src[0 * 4 + 1];
        struct qreg r = src[0 * 4 + 2];
        uint32_t unit = tgsi_inst->Src[1].Register.Index;

        /* TXP: divide the coordinates by the q component up front. */
        struct qreg proj = c->undef;
        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
                proj = qir_RCP(c, src[0 * 4 + 3]);
                s = qir_FMUL(c, s, proj);
                t = qir_FMUL(c, t, proj);
        }

        /* Uniforms consumed in order by the texture-register writes below. */
        struct qreg texture_u[] = {
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
        };
        uint32_t next_texture_u = 0;

        /* There is no native support for GL texture rectangle coordinates, so
         * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
         * 1]).
         */
        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
                s = qir_FMUL(c, s,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_X,
                                                  unit));
                t = qir_FMUL(c, t,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_Y,
                                                  unit));
        }

        /* Cube maps: normalize the coordinates by the major axis and write
         * the R (face-select) register.  Otherwise, the R register is only
         * written when border-color handling needs it.
         */
        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
                struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r);
                struct qreg rcp_ma = qir_RCP(c, ma);
                s = qir_FMUL(c, s, rcp_ma);
                t = qir_FMUL(c, t, rcp_ma);
                r = qir_FMUL(c, r, rcp_ma);

                texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2, unit);

                qir_TEX_R(c, r, texture_u[next_texture_u++]);
        } else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                qir_TEX_R(c, get_temp_for_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
                          texture_u[next_texture_u++]);
        }

        /* GL_CLAMP is implemented in the shader by clamping into [0, 1]. */
        if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
                s = qir_FMIN(c, qir_FMAX(c, s, qir_uniform_f(c, 0.0)),
                             qir_uniform_f(c, 1.0));
        }

        if (c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                t = qir_FMIN(c, qir_FMAX(c, t, qir_uniform_f(c, 0.0)),
                             qir_uniform_f(c, 1.0));
        }

        qir_TEX_T(c, t, texture_u[next_texture_u++]);

        /* TXB: write the LOD bias register before the lookup trigger. */
        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB)
                qir_TEX_B(c, src[0 * 4 + 3], texture_u[next_texture_u++]);

        /* Writing S triggers the actual texture fetch. */
        qir_TEX_S(c, s, texture_u[next_texture_u++]);

        c->num_texture_samples++;
        struct qreg r4 = qir_TEX_RESULT(c);

        enum pipe_format format = c->key->tex[unit].format;

        struct qreg unpacked[4];
        if (util_format_is_depth_or_stencil(format)) {
                /* Depth comes back as a 24-bit value in the top bits of r4;
                 * normalize it to [0, 1].
                 */
                struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
                                                         qir_uniform_ui(c, 8)));
                struct qreg normalized = qir_FMUL(c, depthf,
                                                  qir_uniform_f(c, 1.0f/0xffffff));

                struct qreg depth_output;

                struct qreg one = qir_uniform_f(c, 1.0f);
                if (c->key->tex[unit].compare_mode) {
                        /* Shadow comparison against the reference value in
                         * the third coordinate (projected for TXP).
                         */
                        struct qreg compare = src[0 * 4 + 2];

                        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP)
                                compare = qir_FMUL(c, compare, proj);

                        switch (c->key->tex[unit].compare_func) {
                        case PIPE_FUNC_NEVER:
                                depth_output = qir_uniform_f(c, 0.0f);
                                break;
                        case PIPE_FUNC_ALWAYS:
                                depth_output = one;
                                break;
                        case PIPE_FUNC_EQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZS(c, one);
                                break;
                        case PIPE_FUNC_NOTEQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZC(c, one);
                                break;
                        case PIPE_FUNC_GREATER:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        case PIPE_FUNC_GEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LESS:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        }
                } else {
                        depth_output = normalized;
                }

                /* Depth textures broadcast the result to all channels. */
                for (int i = 0; i < 4; i++)
                        unpacked[i] = depth_output;
        } else {
                /* Unpack the four 8-bit channels of the r4 result. */
                for (int i = 0; i < 4; i++)
                        unpacked[i] = qir_R4_UNPACK(c, r4, i);
        }

        /* Apply the texture format's channel swizzle. */
        const uint8_t *format_swiz = vc4_get_format_swizzle(format);
        struct qreg texture_output[4];
        for (int i = 0; i < 4; i++) {
                texture_output[i] = get_swizzled_channel(c, unpacked,
                                                         format_swiz[i]);
        }

        /* sRGB decode applies to RGB but not alpha. */
        if (util_format_is_srgb(format)) {
                for (int i = 0; i < 3; i++)
                        texture_output[i] = qir_srgb_decode(c,
                                                            texture_output[i]);
        }

        /* Finally apply the sampler-view swizzle and store each enabled
         * destination channel.
         */
        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                update_dst(c, tgsi_inst, i,
                           get_swizzled_channel(c, texture_output,
                                                c->key->tex[unit].swizzle[i]));
        }
}
678
679 static struct qreg
680 tgsi_to_qir_trunc(struct vc4_compile *c,
681 struct tgsi_full_instruction *tgsi_inst,
682 enum qop op, struct qreg *src, int i)
683 {
684 return qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
685 }
686
/**
 * Computes x - floor(x), which is tricky because our FTOI truncates (rounds
 * to zero).
 *
 * If x was negative and non-integral, the truncated difference is negative
 * and we must add 1.0 to land in [0, 1); the negative-flag select handles
 * that.
 */
static struct qreg
tgsi_to_qir_frc(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
        struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
        qir_SF(c, diff);
        return qir_SEL_X_Y_NS(c,
                              qir_FADD(c, diff, qir_uniform_f(c, 1.0)),
                              diff);
}
703
/**
 * Computes floor(x), which is tricky because our FTOI truncates (rounds to
 * zero).
 *
 * For negative non-integral x, truncation rounded the wrong way, so
 * subtract 1.0 from the truncated value in that case.
 */
static struct qreg
tgsi_to_qir_flr(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));

        /* This will be < 0 if we truncated and the truncation was of a value
         * that was < 0 in the first place.
         */
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));

        return qir_SEL_X_Y_NS(c,
                              qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)),
                              trunc);
}
724
725 static struct qreg
726 tgsi_to_qir_abs(struct vc4_compile *c,
727 struct tgsi_full_instruction *tgsi_inst,
728 enum qop op, struct qreg *src, int i)
729 {
730 struct qreg arg = src[0 * 4 + i];
731 return qir_FMAXABS(c, arg, arg);
732 }
733
/* Note that this instruction replicates its result from the x channel.
 *
 * sin(2*pi*x) is approximated by a Taylor series in x after reducing the
 * argument to one period with frc(); coeff[] holds the odd-power series
 * coefficients with the 2*pi scale folded in.
 */
static struct qreg
tgsi_to_qir_sin(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        float coeff[] = {
                2.0 * M_PI,
                -pow(2.0 * M_PI, 3) / (3 * 2 * 1),
                pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
        };

        /* Reduce to [0, 1) periods of the sine. */
        struct qreg scaled_x =
                qir_FMUL(c,
                         src[0 * 4 + 0],
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));


        struct qreg x = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);
        struct qreg x2 = qir_FMUL(c, x, x);
        struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
        for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
                /* Advance x through the odd powers: x^3, x^5, x^7. */
                x = qir_FMUL(c, x, x2);
                sum = qir_FADD(c,
                               sum,
                               qir_FMUL(c,
                                        x,
                                        qir_uniform_f(c, coeff[i])));
        }
        return sum;
}
766
767 /* Note that this instruction replicates its result from the x channel */
768 static struct qreg
769 tgsi_to_qir_cos(struct vc4_compile *c,
770 struct tgsi_full_instruction *tgsi_inst,
771 enum qop op, struct qreg *src, int i)
772 {
773 float coeff[] = {
774 1.0f,
775 -pow(2.0 * M_PI, 2) / (2 * 1),
776 pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
777 -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
778 };
779
780 struct qreg scaled_x =
781 qir_FMUL(c, src[0 * 4 + 0],
782 qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
783 struct qreg x_frac = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);
784
785 struct qreg sum = qir_uniform_f(c, coeff[0]);
786 struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
787 struct qreg x = x2; /* Current x^2, x^4, or x^6 */
788 for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
789 if (i != 1)
790 x = qir_FMUL(c, x, x2);
791
792 struct qreg mul = qir_FMUL(c,
793 x,
794 qir_uniform_f(c, coeff[i]));
795 if (i == 0)
796 sum = mul;
797 else
798 sum = qir_FADD(c, sum, mul);
799 }
800 return sum;
801 }
802
/* CLAMP: clamp src0 to [src1, src2], computed as max(min(src0, src2), src1)
 * (equivalent to min(max(src0, src1), src2) when src1 <= src2).
 */
static struct qreg
tgsi_to_qir_clamp(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
{
        return qir_FMAX(c, qir_FMIN(c,
                                    src[0 * 4 + i],
                                    src[2 * 4 + i]),
                        src[1 * 4 + i]);
}
813
/* SSG: sign of src0 as a float (-1.0, 0.0, or 1.0).  The outer select
 * picks -1.0 when the negative flag is set; otherwise the inner select
 * yields 1.0 for nonzero input (zero flag clear) and 0.0 for zero.
 */
static struct qreg
tgsi_to_qir_ssg(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, src[0 * 4 + i]);
        return qir_SEL_X_Y_NC(c,
                              qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0)),
                              qir_uniform_f(c, -1.0));
}
824
/*
 * Reads one vertex attribute from the VPM and converts it to four float
 * channels in c->inputs[attr * 4 .. attr * 4 + 3] according to the
 * attribute's pipe format.  Unsupported formats warn once and produce 0.0.
 */
static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
        enum pipe_format format = c->vs_key->attr_formats[attr];
        struct qreg vpm_reads[4];

        /* Right now, we're setting the VPM offsets to be 16 bytes wide every
         * time, so we always read 4 32-bit VPM entries.
         */
        for (int i = 0; i < 4; i++) {
                vpm_reads[i] = qir_get_temp(c);
                qir_emit(c, qir_inst(QOP_VPM_READ,
                                     vpm_reads[i],
                                     c->undef,
                                     c->undef));
                c->num_inputs++;
        }

        bool format_warned = false;
        const struct util_format_description *desc =
                util_format_description(format);

        for (int i = 0; i < 4; i++) {
                uint8_t swiz = desc->swizzle[i];
                struct qreg result;

                if (swiz > UTIL_FORMAT_SWIZZLE_W)
                        /* Constant 0/1 swizzles. */
                        result = get_swizzled_channel(c, vpm_reads, swiz);
                else if (desc->channel[swiz].size == 32 &&
                         desc->channel[swiz].type == UTIL_FORMAT_TYPE_FLOAT) {
                        /* 32-bit floats pass through unconverted. */
                        result = get_swizzled_channel(c, vpm_reads, swiz);
                } else if (desc->channel[swiz].size == 8 &&
                           (desc->channel[swiz].type == UTIL_FORMAT_TYPE_UNSIGNED ||
                            desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) &&
                           desc->channel[swiz].normalized) {
                        /* 8-bit normalized: all four bytes are packed in the
                         * first VPM word.  Signed values are biased into
                         * unsigned range with an XOR, then rescaled below.
                         */
                        struct qreg vpm = vpm_reads[0];
                        if (desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED)
                                vpm = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
                        result = qir_UNPACK_8(c, vpm, swiz);
                } else {
                        if (!format_warned) {
                                fprintf(stderr,
                                        "vtx element %d unsupported type: %s\n",
                                        attr, util_format_name(format));
                                format_warned = true;
                        }
                        result = qir_uniform_f(c, 0.0);
                }

                if (desc->channel[swiz].normalized &&
                    desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) {
                        /* Map the biased [0, 1] unpack back to [-1, 1]. */
                        result = qir_FSUB(c,
                                          qir_FMUL(c,
                                                   result,
                                                   qir_uniform_f(c, 2.0)),
                                          qir_uniform_f(c, 1.0));
                }

                c->inputs[attr * 4 + i] = result;
        }
}
886
/* KILL_IF: accumulate a discard condition — if channel i of src0 is
 * negative, force c->discard to 1.0; otherwise keep its previous value.
 */
static void
tgsi_to_qir_kill_if(struct vc4_compile *c, struct qreg *src, int i)
{
        if (c->discard.file == QFILE_NULL)
                c->discard = qir_uniform_f(c, 0.0);
        qir_SF(c, src[0 * 4 + i]);
        c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
                                    c->discard);
}
896
/* Fills in gl_FragCoord: X/Y from the rasterizer, Z rescaled from the
 * 24-bit depth value to [0, 1], and W as 1/W (reciprocal of the
 * interpolated W).
 */
static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
        c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
        c->inputs[attr * 4 + 2] =
                qir_FMUL(c,
                         qir_ITOF(c, qir_FRAG_Z(c)),
                         qir_uniform_f(c, 1.0 / 0xffffff));
        c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}
908
/* Fills in gl_PointCoord for point sprites; flips T when the state
 * requests an upper-left origin.  Falls back to (0, 0) if no point
 * coordinates were set up.
 */
static void
emit_point_coord_input(struct vc4_compile *c, int attr)
{
        if (c->point_x.file == QFILE_NULL) {
                c->point_x = qir_uniform_f(c, 0.0);
                c->point_y = qir_uniform_f(c, 0.0);
        }

        c->inputs[attr * 4 + 0] = c->point_x;
        if (c->fs_key->point_coord_upper_left) {
                c->inputs[attr * 4 + 1] = qir_FSUB(c,
                                                   qir_uniform_f(c, 1.0),
                                                   c->point_y);
        } else {
                c->inputs[attr * 4 + 1] = c->point_y;
        }
        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
}
928
929 static struct qreg
930 emit_fragment_varying(struct vc4_compile *c, int index)
931 {
932 struct qreg vary = {
933 QFILE_VARY,
934 index
935 };
936
937 return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
938 }
939
/* Sets up the four interpolated components of one fragment-shader input
 * and records which inputs carry colors (for two-sided lighting).
 */
static void
emit_fragment_input(struct vc4_compile *c, int attr,
                    struct tgsi_full_declaration *decl)
{
        for (int i = 0; i < 4; i++) {
                c->inputs[attr * 4 + i] =
                        emit_fragment_varying(c, attr * 4 + i);
                c->num_inputs++;

                /* NOTE(review): this sets bit i (the channel index), not
                 * bit (attr * 4 + i) — verify against the consumers of
                 * color_inputs whether per-attribute tracking was intended.
                 */
                if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR ||
                    decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR)
                        c->color_inputs |= 1 << i;
        }
}
954
/* Fills in gl_FrontFacing as a TGSI FACE input: maps the hardware's
 * reverse-winding flag (0/1) to +1.0 (front) / -1.0 (back) via
 * 1 - 2 * flag.
 */
static void
emit_face_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FSUB(c,
                                           qir_uniform_f(c, 1.0),
                                           qir_FMUL(c,
                                                    qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
                                                    qir_uniform_f(c, 2.0)));
        c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
}
967
/*
 * Handles a TGSI declaration: sizes the temp/input/output register arrays
 * and emits the setup code for each declared input (fragment varyings,
 * special inputs, or vertex attributes), and records the indices of
 * semantically special outputs.
 */
static void
emit_tgsi_declaration(struct vc4_compile *c,
                      struct tgsi_full_declaration *decl)
{
        switch (decl->Declaration.File) {
        case TGSI_FILE_TEMPORARY: {
                uint32_t old_size = c->temps_array_size;
                resize_qreg_array(c, &c->temps, &c->temps_array_size,
                                  (decl->Range.Last + 1) * 4);

                /* Initialize new temps to 0 so reads-before-writes are
                 * defined.
                 */
                for (int i = old_size; i < c->temps_array_size; i++)
                        c->temps[i] = qir_uniform_ui(c, 0);
                break;
        }

        case TGSI_FILE_INPUT:
                resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                  (decl->Range.Last + 1) * 4);

                for (int i = decl->Range.First;
                     i <= decl->Range.Last;
                     i++) {
                        if (c->stage == QSTAGE_FRAG) {
                                /* Dispatch on semantic: fragcoord, face,
                                 * point-sprite coords, or a plain varying.
                                 */
                                if (decl->Semantic.Name ==
                                    TGSI_SEMANTIC_POSITION) {
                                        emit_fragcoord_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
                                        emit_face_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
                                           (c->fs_key->point_sprite_mask &
                                            (1 << decl->Semantic.Index))) {
                                        emit_point_coord_input(c, i);
                                } else {
                                        emit_fragment_input(c, i, decl);
                                }
                        } else {
                                emit_vertex_input(c, i);
                        }
                }
                break;

        case TGSI_FILE_OUTPUT:
                resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
                                  (decl->Range.Last + 1) * 4);

                /* Remember where the special outputs land so the epilogue
                 * can find them.
                 */
                switch (decl->Semantic.Name) {
                case TGSI_SEMANTIC_POSITION:
                        c->output_position_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_COLOR:
                        c->output_color_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_PSIZE:
                        c->output_point_size_index = decl->Range.First * 4;
                        break;
                }

                break;
        }
}
1028
1029 static void
1030 emit_tgsi_instruction(struct vc4_compile *c,
1031 struct tgsi_full_instruction *tgsi_inst)
1032 {
1033 struct {
1034 enum qop op;
1035 struct qreg (*func)(struct vc4_compile *c,
1036 struct tgsi_full_instruction *tgsi_inst,
1037 enum qop op,
1038 struct qreg *src, int i);
1039 } op_trans[] = {
1040 [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
1041 [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
1042 [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
1043 [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
1044 [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
1045 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
1046 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
1047 [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu },
1048 [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu },
1049 [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu },
1050 [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu },
1051 [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu },
1052 [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu },
1053 [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu },
1054 [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu },
1055 [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu },
1056 [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu },
1057 [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu },
1058 [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu },
1059
1060 [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul },
1061 [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
1062 [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },
1063
1064 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
1065 [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
1066 [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
1067 [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
1068 [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
1069 [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq },
1070 [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne },
1071 [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge },
1072 [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt },
1073 [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq },
1074 [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne },
1075 [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge },
1076 [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },
1077
1078 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
1079 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
1080 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_scalar },
1081 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar },
1082 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
1083 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
1084 [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },
1085 [TGSI_OPCODE_TRUNC] = { 0, tgsi_to_qir_trunc },
1086 [TGSI_OPCODE_FRC] = { 0, tgsi_to_qir_frc },
1087 [TGSI_OPCODE_FLR] = { 0, tgsi_to_qir_flr },
1088 [TGSI_OPCODE_SIN] = { 0, tgsi_to_qir_sin },
1089 [TGSI_OPCODE_COS] = { 0, tgsi_to_qir_cos },
1090 [TGSI_OPCODE_CLAMP] = { 0, tgsi_to_qir_clamp },
1091 [TGSI_OPCODE_SSG] = { 0, tgsi_to_qir_ssg },
1092 };
1093 static int asdf = 0;
1094 uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
1095
1096 if (tgsi_op == TGSI_OPCODE_END)
1097 return;
1098
1099 struct qreg src_regs[12];
1100 for (int s = 0; s < 3; s++) {
1101 for (int i = 0; i < 4; i++) {
1102 src_regs[4 * s + i] =
1103 get_src(c, tgsi_inst->Instruction.Opcode,
1104 &tgsi_inst->Src[s].Register, i);
1105 }
1106 }
1107
1108 switch (tgsi_op) {
1109 case TGSI_OPCODE_TEX:
1110 case TGSI_OPCODE_TXP:
1111 case TGSI_OPCODE_TXB:
1112 tgsi_to_qir_tex(c, tgsi_inst,
1113 op_trans[tgsi_op].op, src_regs);
1114 return;
1115 case TGSI_OPCODE_KILL:
1116 c->discard = qir_uniform_f(c, 1.0);
1117 return;
1118 case TGSI_OPCODE_KILL_IF:
1119 for (int i = 0; i < 4; i++)
1120 tgsi_to_qir_kill_if(c, src_regs, i);
1121 return;
1122 default:
1123 break;
1124 }
1125
1126 if (tgsi_op > ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
1127 fprintf(stderr, "unknown tgsi inst: ");
1128 tgsi_dump_instruction(tgsi_inst, asdf++);
1129 fprintf(stderr, "\n");
1130 abort();
1131 }
1132
1133 for (int i = 0; i < 4; i++) {
1134 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
1135 continue;
1136
1137 struct qreg result;
1138
1139 result = op_trans[tgsi_op].func(c, tgsi_inst,
1140 op_trans[tgsi_op].op,
1141 src_regs, i);
1142
1143 if (tgsi_inst->Instruction.Saturate) {
1144 float low = (tgsi_inst->Instruction.Saturate ==
1145 TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
1146 result = qir_FMAX(c,
1147 qir_FMIN(c,
1148 result,
1149 qir_uniform_f(c, 1.0)),
1150 qir_uniform_f(c, low));
1151 }
1152
1153 update_dst(c, tgsi_inst, i, result);
1154 }
1155 }
1156
1157 static void
1158 parse_tgsi_immediate(struct vc4_compile *c, struct tgsi_full_immediate *imm)
1159 {
1160 for (int i = 0; i < 4; i++) {
1161 unsigned n = c->num_consts++;
1162 resize_qreg_array(c, &c->consts, &c->consts_array_size, n + 1);
1163 c->consts[n] = qir_uniform_ui(c, imm->u[i].Uint);
1164 }
1165 }
1166
/**
 * Scales @val by the given PIPE_BLENDFACTOR_* for one channel of the blend
 * equation.
 *
 * @dst and @src are the full 4-channel destination/source colors, since
 * several factors reference channels other than the one being computed
 * (alpha, or the constant blend color).
 */
static struct qreg
vc4_blend_channel(struct vc4_compile *c,
                  struct qreg *dst,
                  struct qreg *src,
                  struct qreg val,
                  unsigned factor,
                  int channel)
{
        switch(factor) {
        case PIPE_BLENDFACTOR_ONE:
                return val;
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return qir_FMUL(c, val, src[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return qir_FMUL(c, val, src[3]);
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return qir_FMUL(c, val, dst[3]);
        case PIPE_BLENDFACTOR_DST_COLOR:
                return qir_FMUL(c, val, dst[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                /* min(As, 1 - Ad) */
                return qir_FMIN(c, src[3], qir_FSUB(c,
                                                    qir_uniform_f(c, 1.0),
                                                    dst[3]));
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     channel));
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     3));
        case PIPE_BLENDFACTOR_ZERO:
                return qir_uniform_f(c, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[channel]));
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[3]));
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[3]));
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[channel]));
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              channel)));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              3)));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return val;
        }
}
1237
1238 static struct qreg
1239 vc4_blend_func(struct vc4_compile *c,
1240 struct qreg src, struct qreg dst,
1241 unsigned func)
1242 {
1243 switch (func) {
1244 case PIPE_BLEND_ADD:
1245 return qir_FADD(c, src, dst);
1246 case PIPE_BLEND_SUBTRACT:
1247 return qir_FSUB(c, src, dst);
1248 case PIPE_BLEND_REVERSE_SUBTRACT:
1249 return qir_FSUB(c, dst, src);
1250 case PIPE_BLEND_MIN:
1251 return qir_FMIN(c, src, dst);
1252 case PIPE_BLEND_MAX:
1253 return qir_FMAX(c, src, dst);
1254
1255 default:
1256 /* Unsupported. */
1257 fprintf(stderr, "Unknown blend func %d\n", func);
1258 return src;
1259
1260 }
1261 }
1262
1263 /**
1264 * Implements fixed function blending in shader code.
1265 *
1266 * VC4 doesn't have any hardware support for blending. Instead, you read the
1267 * current contents of the destination from the tile buffer after having
1268 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
1269 * math using your output color and that destination value, and update the
1270 * output color appropriately.
1271 */
1272 static void
1273 vc4_blend(struct vc4_compile *c, struct qreg *result,
1274 struct qreg *dst_color, struct qreg *src_color)
1275 {
1276 struct pipe_rt_blend_state *blend = &c->fs_key->blend;
1277
1278 if (!blend->blend_enable) {
1279 for (int i = 0; i < 4; i++)
1280 result[i] = src_color[i];
1281 return;
1282 }
1283
1284 struct qreg src_blend[4], dst_blend[4];
1285 for (int i = 0; i < 3; i++) {
1286 src_blend[i] = vc4_blend_channel(c,
1287 dst_color, src_color,
1288 src_color[i],
1289 blend->rgb_src_factor, i);
1290 dst_blend[i] = vc4_blend_channel(c,
1291 dst_color, src_color,
1292 dst_color[i],
1293 blend->rgb_dst_factor, i);
1294 }
1295 src_blend[3] = vc4_blend_channel(c,
1296 dst_color, src_color,
1297 src_color[3],
1298 blend->alpha_src_factor, 3);
1299 dst_blend[3] = vc4_blend_channel(c,
1300 dst_color, src_color,
1301 dst_color[3],
1302 blend->alpha_dst_factor, 3);
1303
1304 for (int i = 0; i < 3; i++) {
1305 result[i] = vc4_blend_func(c,
1306 src_blend[i], dst_blend[i],
1307 blend->rgb_func);
1308 }
1309 result[3] = vc4_blend_func(c,
1310 src_blend[3], dst_blend[3],
1311 blend->alpha_func);
1312 }
1313
1314 static void
1315 alpha_test_discard(struct vc4_compile *c)
1316 {
1317 struct qreg src_alpha;
1318 struct qreg alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0);
1319
1320 if (!c->fs_key->alpha_test)
1321 return;
1322
1323 if (c->output_color_index != -1)
1324 src_alpha = c->outputs[c->output_color_index + 3];
1325 else
1326 src_alpha = qir_uniform_f(c, 1.0);
1327
1328 if (c->discard.file == QFILE_NULL)
1329 c->discard = qir_uniform_f(c, 0.0);
1330
1331 switch (c->fs_key->alpha_test_func) {
1332 case PIPE_FUNC_NEVER:
1333 c->discard = qir_uniform_f(c, 1.0);
1334 break;
1335 case PIPE_FUNC_ALWAYS:
1336 break;
1337 case PIPE_FUNC_EQUAL:
1338 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1339 c->discard = qir_SEL_X_Y_ZS(c, c->discard,
1340 qir_uniform_f(c, 1.0));
1341 break;
1342 case PIPE_FUNC_NOTEQUAL:
1343 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1344 c->discard = qir_SEL_X_Y_ZC(c, c->discard,
1345 qir_uniform_f(c, 1.0));
1346 break;
1347 case PIPE_FUNC_GREATER:
1348 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1349 c->discard = qir_SEL_X_Y_NC(c, c->discard,
1350 qir_uniform_f(c, 1.0));
1351 break;
1352 case PIPE_FUNC_GEQUAL:
1353 qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
1354 c->discard = qir_SEL_X_Y_NS(c, c->discard,
1355 qir_uniform_f(c, 1.0));
1356 break;
1357 case PIPE_FUNC_LESS:
1358 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1359 c->discard = qir_SEL_X_Y_NS(c, c->discard,
1360 qir_uniform_f(c, 1.0));
1361 break;
1362 case PIPE_FUNC_LEQUAL:
1363 qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
1364 c->discard = qir_SEL_X_Y_NC(c, c->discard,
1365 qir_uniform_f(c, 1.0));
1366 break;
1367 }
1368 }
1369
/**
 * Emits the fragment shader epilogue: alpha test, destination color read and
 * blending, colormask application, and the TLB stencil/Z/color writes.
 */
static void
emit_frag_end(struct vc4_compile *c)
{
        alpha_test_discard(c);

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        /* The current framebuffer color is only read back when blending is
         * on or a partial colormask has to mix it back in.
         */
        if (c->fs_key->blend.blend_enable ||
            c->fs_key->blend.colormask != 0xf) {
                struct qreg r4 = qir_TLB_COLOR_READ(c);
                for (int i = 0; i < 4; i++)
                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
                for (int i = 0; i < 4; i++) {
                        dst_color[i] = get_swizzled_channel(c,
                                                            tlb_read_color,
                                                            format_swiz[i]);
                        /* Blend in linear space: undo the sRGB encoding of
                         * the stored color channels (alpha stays linear).
                         */
                        if (util_format_is_srgb(color_format) && i != 3) {
                                linear_dst_color[i] =
                                        qir_srgb_decode(c, dst_color[i]);
                        } else {
                                linear_dst_color[i] = dst_color[i];
                        }
                }
        }

        struct qreg blend_color[4];
        struct qreg undef_array[4] = {
                c->undef, c->undef, c->undef, c->undef
        };
        vc4_blend(c, blend_color, linear_dst_color,
                  (c->output_color_index != -1 ?
                   c->outputs + c->output_color_index :
                   undef_array));

        /* Re-encode the blended RGB back to sRGB for storage. */
        if (util_format_is_srgb(color_format)) {
                for (int i = 0; i < 3; i++)
                        blend_color[i] = qir_srgb_encode(c, blend_color[i]);
        }

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        for (int i = 0; i < 4; i++) {
                if (!(c->fs_key->blend.colormask & (1 << i))) {
                        blend_color[i] = dst_color[i];
                }
        }

        /* Debug: Sometimes you're getting a black output and just want to see
         * if the FS is getting executed at all.  Spam magenta into the color
         * output.
         */
        if (0) {
                blend_color[0] = qir_uniform_f(c, 1.0);
                blend_color[1] = qir_uniform_f(c, 0.0);
                blend_color[2] = qir_uniform_f(c, 1.0);
                blend_color[3] = qir_uniform_f(c, 0.5);
        }

        /* Reorder the channels into the framebuffer format's layout. */
        struct qreg swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
                                                           format_swiz[i]);
        }

        if (c->discard.file != QFILE_NULL)
                qir_TLB_DISCARD_SETUP(c, c->discard);

        /* Stencil config takes one extra uniform each for two-sided state
         * and full writemasks, when present.
         */
        if (c->fs_key->stencil_enabled) {
                qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 0));
                if (c->fs_key->stencil_twoside) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 1));
                }
                if (c->fs_key->stencil_full_writemasks) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 2));
                }
        }

        if (c->fs_key->depth_enabled) {
                struct qreg z;
                if (c->output_position_index != -1) {
                        /* Shader-written depth: scale float Z into a 24-bit
                         * integer value.
                         */
                        z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
                                                 qir_uniform_f(c, 0xffffff)));
                } else {
                        z = qir_FRAG_Z(c);
                }
                qir_TLB_Z_WRITE(c, z);
        }

        bool color_written = false;
        for (int i = 0; i < 4; i++) {
                if (swizzled_outputs[i].file != QFILE_NULL)
                        color_written = true;
        }

        struct qreg packed_color;
        if (color_written) {
                /* Fill in any undefined colors.  The simulator will assertion
                 * fail if we read something that wasn't written, and I don't
                 * know what hardware does.
                 */
                for (int i = 0; i < 4; i++) {
                        if (swizzled_outputs[i].file == QFILE_NULL)
                                swizzled_outputs[i] = qir_uniform_f(c, 0.0);
                }
                packed_color = qir_get_temp(c);
                qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
                                      swizzled_outputs[0],
                                      swizzled_outputs[1],
                                      swizzled_outputs[2],
                                      swizzled_outputs[3]));
        } else {
                packed_color = qir_uniform_ui(c, 0);
        }

        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                             packed_color, c->undef));
}
1491
1492 static void
1493 emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
1494 {
1495 struct qreg xyi[2];
1496
1497 for (int i = 0; i < 2; i++) {
1498 struct qreg scale =
1499 add_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
1500
1501 xyi[i] = qir_FTOI(c, qir_FMUL(c,
1502 qir_FMUL(c,
1503 c->outputs[i],
1504 scale),
1505 rcp_w));
1506 }
1507
1508 qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
1509 }
1510
1511 static void
1512 emit_zs_write(struct vc4_compile *c, struct qreg rcp_w)
1513 {
1514 struct qreg zscale = add_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
1515 struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
1516
1517 qir_VPM_WRITE(c, qir_FMUL(c, qir_FADD(c, qir_FMUL(c,
1518 c->outputs[2],
1519 zscale),
1520 zoffset),
1521 rcp_w));
1522 }
1523
/* Writes 1/Wc for the current vertex to the VPM. */
static void
emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
{
        qir_VPM_WRITE(c, rcp_w);
}
1529
1530 static void
1531 emit_point_size_write(struct vc4_compile *c)
1532 {
1533 struct qreg point_size;
1534
1535 if (c->output_point_size_index)
1536 point_size = c->outputs[c->output_point_size_index + 3];
1537 else
1538 point_size = qir_uniform_f(c, 1.0);
1539
1540 /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
1541 * BCM21553).
1542 */
1543 point_size = qir_FMAX(c, point_size, qir_uniform_f(c, .125));
1544
1545 qir_VPM_WRITE(c, point_size);
1546 }
1547
1548 static void
1549 emit_vert_end(struct vc4_compile *c)
1550 {
1551 struct qreg rcp_w = qir_RCP(c, c->outputs[3]);
1552
1553 emit_scaled_viewport_write(c, rcp_w);
1554 emit_zs_write(c, rcp_w);
1555 emit_rcp_wc_write(c, rcp_w);
1556 if (c->vs_key->per_vertex_point_size)
1557 emit_point_size_write(c);
1558
1559 for (int i = 4; i < c->num_outputs; i++) {
1560 qir_VPM_WRITE(c, c->outputs[i]);
1561 }
1562 }
1563
1564 static void
1565 emit_coord_end(struct vc4_compile *c)
1566 {
1567 struct qreg rcp_w = qir_RCP(c, c->outputs[3]);
1568
1569 for (int i = 0; i < 4; i++)
1570 qir_VPM_WRITE(c, c->outputs[i]);
1571
1572 emit_scaled_viewport_write(c, rcp_w);
1573 emit_zs_write(c, rcp_w);
1574 emit_rcp_wc_write(c, rcp_w);
1575 if (c->vs_key->per_vertex_point_size)
1576 emit_point_size_write(c);
1577 }
1578
/**
 * Compiles the TGSI for a shader stage into QIR, then into QPU code.
 *
 * Returns the compile context; the caller owns it and must free it with
 * qir_compile_destroy() after copying out the generated instructions and
 * uniform stream.
 */
static struct vc4_compile *
vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage,
                       struct vc4_key *key)
{
        struct vc4_compile *c = qir_compile_init();
        int ret;

        c->stage = stage;
        c->shader_state = &key->shader_state->base;

        c->key = key;
        switch (stage) {
        case QSTAGE_FRAG:
                c->fs_key = (struct vc4_fs_key *)key;
                /* Point/line coordinates arrive as extra varyings. */
                if (c->fs_key->is_points) {
                        c->point_x = emit_fragment_varying(c, 0);
                        c->point_y = emit_fragment_varying(c, 0);
                } else if (c->fs_key->is_lines) {
                        c->line_x = emit_fragment_varying(c, 0);
                }
                break;
        case QSTAGE_VERT:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        case QSTAGE_COORD:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        }

        const struct tgsi_token *tokens = key->shader_state->base.tokens;
        if (c->fs_key && c->fs_key->light_twoside) {
                /* Lower two-sided lighting once per shader state, caching
                 * the lowered tokens for later variants.
                 */
                if (!key->shader_state->twoside_tokens) {
                        const struct tgsi_lowering_config lowering_config = {
                                .color_two_side = true,
                        };
                        struct tgsi_shader_info info;
                        key->shader_state->twoside_tokens =
                                tgsi_transform_lowering(&lowering_config,
                                                        key->shader_state->base.tokens,
                                                        &info);

                        /* If no transformation occurred, then NULL is
                         * returned and we just use our original tokens.
                         */
                        if (!key->shader_state->twoside_tokens) {
                                key->shader_state->twoside_tokens =
                                        key->shader_state->base.tokens;
                        }
                }
                tokens = key->shader_state->twoside_tokens;
        }

        ret = tgsi_parse_init(&c->parser, tokens);
        assert(ret == TGSI_PARSE_OK);

        if (vc4_debug & VC4_DEBUG_TGSI) {
                fprintf(stderr, "TGSI:\n");
                tgsi_dump(tokens, 0);
        }

        /* Walk the token stream, emitting QIR per declaration/instruction/
         * immediate.
         */
        while (!tgsi_parse_end_of_tokens(&c->parser)) {
                tgsi_parse_token(&c->parser);

                switch (c->parser.FullToken.Token.Type) {
                case TGSI_TOKEN_TYPE_DECLARATION:
                        emit_tgsi_declaration(c,
                                              &c->parser.FullToken.FullDeclaration);
                        break;

                case TGSI_TOKEN_TYPE_INSTRUCTION:
                        emit_tgsi_instruction(c,
                                              &c->parser.FullToken.FullInstruction);
                        break;

                case TGSI_TOKEN_TYPE_IMMEDIATE:
                        parse_tgsi_immediate(c,
                                             &c->parser.FullToken.FullImmediate);
                        break;
                }
        }

        /* Emit the fixed-function epilogue for the stage. */
        switch (stage) {
        case QSTAGE_FRAG:
                emit_frag_end(c);
                break;
        case QSTAGE_VERT:
                emit_vert_end(c);
                break;
        case QSTAGE_COORD:
                emit_coord_end(c);
                break;
        }

        tgsi_parse_free(&c->parser);

        qir_optimize(c);

        if (vc4_debug & VC4_DEBUG_QIR) {
                fprintf(stderr, "QIR:\n");
                qir_dump(c);
        }
        qir_reorder_uniforms(c);
        vc4_generate_code(vc4, c);

        if (vc4_debug & VC4_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s: %d instructions\n",
                        qir_get_stage_name(c->stage), c->qpu_inst_count);
                fprintf(stderr, "SHADER-DB: %s: %d uniforms\n",
                        qir_get_stage_name(c->stage), c->num_uniforms);
        }

        return c;
}
1692
1693 static void *
1694 vc4_shader_state_create(struct pipe_context *pctx,
1695 const struct pipe_shader_state *cso)
1696 {
1697 struct vc4_uncompiled_shader *so = CALLOC_STRUCT(vc4_uncompiled_shader);
1698 if (!so)
1699 return NULL;
1700
1701 const struct tgsi_lowering_config lowering_config = {
1702 .lower_DST = true,
1703 .lower_XPD = true,
1704 .lower_SCS = true,
1705 .lower_POW = true,
1706 .lower_LIT = true,
1707 .lower_EXP = true,
1708 .lower_LOG = true,
1709 .lower_DP4 = true,
1710 .lower_DP3 = true,
1711 .lower_DPH = true,
1712 .lower_DP2 = true,
1713 .lower_DP2A = true,
1714 };
1715
1716 struct tgsi_shader_info info;
1717 so->base.tokens = tgsi_transform_lowering(&lowering_config, cso->tokens, &info);
1718 if (!so->base.tokens)
1719 so->base.tokens = tgsi_dup_tokens(cso->tokens);
1720
1721 return so;
1722 }
1723
1724 static void
1725 copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
1726 struct vc4_compile *c)
1727 {
1728 int count = c->num_uniforms;
1729 struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
1730
1731 uinfo->count = count;
1732 uinfo->data = ralloc_array(shader, uint32_t, count);
1733 memcpy(uinfo->data, c->uniform_data,
1734 count * sizeof(*uinfo->data));
1735 uinfo->contents = ralloc_array(shader, enum quniform_contents, count);
1736 memcpy(uinfo->contents, c->uniform_contents,
1737 count * sizeof(*uinfo->contents));
1738 uinfo->num_texture_samples = c->num_texture_samples;
1739 }
1740
1741 static struct vc4_compiled_shader *
1742 vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
1743 struct vc4_key *key)
1744 {
1745 struct util_hash_table *ht;
1746 uint32_t key_size;
1747 if (stage == QSTAGE_FRAG) {
1748 ht = vc4->fs_cache;
1749 key_size = sizeof(struct vc4_fs_key);
1750 } else {
1751 ht = vc4->vs_cache;
1752 key_size = sizeof(struct vc4_vs_key);
1753 }
1754
1755 struct vc4_compiled_shader *shader;
1756 shader = util_hash_table_get(ht, key);
1757 if (shader)
1758 return shader;
1759
1760 struct vc4_compile *c = vc4_shader_tgsi_to_qir(vc4, stage, key);
1761 shader = rzalloc(NULL, struct vc4_compiled_shader);
1762
1763 shader->num_inputs = c->num_inputs;
1764 shader->color_inputs = c->color_inputs;
1765 copy_uniform_state_to_shader(shader, c);
1766 shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
1767 c->qpu_inst_count * sizeof(uint64_t),
1768 "code");
1769
1770 qir_compile_destroy(c);
1771
1772 struct vc4_key *dup_key;
1773 dup_key = malloc(key_size);
1774 memcpy(dup_key, key, key_size);
1775 util_hash_table_set(ht, dup_key, shader);
1776
1777 return shader;
1778 }
1779
1780 static void
1781 vc4_setup_shared_key(struct vc4_key *key, struct vc4_texture_stateobj *texstate)
1782 {
1783 for (int i = 0; i < texstate->num_textures; i++) {
1784 struct pipe_sampler_view *sampler = texstate->textures[i];
1785 struct pipe_sampler_state *sampler_state =
1786 texstate->samplers[i];
1787
1788 if (sampler) {
1789 key->tex[i].format = sampler->format;
1790 key->tex[i].swizzle[0] = sampler->swizzle_r;
1791 key->tex[i].swizzle[1] = sampler->swizzle_g;
1792 key->tex[i].swizzle[2] = sampler->swizzle_b;
1793 key->tex[i].swizzle[3] = sampler->swizzle_a;
1794 key->tex[i].compare_mode = sampler_state->compare_mode;
1795 key->tex[i].compare_func = sampler_state->compare_func;
1796 key->tex[i].wrap_s = sampler_state->wrap_s;
1797 key->tex[i].wrap_t = sampler_state->wrap_t;
1798 }
1799 }
1800 }
1801
/* Rebuilds (or fetches from cache) the fragment shader variant for the
 * current state, if any FS-affecting state is dirty.
 */
static void
vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_fs_key local_key;
        struct vc4_fs_key *key = &local_key;

        if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
                            VC4_DIRTY_BLEND |
                            VC4_DIRTY_FRAMEBUFFER |
                            VC4_DIRTY_ZSA |
                            VC4_DIRTY_RASTERIZER |
                            VC4_DIRTY_FRAGTEX |
                            VC4_DIRTY_TEXSTATE |
                            VC4_DIRTY_PROG))) {
                return;
        }

        /* The key is hashed and memcmp'd as raw bytes, so clear any padding
         * as well.
         */
        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(&key->base, &vc4->fragtex);
        key->base.shader_state = vc4->prog.bind_fs;
        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                         prim_mode <= PIPE_PRIM_LINE_STRIP);
        key->blend = vc4->blend->rt[0];

        if (vc4->framebuffer.cbufs[0])
                key->color_format = vc4->framebuffer.cbufs[0]->format;

        key->stencil_enabled = vc4->zsa->stencil_uniforms[0] != 0;
        key->stencil_twoside = vc4->zsa->stencil_uniforms[1] != 0;
        key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
        key->depth_enabled = (vc4->zsa->base.depth.enabled ||
                              key->stencil_enabled);
        if (vc4->zsa->base.alpha.enabled) {
                key->alpha_test = true;
                key->alpha_test_func = vc4->zsa->base.alpha.func;
        }

        if (key->is_points) {
                key->point_sprite_mask =
                        vc4->rasterizer->base.sprite_coord_enable;
                key->point_coord_upper_left =
                        (vc4->rasterizer->base.sprite_coord_mode ==
                         PIPE_SPRITE_COORD_UPPER_LEFT);
        }

        key->light_twoside = vc4->rasterizer->base.light_twoside;

        struct vc4_compiled_shader *old_fs = vc4->prog.fs;
        vc4->prog.fs = vc4_get_compiled_shader(vc4, QSTAGE_FRAG, &key->base);
        if (vc4->prog.fs == old_fs)
                return;

        /* Flat shade flags depend on which FS inputs are colors, so they
         * must be re-emitted when the new variant's color inputs differ.
         */
        if (vc4->rasterizer->base.flatshade &&
            old_fs && vc4->prog.fs->color_inputs != old_fs->color_inputs) {
                vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
        }
}
1860
/* Rebuilds (or fetches from cache) the vertex and coordinate shader
 * variants for the current state, if any VS-affecting state is dirty.
 */
static void
vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_vs_key local_key;
        struct vc4_vs_key *key = &local_key;

        if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
                            VC4_DIRTY_RASTERIZER |
                            VC4_DIRTY_VERTTEX |
                            VC4_DIRTY_TEXSTATE |
                            VC4_DIRTY_VTXSTATE |
                            VC4_DIRTY_PROG))) {
                return;
        }

        /* The key is hashed and memcmp'd as raw bytes, so clear any padding
         * as well.
         */
        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(&key->base, &vc4->verttex);
        key->base.shader_state = vc4->prog.bind_vs;

        for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
                key->attr_formats[i] = vc4->vtx->pipe[i].src_format;

        key->per_vertex_point_size =
                (prim_mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex);

        vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
        /* The coordinate shader is compiled from the same key with only
         * is_coord flipped on.
         */
        key->is_coord = true;
        vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
}
1891
/* Updates the compiled FS and VS/coord shaders for the current dirty state
 * before a draw.
 */
void
vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
{
        vc4_update_compiled_fs(vc4, prim_mode);
        vc4_update_compiled_vs(vc4, prim_mode);
}
1898
/* Hash callback for the FS variant cache: hashes the whole vc4_fs_key. */
static unsigned
fs_cache_hash(void *key)
{
        return _mesa_hash_data(key, sizeof(struct vc4_fs_key));
}
1904
/* Hash callback for the VS variant cache: hashes the whole vc4_vs_key. */
static unsigned
vs_cache_hash(void *key)
{
        return _mesa_hash_data(key, sizeof(struct vc4_vs_key));
}
1910
/* Key comparison callback for the FS variant cache (0 means equal). */
static int
fs_cache_compare(void *key1, void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_fs_key));
}
1916
/* Key comparison callback for the VS variant cache (0 means equal). */
static int
vs_cache_compare(void *key1, void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_vs_key));
}
1922
/* Context for the shader-cache iteration callbacks when a shader CSO is
 * deleted: identifies which uncompiled shader's variants to evict.
 */
struct delete_state {
        struct vc4_context *vc4;
        struct vc4_uncompiled_shader *shader_state;
};
1927
1928 static enum pipe_error
1929 fs_delete_from_cache(void *in_key, void *in_value, void *data)
1930 {
1931 struct delete_state *del = data;
1932 struct vc4_fs_key *key = in_key;
1933 struct vc4_compiled_shader *shader = in_value;
1934
1935 if (key->base.shader_state == data) {
1936 util_hash_table_remove(del->vc4->fs_cache, key);
1937 vc4_bo_unreference(&shader->bo);
1938 ralloc_free(shader);
1939 }
1940
1941 return 0;
1942 }
1943
1944 static enum pipe_error
1945 vs_delete_from_cache(void *in_key, void *in_value, void *data)
1946 {
1947 struct delete_state *del = data;
1948 struct vc4_vs_key *key = in_key;
1949 struct vc4_compiled_shader *shader = in_value;
1950
1951 if (key->base.shader_state == data) {
1952 util_hash_table_remove(del->vc4->vs_cache, key);
1953 vc4_bo_unreference(&shader->bo);
1954 ralloc_free(shader);
1955 }
1956
1957 return 0;
1958 }
1959
/* Deletes a shader CSO: evicts its compiled variants from both caches and
 * frees the (possibly lowered) token copies.
 */
static void
vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_uncompiled_shader *so = hwcso;
        struct delete_state del;

        del.vc4 = vc4;
        del.shader_state = so;
        util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
        util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);

        /* twoside_tokens either aliases base.tokens or is a separately
         * allocated lowered copy; it may also still be NULL (free(NULL) is
         * a no-op).
         */
        if (so->twoside_tokens != so->base.tokens)
                free((void *)so->twoside_tokens);
        free((void *)so->base.tokens);
        free(so);
}
1977
1978 static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
1979 {
1980 switch (p_wrap) {
1981 case PIPE_TEX_WRAP_REPEAT:
1982 return 0;
1983 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1984 return 1;
1985 case PIPE_TEX_WRAP_MIRROR_REPEAT:
1986 return 2;
1987 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1988 return 3;
1989 case PIPE_TEX_WRAP_CLAMP:
1990 return (using_nearest ? 1 : 3);
1991 default:
1992 fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
1993 assert(!"not reached");
1994 return 0;
1995 }
1996 }
1997
/* Emits texture config parameter 0 as a relocation against the texture BO:
 * base offset, mip level count, cube-map mode, and the low bits of the vc4
 * texture type.
 */
static void
write_texture_p0(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
                 VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
                 VC4_SET_FIELD(texture->u.tex.last_level, VC4_TEX_P0_MIPLVLS) |
                 VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
                               VC4_TEX_P0_CMMODE) |
                 VC4_SET_FIELD(rsc->vc4_format & 7, VC4_TEX_P0_TYPE));
}
2013
2014 static void
2015 write_texture_p1(struct vc4_context *vc4,
2016 struct vc4_texture_stateobj *texstate,
2017 uint32_t unit)
2018 {
2019 struct pipe_sampler_view *texture = texstate->textures[unit];
2020 struct vc4_resource *rsc = vc4_resource(texture->texture);
2021 struct pipe_sampler_state *sampler = texstate->samplers[unit];
2022 static const uint8_t minfilter_map[6] = {
2023 VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
2024 VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
2025 VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
2026 VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
2027 VC4_TEX_P1_MINFILT_NEAREST,
2028 VC4_TEX_P1_MINFILT_LINEAR,
2029 };
2030 static const uint32_t magfilter_map[] = {
2031 [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
2032 [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
2033 };
2034
2035 bool either_nearest =
2036 (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
2037 sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
2038
2039 cl_u32(&vc4->uniforms,
2040 VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
2041 VC4_SET_FIELD(texture->texture->height0 & 2047,
2042 VC4_TEX_P1_HEIGHT) |
2043 VC4_SET_FIELD(texture->texture->width0 & 2047,
2044 VC4_TEX_P1_WIDTH) |
2045 VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter],
2046 VC4_TEX_P1_MAGFILT) |
2047 VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 +
2048 sampler->min_img_filter],
2049 VC4_TEX_P1_MINFILT) |
2050 VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest),
2051 VC4_TEX_P1_WRAP_S) |
2052 VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest),
2053 VC4_TEX_P1_WRAP_T));
2054 }
2055
/* Emits texture config parameter 2: the cube map stride, in 4KB units. */
static void
write_texture_p2(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_u32(&vc4->uniforms,
               VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
                             VC4_TEX_P2_PTYPE) |
               VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST));
}
2069
2070
/* Shorthand for building a 4-channel util_format swizzle initializer. */
#define SWIZ(x,y,z,w) { \
        UTIL_FORMAT_SWIZZLE_##x, \
        UTIL_FORMAT_SWIZZLE_##y, \
        UTIL_FORMAT_SWIZZLE_##z, \
        UTIL_FORMAT_SWIZZLE_##w \
}
2077
/* Emits the sampler border color, packed into the channel layout it would
 * have as actual texture contents of this vc4_format, so the normal
 * format swizzle applies to it correctly.
 */
static void
write_texture_border_color(struct vc4_context *vc4,
                           struct vc4_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        /* sRGB textures store encoded values, so encode the border color's
         * RGB (alpha stays linear) to match.
         */
        float border_color[4];
        for (int i = 0; i < 4; i++)
                border_color[i] = sampler->border_color.f[i];
        if (util_format_is_srgb(texture->format)) {
                for (int i = 0; i < 3; i++)
                        border_color[i] =
                                util_format_linear_to_srgb_float(border_color[i]);
        }

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 border_color,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc4_format-sampled texture contents are
         * replaced with our border color, the vc4_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                switch (rsc->vc4_format) {
                default:
                case VC4_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC4_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_u32(&vc4->uniforms, uc.ui[0]);
}
2142
2143 static uint32_t
2144 get_texrect_scale(struct vc4_texture_stateobj *texstate,
2145 enum quniform_contents contents,
2146 uint32_t data)
2147 {
2148 struct pipe_sampler_view *texture = texstate->textures[data];
2149 uint32_t dim;
2150
2151 if (contents == QUNIFORM_TEXRECT_SCALE_X)
2152 dim = texture->texture->width0;
2153 else
2154 dim = texture->texture->height0;
2155
2156 return fui(1.0f / dim);
2157 }
2158
/*
 * Streams the uniform values for one compiled shader into the uniforms
 * command list, in the order the compiled QPU code will consume them.
 *
 * Each entry in shader->uniforms describes one 32-bit slot; the loop must
 * append exactly one 32-bit value per slot (the write_texture_*() helpers
 * defined earlier in this file are assumed to append their values as
 * well), since the uniform FIFO is read positionally by the shader.
 */
void
vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                   struct vc4_constbuf_stateobj *cb,
                   struct vc4_texture_stateobj *texstate)
{
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
        /* Gallium constant buffer 0 holds the user-supplied uniforms,
         * indexed as 32-bit words by uinfo->data[].
         */
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;

        /* Reserve relocation slots for the texture samples this shader
         * performs (texture config uniforms carry BO relocations).
         */
        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);

        for (int i = 0; i < uinfo->count; i++) {

                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        /* Literal value baked in at compile time. */
                        cl_u32(&vc4->uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        /* Word index into the application's constbuf 0. */
                        cl_u32(&vc4->uniforms,
                               gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        /* The 16.0f factor presumably converts to the
                         * hardware's 1/16th-subpixel coordinate space —
                         * TODO confirm against the viewport setup code.
                         */
                        cl_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_f(&vc4->uniforms, vc4->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[2]);
                        break;

                /* Texture config words P0-P2 and the border color are
                 * emitted by helpers; data[i] selects the sampler unit.
                 */
                case QUNIFORM_TEXTURE_CONFIG_P0:
                        write_texture_p0(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P2:
                        write_texture_p2(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        write_texture_border_color(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        /* 1/width or 1/height, to normalize RECT coords. */
                        cl_u32(&vc4->uniforms,
                               get_texrect_scale(texstate,
                                                 uinfo->contents[i],
                                                 uinfo->data[i]));
                        break;

                case QUNIFORM_BLEND_CONST_COLOR:
                        /* data[i] is the channel index into the blend color. */
                        cl_f(&vc4->uniforms,
                             vc4->blend_color.color[uinfo->data[i]]);
                        break;

                case QUNIFORM_STENCIL:
                        /* Merge the stencil ref value into bits 15:8 of the
                         * front (0) / back (1) stencil config words; config
                         * index 2 (if present) takes no ref value.
                         */
                        cl_u32(&vc4->uniforms,
                               vc4->zsa->stencil_uniforms[uinfo->data[i]] |
                               (uinfo->data[i] <= 1 ?
                                (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_f(&vc4->uniforms, vc4->zsa->base.alpha.ref_value);
                        break;
                }
                /* NOTE(review): no default case — an unhandled enum value
                 * would emit nothing and silently desynchronize every
                 * following uniform; consider asserting here.
                 */
#if 0
                /* Debug dump of each uniform as it is written. */
                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
                fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
                        shader, i, written_val, uif(written_val));
#endif
        }
}
2241
2242 static void
2243 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
2244 {
2245 struct vc4_context *vc4 = vc4_context(pctx);
2246 vc4->prog.bind_fs = hwcso;
2247 vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
2248 vc4->dirty |= VC4_DIRTY_PROG;
2249 }
2250
2251 static void
2252 vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
2253 {
2254 struct vc4_context *vc4 = vc4_context(pctx);
2255 vc4->prog.bind_vs = hwcso;
2256 vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
2257 vc4->dirty |= VC4_DIRTY_PROG;
2258 }
2259
2260 void
2261 vc4_program_init(struct pipe_context *pctx)
2262 {
2263 struct vc4_context *vc4 = vc4_context(pctx);
2264
2265 pctx->create_vs_state = vc4_shader_state_create;
2266 pctx->delete_vs_state = vc4_shader_state_delete;
2267
2268 pctx->create_fs_state = vc4_shader_state_create;
2269 pctx->delete_fs_state = vc4_shader_state_delete;
2270
2271 pctx->bind_fs_state = vc4_fp_state_bind;
2272 pctx->bind_vs_state = vc4_vp_state_bind;
2273
2274 vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
2275 vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
2276 }