vc4: Add support for sampling from sRGB.
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
1 /*
2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include <inttypes.h>
26 #include "pipe/p_state.h"
27 #include "util/u_format.h"
28 #include "util/u_hash_table.h"
29 #include "util/u_hash.h"
30 #include "util/u_memory.h"
31 #include "util/u_pack_color.h"
32 #include "util/format_srgb.h"
33 #include "util/ralloc.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_info.h"
36
37 #include "vc4_context.h"
38 #include "vc4_qpu.h"
39 #include "vc4_qir.h"
40 #ifdef USE_VC4_SIMULATOR
41 #include "simpenrose/simpenrose.h"
42 #endif
43
/* Shader-variant key fields shared by the FS and VS keys: the shader itself
 * plus the per-sampler state that gets baked into the compiled code.
 */
struct vc4_key {
        struct pipe_shader_state *shader_state;
        struct {
                enum pipe_format format;
                unsigned compare_mode:1;  /* shadow-compare enabled */
                unsigned compare_func:3;  /* PIPE_FUNC_* for shadow compare */
                unsigned wrap_s:3;        /* PIPE_TEX_WRAP_* */
                unsigned wrap_t:3;        /* PIPE_TEX_WRAP_* */
                uint8_t swizzle[4];       /* sampler-view swizzle per channel */
        } tex[VC4_MAX_TEXTURE_SAMPLERS];
};
55
/* Key for a compiled fragment-shader variant: framebuffer format and
 * fixed-function state that the generated FS code depends on.
 */
struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
        bool depth_enabled;
        bool stencil_enabled;
        bool stencil_twoside;
        bool stencil_full_writemasks;
        bool is_points;
        bool is_lines;
        bool alpha_test;
        bool point_coord_upper_left;
        uint8_t alpha_test_func;       /* PIPE_FUNC_* for alpha test */
        uint32_t point_sprite_mask;    /* GENERIC inputs replaced by PointCoord */

        struct pipe_rt_blend_state blend;
};
72
/* Key for a compiled vertex-shader variant. */
struct vc4_vs_key {
        struct vc4_key base;
        /* Format of each vertex element.  NOTE(review): fixed at 8 slots --
         * confirm this matches the driver's max vertex attribute count.
         */
        enum pipe_format attr_formats[8];
        bool per_vertex_point_size;
};
78
79 static void
80 resize_qreg_array(struct vc4_compile *c,
81 struct qreg **regs,
82 uint32_t *size,
83 uint32_t decl_size)
84 {
85 if (*size >= decl_size)
86 return;
87
88 uint32_t old_size = *size;
89 *size = MAX2(*size * 2, decl_size);
90 *regs = reralloc(c, *regs, struct qreg, *size);
91 if (!*regs) {
92 fprintf(stderr, "Malloc failure\n");
93 abort();
94 }
95
96 for (uint32_t i = old_size; i < *size; i++)
97 (*regs)[i] = c->undef;
98 }
99
100 static struct qreg
101 add_uniform(struct vc4_compile *c,
102 enum quniform_contents contents,
103 uint32_t data)
104 {
105 uint32_t uniform = c->num_uniforms++;
106 struct qreg u = { QFILE_UNIF, uniform };
107
108 if (uniform >= c->uniform_array_size) {
109 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
110 c->uniform_array_size * 2);
111
112 c->uniform_data = reralloc(c, c->uniform_data,
113 uint32_t,
114 c->uniform_array_size);
115 c->uniform_contents = reralloc(c, c->uniform_contents,
116 enum quniform_contents,
117 c->uniform_array_size);
118 }
119
120 c->uniform_contents[uniform] = contents;
121 c->uniform_data[uniform] = data;
122
123 return u;
124 }
125
126 static struct qreg
127 get_temp_for_uniform(struct vc4_compile *c, enum quniform_contents contents,
128 uint32_t data)
129 {
130 struct qreg u = add_uniform(c, contents, data);
131 struct qreg t = qir_MOV(c, u);
132 return t;
133 }
134
135 static struct qreg
136 qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
137 {
138 return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui);
139 }
140
141 static struct qreg
142 qir_uniform_f(struct vc4_compile *c, float f)
143 {
144 return qir_uniform_ui(c, fui(f));
145 }
146
147 static struct qreg
148 get_src(struct vc4_compile *c, unsigned tgsi_op,
149 struct tgsi_src_register *src, int i)
150 {
151 struct qreg r = c->undef;
152
153 uint32_t s = i;
154 switch (i) {
155 case TGSI_SWIZZLE_X:
156 s = src->SwizzleX;
157 break;
158 case TGSI_SWIZZLE_Y:
159 s = src->SwizzleY;
160 break;
161 case TGSI_SWIZZLE_Z:
162 s = src->SwizzleZ;
163 break;
164 case TGSI_SWIZZLE_W:
165 s = src->SwizzleW;
166 break;
167 default:
168 abort();
169 }
170
171 assert(!src->Indirect);
172
173 switch (src->File) {
174 case TGSI_FILE_NULL:
175 return r;
176 case TGSI_FILE_TEMPORARY:
177 r = c->temps[src->Index * 4 + s];
178 break;
179 case TGSI_FILE_IMMEDIATE:
180 r = c->consts[src->Index * 4 + s];
181 break;
182 case TGSI_FILE_CONSTANT:
183 r = get_temp_for_uniform(c, QUNIFORM_UNIFORM,
184 src->Index * 4 + s);
185 break;
186 case TGSI_FILE_INPUT:
187 r = c->inputs[src->Index * 4 + s];
188 break;
189 case TGSI_FILE_SAMPLER:
190 case TGSI_FILE_SAMPLER_VIEW:
191 r = c->undef;
192 break;
193 default:
194 fprintf(stderr, "unknown src file %d\n", src->File);
195 abort();
196 }
197
198 if (src->Absolute)
199 r = qir_FMAXABS(c, r, r);
200
201 if (src->Negate) {
202 switch (tgsi_opcode_infer_src_type(tgsi_op)) {
203 case TGSI_TYPE_SIGNED:
204 case TGSI_TYPE_UNSIGNED:
205 r = qir_SUB(c, qir_uniform_ui(c, 0), r);
206 break;
207 default:
208 r = qir_FSUB(c, qir_uniform_f(c, 0.0), r);
209 break;
210 }
211 }
212
213 return r;
214 };
215
216
217 static void
218 update_dst(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst,
219 int i, struct qreg val)
220 {
221 struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;
222
223 assert(!tgsi_dst->Indirect);
224
225 switch (tgsi_dst->File) {
226 case TGSI_FILE_TEMPORARY:
227 c->temps[tgsi_dst->Index * 4 + i] = val;
228 break;
229 case TGSI_FILE_OUTPUT:
230 c->outputs[tgsi_dst->Index * 4 + i] = val;
231 c->num_outputs = MAX2(c->num_outputs,
232 tgsi_dst->Index * 4 + i + 1);
233 break;
234 default:
235 fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
236 abort();
237 }
238 };
239
240 static struct qreg
241 get_swizzled_channel(struct vc4_compile *c,
242 struct qreg *srcs, int swiz)
243 {
244 switch (swiz) {
245 default:
246 case UTIL_FORMAT_SWIZZLE_NONE:
247 fprintf(stderr, "warning: unknown swizzle\n");
248 /* FALLTHROUGH */
249 case UTIL_FORMAT_SWIZZLE_0:
250 return qir_uniform_f(c, 0.0);
251 case UTIL_FORMAT_SWIZZLE_1:
252 return qir_uniform_f(c, 1.0);
253 case UTIL_FORMAT_SWIZZLE_X:
254 case UTIL_FORMAT_SWIZZLE_Y:
255 case UTIL_FORMAT_SWIZZLE_Z:
256 case UTIL_FORMAT_SWIZZLE_W:
257 return srcs[swiz];
258 }
259 }
260
261 static struct qreg
262 tgsi_to_qir_alu(struct vc4_compile *c,
263 struct tgsi_full_instruction *tgsi_inst,
264 enum qop op, struct qreg *src, int i)
265 {
266 struct qreg dst = qir_get_temp(c);
267 qir_emit(c, qir_inst4(op, dst,
268 src[0 * 4 + i],
269 src[1 * 4 + i],
270 src[2 * 4 + i],
271 c->undef));
272 return dst;
273 }
274
/* Converts an sRGB-encoded channel in [0, 1] to linear, using the standard
 * piecewise sRGB EOTF:
 *
 *   linear = srgb / 12.92                  for srgb <= 0.04045
 *   linear = ((srgb + 0.055) / 1.055)^2.4  otherwise
 *
 * Both halves are computed unconditionally and the right one is chosen with
 * a sign-flag select on (srgb - 0.04045).
 */
static struct qreg
qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
{
        struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
        struct qreg high = qir_POW(c,
                                   qir_FMUL(c,
                                            qir_FADD(c,
                                                     srgb,
                                                     qir_uniform_f(c, 0.055)),
                                            qir_uniform_f(c, 1.0 / 1.055)),
                                   qir_uniform_f(c, 2.4));

        qir_SF(c, qir_FSUB(c, srgb, qir_uniform_f(c, 0.04045)));
        return qir_SEL_X_Y_NS(c, low, high);
}
290
291 static struct qreg
292 tgsi_to_qir_umul(struct vc4_compile *c,
293 struct tgsi_full_instruction *tgsi_inst,
294 enum qop op, struct qreg *src, int i)
295 {
296 struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
297 qir_uniform_ui(c, 16));
298 struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
299 qir_uniform_ui(c, 0xffff));
300 struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
301 qir_uniform_ui(c, 16));
302 struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
303 qir_uniform_ui(c, 0xffff));
304
305 struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
306 struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
307 struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
308
309 return qir_ADD(c, lolo, qir_SHL(c,
310 qir_ADD(c, hilo, lohi),
311 qir_uniform_ui(c, 16)));
312 }
313
/* Integer division approximated in floating point: convert both operands,
 * multiply by the hardware reciprocal estimate, and truncate back.
 * NOTE(review): RCP is an approximation and floats only carry 24 mantissa
 * bits, so large operands may give off-by-one quotients -- confirm against
 * the driver's conformance expectations.
 */
static struct qreg
tgsi_to_qir_idiv(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_FTOI(c, qir_FMUL(c,
                                    qir_ITOF(c, src[0 * 4 + i]),
                                    qir_RCP(c, qir_ITOF(c, src[1 * 4 + i]))));
}
323
324 static struct qreg
325 tgsi_to_qir_ineg(struct vc4_compile *c,
326 struct tgsi_full_instruction *tgsi_inst,
327 enum qop op, struct qreg *src, int i)
328 {
329 return qir_SUB(c, qir_uniform_ui(c, 0), src[0 * 4 + i]);
330 }
331
332 static struct qreg
333 tgsi_to_qir_seq(struct vc4_compile *c,
334 struct tgsi_full_instruction *tgsi_inst,
335 enum qop op, struct qreg *src, int i)
336 {
337 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
338 return qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
339 }
340
341 static struct qreg
342 tgsi_to_qir_sne(struct vc4_compile *c,
343 struct tgsi_full_instruction *tgsi_inst,
344 enum qop op, struct qreg *src, int i)
345 {
346 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
347 return qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
348 }
349
350 static struct qreg
351 tgsi_to_qir_slt(struct vc4_compile *c,
352 struct tgsi_full_instruction *tgsi_inst,
353 enum qop op, struct qreg *src, int i)
354 {
355 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
356 return qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
357 }
358
359 static struct qreg
360 tgsi_to_qir_sge(struct vc4_compile *c,
361 struct tgsi_full_instruction *tgsi_inst,
362 enum qop op, struct qreg *src, int i)
363 {
364 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
365 return qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
366 }
367
368 static struct qreg
369 tgsi_to_qir_fseq(struct vc4_compile *c,
370 struct tgsi_full_instruction *tgsi_inst,
371 enum qop op, struct qreg *src, int i)
372 {
373 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
374 return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
375 }
376
377 static struct qreg
378 tgsi_to_qir_fsne(struct vc4_compile *c,
379 struct tgsi_full_instruction *tgsi_inst,
380 enum qop op, struct qreg *src, int i)
381 {
382 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
383 return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
384 }
385
386 static struct qreg
387 tgsi_to_qir_fslt(struct vc4_compile *c,
388 struct tgsi_full_instruction *tgsi_inst,
389 enum qop op, struct qreg *src, int i)
390 {
391 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
392 return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
393 }
394
395 static struct qreg
396 tgsi_to_qir_fsge(struct vc4_compile *c,
397 struct tgsi_full_instruction *tgsi_inst,
398 enum qop op, struct qreg *src, int i)
399 {
400 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
401 return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
402 }
403
404 static struct qreg
405 tgsi_to_qir_useq(struct vc4_compile *c,
406 struct tgsi_full_instruction *tgsi_inst,
407 enum qop op, struct qreg *src, int i)
408 {
409 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
410 return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
411 }
412
413 static struct qreg
414 tgsi_to_qir_usne(struct vc4_compile *c,
415 struct tgsi_full_instruction *tgsi_inst,
416 enum qop op, struct qreg *src, int i)
417 {
418 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
419 return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
420 }
421
422 static struct qreg
423 tgsi_to_qir_islt(struct vc4_compile *c,
424 struct tgsi_full_instruction *tgsi_inst,
425 enum qop op, struct qreg *src, int i)
426 {
427 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
428 return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
429 }
430
431 static struct qreg
432 tgsi_to_qir_isge(struct vc4_compile *c,
433 struct tgsi_full_instruction *tgsi_inst,
434 enum qop op, struct qreg *src, int i)
435 {
436 qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
437 return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
438 }
439
440 static struct qreg
441 tgsi_to_qir_cmp(struct vc4_compile *c,
442 struct tgsi_full_instruction *tgsi_inst,
443 enum qop op, struct qreg *src, int i)
444 {
445 qir_SF(c, src[0 * 4 + i]);
446 return qir_SEL_X_Y_NS(c,
447 src[1 * 4 + i],
448 src[2 * 4 + i]);
449 }
450
451 static struct qreg
452 tgsi_to_qir_mad(struct vc4_compile *c,
453 struct tgsi_full_instruction *tgsi_inst,
454 enum qop op, struct qreg *src, int i)
455 {
456 return qir_FADD(c,
457 qir_FMUL(c,
458 src[0 * 4 + i],
459 src[1 * 4 + i]),
460 src[2 * 4 + i]);
461 }
462
463 static struct qreg
464 tgsi_to_qir_lit(struct vc4_compile *c,
465 struct tgsi_full_instruction *tgsi_inst,
466 enum qop op, struct qreg *src, int i)
467 {
468 struct qreg x = src[0 * 4 + 0];
469 struct qreg y = src[0 * 4 + 1];
470 struct qreg w = src[0 * 4 + 3];
471
472 switch (i) {
473 case 0:
474 case 3:
475 return qir_uniform_f(c, 1.0);
476 case 1:
477 return qir_FMAX(c, src[0 * 4 + 0], qir_uniform_f(c, 0.0));
478 case 2: {
479 struct qreg zero = qir_uniform_f(c, 0.0);
480
481 qir_SF(c, x);
482 /* XXX: Clamp w to -128..128 */
483 return qir_SEL_X_0_NC(c,
484 qir_EXP2(c, qir_FMUL(c,
485 w,
486 qir_LOG2(c,
487 qir_FMAX(c,
488 y,
489 zero)))));
490 }
491 default:
492 assert(!"not reached");
493 return c->undef;
494 }
495 }
496
497 static struct qreg
498 tgsi_to_qir_lrp(struct vc4_compile *c,
499 struct tgsi_full_instruction *tgsi_inst,
500 enum qop op, struct qreg *src, int i)
501 {
502 struct qreg src0 = src[0 * 4 + i];
503 struct qreg src1 = src[1 * 4 + i];
504 struct qreg src2 = src[2 * 4 + i];
505
506 /* LRP is:
507 * src0 * src1 + (1 - src0) * src2.
508 * -> src0 * src1 + src2 - src0 * src2
509 * -> src2 + src0 * (src1 - src2)
510 */
511 return qir_FADD(c, src2, qir_FMUL(c, src0, qir_FSUB(c, src1, src2)));
512
513 }
514
/* Emits the QIR for a TEX/TXP/TXB instruction: coordinate setup (projection,
 * rect rescale, cube normalization, CLAMP wrap emulation), the texture fetch
 * itself, and unpacking/swizzling of the 32-bit result, including shadow
 * comparison and sRGB decode.
 *
 * The texture_u[] uniforms are consumed in the same order the coordinate
 * writes are emitted, so next_texture_u must advance exactly once per
 * qir_TEX_* call.
 */
static void
tgsi_to_qir_tex(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src)
{
        assert(!tgsi_inst->Instruction.Saturate);

        struct qreg s = src[0 * 4 + 0];
        struct qreg t = src[0 * 4 + 1];
        struct qreg r = src[0 * 4 + 2];
        uint32_t unit = tgsi_inst->Src[1].Register.Index;

        /* TXP: divide S/T by the projector (Q); kept around for the shadow
         * comparison value below.
         */
        struct qreg proj = c->undef;
        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
                proj = qir_RCP(c, src[0 * 4 + 3]);
                s = qir_FMUL(c, s, proj);
                t = qir_FMUL(c, t, proj);
        }

        struct qreg texture_u[] = {
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
        };
        uint32_t next_texture_u = 0;

        /* There is no native support for GL texture rectangle coordinates, so
         * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
         * 1]).
         */
        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
                s = qir_FMUL(c, s,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_X,
                                                  unit));
                t = qir_FMUL(c, t,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_Y,
                                                  unit));
        }

        /* Cube maps: normalize the coordinate by the major axis and write
         * the R (face-selection) coordinate; otherwise, CLAMP wrap modes
         * need the border color uniform written through the R register.
         */
        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
                struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r);
                struct qreg rcp_ma = qir_RCP(c, ma);
                s = qir_FMUL(c, s, rcp_ma);
                t = qir_FMUL(c, t, rcp_ma);
                r = qir_FMUL(c, r, rcp_ma);

                texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2, unit);

                qir_TEX_R(c, r, texture_u[next_texture_u++]);
        } else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                qir_TEX_R(c, get_temp_for_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
                          texture_u[next_texture_u++]);
        }

        /* PIPE_TEX_WRAP_CLAMP (clamp to [0, 1]) is emulated in the shader. */
        if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
                s = qir_FMIN(c, qir_FMAX(c, s, qir_uniform_f(c, 0.0)),
                             qir_uniform_f(c, 1.0));
        }

        if (c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                t = qir_FMIN(c, qir_FMAX(c, t, qir_uniform_f(c, 0.0)),
                             qir_uniform_f(c, 1.0));
        }

        qir_TEX_T(c, t, texture_u[next_texture_u++]);

        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB)
                qir_TEX_B(c, src[0 * 4 + 3], texture_u[next_texture_u++]);

        /* Writing S triggers the actual lookup; it must come last. */
        qir_TEX_S(c, s, texture_u[next_texture_u++]);

        c->num_texture_samples++;
        struct qreg r4 = qir_TEX_RESULT(c);

        enum pipe_format format = c->key->tex[unit].format;

        struct qreg unpacked[4];
        if (util_format_is_depth_or_stencil(format)) {
                /* Depth comes back in the top 24 bits; normalize to [0, 1]. */
                struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
                                                         qir_uniform_ui(c, 8)));
                struct qreg normalized = qir_FMUL(c, depthf,
                                                  qir_uniform_f(c, 1.0f/0xffffff));

                struct qreg depth_output;

                struct qreg one = qir_uniform_f(c, 1.0f);
                if (c->key->tex[unit].compare_mode) {
                        struct qreg compare = src[0 * 4 + 2];

                        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP)
                                compare = qir_FMUL(c, compare, proj);

                        /* Shadow compare: flag on the signed difference,
                         * then select 1.0 or 0.0 per the compare func.
                         */
                        switch (c->key->tex[unit].compare_func) {
                        case PIPE_FUNC_NEVER:
                                depth_output = qir_uniform_f(c, 0.0f);
                                break;
                        case PIPE_FUNC_ALWAYS:
                                depth_output = one;
                                break;
                        case PIPE_FUNC_EQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZS(c, one);
                                break;
                        case PIPE_FUNC_NOTEQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZC(c, one);
                                break;
                        case PIPE_FUNC_GREATER:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        case PIPE_FUNC_GEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LESS:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        }
                } else {
                        depth_output = normalized;
                }

                for (int i = 0; i < 4; i++)
                        unpacked[i] = depth_output;
        } else {
                /* Color: unpack the four bytes of r4 to floats. */
                for (int i = 0; i < 4; i++)
                        unpacked[i] = qir_R4_UNPACK(c, r4, i);
        }

        /* Apply the format's channel swizzle, then decode sRGB on RGB (not
         * alpha), then apply the sampler-view swizzle into the dest.
         */
        const uint8_t *format_swiz = vc4_get_format_swizzle(format);
        struct qreg texture_output[4];
        for (int i = 0; i < 4; i++) {
                texture_output[i] = get_swizzled_channel(c, unpacked,
                                                         format_swiz[i]);
        }

        if (util_format_is_srgb(format)) {
                for (int i = 0; i < 3; i++)
                        texture_output[i] = qir_srgb_decode(c,
                                                            texture_output[i]);
        }

        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                update_dst(c, tgsi_inst, i,
                           get_swizzled_channel(c, texture_output,
                                                c->key->tex[unit].swizzle[i]));
        }
}
680
681 static struct qreg
682 tgsi_to_qir_pow(struct vc4_compile *c,
683 struct tgsi_full_instruction *tgsi_inst,
684 enum qop op, struct qreg *src, int i)
685 {
686 /* Note that this instruction replicates its result from the x channel
687 */
688 return qir_POW(c, src[0 * 4 + 0], src[1 * 4 + 0]);
689 }
690
691 static struct qreg
692 tgsi_to_qir_trunc(struct vc4_compile *c,
693 struct tgsi_full_instruction *tgsi_inst,
694 enum qop op, struct qreg *src, int i)
695 {
696 return qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
697 }
698
699 /**
700 * Computes x - floor(x), which is tricky because our FTOI truncates (rounds
701 * to zero).
702 */
703 static struct qreg
704 tgsi_to_qir_frc(struct vc4_compile *c,
705 struct tgsi_full_instruction *tgsi_inst,
706 enum qop op, struct qreg *src, int i)
707 {
708 struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
709 struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
710 qir_SF(c, diff);
711 return qir_SEL_X_Y_NS(c,
712 qir_FADD(c, diff, qir_uniform_f(c, 1.0)),
713 diff);
714 }
715
716 /**
717 * Computes floor(x), which is tricky because our FTOI truncates (rounds to
718 * zero).
719 */
720 static struct qreg
721 tgsi_to_qir_flr(struct vc4_compile *c,
722 struct tgsi_full_instruction *tgsi_inst,
723 enum qop op, struct qreg *src, int i)
724 {
725 struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
726
727 /* This will be < 0 if we truncated and the truncation was of a value
728 * that was < 0 in the first place.
729 */
730 qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));
731
732 return qir_SEL_X_Y_NS(c,
733 qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)),
734 trunc);
735 }
736
737 static struct qreg
738 tgsi_to_qir_dp(struct vc4_compile *c,
739 struct tgsi_full_instruction *tgsi_inst,
740 int num, struct qreg *src, int i)
741 {
742 struct qreg sum = qir_FMUL(c, src[0 * 4 + 0], src[1 * 4 + 0]);
743 for (int j = 1; j < num; j++) {
744 sum = qir_FADD(c, sum, qir_FMUL(c,
745 src[0 * 4 + j],
746 src[1 * 4 + j]));
747 }
748 return sum;
749 }
750
/* DP2: two-component dot product. */
static struct qreg
tgsi_to_qir_dp2(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 2, src, i);
}
758
/* DP3: three-component dot product. */
static struct qreg
tgsi_to_qir_dp3(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 3, src, i);
}
766
/* DP4: four-component dot product. */
static struct qreg
tgsi_to_qir_dp4(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 4, src, i);
}
774
775 static struct qreg
776 tgsi_to_qir_abs(struct vc4_compile *c,
777 struct tgsi_full_instruction *tgsi_inst,
778 enum qop op, struct qreg *src, int i)
779 {
780 struct qreg arg = src[0 * 4 + i];
781 return qir_FMAXABS(c, arg, arg);
782 }
783
784 /* Note that this instruction replicates its result from the x channel */
785 static struct qreg
786 tgsi_to_qir_sin(struct vc4_compile *c,
787 struct tgsi_full_instruction *tgsi_inst,
788 enum qop op, struct qreg *src, int i)
789 {
790 float coeff[] = {
791 2.0 * M_PI,
792 -pow(2.0 * M_PI, 3) / (3 * 2 * 1),
793 pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
794 -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
795 };
796
797 struct qreg scaled_x =
798 qir_FMUL(c,
799 src[0 * 4 + 0],
800 qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
801
802
803 struct qreg x = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);
804 struct qreg x2 = qir_FMUL(c, x, x);
805 struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
806 for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
807 x = qir_FMUL(c, x, x2);
808 sum = qir_FADD(c,
809 sum,
810 qir_FMUL(c,
811 x,
812 qir_uniform_f(c, coeff[i])));
813 }
814 return sum;
815 }
816
817 /* Note that this instruction replicates its result from the x channel */
818 static struct qreg
819 tgsi_to_qir_cos(struct vc4_compile *c,
820 struct tgsi_full_instruction *tgsi_inst,
821 enum qop op, struct qreg *src, int i)
822 {
823 float coeff[] = {
824 1.0f,
825 -pow(2.0 * M_PI, 2) / (2 * 1),
826 pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
827 -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
828 };
829
830 struct qreg scaled_x =
831 qir_FMUL(c, src[0 * 4 + 0],
832 qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
833 struct qreg x_frac = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);
834
835 struct qreg sum = qir_uniform_f(c, coeff[0]);
836 struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
837 struct qreg x = x2; /* Current x^2, x^4, or x^6 */
838 for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
839 if (i != 1)
840 x = qir_FMUL(c, x, x2);
841
842 struct qreg mul = qir_FMUL(c,
843 x,
844 qir_uniform_f(c, coeff[i]));
845 if (i == 0)
846 sum = mul;
847 else
848 sum = qir_FADD(c, sum, mul);
849 }
850 return sum;
851 }
852
853 static struct qreg
854 tgsi_to_qir_clamp(struct vc4_compile *c,
855 struct tgsi_full_instruction *tgsi_inst,
856 enum qop op, struct qreg *src, int i)
857 {
858 return qir_FMAX(c, qir_FMIN(c,
859 src[0 * 4 + i],
860 src[2 * 4 + i]),
861 src[1 * 4 + i]);
862 }
863
/* Emits the VPM reads for one vertex attribute and converts the raw 32-bit
 * words to float channels according to the element's pipe format.  Supports
 * 32-bit float channels and normalized 8-bit unorm/snorm; anything else
 * warns once and reads as 0.0.
 */
static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
        enum pipe_format format = c->vs_key->attr_formats[attr];
        struct qreg vpm_reads[4];

        /* Right now, we're setting the VPM offsets to be 16 bytes wide every
         * time, so we always read 4 32-bit VPM entries.
         */
        for (int i = 0; i < 4; i++) {
                vpm_reads[i] = qir_get_temp(c);
                qir_emit(c, qir_inst(QOP_VPM_READ,
                                     vpm_reads[i],
                                     c->undef,
                                     c->undef));
                c->num_inputs++;
        }

        bool format_warned = false;
        const struct util_format_description *desc =
                util_format_description(format);

        for (int i = 0; i < 4; i++) {
                uint8_t swiz = desc->swizzle[i];
                struct qreg result;

                /* SWIZZLE_0/1/NONE resolve to constants. */
                if (swiz > UTIL_FORMAT_SWIZZLE_W)
                        result = get_swizzled_channel(c, vpm_reads, swiz);
                else if (desc->channel[swiz].size == 32 &&
                         desc->channel[swiz].type == UTIL_FORMAT_TYPE_FLOAT) {
                        result = get_swizzled_channel(c, vpm_reads, swiz);
                } else if (desc->channel[swiz].size == 8 &&
                           (desc->channel[swiz].type == UTIL_FORMAT_TYPE_UNSIGNED ||
                            desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) &&
                           desc->channel[swiz].normalized) {
                        /* All four 8-bit channels are packed in the first
                         * VPM word.  Signed values are biased to unsigned
                         * by flipping the sign bit of each byte, then
                         * remapped from [0, 1] to [-1, 1] below.
                         */
                        struct qreg vpm = vpm_reads[0];
                        if (desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED)
                                vpm = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
                        result = qir_UNPACK_8(c, vpm, swiz);
                } else {
                        if (!format_warned) {
                                fprintf(stderr,
                                        "vtx element %d unsupported type: %s\n",
                                        attr, util_format_name(format));
                                format_warned = true;
                        }
                        result = qir_uniform_f(c, 0.0);
                }

                if (desc->channel[swiz].normalized &&
                    desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) {
                        result = qir_FSUB(c,
                                          qir_FMUL(c,
                                                   result,
                                                   qir_uniform_f(c, 2.0)),
                                          qir_uniform_f(c, 1.0));
                }

                c->inputs[attr * 4 + i] = result;
        }
}
925
/* KILL_IF for one channel: a negative channel value latches c->discard to
 * 1.0.  c->discard is lazily created as 0.0 ("not discarded") the first
 * time any kill is emitted.
 */
static void
tgsi_to_qir_kill_if(struct vc4_compile *c, struct qreg *src, int i)
{
        if (c->discard.file == QFILE_NULL)
                c->discard = qir_uniform_f(c, 0.0);
        qir_SF(c, src[0 * 4 + i]);
        c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
                                    c->discard);
}
935
/* Sets up gl_FragCoord: X/Y straight from the fragment payload, Z rescaled
 * from the 24-bit integer depth value to [0, 1], and W as the reciprocal of
 * the hardware's FRAG_W input.
 */
static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
        c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
        c->inputs[attr * 4 + 2] =
                qir_FMUL(c,
                         qir_ITOF(c, qir_FRAG_Z(c)),
                         qir_uniform_f(c, 1.0 / 0xffffff));
        c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}
947
948 static void
949 emit_point_coord_input(struct vc4_compile *c, int attr)
950 {
951 if (c->point_x.file == QFILE_NULL) {
952 c->point_x = qir_uniform_f(c, 0.0);
953 c->point_y = qir_uniform_f(c, 0.0);
954 }
955
956 c->inputs[attr * 4 + 0] = c->point_x;
957 if (c->fs_key->point_coord_upper_left) {
958 c->inputs[attr * 4 + 1] = qir_FSUB(c,
959 qir_uniform_f(c, 1.0),
960 c->point_y);
961 } else {
962 c->inputs[attr * 4 + 1] = c->point_y;
963 }
964 c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
965 c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
966 }
967
968 static struct qreg
969 emit_fragment_varying(struct vc4_compile *c, int index)
970 {
971 struct qreg vary = {
972 QFILE_VARY,
973 index
974 };
975
976 return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
977 }
978
979 static void
980 emit_fragment_input(struct vc4_compile *c, int attr,
981 struct tgsi_full_declaration *decl)
982 {
983 for (int i = 0; i < 4; i++) {
984 c->inputs[attr * 4 + i] =
985 emit_fragment_varying(c, attr * 4 + i);
986 c->num_inputs++;
987
988 if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR ||
989 decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR)
990 c->color_inputs |= 1 << i;
991 }
992 }
993
994 static void
995 emit_face_input(struct vc4_compile *c, int attr)
996 {
997 c->inputs[attr * 4 + 0] = qir_FSUB(c,
998 qir_uniform_f(c, 1.0),
999 qir_FMUL(c,
1000 qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
1001 qir_uniform_f(c, 2.0)));
1002 c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
1003 c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
1004 c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
1005 }
1006
/* Handles a TGSI declaration: grows the temp/input/output qreg arrays and,
 * for inputs, emits the stage-appropriate loading code; for outputs,
 * records the indices of the position/color/point-size slots.
 */
static void
emit_tgsi_declaration(struct vc4_compile *c,
                      struct tgsi_full_declaration *decl)
{
        switch (decl->Declaration.File) {
        case TGSI_FILE_TEMPORARY:
                resize_qreg_array(c, &c->temps, &c->temps_array_size,
                                  (decl->Range.Last + 1) * 4);
                break;

        case TGSI_FILE_INPUT:
                resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                  (decl->Range.Last + 1) * 4);

                for (int i = decl->Range.First;
                     i <= decl->Range.Last;
                     i++) {
                        if (c->stage == QSTAGE_FRAG) {
                                /* FS inputs: fragcoord, face, and
                                 * point-sprite coords are special-cased;
                                 * everything else is an interpolated
                                 * varying.
                                 */
                                if (decl->Semantic.Name ==
                                    TGSI_SEMANTIC_POSITION) {
                                        emit_fragcoord_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
                                        emit_face_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
                                           (c->fs_key->point_sprite_mask &
                                            (1 << decl->Semantic.Index))) {
                                        emit_point_coord_input(c, i);
                                } else {
                                        emit_fragment_input(c, i, decl);
                                }
                        } else {
                                emit_vertex_input(c, i);
                        }
                }
                break;

        case TGSI_FILE_OUTPUT:
                resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
                                  (decl->Range.Last + 1) * 4);

                /* Remember where the special outputs land so the epilogue
                 * can find them.
                 */
                switch (decl->Semantic.Name) {
                case TGSI_SEMANTIC_POSITION:
                        c->output_position_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_COLOR:
                        c->output_color_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_PSIZE:
                        c->output_point_size_index = decl->Range.First * 4;
                        break;
                }

                break;
        }
}
1062
1063 static void
1064 emit_tgsi_instruction(struct vc4_compile *c,
1065 struct tgsi_full_instruction *tgsi_inst)
1066 {
1067 struct {
1068 enum qop op;
1069 struct qreg (*func)(struct vc4_compile *c,
1070 struct tgsi_full_instruction *tgsi_inst,
1071 enum qop op,
1072 struct qreg *src, int i);
1073 } op_trans[] = {
1074 [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
1075 [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
1076 [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
1077 [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
1078 [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
1079 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
1080 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
1081 [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu },
1082 [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu },
1083 [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu },
1084 [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu },
1085 [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu },
1086 [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu },
1087 [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu },
1088 [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu },
1089 [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu },
1090 [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu },
1091 [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu },
1092 [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu },
1093
1094 [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul },
1095 [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
1096 [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },
1097
1098 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
1099 [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
1100 [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
1101 [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
1102 [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
1103 [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq },
1104 [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne },
1105 [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge },
1106 [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt },
1107 [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq },
1108 [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne },
1109 [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge },
1110 [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },
1111
1112 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
1113 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
1114 [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
1115 [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
1116 [TGSI_OPCODE_DP4] = { 0, tgsi_to_qir_dp4 },
1117 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_alu },
1118 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
1119 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_alu },
1120 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_alu },
1121 [TGSI_OPCODE_LIT] = { 0, tgsi_to_qir_lit },
1122 [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },
1123 [TGSI_OPCODE_POW] = { 0, tgsi_to_qir_pow },
1124 [TGSI_OPCODE_TRUNC] = { 0, tgsi_to_qir_trunc },
1125 [TGSI_OPCODE_FRC] = { 0, tgsi_to_qir_frc },
1126 [TGSI_OPCODE_FLR] = { 0, tgsi_to_qir_flr },
1127 [TGSI_OPCODE_SIN] = { 0, tgsi_to_qir_sin },
1128 [TGSI_OPCODE_COS] = { 0, tgsi_to_qir_cos },
1129 [TGSI_OPCODE_CLAMP] = { 0, tgsi_to_qir_clamp },
1130 };
1131 static int asdf = 0;
1132 uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
1133
1134 if (tgsi_op == TGSI_OPCODE_END)
1135 return;
1136
1137 struct qreg src_regs[12];
1138 for (int s = 0; s < 3; s++) {
1139 for (int i = 0; i < 4; i++) {
1140 src_regs[4 * s + i] =
1141 get_src(c, tgsi_inst->Instruction.Opcode,
1142 &tgsi_inst->Src[s].Register, i);
1143 }
1144 }
1145
1146 switch (tgsi_op) {
1147 case TGSI_OPCODE_TEX:
1148 case TGSI_OPCODE_TXP:
1149 case TGSI_OPCODE_TXB:
1150 tgsi_to_qir_tex(c, tgsi_inst,
1151 op_trans[tgsi_op].op, src_regs);
1152 return;
1153 case TGSI_OPCODE_KILL:
1154 c->discard = qir_uniform_f(c, 1.0);
1155 return;
1156 case TGSI_OPCODE_KILL_IF:
1157 for (int i = 0; i < 4; i++)
1158 tgsi_to_qir_kill_if(c, src_regs, i);
1159 return;
1160 default:
1161 break;
1162 }
1163
1164 if (tgsi_op > ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
1165 fprintf(stderr, "unknown tgsi inst: ");
1166 tgsi_dump_instruction(tgsi_inst, asdf++);
1167 fprintf(stderr, "\n");
1168 abort();
1169 }
1170
1171 for (int i = 0; i < 4; i++) {
1172 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
1173 continue;
1174
1175 struct qreg result;
1176
1177 result = op_trans[tgsi_op].func(c, tgsi_inst,
1178 op_trans[tgsi_op].op,
1179 src_regs, i);
1180
1181 if (tgsi_inst->Instruction.Saturate) {
1182 float low = (tgsi_inst->Instruction.Saturate ==
1183 TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
1184 result = qir_FMAX(c,
1185 qir_FMIN(c,
1186 result,
1187 qir_uniform_f(c, 1.0)),
1188 qir_uniform_f(c, low));
1189 }
1190
1191 update_dst(c, tgsi_inst, i, result);
1192 }
1193 }
1194
1195 static void
1196 parse_tgsi_immediate(struct vc4_compile *c, struct tgsi_full_immediate *imm)
1197 {
1198 for (int i = 0; i < 4; i++) {
1199 unsigned n = c->num_consts++;
1200 resize_qreg_array(c, &c->consts, &c->consts_array_size, n + 1);
1201 c->consts[n] = qir_uniform_ui(c, imm->u[i].Uint);
1202 }
1203 }
1204
1205 static struct qreg
1206 vc4_blend_channel(struct vc4_compile *c,
1207 struct qreg *dst,
1208 struct qreg *src,
1209 struct qreg val,
1210 unsigned factor,
1211 int channel)
1212 {
1213 switch(factor) {
1214 case PIPE_BLENDFACTOR_ONE:
1215 return val;
1216 case PIPE_BLENDFACTOR_SRC_COLOR:
1217 return qir_FMUL(c, val, src[channel]);
1218 case PIPE_BLENDFACTOR_SRC_ALPHA:
1219 return qir_FMUL(c, val, src[3]);
1220 case PIPE_BLENDFACTOR_DST_ALPHA:
1221 return qir_FMUL(c, val, dst[3]);
1222 case PIPE_BLENDFACTOR_DST_COLOR:
1223 return qir_FMUL(c, val, dst[channel]);
1224 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1225 return qir_FMIN(c, src[3], qir_FSUB(c,
1226 qir_uniform_f(c, 1.0),
1227 dst[3]));
1228 case PIPE_BLENDFACTOR_CONST_COLOR:
1229 return qir_FMUL(c, val,
1230 get_temp_for_uniform(c,
1231 QUNIFORM_BLEND_CONST_COLOR,
1232 channel));
1233 case PIPE_BLENDFACTOR_CONST_ALPHA:
1234 return qir_FMUL(c, val,
1235 get_temp_for_uniform(c,
1236 QUNIFORM_BLEND_CONST_COLOR,
1237 3));
1238 case PIPE_BLENDFACTOR_ZERO:
1239 return qir_uniform_f(c, 0.0);
1240 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
1241 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1242 src[channel]));
1243 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
1244 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1245 src[3]));
1246 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
1247 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1248 dst[3]));
1249 case PIPE_BLENDFACTOR_INV_DST_COLOR:
1250 return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
1251 dst[channel]));
1252 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
1253 return qir_FMUL(c, val,
1254 qir_FSUB(c, qir_uniform_f(c, 1.0),
1255 get_temp_for_uniform(c,
1256 QUNIFORM_BLEND_CONST_COLOR,
1257 channel)));
1258 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
1259 return qir_FMUL(c, val,
1260 qir_FSUB(c, qir_uniform_f(c, 1.0),
1261 get_temp_for_uniform(c,
1262 QUNIFORM_BLEND_CONST_COLOR,
1263 3)));
1264
1265 default:
1266 case PIPE_BLENDFACTOR_SRC1_COLOR:
1267 case PIPE_BLENDFACTOR_SRC1_ALPHA:
1268 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
1269 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
1270 /* Unsupported. */
1271 fprintf(stderr, "Unknown blend factor %d\n", factor);
1272 return val;
1273 }
1274 }
1275
1276 static struct qreg
1277 vc4_blend_func(struct vc4_compile *c,
1278 struct qreg src, struct qreg dst,
1279 unsigned func)
1280 {
1281 switch (func) {
1282 case PIPE_BLEND_ADD:
1283 return qir_FADD(c, src, dst);
1284 case PIPE_BLEND_SUBTRACT:
1285 return qir_FSUB(c, src, dst);
1286 case PIPE_BLEND_REVERSE_SUBTRACT:
1287 return qir_FSUB(c, dst, src);
1288 case PIPE_BLEND_MIN:
1289 return qir_FMIN(c, src, dst);
1290 case PIPE_BLEND_MAX:
1291 return qir_FMAX(c, src, dst);
1292
1293 default:
1294 /* Unsupported. */
1295 fprintf(stderr, "Unknown blend func %d\n", func);
1296 return src;
1297
1298 }
1299 }
1300
1301 /**
1302 * Implements fixed function blending in shader code.
1303 *
1304 * VC4 doesn't have any hardware support for blending. Instead, you read the
1305 * current contents of the destination from the tile buffer after having
1306 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
1307 * math using your output color and that destination value, and update the
1308 * output color appropriately.
1309 */
1310 static void
1311 vc4_blend(struct vc4_compile *c, struct qreg *result,
1312 struct qreg *dst_color, struct qreg *src_color)
1313 {
1314 struct pipe_rt_blend_state *blend = &c->fs_key->blend;
1315
1316 if (!blend->blend_enable) {
1317 for (int i = 0; i < 4; i++)
1318 result[i] = src_color[i];
1319 return;
1320 }
1321
1322 struct qreg src_blend[4], dst_blend[4];
1323 for (int i = 0; i < 3; i++) {
1324 src_blend[i] = vc4_blend_channel(c,
1325 dst_color, src_color,
1326 src_color[i],
1327 blend->rgb_src_factor, i);
1328 dst_blend[i] = vc4_blend_channel(c,
1329 dst_color, src_color,
1330 dst_color[i],
1331 blend->rgb_dst_factor, i);
1332 }
1333 src_blend[3] = vc4_blend_channel(c,
1334 dst_color, src_color,
1335 src_color[3],
1336 blend->alpha_src_factor, 3);
1337 dst_blend[3] = vc4_blend_channel(c,
1338 dst_color, src_color,
1339 dst_color[3],
1340 blend->alpha_dst_factor, 3);
1341
1342 for (int i = 0; i < 3; i++) {
1343 result[i] = vc4_blend_func(c,
1344 src_blend[i], dst_blend[i],
1345 blend->rgb_func);
1346 }
1347 result[3] = vc4_blend_func(c,
1348 src_blend[3], dst_blend[3],
1349 blend->alpha_func);
1350 }
1351
1352 static void
1353 alpha_test_discard(struct vc4_compile *c)
1354 {
1355 struct qreg src_alpha;
1356 struct qreg alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0);
1357
1358 if (!c->fs_key->alpha_test)
1359 return;
1360
1361 if (c->output_color_index != -1)
1362 src_alpha = c->outputs[c->output_color_index + 3];
1363 else
1364 src_alpha = qir_uniform_f(c, 1.0);
1365
1366 if (c->discard.file == QFILE_NULL)
1367 c->discard = qir_uniform_f(c, 0.0);
1368
1369 switch (c->fs_key->alpha_test_func) {
1370 case PIPE_FUNC_NEVER:
1371 c->discard = qir_uniform_f(c, 1.0);
1372 break;
1373 case PIPE_FUNC_ALWAYS:
1374 break;
1375 case PIPE_FUNC_EQUAL:
1376 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1377 c->discard = qir_SEL_X_Y_ZS(c, c->discard,
1378 qir_uniform_f(c, 1.0));
1379 break;
1380 case PIPE_FUNC_NOTEQUAL:
1381 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1382 c->discard = qir_SEL_X_Y_ZC(c, c->discard,
1383 qir_uniform_f(c, 1.0));
1384 break;
1385 case PIPE_FUNC_GREATER:
1386 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1387 c->discard = qir_SEL_X_Y_NC(c, c->discard,
1388 qir_uniform_f(c, 1.0));
1389 break;
1390 case PIPE_FUNC_GEQUAL:
1391 qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
1392 c->discard = qir_SEL_X_Y_NS(c, c->discard,
1393 qir_uniform_f(c, 1.0));
1394 break;
1395 case PIPE_FUNC_LESS:
1396 qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
1397 c->discard = qir_SEL_X_Y_NS(c, c->discard,
1398 qir_uniform_f(c, 1.0));
1399 break;
1400 case PIPE_FUNC_LEQUAL:
1401 qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
1402 c->discard = qir_SEL_X_Y_NC(c, c->discard,
1403 qir_uniform_f(c, 1.0));
1404 break;
1405 }
1406 }
1407
/* Emits the fragment shader epilogue: alpha test, blending, colormask,
 * stencil/depth setup words, and the final packed tile buffer color write.
 */
static void
emit_frag_end(struct vc4_compile *c)
{
        alpha_test_discard(c);

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        /* Only read back the destination color when blending or a partial
         * colormask actually needs it.
         */
        if (c->fs_key->blend.blend_enable ||
            c->fs_key->blend.colormask != 0xf) {
                struct qreg r4 = qir_TLB_COLOR_READ(c);
                for (int i = 0; i < 4; i++)
                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
                /* Un-swizzle the tile buffer data into RGBA channel order. */
                for (int i = 0; i < 4; i++)
                        dst_color[i] = get_swizzled_channel(c,
                                                            tlb_read_color,
                                                            format_swiz[i]);
        }

        struct qreg blend_color[4];
        struct qreg undef_array[4] = {
                c->undef, c->undef, c->undef, c->undef
        };
        vc4_blend(c, blend_color, dst_color,
                  (c->output_color_index != -1 ?
                   c->outputs + c->output_color_index :
                   undef_array));

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        for (int i = 0; i < 4; i++) {
                if (!(c->fs_key->blend.colormask & (1 << i))) {
                        blend_color[i] = dst_color[i];
                }
        }

        /* Debug: Sometimes you're getting a black output and just want to see
         * if the FS is getting executed at all. Spam magenta into the color
         * output.
         */
        if (0) {
                blend_color[0] = qir_uniform_f(c, 1.0);
                blend_color[1] = qir_uniform_f(c, 0.0);
                blend_color[2] = qir_uniform_f(c, 1.0);
                blend_color[3] = qir_uniform_f(c, 0.5);
        }

        /* Swizzle the blended color back into the render target's channel
         * order before packing.
         */
        struct qreg swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
                                                           format_swiz[i]);
        }

        if (c->discard.file != QFILE_NULL)
                qir_TLB_DISCARD_SETUP(c, c->discard);

        /* Up to three stencil config words, depending on two-sidedness and
         * writemask complexity (mirrors the QUNIFORM_STENCIL indices in
         * vc4_write_uniforms()).
         */
        if (c->fs_key->stencil_enabled) {
                qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 0));
                if (c->fs_key->stencil_twoside) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 1));
                }
                if (c->fs_key->stencil_full_writemasks) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 2));
                }
        }

        if (c->fs_key->depth_enabled) {
                struct qreg z;
                if (c->output_position_index != -1) {
                        /* The shader wrote a depth output: scale the float
                         * Z into the 24-bit integer range (0xffffff).
                         */
                        z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
                                                 qir_uniform_f(c, 0xffffff)));
                } else {
                        z = qir_FRAG_Z(c);
                }
                qir_TLB_Z_WRITE(c, z);
        }

        /* Did any channel get a defined value? */
        bool color_written = false;
        for (int i = 0; i < 4; i++) {
                if (swizzled_outputs[i].file != QFILE_NULL)
                        color_written = true;
        }

        struct qreg packed_color;
        if (color_written) {
                /* Fill in any undefined colors. The simulator will assertion
                 * fail if we read something that wasn't written, and I don't
                 * know what hardware does.
                 */
                for (int i = 0; i < 4; i++) {
                        if (swizzled_outputs[i].file == QFILE_NULL)
                                swizzled_outputs[i] = qir_uniform_f(c, 0.0);
                }
                packed_color = qir_get_temp(c);
                qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
                                      swizzled_outputs[0],
                                      swizzled_outputs[1],
                                      swizzled_outputs[2],
                                      swizzled_outputs[3]));
        } else {
                packed_color = qir_uniform_ui(c, 0);
        }

        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                             packed_color, c->undef));
}
1516
1517 static void
1518 emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
1519 {
1520 struct qreg xyi[2];
1521
1522 for (int i = 0; i < 2; i++) {
1523 struct qreg scale =
1524 add_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
1525
1526 xyi[i] = qir_FTOI(c, qir_FMUL(c,
1527 qir_FMUL(c,
1528 c->outputs[i],
1529 scale),
1530 rcp_w));
1531 }
1532
1533 qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
1534 }
1535
1536 static void
1537 emit_zs_write(struct vc4_compile *c, struct qreg rcp_w)
1538 {
1539 struct qreg zscale = add_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
1540 struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
1541
1542 qir_VPM_WRITE(c, qir_FMUL(c, qir_FADD(c, qir_FMUL(c,
1543 c->outputs[2],
1544 zscale),
1545 zoffset),
1546 rcp_w));
1547 }
1548
/* Writes 1/Wc to the VPM as part of the shaded vertex format. */
static void
emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
{
        qir_VPM_WRITE(c, rcp_w);
}
1554
1555 static void
1556 emit_point_size_write(struct vc4_compile *c)
1557 {
1558 struct qreg point_size;
1559
1560 if (c->output_point_size_index)
1561 point_size = c->outputs[c->output_point_size_index + 3];
1562 else
1563 point_size = qir_uniform_f(c, 1.0);
1564
1565 /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
1566 * BCM21553).
1567 */
1568 point_size = qir_FMAX(c, point_size, qir_uniform_f(c, .125));
1569
1570 qir_VPM_WRITE(c, point_size);
1571 }
1572
1573 static void
1574 emit_vert_end(struct vc4_compile *c)
1575 {
1576 struct qreg rcp_w = qir_RCP(c, c->outputs[3]);
1577
1578 emit_scaled_viewport_write(c, rcp_w);
1579 emit_zs_write(c, rcp_w);
1580 emit_rcp_wc_write(c, rcp_w);
1581 if (c->vs_key->per_vertex_point_size)
1582 emit_point_size_write(c);
1583
1584 for (int i = 4; i < c->num_outputs; i++) {
1585 qir_VPM_WRITE(c, c->outputs[i]);
1586 }
1587 }
1588
1589 static void
1590 emit_coord_end(struct vc4_compile *c)
1591 {
1592 struct qreg rcp_w = qir_RCP(c, c->outputs[3]);
1593
1594 for (int i = 0; i < 4; i++)
1595 qir_VPM_WRITE(c, c->outputs[i]);
1596
1597 emit_scaled_viewport_write(c, rcp_w);
1598 emit_zs_write(c, rcp_w);
1599 emit_rcp_wc_write(c, rcp_w);
1600 if (c->vs_key->per_vertex_point_size)
1601 emit_point_size_write(c);
1602 }
1603
/* Compiles one shader stage (FS, VS, or the binning coordinate shader) from
 * TGSI into QPU instructions.  Returns the vc4_compile holding the generated
 * code and uniform stream; the caller owns it and must qir_compile_destroy()
 * it after copying the results out.
 */
static struct vc4_compile *
vc4_shader_tgsi_to_qir(struct vc4_context *vc4,
                       struct vc4_compiled_shader *shader, enum qstage stage,
                       struct vc4_key *key)
{
        struct vc4_compile *c = qir_compile_init();
        int ret;

        c->stage = stage;

        c->shader_state = key->shader_state;
        /* NOTE(review): ret is only consumed by the assert below, so it is
         * unused when asserts are compiled out.
         */
        ret = tgsi_parse_init(&c->parser, c->shader_state->tokens);
        assert(ret == TGSI_PARSE_OK);

        if (vc4_debug & VC4_DEBUG_TGSI) {
                fprintf(stderr, "TGSI:\n");
                tgsi_dump(c->shader_state->tokens, 0);
        }

        /* Stage-specific key setup.  For point/line FS, extra varyings are
         * allocated up front for the point/line coordinates.
         */
        c->key = key;
        switch (stage) {
        case QSTAGE_FRAG:
                c->fs_key = (struct vc4_fs_key *)key;
                if (c->fs_key->is_points) {
                        c->point_x = emit_fragment_varying(c, 0);
                        c->point_y = emit_fragment_varying(c, 0);
                } else if (c->fs_key->is_lines) {
                        c->line_x = emit_fragment_varying(c, 0);
                }
                break;
        case QSTAGE_VERT:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        case QSTAGE_COORD:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        }

        /* Walk the token stream, translating each declaration, instruction,
         * and immediate into QIR.
         */
        while (!tgsi_parse_end_of_tokens(&c->parser)) {
                tgsi_parse_token(&c->parser);

                switch (c->parser.FullToken.Token.Type) {
                case TGSI_TOKEN_TYPE_DECLARATION:
                        emit_tgsi_declaration(c,
                                              &c->parser.FullToken.FullDeclaration);
                        break;

                case TGSI_TOKEN_TYPE_INSTRUCTION:
                        emit_tgsi_instruction(c,
                                              &c->parser.FullToken.FullInstruction);
                        break;

                case TGSI_TOKEN_TYPE_IMMEDIATE:
                        parse_tgsi_immediate(c,
                                             &c->parser.FullToken.FullImmediate);
                        break;
                }
        }

        /* Per-stage epilogue: tile-buffer writes for FS, VPM writes for
         * VS/coord shaders.
         */
        switch (stage) {
        case QSTAGE_FRAG:
                emit_frag_end(c);
                break;
        case QSTAGE_VERT:
                emit_vert_end(c);
                break;
        case QSTAGE_COORD:
                emit_coord_end(c);
                break;
        }

        tgsi_parse_free(&c->parser);

        qir_optimize(c);

        if (vc4_debug & VC4_DEBUG_QIR) {
                fprintf(stderr, "QIR:\n");
                qir_dump(c);
        }
        qir_reorder_uniforms(c);
        vc4_generate_code(vc4, c);

        if (vc4_debug & VC4_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s: %d instructions\n",
                        qir_get_stage_name(c->stage), c->qpu_inst_count);
                fprintf(stderr, "SHADER-DB: %s: %d uniforms\n",
                        qir_get_stage_name(c->stage), c->num_uniforms);
        }

        return c;
}
1695
1696 static void *
1697 vc4_shader_state_create(struct pipe_context *pctx,
1698 const struct pipe_shader_state *cso)
1699 {
1700 struct pipe_shader_state *so = CALLOC_STRUCT(pipe_shader_state);
1701 if (!so)
1702 return NULL;
1703
1704 so->tokens = tgsi_dup_tokens(cso->tokens);
1705
1706 return so;
1707 }
1708
1709 static void
1710 copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
1711 int shader_index,
1712 struct vc4_compile *c)
1713 {
1714 int count = c->num_uniforms;
1715 struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
1716
1717 uinfo->count = count;
1718 uinfo->data = malloc(count * sizeof(*uinfo->data));
1719 memcpy(uinfo->data, c->uniform_data,
1720 count * sizeof(*uinfo->data));
1721 uinfo->contents = malloc(count * sizeof(*uinfo->contents));
1722 memcpy(uinfo->contents, c->uniform_contents,
1723 count * sizeof(*uinfo->contents));
1724 uinfo->num_texture_samples = c->num_texture_samples;
1725 }
1726
1727 static void
1728 vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
1729 struct vc4_fs_key *key)
1730 {
1731 struct vc4_compile *c = vc4_shader_tgsi_to_qir(vc4, shader,
1732 QSTAGE_FRAG,
1733 &key->base);
1734 shader->num_inputs = c->num_inputs;
1735 shader->color_inputs = c->color_inputs;
1736 copy_uniform_state_to_shader(shader, 0, c);
1737 shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
1738 c->qpu_inst_count * sizeof(uint64_t),
1739 "fs_code");
1740
1741 qir_compile_destroy(c);
1742 }
1743
1744 static void
1745 vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
1746 struct vc4_vs_key *key)
1747 {
1748 struct vc4_compile *vs_c = vc4_shader_tgsi_to_qir(vc4, shader,
1749 QSTAGE_VERT,
1750 &key->base);
1751 copy_uniform_state_to_shader(shader, 0, vs_c);
1752
1753 struct vc4_compile *cs_c = vc4_shader_tgsi_to_qir(vc4, shader,
1754 QSTAGE_COORD,
1755 &key->base);
1756 copy_uniform_state_to_shader(shader, 1, cs_c);
1757
1758 uint32_t vs_size = vs_c->qpu_inst_count * sizeof(uint64_t);
1759 uint32_t cs_size = cs_c->qpu_inst_count * sizeof(uint64_t);
1760 shader->coord_shader_offset = vs_size; /* XXX: alignment? */
1761 shader->bo = vc4_bo_alloc(vc4->screen,
1762 shader->coord_shader_offset + cs_size,
1763 "vs_code");
1764
1765 void *map = vc4_bo_map(shader->bo);
1766 memcpy(map, vs_c->qpu_insts, vs_size);
1767 memcpy(map + shader->coord_shader_offset,
1768 cs_c->qpu_insts, cs_size);
1769
1770 qir_compile_destroy(vs_c);
1771 qir_compile_destroy(cs_c);
1772 }
1773
1774 static void
1775 vc4_setup_shared_key(struct vc4_key *key, struct vc4_texture_stateobj *texstate)
1776 {
1777 for (int i = 0; i < texstate->num_textures; i++) {
1778 struct pipe_sampler_view *sampler = texstate->textures[i];
1779 struct pipe_sampler_state *sampler_state =
1780 texstate->samplers[i];
1781
1782 if (sampler) {
1783 key->tex[i].format = sampler->format;
1784 key->tex[i].swizzle[0] = sampler->swizzle_r;
1785 key->tex[i].swizzle[1] = sampler->swizzle_g;
1786 key->tex[i].swizzle[2] = sampler->swizzle_b;
1787 key->tex[i].swizzle[3] = sampler->swizzle_a;
1788 key->tex[i].compare_mode = sampler_state->compare_mode;
1789 key->tex[i].compare_func = sampler_state->compare_func;
1790 key->tex[i].wrap_s = sampler_state->wrap_s;
1791 key->tex[i].wrap_t = sampler_state->wrap_t;
1792 }
1793 }
1794 }
1795
1796 static void
1797 vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
1798 {
1799 struct vc4_fs_key local_key;
1800 struct vc4_fs_key *key = &local_key;
1801
1802 memset(key, 0, sizeof(*key));
1803 vc4_setup_shared_key(&key->base, &vc4->fragtex);
1804 key->base.shader_state = vc4->prog.bind_fs;
1805 key->is_points = (prim_mode == PIPE_PRIM_POINTS);
1806 key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
1807 prim_mode <= PIPE_PRIM_LINE_STRIP);
1808 key->blend = vc4->blend->rt[0];
1809
1810 if (vc4->framebuffer.cbufs[0])
1811 key->color_format = vc4->framebuffer.cbufs[0]->format;
1812
1813 key->stencil_enabled = vc4->zsa->stencil_uniforms[0] != 0;
1814 key->stencil_twoside = vc4->zsa->stencil_uniforms[1] != 0;
1815 key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
1816 key->depth_enabled = (vc4->zsa->base.depth.enabled ||
1817 key->stencil_enabled);
1818 if (vc4->zsa->base.alpha.enabled) {
1819 key->alpha_test = true;
1820 key->alpha_test_func = vc4->zsa->base.alpha.func;
1821 }
1822
1823 if (key->is_points) {
1824 key->point_sprite_mask =
1825 vc4->rasterizer->base.sprite_coord_enable;
1826 key->point_coord_upper_left =
1827 (vc4->rasterizer->base.sprite_coord_mode ==
1828 PIPE_SPRITE_COORD_UPPER_LEFT);
1829 }
1830
1831 vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
1832 if (vc4->prog.fs)
1833 return;
1834
1835 key = malloc(sizeof(*key));
1836 memcpy(key, &local_key, sizeof(*key));
1837
1838 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
1839 vc4_fs_compile(vc4, shader, key);
1840 util_hash_table_set(vc4->fs_cache, key, shader);
1841
1842 if (vc4->rasterizer->base.flatshade &&
1843 vc4->prog.fs &&
1844 vc4->prog.fs->color_inputs != shader->color_inputs) {
1845 vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
1846 }
1847
1848 vc4->prog.fs = shader;
1849 }
1850
1851 static void
1852 vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
1853 {
1854 struct vc4_vs_key local_key;
1855 struct vc4_vs_key *key = &local_key;
1856
1857 memset(key, 0, sizeof(*key));
1858 vc4_setup_shared_key(&key->base, &vc4->verttex);
1859 key->base.shader_state = vc4->prog.bind_vs;
1860
1861 for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
1862 key->attr_formats[i] = vc4->vtx->pipe[i].src_format;
1863
1864 key->per_vertex_point_size =
1865 (prim_mode == PIPE_PRIM_POINTS &&
1866 vc4->rasterizer->base.point_size_per_vertex);
1867
1868 vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
1869 if (vc4->prog.vs)
1870 return;
1871
1872 key = malloc(sizeof(*key));
1873 memcpy(key, &local_key, sizeof(*key));
1874
1875 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
1876 vc4_vs_compile(vc4, shader, key);
1877 util_hash_table_set(vc4->vs_cache, key, shader);
1878
1879 vc4->prog.vs = shader;
1880 }
1881
/* Ensures vc4->prog.fs/vs point at compiles matching the current state,
 * compiling on demand.  Called before emitting a draw.
 */
void
vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
{
        vc4_update_compiled_fs(vc4, prim_mode);
        vc4_update_compiled_vs(vc4, prim_mode);
}
1888
1889 static unsigned
1890 fs_cache_hash(void *key)
1891 {
1892 return util_hash_crc32(key, sizeof(struct vc4_fs_key));
1893 }
1894
1895 static unsigned
1896 vs_cache_hash(void *key)
1897 {
1898 return util_hash_crc32(key, sizeof(struct vc4_vs_key));
1899 }
1900
1901 static int
1902 fs_cache_compare(void *key1, void *key2)
1903 {
1904 return memcmp(key1, key2, sizeof(struct vc4_fs_key));
1905 }
1906
1907 static int
1908 vs_cache_compare(void *key1, void *key2)
1909 {
1910 return memcmp(key1, key2, sizeof(struct vc4_vs_key));
1911 }
1912
/* Context for the cache-eviction callbacks used by
 * vc4_shader_state_delete().
 */
struct delete_state {
        struct vc4_context *vc4;
        struct pipe_shader_state *shader_state;  /* CSO being deleted */
};
1917
1918 static enum pipe_error
1919 fs_delete_from_cache(void *in_key, void *in_value, void *data)
1920 {
1921 struct delete_state *del = data;
1922 struct vc4_fs_key *key = in_key;
1923 struct vc4_compiled_shader *shader = in_value;
1924
1925 if (key->base.shader_state == data) {
1926 util_hash_table_remove(del->vc4->fs_cache, key);
1927 vc4_bo_unreference(&shader->bo);
1928 free(shader);
1929 }
1930
1931 return 0;
1932 }
1933
1934 static enum pipe_error
1935 vs_delete_from_cache(void *in_key, void *in_value, void *data)
1936 {
1937 struct delete_state *del = data;
1938 struct vc4_vs_key *key = in_key;
1939 struct vc4_compiled_shader *shader = in_value;
1940
1941 if (key->base.shader_state == data) {
1942 util_hash_table_remove(del->vc4->vs_cache, key);
1943 vc4_bo_unreference(&shader->bo);
1944 free(shader);
1945 }
1946
1947 return 0;
1948 }
1949
1950 static void
1951 vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
1952 {
1953 struct vc4_context *vc4 = vc4_context(pctx);
1954 struct pipe_shader_state *so = hwcso;
1955 struct delete_state del;
1956
1957 del.vc4 = vc4;
1958 del.shader_state = so;
1959 util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
1960 util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);
1961
1962 free((void *)so->tokens);
1963 free(so);
1964 }
1965
1966 static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
1967 {
1968 switch (p_wrap) {
1969 case PIPE_TEX_WRAP_REPEAT:
1970 return 0;
1971 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1972 return 1;
1973 case PIPE_TEX_WRAP_MIRROR_REPEAT:
1974 return 2;
1975 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1976 return 3;
1977 case PIPE_TEX_WRAP_CLAMP:
1978 return (using_nearest ? 1 : 3);
1979 default:
1980 fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
1981 assert(!"not reached");
1982 return 0;
1983 }
1984 }
1985
1986 static void
1987 write_texture_p0(struct vc4_context *vc4,
1988 struct vc4_texture_stateobj *texstate,
1989 uint32_t unit)
1990 {
1991 struct pipe_sampler_view *texture = texstate->textures[unit];
1992 struct vc4_resource *rsc = vc4_resource(texture->texture);
1993
1994 bool is_cube = texture->target == PIPE_TEXTURE_CUBE;
1995
1996 cl_reloc(vc4, &vc4->uniforms, rsc->bo,
1997 rsc->slices[0].offset | texture->u.tex.last_level |
1998 is_cube << 9 |
1999 ((rsc->vc4_format & 7) << 4));
2000 }
2001
2002 static void
2003 write_texture_p1(struct vc4_context *vc4,
2004 struct vc4_texture_stateobj *texstate,
2005 uint32_t unit)
2006 {
2007 struct pipe_sampler_view *texture = texstate->textures[unit];
2008 struct vc4_resource *rsc = vc4_resource(texture->texture);
2009 struct pipe_sampler_state *sampler = texstate->samplers[unit];
2010 static const uint8_t minfilter_map[6] = {
2011 2, 4, /* mipfilter nearest */
2012 3, 5, /* mipfilter linear */
2013 1, 0, /* mipfilter none */
2014 };
2015 static const uint32_t magfilter_map[] = {
2016 [PIPE_TEX_FILTER_NEAREST] = 1,
2017 [PIPE_TEX_FILTER_LINEAR] = 0,
2018 };
2019
2020 bool either_nearest =
2021 (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
2022 sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
2023
2024 cl_u32(&vc4->uniforms,
2025 ((rsc->vc4_format >> 4) << 31) |
2026 (texture->texture->height0 << 20) |
2027 (texture->texture->width0 << 8) |
2028 (magfilter_map[sampler->mag_img_filter] << 7) |
2029 (minfilter_map[sampler->min_mip_filter * 2 +
2030 sampler->min_img_filter] << 4) |
2031 (translate_wrap(sampler->wrap_t, either_nearest) << 2) |
2032 (translate_wrap(sampler->wrap_s, either_nearest) << 0));
2033 }
2034
2035 static void
2036 write_texture_p2(struct vc4_context *vc4,
2037 struct vc4_texture_stateobj *texstate,
2038 uint32_t unit)
2039 {
2040 struct pipe_sampler_view *texture = texstate->textures[unit];
2041 struct vc4_resource *rsc = vc4_resource(texture->texture);
2042
2043 cl_u32(&vc4->uniforms, (1 << 30) | rsc->cube_map_stride);
2044 }
2045
2046
/* Shorthand for building a util_format four-channel swizzle array. */
#define SWIZ(x,y,z,w) { \
        UTIL_FORMAT_SWIZZLE_##x, \
        UTIL_FORMAT_SWIZZLE_##y, \
        UTIL_FORMAT_SWIZZLE_##z, \
        UTIL_FORMAT_SWIZZLE_##w \
}
2053
/* Emits the border color uniform word, packed so that a border texel read
 * through the vc4_format swizzle yields the sampler's border color.
 */
static void
write_texture_border_color(struct vc4_context *vc4,
                           struct vc4_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        /* For sRGB textures the hardware decodes texel data, so store the
         * border color sRGB-encoded to round-trip through that decode.
         */
        float border_color[4];
        for (int i = 0; i < 4; i++)
                border_color[i] = sampler->border_color.f[i];
        if (util_format_is_srgb(texture->format)) {
                for (int i = 0; i < 3; i++)
                        border_color[i] =
                                util_format_linear_to_srgb_float(border_color[i]);
        }

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 border_color,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc4_format-sampled texture contents are
         * replaced with our border color, the vc4_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                /* Depth border: pack the depth value into the top 24 bits. */
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                /* NOTE(review): the pipe formats chosen below look surprising
                 * (e.g. RGBA4444 packed via A8B8G8R8), but are presumably
                 * matched to the hardware's channel order for each
                 * vc4_format — confirm against the texture unpack path.
                 */
                switch (rsc->vc4_format) {
                default:
                case VC4_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC4_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_u32(&vc4->uniforms, uc.ui[0]);
}
2118
2119 static uint32_t
2120 get_texrect_scale(struct vc4_texture_stateobj *texstate,
2121 enum quniform_contents contents,
2122 uint32_t data)
2123 {
2124 struct pipe_sampler_view *texture = texstate->textures[data];
2125 uint32_t dim;
2126
2127 if (contents == QUNIFORM_TEXRECT_SCALE_X)
2128 dim = texture->texture->width0;
2129 else
2130 dim = texture->texture->height0;
2131
2132 return fui(1.0f / dim);
2133 }
2134
/* Streams one shader's uniform values into the command list, resolving each
 * recorded quniform_contents entry against current context state.  The
 * order must match the compile-time uniform stream exactly.
 */
void
vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                   struct vc4_constbuf_stateobj *cb,
                   struct vc4_texture_stateobj *texstate,
                   int shader_index)
{
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;

        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);

        for (int i = 0; i < uinfo->count; i++) {

                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        /* Value was baked in at compile time. */
                        cl_u32(&vc4->uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        /* data[i] indexes the gallium constant buffer. */
                        cl_u32(&vc4->uniforms,
                               gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        /* The *16.0f matches the FTOI fixed-point conversion
                         * in emit_scaled_viewport_write().
                         */
                        cl_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_f(&vc4->uniforms, vc4->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[2]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P0:
                        /* data[i] is the texture unit for all the texture
                         * config and border-color cases below.
                         */
                        write_texture_p0(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P2:
                        write_texture_p2(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        write_texture_border_color(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_u32(&vc4->uniforms,
                               get_texrect_scale(texstate,
                                                 uinfo->contents[i],
                                                 uinfo->data[i]));
                        break;

                case QUNIFORM_BLEND_CONST_COLOR:
                        /* data[i] is the channel index. */
                        cl_f(&vc4->uniforms,
                             vc4->blend_color.color[uinfo->data[i]]);
                        break;

                case QUNIFORM_STENCIL:
                        /* Words 0/1 carry the front/back stencil ref in bits
                         * 8-15; word 2 (writemasks) carries none.
                         */
                        cl_u32(&vc4->uniforms,
                               vc4->zsa->stencil_uniforms[uinfo->data[i]] |
                               (uinfo->data[i] <= 1 ?
                                (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_f(&vc4->uniforms, vc4->zsa->base.alpha.ref_value);
                        break;
                }
#if 0
                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
                fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
                        shader, shader_index, i, written_val, uif(written_val));
#endif
        }
}
2218
2219 static void
2220 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
2221 {
2222 struct vc4_context *vc4 = vc4_context(pctx);
2223 vc4->prog.bind_fs = hwcso;
2224 vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
2225 vc4->dirty |= VC4_DIRTY_PROG;
2226 }
2227
2228 static void
2229 vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
2230 {
2231 struct vc4_context *vc4 = vc4_context(pctx);
2232 vc4->prog.bind_vs = hwcso;
2233 vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
2234 vc4->dirty |= VC4_DIRTY_PROG;
2235 }
2236
2237 void
2238 vc4_program_init(struct pipe_context *pctx)
2239 {
2240 struct vc4_context *vc4 = vc4_context(pctx);
2241
2242 pctx->create_vs_state = vc4_shader_state_create;
2243 pctx->delete_vs_state = vc4_shader_state_delete;
2244
2245 pctx->create_fs_state = vc4_shader_state_create;
2246 pctx->delete_fs_state = vc4_shader_state_delete;
2247
2248 pctx->bind_fs_state = vc4_fp_state_bind;
2249 pctx->bind_vs_state = vc4_vp_state_bind;
2250
2251 vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
2252 vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
2253 }