vc4: Make the last static array in vc4_program.c dynamically sized.
[mesa.git] src/gallium/drivers/vc4/vc4_program.c
/*
 * Copyright (c) 2014 Scott Mansell
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <inttypes.h>
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_hash_table.h"
#include "util/u_hash.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/ralloc.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"

#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
#ifdef USE_VC4_SIMULATOR
#include "simpenrose/simpenrose.h"
#endif

struct vc4_key {
        struct pipe_shader_state *shader_state;
        struct {
                enum pipe_format format;
                unsigned compare_mode:1;
                unsigned compare_func:3;
                unsigned wrap_s:3;
                unsigned wrap_t:3;
                uint8_t swizzle[4];
        } tex[VC4_MAX_TEXTURE_SAMPLERS];
};

struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
        bool depth_enabled;
        bool stencil_enabled;
        bool stencil_twoside;
        bool stencil_full_writemasks;
        bool is_points;
        bool is_lines;
        bool alpha_test;
        bool point_coord_upper_left;
        uint8_t alpha_test_func;
        uint32_t point_sprite_mask;

        struct pipe_rt_blend_state blend;
};

struct vc4_vs_key {
        struct vc4_key base;
        enum pipe_format attr_formats[8];
        bool per_vertex_point_size;
};

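/* Grows a qreg array to at least decl_size entries, doubling the current
 * size so that repeated declarations stay amortized, and initializing any
 * new entries to undef.
 */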
static void
resize_qreg_array(struct vc4_compile *c,
                  struct qreg **regs,
                  uint32_t *size,
                  uint32_t decl_size)
{
        if (*size >= decl_size)
                return;

        uint32_t old_size = *size;
        *size = MAX2(*size * 2, decl_size);
        *regs = reralloc(c, *regs, struct qreg, *size);
        if (!*regs) {
                fprintf(stderr, "Malloc failure\n");
                abort();
        }

        for (uint32_t i = old_size; i < *size; i++)
                (*regs)[i] = c->undef;
}

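/* Appends a uniform to the shader's uniform stream, growing the
 * uniform_data/uniform_contents arrays on demand (the arrays this commit
 * makes dynamically sized), and returns a QFILE_UNIF qreg referencing the
 * new slot.
 */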
static struct qreg
add_uniform(struct vc4_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        uint32_t uniform = c->num_uniforms++;
        struct qreg u = { QFILE_UNIF, uniform };

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return u;
}

static struct qreg
get_temp_for_uniform(struct vc4_compile *c, enum quniform_contents contents,
                     uint32_t data)
{
        struct qreg u = add_uniform(c, contents, data);
        struct qreg t = qir_MOV(c, u);
        return t;
}

static struct qreg
qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
{
        return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui);
}

static struct qreg
qir_uniform_f(struct vc4_compile *c, float f)
{
        return qir_uniform_ui(c, fui(f));
}

static struct qreg
get_src(struct vc4_compile *c, unsigned tgsi_op,
        struct tgsi_src_register *src, int i)
{
        struct qreg r = c->undef;

        uint32_t s = i;
        switch (i) {
        case TGSI_SWIZZLE_X:
                s = src->SwizzleX;
                break;
        case TGSI_SWIZZLE_Y:
                s = src->SwizzleY;
                break;
        case TGSI_SWIZZLE_Z:
                s = src->SwizzleZ;
                break;
        case TGSI_SWIZZLE_W:
                s = src->SwizzleW;
                break;
        default:
                abort();
        }

        assert(!src->Indirect);

        switch (src->File) {
        case TGSI_FILE_NULL:
                return r;
        case TGSI_FILE_TEMPORARY:
                r = c->temps[src->Index * 4 + s];
                break;
        case TGSI_FILE_IMMEDIATE:
                r = c->consts[src->Index * 4 + s];
                break;
        case TGSI_FILE_CONSTANT:
                r = get_temp_for_uniform(c, QUNIFORM_UNIFORM,
                                         src->Index * 4 + s);
                break;
        case TGSI_FILE_INPUT:
                r = c->inputs[src->Index * 4 + s];
                break;
        case TGSI_FILE_SAMPLER:
        case TGSI_FILE_SAMPLER_VIEW:
                r = c->undef;
                break;
        default:
                fprintf(stderr, "unknown src file %d\n", src->File);
                abort();
        }

        if (src->Absolute)
                r = qir_FMAXABS(c, r, r);

        if (src->Negate) {
                switch (tgsi_opcode_infer_src_type(tgsi_op)) {
                case TGSI_TYPE_SIGNED:
                case TGSI_TYPE_UNSIGNED:
                        r = qir_SUB(c, qir_uniform_ui(c, 0), r);
                        break;
                default:
                        r = qir_FSUB(c, qir_uniform_f(c, 0.0), r);
                        break;
                }
        }

        return r;
}


static void
update_dst(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst,
           int i, struct qreg val)
{
        struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;

        assert(!tgsi_dst->Indirect);

        switch (tgsi_dst->File) {
        case TGSI_FILE_TEMPORARY:
                c->temps[tgsi_dst->Index * 4 + i] = val;
                break;
        case TGSI_FILE_OUTPUT:
                c->outputs[tgsi_dst->Index * 4 + i] = val;
                c->num_outputs = MAX2(c->num_outputs,
                                      tgsi_dst->Index * 4 + i + 1);
                break;
        default:
                fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
                abort();
        }
}

static struct qreg
get_swizzled_channel(struct vc4_compile *c,
                     struct qreg *srcs, int swiz)
{
        switch (swiz) {
        default:
        case UTIL_FORMAT_SWIZZLE_NONE:
                fprintf(stderr, "warning: unknown swizzle\n");
                /* FALLTHROUGH */
        case UTIL_FORMAT_SWIZZLE_0:
                return qir_uniform_f(c, 0.0);
        case UTIL_FORMAT_SWIZZLE_1:
                return qir_uniform_f(c, 1.0);
        case UTIL_FORMAT_SWIZZLE_X:
        case UTIL_FORMAT_SWIZZLE_Y:
        case UTIL_FORMAT_SWIZZLE_Z:
        case UTIL_FORMAT_SWIZZLE_W:
                return srcs[swiz];
        }
}

static struct qreg
tgsi_to_qir_alu(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg dst = qir_get_temp(c);
        qir_emit(c, qir_inst4(op, dst,
                              src[0 * 4 + i],
                              src[1 * 4 + i],
                              src[2 * 4 + i],
                              c->undef));
        return dst;
}

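/* Builds a full 32-bit multiply out of the QPU's 24-bit multiplier: each
 * operand is split into 16-bit halves so that every partial product fits
 * in 24 bits, then the result is recombined as lolo + ((hilo + lohi) << 16).
 */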
static struct qreg
tgsi_to_qir_umul(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));
        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 16));
        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
                                      qir_uniform_ui(c, 0xffff));

        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);

        return qir_ADD(c, lolo, qir_SHL(c,
                                        qir_ADD(c, hilo, lohi),
                                        qir_uniform_ui(c, 16)));
}

static struct qreg
tgsi_to_qir_idiv(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_FTOI(c, qir_FMUL(c,
                                    qir_ITOF(c, src[0 * 4 + i]),
                                    qir_RCP(c, qir_ITOF(c, src[1 * 4 + i]))));
}

static struct qreg
tgsi_to_qir_ineg(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        return qir_SUB(c, qir_uniform_ui(c, 0), src[0 * 4 + i]);
}

static struct qreg
tgsi_to_qir_seq(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_sne(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_slt(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_sge(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
}

static struct qreg
tgsi_to_qir_fseq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fsne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fslt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_fsge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_useq(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_usne(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_islt(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_isge(struct vc4_compile *c,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
{
        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
        return qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
}

static struct qreg
tgsi_to_qir_cmp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        qir_SF(c, src[0 * 4 + i]);
        return qir_SEL_X_Y_NS(c,
                              src[1 * 4 + i],
                              src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_mad(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return qir_FADD(c,
                        qir_FMUL(c,
                                 src[0 * 4 + i],
                                 src[1 * 4 + i]),
                        src[2 * 4 + i]);
}

static struct qreg
tgsi_to_qir_lit(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg x = src[0 * 4 + 0];
        struct qreg y = src[0 * 4 + 1];
        struct qreg w = src[0 * 4 + 3];

        switch (i) {
        case 0:
        case 3:
                return qir_uniform_f(c, 1.0);
        case 1:
                return qir_FMAX(c, x, qir_uniform_f(c, 0.0));
        case 2: {
                struct qreg zero = qir_uniform_f(c, 0.0);

                qir_SF(c, x);
                /* XXX: Clamp w to -128..128 */
                return qir_SEL_X_0_NC(c,
                                      qir_EXP2(c, qir_FMUL(c,
                                                           w,
                                                           qir_LOG2(c,
                                                                    qir_FMAX(c,
                                                                             y,
                                                                             zero)))));
        }
        default:
                assert(!"not reached");
                return c->undef;
        }
}

static struct qreg
tgsi_to_qir_lrp(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg src0 = src[0 * 4 + i];
        struct qreg src1 = src[1 * 4 + i];
        struct qreg src2 = src[2 * 4 + i];

        /* LRP is:
         *    src0 * src1 + (1 - src0) * src2.
         * -> src0 * src1 + src2 - src0 * src2
         * -> src2 + src0 * (src1 - src2)
         */
        return qir_FADD(c, src2, qir_FMUL(c, src0, qir_FSUB(c, src1, src2)));
}

static void
tgsi_to_qir_tex(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src)
{
        assert(!tgsi_inst->Instruction.Saturate);

        struct qreg s = src[0 * 4 + 0];
        struct qreg t = src[0 * 4 + 1];
        struct qreg r = src[0 * 4 + 2];
        uint32_t unit = tgsi_inst->Src[1].Register.Index;

        struct qreg proj = c->undef;
        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
                proj = qir_RCP(c, src[0 * 4 + 3]);
                s = qir_FMUL(c, s, proj);
                t = qir_FMUL(c, t, proj);
        }

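        /* Texture setup uniforms are consumed in the same order as the TMU
         * coordinate writes below: each qir_TEX_* call takes the next entry
         * from this stream.
         */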
        struct qreg texture_u[] = {
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
                add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
                add_uniform(c, QUNIFORM_CONSTANT, 0),
        };
        uint32_t next_texture_u = 0;

        /* There is no native support for GL texture rectangle coordinates, so
         * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
         * 1]).
         */
        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
                s = qir_FMUL(c, s,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_X,
                                                  unit));
                t = qir_FMUL(c, t,
                             get_temp_for_uniform(c,
                                                  QUNIFORM_TEXRECT_SCALE_Y,
                                                  unit));
        }

        if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
            tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
                struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r);
                struct qreg rcp_ma = qir_RCP(c, ma);
                s = qir_FMUL(c, s, rcp_ma);
                t = qir_FMUL(c, t, rcp_ma);
                r = qir_FMUL(c, r, rcp_ma);

                texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2, unit);

                qir_TEX_R(c, r, texture_u[next_texture_u++]);
        } else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                qir_TEX_R(c, get_temp_for_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
                          texture_u[next_texture_u++]);
        }

        if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
                s = qir_FMIN(c, qir_FMAX(c, s, qir_uniform_f(c, 0.0)),
                             qir_uniform_f(c, 1.0));
        }

        if (c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                t = qir_FMIN(c, qir_FMAX(c, t, qir_uniform_f(c, 0.0)),
                             qir_uniform_f(c, 1.0));
        }

        qir_TEX_T(c, t, texture_u[next_texture_u++]);

        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB)
                qir_TEX_B(c, src[0 * 4 + 3], texture_u[next_texture_u++]);

        qir_TEX_S(c, s, texture_u[next_texture_u++]);

        c->num_texture_samples++;
        struct qreg r4 = qir_TEX_RESULT(c);

        enum pipe_format format = c->key->tex[unit].format;

        struct qreg unpacked[4];
        if (util_format_is_depth_or_stencil(format)) {
                struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
                                                         qir_uniform_ui(c, 8)));
                struct qreg normalized = qir_FMUL(c, depthf,
                                                  qir_uniform_f(c, 1.0f/0xffffff));

                struct qreg depth_output;

                struct qreg one = qir_uniform_f(c, 1.0f);
                if (c->key->tex[unit].compare_mode) {
                        struct qreg compare = src[0 * 4 + 2];

                        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP)
                                compare = qir_FMUL(c, compare, proj);

                        switch (c->key->tex[unit].compare_func) {
                        case PIPE_FUNC_NEVER:
                                depth_output = qir_uniform_f(c, 0.0f);
                                break;
                        case PIPE_FUNC_ALWAYS:
                                depth_output = one;
                                break;
                        case PIPE_FUNC_EQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZS(c, one);
                                break;
                        case PIPE_FUNC_NOTEQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZC(c, one);
                                break;
                        case PIPE_FUNC_GREATER:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        case PIPE_FUNC_GEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LESS:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        }
                } else {
                        depth_output = normalized;
                }

                for (int i = 0; i < 4; i++)
                        unpacked[i] = depth_output;
        } else {
                for (int i = 0; i < 4; i++)
                        unpacked[i] = qir_R4_UNPACK(c, r4, i);
        }

        const uint8_t *format_swiz = vc4_get_format_swizzle(format);
        uint8_t swiz[4];
        util_format_compose_swizzles(format_swiz, c->key->tex[unit].swizzle, swiz);
        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                update_dst(c, tgsi_inst, i,
                           get_swizzled_channel(c, unpacked, swiz[i]));
        }
}

static struct qreg
tgsi_to_qir_pow(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        /* Note that this instruction replicates its result from the x
         * channel.
         */
        return qir_EXP2(c, qir_FMUL(c,
                                    src[1 * 4 + 0],
                                    qir_LOG2(c, src[0 * 4 + 0])));
}

static struct qreg
tgsi_to_qir_trunc(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
{
        return qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
}

/**
 * Computes x - floor(x), which is tricky because our FTOI truncates (rounds
 * to zero).
 */
static struct qreg
tgsi_to_qir_frc(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
        struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
        qir_SF(c, diff);
        return qir_SEL_X_Y_NS(c,
                              qir_FADD(c, diff, qir_uniform_f(c, 1.0)),
                              diff);
}

/**
 * Computes floor(x), which is tricky because our FTOI truncates (rounds to
 * zero).
 */
static struct qreg
tgsi_to_qir_flr(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));

        /* This will be < 0 if we truncated and the truncation was of a value
         * that was < 0 in the first place.
         */
        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));

        return qir_SEL_X_Y_NS(c,
                              qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)),
                              trunc);
}

static struct qreg
tgsi_to_qir_dp(struct vc4_compile *c,
               struct tgsi_full_instruction *tgsi_inst,
               int num, struct qreg *src, int i)
{
        struct qreg sum = qir_FMUL(c, src[0 * 4 + 0], src[1 * 4 + 0]);
        for (int j = 1; j < num; j++) {
                sum = qir_FADD(c, sum, qir_FMUL(c,
                                                src[0 * 4 + j],
                                                src[1 * 4 + j]));
        }
        return sum;
}

static struct qreg
tgsi_to_qir_dp2(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 2, src, i);
}

static struct qreg
tgsi_to_qir_dp3(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 3, src, i);
}

static struct qreg
tgsi_to_qir_dp4(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        return tgsi_to_qir_dp(c, tgsi_inst, 4, src, i);
}

static struct qreg
tgsi_to_qir_abs(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        struct qreg arg = src[0 * 4 + i];
        return qir_FMAXABS(c, arg, arg);
}

/* Note that this instruction replicates its result from the x channel */
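/* sin(x) is approximated by scaling the argument into periods of 2*pi,
 * taking the fractional part, and evaluating a degree-7 Taylor polynomial
 * of sin(2*pi*t) in that fraction.
 */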
static struct qreg
tgsi_to_qir_sin(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        float coeff[] = {
                2.0 * M_PI,
                -pow(2.0 * M_PI, 3) / (3 * 2 * 1),
                pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
        };

        struct qreg scaled_x =
                qir_FMUL(c,
                         src[0 * 4 + 0],
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));

        struct qreg x = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);
        struct qreg x2 = qir_FMUL(c, x, x);
        struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
        for (int j = 1; j < ARRAY_SIZE(coeff); j++) {
                x = qir_FMUL(c, x, x2);
                sum = qir_FADD(c,
                               sum,
                               qir_FMUL(c,
                                        x,
                                        qir_uniform_f(c, coeff[j])));
        }
        return sum;
}

/* Note that this instruction replicates its result from the x channel */
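/* As with SIN, the argument is reduced to a fraction of the period and
 * cos(2*pi*t) is evaluated as a degree-6 Taylor polynomial in that
 * fraction.
 */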
static struct qreg
tgsi_to_qir_cos(struct vc4_compile *c,
                struct tgsi_full_instruction *tgsi_inst,
                enum qop op, struct qreg *src, int i)
{
        float coeff[] = {
                1.0f,
                -pow(2.0 * M_PI, 2) / (2 * 1),
                pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
        };

        struct qreg scaled_x =
                qir_FMUL(c, src[0 * 4 + 0],
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
        struct qreg x_frac = tgsi_to_qir_frc(c, NULL, 0, &scaled_x, 0);

        struct qreg sum = qir_uniform_f(c, coeff[0]);
        struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
        struct qreg x = x2; /* Current x^2, x^4, or x^6 */
        for (int j = 1; j < ARRAY_SIZE(coeff); j++) {
                if (j != 1)
                        x = qir_FMUL(c, x, x2);

                struct qreg mul = qir_FMUL(c,
                                           x,
                                           qir_uniform_f(c, coeff[j]));
                sum = qir_FADD(c, sum, mul);
        }
        return sum;
}

static struct qreg
tgsi_to_qir_clamp(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
{
        return qir_FMAX(c, qir_FMIN(c,
                                    src[0 * 4 + i],
                                    src[2 * 4 + i]),
                        src[1 * 4 + i]);
}

static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
        enum pipe_format format = c->vs_key->attr_formats[attr];
        struct qreg vpm_reads[4];

        /* Right now, we're setting the VPM offsets to be 16 bytes wide every
         * time, so we always read 4 32-bit VPM entries.
         */
        for (int i = 0; i < 4; i++) {
                vpm_reads[i] = qir_get_temp(c);
                qir_emit(c, qir_inst(QOP_VPM_READ,
                                     vpm_reads[i],
                                     c->undef,
                                     c->undef));
                c->num_inputs++;
        }

        bool format_warned = false;
        const struct util_format_description *desc =
                util_format_description(format);

        for (int i = 0; i < 4; i++) {
                uint8_t swiz = desc->swizzle[i];
                struct qreg result;

                if (swiz > UTIL_FORMAT_SWIZZLE_W) {
                        result = get_swizzled_channel(c, vpm_reads, swiz);
                } else if (desc->channel[swiz].size == 32 &&
                           desc->channel[swiz].type == UTIL_FORMAT_TYPE_FLOAT) {
                        result = get_swizzled_channel(c, vpm_reads, swiz);
                } else if (desc->channel[swiz].size == 8 &&
                           (desc->channel[swiz].type == UTIL_FORMAT_TYPE_UNSIGNED ||
                            desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) &&
                           desc->channel[swiz].normalized) {
                        struct qreg vpm = vpm_reads[0];
                        if (desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED)
                                vpm = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
                        result = qir_UNPACK_8(c, vpm, swiz);
                } else {
                        if (!format_warned) {
                                fprintf(stderr,
                                        "vtx element %d unsupported type: %s\n",
                                        attr, util_format_name(format));
                                format_warned = true;
                        }
                        result = qir_uniform_f(c, 0.0);
                }

                if (swiz <= UTIL_FORMAT_SWIZZLE_W &&
                    desc->channel[swiz].normalized &&
                    desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) {
                        result = qir_FSUB(c,
                                          qir_FMUL(c,
                                                   result,
                                                   qir_uniform_f(c, 2.0)),
                                          qir_uniform_f(c, 1.0));
                }

                c->inputs[attr * 4 + i] = result;
        }
}

static void
tgsi_to_qir_kill_if(struct vc4_compile *c, struct qreg *src, int i)
{
        if (c->discard.file == QFILE_NULL)
                c->discard = qir_uniform_f(c, 0.0);
        qir_SF(c, src[0 * 4 + i]);
        c->discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
                                    c->discard);
}

static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
        c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
        c->inputs[attr * 4 + 2] =
                qir_FMUL(c,
                         qir_ITOF(c, qir_FRAG_Z(c)),
                         qir_uniform_f(c, 1.0 / 0xffffff));
        c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}

static void
emit_point_coord_input(struct vc4_compile *c, int attr)
{
        if (c->point_x.file == QFILE_NULL) {
                c->point_x = qir_uniform_f(c, 0.0);
                c->point_y = qir_uniform_f(c, 0.0);
        }

        c->inputs[attr * 4 + 0] = c->point_x;
        if (c->fs_key->point_coord_upper_left) {
                c->inputs[attr * 4 + 1] = qir_FSUB(c,
                                                   qir_uniform_f(c, 1.0),
                                                   c->point_y);
        } else {
                c->inputs[attr * 4 + 1] = c->point_y;
        }
        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
}

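/* Varyings arrive needing the rest of the interpolation equation applied:
 * multiply by the fragment's W, then fold in the per-varying C coefficient
 * via VARY_ADD_C.
 */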
static struct qreg
emit_fragment_varying(struct vc4_compile *c, int index)
{
        struct qreg vary = {
                QFILE_VARY,
                index
        };

        return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
}

static void
emit_fragment_input(struct vc4_compile *c, int attr,
                    struct tgsi_full_declaration *decl)
{
        for (int i = 0; i < 4; i++) {
                c->inputs[attr * 4 + i] =
                        emit_fragment_varying(c, attr * 4 + i);
                c->num_inputs++;

                if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR ||
                    decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR)
                        c->color_inputs |= 1 << i;
        }
}

static void
emit_face_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FSUB(c,
                                           qir_uniform_f(c, 1.0),
                                           qir_FMUL(c,
                                                    qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
                                                    qir_uniform_f(c, 2.0)));
        c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
}

static void
emit_tgsi_declaration(struct vc4_compile *c,
                      struct tgsi_full_declaration *decl)
{
        switch (decl->Declaration.File) {
        case TGSI_FILE_TEMPORARY:
                resize_qreg_array(c, &c->temps, &c->temps_array_size,
                                  (decl->Range.Last + 1) * 4);
                break;

        case TGSI_FILE_INPUT:
                resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                  (decl->Range.Last + 1) * 4);

                for (int i = decl->Range.First;
                     i <= decl->Range.Last;
                     i++) {
                        if (c->stage == QSTAGE_FRAG) {
                                if (decl->Semantic.Name ==
                                    TGSI_SEMANTIC_POSITION) {
                                        emit_fragcoord_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
                                        emit_face_input(c, i);
                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
                                           (c->fs_key->point_sprite_mask &
                                            (1 << decl->Semantic.Index))) {
                                        emit_point_coord_input(c, i);
                                } else {
                                        emit_fragment_input(c, i, decl);
                                }
                        } else {
                                emit_vertex_input(c, i);
                        }
                }
                break;

        case TGSI_FILE_OUTPUT:
                resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
                                  (decl->Range.Last + 1) * 4);

                switch (decl->Semantic.Name) {
                case TGSI_SEMANTIC_POSITION:
                        c->output_position_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_COLOR:
                        c->output_color_index = decl->Range.First * 4;
                        break;
                case TGSI_SEMANTIC_PSIZE:
                        c->output_point_size_index = decl->Range.First * 4;
                        break;
                }

                break;
        }
}

static void
emit_tgsi_instruction(struct vc4_compile *c,
                      struct tgsi_full_instruction *tgsi_inst)
{
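        /* Maps TGSI opcodes to QIR: entries using tgsi_to_qir_alu are simple
         * per-channel ALU ops emitted with the listed QIR opcode; the rest
         * have dedicated emit helpers.
         */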
        struct {
                enum qop op;
                struct qreg (*func)(struct vc4_compile *c,
                                    struct tgsi_full_instruction *tgsi_inst,
                                    enum qop op,
                                    struct qreg *src, int i);
        } op_trans[] = {
                [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
                [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
                [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
                [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
                [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
                [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
                [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
                [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu },
                [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu },
                [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu },
                [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu },
                [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu },
                [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu },
                [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu },
                [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu },
                [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu },
                [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu },
                [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu },
                [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu },

                [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul },
                [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
                [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },

                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
                [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
                [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
                [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
                [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
                [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq },
                [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne },
                [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge },
                [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt },
                [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq },
                [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne },
                [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge },
                [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },

                [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
                [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
                [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
                [TGSI_OPCODE_DP4] = { 0, tgsi_to_qir_dp4 },
                [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_alu },
                [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_alu },
                [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_alu },
                [TGSI_OPCODE_LIT] = { 0, tgsi_to_qir_lit },
                [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },
                [TGSI_OPCODE_POW] = { 0, tgsi_to_qir_pow },
                [TGSI_OPCODE_TRUNC] = { 0, tgsi_to_qir_trunc },
                [TGSI_OPCODE_FRC] = { 0, tgsi_to_qir_frc },
                [TGSI_OPCODE_FLR] = { 0, tgsi_to_qir_flr },
                [TGSI_OPCODE_SIN] = { 0, tgsi_to_qir_sin },
                [TGSI_OPCODE_COS] = { 0, tgsi_to_qir_cos },
                [TGSI_OPCODE_CLAMP] = { 0, tgsi_to_qir_clamp },
        };
        static int instr_count = 0;
        uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;

        if (tgsi_op == TGSI_OPCODE_END)
                return;

        struct qreg src_regs[12];
        for (int s = 0; s < 3; s++) {
                for (int i = 0; i < 4; i++) {
                        src_regs[4 * s + i] =
                                get_src(c, tgsi_inst->Instruction.Opcode,
                                        &tgsi_inst->Src[s].Register, i);
                }
        }

        switch (tgsi_op) {
        case TGSI_OPCODE_TEX:
        case TGSI_OPCODE_TXP:
        case TGSI_OPCODE_TXB:
                tgsi_to_qir_tex(c, tgsi_inst,
                                op_trans[tgsi_op].op, src_regs);
                return;
        case TGSI_OPCODE_KILL:
                c->discard = qir_uniform_f(c, 1.0);
                return;
        case TGSI_OPCODE_KILL_IF:
                for (int i = 0; i < 4; i++)
                        tgsi_to_qir_kill_if(c, src_regs, i);
                return;
        default:
                break;
        }

        if (tgsi_op >= ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
                fprintf(stderr, "unknown tgsi inst: ");
                tgsi_dump_instruction(tgsi_inst, instr_count++);
                fprintf(stderr, "\n");
                abort();
        }

        for (int i = 0; i < 4; i++) {
                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                        continue;

                struct qreg result;

                result = op_trans[tgsi_op].func(c, tgsi_inst,
                                                op_trans[tgsi_op].op,
                                                src_regs, i);

                if (tgsi_inst->Instruction.Saturate) {
                        float low = (tgsi_inst->Instruction.Saturate ==
                                     TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
                        result = qir_FMAX(c,
                                          qir_FMIN(c,
                                                   result,
                                                   qir_uniform_f(c, 1.0)),
                                          qir_uniform_f(c, low));
                }

                update_dst(c, tgsi_inst, i, result);
        }
}

static void
parse_tgsi_immediate(struct vc4_compile *c, struct tgsi_full_immediate *imm)
{
        for (int i = 0; i < 4; i++) {
                unsigned n = c->num_consts++;
                resize_qreg_array(c, &c->consts, &c->consts_array_size, n + 1);
                c->consts[n] = qir_uniform_ui(c, imm->u[i].Uint);
        }
}

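/* Returns "val" scaled by one blend factor, reading the constant blend
 * color through uniforms when needed.
 */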
static struct qreg
vc4_blend_channel(struct vc4_compile *c,
                  struct qreg *dst,
                  struct qreg *src,
                  struct qreg val,
                  unsigned factor,
                  int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return val;
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return qir_FMUL(c, val, src[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return qir_FMUL(c, val, src[3]);
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return qir_FMUL(c, val, dst[3]);
        case PIPE_BLENDFACTOR_DST_COLOR:
                return qir_FMUL(c, val, dst[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                return qir_FMIN(c, src[3], qir_FSUB(c,
                                                    qir_uniform_f(c, 1.0),
                                                    dst[3]));
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     channel));
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return qir_FMUL(c, val,
                                get_temp_for_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     3));
        case PIPE_BLENDFACTOR_ZERO:
                return qir_uniform_f(c, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[channel]));
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[3]));
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[3]));
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[channel]));
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              channel)));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         get_temp_for_uniform(c,
                                                              QUNIFORM_BLEND_CONST_COLOR,
                                                              3)));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return val;
        }
}

static struct qreg
vc4_blend_func(struct vc4_compile *c,
               struct qreg src, struct qreg dst,
               unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return qir_FADD(c, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return qir_FSUB(c, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return qir_FSUB(c, dst, src);
        case PIPE_BLEND_MIN:
                return qir_FMIN(c, src, dst);
        case PIPE_BLEND_MAX:
                return qir_FMAX(c, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

/**
 * Implements fixed function blending in shader code.
 *
 * VC4 doesn't have any hardware support for blending.  Instead, you read the
 * current contents of the destination from the tile buffer after having
 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
 * math using your output color and that destination value, and update the
 * output color appropriately.
 */
static void
vc4_blend(struct vc4_compile *c, struct qreg *result,
          struct qreg *dst_color, struct qreg *src_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        struct qreg src_blend[4], dst_blend[4];
        for (int i = 0; i < 3; i++) {
                src_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 src_color[i],
                                                 blend->rgb_src_factor, i);
                dst_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 dst_color[i],
                                                 blend->rgb_dst_factor, i);
        }
        src_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         src_color[3],
                                         blend->alpha_src_factor, 3);
        dst_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         dst_color[3],
                                         blend->alpha_dst_factor, 3);

        for (int i = 0; i < 3; i++) {
                result[i] = vc4_blend_func(c,
                                           src_blend[i], dst_blend[i],
                                           blend->rgb_func);
        }
        result[3] = vc4_blend_func(c,
                                   src_blend[3], dst_blend[3],
                                   blend->alpha_func);
}

static void
alpha_test_discard(struct vc4_compile *c)
{
        struct qreg src_alpha;
        struct qreg alpha_ref;

        if (!c->fs_key->alpha_test)
                return;

        alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0);

        if (c->output_color_index != -1)
                src_alpha = c->outputs[c->output_color_index + 3];
        else
                src_alpha = qir_uniform_f(c, 1.0);

        if (c->discard.file == QFILE_NULL)
                c->discard = qir_uniform_f(c, 0.0);

        switch (c->fs_key->alpha_test_func) {
        case PIPE_FUNC_NEVER:
                c->discard = qir_uniform_f(c, 1.0);
                break;
        case PIPE_FUNC_ALWAYS:
                break;
        case PIPE_FUNC_EQUAL:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_ZS(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_NOTEQUAL:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_ZC(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_GREATER:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_NC(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_GEQUAL:
                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
                c->discard = qir_SEL_X_Y_NS(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_LESS:
                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
                c->discard = qir_SEL_X_Y_NS(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        case PIPE_FUNC_LEQUAL:
                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
                c->discard = qir_SEL_X_Y_NC(c, c->discard,
                                            qir_uniform_f(c, 1.0));
                break;
        }
}

static void
emit_frag_end(struct vc4_compile *c)
{
        alpha_test_discard(c);

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        if (c->fs_key->blend.blend_enable ||
            c->fs_key->blend.colormask != 0xf) {
                struct qreg r4 = qir_TLB_COLOR_READ(c);
                for (int i = 0; i < 4; i++)
                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
                for (int i = 0; i < 4; i++)
                        dst_color[i] = get_swizzled_channel(c,
                                                            tlb_read_color,
                                                            format_swiz[i]);
        }

        struct qreg blend_color[4];
        struct qreg undef_array[4] = {
                c->undef, c->undef, c->undef, c->undef
        };
        vc4_blend(c, blend_color, dst_color,
                  (c->output_color_index != -1 ?
                   c->outputs + c->output_color_index :
                   undef_array));

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        for (int i = 0; i < 4; i++) {
                if (!(c->fs_key->blend.colormask & (1 << i))) {
                        blend_color[i] = dst_color[i];
                }
        }

        /* Debug: Sometimes you're getting a black output and just want to see
         * if the FS is getting executed at all.  Spam magenta into the color
         * output.
         */
        if (0) {
                blend_color[0] = qir_uniform_f(c, 1.0);
                blend_color[1] = qir_uniform_f(c, 0.0);
                blend_color[2] = qir_uniform_f(c, 1.0);
                blend_color[3] = qir_uniform_f(c, 0.5);
        }

        struct qreg swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
                                                           format_swiz[i]);
        }

        if (c->discard.file != QFILE_NULL)
                qir_TLB_DISCARD_SETUP(c, c->discard);

        if (c->fs_key->stencil_enabled) {
                qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 0));
                if (c->fs_key->stencil_twoside) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 1));
                }
                if (c->fs_key->stencil_full_writemasks) {
                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 2));
                }
        }

        if (c->fs_key->depth_enabled) {
                struct qreg z;
                if (c->output_position_index != -1) {
                        z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
                                                 qir_uniform_f(c, 0xffffff)));
                } else {
                        z = qir_FRAG_Z(c);
                }
                qir_TLB_Z_WRITE(c, z);
        }

        bool color_written = false;
        for (int i = 0; i < 4; i++) {
                if (swizzled_outputs[i].file != QFILE_NULL)
                        color_written = true;
        }

        struct qreg packed_color;
        if (color_written) {
                /* Fill in any undefined colors.  The simulator will assertion
                 * fail if we read something that wasn't written, and I don't
                 * know what hardware does.
                 */
                for (int i = 0; i < 4; i++) {
                        if (swizzled_outputs[i].file == QFILE_NULL)
                                swizzled_outputs[i] = qir_uniform_f(c, 0.0);
                }
                packed_color = qir_get_temp(c);
                qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
                                      swizzled_outputs[0],
                                      swizzled_outputs[1],
                                      swizzled_outputs[2],
                                      swizzled_outputs[3]));
        } else {
                packed_color = qir_uniform_ui(c, 0);
        }

        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                             packed_color, c->undef));
}

static void
emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
{
        struct qreg xyi[2];

        for (int i = 0; i < 2; i++) {
                struct qreg scale =
                        add_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);

                xyi[i] = qir_FTOI(c, qir_FMUL(c,
                                              qir_FMUL(c,
                                                       c->outputs[i],
                                                       scale),
                                              rcp_w));
        }

        qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
}

static void
emit_zs_write(struct vc4_compile *c, struct qreg rcp_w)
{
        struct qreg zscale = add_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
        struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);

        qir_VPM_WRITE(c, qir_FMUL(c, qir_FADD(c, qir_FMUL(c,
                                                          c->outputs[2],
                                                          zscale),
                                              zoffset),
                                  rcp_w));
}

static void
emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
{
        qir_VPM_WRITE(c, rcp_w);
}

static void
emit_point_size_write(struct vc4_compile *c)
{
        struct qreg point_size;

        if (c->output_point_size_index != -1)
                point_size = c->outputs[c->output_point_size_index + 3];
        else
                point_size = qir_uniform_f(c, 1.0);

        /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
         * BCM21553).
         */
        point_size = qir_FMAX(c, point_size, qir_uniform_f(c, .125));

        qir_VPM_WRITE(c, point_size);
}

static void
emit_vert_end(struct vc4_compile *c)
{
        struct qreg rcp_w = qir_RCP(c, c->outputs[3]);

        emit_scaled_viewport_write(c, rcp_w);
        emit_zs_write(c, rcp_w);
        emit_rcp_wc_write(c, rcp_w);
        if (c->vs_key->per_vertex_point_size)
                emit_point_size_write(c);

        for (int i = 4; i < c->num_outputs; i++) {
                qir_VPM_WRITE(c, c->outputs[i]);
        }
}

static void
emit_coord_end(struct vc4_compile *c)
{
        struct qreg rcp_w = qir_RCP(c, c->outputs[3]);

        for (int i = 0; i < 4; i++)
                qir_VPM_WRITE(c, c->outputs[i]);

        emit_scaled_viewport_write(c, rcp_w);
        emit_zs_write(c, rcp_w);
        emit_rcp_wc_write(c, rcp_w);
        if (c->vs_key->per_vertex_point_size)
                emit_point_size_write(c);
}

static struct vc4_compile *
vc4_shader_tgsi_to_qir(struct vc4_context *vc4,
                       struct vc4_compiled_shader *shader, enum qstage stage,
                       struct vc4_key *key)
{
        struct vc4_compile *c = qir_compile_init();
        int ret;

        c->stage = stage;

        c->shader_state = key->shader_state;
        ret = tgsi_parse_init(&c->parser, c->shader_state->tokens);
        assert(ret == TGSI_PARSE_OK);

        if (vc4_debug & VC4_DEBUG_TGSI) {
                fprintf(stderr, "TGSI:\n");
                tgsi_dump(c->shader_state->tokens, 0);
        }

        c->key = key;
        switch (stage) {
        case QSTAGE_FRAG:
                c->fs_key = (struct vc4_fs_key *)key;
                if (c->fs_key->is_points) {
                        c->point_x = emit_fragment_varying(c, 0);
                        c->point_y = emit_fragment_varying(c, 0);
                } else if (c->fs_key->is_lines) {
                        c->line_x = emit_fragment_varying(c, 0);
                }
                break;
        case QSTAGE_VERT:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        case QSTAGE_COORD:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        }

        while (!tgsi_parse_end_of_tokens(&c->parser)) {
                tgsi_parse_token(&c->parser);

                switch (c->parser.FullToken.Token.Type) {
                case TGSI_TOKEN_TYPE_DECLARATION:
                        emit_tgsi_declaration(c,
                                              &c->parser.FullToken.FullDeclaration);
                        break;

                case TGSI_TOKEN_TYPE_INSTRUCTION:
                        emit_tgsi_instruction(c,
                                              &c->parser.FullToken.FullInstruction);
                        break;

                case TGSI_TOKEN_TYPE_IMMEDIATE:
                        parse_tgsi_immediate(c,
                                             &c->parser.FullToken.FullImmediate);
                        break;
                }
        }

        switch (stage) {
        case QSTAGE_FRAG:
                emit_frag_end(c);
                break;
        case QSTAGE_VERT:
                emit_vert_end(c);
                break;
        case QSTAGE_COORD:
                emit_coord_end(c);
                break;
        }

        tgsi_parse_free(&c->parser);

        qir_optimize(c);

        if (vc4_debug & VC4_DEBUG_QIR) {
                fprintf(stderr, "QIR:\n");
                qir_dump(c);
        }
        qir_reorder_uniforms(c);
        vc4_generate_code(vc4, c);

        if (vc4_debug & VC4_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s: %d instructions\n",
                        qir_get_stage_name(c->stage), c->qpu_inst_count);
                fprintf(stderr, "SHADER-DB: %s: %d uniforms\n",
                        qir_get_stage_name(c->stage), c->num_uniforms);
        }

        return c;
}

static void *
vc4_shader_state_create(struct pipe_context *pctx,
                        const struct pipe_shader_state *cso)
{
        struct pipe_shader_state *so = CALLOC_STRUCT(pipe_shader_state);
        if (!so)
                return NULL;

        so->tokens = tgsi_dup_tokens(cso->tokens);
        if (!so->tokens) {
                free(so);
                return NULL;
        }

        return so;
}

static void
copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
                             int shader_index,
                             struct vc4_compile *c)
{
        int count = c->num_uniforms;
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];

        uinfo->count = count;
        uinfo->data = malloc(count * sizeof(*uinfo->data));
        memcpy(uinfo->data, c->uniform_data,
               count * sizeof(*uinfo->data));
        uinfo->contents = malloc(count * sizeof(*uinfo->contents));
        memcpy(uinfo->contents, c->uniform_contents,
               count * sizeof(*uinfo->contents));
        uinfo->num_texture_samples = c->num_texture_samples;
}

static void
vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
               struct vc4_fs_key *key)
{
        struct vc4_compile *c = vc4_shader_tgsi_to_qir(vc4, shader,
                                                       QSTAGE_FRAG,
                                                       &key->base);
        shader->num_inputs = c->num_inputs;
        shader->color_inputs = c->color_inputs;
        copy_uniform_state_to_shader(shader, 0, c);
        shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
                                      c->qpu_inst_count * sizeof(uint64_t),
                                      "fs_code");

        qir_compile_destroy(c);
}

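/* The VS is compiled twice: once as the full vertex shader and once as the
 * coordinate shader run by the binner, packed one after the other into a
 * single BO.
 */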
static void
vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
               struct vc4_vs_key *key)
{
        struct vc4_compile *vs_c = vc4_shader_tgsi_to_qir(vc4, shader,
                                                          QSTAGE_VERT,
                                                          &key->base);
        copy_uniform_state_to_shader(shader, 0, vs_c);

        struct vc4_compile *cs_c = vc4_shader_tgsi_to_qir(vc4, shader,
                                                          QSTAGE_COORD,
                                                          &key->base);
        copy_uniform_state_to_shader(shader, 1, cs_c);

        uint32_t vs_size = vs_c->qpu_inst_count * sizeof(uint64_t);
        uint32_t cs_size = cs_c->qpu_inst_count * sizeof(uint64_t);
        shader->coord_shader_offset = vs_size; /* XXX: alignment? */
        shader->bo = vc4_bo_alloc(vc4->screen,
                                  shader->coord_shader_offset + cs_size,
                                  "vs_code");

        void *map = vc4_bo_map(shader->bo);
        memcpy(map, vs_c->qpu_insts, vs_size);
        memcpy(map + shader->coord_shader_offset,
               cs_c->qpu_insts, cs_size);

        qir_compile_destroy(vs_c);
        qir_compile_destroy(cs_c);
}

static void
vc4_setup_shared_key(struct vc4_key *key, struct vc4_texture_stateobj *texstate)
{
        for (int i = 0; i < texstate->num_textures; i++) {
                struct pipe_sampler_view *sampler = texstate->textures[i];
                struct pipe_sampler_state *sampler_state =
                        texstate->samplers[i];

                if (sampler) {
                        struct pipe_resource *prsc = sampler->texture;
                        key->tex[i].format = prsc->format;
                        key->tex[i].swizzle[0] = sampler->swizzle_r;
                        key->tex[i].swizzle[1] = sampler->swizzle_g;
                        key->tex[i].swizzle[2] = sampler->swizzle_b;
                        key->tex[i].swizzle[3] = sampler->swizzle_a;
                        key->tex[i].compare_mode = sampler_state->compare_mode;
                        key->tex[i].compare_func = sampler_state->compare_func;
                        key->tex[i].wrap_s = sampler_state->wrap_s;
                        key->tex[i].wrap_t = sampler_state->wrap_t;
                }
        }
}

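/* Builds the FS key from the current state, then reuses a cached compiled
 * variant if one matches, compiling and caching a new one otherwise.
 */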
static void
vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_fs_key local_key;
        struct vc4_fs_key *key = &local_key;

        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(&key->base, &vc4->fragtex);
        key->base.shader_state = vc4->prog.bind_fs;
        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                         prim_mode <= PIPE_PRIM_LINE_STRIP);
        key->blend = vc4->blend->rt[0];

        if (vc4->framebuffer.cbufs[0])
                key->color_format = vc4->framebuffer.cbufs[0]->format;

        key->stencil_enabled = vc4->zsa->stencil_uniforms[0] != 0;
        key->stencil_twoside = vc4->zsa->stencil_uniforms[1] != 0;
        key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
        key->depth_enabled = (vc4->zsa->base.depth.enabled ||
                              key->stencil_enabled);
        if (vc4->zsa->base.alpha.enabled) {
                key->alpha_test = true;
                key->alpha_test_func = vc4->zsa->base.alpha.func;
        }

        if (key->is_points) {
                key->point_sprite_mask =
                        vc4->rasterizer->base.sprite_coord_enable;
                key->point_coord_upper_left =
                        (vc4->rasterizer->base.sprite_coord_mode ==
                         PIPE_SPRITE_COORD_UPPER_LEFT);
        }

        vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
        if (vc4->prog.fs)
                return;

        key = malloc(sizeof(*key));
        memcpy(key, &local_key, sizeof(*key));

        struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
        vc4_fs_compile(vc4, shader, key);
        util_hash_table_set(vc4->fs_cache, key, shader);

        if (vc4->rasterizer->base.flatshade &&
            vc4->prog.fs &&
            vc4->prog.fs->color_inputs != shader->color_inputs) {
                vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
        }

        vc4->prog.fs = shader;
}

static void
vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_vs_key local_key;
        struct vc4_vs_key *key = &local_key;

        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(&key->base, &vc4->verttex);
        key->base.shader_state = vc4->prog.bind_vs;

        for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
                key->attr_formats[i] = vc4->vtx->pipe[i].src_format;

        key->per_vertex_point_size =
                (prim_mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex);

        vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
        if (vc4->prog.vs)
                return;

        key = malloc(sizeof(*key));
        memcpy(key, &local_key, sizeof(*key));

        struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
        vc4_vs_compile(vc4, shader, key);
        util_hash_table_set(vc4->vs_cache, key, shader);

        vc4->prog.vs = shader;
}

void
vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
{
        vc4_update_compiled_fs(vc4, prim_mode);
        vc4_update_compiled_vs(vc4, prim_mode);
}

static unsigned
fs_cache_hash(void *key)
{
        return util_hash_crc32(key, sizeof(struct vc4_fs_key));
}

static unsigned
vs_cache_hash(void *key)
{
        return util_hash_crc32(key, sizeof(struct vc4_vs_key));
}

static int
fs_cache_compare(void *key1, void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_fs_key));
}

static int
vs_cache_compare(void *key1, void *key2)
{
        return memcmp(key1, key2, sizeof(struct vc4_vs_key));
}

struct delete_state {
        struct vc4_context *vc4;
        struct pipe_shader_state *shader_state;
};

static enum pipe_error
fs_delete_from_cache(void *in_key, void *in_value, void *data)
{
        struct delete_state *del = data;
        struct vc4_fs_key *key = in_key;
        struct vc4_compiled_shader *shader = in_value;

        if (key->base.shader_state == del->shader_state) {
                util_hash_table_remove(del->vc4->fs_cache, key);
                vc4_bo_unreference(&shader->bo);
                free(shader);
        }

        return 0;
}

static enum pipe_error
vs_delete_from_cache(void *in_key, void *in_value, void *data)
{
        struct delete_state *del = data;
        struct vc4_vs_key *key = in_key;
        struct vc4_compiled_shader *shader = in_value;

        if (key->base.shader_state == del->shader_state) {
                util_hash_table_remove(del->vc4->vs_cache, key);
                vc4_bo_unreference(&shader->bo);
                free(shader);
        }

        return 0;
}

static void
vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct pipe_shader_state *so = hwcso;
        struct delete_state del;

        del.vc4 = vc4;
        del.shader_state = so;
        util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
        util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);

        free((void *)so->tokens);
        free(so);
}

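/* GL's CLAMP wrap mode has no direct hardware equivalent: it behaves like
 * clamp-to-edge under nearest filtering and clamp-to-border under linear,
 * so the translation depends on the sampler's filters.
 */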
static uint32_t
translate_wrap(uint32_t p_wrap, bool using_nearest)
{
        switch (p_wrap) {
        case PIPE_TEX_WRAP_REPEAT:
                return 0;
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return 1;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return 2;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return 3;
        case PIPE_TEX_WRAP_CLAMP:
                return (using_nearest ? 1 : 3);
        default:
                fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
                assert(!"not reached");
                return 0;
        }
}

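/* Emits texture config parameter 0 as a relocation against the
 * texture's BO: the address of miplevel 0, with last_level, the
 * cube-map flag, and the low bits of the texture type packed into the
 * low bits of the offset.
 */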
static void
write_texture_p0(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        bool is_cube = texture->target == PIPE_TEXTURE_CUBE;

        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
                 rsc->slices[0].offset | texture->u.tex.last_level |
                 is_cube << 9 |
                 ((rsc->vc4_format & 7) << 4));
}

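/* Emits texture config parameter 1: the top bit of the texture type,
 * the level-0 dimensions, and the filter/wrap controls.  The minify
 * field is a combined image/mip filter code, which is why the two map
 * values are added rather than ORed into separate fields.
 */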
static void
write_texture_p1(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        static const uint32_t mipfilter_map[] = {
                [PIPE_TEX_MIPFILTER_NEAREST] = 2,
                [PIPE_TEX_MIPFILTER_LINEAR] = 4,
                [PIPE_TEX_MIPFILTER_NONE] = 0
        };
        static const uint32_t imgfilter_map[] = {
                [PIPE_TEX_FILTER_NEAREST] = 1,
                [PIPE_TEX_FILTER_LINEAR] = 0,
        };

        bool either_nearest =
                (sampler->mag_img_filter == PIPE_TEX_FILTER_NEAREST ||
                 sampler->min_img_filter == PIPE_TEX_FILTER_NEAREST);

        cl_u32(&vc4->uniforms,
               ((rsc->vc4_format >> 4) << 31) |
               (texture->texture->height0 << 20) |
               (texture->texture->width0 << 8) |
               (imgfilter_map[sampler->mag_img_filter] << 7) |
               ((imgfilter_map[sampler->min_img_filter] +
                 mipfilter_map[sampler->min_mip_filter]) << 4) |
               (translate_wrap(sampler->wrap_t, either_nearest) << 2) |
               (translate_wrap(sampler->wrap_s, either_nearest) << 0));
}

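/* Emits texture config parameter 2; the (1 << 30) tags the word as a
 * cube-map stride parameter, with the per-face stride in the low bits.
 */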
static void
write_texture_p2(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_u32(&vc4->uniforms, (1 << 30) | rsc->cube_map_stride);
}

#define SWIZ(x,y,z,w) {          \
        UTIL_FORMAT_SWIZZLE_##x, \
        UTIL_FORMAT_SWIZZLE_##y, \
        UTIL_FORMAT_SWIZZLE_##z, \
        UTIL_FORMAT_SWIZZLE_##w  \
}

static void
write_texture_border_color(struct vc4_context *vc4,
                           struct vc4_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 sampler->border_color.f,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc4_format-sampled texture contents are
         * replaced with our border color, the vc4_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                switch (rsc->vc4_format) {
                default:
                case VC4_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC4_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_u32(&vc4->uniforms, uc.ui[0]);
}

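/* RECT textures are addressed with unnormalized coordinates in
 * gallium, but the hardware only samples with normalized ones, so the
 * compiled shader multiplies the incoming coordinates by these
 * 1/dimension uniforms.
 */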
static uint32_t
get_texrect_scale(struct vc4_texture_stateobj *texstate,
                  enum quniform_contents contents,
                  uint32_t data)
{
        struct pipe_sampler_view *texture = texstate->textures[data];
        uint32_t dim;

        if (contents == QUNIFORM_TEXRECT_SCALE_X)
                dim = texture->texture->width0;
        else
                dim = texture->texture->height0;

        return fui(1.0f / dim);
}

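/* Streams out the uniform values for one stage of a draw call, in the
 * order the compiled code reads them.  Texture config uniforms become
 * relocations or words derived from the current sampler state, so this
 * stream is rebuilt per draw rather than stored with the shader.
 */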
void
vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                   struct vc4_constbuf_stateobj *cb,
                   struct vc4_texture_stateobj *texstate,
                   int shader_index)
{
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;

        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);

        for (int i = 0; i < uinfo->count; i++) {
                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        cl_u32(&vc4->uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        cl_u32(&vc4->uniforms,
                               gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_f(&vc4->uniforms, vc4->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_f(&vc4->uniforms, vc4->viewport.scale[2]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P0:
                        write_texture_p0(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P2:
                        write_texture_p2(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        write_texture_border_color(vc4, texstate,
                                                   uinfo->data[i]);
                        break;

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_u32(&vc4->uniforms,
                               get_texrect_scale(texstate,
                                                 uinfo->contents[i],
                                                 uinfo->data[i]));
                        break;

                case QUNIFORM_BLEND_CONST_COLOR:
                        cl_f(&vc4->uniforms,
                             vc4->blend_color.color[uinfo->data[i]]);
                        break;

                case QUNIFORM_STENCIL:
                        cl_u32(&vc4->uniforms,
                               vc4->zsa->stencil_uniforms[uinfo->data[i]] |
                               (uinfo->data[i] <= 1 ?
                                (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_f(&vc4->uniforms, vc4->zsa->base.alpha.ref_value);
                        break;
                }
#if 0
                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
                fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
                        shader, shader_index, i, written_val, uif(written_val));
#endif
        }
}

static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        vc4->prog.bind_fs = hwcso;
        vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
        vc4->dirty |= VC4_DIRTY_PROG;
}

static void
vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        vc4->prog.bind_vs = hwcso;
        vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
        vc4->dirty |= VC4_DIRTY_PROG;
}

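/* VS and FS share the same create/delete hooks; the stage only matters
 * once a variant key is built.  The variant caches created here live
 * for the lifetime of the context.
 */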
void
vc4_program_init(struct pipe_context *pctx)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        pctx->create_vs_state = vc4_shader_state_create;
        pctx->delete_vs_state = vc4_shader_state_delete;

        pctx->create_fs_state = vc4_shader_state_create;
        pctx->delete_fs_state = vc4_shader_state_delete;

        pctx->bind_fs_state = vc4_fp_state_bind;
        pctx->bind_vs_state = vc4_vp_state_bind;

        vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
        vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
}