vc4: Add support for the lit opcode.
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
1 /*
2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26 #include <inttypes.h>
27 #include "pipe/p_state.h"
28 #include "util/u_format.h"
29 #include "util/u_hash_table.h"
30 #include "util/u_hash.h"
31 #include "util/u_memory.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_dump.h"
34
35 #include "vc4_context.h"
36 #include "vc4_qpu.h"
37 #include "vc4_qir.h"
38
/**
 * State carried while translating one TGSI shader into QIR.
 */
struct tgsi_to_qir {
        struct tgsi_parse_context parser;
        struct qcompile *c;
        /* Current QIR value of each TGSI register channel, indexed by
         * (register index * 4 + channel).
         */
        struct qreg *temps;
        struct qreg *inputs;
        struct qreg *outputs;
        /* QIR temp holding the loaded value of each uniform slot, as filled
         * in by get_temp_for_uniform().
         */
        struct qreg *uniforms;
        /* Values of the TGSI immediates, appended four channels at a time. */
        struct qreg *consts;
        uint32_t num_consts;

        struct vc4_shader_state *shader_state;
        /* The compile key, cast according to the stage being compiled. */
        struct vc4_fs_key *fs_key;
        struct vc4_vs_key *vs_key;

        /* Parallel arrays describing each uniform slot: what kind of value
         * it holds and the associated payload.
         */
        uint32_t *uniform_data;
        enum quniform_contents *uniform_contents;
        uint32_t num_uniforms;
        uint32_t num_inputs;
        uint32_t num_outputs;
};
59
/* Common leading part of the per-stage shader cache keys. */
struct vc4_key {
        struct vc4_shader_state *shader_state;
};
63
/* Fragment shader cache key.  Keys are hashed and compared bytewise over
 * the whole struct, so they must be fully zeroed before being filled in.
 */
struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
};
68
/* Vertex/coordinate shader cache key.  Keys are hashed and compared
 * bytewise over the whole struct, so they must be fully zeroed before being
 * filled in.
 */
struct vc4_vs_key {
        struct vc4_key base;
        enum pipe_format attr_formats[8];
};
73
74 static struct qreg
75 add_uniform(struct tgsi_to_qir *trans,
76 enum quniform_contents contents,
77 uint32_t data)
78 {
79 uint32_t uniform = trans->num_uniforms++;
80 struct qreg u = { QFILE_UNIF, uniform };
81
82 trans->uniform_contents[uniform] = contents;
83 trans->uniform_data[uniform] = data;
84
85 return u;
86 }
87
88 static struct qreg
89 get_temp_for_uniform(struct tgsi_to_qir *trans, enum quniform_contents contents,
90 uint32_t data)
91 {
92 struct qcompile *c = trans->c;
93
94 for (int i = 0; i < trans->num_uniforms; i++) {
95 if (trans->uniform_contents[i] == contents &&
96 trans->uniform_data[i] == data)
97 return trans->uniforms[i];
98 }
99
100 struct qreg u = add_uniform(trans, contents, data);
101 struct qreg t = qir_MOV(c, u);
102
103 trans->uniforms[u.index] = t;
104 return t;
105 }
106
107 static struct qreg
108 qir_uniform_ui(struct tgsi_to_qir *trans, uint32_t ui)
109 {
110 return get_temp_for_uniform(trans, QUNIFORM_CONSTANT, ui);
111 }
112
113 static struct qreg
114 qir_uniform_f(struct tgsi_to_qir *trans, float f)
115 {
116 return qir_uniform_ui(trans, fui(f));
117 }
118
119 static struct qreg
120 get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
121 {
122 struct qcompile *c = trans->c;
123 struct qreg r = c->undef;
124
125 uint32_t s = i;
126 switch (i) {
127 case TGSI_SWIZZLE_X:
128 s = src->SwizzleX;
129 break;
130 case TGSI_SWIZZLE_Y:
131 s = src->SwizzleY;
132 break;
133 case TGSI_SWIZZLE_Z:
134 s = src->SwizzleZ;
135 break;
136 case TGSI_SWIZZLE_W:
137 s = src->SwizzleW;
138 break;
139 default:
140 abort();
141 }
142
143 assert(!src->Indirect);
144
145 switch (src->File) {
146 case TGSI_FILE_NULL:
147 return r;
148 case TGSI_FILE_TEMPORARY:
149 r = trans->temps[src->Index * 4 + s];
150 break;
151 case TGSI_FILE_IMMEDIATE:
152 r = trans->consts[src->Index * 4 + s];
153 break;
154 case TGSI_FILE_CONSTANT:
155 r = get_temp_for_uniform(trans, QUNIFORM_UNIFORM,
156 src->Index * 4 + s);
157 break;
158 case TGSI_FILE_INPUT:
159 r = trans->inputs[src->Index * 4 + s];
160 break;
161 default:
162 fprintf(stderr, "unknown src file %d\n", src->File);
163 abort();
164 }
165
166 if (src->Absolute)
167 r = qir_FMAXABS(c, r, r);
168
169 if (src->Negate)
170 r = qir_FSUB(c, qir_uniform_f(trans, 0), r);
171
172 return r;
173 };
174
175
176 static void
177 update_dst(struct tgsi_to_qir *trans, struct tgsi_full_instruction *tgsi_inst,
178 int i, struct qreg val)
179 {
180 struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;
181
182 assert(!tgsi_dst->Indirect);
183
184 switch (tgsi_dst->File) {
185 case TGSI_FILE_TEMPORARY:
186 trans->temps[tgsi_dst->Index * 4 + i] = val;
187 break;
188 case TGSI_FILE_OUTPUT:
189 trans->outputs[tgsi_dst->Index * 4 + i] = val;
190 trans->num_outputs = MAX2(trans->num_outputs,
191 tgsi_dst->Index * 4 + i + 1);
192 break;
193 default:
194 fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
195 abort();
196 }
197 };
198
199 static struct qreg
200 tgsi_to_qir_alu(struct tgsi_to_qir *trans,
201 struct tgsi_full_instruction *tgsi_inst,
202 enum qop op, struct qreg *src, int i)
203 {
204 struct qcompile *c = trans->c;
205 struct qreg dst = qir_get_temp(c);
206 qir_emit(c, qir_inst4(op, dst,
207 src[0 * 4 + i],
208 src[1 * 4 + i],
209 src[2 * 4 + i],
210 c->undef));
211 return dst;
212 }
213
214 static struct qreg
215 tgsi_to_qir_mad(struct tgsi_to_qir *trans,
216 struct tgsi_full_instruction *tgsi_inst,
217 enum qop op, struct qreg *src, int i)
218 {
219 struct qcompile *c = trans->c;
220 return qir_FADD(c,
221 qir_FMUL(c,
222 src[0 * 4 + i],
223 src[1 * 4 + i]),
224 src[2 * 4 + i]);
225 }
226
227 static struct qreg
228 tgsi_to_qir_lit(struct tgsi_to_qir *trans,
229 struct tgsi_full_instruction *tgsi_inst,
230 enum qop op, struct qreg *src, int i)
231 {
232 struct qcompile *c = trans->c;
233 struct qreg x = src[0 * 4 + 0];
234 struct qreg y = src[0 * 4 + 1];
235 struct qreg w = src[0 * 4 + 3];
236
237 switch (i) {
238 case 0:
239 case 3:
240 return qir_uniform_f(trans, 1.0);
241 case 1:
242 return qir_FMAX(c, src[0 * 4 + 0], qir_uniform_f(trans, 0.0));
243 case 2: {
244 struct qreg zero = qir_uniform_f(trans, 0.0);
245
246 /* XXX: Clamp w to -128..128 */
247 return qir_CMP(c,
248 x,
249 zero,
250 qir_EXP2(c, qir_FMUL(c,
251 w,
252 qir_LOG2(c,
253 qir_FMAX(c,
254 y,
255 zero)))));
256 }
257 default:
258 assert(!"not reached");
259 return c->undef;
260 }
261 }
262
263 static struct qreg
264 tgsi_to_qir_lrp(struct tgsi_to_qir *trans,
265 struct tgsi_full_instruction *tgsi_inst,
266 enum qop op, struct qreg *src, int i)
267 {
268 struct qcompile *c = trans->c;
269 struct qreg src0 = src[0 * 4 + i];
270 struct qreg src1 = src[1 * 4 + i];
271 struct qreg src2 = src[2 * 4 + i];
272
273 /* LRP is:
274 * src0 * src1 + (1 - src0) * src2.
275 * -> src0 * src1 + src2 - src0 * src2
276 * -> src2 + src0 * (src1 - src2)
277 */
278 return qir_FADD(c, src2, qir_FMUL(c, src0, qir_FSUB(c, src1, src2)));
279
280 }
281
282 static struct qreg
283 tgsi_to_qir_pow(struct tgsi_to_qir *trans,
284 struct tgsi_full_instruction *tgsi_inst,
285 enum qop op, struct qreg *src, int i)
286 {
287 struct qcompile *c = trans->c;
288
289 /* Note that this instruction replicates its result from the x channel
290 */
291 return qir_EXP2(c, qir_FMUL(c,
292 src[1 * 4 + 0],
293 qir_LOG2(c, src[0 * 4 + 0])));
294 }
295
296 static struct qreg
297 tgsi_to_qir_dp(struct tgsi_to_qir *trans,
298 struct tgsi_full_instruction *tgsi_inst,
299 int num, struct qreg *src, int i)
300 {
301 struct qcompile *c = trans->c;
302
303 struct qreg sum = qir_FMUL(c, src[0 * 4 + 0], src[1 * 4 + 0]);
304 for (int j = 1; j < num; j++) {
305 sum = qir_FADD(c, sum, qir_FMUL(c,
306 src[0 * 4 + j],
307 src[1 * 4 + j]));
308 }
309 return sum;
310 }
311
312 static struct qreg
313 tgsi_to_qir_dp2(struct tgsi_to_qir *trans,
314 struct tgsi_full_instruction *tgsi_inst,
315 enum qop op, struct qreg *src, int i)
316 {
317 return tgsi_to_qir_dp(trans, tgsi_inst, 2, src, i);
318 }
319
320 static struct qreg
321 tgsi_to_qir_dp3(struct tgsi_to_qir *trans,
322 struct tgsi_full_instruction *tgsi_inst,
323 enum qop op, struct qreg *src, int i)
324 {
325 return tgsi_to_qir_dp(trans, tgsi_inst, 3, src, i);
326 }
327
328 static struct qreg
329 tgsi_to_qir_dp4(struct tgsi_to_qir *trans,
330 struct tgsi_full_instruction *tgsi_inst,
331 enum qop op, struct qreg *src, int i)
332 {
333 return tgsi_to_qir_dp(trans, tgsi_inst, 4, src, i);
334 }
335
336 static struct qreg
337 tgsi_to_qir_abs(struct tgsi_to_qir *trans,
338 struct tgsi_full_instruction *tgsi_inst,
339 enum qop op, struct qreg *src, int i)
340 {
341 struct qcompile *c = trans->c;
342 struct qreg arg = src[0 * 4 + i];
343 return qir_FMAXABS(c, arg, arg);
344 }
345
346 static void
347 emit_tgsi_declaration(struct tgsi_to_qir *trans,
348 struct tgsi_full_declaration *decl)
349 {
350 struct qcompile *c = trans->c;
351
352 switch (decl->Declaration.File) {
353 case TGSI_FILE_INPUT:
354 if (c->stage == QSTAGE_FRAG) {
355 for (int index = decl->Range.First;
356 index <= decl->Range.Last;
357 index++) {
358 for (int i = 0; i < 4; i++) {
359 struct qreg vary = {
360 QFILE_VARY,
361 index * 4 + i
362 };
363
364 /* XXX: multiply by W */
365 trans->inputs[index * 4 + i] =
366 qir_VARY_ADD_C(c,
367 qir_MOV(c,
368 vary));
369
370 trans->num_inputs++;
371 }
372 }
373 }
374 break;
375 }
376 }
377
378 static void
379 emit_tgsi_instruction(struct tgsi_to_qir *trans,
380 struct tgsi_full_instruction *tgsi_inst)
381 {
382 struct qcompile *c = trans->c;
383 struct {
384 enum qop op;
385 struct qreg (*func)(struct tgsi_to_qir *trans,
386 struct tgsi_full_instruction *tgsi_inst,
387 enum qop op,
388 struct qreg *src, int i);
389 } op_trans[] = {
390 [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
391 [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
392 [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
393 [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
394 [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
395 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
396 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
397 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
398 [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu },
399 [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu },
400 [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu },
401 [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu },
402 [TGSI_OPCODE_CMP] = { QOP_CMP, tgsi_to_qir_alu },
403 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
404 [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
405 [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
406 [TGSI_OPCODE_DP4] = { 0, tgsi_to_qir_dp4 },
407 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_alu },
408 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
409 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_alu },
410 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_alu },
411 [TGSI_OPCODE_LIT] = { 0, tgsi_to_qir_lit },
412 [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },
413 [TGSI_OPCODE_POW] = { 0, tgsi_to_qir_pow },
414 };
415 static int asdf = 0;
416 uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
417
418 if (tgsi_op == TGSI_OPCODE_END)
419 return;
420
421 if (tgsi_op > ARRAY_SIZE(op_trans) || !op_trans[tgsi_op].func) {
422 fprintf(stderr, "unknown tgsi inst: ");
423 tgsi_dump_instruction(tgsi_inst, asdf++);
424 fprintf(stderr, "\n");
425 abort();
426 }
427
428 struct qreg src_regs[12];
429 for (int s = 0; s < 3; s++) {
430 for (int i = 0; i < 4; i++) {
431 src_regs[4 * s + i] =
432 get_src(trans, &tgsi_inst->Src[s].Register, i);
433 }
434 }
435
436 for (int i = 0; i < 4; i++) {
437 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
438 continue;
439
440 struct qreg result;
441
442 result = op_trans[tgsi_op].func(trans, tgsi_inst,
443 op_trans[tgsi_op].op,
444 src_regs, i);
445
446 if (tgsi_inst->Instruction.Saturate) {
447 float low = (tgsi_inst->Instruction.Saturate ==
448 TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
449 result = qir_FMAX(c,
450 qir_FMIN(c,
451 result,
452 qir_uniform_f(trans, 1.0)),
453 qir_uniform_f(trans, low));
454 }
455
456 update_dst(trans, tgsi_inst, i, result);
457 }
458 }
459
460 static void
461 parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
462 {
463 for (int i = 0; i < 4; i++) {
464 unsigned n = trans->num_consts++;
465 trans->consts[n] = qir_uniform_ui(trans, imm->u[i].Uint);
466 }
467 }
468
/** Fragment shader prologue: currently nothing needs to be emitted. */
static void
emit_frag_init(struct tgsi_to_qir *trans)
{
        (void)trans;
}
473
474 static void
475 emit_vert_init(struct tgsi_to_qir *trans)
476 {
477 struct qcompile *c = trans->c;
478
479 /* XXX: attribute type/size/count */
480 for (int i = 0; i < 4; i++) {
481 trans->inputs[i] = qir_get_temp(c);
482 qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i],
483 c->undef, c->undef));
484 }
485 }
486
487 static void
488 emit_coord_init(struct tgsi_to_qir *trans)
489 {
490 struct qcompile *c = trans->c;
491
492 /* XXX: attribute type/size/count */
493 for (int i = 0; i < 4; i++) {
494 trans->inputs[i] = qir_get_temp(c);
495 qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i],
496 c->undef, c->undef));
497 }
498 }
499
500 static void
501 emit_frag_end(struct tgsi_to_qir *trans)
502 {
503 struct qcompile *c = trans->c;
504
505 struct qreg t = qir_get_temp(c);
506
507 const struct util_format_description *format_desc =
508 util_format_description(trans->fs_key->color_format);
509
510 struct qreg swizzled_outputs[4] = {
511 trans->outputs[format_desc->swizzle[0]],
512 trans->outputs[format_desc->swizzle[1]],
513 trans->outputs[format_desc->swizzle[2]],
514 trans->outputs[format_desc->swizzle[3]],
515 };
516
517 qir_emit(c, qir_inst4(QOP_PACK_COLORS, t,
518 swizzled_outputs[0],
519 swizzled_outputs[1],
520 swizzled_outputs[2],
521 swizzled_outputs[3]));
522 qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
523 t, c->undef));
524 }
525
526 static void
527 emit_scaled_viewport_write(struct tgsi_to_qir *trans)
528 {
529 struct qcompile *c = trans->c;
530 struct qreg xyi[2];
531
532 for (int i = 0; i < 2; i++) {
533 struct qreg scale =
534 add_uniform(trans, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
535
536 xyi[i] = qir_FTOI(c, qir_FMUL(c, trans->outputs[i], scale));
537 }
538
539 qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
540 }
541
542 static void
543 emit_zs_write(struct tgsi_to_qir *trans)
544 {
545 struct qcompile *c = trans->c;
546
547 /* XXX: rescale */
548 qir_VPM_WRITE(c, trans->outputs[2]);
549 }
550
551 static void
552 emit_1_wc_write(struct tgsi_to_qir *trans)
553 {
554 struct qcompile *c = trans->c;
555
556 /* XXX: RCP */
557 qir_VPM_WRITE(c, trans->outputs[3]);
558 }
559
560 static void
561 emit_vert_end(struct tgsi_to_qir *trans)
562 {
563 struct qcompile *c = trans->c;
564
565 emit_scaled_viewport_write(trans);
566 emit_zs_write(trans);
567 emit_1_wc_write(trans);
568
569 for (int i = 4; i < trans->num_outputs; i++) {
570 qir_VPM_WRITE(c, trans->outputs[i]);
571 }
572 }
573
574 static void
575 emit_coord_end(struct tgsi_to_qir *trans)
576 {
577 struct qcompile *c = trans->c;
578
579 for (int i = 0; i < 4; i++)
580 qir_VPM_WRITE(c, trans->outputs[i]);
581
582 emit_scaled_viewport_write(trans);
583 emit_zs_write(trans);
584 emit_1_wc_write(trans);
585 }
586
587 static struct tgsi_to_qir *
588 vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage,
589 struct vc4_key *key)
590 {
591 struct tgsi_to_qir *trans = CALLOC_STRUCT(tgsi_to_qir);
592 struct qcompile *c;
593 int ret;
594
595 c = qir_compile_init();
596 c->stage = stage;
597
598 memset(trans, 0, sizeof(*trans));
599 /* XXX sizing */
600 trans->temps = calloc(sizeof(struct qreg), 1024);
601 trans->inputs = calloc(sizeof(struct qreg), 8 * 4);
602 trans->outputs = calloc(sizeof(struct qreg), 1024);
603 trans->uniforms = calloc(sizeof(struct qreg), 1024);
604 trans->consts = calloc(sizeof(struct qreg), 1024);
605
606 trans->uniform_data = calloc(sizeof(uint32_t), 1024);
607 trans->uniform_contents = calloc(sizeof(enum quniform_contents), 1024);
608
609 trans->shader_state = key->shader_state;
610 trans->c = c;
611 ret = tgsi_parse_init(&trans->parser, trans->shader_state->base.tokens);
612 assert(ret == TGSI_PARSE_OK);
613
614 if (vc4_debug & VC4_DEBUG_TGSI) {
615 fprintf(stderr, "TGSI:\n");
616 tgsi_dump(trans->shader_state->base.tokens, 0);
617 }
618
619 switch (stage) {
620 case QSTAGE_FRAG:
621 trans->fs_key = (struct vc4_fs_key *)key;
622 emit_frag_init(trans);
623 break;
624 case QSTAGE_VERT:
625 trans->vs_key = (struct vc4_vs_key *)key;
626 emit_vert_init(trans);
627 break;
628 case QSTAGE_COORD:
629 trans->vs_key = (struct vc4_vs_key *)key;
630 emit_coord_init(trans);
631 break;
632 }
633
634 while (!tgsi_parse_end_of_tokens(&trans->parser)) {
635 tgsi_parse_token(&trans->parser);
636
637 switch (trans->parser.FullToken.Token.Type) {
638 case TGSI_TOKEN_TYPE_DECLARATION:
639 emit_tgsi_declaration(trans,
640 &trans->parser.FullToken.FullDeclaration);
641 break;
642
643 case TGSI_TOKEN_TYPE_INSTRUCTION:
644 emit_tgsi_instruction(trans,
645 &trans->parser.FullToken.FullInstruction);
646 break;
647
648 case TGSI_TOKEN_TYPE_IMMEDIATE:
649 parse_tgsi_immediate(trans,
650 &trans->parser.FullToken.FullImmediate);
651 break;
652 }
653 }
654
655 switch (stage) {
656 case QSTAGE_FRAG:
657 emit_frag_end(trans);
658 break;
659 case QSTAGE_VERT:
660 emit_vert_end(trans);
661 break;
662 case QSTAGE_COORD:
663 emit_coord_end(trans);
664 break;
665 }
666
667 tgsi_parse_free(&trans->parser);
668 free(trans->temps);
669
670 qir_optimize(c);
671
672 if (vc4_debug & VC4_DEBUG_QIR) {
673 fprintf(stderr, "QIR:\n");
674 qir_dump(c);
675 }
676 vc4_generate_code(c);
677
678 if (vc4_debug & VC4_DEBUG_SHADERDB) {
679 fprintf(stderr, "SHADER-DB: %s: %d instructions\n",
680 qir_get_stage_name(c->stage), c->qpu_inst_count);
681 fprintf(stderr, "SHADER-DB: %s: %d uniforms\n",
682 qir_get_stage_name(c->stage), trans->num_uniforms);
683 }
684
685 return trans;
686 }
687
688 static void *
689 vc4_shader_state_create(struct pipe_context *pctx,
690 const struct pipe_shader_state *cso)
691 {
692 struct vc4_shader_state *so = CALLOC_STRUCT(vc4_shader_state);
693 if (!so)
694 return NULL;
695
696 so->base.tokens = tgsi_dup_tokens(cso->tokens);
697
698 return so;
699 }
700
701 static void
702 copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
703 int shader_index,
704 struct tgsi_to_qir *trans)
705 {
706 int count = trans->num_uniforms;
707 struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
708
709 uinfo->count = count;
710 uinfo->data = malloc(count * sizeof(*uinfo->data));
711 memcpy(uinfo->data, trans->uniform_data,
712 count * sizeof(*uinfo->data));
713 uinfo->contents = malloc(count * sizeof(*uinfo->contents));
714 memcpy(uinfo->contents, trans->uniform_contents,
715 count * sizeof(*uinfo->contents));
716 }
717
718 static void
719 vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
720 struct vc4_fs_key *key)
721 {
722 struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG,
723 &key->base);
724 shader->num_inputs = trans->num_inputs;
725 copy_uniform_state_to_shader(shader, 0, trans);
726 shader->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
727 trans->c->qpu_inst_count * sizeof(uint64_t),
728 "fs_code");
729
730 qir_compile_destroy(trans->c);
731 free(trans);
732 }
733
734 static void
735 vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
736 struct vc4_vs_key *key)
737 {
738 struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(shader,
739 QSTAGE_VERT,
740 &key->base);
741 copy_uniform_state_to_shader(shader, 0, vs_trans);
742
743 struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(shader,
744 QSTAGE_COORD,
745 &key->base);
746 copy_uniform_state_to_shader(shader, 1, cs_trans);
747
748 uint32_t vs_size = vs_trans->c->qpu_inst_count * sizeof(uint64_t);
749 uint32_t cs_size = cs_trans->c->qpu_inst_count * sizeof(uint64_t);
750 shader->coord_shader_offset = vs_size; /* XXX: alignment? */
751 shader->bo = vc4_bo_alloc(vc4->screen,
752 shader->coord_shader_offset + cs_size,
753 "vs_code");
754
755 void *map = vc4_bo_map(shader->bo);
756 memcpy(map, vs_trans->c->qpu_insts, vs_size);
757 memcpy(map + shader->coord_shader_offset,
758 cs_trans->c->qpu_insts, cs_size);
759
760 qir_compile_destroy(vs_trans->c);
761 qir_compile_destroy(cs_trans->c);
762 }
763
764 static void
765 vc4_update_compiled_fs(struct vc4_context *vc4)
766 {
767 struct vc4_fs_key local_key;
768 struct vc4_fs_key *key = &local_key;
769
770 memset(key, 0, sizeof(*key));
771 key->base.shader_state = vc4->prog.bind_fs;
772
773 if (vc4->framebuffer.cbufs[0])
774 key->color_format = vc4->framebuffer.cbufs[0]->format;
775
776 vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
777 if (vc4->prog.fs)
778 return;
779
780 key = malloc(sizeof(*key));
781 memcpy(key, &local_key, sizeof(*key));
782
783 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
784 vc4_fs_compile(vc4, shader, key);
785 util_hash_table_set(vc4->fs_cache, key, shader);
786
787 vc4->prog.fs = shader;
788 }
789
790 static void
791 vc4_update_compiled_vs(struct vc4_context *vc4)
792 {
793 struct vc4_vs_key local_key;
794 struct vc4_vs_key *key = &local_key;
795
796 memset(key, 0, sizeof(*key));
797 key->base.shader_state = vc4->prog.bind_vs;
798
799 vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
800 if (vc4->prog.vs)
801 return;
802
803 key = malloc(sizeof(*key));
804 memcpy(key, &local_key, sizeof(*key));
805
806 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
807 vc4_vs_compile(vc4, shader, key);
808 util_hash_table_set(vc4->vs_cache, key, shader);
809
810 vc4->prog.vs = shader;
811 }
812
/**
 * Makes vc4->prog.fs and vc4->prog.vs point at compiled shaders matching the
 * currently bound shader states, compiling new variants on a cache miss.
 */
void
vc4_update_compiled_shaders(struct vc4_context *vc4)
{
        vc4_update_compiled_fs(vc4);
        vc4_update_compiled_vs(vc4);
}
819
820 static unsigned
821 fs_cache_hash(void *key)
822 {
823 return util_hash_crc32(key, sizeof(struct vc4_fs_key));
824 }
825
826 static unsigned
827 vs_cache_hash(void *key)
828 {
829 return util_hash_crc32(key, sizeof(struct vc4_vs_key));
830 }
831
832 static int
833 fs_cache_compare(void *key1, void *key2)
834 {
835 return memcmp(key1, key2, sizeof(struct vc4_fs_key));
836 }
837
838 static int
839 vs_cache_compare(void *key1, void *key2)
840 {
841 return memcmp(key1, key2, sizeof(struct vc4_vs_key));
842 }
843
/* Closure passed to the cache foreach callbacks when a shader state is
 * deleted.
 */
struct delete_state {
        /* Context owning the caches being walked. */
        struct vc4_context *vc4;
        /* The shader state being deleted. */
        struct vc4_shader_state *shader_state;
};
848
849 static enum pipe_error
850 fs_delete_from_cache(void *in_key, void *in_value, void *data)
851 {
852 struct delete_state *del = data;
853 struct vc4_fs_key *key = in_key;
854 struct vc4_compiled_shader *shader = in_value;
855
856 if (key->base.shader_state == data) {
857 util_hash_table_remove(del->vc4->fs_cache, key);
858 vc4_bo_unreference(&shader->bo);
859 free(shader);
860 }
861
862 return 0;
863 }
864
865 static enum pipe_error
866 vs_delete_from_cache(void *in_key, void *in_value, void *data)
867 {
868 struct delete_state *del = data;
869 struct vc4_vs_key *key = in_key;
870 struct vc4_compiled_shader *shader = in_value;
871
872 if (key->base.shader_state == data) {
873 util_hash_table_remove(del->vc4->vs_cache, key);
874 vc4_bo_unreference(&shader->bo);
875 free(shader);
876 }
877
878 return 0;
879 }
880
881 static void
882 vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
883 {
884 struct vc4_context *vc4 = vc4_context(pctx);
885 struct vc4_shader_state *so = hwcso;
886 struct delete_state del;
887
888 del.vc4 = vc4;
889 del.shader_state = so;
890 util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
891 util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);
892
893 free((void *)so->base.tokens);
894 free(so);
895 }
896
897 void
898 vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
899 struct vc4_constbuf_stateobj *cb,
900 int shader_index, struct vc4_bo **out_bo,
901 uint32_t *out_offset)
902 {
903 struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
904 struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen,
905 MAX2(1, uinfo->count * 4), "ubo");
906 uint32_t *map = vc4_bo_map(ubo);
907
908 for (int i = 0; i < uinfo->count; i++) {
909 switch (uinfo->contents[i]) {
910 case QUNIFORM_CONSTANT:
911 map[i] = uinfo->data[i];
912 break;
913 case QUNIFORM_UNIFORM:
914 map[i] = ((uint32_t *)cb->cb[0].user_buffer)[uinfo->data[i]];
915 break;
916 case QUNIFORM_VIEWPORT_X_SCALE:
917 map[i] = fui(vc4->framebuffer.width * 16.0f / 2.0f);
918 break;
919 case QUNIFORM_VIEWPORT_Y_SCALE:
920 map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
921 break;
922 }
923 #if 0
924 fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
925 shader, shader_index, i, map[i], uif(map[i]));
926 #endif
927 }
928
929 *out_bo = ubo;
930 *out_offset = 0;
931 }
932
933 static void
934 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
935 {
936 struct vc4_context *vc4 = vc4_context(pctx);
937 vc4->prog.bind_fs = hwcso;
938 vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
939 vc4->dirty |= VC4_DIRTY_PROG;
940 }
941
942 static void
943 vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
944 {
945 struct vc4_context *vc4 = vc4_context(pctx);
946 vc4->prog.bind_vs = hwcso;
947 vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
948 vc4->dirty |= VC4_DIRTY_PROG;
949 }
950
951 void
952 vc4_program_init(struct pipe_context *pctx)
953 {
954 struct vc4_context *vc4 = vc4_context(pctx);
955
956 pctx->create_vs_state = vc4_shader_state_create;
957 pctx->delete_vs_state = vc4_shader_state_delete;
958
959 pctx->create_fs_state = vc4_shader_state_create;
960 pctx->delete_fs_state = vc4_shader_state_delete;
961
962 pctx->bind_fs_state = vc4_fp_state_bind;
963 pctx->bind_vs_state = vc4_vp_state_bind;
964
965 vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
966 vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
967 }