vc4: Add support for CMP.
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
1 /*
2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26 #include <inttypes.h>
27 #include "pipe/p_state.h"
28 #include "util/u_format.h"
29 #include "util/u_hash_table.h"
30 #include "util/u_hash.h"
31 #include "util/u_memory.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_dump.h"
34
35 #include "vc4_context.h"
36 #include "vc4_qpu.h"
37 #include "vc4_qir.h"
38
39 struct tgsi_to_qir {
40 struct tgsi_parse_context parser;
41 struct qcompile *c;
42 struct qreg *temps;
43 struct qreg *inputs;
44 struct qreg *outputs;
45 struct qreg *uniforms;
46 struct qreg *consts;
47 uint32_t num_consts;
48
49 struct vc4_shader_state *shader_state;
50 struct vc4_fs_key *fs_key;
51 struct vc4_vs_key *vs_key;
52
53 uint32_t *uniform_data;
54 enum quniform_contents *uniform_contents;
55 uint32_t num_uniforms;
56 uint32_t num_inputs;
57 uint32_t num_outputs;
58 };
59
/**
 * Common leading part of the shader cache keys: the uncompiled shader state
 * the compiled variant was built from.
 */
struct vc4_key {
        struct vc4_shader_state *shader_state;
};
63
64 struct vc4_fs_key {
65 struct vc4_key base;
66 enum pipe_format color_format;
67 };
68
69 struct vc4_vs_key {
70 struct vc4_key base;
71 enum pipe_format attr_formats[8];
72 };
73
74 static struct qreg
75 get_temp_for_uniform(struct tgsi_to_qir *trans, uint32_t uniform)
76 {
77 struct qcompile *c = trans->c;
78 struct qreg u = { QFILE_UNIF, uniform };
79
80 struct qreg t = qir_MOV(c, u);
81 trans->uniforms[uniform] = t;
82 return t;
83 }
84
85 static struct qreg
86 qir_uniform_ui(struct tgsi_to_qir *trans, uint32_t ui)
87 {
88 for (int i = 0; i < trans->num_uniforms; i++) {
89 if (trans->uniform_contents[i] == QUNIFORM_CONSTANT &&
90 trans->uniform_data[i] == ui)
91 return trans->uniforms[i];
92 }
93
94 trans->uniform_contents[trans->num_uniforms] = QUNIFORM_CONSTANT;
95 trans->uniform_data[trans->num_uniforms] = ui;
96 return get_temp_for_uniform(trans, trans->num_uniforms++);
97 }
98
99 static struct qreg
100 qir_uniform_f(struct tgsi_to_qir *trans, float f)
101 {
102 return qir_uniform_ui(trans, fui(f));
103 }
104
105 static struct qreg
106 qir_uniform(struct tgsi_to_qir *trans, uint32_t index)
107 {
108 for (int i = 0; i < trans->num_uniforms; i++) {
109 if (trans->uniform_contents[i] == QUNIFORM_UNIFORM &&
110 trans->uniform_data[i] == index)
111 return trans->uniforms[i];
112 }
113
114 trans->uniform_contents[trans->num_uniforms] = QUNIFORM_UNIFORM;
115 trans->uniform_data[trans->num_uniforms] = index;
116 return get_temp_for_uniform(trans, trans->num_uniforms++);
117 }
118
119 static struct qreg
120 get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
121 {
122 struct qcompile *c = trans->c;
123 struct qreg r = c->undef;
124
125 uint32_t s = i;
126 switch (i) {
127 case TGSI_SWIZZLE_X:
128 s = src->SwizzleX;
129 break;
130 case TGSI_SWIZZLE_Y:
131 s = src->SwizzleY;
132 break;
133 case TGSI_SWIZZLE_Z:
134 s = src->SwizzleZ;
135 break;
136 case TGSI_SWIZZLE_W:
137 s = src->SwizzleW;
138 break;
139 default:
140 abort();
141 }
142
143 assert(!src->Indirect);
144
145 switch (src->File) {
146 case TGSI_FILE_NULL:
147 return r;
148 case TGSI_FILE_TEMPORARY:
149 r = trans->temps[src->Index * 4 + s];
150 break;
151 case TGSI_FILE_IMMEDIATE:
152 r = trans->consts[src->Index * 4 + s];
153 break;
154 case TGSI_FILE_CONSTANT:
155 r = qir_uniform(trans, src->Index * 4 + s);
156 break;
157 case TGSI_FILE_INPUT:
158 r = trans->inputs[src->Index * 4 + s];
159 break;
160 default:
161 fprintf(stderr, "unknown src file %d\n", src->File);
162 abort();
163 }
164
165 if (src->Absolute)
166 r = qir_FMAXABS(c, r, r);
167
168 if (src->Negate)
169 r = qir_FSUB(c, qir_uniform_f(trans, 0), r);
170
171 return r;
172 };
173
174
175 static void
176 update_dst(struct tgsi_to_qir *trans, struct tgsi_full_instruction *tgsi_inst,
177 int i, struct qreg val)
178 {
179 struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;
180
181 assert(!tgsi_dst->Indirect);
182
183 switch (tgsi_dst->File) {
184 case TGSI_FILE_TEMPORARY:
185 trans->temps[tgsi_dst->Index * 4 + i] = val;
186 break;
187 case TGSI_FILE_OUTPUT:
188 trans->outputs[tgsi_dst->Index * 4 + i] = val;
189 trans->num_outputs = MAX2(trans->num_outputs,
190 tgsi_dst->Index * 4 + i + 1);
191 break;
192 default:
193 fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
194 abort();
195 }
196 };
197
198 static struct qreg
199 tgsi_to_qir_alu(struct tgsi_to_qir *trans,
200 struct tgsi_full_instruction *tgsi_inst,
201 enum qop op, struct qreg *src, int i)
202 {
203 struct qcompile *c = trans->c;
204 struct qreg dst = qir_get_temp(c);
205 qir_emit(c, qir_inst4(op, dst,
206 src[0 * 4 + i],
207 src[1 * 4 + i],
208 src[2 * 4 + i],
209 c->undef));
210 return dst;
211 }
212
213 static struct qreg
214 tgsi_to_qir_mad(struct tgsi_to_qir *trans,
215 struct tgsi_full_instruction *tgsi_inst,
216 enum qop op, struct qreg *src, int i)
217 {
218 struct qcompile *c = trans->c;
219 return qir_FADD(c,
220 qir_FMUL(c,
221 src[0 * 4 + i],
222 src[1 * 4 + i]),
223 src[2 * 4 + i]);
224 }
225
226 static struct qreg
227 tgsi_to_qir_dp(struct tgsi_to_qir *trans,
228 struct tgsi_full_instruction *tgsi_inst,
229 int num, struct qreg *src, int i)
230 {
231 struct qcompile *c = trans->c;
232
233 struct qreg sum = qir_FMUL(c, src[0 * 4 + 0], src[1 * 4 + 0]);
234 for (int j = 1; j < num; j++) {
235 sum = qir_FADD(c, sum, qir_FMUL(c,
236 src[0 * 4 + j],
237 src[1 * 4 + j]));
238 }
239 return sum;
240 }
241
242 static struct qreg
243 tgsi_to_qir_dp2(struct tgsi_to_qir *trans,
244 struct tgsi_full_instruction *tgsi_inst,
245 enum qop op, struct qreg *src, int i)
246 {
247 return tgsi_to_qir_dp(trans, tgsi_inst, 2, src, i);
248 }
249
250 static struct qreg
251 tgsi_to_qir_dp3(struct tgsi_to_qir *trans,
252 struct tgsi_full_instruction *tgsi_inst,
253 enum qop op, struct qreg *src, int i)
254 {
255 return tgsi_to_qir_dp(trans, tgsi_inst, 3, src, i);
256 }
257
258 static struct qreg
259 tgsi_to_qir_dp4(struct tgsi_to_qir *trans,
260 struct tgsi_full_instruction *tgsi_inst,
261 enum qop op, struct qreg *src, int i)
262 {
263 return tgsi_to_qir_dp(trans, tgsi_inst, 4, src, i);
264 }
265
266 static struct qreg
267 tgsi_to_qir_abs(struct tgsi_to_qir *trans,
268 struct tgsi_full_instruction *tgsi_inst,
269 enum qop op, struct qreg *src, int i)
270 {
271 struct qcompile *c = trans->c;
272 struct qreg arg = src[0 * 4 + i];
273 return qir_FMAXABS(c, arg, arg);
274 }
275
276 static void
277 emit_tgsi_declaration(struct tgsi_to_qir *trans,
278 struct tgsi_full_declaration *decl)
279 {
280 struct qcompile *c = trans->c;
281
282 switch (decl->Declaration.File) {
283 case TGSI_FILE_INPUT:
284 if (c->stage == QSTAGE_FRAG) {
285 for (int index = decl->Range.First;
286 index <= decl->Range.Last;
287 index++) {
288 for (int i = 0; i < 4; i++) {
289 struct qreg vary = {
290 QFILE_VARY,
291 index * 4 + i
292 };
293
294 /* XXX: multiply by W */
295 trans->inputs[index * 4 + i] =
296 qir_VARY_ADD_C(c,
297 qir_MOV(c,
298 vary));
299
300 trans->num_inputs++;
301 }
302 }
303 }
304 break;
305 }
306 }
307
308 static void
309 emit_tgsi_instruction(struct tgsi_to_qir *trans,
310 struct tgsi_full_instruction *tgsi_inst)
311 {
312 struct qcompile *c = trans->c;
313 struct {
314 enum qop op;
315 struct qreg (*func)(struct tgsi_to_qir *trans,
316 struct tgsi_full_instruction *tgsi_inst,
317 enum qop op,
318 struct qreg *src, int i);
319 } op_trans[] = {
320 [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
321 [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
322 [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
323 [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
324 [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
325 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
326 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
327 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
328 [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu },
329 [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu },
330 [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu },
331 [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu },
332 [TGSI_OPCODE_CMP] = { QOP_CMP, tgsi_to_qir_alu },
333 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
334 [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
335 [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
336 [TGSI_OPCODE_DP4] = { 0, tgsi_to_qir_dp4 },
337 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_alu },
338 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
339 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_alu },
340 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_alu },
341 [TGSI_OPCODE_LIT] = { QOP_MOV, tgsi_to_qir_alu }, /* XXX */
342 };
343 static int asdf = 0;
344 uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
345
346 if (tgsi_op == TGSI_OPCODE_END)
347 return;
348
349 if (tgsi_op > ARRAY_SIZE(op_trans) || !op_trans[tgsi_op].func) {
350 fprintf(stderr, "unknown tgsi inst: ");
351 tgsi_dump_instruction(tgsi_inst, asdf++);
352 fprintf(stderr, "\n");
353 abort();
354 }
355
356 struct qreg src_regs[12];
357 for (int s = 0; s < 3; s++) {
358 for (int i = 0; i < 4; i++) {
359 src_regs[4 * s + i] =
360 get_src(trans, &tgsi_inst->Src[s].Register, i);
361 }
362 }
363
364 for (int i = 0; i < 4; i++) {
365 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
366 continue;
367
368 struct qreg result;
369
370 result = op_trans[tgsi_op].func(trans, tgsi_inst,
371 op_trans[tgsi_op].op,
372 src_regs, i);
373
374 if (tgsi_inst->Instruction.Saturate) {
375 float low = (tgsi_inst->Instruction.Saturate ==
376 TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
377 result = qir_FMAX(c,
378 qir_FMIN(c,
379 result,
380 qir_uniform_f(trans, 1.0)),
381 qir_uniform_f(trans, low));
382 }
383
384 update_dst(trans, tgsi_inst, i, result);
385 }
386 }
387
388 static void
389 parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
390 {
391 for (int i = 0; i < 4; i++) {
392 unsigned n = trans->num_consts++;
393 trans->consts[n] = qir_uniform_ui(trans, imm->u[i].Uint);
394 }
395 }
396
/**
 * Fragment shader prologue hook.  Nothing needs to be emitted here.
 */
static void
emit_frag_init(struct tgsi_to_qir *trans)
{
        /* Intentionally empty. */
}
401
402 static void
403 emit_vert_init(struct tgsi_to_qir *trans)
404 {
405 struct qcompile *c = trans->c;
406
407 /* XXX: attribute type/size/count */
408 for (int i = 0; i < 4; i++) {
409 trans->inputs[i] = qir_get_temp(c);
410 qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i],
411 c->undef, c->undef));
412 }
413 }
414
415 static void
416 emit_coord_init(struct tgsi_to_qir *trans)
417 {
418 struct qcompile *c = trans->c;
419
420 /* XXX: attribute type/size/count */
421 for (int i = 0; i < 4; i++) {
422 trans->inputs[i] = qir_get_temp(c);
423 qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i],
424 c->undef, c->undef));
425 }
426 }
427
428 static void
429 emit_frag_end(struct tgsi_to_qir *trans)
430 {
431 struct qcompile *c = trans->c;
432
433 struct qreg t = qir_get_temp(c);
434
435 const struct util_format_description *format_desc =
436 util_format_description(trans->fs_key->color_format);
437
438 struct qreg swizzled_outputs[4] = {
439 trans->outputs[format_desc->swizzle[0]],
440 trans->outputs[format_desc->swizzle[1]],
441 trans->outputs[format_desc->swizzle[2]],
442 trans->outputs[format_desc->swizzle[3]],
443 };
444
445 qir_emit(c, qir_inst4(QOP_PACK_COLORS, t,
446 swizzled_outputs[0],
447 swizzled_outputs[1],
448 swizzled_outputs[2],
449 swizzled_outputs[3]));
450 qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
451 t, c->undef));
452 }
453
454 static void
455 emit_scaled_viewport_write(struct tgsi_to_qir *trans)
456 {
457 struct qcompile *c = trans->c;
458 struct qreg xyi[2];
459
460 for (int i = 0; i < 2; i++) {
461 trans->uniform_contents[trans->num_uniforms] =
462 QUNIFORM_VIEWPORT_X_SCALE + i;
463 struct qreg scale = { QFILE_UNIF, trans->num_uniforms++ };
464
465 xyi[i] = qir_FTOI(c, qir_FMUL(c, trans->outputs[i], scale));
466 }
467
468 qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
469 }
470
471 static void
472 emit_zs_write(struct tgsi_to_qir *trans)
473 {
474 struct qcompile *c = trans->c;
475
476 /* XXX: rescale */
477 qir_VPM_WRITE(c, trans->outputs[2]);
478 }
479
480 static void
481 emit_1_wc_write(struct tgsi_to_qir *trans)
482 {
483 struct qcompile *c = trans->c;
484
485 /* XXX: RCP */
486 qir_VPM_WRITE(c, trans->outputs[3]);
487 }
488
489 static void
490 emit_vert_end(struct tgsi_to_qir *trans)
491 {
492 struct qcompile *c = trans->c;
493
494 emit_scaled_viewport_write(trans);
495 emit_zs_write(trans);
496 emit_1_wc_write(trans);
497
498 for (int i = 4; i < trans->num_outputs; i++) {
499 qir_VPM_WRITE(c, trans->outputs[i]);
500 }
501 }
502
503 static void
504 emit_coord_end(struct tgsi_to_qir *trans)
505 {
506 struct qcompile *c = trans->c;
507
508 for (int i = 0; i < 4; i++)
509 qir_VPM_WRITE(c, trans->outputs[i]);
510
511 emit_scaled_viewport_write(trans);
512 emit_zs_write(trans);
513 emit_1_wc_write(trans);
514 }
515
516 static struct tgsi_to_qir *
517 vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage,
518 struct vc4_key *key)
519 {
520 struct tgsi_to_qir *trans = CALLOC_STRUCT(tgsi_to_qir);
521 struct qcompile *c;
522 int ret;
523
524 c = qir_compile_init();
525 c->stage = stage;
526
527 memset(trans, 0, sizeof(*trans));
528 /* XXX sizing */
529 trans->temps = calloc(sizeof(struct qreg), 1024);
530 trans->inputs = calloc(sizeof(struct qreg), 8 * 4);
531 trans->outputs = calloc(sizeof(struct qreg), 1024);
532 trans->uniforms = calloc(sizeof(struct qreg), 1024);
533 trans->consts = calloc(sizeof(struct qreg), 1024);
534
535 trans->uniform_data = calloc(sizeof(uint32_t), 1024);
536 trans->uniform_contents = calloc(sizeof(enum quniform_contents), 1024);
537
538 trans->shader_state = key->shader_state;
539 trans->c = c;
540 ret = tgsi_parse_init(&trans->parser, trans->shader_state->base.tokens);
541 assert(ret == TGSI_PARSE_OK);
542
543 if (vc4_debug & VC4_DEBUG_TGSI) {
544 fprintf(stderr, "TGSI:\n");
545 tgsi_dump(trans->shader_state->base.tokens, 0);
546 }
547
548 switch (stage) {
549 case QSTAGE_FRAG:
550 trans->fs_key = (struct vc4_fs_key *)key;
551 emit_frag_init(trans);
552 break;
553 case QSTAGE_VERT:
554 trans->vs_key = (struct vc4_vs_key *)key;
555 emit_vert_init(trans);
556 break;
557 case QSTAGE_COORD:
558 trans->vs_key = (struct vc4_vs_key *)key;
559 emit_coord_init(trans);
560 break;
561 }
562
563 while (!tgsi_parse_end_of_tokens(&trans->parser)) {
564 tgsi_parse_token(&trans->parser);
565
566 switch (trans->parser.FullToken.Token.Type) {
567 case TGSI_TOKEN_TYPE_DECLARATION:
568 emit_tgsi_declaration(trans,
569 &trans->parser.FullToken.FullDeclaration);
570 break;
571
572 case TGSI_TOKEN_TYPE_INSTRUCTION:
573 emit_tgsi_instruction(trans,
574 &trans->parser.FullToken.FullInstruction);
575 break;
576
577 case TGSI_TOKEN_TYPE_IMMEDIATE:
578 parse_tgsi_immediate(trans,
579 &trans->parser.FullToken.FullImmediate);
580 break;
581 }
582 }
583
584 switch (stage) {
585 case QSTAGE_FRAG:
586 emit_frag_end(trans);
587 break;
588 case QSTAGE_VERT:
589 emit_vert_end(trans);
590 break;
591 case QSTAGE_COORD:
592 emit_coord_end(trans);
593 break;
594 }
595
596 if (vc4_debug & VC4_DEBUG_QIR) {
597 fprintf(stderr, "QIR:\n");
598 qir_dump(c);
599 }
600
601 tgsi_parse_free(&trans->parser);
602 free(trans->temps);
603
604 vc4_generate_code(c);
605
606 if (vc4_debug & VC4_DEBUG_SHADERDB) {
607 fprintf(stderr, "SHADER-DB: %s: %d instructions\n",
608 qir_get_stage_name(c->stage), c->qpu_inst_count);
609 fprintf(stderr, "SHADER-DB: %s: %d uniforms\n",
610 qir_get_stage_name(c->stage), trans->num_uniforms);
611 }
612
613 return trans;
614 }
615
616 static void *
617 vc4_shader_state_create(struct pipe_context *pctx,
618 const struct pipe_shader_state *cso)
619 {
620 struct vc4_shader_state *so = CALLOC_STRUCT(vc4_shader_state);
621 if (!so)
622 return NULL;
623
624 so->base.tokens = tgsi_dup_tokens(cso->tokens);
625
626 return so;
627 }
628
629 static void
630 copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
631 int shader_index,
632 struct tgsi_to_qir *trans)
633 {
634 int count = trans->num_uniforms;
635 struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
636
637 uinfo->count = count;
638 uinfo->data = malloc(count * sizeof(*uinfo->data));
639 memcpy(uinfo->data, trans->uniform_data,
640 count * sizeof(*uinfo->data));
641 uinfo->contents = malloc(count * sizeof(*uinfo->contents));
642 memcpy(uinfo->contents, trans->uniform_contents,
643 count * sizeof(*uinfo->contents));
644 }
645
646 static void
647 vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
648 struct vc4_fs_key *key)
649 {
650 struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG,
651 &key->base);
652 shader->num_inputs = trans->num_inputs;
653 copy_uniform_state_to_shader(shader, 0, trans);
654 shader->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
655 trans->c->qpu_inst_count * sizeof(uint64_t),
656 "fs_code");
657
658 qir_compile_destroy(trans->c);
659 free(trans);
660 }
661
662 static void
663 vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
664 struct vc4_vs_key *key)
665 {
666 struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(shader,
667 QSTAGE_VERT,
668 &key->base);
669 copy_uniform_state_to_shader(shader, 0, vs_trans);
670
671 struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(shader,
672 QSTAGE_COORD,
673 &key->base);
674 copy_uniform_state_to_shader(shader, 1, cs_trans);
675
676 uint32_t vs_size = vs_trans->c->qpu_inst_count * sizeof(uint64_t);
677 uint32_t cs_size = cs_trans->c->qpu_inst_count * sizeof(uint64_t);
678 shader->coord_shader_offset = vs_size; /* XXX: alignment? */
679 shader->bo = vc4_bo_alloc(vc4->screen,
680 shader->coord_shader_offset + cs_size,
681 "vs_code");
682
683 void *map = vc4_bo_map(shader->bo);
684 memcpy(map, vs_trans->c->qpu_insts, vs_size);
685 memcpy(map + shader->coord_shader_offset,
686 cs_trans->c->qpu_insts, cs_size);
687
688 qir_compile_destroy(vs_trans->c);
689 qir_compile_destroy(cs_trans->c);
690 }
691
692 static void
693 vc4_update_compiled_fs(struct vc4_context *vc4)
694 {
695 struct vc4_fs_key local_key;
696 struct vc4_fs_key *key = &local_key;
697
698 memset(key, 0, sizeof(*key));
699 key->base.shader_state = vc4->prog.bind_fs;
700
701 if (vc4->framebuffer.cbufs[0])
702 key->color_format = vc4->framebuffer.cbufs[0]->format;
703
704 vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
705 if (vc4->prog.fs)
706 return;
707
708 key = malloc(sizeof(*key));
709 memcpy(key, &local_key, sizeof(*key));
710
711 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
712 vc4_fs_compile(vc4, shader, key);
713 util_hash_table_set(vc4->fs_cache, key, shader);
714
715 vc4->prog.fs = shader;
716 }
717
718 static void
719 vc4_update_compiled_vs(struct vc4_context *vc4)
720 {
721 struct vc4_vs_key local_key;
722 struct vc4_vs_key *key = &local_key;
723
724 memset(key, 0, sizeof(*key));
725 key->base.shader_state = vc4->prog.bind_vs;
726
727 vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
728 if (vc4->prog.vs)
729 return;
730
731 key = malloc(sizeof(*key));
732 memcpy(key, &local_key, sizeof(*key));
733
734 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
735 vc4_vs_compile(vc4, shader, key);
736 util_hash_table_set(vc4->vs_cache, key, shader);
737
738 vc4->prog.vs = shader;
739 }
740
/**
 * Makes sure vc4->prog.fs and vc4->prog.vs refer to compiled shaders that
 * match the currently bound state, compiling new variants as needed.
 */
void
vc4_update_compiled_shaders(struct vc4_context *vc4)
{
        /* Fragment shader first, then vertex shader. */
        vc4_update_compiled_fs(vc4);
        vc4_update_compiled_vs(vc4);
}
747
748 static unsigned
749 fs_cache_hash(void *key)
750 {
751 return util_hash_crc32(key, sizeof(struct vc4_fs_key));
752 }
753
754 static unsigned
755 vs_cache_hash(void *key)
756 {
757 return util_hash_crc32(key, sizeof(struct vc4_vs_key));
758 }
759
760 static int
761 fs_cache_compare(void *key1, void *key2)
762 {
763 return memcmp(key1, key2, sizeof(struct vc4_fs_key));
764 }
765
766 static int
767 vs_cache_compare(void *key1, void *key2)
768 {
769 return memcmp(key1, key2, sizeof(struct vc4_vs_key));
770 }
771
/**
 * Context handed to the cache-walking callbacks when a shader state object
 * is deleted.
 */
struct delete_state {
        /* Context whose shader caches are being walked. */
        struct vc4_context *vc4;
        /* Shader state object that is being deleted. */
        struct vc4_shader_state *shader_state;
};
776
777 static enum pipe_error
778 fs_delete_from_cache(void *in_key, void *in_value, void *data)
779 {
780 struct delete_state *del = data;
781 struct vc4_fs_key *key = in_key;
782 struct vc4_compiled_shader *shader = in_value;
783
784 if (key->base.shader_state == data) {
785 util_hash_table_remove(del->vc4->fs_cache, key);
786 vc4_bo_unreference(&shader->bo);
787 free(shader);
788 }
789
790 return 0;
791 }
792
793 static enum pipe_error
794 vs_delete_from_cache(void *in_key, void *in_value, void *data)
795 {
796 struct delete_state *del = data;
797 struct vc4_vs_key *key = in_key;
798 struct vc4_compiled_shader *shader = in_value;
799
800 if (key->base.shader_state == data) {
801 util_hash_table_remove(del->vc4->vs_cache, key);
802 vc4_bo_unreference(&shader->bo);
803 free(shader);
804 }
805
806 return 0;
807 }
808
809 static void
810 vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
811 {
812 struct vc4_context *vc4 = vc4_context(pctx);
813 struct vc4_shader_state *so = hwcso;
814 struct delete_state del;
815
816 del.vc4 = vc4;
817 del.shader_state = so;
818 util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
819 util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);
820
821 free((void *)so->base.tokens);
822 free(so);
823 }
824
825 void
826 vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
827 struct vc4_constbuf_stateobj *cb,
828 int shader_index, struct vc4_bo **out_bo,
829 uint32_t *out_offset)
830 {
831 struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
832 struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen,
833 MAX2(1, uinfo->count * 4), "ubo");
834 uint32_t *map = vc4_bo_map(ubo);
835
836 for (int i = 0; i < uinfo->count; i++) {
837 switch (uinfo->contents[i]) {
838 case QUNIFORM_CONSTANT:
839 map[i] = uinfo->data[i];
840 break;
841 case QUNIFORM_UNIFORM:
842 map[i] = ((uint32_t *)cb->cb[0].user_buffer)[uinfo->data[i]];
843 break;
844 case QUNIFORM_VIEWPORT_X_SCALE:
845 map[i] = fui(vc4->framebuffer.width * 16.0f / 2.0f);
846 break;
847 case QUNIFORM_VIEWPORT_Y_SCALE:
848 map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
849 break;
850 }
851 #if 0
852 fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
853 shader, shader_index, i, map[i], uif(map[i]));
854 #endif
855 }
856
857 *out_bo = ubo;
858 *out_offset = 0;
859 }
860
861 static void
862 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
863 {
864 struct vc4_context *vc4 = vc4_context(pctx);
865 vc4->prog.bind_fs = hwcso;
866 vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
867 vc4->dirty |= VC4_DIRTY_PROG;
868 }
869
870 static void
871 vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
872 {
873 struct vc4_context *vc4 = vc4_context(pctx);
874 vc4->prog.bind_vs = hwcso;
875 vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
876 vc4->dirty |= VC4_DIRTY_PROG;
877 }
878
879 void
880 vc4_program_init(struct pipe_context *pctx)
881 {
882 struct vc4_context *vc4 = vc4_context(pctx);
883
884 pctx->create_vs_state = vc4_shader_state_create;
885 pctx->delete_vs_state = vc4_shader_state_delete;
886
887 pctx->create_fs_state = vc4_shader_state_create;
888 pctx->delete_fs_state = vc4_shader_state_delete;
889
890 pctx->bind_fs_state = vc4_fp_state_bind;
891 pctx->bind_vs_state = vc4_vp_state_bind;
892
893 vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
894 vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
895 }