vc4: Add an initial pass of algebraic optimization.
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
1 /*
2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26 #include <inttypes.h>
27 #include "pipe/p_state.h"
28 #include "util/u_format.h"
29 #include "util/u_hash_table.h"
30 #include "util/u_hash.h"
31 #include "util/u_memory.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_dump.h"
34
35 #include "vc4_context.h"
36 #include "vc4_qpu.h"
37 #include "vc4_qir.h"
38
39 struct tgsi_to_qir {
40 struct tgsi_parse_context parser;
41 struct qcompile *c;
42 struct qreg *temps;
43 struct qreg *inputs;
44 struct qreg *outputs;
45 struct qreg *uniforms;
46 struct qreg *consts;
47 uint32_t num_consts;
48
49 struct vc4_shader_state *shader_state;
50 struct vc4_fs_key *fs_key;
51 struct vc4_vs_key *vs_key;
52
53 uint32_t *uniform_data;
54 enum quniform_contents *uniform_contents;
55 uint32_t num_uniforms;
56 uint32_t num_inputs;
57 uint32_t num_outputs;
58 };
59
/**
 * Part of the compiled-shader cache key shared by all stages: the TGSI
 * shader state object the variant was built from.
 */
struct vc4_key {
        struct vc4_shader_state *shader_state;
};
63
64 struct vc4_fs_key {
65 struct vc4_key base;
66 enum pipe_format color_format;
67 };
68
69 struct vc4_vs_key {
70 struct vc4_key base;
71 enum pipe_format attr_formats[8];
72 };
73
74 static struct qreg
75 get_temp_for_uniform(struct tgsi_to_qir *trans, uint32_t uniform)
76 {
77 struct qcompile *c = trans->c;
78 struct qreg u = { QFILE_UNIF, uniform };
79
80 struct qreg t = qir_MOV(c, u);
81 trans->uniforms[uniform] = t;
82 return t;
83 }
84
85 static struct qreg
86 qir_uniform_ui(struct tgsi_to_qir *trans, uint32_t ui)
87 {
88 for (int i = 0; i < trans->num_uniforms; i++) {
89 if (trans->uniform_contents[i] == QUNIFORM_CONSTANT &&
90 trans->uniform_data[i] == ui)
91 return trans->uniforms[i];
92 }
93
94 trans->uniform_contents[trans->num_uniforms] = QUNIFORM_CONSTANT;
95 trans->uniform_data[trans->num_uniforms] = ui;
96 return get_temp_for_uniform(trans, trans->num_uniforms++);
97 }
98
99 static struct qreg
100 qir_uniform_f(struct tgsi_to_qir *trans, float f)
101 {
102 return qir_uniform_ui(trans, fui(f));
103 }
104
105 static struct qreg
106 qir_uniform(struct tgsi_to_qir *trans, uint32_t index)
107 {
108 for (int i = 0; i < trans->num_uniforms; i++) {
109 if (trans->uniform_contents[i] == QUNIFORM_UNIFORM &&
110 trans->uniform_data[i] == index)
111 return trans->uniforms[i];
112 }
113
114 trans->uniform_contents[trans->num_uniforms] = QUNIFORM_UNIFORM;
115 trans->uniform_data[trans->num_uniforms] = index;
116 return get_temp_for_uniform(trans, trans->num_uniforms++);
117 }
118
119 static struct qreg
120 get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
121 {
122 struct qcompile *c = trans->c;
123 struct qreg r = c->undef;
124
125 uint32_t s = i;
126 switch (i) {
127 case TGSI_SWIZZLE_X:
128 s = src->SwizzleX;
129 break;
130 case TGSI_SWIZZLE_Y:
131 s = src->SwizzleY;
132 break;
133 case TGSI_SWIZZLE_Z:
134 s = src->SwizzleZ;
135 break;
136 case TGSI_SWIZZLE_W:
137 s = src->SwizzleW;
138 break;
139 default:
140 abort();
141 }
142
143 assert(!src->Indirect);
144
145 switch (src->File) {
146 case TGSI_FILE_NULL:
147 return r;
148 case TGSI_FILE_TEMPORARY:
149 r = trans->temps[src->Index * 4 + s];
150 break;
151 case TGSI_FILE_IMMEDIATE:
152 r = trans->consts[src->Index * 4 + s];
153 break;
154 case TGSI_FILE_CONSTANT:
155 r = qir_uniform(trans, src->Index * 4 + s);
156 break;
157 case TGSI_FILE_INPUT:
158 r = trans->inputs[src->Index * 4 + s];
159 break;
160 default:
161 fprintf(stderr, "unknown src file %d\n", src->File);
162 abort();
163 }
164
165 if (src->Absolute)
166 r = qir_FMAXABS(c, r, r);
167
168 if (src->Negate)
169 r = qir_FSUB(c, qir_uniform_f(trans, 0), r);
170
171 return r;
172 };
173
174
175 static void
176 update_dst(struct tgsi_to_qir *trans, struct tgsi_full_instruction *tgsi_inst,
177 int i, struct qreg val)
178 {
179 struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;
180
181 assert(!tgsi_dst->Indirect);
182
183 switch (tgsi_dst->File) {
184 case TGSI_FILE_TEMPORARY:
185 trans->temps[tgsi_dst->Index * 4 + i] = val;
186 break;
187 case TGSI_FILE_OUTPUT:
188 trans->outputs[tgsi_dst->Index * 4 + i] = val;
189 trans->num_outputs = MAX2(trans->num_outputs,
190 tgsi_dst->Index * 4 + i + 1);
191 break;
192 default:
193 fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
194 abort();
195 }
196 };
197
198 static struct qreg
199 tgsi_to_qir_alu(struct tgsi_to_qir *trans,
200 struct tgsi_full_instruction *tgsi_inst,
201 enum qop op, struct qreg *src, int i)
202 {
203 struct qcompile *c = trans->c;
204 struct qreg dst = qir_get_temp(c);
205 qir_emit(c, qir_inst4(op, dst,
206 src[0 * 4 + i],
207 src[1 * 4 + i],
208 src[2 * 4 + i],
209 c->undef));
210 return dst;
211 }
212
213 static struct qreg
214 tgsi_to_qir_mad(struct tgsi_to_qir *trans,
215 struct tgsi_full_instruction *tgsi_inst,
216 enum qop op, struct qreg *src, int i)
217 {
218 struct qcompile *c = trans->c;
219 return qir_FADD(c,
220 qir_FMUL(c,
221 src[0 * 4 + i],
222 src[1 * 4 + i]),
223 src[2 * 4 + i]);
224 }
225
226 static struct qreg
227 tgsi_to_qir_dp(struct tgsi_to_qir *trans,
228 struct tgsi_full_instruction *tgsi_inst,
229 int num, struct qreg *src, int i)
230 {
231 struct qcompile *c = trans->c;
232
233 struct qreg sum = qir_FMUL(c, src[0 * 4 + 0], src[1 * 4 + 0]);
234 for (int j = 1; j < num; j++) {
235 sum = qir_FADD(c, sum, qir_FMUL(c,
236 src[0 * 4 + j],
237 src[1 * 4 + j]));
238 }
239 return sum;
240 }
241
242 static struct qreg
243 tgsi_to_qir_dp2(struct tgsi_to_qir *trans,
244 struct tgsi_full_instruction *tgsi_inst,
245 enum qop op, struct qreg *src, int i)
246 {
247 return tgsi_to_qir_dp(trans, tgsi_inst, 2, src, i);
248 }
249
250 static struct qreg
251 tgsi_to_qir_dp3(struct tgsi_to_qir *trans,
252 struct tgsi_full_instruction *tgsi_inst,
253 enum qop op, struct qreg *src, int i)
254 {
255 return tgsi_to_qir_dp(trans, tgsi_inst, 3, src, i);
256 }
257
258 static struct qreg
259 tgsi_to_qir_dp4(struct tgsi_to_qir *trans,
260 struct tgsi_full_instruction *tgsi_inst,
261 enum qop op, struct qreg *src, int i)
262 {
263 return tgsi_to_qir_dp(trans, tgsi_inst, 4, src, i);
264 }
265
266 static struct qreg
267 tgsi_to_qir_abs(struct tgsi_to_qir *trans,
268 struct tgsi_full_instruction *tgsi_inst,
269 enum qop op, struct qreg *src, int i)
270 {
271 struct qcompile *c = trans->c;
272 struct qreg arg = src[0 * 4 + i];
273 return qir_FMAXABS(c, arg, arg);
274 }
275
276 static void
277 emit_tgsi_declaration(struct tgsi_to_qir *trans,
278 struct tgsi_full_declaration *decl)
279 {
280 struct qcompile *c = trans->c;
281
282 switch (decl->Declaration.File) {
283 case TGSI_FILE_INPUT:
284 if (c->stage == QSTAGE_FRAG) {
285 for (int index = decl->Range.First;
286 index <= decl->Range.Last;
287 index++) {
288 for (int i = 0; i < 4; i++) {
289 struct qreg vary = {
290 QFILE_VARY,
291 index * 4 + i
292 };
293
294 /* XXX: multiply by W */
295 trans->inputs[index * 4 + i] =
296 qir_VARY_ADD_C(c,
297 qir_MOV(c,
298 vary));
299
300 trans->num_inputs++;
301 }
302 }
303 }
304 break;
305 }
306 }
307
308 static void
309 emit_tgsi_instruction(struct tgsi_to_qir *trans,
310 struct tgsi_full_instruction *tgsi_inst)
311 {
312 struct qcompile *c = trans->c;
313 struct {
314 enum qop op;
315 struct qreg (*func)(struct tgsi_to_qir *trans,
316 struct tgsi_full_instruction *tgsi_inst,
317 enum qop op,
318 struct qreg *src, int i);
319 } op_trans[] = {
320 [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
321 [TGSI_OPCODE_ABS] = { 0, tgsi_to_qir_abs },
322 [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
323 [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
324 [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
325 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
326 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
327 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
328 [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu },
329 [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu },
330 [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu },
331 [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu },
332 [TGSI_OPCODE_CMP] = { QOP_CMP, tgsi_to_qir_alu },
333 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
334 [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
335 [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
336 [TGSI_OPCODE_DP4] = { 0, tgsi_to_qir_dp4 },
337 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_alu },
338 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
339 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_alu },
340 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_alu },
341 [TGSI_OPCODE_LIT] = { QOP_MOV, tgsi_to_qir_alu }, /* XXX */
342 };
343 static int asdf = 0;
344 uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
345
346 if (tgsi_op == TGSI_OPCODE_END)
347 return;
348
349 if (tgsi_op > ARRAY_SIZE(op_trans) || !op_trans[tgsi_op].func) {
350 fprintf(stderr, "unknown tgsi inst: ");
351 tgsi_dump_instruction(tgsi_inst, asdf++);
352 fprintf(stderr, "\n");
353 abort();
354 }
355
356 struct qreg src_regs[12];
357 for (int s = 0; s < 3; s++) {
358 for (int i = 0; i < 4; i++) {
359 src_regs[4 * s + i] =
360 get_src(trans, &tgsi_inst->Src[s].Register, i);
361 }
362 }
363
364 for (int i = 0; i < 4; i++) {
365 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
366 continue;
367
368 struct qreg result;
369
370 result = op_trans[tgsi_op].func(trans, tgsi_inst,
371 op_trans[tgsi_op].op,
372 src_regs, i);
373
374 if (tgsi_inst->Instruction.Saturate) {
375 float low = (tgsi_inst->Instruction.Saturate ==
376 TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
377 result = qir_FMAX(c,
378 qir_FMIN(c,
379 result,
380 qir_uniform_f(trans, 1.0)),
381 qir_uniform_f(trans, low));
382 }
383
384 update_dst(trans, tgsi_inst, i, result);
385 }
386 }
387
388 static void
389 parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
390 {
391 for (int i = 0; i < 4; i++) {
392 unsigned n = trans->num_consts++;
393 trans->consts[n] = qir_uniform_ui(trans, imm->u[i].Uint);
394 }
395 }
396
/** Fragment shader prologue: currently nothing needs to be emitted. */
static void
emit_frag_init(struct tgsi_to_qir *trans)
{
        /* Intentionally empty. */
}
401
402 static void
403 emit_vert_init(struct tgsi_to_qir *trans)
404 {
405 struct qcompile *c = trans->c;
406
407 /* XXX: attribute type/size/count */
408 for (int i = 0; i < 4; i++) {
409 trans->inputs[i] = qir_get_temp(c);
410 qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i],
411 c->undef, c->undef));
412 }
413 }
414
415 static void
416 emit_coord_init(struct tgsi_to_qir *trans)
417 {
418 struct qcompile *c = trans->c;
419
420 /* XXX: attribute type/size/count */
421 for (int i = 0; i < 4; i++) {
422 trans->inputs[i] = qir_get_temp(c);
423 qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i],
424 c->undef, c->undef));
425 }
426 }
427
428 static void
429 emit_frag_end(struct tgsi_to_qir *trans)
430 {
431 struct qcompile *c = trans->c;
432
433 struct qreg t = qir_get_temp(c);
434
435 const struct util_format_description *format_desc =
436 util_format_description(trans->fs_key->color_format);
437
438 struct qreg swizzled_outputs[4] = {
439 trans->outputs[format_desc->swizzle[0]],
440 trans->outputs[format_desc->swizzle[1]],
441 trans->outputs[format_desc->swizzle[2]],
442 trans->outputs[format_desc->swizzle[3]],
443 };
444
445 qir_emit(c, qir_inst4(QOP_PACK_COLORS, t,
446 swizzled_outputs[0],
447 swizzled_outputs[1],
448 swizzled_outputs[2],
449 swizzled_outputs[3]));
450 qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
451 t, c->undef));
452 }
453
454 static void
455 emit_scaled_viewport_write(struct tgsi_to_qir *trans)
456 {
457 struct qcompile *c = trans->c;
458 struct qreg xyi[2];
459
460 for (int i = 0; i < 2; i++) {
461 trans->uniform_contents[trans->num_uniforms] =
462 QUNIFORM_VIEWPORT_X_SCALE + i;
463 struct qreg scale = { QFILE_UNIF, trans->num_uniforms++ };
464
465 xyi[i] = qir_FTOI(c, qir_FMUL(c, trans->outputs[i], scale));
466 }
467
468 qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
469 }
470
471 static void
472 emit_zs_write(struct tgsi_to_qir *trans)
473 {
474 struct qcompile *c = trans->c;
475
476 /* XXX: rescale */
477 qir_VPM_WRITE(c, trans->outputs[2]);
478 }
479
480 static void
481 emit_1_wc_write(struct tgsi_to_qir *trans)
482 {
483 struct qcompile *c = trans->c;
484
485 /* XXX: RCP */
486 qir_VPM_WRITE(c, trans->outputs[3]);
487 }
488
489 static void
490 emit_vert_end(struct tgsi_to_qir *trans)
491 {
492 struct qcompile *c = trans->c;
493
494 emit_scaled_viewport_write(trans);
495 emit_zs_write(trans);
496 emit_1_wc_write(trans);
497
498 for (int i = 4; i < trans->num_outputs; i++) {
499 qir_VPM_WRITE(c, trans->outputs[i]);
500 }
501 }
502
503 static void
504 emit_coord_end(struct tgsi_to_qir *trans)
505 {
506 struct qcompile *c = trans->c;
507
508 for (int i = 0; i < 4; i++)
509 qir_VPM_WRITE(c, trans->outputs[i]);
510
511 emit_scaled_viewport_write(trans);
512 emit_zs_write(trans);
513 emit_1_wc_write(trans);
514 }
515
516 static struct tgsi_to_qir *
517 vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage,
518 struct vc4_key *key)
519 {
520 struct tgsi_to_qir *trans = CALLOC_STRUCT(tgsi_to_qir);
521 struct qcompile *c;
522 int ret;
523
524 c = qir_compile_init();
525 c->stage = stage;
526
527 memset(trans, 0, sizeof(*trans));
528 /* XXX sizing */
529 trans->temps = calloc(sizeof(struct qreg), 1024);
530 trans->inputs = calloc(sizeof(struct qreg), 8 * 4);
531 trans->outputs = calloc(sizeof(struct qreg), 1024);
532 trans->uniforms = calloc(sizeof(struct qreg), 1024);
533 trans->consts = calloc(sizeof(struct qreg), 1024);
534
535 trans->uniform_data = calloc(sizeof(uint32_t), 1024);
536 trans->uniform_contents = calloc(sizeof(enum quniform_contents), 1024);
537
538 trans->shader_state = key->shader_state;
539 trans->c = c;
540 ret = tgsi_parse_init(&trans->parser, trans->shader_state->base.tokens);
541 assert(ret == TGSI_PARSE_OK);
542
543 if (vc4_debug & VC4_DEBUG_TGSI) {
544 fprintf(stderr, "TGSI:\n");
545 tgsi_dump(trans->shader_state->base.tokens, 0);
546 }
547
548 switch (stage) {
549 case QSTAGE_FRAG:
550 trans->fs_key = (struct vc4_fs_key *)key;
551 emit_frag_init(trans);
552 break;
553 case QSTAGE_VERT:
554 trans->vs_key = (struct vc4_vs_key *)key;
555 emit_vert_init(trans);
556 break;
557 case QSTAGE_COORD:
558 trans->vs_key = (struct vc4_vs_key *)key;
559 emit_coord_init(trans);
560 break;
561 }
562
563 while (!tgsi_parse_end_of_tokens(&trans->parser)) {
564 tgsi_parse_token(&trans->parser);
565
566 switch (trans->parser.FullToken.Token.Type) {
567 case TGSI_TOKEN_TYPE_DECLARATION:
568 emit_tgsi_declaration(trans,
569 &trans->parser.FullToken.FullDeclaration);
570 break;
571
572 case TGSI_TOKEN_TYPE_INSTRUCTION:
573 emit_tgsi_instruction(trans,
574 &trans->parser.FullToken.FullInstruction);
575 break;
576
577 case TGSI_TOKEN_TYPE_IMMEDIATE:
578 parse_tgsi_immediate(trans,
579 &trans->parser.FullToken.FullImmediate);
580 break;
581 }
582 }
583
584 switch (stage) {
585 case QSTAGE_FRAG:
586 emit_frag_end(trans);
587 break;
588 case QSTAGE_VERT:
589 emit_vert_end(trans);
590 break;
591 case QSTAGE_COORD:
592 emit_coord_end(trans);
593 break;
594 }
595
596 tgsi_parse_free(&trans->parser);
597 free(trans->temps);
598
599 qir_optimize(c);
600
601 if (vc4_debug & VC4_DEBUG_QIR) {
602 fprintf(stderr, "QIR:\n");
603 qir_dump(c);
604 }
605 vc4_generate_code(c);
606
607 if (vc4_debug & VC4_DEBUG_SHADERDB) {
608 fprintf(stderr, "SHADER-DB: %s: %d instructions\n",
609 qir_get_stage_name(c->stage), c->qpu_inst_count);
610 fprintf(stderr, "SHADER-DB: %s: %d uniforms\n",
611 qir_get_stage_name(c->stage), trans->num_uniforms);
612 }
613
614 return trans;
615 }
616
617 static void *
618 vc4_shader_state_create(struct pipe_context *pctx,
619 const struct pipe_shader_state *cso)
620 {
621 struct vc4_shader_state *so = CALLOC_STRUCT(vc4_shader_state);
622 if (!so)
623 return NULL;
624
625 so->base.tokens = tgsi_dup_tokens(cso->tokens);
626
627 return so;
628 }
629
630 static void
631 copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
632 int shader_index,
633 struct tgsi_to_qir *trans)
634 {
635 int count = trans->num_uniforms;
636 struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
637
638 uinfo->count = count;
639 uinfo->data = malloc(count * sizeof(*uinfo->data));
640 memcpy(uinfo->data, trans->uniform_data,
641 count * sizeof(*uinfo->data));
642 uinfo->contents = malloc(count * sizeof(*uinfo->contents));
643 memcpy(uinfo->contents, trans->uniform_contents,
644 count * sizeof(*uinfo->contents));
645 }
646
647 static void
648 vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
649 struct vc4_fs_key *key)
650 {
651 struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG,
652 &key->base);
653 shader->num_inputs = trans->num_inputs;
654 copy_uniform_state_to_shader(shader, 0, trans);
655 shader->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
656 trans->c->qpu_inst_count * sizeof(uint64_t),
657 "fs_code");
658
659 qir_compile_destroy(trans->c);
660 free(trans);
661 }
662
663 static void
664 vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
665 struct vc4_vs_key *key)
666 {
667 struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(shader,
668 QSTAGE_VERT,
669 &key->base);
670 copy_uniform_state_to_shader(shader, 0, vs_trans);
671
672 struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(shader,
673 QSTAGE_COORD,
674 &key->base);
675 copy_uniform_state_to_shader(shader, 1, cs_trans);
676
677 uint32_t vs_size = vs_trans->c->qpu_inst_count * sizeof(uint64_t);
678 uint32_t cs_size = cs_trans->c->qpu_inst_count * sizeof(uint64_t);
679 shader->coord_shader_offset = vs_size; /* XXX: alignment? */
680 shader->bo = vc4_bo_alloc(vc4->screen,
681 shader->coord_shader_offset + cs_size,
682 "vs_code");
683
684 void *map = vc4_bo_map(shader->bo);
685 memcpy(map, vs_trans->c->qpu_insts, vs_size);
686 memcpy(map + shader->coord_shader_offset,
687 cs_trans->c->qpu_insts, cs_size);
688
689 qir_compile_destroy(vs_trans->c);
690 qir_compile_destroy(cs_trans->c);
691 }
692
693 static void
694 vc4_update_compiled_fs(struct vc4_context *vc4)
695 {
696 struct vc4_fs_key local_key;
697 struct vc4_fs_key *key = &local_key;
698
699 memset(key, 0, sizeof(*key));
700 key->base.shader_state = vc4->prog.bind_fs;
701
702 if (vc4->framebuffer.cbufs[0])
703 key->color_format = vc4->framebuffer.cbufs[0]->format;
704
705 vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
706 if (vc4->prog.fs)
707 return;
708
709 key = malloc(sizeof(*key));
710 memcpy(key, &local_key, sizeof(*key));
711
712 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
713 vc4_fs_compile(vc4, shader, key);
714 util_hash_table_set(vc4->fs_cache, key, shader);
715
716 vc4->prog.fs = shader;
717 }
718
719 static void
720 vc4_update_compiled_vs(struct vc4_context *vc4)
721 {
722 struct vc4_vs_key local_key;
723 struct vc4_vs_key *key = &local_key;
724
725 memset(key, 0, sizeof(*key));
726 key->base.shader_state = vc4->prog.bind_vs;
727
728 vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
729 if (vc4->prog.vs)
730 return;
731
732 key = malloc(sizeof(*key));
733 memcpy(key, &local_key, sizeof(*key));
734
735 struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
736 vc4_vs_compile(vc4, shader, key);
737 util_hash_table_set(vc4->vs_cache, key, shader);
738
739 vc4->prog.vs = shader;
740 }
741
/**
 * Refreshes vc4->prog.fs and vc4->prog.vs for the currently bound
 * shaders, fragment shader first.
 */
void
vc4_update_compiled_shaders(struct vc4_context *vc4)
{
        /* Fragment stage, then vertex stage. */
        vc4_update_compiled_fs(vc4);
        vc4_update_compiled_vs(vc4);
}
748
749 static unsigned
750 fs_cache_hash(void *key)
751 {
752 return util_hash_crc32(key, sizeof(struct vc4_fs_key));
753 }
754
755 static unsigned
756 vs_cache_hash(void *key)
757 {
758 return util_hash_crc32(key, sizeof(struct vc4_vs_key));
759 }
760
761 static int
762 fs_cache_compare(void *key1, void *key2)
763 {
764 return memcmp(key1, key2, sizeof(struct vc4_fs_key));
765 }
766
767 static int
768 vs_cache_compare(void *key1, void *key2)
769 {
770 return memcmp(key1, key2, sizeof(struct vc4_vs_key));
771 }
772
/**
 * Closure handed to the cache foreach callbacks when a shader state
 * object is deleted.
 */
struct delete_state {
        /* Context whose caches are being walked. */
        struct vc4_context *vc4;
        /* Shader state object that is being deleted. */
        struct vc4_shader_state *shader_state;
};
777
778 static enum pipe_error
779 fs_delete_from_cache(void *in_key, void *in_value, void *data)
780 {
781 struct delete_state *del = data;
782 struct vc4_fs_key *key = in_key;
783 struct vc4_compiled_shader *shader = in_value;
784
785 if (key->base.shader_state == data) {
786 util_hash_table_remove(del->vc4->fs_cache, key);
787 vc4_bo_unreference(&shader->bo);
788 free(shader);
789 }
790
791 return 0;
792 }
793
794 static enum pipe_error
795 vs_delete_from_cache(void *in_key, void *in_value, void *data)
796 {
797 struct delete_state *del = data;
798 struct vc4_vs_key *key = in_key;
799 struct vc4_compiled_shader *shader = in_value;
800
801 if (key->base.shader_state == data) {
802 util_hash_table_remove(del->vc4->vs_cache, key);
803 vc4_bo_unreference(&shader->bo);
804 free(shader);
805 }
806
807 return 0;
808 }
809
810 static void
811 vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
812 {
813 struct vc4_context *vc4 = vc4_context(pctx);
814 struct vc4_shader_state *so = hwcso;
815 struct delete_state del;
816
817 del.vc4 = vc4;
818 del.shader_state = so;
819 util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
820 util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);
821
822 free((void *)so->base.tokens);
823 free(so);
824 }
825
826 void
827 vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
828 struct vc4_constbuf_stateobj *cb,
829 int shader_index, struct vc4_bo **out_bo,
830 uint32_t *out_offset)
831 {
832 struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
833 struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen,
834 MAX2(1, uinfo->count * 4), "ubo");
835 uint32_t *map = vc4_bo_map(ubo);
836
837 for (int i = 0; i < uinfo->count; i++) {
838 switch (uinfo->contents[i]) {
839 case QUNIFORM_CONSTANT:
840 map[i] = uinfo->data[i];
841 break;
842 case QUNIFORM_UNIFORM:
843 map[i] = ((uint32_t *)cb->cb[0].user_buffer)[uinfo->data[i]];
844 break;
845 case QUNIFORM_VIEWPORT_X_SCALE:
846 map[i] = fui(vc4->framebuffer.width * 16.0f / 2.0f);
847 break;
848 case QUNIFORM_VIEWPORT_Y_SCALE:
849 map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
850 break;
851 }
852 #if 0
853 fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
854 shader, shader_index, i, map[i], uif(map[i]));
855 #endif
856 }
857
858 *out_bo = ubo;
859 *out_offset = 0;
860 }
861
862 static void
863 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
864 {
865 struct vc4_context *vc4 = vc4_context(pctx);
866 vc4->prog.bind_fs = hwcso;
867 vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
868 vc4->dirty |= VC4_DIRTY_PROG;
869 }
870
871 static void
872 vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
873 {
874 struct vc4_context *vc4 = vc4_context(pctx);
875 vc4->prog.bind_vs = hwcso;
876 vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
877 vc4->dirty |= VC4_DIRTY_PROG;
878 }
879
880 void
881 vc4_program_init(struct pipe_context *pctx)
882 {
883 struct vc4_context *vc4 = vc4_context(pctx);
884
885 pctx->create_vs_state = vc4_shader_state_create;
886 pctx->delete_vs_state = vc4_shader_state_delete;
887
888 pctx->create_fs_state = vc4_shader_state_create;
889 pctx->delete_fs_state = vc4_shader_state_delete;
890
891 pctx->bind_fs_state = vc4_fp_state_bind;
892 pctx->bind_vs_state = vc4_vp_state_bind;
893
894 vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
895 vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
896 }