2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "pipe/p_state.h"
27 #include "util/u_format.h"
28 #include "util/u_hash_table.h"
29 #include "util/u_hash.h"
30 #include "util/u_memory.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_dump.h"
33 #include "tgsi/tgsi_info.h"
35 #include "vc4_context.h"
38 #ifdef USE_VC4_SIMULATOR
39 #include "simpenrose/simpenrose.h"
43 struct tgsi_parse_context parser
;
48 struct qreg
*uniforms
;
50 struct qreg line_x
, point_x
, point_y
;
55 struct pipe_shader_state
*shader_state
;
57 struct vc4_fs_key
*fs_key
;
58 struct vc4_vs_key
*vs_key
;
60 uint32_t *uniform_data
;
61 enum quniform_contents
*uniform_contents
;
62 uint32_t num_uniforms
;
64 uint32_t num_texture_samples
;
68 struct pipe_shader_state
*shader_state
;
69 enum pipe_format tex_format
[VC4_MAX_TEXTURE_SAMPLERS
];
74 enum pipe_format color_format
;
79 struct pipe_rt_blend_state blend
;
84 enum pipe_format attr_formats
[8];
88 add_uniform(struct tgsi_to_qir
*trans
,
89 enum quniform_contents contents
,
92 uint32_t uniform
= trans
->num_uniforms
++;
93 struct qreg u
= { QFILE_UNIF
, uniform
};
95 trans
->uniform_contents
[uniform
] = contents
;
96 trans
->uniform_data
[uniform
] = data
;
102 get_temp_for_uniform(struct tgsi_to_qir
*trans
, enum quniform_contents contents
,
105 struct qcompile
*c
= trans
->c
;
107 for (int i
= 0; i
< trans
->num_uniforms
; i
++) {
108 if (trans
->uniform_contents
[i
] == contents
&&
109 trans
->uniform_data
[i
] == data
)
110 return trans
->uniforms
[i
];
113 struct qreg u
= add_uniform(trans
, contents
, data
);
114 struct qreg t
= qir_MOV(c
, u
);
116 trans
->uniforms
[u
.index
] = t
;
121 qir_uniform_ui(struct tgsi_to_qir
*trans
, uint32_t ui
)
123 return get_temp_for_uniform(trans
, QUNIFORM_CONSTANT
, ui
);
127 qir_uniform_f(struct tgsi_to_qir
*trans
, float f
)
129 return qir_uniform_ui(trans
, fui(f
));
133 get_src(struct tgsi_to_qir
*trans
, unsigned tgsi_op
,
134 struct tgsi_src_register
*src
, int i
)
136 struct qcompile
*c
= trans
->c
;
137 struct qreg r
= c
->undef
;
157 assert(!src
->Indirect
);
162 case TGSI_FILE_TEMPORARY
:
163 r
= trans
->temps
[src
->Index
* 4 + s
];
165 case TGSI_FILE_IMMEDIATE
:
166 r
= trans
->consts
[src
->Index
* 4 + s
];
168 case TGSI_FILE_CONSTANT
:
169 r
= get_temp_for_uniform(trans
, QUNIFORM_UNIFORM
,
172 case TGSI_FILE_INPUT
:
173 r
= trans
->inputs
[src
->Index
* 4 + s
];
175 case TGSI_FILE_SAMPLER
:
176 case TGSI_FILE_SAMPLER_VIEW
:
180 fprintf(stderr
, "unknown src file %d\n", src
->File
);
185 r
= qir_FMAXABS(c
, r
, r
);
188 switch (tgsi_opcode_infer_src_type(tgsi_op
)) {
189 case TGSI_TYPE_SIGNED
:
190 case TGSI_TYPE_UNSIGNED
:
191 r
= qir_SUB(c
, qir_uniform_ui(trans
, 0), r
);
194 r
= qir_FSUB(c
, qir_uniform_f(trans
, 0.0), r
);
204 update_dst(struct tgsi_to_qir
*trans
, struct tgsi_full_instruction
*tgsi_inst
,
205 int i
, struct qreg val
)
207 struct tgsi_dst_register
*tgsi_dst
= &tgsi_inst
->Dst
[0].Register
;
209 assert(!tgsi_dst
->Indirect
);
211 switch (tgsi_dst
->File
) {
212 case TGSI_FILE_TEMPORARY
:
213 trans
->temps
[tgsi_dst
->Index
* 4 + i
] = val
;
215 case TGSI_FILE_OUTPUT
:
216 trans
->outputs
[tgsi_dst
->Index
* 4 + i
] = val
;
217 trans
->num_outputs
= MAX2(trans
->num_outputs
,
218 tgsi_dst
->Index
* 4 + i
+ 1);
221 fprintf(stderr
, "unknown dst file %d\n", tgsi_dst
->File
);
227 get_swizzled_channel(struct tgsi_to_qir
*trans
,
228 struct qreg
*srcs
, int swiz
)
232 case UTIL_FORMAT_SWIZZLE_NONE
:
233 fprintf(stderr
, "warning: unknown swizzle\n");
235 case UTIL_FORMAT_SWIZZLE_0
:
236 return qir_uniform_f(trans
, 0.0);
237 case UTIL_FORMAT_SWIZZLE_1
:
238 return qir_uniform_f(trans
, 1.0);
239 case UTIL_FORMAT_SWIZZLE_X
:
240 case UTIL_FORMAT_SWIZZLE_Y
:
241 case UTIL_FORMAT_SWIZZLE_Z
:
242 case UTIL_FORMAT_SWIZZLE_W
:
248 tgsi_to_qir_alu(struct tgsi_to_qir
*trans
,
249 struct tgsi_full_instruction
*tgsi_inst
,
250 enum qop op
, struct qreg
*src
, int i
)
252 struct qcompile
*c
= trans
->c
;
253 struct qreg dst
= qir_get_temp(c
);
254 qir_emit(c
, qir_inst4(op
, dst
,
263 tgsi_to_qir_umul(struct tgsi_to_qir
*trans
,
264 struct tgsi_full_instruction
*tgsi_inst
,
265 enum qop op
, struct qreg
*src
, int i
)
267 struct qcompile
*c
= trans
->c
;
269 struct qreg src0_hi
= qir_SHR(c
, src
[0 * 4 + i
],
270 qir_uniform_ui(trans
, 16));
271 struct qreg src0_lo
= qir_AND(c
, src
[0 * 4 + i
],
272 qir_uniform_ui(trans
, 0xffff));
273 struct qreg src1_hi
= qir_SHR(c
, src
[1 * 4 + i
],
274 qir_uniform_ui(trans
, 16));
275 struct qreg src1_lo
= qir_AND(c
, src
[1 * 4 + i
],
276 qir_uniform_ui(trans
, 0xffff));
278 struct qreg hilo
= qir_MUL24(c
, src0_hi
, src1_lo
);
279 struct qreg lohi
= qir_MUL24(c
, src0_lo
, src1_hi
);
280 struct qreg lolo
= qir_MUL24(c
, src0_lo
, src1_lo
);
282 return qir_ADD(c
, lolo
, qir_SHL(c
,
283 qir_ADD(c
, hilo
, lohi
),
284 qir_uniform_ui(trans
, 16)));
288 tgsi_to_qir_idiv(struct tgsi_to_qir
*trans
,
289 struct tgsi_full_instruction
*tgsi_inst
,
290 enum qop op
, struct qreg
*src
, int i
)
292 struct qcompile
*c
= trans
->c
;
293 return qir_FTOI(c
, qir_FMUL(c
,
294 qir_ITOF(c
, src
[0 * 4 + i
]),
295 qir_RCP(c
, qir_ITOF(c
, src
[1 * 4 + i
]))));
299 tgsi_to_qir_ineg(struct tgsi_to_qir
*trans
,
300 struct tgsi_full_instruction
*tgsi_inst
,
301 enum qop op
, struct qreg
*src
, int i
)
303 struct qcompile
*c
= trans
->c
;
304 return qir_SUB(c
, qir_uniform_ui(trans
, 0), src
[0 * 4 + i
]);
308 tgsi_to_qir_seq(struct tgsi_to_qir
*trans
,
309 struct tgsi_full_instruction
*tgsi_inst
,
310 enum qop op
, struct qreg
*src
, int i
)
312 struct qcompile
*c
= trans
->c
;
313 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
314 return qir_SEL_X_0_ZS(c
, qir_uniform_f(trans
, 1.0));
318 tgsi_to_qir_sne(struct tgsi_to_qir
*trans
,
319 struct tgsi_full_instruction
*tgsi_inst
,
320 enum qop op
, struct qreg
*src
, int i
)
322 struct qcompile
*c
= trans
->c
;
323 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
324 return qir_SEL_X_0_ZC(c
, qir_uniform_f(trans
, 1.0));
328 tgsi_to_qir_slt(struct tgsi_to_qir
*trans
,
329 struct tgsi_full_instruction
*tgsi_inst
,
330 enum qop op
, struct qreg
*src
, int i
)
332 struct qcompile
*c
= trans
->c
;
333 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
334 return qir_SEL_X_0_NS(c
, qir_uniform_f(trans
, 1.0));
338 tgsi_to_qir_sge(struct tgsi_to_qir
*trans
,
339 struct tgsi_full_instruction
*tgsi_inst
,
340 enum qop op
, struct qreg
*src
, int i
)
342 struct qcompile
*c
= trans
->c
;
343 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
344 return qir_SEL_X_0_NC(c
, qir_uniform_f(trans
, 1.0));
348 tgsi_to_qir_fseq(struct tgsi_to_qir
*trans
,
349 struct tgsi_full_instruction
*tgsi_inst
,
350 enum qop op
, struct qreg
*src
, int i
)
352 struct qcompile
*c
= trans
->c
;
353 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
354 return qir_SEL_X_0_ZS(c
, qir_uniform_ui(trans
, ~0));
358 tgsi_to_qir_fsne(struct tgsi_to_qir
*trans
,
359 struct tgsi_full_instruction
*tgsi_inst
,
360 enum qop op
, struct qreg
*src
, int i
)
362 struct qcompile
*c
= trans
->c
;
363 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
364 return qir_SEL_X_0_ZC(c
, qir_uniform_ui(trans
, ~0));
368 tgsi_to_qir_fslt(struct tgsi_to_qir
*trans
,
369 struct tgsi_full_instruction
*tgsi_inst
,
370 enum qop op
, struct qreg
*src
, int i
)
372 struct qcompile
*c
= trans
->c
;
373 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
374 return qir_SEL_X_0_NS(c
, qir_uniform_ui(trans
, ~0));
378 tgsi_to_qir_fsge(struct tgsi_to_qir
*trans
,
379 struct tgsi_full_instruction
*tgsi_inst
,
380 enum qop op
, struct qreg
*src
, int i
)
382 struct qcompile
*c
= trans
->c
;
383 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
384 return qir_SEL_X_0_NC(c
, qir_uniform_ui(trans
, ~0));
388 tgsi_to_qir_useq(struct tgsi_to_qir
*trans
,
389 struct tgsi_full_instruction
*tgsi_inst
,
390 enum qop op
, struct qreg
*src
, int i
)
392 struct qcompile
*c
= trans
->c
;
393 qir_SF(c
, qir_SUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
394 return qir_SEL_X_0_ZS(c
, qir_uniform_ui(trans
, ~0));
398 tgsi_to_qir_usne(struct tgsi_to_qir
*trans
,
399 struct tgsi_full_instruction
*tgsi_inst
,
400 enum qop op
, struct qreg
*src
, int i
)
402 struct qcompile
*c
= trans
->c
;
403 qir_SF(c
, qir_SUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
404 return qir_SEL_X_0_ZC(c
, qir_uniform_ui(trans
, ~0));
408 tgsi_to_qir_islt(struct tgsi_to_qir
*trans
,
409 struct tgsi_full_instruction
*tgsi_inst
,
410 enum qop op
, struct qreg
*src
, int i
)
412 struct qcompile
*c
= trans
->c
;
413 qir_SF(c
, qir_SUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
414 return qir_SEL_X_0_NS(c
, qir_uniform_ui(trans
, ~0));
418 tgsi_to_qir_isge(struct tgsi_to_qir
*trans
,
419 struct tgsi_full_instruction
*tgsi_inst
,
420 enum qop op
, struct qreg
*src
, int i
)
422 struct qcompile
*c
= trans
->c
;
423 qir_SF(c
, qir_SUB(c
, src
[0 * 4 + i
], src
[1 * 4 + i
]));
424 return qir_SEL_X_0_NC(c
, qir_uniform_ui(trans
, ~0));
428 tgsi_to_qir_cmp(struct tgsi_to_qir
*trans
,
429 struct tgsi_full_instruction
*tgsi_inst
,
430 enum qop op
, struct qreg
*src
, int i
)
432 struct qcompile
*c
= trans
->c
;
433 qir_SF(c
, src
[0 * 4 + i
]);
434 return qir_SEL_X_Y_NS(c
,
440 tgsi_to_qir_mad(struct tgsi_to_qir
*trans
,
441 struct tgsi_full_instruction
*tgsi_inst
,
442 enum qop op
, struct qreg
*src
, int i
)
444 struct qcompile
*c
= trans
->c
;
453 tgsi_to_qir_lit(struct tgsi_to_qir
*trans
,
454 struct tgsi_full_instruction
*tgsi_inst
,
455 enum qop op
, struct qreg
*src
, int i
)
457 struct qcompile
*c
= trans
->c
;
458 struct qreg x
= src
[0 * 4 + 0];
459 struct qreg y
= src
[0 * 4 + 1];
460 struct qreg w
= src
[0 * 4 + 3];
465 return qir_uniform_f(trans
, 1.0);
467 return qir_FMAX(c
, src
[0 * 4 + 0], qir_uniform_f(trans
, 0.0));
469 struct qreg zero
= qir_uniform_f(trans
, 0.0);
472 /* XXX: Clamp w to -128..128 */
473 return qir_SEL_X_0_NC(c
,
474 qir_EXP2(c
, qir_FMUL(c
,
482 assert(!"not reached");
488 tgsi_to_qir_lrp(struct tgsi_to_qir
*trans
,
489 struct tgsi_full_instruction
*tgsi_inst
,
490 enum qop op
, struct qreg
*src
, int i
)
492 struct qcompile
*c
= trans
->c
;
493 struct qreg src0
= src
[0 * 4 + i
];
494 struct qreg src1
= src
[1 * 4 + i
];
495 struct qreg src2
= src
[2 * 4 + i
];
498 * src0 * src1 + (1 - src0) * src2.
499 * -> src0 * src1 + src2 - src0 * src2
500 * -> src2 + src0 * (src1 - src2)
502 return qir_FADD(c
, src2
, qir_FMUL(c
, src0
, qir_FSUB(c
, src1
, src2
)));
507 tgsi_to_qir_tex(struct tgsi_to_qir
*trans
,
508 struct tgsi_full_instruction
*tgsi_inst
,
509 enum qop op
, struct qreg
*src
)
511 struct qcompile
*c
= trans
->c
;
513 assert(!tgsi_inst
->Instruction
.Saturate
);
515 struct qreg s
= src
[0 * 4 + 0];
516 struct qreg t
= src
[0 * 4 + 1];
517 uint32_t unit
= tgsi_inst
->Src
[1].Register
.Index
;
519 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
520 struct qreg proj
= qir_RCP(c
, src
[0 * 4 + 3]);
521 s
= qir_FMUL(c
, s
, proj
);
522 t
= qir_FMUL(c
, t
, proj
);
525 /* There is no native support for GL texture rectangle coordinates, so
526 * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
529 if (tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_RECT
) {
531 get_temp_for_uniform(trans
,
532 QUNIFORM_TEXRECT_SCALE_X
,
535 get_temp_for_uniform(trans
,
536 QUNIFORM_TEXRECT_SCALE_Y
,
540 qir_TEX_T(c
, t
, add_uniform(trans
, QUNIFORM_TEXTURE_CONFIG_P0
,
543 struct qreg sampler_p1
= add_uniform(trans
, QUNIFORM_TEXTURE_CONFIG_P1
,
545 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB
) {
546 qir_TEX_B(c
, src
[0 * 4 + 3], sampler_p1
);
547 qir_TEX_S(c
, s
, add_uniform(trans
, QUNIFORM_CONSTANT
, 0));
549 qir_TEX_S(c
, s
, sampler_p1
);
552 trans
->num_texture_samples
++;
553 qir_emit(c
, qir_inst(QOP_TEX_RESULT
, c
->undef
, c
->undef
, c
->undef
));
555 struct qreg unpacked
[4];
556 for (int i
= 0; i
< 4; i
++)
557 unpacked
[i
] = qir_R4_UNPACK(c
, i
);
559 enum pipe_format format
= trans
->key
->tex_format
[unit
];
560 const uint8_t *swiz
= vc4_get_format_swizzle(format
);
561 for (int i
= 0; i
< 4; i
++) {
562 if (!(tgsi_inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
565 update_dst(trans
, tgsi_inst
, i
,
566 get_swizzled_channel(trans
, unpacked
, swiz
[i
]));
571 tgsi_to_qir_pow(struct tgsi_to_qir
*trans
,
572 struct tgsi_full_instruction
*tgsi_inst
,
573 enum qop op
, struct qreg
*src
, int i
)
575 struct qcompile
*c
= trans
->c
;
577 /* Note that this instruction replicates its result from the x channel
579 return qir_EXP2(c
, qir_FMUL(c
,
581 qir_LOG2(c
, src
[0 * 4 + 0])));
585 tgsi_to_qir_trunc(struct tgsi_to_qir
*trans
,
586 struct tgsi_full_instruction
*tgsi_inst
,
587 enum qop op
, struct qreg
*src
, int i
)
589 struct qcompile
*c
= trans
->c
;
590 return qir_ITOF(c
, qir_FTOI(c
, src
[0 * 4 + i
]));
594 * Computes x - floor(x), which is tricky because our FTOI truncates (rounds
598 tgsi_to_qir_frc(struct tgsi_to_qir
*trans
,
599 struct tgsi_full_instruction
*tgsi_inst
,
600 enum qop op
, struct qreg
*src
, int i
)
602 struct qcompile
*c
= trans
->c
;
603 struct qreg trunc
= qir_ITOF(c
, qir_FTOI(c
, src
[0 * 4 + i
]));
604 struct qreg diff
= qir_FSUB(c
, src
[0 * 4 + i
], trunc
);
606 return qir_SEL_X_Y_NS(c
,
607 qir_FADD(c
, diff
, qir_uniform_f(trans
, 1.0)),
612 * Computes floor(x), which is tricky because our FTOI truncates (rounds to
616 tgsi_to_qir_flr(struct tgsi_to_qir
*trans
,
617 struct tgsi_full_instruction
*tgsi_inst
,
618 enum qop op
, struct qreg
*src
, int i
)
620 struct qcompile
*c
= trans
->c
;
621 struct qreg trunc
= qir_ITOF(c
, qir_FTOI(c
, src
[0 * 4 + i
]));
623 /* This will be < 0 if we truncated and the truncation was of a value
624 * that was < 0 in the first place.
626 qir_SF(c
, qir_FSUB(c
, src
[0 * 4 + i
], trunc
));
628 return qir_SEL_X_Y_NS(c
,
629 qir_FSUB(c
, trunc
, qir_uniform_f(trans
, 1.0)),
634 tgsi_to_qir_dp(struct tgsi_to_qir
*trans
,
635 struct tgsi_full_instruction
*tgsi_inst
,
636 int num
, struct qreg
*src
, int i
)
638 struct qcompile
*c
= trans
->c
;
640 struct qreg sum
= qir_FMUL(c
, src
[0 * 4 + 0], src
[1 * 4 + 0]);
641 for (int j
= 1; j
< num
; j
++) {
642 sum
= qir_FADD(c
, sum
, qir_FMUL(c
,
650 tgsi_to_qir_dp2(struct tgsi_to_qir
*trans
,
651 struct tgsi_full_instruction
*tgsi_inst
,
652 enum qop op
, struct qreg
*src
, int i
)
654 return tgsi_to_qir_dp(trans
, tgsi_inst
, 2, src
, i
);
658 tgsi_to_qir_dp3(struct tgsi_to_qir
*trans
,
659 struct tgsi_full_instruction
*tgsi_inst
,
660 enum qop op
, struct qreg
*src
, int i
)
662 return tgsi_to_qir_dp(trans
, tgsi_inst
, 3, src
, i
);
666 tgsi_to_qir_dp4(struct tgsi_to_qir
*trans
,
667 struct tgsi_full_instruction
*tgsi_inst
,
668 enum qop op
, struct qreg
*src
, int i
)
670 return tgsi_to_qir_dp(trans
, tgsi_inst
, 4, src
, i
);
674 tgsi_to_qir_abs(struct tgsi_to_qir
*trans
,
675 struct tgsi_full_instruction
*tgsi_inst
,
676 enum qop op
, struct qreg
*src
, int i
)
678 struct qcompile
*c
= trans
->c
;
679 struct qreg arg
= src
[0 * 4 + i
];
680 return qir_FMAXABS(c
, arg
, arg
);
683 /* Note that this instruction replicates its result from the x channel */
685 tgsi_to_qir_sin(struct tgsi_to_qir
*trans
,
686 struct tgsi_full_instruction
*tgsi_inst
,
687 enum qop op
, struct qreg
*src
, int i
)
689 struct qcompile
*c
= trans
->c
;
692 -pow(2.0 * M_PI
, 3) / (3 * 2 * 1),
693 pow(2.0 * M_PI
, 5) / (5 * 4 * 3 * 2 * 1),
694 -pow(2.0 * M_PI
, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
697 struct qreg scaled_x
=
700 qir_uniform_f(trans
, 1.0f
/ (M_PI
* 2.0f
)));
703 struct qreg x
= tgsi_to_qir_frc(trans
, NULL
, 0, &scaled_x
, 0);
704 struct qreg x2
= qir_FMUL(c
, x
, x
);
705 struct qreg sum
= qir_FMUL(c
, x
, qir_uniform_f(trans
, coeff
[0]));
706 for (int i
= 1; i
< ARRAY_SIZE(coeff
); i
++) {
707 x
= qir_FMUL(c
, x
, x2
);
712 qir_uniform_f(trans
, coeff
[i
])));
717 /* Note that this instruction replicates its result from the x channel */
719 tgsi_to_qir_cos(struct tgsi_to_qir
*trans
,
720 struct tgsi_full_instruction
*tgsi_inst
,
721 enum qop op
, struct qreg
*src
, int i
)
723 struct qcompile
*c
= trans
->c
;
726 -pow(2.0 * M_PI
, 2) / (2 * 1),
727 pow(2.0 * M_PI
, 4) / (4 * 3 * 2 * 1),
728 -pow(2.0 * M_PI
, 6) / (6 * 5 * 4 * 3 * 2 * 1),
731 struct qreg scaled_x
=
732 qir_FMUL(c
, src
[0 * 4 + 0],
733 qir_uniform_f(trans
, 1.0f
/ (M_PI
* 2.0f
)));
734 struct qreg x_frac
= tgsi_to_qir_frc(trans
, NULL
, 0, &scaled_x
, 0);
736 struct qreg sum
= qir_uniform_f(trans
, coeff
[0]);
737 struct qreg x2
= qir_FMUL(c
, x_frac
, x_frac
);
738 struct qreg x
= x2
; /* Current x^2, x^4, or x^6 */
739 for (int i
= 1; i
< ARRAY_SIZE(coeff
); i
++) {
741 x
= qir_FMUL(c
, x
, x2
);
743 struct qreg mul
= qir_FMUL(c
,
745 qir_uniform_f(trans
, coeff
[i
]));
749 sum
= qir_FADD(c
, sum
, mul
);
755 emit_vertex_input(struct tgsi_to_qir
*trans
, int attr
)
757 enum pipe_format format
= trans
->vs_key
->attr_formats
[attr
];
758 struct qcompile
*c
= trans
->c
;
759 struct qreg vpm_reads
[4];
761 /* Right now, we're setting the VPM offsets to be 16 bytes wide every
762 * time, so we always read 4 32-bit VPM entries.
764 for (int i
= 0; i
< 4; i
++) {
765 vpm_reads
[i
] = qir_get_temp(c
);
766 qir_emit(c
, qir_inst(QOP_VPM_READ
,
773 bool format_warned
= false;
774 const struct util_format_description
*desc
=
775 util_format_description(format
);
777 for (int i
= 0; i
< 4; i
++) {
778 uint8_t swiz
= desc
->swizzle
[i
];
780 if (swiz
<= UTIL_FORMAT_SWIZZLE_W
&&
782 (desc
->channel
[swiz
].type
!= UTIL_FORMAT_TYPE_FLOAT
||
783 desc
->channel
[swiz
].size
!= 32)) {
785 "vtx element %d unsupported type: %s\n",
786 attr
, util_format_name(format
));
787 format_warned
= true;
790 trans
->inputs
[attr
* 4 + i
] =
791 get_swizzled_channel(trans
, vpm_reads
, swiz
);
796 tgsi_to_qir_kill_if(struct tgsi_to_qir
*trans
, struct qreg
*src
, int i
)
798 struct qcompile
*c
= trans
->c
;
800 if (trans
->discard
.file
== QFILE_NULL
)
801 trans
->discard
= qir_uniform_f(trans
, 0.0);
802 qir_SF(c
, src
[0 * 4 + i
]);
803 trans
->discard
= qir_SEL_X_Y_NS(c
,
804 qir_uniform_f(trans
, 1.0),
809 emit_fragcoord_input(struct tgsi_to_qir
*trans
, int attr
)
811 struct qcompile
*c
= trans
->c
;
813 trans
->inputs
[attr
* 4 + 0] = qir_FRAG_X(c
);
814 trans
->inputs
[attr
* 4 + 1] = qir_FRAG_Y(c
);
815 trans
->inputs
[attr
* 4 + 2] =
818 qir_uniform_f(trans
, 1.0 / 0xffffff));
819 trans
->inputs
[attr
* 4 + 3] = qir_FRAG_RCP_W(c
);
823 emit_fragment_varying(struct tgsi_to_qir
*trans
, int index
)
825 struct qcompile
*c
= trans
->c
;
832 /* XXX: multiply by W */
833 return qir_VARY_ADD_C(c
, qir_MOV(c
, vary
));
837 emit_fragment_input(struct tgsi_to_qir
*trans
, int attr
)
839 struct qcompile
*c
= trans
->c
;
841 for (int i
= 0; i
< 4; i
++) {
842 trans
->inputs
[attr
* 4 + i
] =
843 emit_fragment_varying(trans
, attr
* 4 + i
);
849 emit_tgsi_declaration(struct tgsi_to_qir
*trans
,
850 struct tgsi_full_declaration
*decl
)
852 struct qcompile
*c
= trans
->c
;
854 switch (decl
->Declaration
.File
) {
855 case TGSI_FILE_INPUT
:
856 for (int i
= decl
->Range
.First
;
857 i
<= decl
->Range
.Last
;
859 if (c
->stage
== QSTAGE_FRAG
) {
860 if (decl
->Semantic
.Name
==
861 TGSI_SEMANTIC_POSITION
) {
862 emit_fragcoord_input(trans
, i
);
864 emit_fragment_input(trans
, i
);
867 emit_vertex_input(trans
, i
);
875 emit_tgsi_instruction(struct tgsi_to_qir
*trans
,
876 struct tgsi_full_instruction
*tgsi_inst
)
878 struct qcompile
*c
= trans
->c
;
881 struct qreg (*func
)(struct tgsi_to_qir
*trans
,
882 struct tgsi_full_instruction
*tgsi_inst
,
884 struct qreg
*src
, int i
);
886 [TGSI_OPCODE_MOV
] = { QOP_MOV
, tgsi_to_qir_alu
},
887 [TGSI_OPCODE_ABS
] = { 0, tgsi_to_qir_abs
},
888 [TGSI_OPCODE_MUL
] = { QOP_FMUL
, tgsi_to_qir_alu
},
889 [TGSI_OPCODE_ADD
] = { QOP_FADD
, tgsi_to_qir_alu
},
890 [TGSI_OPCODE_SUB
] = { QOP_FSUB
, tgsi_to_qir_alu
},
891 [TGSI_OPCODE_MIN
] = { QOP_FMIN
, tgsi_to_qir_alu
},
892 [TGSI_OPCODE_MAX
] = { QOP_FMAX
, tgsi_to_qir_alu
},
893 [TGSI_OPCODE_F2I
] = { QOP_FTOI
, tgsi_to_qir_alu
},
894 [TGSI_OPCODE_I2F
] = { QOP_ITOF
, tgsi_to_qir_alu
},
895 [TGSI_OPCODE_UADD
] = { QOP_ADD
, tgsi_to_qir_alu
},
896 [TGSI_OPCODE_USHR
] = { QOP_SHR
, tgsi_to_qir_alu
},
897 [TGSI_OPCODE_ISHR
] = { QOP_ASR
, tgsi_to_qir_alu
},
898 [TGSI_OPCODE_SHL
] = { QOP_SHL
, tgsi_to_qir_alu
},
899 [TGSI_OPCODE_IMIN
] = { QOP_MIN
, tgsi_to_qir_alu
},
900 [TGSI_OPCODE_IMAX
] = { QOP_MAX
, tgsi_to_qir_alu
},
901 [TGSI_OPCODE_AND
] = { QOP_AND
, tgsi_to_qir_alu
},
902 [TGSI_OPCODE_OR
] = { QOP_OR
, tgsi_to_qir_alu
},
903 [TGSI_OPCODE_XOR
] = { QOP_XOR
, tgsi_to_qir_alu
},
904 [TGSI_OPCODE_NOT
] = { QOP_NOT
, tgsi_to_qir_alu
},
906 [TGSI_OPCODE_UMUL
] = { 0, tgsi_to_qir_umul
},
907 [TGSI_OPCODE_IDIV
] = { 0, tgsi_to_qir_idiv
},
908 [TGSI_OPCODE_INEG
] = { 0, tgsi_to_qir_ineg
},
910 [TGSI_OPCODE_RSQ
] = { QOP_RSQ
, tgsi_to_qir_alu
},
911 [TGSI_OPCODE_SEQ
] = { 0, tgsi_to_qir_seq
},
912 [TGSI_OPCODE_SNE
] = { 0, tgsi_to_qir_sne
},
913 [TGSI_OPCODE_SGE
] = { 0, tgsi_to_qir_sge
},
914 [TGSI_OPCODE_SLT
] = { 0, tgsi_to_qir_slt
},
915 [TGSI_OPCODE_FSEQ
] = { 0, tgsi_to_qir_fseq
},
916 [TGSI_OPCODE_FSNE
] = { 0, tgsi_to_qir_fsne
},
917 [TGSI_OPCODE_FSGE
] = { 0, tgsi_to_qir_fsge
},
918 [TGSI_OPCODE_FSLT
] = { 0, tgsi_to_qir_fslt
},
919 [TGSI_OPCODE_USEQ
] = { 0, tgsi_to_qir_useq
},
920 [TGSI_OPCODE_USNE
] = { 0, tgsi_to_qir_usne
},
921 [TGSI_OPCODE_ISGE
] = { 0, tgsi_to_qir_isge
},
922 [TGSI_OPCODE_ISLT
] = { 0, tgsi_to_qir_islt
},
924 [TGSI_OPCODE_CMP
] = { 0, tgsi_to_qir_cmp
},
925 [TGSI_OPCODE_MAD
] = { 0, tgsi_to_qir_mad
},
926 [TGSI_OPCODE_DP2
] = { 0, tgsi_to_qir_dp2
},
927 [TGSI_OPCODE_DP3
] = { 0, tgsi_to_qir_dp3
},
928 [TGSI_OPCODE_DP4
] = { 0, tgsi_to_qir_dp4
},
929 [TGSI_OPCODE_RCP
] = { QOP_RCP
, tgsi_to_qir_alu
},
930 [TGSI_OPCODE_RSQ
] = { QOP_RSQ
, tgsi_to_qir_alu
},
931 [TGSI_OPCODE_EX2
] = { QOP_EXP2
, tgsi_to_qir_alu
},
932 [TGSI_OPCODE_LG2
] = { QOP_LOG2
, tgsi_to_qir_alu
},
933 [TGSI_OPCODE_LIT
] = { 0, tgsi_to_qir_lit
},
934 [TGSI_OPCODE_LRP
] = { 0, tgsi_to_qir_lrp
},
935 [TGSI_OPCODE_POW
] = { 0, tgsi_to_qir_pow
},
936 [TGSI_OPCODE_TRUNC
] = { 0, tgsi_to_qir_trunc
},
937 [TGSI_OPCODE_FRC
] = { 0, tgsi_to_qir_frc
},
938 [TGSI_OPCODE_FLR
] = { 0, tgsi_to_qir_flr
},
939 [TGSI_OPCODE_SIN
] = { 0, tgsi_to_qir_sin
},
940 [TGSI_OPCODE_COS
] = { 0, tgsi_to_qir_cos
},
943 uint32_t tgsi_op
= tgsi_inst
->Instruction
.Opcode
;
945 if (tgsi_op
== TGSI_OPCODE_END
)
948 struct qreg src_regs
[12];
949 for (int s
= 0; s
< 3; s
++) {
950 for (int i
= 0; i
< 4; i
++) {
951 src_regs
[4 * s
+ i
] =
952 get_src(trans
, tgsi_inst
->Instruction
.Opcode
,
953 &tgsi_inst
->Src
[s
].Register
, i
);
958 case TGSI_OPCODE_TEX
:
959 case TGSI_OPCODE_TXP
:
960 case TGSI_OPCODE_TXB
:
961 tgsi_to_qir_tex(trans
, tgsi_inst
,
962 op_trans
[tgsi_op
].op
, src_regs
);
964 case TGSI_OPCODE_KILL
:
965 trans
->discard
= qir_uniform_f(trans
, 1.0);
967 case TGSI_OPCODE_KILL_IF
:
968 for (int i
= 0; i
< 4; i
++)
969 tgsi_to_qir_kill_if(trans
, src_regs
, i
);
975 if (tgsi_op
> ARRAY_SIZE(op_trans
) || !(op_trans
[tgsi_op
].func
)) {
976 fprintf(stderr
, "unknown tgsi inst: ");
977 tgsi_dump_instruction(tgsi_inst
, asdf
++);
978 fprintf(stderr
, "\n");
982 for (int i
= 0; i
< 4; i
++) {
983 if (!(tgsi_inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
988 result
= op_trans
[tgsi_op
].func(trans
, tgsi_inst
,
989 op_trans
[tgsi_op
].op
,
992 if (tgsi_inst
->Instruction
.Saturate
) {
993 float low
= (tgsi_inst
->Instruction
.Saturate
==
994 TGSI_SAT_MINUS_PLUS_ONE
? -1.0 : 0.0);
998 qir_uniform_f(trans
, 1.0)),
999 qir_uniform_f(trans
, low
));
1002 update_dst(trans
, tgsi_inst
, i
, result
);
1007 parse_tgsi_immediate(struct tgsi_to_qir
*trans
, struct tgsi_full_immediate
*imm
)
1009 for (int i
= 0; i
< 4; i
++) {
1010 unsigned n
= trans
->num_consts
++;
1011 trans
->consts
[n
] = qir_uniform_ui(trans
, imm
->u
[i
].Uint
);
1016 vc4_blend_channel(struct tgsi_to_qir
*trans
,
1023 struct qcompile
*c
= trans
->c
;
1026 case PIPE_BLENDFACTOR_ONE
:
1028 case PIPE_BLENDFACTOR_SRC_COLOR
:
1029 return qir_FMUL(c
, val
, src
[channel
]);
1030 case PIPE_BLENDFACTOR_SRC_ALPHA
:
1031 return qir_FMUL(c
, val
, src
[3]);
1032 case PIPE_BLENDFACTOR_DST_ALPHA
:
1033 return qir_FMUL(c
, val
, dst
[3]);
1034 case PIPE_BLENDFACTOR_DST_COLOR
:
1035 return qir_FMUL(c
, val
, dst
[channel
]);
1036 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
1037 return qir_FMIN(c
, src
[3], qir_FSUB(c
,
1038 qir_uniform_f(trans
, 1.0),
1040 case PIPE_BLENDFACTOR_CONST_COLOR
:
1041 return qir_FMUL(c
, val
,
1042 get_temp_for_uniform(trans
,
1043 QUNIFORM_BLEND_CONST_COLOR
,
1045 case PIPE_BLENDFACTOR_CONST_ALPHA
:
1046 return qir_FMUL(c
, val
,
1047 get_temp_for_uniform(trans
,
1048 QUNIFORM_BLEND_CONST_COLOR
,
1050 case PIPE_BLENDFACTOR_ZERO
:
1051 return qir_uniform_f(trans
, 0.0);
1052 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
1053 return qir_FMUL(c
, val
, qir_FSUB(c
, qir_uniform_f(trans
, 1.0),
1055 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
1056 return qir_FMUL(c
, val
, qir_FSUB(c
, qir_uniform_f(trans
, 1.0),
1058 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
1059 return qir_FMUL(c
, val
, qir_FSUB(c
, qir_uniform_f(trans
, 1.0),
1061 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
1062 return qir_FMUL(c
, val
, qir_FSUB(c
, qir_uniform_f(trans
, 1.0),
1064 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
1065 return qir_FMUL(c
, val
,
1066 qir_FSUB(c
, qir_uniform_f(trans
, 1.0),
1067 get_temp_for_uniform(trans
,
1068 QUNIFORM_BLEND_CONST_COLOR
,
1070 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
1071 return qir_FMUL(c
, val
,
1072 qir_FSUB(c
, qir_uniform_f(trans
, 1.0),
1073 get_temp_for_uniform(trans
,
1074 QUNIFORM_BLEND_CONST_COLOR
,
1078 case PIPE_BLENDFACTOR_SRC1_COLOR
:
1079 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
1080 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
1081 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
1083 fprintf(stderr
, "Unknown blend factor %d\n", factor
);
1089 vc4_blend_func(struct tgsi_to_qir
*trans
,
1090 struct qreg src
, struct qreg dst
,
1093 struct qcompile
*c
= trans
->c
;
1096 case PIPE_BLEND_ADD
:
1097 return qir_FADD(c
, src
, dst
);
1098 case PIPE_BLEND_SUBTRACT
:
1099 return qir_FSUB(c
, src
, dst
);
1100 case PIPE_BLEND_REVERSE_SUBTRACT
:
1101 return qir_FSUB(c
, dst
, src
);
1102 case PIPE_BLEND_MIN
:
1103 return qir_FMIN(c
, src
, dst
);
1104 case PIPE_BLEND_MAX
:
1105 return qir_FMAX(c
, src
, dst
);
1109 fprintf(stderr
, "Unknown blend func %d\n", func
);
1116 * Implements fixed function blending in shader code.
1118 * VC4 doesn't have any hardware support for blending. Instead, you read the
1119 * current contents of the destination from the tile buffer after having
1120 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
1121 * math using your output color and that destination value, and update the
1122 * output color appropriately.
1125 vc4_blend(struct tgsi_to_qir
*trans
, struct qreg
*result
,
1126 struct qreg
*dst_color
, struct qreg
*src_color
)
1128 struct pipe_rt_blend_state
*blend
= &trans
->fs_key
->blend
;
1130 if (!blend
->blend_enable
) {
1131 for (int i
= 0; i
< 4; i
++)
1132 result
[i
] = src_color
[i
];
1136 struct qreg src_blend
[4], dst_blend
[4];
1137 for (int i
= 0; i
< 3; i
++) {
1138 src_blend
[i
] = vc4_blend_channel(trans
,
1139 dst_color
, src_color
,
1141 blend
->rgb_src_factor
, i
);
1142 dst_blend
[i
] = vc4_blend_channel(trans
,
1143 dst_color
, src_color
,
1145 blend
->rgb_dst_factor
, i
);
1147 src_blend
[3] = vc4_blend_channel(trans
,
1148 dst_color
, src_color
,
1150 blend
->alpha_src_factor
, 3);
1151 dst_blend
[3] = vc4_blend_channel(trans
,
1152 dst_color
, src_color
,
1154 blend
->alpha_dst_factor
, 3);
1156 for (int i
= 0; i
< 3; i
++) {
1157 result
[i
] = vc4_blend_func(trans
,
1158 src_blend
[i
], dst_blend
[i
],
1161 result
[3] = vc4_blend_func(trans
,
1162 src_blend
[3], dst_blend
[3],
1167 emit_frag_end(struct tgsi_to_qir
*trans
)
1169 struct qcompile
*c
= trans
->c
;
1171 struct qreg src_color
[4] = {
1172 trans
->outputs
[0], trans
->outputs
[1],
1173 trans
->outputs
[2], trans
->outputs
[3],
1176 enum pipe_format color_format
= trans
->fs_key
->color_format
;
1177 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
1178 struct qreg tlb_read_color
[4] = { c
->undef
, c
->undef
, c
->undef
, c
->undef
};
1179 struct qreg dst_color
[4] = { c
->undef
, c
->undef
, c
->undef
, c
->undef
};
1180 if (trans
->fs_key
->blend
.blend_enable
||
1181 trans
->fs_key
->blend
.colormask
!= 0xf) {
1182 qir_emit(c
, qir_inst(QOP_TLB_COLOR_READ
, c
->undef
,
1183 c
->undef
, c
->undef
));
1184 for (int i
= 0; i
< 4; i
++)
1185 tlb_read_color
[i
] = qir_R4_UNPACK(c
, i
);
1186 for (int i
= 0; i
< 4; i
++)
1187 dst_color
[i
] = get_swizzled_channel(trans
,
1192 struct qreg blend_color
[4];
1193 vc4_blend(trans
, blend_color
, dst_color
, src_color
);
1195 /* If the bit isn't set in the color mask, then just return the
1196 * original dst color, instead.
1198 for (int i
= 0; i
< 4; i
++) {
1199 if (!(trans
->fs_key
->blend
.colormask
& (1 << i
))) {
1200 blend_color
[i
] = dst_color
[i
];
1204 /* Debug: Sometimes you're getting a black output and just want to see
1205 * if the FS is getting executed at all. Spam magenta into the color
1209 blend_color
[0] = qir_uniform_f(trans
, 1.0);
1210 blend_color
[1] = qir_uniform_f(trans
, 0.0);
1211 blend_color
[2] = qir_uniform_f(trans
, 1.0);
1212 blend_color
[3] = qir_uniform_f(trans
, 0.5);
1215 struct qreg swizzled_outputs
[4];
1216 for (int i
= 0; i
< 4; i
++) {
1217 swizzled_outputs
[i
] = get_swizzled_channel(trans
, blend_color
,
1221 if (trans
->discard
.file
!= QFILE_NULL
)
1222 qir_TLB_DISCARD_SETUP(c
, trans
->discard
);
1224 if (trans
->fs_key
->depth_enabled
) {
1225 qir_emit(c
, qir_inst(QOP_TLB_PASSTHROUGH_Z_WRITE
, c
->undef
,
1226 c
->undef
, c
->undef
));
1229 bool color_written
= false;
1230 for (int i
= 0; i
< 4; i
++) {
1231 if (swizzled_outputs
[i
].file
!= QFILE_NULL
)
1232 color_written
= true;
1235 struct qreg packed_color
;
1236 if (color_written
) {
1237 /* Fill in any undefined colors. The simulator will assertion
1238 * fail if we read something that wasn't written, and I don't
1239 * know what hardware does.
1241 for (int i
= 0; i
< 4; i
++) {
1242 if (swizzled_outputs
[i
].file
== QFILE_NULL
)
1243 swizzled_outputs
[i
] = qir_uniform_f(trans
, 0.0);
1245 packed_color
= qir_get_temp(c
);
1246 qir_emit(c
, qir_inst4(QOP_PACK_COLORS
, packed_color
,
1247 swizzled_outputs
[0],
1248 swizzled_outputs
[1],
1249 swizzled_outputs
[2],
1250 swizzled_outputs
[3]));
1252 packed_color
= qir_uniform_ui(trans
, 0);
1255 qir_emit(c
, qir_inst(QOP_TLB_COLOR_WRITE
, c
->undef
,
1256 packed_color
, c
->undef
));
1260 emit_scaled_viewport_write(struct tgsi_to_qir
*trans
, struct qreg rcp_w
)
1262 struct qcompile
*c
= trans
->c
;
1265 for (int i
= 0; i
< 2; i
++) {
1267 add_uniform(trans
, QUNIFORM_VIEWPORT_X_SCALE
+ i
, 0);
1269 xyi
[i
] = qir_FTOI(c
, qir_FMUL(c
,
1276 qir_VPM_WRITE(c
, qir_PACK_SCALED(c
, xyi
[0], xyi
[1]));
1280 emit_zs_write(struct tgsi_to_qir
*trans
, struct qreg rcp_w
)
1282 struct qcompile
*c
= trans
->c
;
1284 struct qreg zscale
= add_uniform(trans
, QUNIFORM_VIEWPORT_Z_SCALE
, 0);
1285 struct qreg zoffset
= add_uniform(trans
, QUNIFORM_VIEWPORT_Z_OFFSET
, 0);
1287 qir_VPM_WRITE(c
, qir_FMUL(c
, qir_FADD(c
, qir_FMUL(c
,
1295 emit_rcp_wc_write(struct tgsi_to_qir
*trans
, struct qreg rcp_w
)
1297 struct qcompile
*c
= trans
->c
;
1299 qir_VPM_WRITE(c
, rcp_w
);
1303 emit_vert_end(struct tgsi_to_qir
*trans
)
1305 struct qcompile
*c
= trans
->c
;
1307 struct qreg rcp_w
= qir_RCP(c
, trans
->outputs
[3]);
1309 emit_scaled_viewport_write(trans
, rcp_w
);
1310 emit_zs_write(trans
, rcp_w
);
1311 emit_rcp_wc_write(trans
, rcp_w
);
1313 for (int i
= 4; i
< trans
->num_outputs
; i
++) {
1314 qir_VPM_WRITE(c
, trans
->outputs
[i
]);
1319 emit_coord_end(struct tgsi_to_qir
*trans
)
1321 struct qcompile
*c
= trans
->c
;
1323 struct qreg rcp_w
= qir_RCP(c
, trans
->outputs
[3]);
1325 for (int i
= 0; i
< 4; i
++)
1326 qir_VPM_WRITE(c
, trans
->outputs
[i
]);
1328 emit_scaled_viewport_write(trans
, rcp_w
);
1329 emit_zs_write(trans
, rcp_w
);
1330 emit_rcp_wc_write(trans
, rcp_w
);
1333 static struct tgsi_to_qir
*
1334 vc4_shader_tgsi_to_qir(struct vc4_compiled_shader
*shader
, enum qstage stage
,
1335 struct vc4_key
*key
)
1337 struct tgsi_to_qir
*trans
= CALLOC_STRUCT(tgsi_to_qir
);
1341 c
= qir_compile_init();
1344 memset(trans
, 0, sizeof(*trans
));
1346 trans
->temps
= calloc(sizeof(struct qreg
), 1024);
1347 trans
->inputs
= calloc(sizeof(struct qreg
), 8 * 4);
1348 trans
->outputs
= calloc(sizeof(struct qreg
), 1024);
1349 trans
->uniforms
= calloc(sizeof(struct qreg
), 1024);
1350 trans
->consts
= calloc(sizeof(struct qreg
), 1024);
1352 trans
->uniform_data
= calloc(sizeof(uint32_t), 1024);
1353 trans
->uniform_contents
= calloc(sizeof(enum quniform_contents
), 1024);
1355 trans
->shader_state
= key
->shader_state
;
1357 ret
= tgsi_parse_init(&trans
->parser
, trans
->shader_state
->tokens
);
1358 assert(ret
== TGSI_PARSE_OK
);
1360 if (vc4_debug
& VC4_DEBUG_TGSI
) {
1361 fprintf(stderr
, "TGSI:\n");
1362 tgsi_dump(trans
->shader_state
->tokens
, 0);
1368 trans
->fs_key
= (struct vc4_fs_key
*)key
;
1369 if (trans
->fs_key
->is_points
) {
1370 trans
->point_x
= emit_fragment_varying(trans
, 0);
1371 trans
->point_y
= emit_fragment_varying(trans
, 0);
1372 } else if (trans
->fs_key
->is_lines
) {
1373 trans
->line_x
= emit_fragment_varying(trans
, 0);
1377 trans
->vs_key
= (struct vc4_vs_key
*)key
;
1380 trans
->vs_key
= (struct vc4_vs_key
*)key
;
1384 while (!tgsi_parse_end_of_tokens(&trans
->parser
)) {
1385 tgsi_parse_token(&trans
->parser
);
1387 switch (trans
->parser
.FullToken
.Token
.Type
) {
1388 case TGSI_TOKEN_TYPE_DECLARATION
:
1389 emit_tgsi_declaration(trans
,
1390 &trans
->parser
.FullToken
.FullDeclaration
);
1393 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1394 emit_tgsi_instruction(trans
,
1395 &trans
->parser
.FullToken
.FullInstruction
);
1398 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1399 parse_tgsi_immediate(trans
,
1400 &trans
->parser
.FullToken
.FullImmediate
);
1407 emit_frag_end(trans
);
1410 emit_vert_end(trans
);
1413 emit_coord_end(trans
);
1417 tgsi_parse_free(&trans
->parser
);
1422 if (vc4_debug
& VC4_DEBUG_QIR
) {
1423 fprintf(stderr
, "QIR:\n");
1426 vc4_generate_code(c
);
1428 if (vc4_debug
& VC4_DEBUG_SHADERDB
) {
1429 fprintf(stderr
, "SHADER-DB: %s: %d instructions\n",
1430 qir_get_stage_name(c
->stage
), c
->qpu_inst_count
);
1431 fprintf(stderr
, "SHADER-DB: %s: %d uniforms\n",
1432 qir_get_stage_name(c
->stage
), trans
->num_uniforms
);
1439 vc4_shader_state_create(struct pipe_context
*pctx
,
1440 const struct pipe_shader_state
*cso
)
1442 struct pipe_shader_state
*so
= CALLOC_STRUCT(pipe_shader_state
);
1446 so
->tokens
= tgsi_dup_tokens(cso
->tokens
);
1452 copy_uniform_state_to_shader(struct vc4_compiled_shader
*shader
,
1454 struct tgsi_to_qir
*trans
)
1456 int count
= trans
->num_uniforms
;
1457 struct vc4_shader_uniform_info
*uinfo
= &shader
->uniforms
[shader_index
];
1459 uinfo
->count
= count
;
1460 uinfo
->data
= malloc(count
* sizeof(*uinfo
->data
));
1461 memcpy(uinfo
->data
, trans
->uniform_data
,
1462 count
* sizeof(*uinfo
->data
));
1463 uinfo
->contents
= malloc(count
* sizeof(*uinfo
->contents
));
1464 memcpy(uinfo
->contents
, trans
->uniform_contents
,
1465 count
* sizeof(*uinfo
->contents
));
1466 uinfo
->num_texture_samples
= trans
->num_texture_samples
;
1470 vc4_fs_compile(struct vc4_context
*vc4
, struct vc4_compiled_shader
*shader
,
1471 struct vc4_fs_key
*key
)
1473 struct tgsi_to_qir
*trans
= vc4_shader_tgsi_to_qir(shader
, QSTAGE_FRAG
,
1475 shader
->num_inputs
= trans
->c
->num_inputs
;
1476 copy_uniform_state_to_shader(shader
, 0, trans
);
1477 shader
->bo
= vc4_bo_alloc_mem(vc4
->screen
, trans
->c
->qpu_insts
,
1478 trans
->c
->qpu_inst_count
* sizeof(uint64_t),
1481 qir_compile_destroy(trans
->c
);
1486 vc4_vs_compile(struct vc4_context
*vc4
, struct vc4_compiled_shader
*shader
,
1487 struct vc4_vs_key
*key
)
1489 struct tgsi_to_qir
*vs_trans
= vc4_shader_tgsi_to_qir(shader
,
1492 copy_uniform_state_to_shader(shader
, 0, vs_trans
);
1494 struct tgsi_to_qir
*cs_trans
= vc4_shader_tgsi_to_qir(shader
,
1497 copy_uniform_state_to_shader(shader
, 1, cs_trans
);
1499 uint32_t vs_size
= vs_trans
->c
->qpu_inst_count
* sizeof(uint64_t);
1500 uint32_t cs_size
= cs_trans
->c
->qpu_inst_count
* sizeof(uint64_t);
1501 shader
->coord_shader_offset
= vs_size
; /* XXX: alignment? */
1502 shader
->bo
= vc4_bo_alloc(vc4
->screen
,
1503 shader
->coord_shader_offset
+ cs_size
,
1506 void *map
= vc4_bo_map(shader
->bo
);
1507 memcpy(map
, vs_trans
->c
->qpu_insts
, vs_size
);
1508 memcpy(map
+ shader
->coord_shader_offset
,
1509 cs_trans
->c
->qpu_insts
, cs_size
);
1511 qir_compile_destroy(vs_trans
->c
);
1512 qir_compile_destroy(cs_trans
->c
);
1516 vc4_setup_shared_key(struct vc4_key
*key
, struct vc4_texture_stateobj
*texstate
)
1518 for (int i
= 0; i
< texstate
->num_textures
; i
++) {
1519 struct pipe_sampler_view
*sampler
= texstate
->textures
[i
];
1521 struct pipe_resource
*prsc
= sampler
->texture
;
1522 key
->tex_format
[i
] = prsc
->format
;
1528 vc4_update_compiled_fs(struct vc4_context
*vc4
, uint8_t prim_mode
)
1530 struct vc4_fs_key local_key
;
1531 struct vc4_fs_key
*key
= &local_key
;
1533 memset(key
, 0, sizeof(*key
));
1534 vc4_setup_shared_key(&key
->base
, &vc4
->fragtex
);
1535 key
->base
.shader_state
= vc4
->prog
.bind_fs
;
1536 key
->is_points
= (prim_mode
== PIPE_PRIM_POINTS
);
1537 key
->is_lines
= (prim_mode
>= PIPE_PRIM_LINES
&&
1538 prim_mode
<= PIPE_PRIM_LINE_STRIP
);
1539 key
->blend
= vc4
->blend
->rt
[0];
1541 if (vc4
->framebuffer
.cbufs
[0])
1542 key
->color_format
= vc4
->framebuffer
.cbufs
[0]->format
;
1544 key
->depth_enabled
= vc4
->zsa
->base
.depth
.enabled
;
1546 vc4
->prog
.fs
= util_hash_table_get(vc4
->fs_cache
, key
);
1550 key
= malloc(sizeof(*key
));
1551 memcpy(key
, &local_key
, sizeof(*key
));
1553 struct vc4_compiled_shader
*shader
= CALLOC_STRUCT(vc4_compiled_shader
);
1554 vc4_fs_compile(vc4
, shader
, key
);
1555 util_hash_table_set(vc4
->fs_cache
, key
, shader
);
1557 vc4
->prog
.fs
= shader
;
1561 vc4_update_compiled_vs(struct vc4_context
*vc4
)
1563 struct vc4_vs_key local_key
;
1564 struct vc4_vs_key
*key
= &local_key
;
1566 memset(key
, 0, sizeof(*key
));
1567 vc4_setup_shared_key(&key
->base
, &vc4
->verttex
);
1568 key
->base
.shader_state
= vc4
->prog
.bind_vs
;
1570 for (int i
= 0; i
< ARRAY_SIZE(key
->attr_formats
); i
++)
1571 key
->attr_formats
[i
] = vc4
->vtx
->pipe
[i
].src_format
;
1573 vc4
->prog
.vs
= util_hash_table_get(vc4
->vs_cache
, key
);
1577 key
= malloc(sizeof(*key
));
1578 memcpy(key
, &local_key
, sizeof(*key
));
1580 struct vc4_compiled_shader
*shader
= CALLOC_STRUCT(vc4_compiled_shader
);
1581 vc4_vs_compile(vc4
, shader
, key
);
1582 util_hash_table_set(vc4
->vs_cache
, key
, shader
);
1584 vc4
->prog
.vs
= shader
;
/* Ensures both compiled shader variants match the current draw state. */
static void
vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
{
        vc4_update_compiled_fs(vc4, prim_mode);
        vc4_update_compiled_vs(vc4);
}
1595 fs_cache_hash(void *key
)
1597 return util_hash_crc32(key
, sizeof(struct vc4_fs_key
));
1601 vs_cache_hash(void *key
)
1603 return util_hash_crc32(key
, sizeof(struct vc4_vs_key
));
1607 fs_cache_compare(void *key1
, void *key2
)
1609 return memcmp(key1
, key2
, sizeof(struct vc4_fs_key
));
1613 vs_cache_compare(void *key1
, void *key2
)
1615 return memcmp(key1
, key2
, sizeof(struct vc4_vs_key
));
/* Context handed to the cache-eviction callbacks when a shader CSO is
 * deleted: the owning context (for its caches) and the shader state whose
 * compiled variants must be dropped.
 */
struct delete_state {
        struct vc4_context *vc4;
        struct pipe_shader_state *shader_state;
};
1623 static enum pipe_error
1624 fs_delete_from_cache(void *in_key
, void *in_value
, void *data
)
1626 struct delete_state
*del
= data
;
1627 struct vc4_fs_key
*key
= in_key
;
1628 struct vc4_compiled_shader
*shader
= in_value
;
1630 if (key
->base
.shader_state
== data
) {
1631 util_hash_table_remove(del
->vc4
->fs_cache
, key
);
1632 vc4_bo_unreference(&shader
->bo
);
1639 static enum pipe_error
1640 vs_delete_from_cache(void *in_key
, void *in_value
, void *data
)
1642 struct delete_state
*del
= data
;
1643 struct vc4_vs_key
*key
= in_key
;
1644 struct vc4_compiled_shader
*shader
= in_value
;
1646 if (key
->base
.shader_state
== data
) {
1647 util_hash_table_remove(del
->vc4
->vs_cache
, key
);
1648 vc4_bo_unreference(&shader
->bo
);
1656 vc4_shader_state_delete(struct pipe_context
*pctx
, void *hwcso
)
1658 struct vc4_context
*vc4
= vc4_context(pctx
);
1659 struct pipe_shader_state
*so
= hwcso
;
1660 struct delete_state del
;
1663 del
.shader_state
= so
;
1664 util_hash_table_foreach(vc4
->fs_cache
, fs_delete_from_cache
, &del
);
1665 util_hash_table_foreach(vc4
->vs_cache
, vs_delete_from_cache
, &del
);
1667 free((void *)so
->tokens
);
1671 static uint32_t translate_wrap(uint32_t p_wrap
)
1674 case PIPE_TEX_WRAP_REPEAT
:
1676 case PIPE_TEX_WRAP_CLAMP
:
1677 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
1679 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
1681 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
1684 fprintf(stderr
, "Unknown wrap mode %d\n", p_wrap
);
1685 assert(!"not reached");
1691 write_texture_p0(struct vc4_context
*vc4
,
1692 struct vc4_texture_stateobj
*texstate
,
1695 struct pipe_sampler_view
*texture
= texstate
->textures
[unit
];
1696 struct vc4_resource
*rsc
= vc4_resource(texture
->texture
);
1698 cl_reloc(vc4
, &vc4
->uniforms
, rsc
->bo
,
1699 rsc
->slices
[0].offset
| texture
->u
.tex
.last_level
|
1700 ((rsc
->vc4_format
& 7) << 4));
1704 write_texture_p1(struct vc4_context
*vc4
,
1705 struct vc4_texture_stateobj
*texstate
,
1708 struct pipe_sampler_view
*texture
= texstate
->textures
[unit
];
1709 struct vc4_resource
*rsc
= vc4_resource(texture
->texture
);
1710 struct pipe_sampler_state
*sampler
= texstate
->samplers
[unit
];
1711 static const uint32_t mipfilter_map
[] = {
1712 [PIPE_TEX_MIPFILTER_NEAREST
] = 2,
1713 [PIPE_TEX_MIPFILTER_LINEAR
] = 4,
1714 [PIPE_TEX_MIPFILTER_NONE
] = 0
1716 static const uint32_t imgfilter_map
[] = {
1717 [PIPE_TEX_FILTER_NEAREST
] = 1,
1718 [PIPE_TEX_FILTER_LINEAR
] = 0,
1721 cl_u32(&vc4
->uniforms
,
1722 ((rsc
->vc4_format
>> 4) << 31) |
1723 (texture
->texture
->height0
<< 20) |
1724 (texture
->texture
->width0
<< 8) |
1725 (imgfilter_map
[sampler
->mag_img_filter
] << 7) |
1726 ((imgfilter_map
[sampler
->min_img_filter
] +
1727 mipfilter_map
[sampler
->min_mip_filter
]) << 4) |
1728 (translate_wrap(sampler
->wrap_t
) << 2) |
1729 (translate_wrap(sampler
->wrap_s
) << 0));
1733 get_texrect_scale(struct vc4_texture_stateobj
*texstate
,
1734 enum quniform_contents contents
,
1737 struct pipe_sampler_view
*texture
= texstate
->textures
[data
];
1740 if (contents
== QUNIFORM_TEXRECT_SCALE_X
)
1741 dim
= texture
->texture
->width0
;
1743 dim
= texture
->texture
->height0
;
1745 return fui(1.0f
/ dim
);
1749 vc4_write_uniforms(struct vc4_context
*vc4
, struct vc4_compiled_shader
*shader
,
1750 struct vc4_constbuf_stateobj
*cb
,
1751 struct vc4_texture_stateobj
*texstate
,
1754 struct vc4_shader_uniform_info
*uinfo
= &shader
->uniforms
[shader_index
];
1755 const uint32_t *gallium_uniforms
= cb
->cb
[0].user_buffer
;
1757 cl_start_shader_reloc(&vc4
->uniforms
, uinfo
->num_texture_samples
);
1759 for (int i
= 0; i
< uinfo
->count
; i
++) {
1761 switch (uinfo
->contents
[i
]) {
1762 case QUNIFORM_CONSTANT
:
1763 cl_u32(&vc4
->uniforms
, uinfo
->data
[i
]);
1765 case QUNIFORM_UNIFORM
:
1766 cl_u32(&vc4
->uniforms
,
1767 gallium_uniforms
[uinfo
->data
[i
]]);
1769 case QUNIFORM_VIEWPORT_X_SCALE
:
1770 cl_f(&vc4
->uniforms
, vc4
->viewport
.scale
[0] * 16.0f
);
1772 case QUNIFORM_VIEWPORT_Y_SCALE
:
1773 cl_f(&vc4
->uniforms
, vc4
->viewport
.scale
[1] * 16.0f
);
1776 case QUNIFORM_VIEWPORT_Z_OFFSET
:
1777 cl_f(&vc4
->uniforms
, vc4
->viewport
.translate
[2]);
1779 case QUNIFORM_VIEWPORT_Z_SCALE
:
1780 cl_f(&vc4
->uniforms
, vc4
->viewport
.scale
[2]);
1783 case QUNIFORM_TEXTURE_CONFIG_P0
:
1784 write_texture_p0(vc4
, texstate
, uinfo
->data
[i
]);
1787 case QUNIFORM_TEXTURE_CONFIG_P1
:
1788 write_texture_p1(vc4
, texstate
, uinfo
->data
[i
]);
1791 case QUNIFORM_TEXRECT_SCALE_X
:
1792 case QUNIFORM_TEXRECT_SCALE_Y
:
1793 cl_u32(&vc4
->uniforms
,
1794 get_texrect_scale(texstate
,
1799 case QUNIFORM_BLEND_CONST_COLOR
:
1800 cl_f(&vc4
->uniforms
,
1801 vc4
->blend_color
.color
[uinfo
->data
[i
]]);
1805 uint32_t written_val
= *(uint32_t *)(vc4
->uniforms
.next
- 4);
1806 fprintf(stderr
, "%p/%d: %d: 0x%08x (%f)\n",
1807 shader
, shader_index
, i
, written_val
, uif(written_val
));
1813 vc4_fp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
1815 struct vc4_context
*vc4
= vc4_context(pctx
);
1816 vc4
->prog
.bind_fs
= hwcso
;
1817 vc4
->prog
.dirty
|= VC4_SHADER_DIRTY_FP
;
1818 vc4
->dirty
|= VC4_DIRTY_PROG
;
1822 vc4_vp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
1824 struct vc4_context
*vc4
= vc4_context(pctx
);
1825 vc4
->prog
.bind_vs
= hwcso
;
1826 vc4
->prog
.dirty
|= VC4_SHADER_DIRTY_VP
;
1827 vc4
->dirty
|= VC4_DIRTY_PROG
;
1831 vc4_program_init(struct pipe_context
*pctx
)
1833 struct vc4_context
*vc4
= vc4_context(pctx
);
1835 pctx
->create_vs_state
= vc4_shader_state_create
;
1836 pctx
->delete_vs_state
= vc4_shader_state_delete
;
1838 pctx
->create_fs_state
= vc4_shader_state_create
;
1839 pctx
->delete_fs_state
= vc4_shader_state_delete
;
1841 pctx
->bind_fs_state
= vc4_fp_state_bind
;
1842 pctx
->bind_vs_state
= vc4_vp_state_bind
;
1844 vc4
->fs_cache
= util_hash_table_create(fs_cache_hash
, fs_cache_compare
);
1845 vc4
->vs_cache
= util_hash_table_create(vs_cache_hash
, vs_cache_compare
);