2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include "r600_formats.h"
25 #include "r600_opcodes.h"
26 #include "r600_shader.h"
27 #include "r600_dump.h"
29 #include "sfn/sfn_nir.h"
31 #include "sb/sb_public.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_scan.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_from_mesa.h"
39 #include "nir/tgsi_to_nir.h"
40 #include "nir/nir_to_tgsi_info.h"
41 #include "compiler/nir/nir.h"
42 #include "util/u_bitcast.h"
43 #include "util/u_memory.h"
44 #include "util/u_math.h"
49 Why CAYMAN got loops for lots of instructions is explained here.
51 -These 8xx t-slot only ops are implemented in all vector slots.
52 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
53 These 8xx t-slot only opcodes become vector ops, with all four
54 slots expecting the arguments on sources a and b. Result is
55 broadcast to all channels.
56 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
57 These 8xx t-slot only opcodes become vector ops in the z, y, and
59 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
60 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
63 The w slot may have an independent co-issued operation, or if the
64 result is required to be in the w slot, the opcode above may be
65 issued in the w slot as well.
66 The compiler must issue the source argument to slots z, y, and x
69 /* Contents of r0 on entry to various shaders
75 GS - r0.xyw, r1.xyz = per-vertex offsets
81 .w = tess factor base.
83 TES - .x = TessCoord.x
85 - .z = RelPatchID (??)
88 PS - face_gpr.z = SampleMask
91 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
92 static int r600_shader_from_tgsi(struct r600_context
*rctx
,
93 struct r600_pipe_shader
*pipeshader
,
94 union r600_shader_key key
);
96 static void r600_add_gpr_array(struct r600_shader
*ps
, int start_gpr
,
97 int size
, unsigned comp_mask
) {
102 if (ps
->num_arrays
== ps
->max_arrays
) {
103 ps
->max_arrays
+= 64;
104 ps
->arrays
= realloc(ps
->arrays
, ps
->max_arrays
*
105 sizeof(struct r600_shader_array
));
108 int n
= ps
->num_arrays
;
111 ps
->arrays
[n
].comp_mask
= comp_mask
;
112 ps
->arrays
[n
].gpr_start
= start_gpr
;
113 ps
->arrays
[n
].gpr_count
= size
;
116 static void r600_dump_streamout(struct pipe_stream_output_info
*so
)
120 fprintf(stderr
, "STREAMOUT\n");
121 for (i
= 0; i
< so
->num_outputs
; i
++) {
122 unsigned mask
= ((1 << so
->output
[i
].num_components
) - 1) <<
123 so
->output
[i
].start_component
;
124 fprintf(stderr
, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
126 so
->output
[i
].stream
,
127 so
->output
[i
].output_buffer
,
128 so
->output
[i
].dst_offset
, so
->output
[i
].dst_offset
+ so
->output
[i
].num_components
- 1,
129 so
->output
[i
].register_index
,
134 so
->output
[i
].dst_offset
< so
->output
[i
].start_component
? " (will lower)" : "");
138 static int store_shader(struct pipe_context
*ctx
,
139 struct r600_pipe_shader
*shader
)
141 struct r600_context
*rctx
= (struct r600_context
*)ctx
;
144 if (shader
->bo
== NULL
) {
145 shader
->bo
= (struct r600_resource
*)
146 pipe_buffer_create(ctx
->screen
, 0, PIPE_USAGE_IMMUTABLE
, shader
->shader
.bc
.ndw
* 4);
147 if (shader
->bo
== NULL
) {
150 ptr
= r600_buffer_map_sync_with_rings(
151 &rctx
->b
, shader
->bo
,
152 PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
153 if (R600_BIG_ENDIAN
) {
154 for (i
= 0; i
< shader
->shader
.bc
.ndw
; ++i
) {
155 ptr
[i
] = util_cpu_to_le32(shader
->shader
.bc
.bytecode
[i
]);
158 memcpy(ptr
, shader
->shader
.bc
.bytecode
, shader
->shader
.bc
.ndw
* sizeof(*ptr
));
160 rctx
->b
.ws
->buffer_unmap(shader
->bo
->buf
);
166 extern const struct nir_shader_compiler_options r600_nir_options
;
167 static int nshader
= 0;
168 int r600_pipe_shader_create(struct pipe_context
*ctx
,
169 struct r600_pipe_shader
*shader
,
170 union r600_shader_key key
)
172 struct r600_context
*rctx
= (struct r600_context
*)ctx
;
173 struct r600_pipe_shader_selector
*sel
= shader
->selector
;
175 struct r600_screen
*rscreen
= (struct r600_screen
*)ctx
->screen
;
177 int processor
= sel
->ir_type
== PIPE_SHADER_IR_TGSI
?
178 tgsi_get_processor_type(sel
->tokens
):
179 pipe_shader_type_from_mesa(sel
->nir
->info
.stage
);
181 bool dump
= r600_can_dump_shader(&rctx
->screen
->b
, processor
);
182 unsigned use_sb
= !(rctx
->screen
->b
.debug_flags
& DBG_NO_SB
) &&
183 !(rscreen
->b
.debug_flags
& DBG_NIR
);
185 unsigned export_shader
;
187 shader
->shader
.bc
.isa
= rctx
->isa
;
189 if (!(rscreen
->b
.debug_flags
& DBG_NIR
)) {
190 assert(sel
->ir_type
== PIPE_SHADER_IR_TGSI
);
191 r
= r600_shader_from_tgsi(rctx
, shader
, key
);
193 R600_ERR("translation from TGSI failed !\n");
197 if (sel
->ir_type
== PIPE_SHADER_IR_TGSI
)
198 sel
->nir
= tgsi_to_nir_noscreen(sel
->tokens
, &r600_nir_options
);
199 nir_tgsi_scan_shader(sel
->nir
, &sel
->info
, true);
200 r
= r600_shader_from_nir(rctx
, shader
, &key
);
202 fprintf(stderr
, "--Failed shader--------------------------------------------------\n");
204 if (sel
->ir_type
== PIPE_SHADER_IR_TGSI
) {
205 fprintf(stderr
, "--TGSI--------------------------------------------------------\n");
206 tgsi_dump(sel
->tokens
, 0);
209 if (rscreen
->b
.debug_flags
& DBG_NIR
) {
210 fprintf(stderr
, "--NIR --------------------------------------------------------\n");
211 nir_print_shader(sel
->nir
, stderr
);
214 R600_ERR("translation from NIR failed !\n");
220 if (sel
->ir_type
== PIPE_SHADER_IR_TGSI
) {
221 fprintf(stderr
, "--TGSI--------------------------------------------------------\n");
222 tgsi_dump(sel
->tokens
, 0);
225 if (sel
->so
.num_outputs
) {
226 r600_dump_streamout(&sel
->so
);
230 if (shader
->shader
.processor_type
== PIPE_SHADER_VERTEX
) {
231 /* only disable for vertex shaders in tess paths */
235 use_sb
&= (shader
->shader
.processor_type
!= PIPE_SHADER_TESS_CTRL
);
236 use_sb
&= (shader
->shader
.processor_type
!= PIPE_SHADER_TESS_EVAL
);
237 use_sb
&= (shader
->shader
.processor_type
!= PIPE_SHADER_COMPUTE
);
239 /* disable SB for shaders using doubles */
240 use_sb
&= !shader
->shader
.uses_doubles
;
242 use_sb
&= !shader
->shader
.uses_atomics
;
243 use_sb
&= !shader
->shader
.uses_images
;
244 use_sb
&= !shader
->shader
.uses_helper_invocation
;
246 /* Check if the bytecode has already been built. */
247 if (!shader
->shader
.bc
.bytecode
) {
248 r
= r600_bytecode_build(&shader
->shader
.bc
);
250 R600_ERR("building bytecode failed !\n");
255 sb_disasm
= use_sb
|| (rctx
->screen
->b
.debug_flags
& DBG_SB_DISASM
);
256 if (dump
&& !sb_disasm
) {
257 fprintf(stderr
, "--------------------------------------------------------------\n");
258 r600_bytecode_disasm(&shader
->shader
.bc
);
259 fprintf(stderr
, "______________________________________________________________\n");
260 } else if ((dump
&& sb_disasm
) || use_sb
) {
261 r
= r600_sb_bytecode_process(rctx
, &shader
->shader
.bc
, &shader
->shader
,
264 R600_ERR("r600_sb_bytecode_process failed !\n");
272 snprintf(fname
, 1024, "shader_from_%s_%d.cpp",
273 (sel
->ir_type
== PIPE_SHADER_IR_TGSI
?
274 (rscreen
->b
.debug_flags
& DBG_NIR
? "tgsi-nir" : "tgsi")
276 f
= fopen(fname
, "w");
277 print_shader_info(f
, nshader
++, &shader
->shader
);
278 print_shader_info(stderr
, nshader
++, &shader
->shader
);
279 print_pipe_info(stderr
, &sel
->info
);
280 if (sel
->ir_type
== PIPE_SHADER_IR_TGSI
) {
281 fprintf(f
, "/****TGSI**********************************\n");
282 tgsi_dump_to_file(sel
->tokens
, 0, f
);
285 if (rscreen
->b
.debug_flags
& DBG_NIR
){
286 fprintf(f
, "/****NIR **********************************\n");
287 nir_print_shader(sel
->nir
, f
);
289 fprintf(f
, "******************************************/\n");
293 if (shader
->gs_copy_shader
) {
296 r
= r600_sb_bytecode_process(rctx
, &shader
->gs_copy_shader
->shader
.bc
,
297 &shader
->gs_copy_shader
->shader
, dump
, 0);
302 if ((r
= store_shader(ctx
, shader
->gs_copy_shader
)))
306 /* Store the shader in a buffer. */
307 if ((r
= store_shader(ctx
, shader
)))
311 switch (shader
->shader
.processor_type
) {
312 case PIPE_SHADER_TESS_CTRL
:
313 evergreen_update_hs_state(ctx
, shader
);
315 case PIPE_SHADER_TESS_EVAL
:
317 evergreen_update_es_state(ctx
, shader
);
319 evergreen_update_vs_state(ctx
, shader
);
321 case PIPE_SHADER_GEOMETRY
:
322 if (rctx
->b
.chip_class
>= EVERGREEN
) {
323 evergreen_update_gs_state(ctx
, shader
);
324 evergreen_update_vs_state(ctx
, shader
->gs_copy_shader
);
326 r600_update_gs_state(ctx
, shader
);
327 r600_update_vs_state(ctx
, shader
->gs_copy_shader
);
330 case PIPE_SHADER_VERTEX
:
331 export_shader
= key
.vs
.as_es
;
332 if (rctx
->b
.chip_class
>= EVERGREEN
) {
334 evergreen_update_ls_state(ctx
, shader
);
335 else if (key
.vs
.as_es
)
336 evergreen_update_es_state(ctx
, shader
);
338 evergreen_update_vs_state(ctx
, shader
);
341 r600_update_es_state(ctx
, shader
);
343 r600_update_vs_state(ctx
, shader
);
346 case PIPE_SHADER_FRAGMENT
:
347 if (rctx
->b
.chip_class
>= EVERGREEN
) {
348 evergreen_update_ps_state(ctx
, shader
);
350 r600_update_ps_state(ctx
, shader
);
353 case PIPE_SHADER_COMPUTE
:
354 evergreen_update_ls_state(ctx
, shader
);
363 r600_pipe_shader_destroy(ctx
, shader
);
367 void r600_pipe_shader_destroy(struct pipe_context
*ctx UNUSED
, struct r600_pipe_shader
*shader
)
369 r600_resource_reference(&shader
->bo
, NULL
);
370 if (shader
->shader
.bc
.cf
.next
)
371 r600_bytecode_clear(&shader
->shader
.bc
);
372 r600_release_command_buffer(&shader
->command_buffer
);
376 * tgsi -> r600 shader
378 struct r600_shader_tgsi_instruction
;
380 struct r600_shader_src
{
387 boolean kc_rel
; /* true if cache bank is indexed */
396 struct r600_shader_ctx
{
397 struct tgsi_shader_info info
;
398 struct tgsi_array_info
*array_infos
;
399 /* flag for each tgsi temp array if its been spilled or not */
400 bool *spilled_arrays
;
401 struct tgsi_parse_context parse
;
402 const struct tgsi_token
*tokens
;
404 unsigned file_offset
[TGSI_FILE_COUNT
];
406 const struct r600_shader_tgsi_instruction
*inst_info
;
407 struct r600_bytecode
*bc
;
408 struct r600_shader
*shader
;
409 struct r600_shader_src src
[4];
412 uint32_t max_driver_temp_used
;
413 /* needed for evergreen interpolation */
414 struct eg_interp eg_interpolators
[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
415 /* evergreen/cayman also store sample mask in face register */
417 /* sample id is .w component stored in fixed point position register */
418 int fixed_pt_position_gpr
;
420 boolean clip_vertex_write
;
422 unsigned edgeflag_output
;
423 int helper_invoc_reg
;
424 int cs_block_size_reg
;
425 int cs_grid_size_reg
;
426 bool cs_block_size_loaded
, cs_grid_size_loaded
;
428 int next_ring_offset
;
429 int gs_out_ring_offset
;
431 struct r600_shader
*gs_for_vs
;
432 int gs_export_gpr_tregs
[4];
433 int gs_rotated_input
[2];
434 const struct pipe_stream_output_info
*gs_stream_output_info
;
435 unsigned enabled_stream_buffers_mask
;
436 unsigned tess_input_info
; /* temp with tess input offsets */
437 unsigned tess_output_info
; /* temp with tess input offsets */
438 unsigned thread_id_gpr
; /* temp with thread id calculated for images */
441 struct r600_shader_tgsi_instruction
{
443 int (*process
)(struct r600_shader_ctx
*ctx
);
446 static int emit_gs_ring_writes(struct r600_shader_ctx
*ctx
, const struct pipe_stream_output_info
*so
, int stream
, bool ind
);
447 static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction
[], eg_shader_tgsi_instruction
[], cm_shader_tgsi_instruction
[];
448 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx
*ctx
);
449 static inline int callstack_push(struct r600_shader_ctx
*ctx
, unsigned reason
);
450 static void fc_pushlevel(struct r600_shader_ctx
*ctx
, int type
);
451 static int tgsi_else(struct r600_shader_ctx
*ctx
);
452 static int tgsi_endif(struct r600_shader_ctx
*ctx
);
453 static int tgsi_bgnloop(struct r600_shader_ctx
*ctx
);
454 static int tgsi_endloop(struct r600_shader_ctx
*ctx
);
455 static int tgsi_loop_brk_cont(struct r600_shader_ctx
*ctx
);
456 static int tgsi_fetch_rel_const(struct r600_shader_ctx
*ctx
,
457 unsigned int cb_idx
, unsigned cb_rel
, unsigned int offset
, unsigned ar_chan
,
458 unsigned int dst_reg
);
459 static void r600_bytecode_src(struct r600_bytecode_alu_src
*bc_src
,
460 const struct r600_shader_src
*shader_src
,
462 static int do_lds_fetch_values(struct r600_shader_ctx
*ctx
, unsigned temp_reg
,
463 unsigned dst_reg
, unsigned mask
);
465 static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx
*ctx
)
467 if (ctx
->bc
->family
== CHIP_HEMLOCK
||
468 ctx
->bc
->family
== CHIP_CYPRESS
||
469 ctx
->bc
->family
== CHIP_JUNIPER
)
/* Return the index (0..3) of the highest channel set in a 4-bit TGSI
 * writemask, or 0 when the mask is empty. */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	for (chan = 3; chan >= 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
486 static int tgsi_is_supported(struct r600_shader_ctx
*ctx
)
488 struct tgsi_full_instruction
*i
= &ctx
->parse
.FullToken
.FullInstruction
;
491 if (i
->Instruction
.NumDstRegs
> 1 && i
->Instruction
.Opcode
!= TGSI_OPCODE_DFRACEXP
) {
492 R600_ERR("too many dst (%d)\n", i
->Instruction
.NumDstRegs
);
496 if (i
->Instruction
.Label
) {
497 R600_ERR("label unsupported\n");
501 for (j
= 0; j
< i
->Instruction
.NumSrcRegs
; j
++) {
502 if (i
->Src
[j
].Register
.Dimension
) {
503 switch (i
->Src
[j
].Register
.File
) {
504 case TGSI_FILE_CONSTANT
:
505 case TGSI_FILE_HW_ATOMIC
:
507 case TGSI_FILE_INPUT
:
508 if (ctx
->type
== PIPE_SHADER_GEOMETRY
||
509 ctx
->type
== PIPE_SHADER_TESS_CTRL
||
510 ctx
->type
== PIPE_SHADER_TESS_EVAL
)
512 case TGSI_FILE_OUTPUT
:
513 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
)
516 R600_ERR("unsupported src %d (file %d, dimension %d)\n", j
,
517 i
->Src
[j
].Register
.File
,
518 i
->Src
[j
].Register
.Dimension
);
523 for (j
= 0; j
< i
->Instruction
.NumDstRegs
; j
++) {
524 if (i
->Dst
[j
].Register
.Dimension
) {
525 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
)
527 R600_ERR("unsupported dst (dimension)\n");
534 int eg_get_interpolator_index(unsigned interpolate
, unsigned location
)
536 if (interpolate
== TGSI_INTERPOLATE_COLOR
||
537 interpolate
== TGSI_INTERPOLATE_LINEAR
||
538 interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
540 int is_linear
= interpolate
== TGSI_INTERPOLATE_LINEAR
;
544 case TGSI_INTERPOLATE_LOC_CENTER
:
547 case TGSI_INTERPOLATE_LOC_CENTROID
:
550 case TGSI_INTERPOLATE_LOC_SAMPLE
:
555 return is_linear
* 3 + loc
;
561 static void evergreen_interp_assign_ij_index(struct r600_shader_ctx
*ctx
,
564 int i
= eg_get_interpolator_index(
565 ctx
->shader
->input
[input
].interpolate
,
566 ctx
->shader
->input
[input
].interpolate_location
);
568 ctx
->shader
->input
[input
].ij_index
= ctx
->eg_interpolators
[i
].ij_index
;
571 static int evergreen_interp_alu(struct r600_shader_ctx
*ctx
, int input
)
574 struct r600_bytecode_alu alu
;
575 int gpr
= 0, base_chan
= 0;
576 int ij_index
= ctx
->shader
->input
[input
].ij_index
;
578 /* work out gpr and base_chan from index */
580 base_chan
= (2 * (ij_index
% 2)) + 1;
582 for (i
= 0; i
< 8; i
++) {
583 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
586 alu
.op
= ALU_OP2_INTERP_ZW
;
588 alu
.op
= ALU_OP2_INTERP_XY
;
590 if ((i
> 1) && (i
< 6)) {
591 alu
.dst
.sel
= ctx
->shader
->input
[input
].gpr
;
595 alu
.dst
.chan
= i
% 4;
597 alu
.src
[0].sel
= gpr
;
598 alu
.src
[0].chan
= (base_chan
- (i
% 2));
600 alu
.src
[1].sel
= V_SQ_ALU_SRC_PARAM_BASE
+ ctx
->shader
->input
[input
].lds_pos
;
602 alu
.bank_swizzle_force
= SQ_ALU_VEC_210
;
605 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
612 static int evergreen_interp_flat(struct r600_shader_ctx
*ctx
, int input
)
615 struct r600_bytecode_alu alu
;
617 for (i
= 0; i
< 4; i
++) {
618 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
620 alu
.op
= ALU_OP1_INTERP_LOAD_P0
;
622 alu
.dst
.sel
= ctx
->shader
->input
[input
].gpr
;
627 alu
.src
[0].sel
= V_SQ_ALU_SRC_PARAM_BASE
+ ctx
->shader
->input
[input
].lds_pos
;
632 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
640 * Special export handling in shaders
642 * shader export ARRAY_BASE for EXPORT_POS:
645 * 62, 63 are clip distance vectors
647 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
648 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
649 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
650 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
651 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
652 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
653 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
654 * exclusive from render target index)
655 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
658 * shader export ARRAY_BASE for EXPORT_PIXEL:
660 * 61 computed Z vector
662 * The use of the values exported in the computed Z vector are controlled
663 * by DB_SHADER_CONTROL:
664 * Z_EXPORT_ENABLE - Z as a float in RED
665 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
666 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
667 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
668 * DB_SOURCE_FORMAT - export control restrictions
673 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
674 static int r600_spi_sid(struct r600_shader_io
* io
)
676 int index
, name
= io
->name
;
678 /* These params are handled differently, they don't need
679 * semantic indices, so we'll use 0 for them.
681 if (name
== TGSI_SEMANTIC_POSITION
||
682 name
== TGSI_SEMANTIC_PSIZE
||
683 name
== TGSI_SEMANTIC_EDGEFLAG
||
684 name
== TGSI_SEMANTIC_FACE
||
685 name
== TGSI_SEMANTIC_SAMPLEMASK
)
688 if (name
== TGSI_SEMANTIC_GENERIC
) {
689 /* For generic params simply use sid from tgsi */
692 /* For non-generic params - pack name and sid into 8 bits */
693 index
= 0x80 | (name
<<3) | (io
->sid
);
696 /* Make sure that all really used indices have nonzero value, so
697 * we can just compare it to 0 later instead of comparing the name
698 * with different values to detect special cases. */
705 /* we need this to get a common lds index for vs/tcs/tes input/outputs */
706 int r600_get_lds_unique_index(unsigned semantic_name
, unsigned index
)
708 switch (semantic_name
) {
709 case TGSI_SEMANTIC_POSITION
:
711 case TGSI_SEMANTIC_PSIZE
:
713 case TGSI_SEMANTIC_CLIPDIST
:
716 case TGSI_SEMANTIC_GENERIC
:
718 return 4 + index
- 9;
720 /* same explanation as in the default statement,
721 * the only user hitting this is st/nine.
725 /* patch indices are completely separate and thus start from 0 */
726 case TGSI_SEMANTIC_TESSOUTER
:
728 case TGSI_SEMANTIC_TESSINNER
:
730 case TGSI_SEMANTIC_PATCH
:
734 /* Don't fail here. The result of this function is only used
735 * for LS, TCS, TES, and GS, where legacy GL semantics can't
736 * occur, but this function is called for all vertex shaders
737 * before it's known whether LS will be compiled or not.
743 /* turn input into interpolate on EG */
744 static int evergreen_interp_input(struct r600_shader_ctx
*ctx
, int index
)
748 if (ctx
->shader
->input
[index
].spi_sid
) {
749 ctx
->shader
->input
[index
].lds_pos
= ctx
->shader
->nlds
++;
750 if (ctx
->shader
->input
[index
].interpolate
> 0) {
751 evergreen_interp_assign_ij_index(ctx
, index
);
752 r
= evergreen_interp_alu(ctx
, index
);
754 r
= evergreen_interp_flat(ctx
, index
);
760 static int select_twoside_color(struct r600_shader_ctx
*ctx
, int front
, int back
)
762 struct r600_bytecode_alu alu
;
764 int gpr_front
= ctx
->shader
->input
[front
].gpr
;
765 int gpr_back
= ctx
->shader
->input
[back
].gpr
;
767 for (i
= 0; i
< 4; i
++) {
768 memset(&alu
, 0, sizeof(alu
));
769 alu
.op
= ALU_OP3_CNDGT
;
772 alu
.dst
.sel
= gpr_front
;
773 alu
.src
[0].sel
= ctx
->face_gpr
;
774 alu
.src
[1].sel
= gpr_front
;
775 alu
.src
[2].sel
= gpr_back
;
782 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
789 /* execute a single slot ALU calculation */
790 static int single_alu_op2(struct r600_shader_ctx
*ctx
, int op
,
791 int dst_sel
, int dst_chan
,
792 int src0_sel
, unsigned src0_chan_val
,
793 int src1_sel
, unsigned src1_chan_val
)
795 struct r600_bytecode_alu alu
;
798 if (ctx
->bc
->chip_class
== CAYMAN
&& op
== ALU_OP2_MULLO_INT
) {
799 for (i
= 0; i
< 4; i
++) {
800 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
802 alu
.src
[0].sel
= src0_sel
;
803 if (src0_sel
== V_SQ_ALU_SRC_LITERAL
)
804 alu
.src
[0].value
= src0_chan_val
;
806 alu
.src
[0].chan
= src0_chan_val
;
807 alu
.src
[1].sel
= src1_sel
;
808 if (src1_sel
== V_SQ_ALU_SRC_LITERAL
)
809 alu
.src
[1].value
= src1_chan_val
;
811 alu
.src
[1].chan
= src1_chan_val
;
812 alu
.dst
.sel
= dst_sel
;
814 alu
.dst
.write
= i
== dst_chan
;
816 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
823 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
825 alu
.src
[0].sel
= src0_sel
;
826 if (src0_sel
== V_SQ_ALU_SRC_LITERAL
)
827 alu
.src
[0].value
= src0_chan_val
;
829 alu
.src
[0].chan
= src0_chan_val
;
830 alu
.src
[1].sel
= src1_sel
;
831 if (src1_sel
== V_SQ_ALU_SRC_LITERAL
)
832 alu
.src
[1].value
= src1_chan_val
;
834 alu
.src
[1].chan
= src1_chan_val
;
835 alu
.dst
.sel
= dst_sel
;
836 alu
.dst
.chan
= dst_chan
;
839 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
845 /* execute a single slot ALU calculation */
846 static int single_alu_op3(struct r600_shader_ctx
*ctx
, int op
,
847 int dst_sel
, int dst_chan
,
848 int src0_sel
, unsigned src0_chan_val
,
849 int src1_sel
, unsigned src1_chan_val
,
850 int src2_sel
, unsigned src2_chan_val
)
852 struct r600_bytecode_alu alu
;
855 /* validate this for other ops */
856 assert(op
== ALU_OP3_MULADD_UINT24
|| op
== ALU_OP3_CNDE_INT
|| op
== ALU_OP3_BFE_UINT
);
857 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
859 alu
.src
[0].sel
= src0_sel
;
860 if (src0_sel
== V_SQ_ALU_SRC_LITERAL
)
861 alu
.src
[0].value
= src0_chan_val
;
863 alu
.src
[0].chan
= src0_chan_val
;
864 alu
.src
[1].sel
= src1_sel
;
865 if (src1_sel
== V_SQ_ALU_SRC_LITERAL
)
866 alu
.src
[1].value
= src1_chan_val
;
868 alu
.src
[1].chan
= src1_chan_val
;
869 alu
.src
[2].sel
= src2_sel
;
870 if (src2_sel
== V_SQ_ALU_SRC_LITERAL
)
871 alu
.src
[2].value
= src2_chan_val
;
873 alu
.src
[2].chan
= src2_chan_val
;
874 alu
.dst
.sel
= dst_sel
;
875 alu
.dst
.chan
= dst_chan
;
878 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
884 /* put it in temp_reg.x */
885 static int get_lds_offset0(struct r600_shader_ctx
*ctx
,
887 int temp_reg
, bool is_patch_var
)
891 /* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */
893 Dimension - patch0_offset (input_vals.z),
894 Non-dim - patch0_data_offset (input_vals.w)
896 r
= single_alu_op3(ctx
, ALU_OP3_MULADD_UINT24
,
898 ctx
->tess_output_info
, 0,
900 ctx
->tess_output_info
, is_patch_var
? 3 : 2);
906 static inline int get_address_file_reg(struct r600_shader_ctx
*ctx
, int index
)
908 return index
> 0 ? ctx
->bc
->index_reg
[index
- 1] : ctx
->bc
->ar_reg
;
911 static int r600_get_temp(struct r600_shader_ctx
*ctx
)
913 return ctx
->temp_reg
+ ctx
->max_driver_temp_used
++;
916 static int vs_add_primid_output(struct r600_shader_ctx
*ctx
, int prim_id_sid
)
919 i
= ctx
->shader
->noutput
++;
920 ctx
->shader
->output
[i
].name
= TGSI_SEMANTIC_PRIMID
;
921 ctx
->shader
->output
[i
].sid
= 0;
922 ctx
->shader
->output
[i
].gpr
= 0;
923 ctx
->shader
->output
[i
].interpolate
= TGSI_INTERPOLATE_CONSTANT
;
924 ctx
->shader
->output
[i
].write_mask
= 0x4;
925 ctx
->shader
->output
[i
].spi_sid
= prim_id_sid
;
930 static int tgsi_barrier(struct r600_shader_ctx
*ctx
)
932 struct r600_bytecode_alu alu
;
935 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
936 alu
.op
= ctx
->inst_info
->op
;
939 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
945 static void choose_spill_arrays(struct r600_shader_ctx
*ctx
, int *regno
, unsigned *scratch_space_needed
)
947 // pick largest array and spill it, repeat until the number of temps is under limit or we run out of arrays
948 unsigned n
= ctx
->info
.array_max
[TGSI_FILE_TEMPORARY
];
949 unsigned narrays_left
= n
;
950 bool *spilled
= ctx
->spilled_arrays
; // assumed calloc:ed
952 *scratch_space_needed
= 0;
953 while (*regno
> 124 && narrays_left
) {
955 unsigned largest
= 0;
956 unsigned largest_index
= 0;
958 for (i
= 0; i
< n
; i
++) {
959 unsigned size
= ctx
->array_infos
[i
].range
.Last
- ctx
->array_infos
[i
].range
.First
+ 1;
960 if (!spilled
[i
] && size
> largest
) {
966 spilled
[largest_index
] = true;
968 *scratch_space_needed
+= largest
;
973 if (narrays_left
== 0) {
974 ctx
->info
.indirect_files
&= ~(1 << TGSI_FILE_TEMPORARY
);
978 /* Take spilled temp arrays into account when translating tgsi register
979 * indexes into r600 gprs if spilled is false, or scratch array offset if
981 static int map_tgsi_reg_index_to_r600_gpr(struct r600_shader_ctx
*ctx
, unsigned tgsi_reg_index
, bool *spilled
)
984 unsigned spilled_size
= 0;
986 for (i
= 0; i
< ctx
->info
.array_max
[TGSI_FILE_TEMPORARY
]; i
++) {
987 if (tgsi_reg_index
>= ctx
->array_infos
[i
].range
.First
&& tgsi_reg_index
<= ctx
->array_infos
[i
].range
.Last
) {
988 if (ctx
->spilled_arrays
[i
]) {
989 /* vec4 index into spilled scratch memory */
991 return tgsi_reg_index
- ctx
->array_infos
[i
].range
.First
+ spilled_size
;
994 /* regular GPR array */
996 return tgsi_reg_index
- spilled_size
+ ctx
->file_offset
[TGSI_FILE_TEMPORARY
];
1000 if (tgsi_reg_index
< ctx
->array_infos
[i
].range
.First
)
1002 if (ctx
->spilled_arrays
[i
]) {
1003 spilled_size
+= ctx
->array_infos
[i
].range
.Last
- ctx
->array_infos
[i
].range
.First
+ 1;
1007 /* regular GPR index, minus the holes from spilled arrays */
1010 return tgsi_reg_index
- spilled_size
+ ctx
->file_offset
[TGSI_FILE_TEMPORARY
];
1013 /* look up spill area base offset and array size for a spilled temp array */
1014 static void get_spilled_array_base_and_size(struct r600_shader_ctx
*ctx
, unsigned tgsi_reg_index
,
1015 unsigned *array_base
, unsigned *array_size
)
1018 unsigned offset
= 0;
1020 for (i
= 0; i
< ctx
->info
.array_max
[TGSI_FILE_TEMPORARY
]; i
++) {
1021 if (ctx
->spilled_arrays
[i
]) {
1022 unsigned size
= ctx
->array_infos
[i
].range
.Last
- ctx
->array_infos
[i
].range
.First
+ 1;
1024 if (tgsi_reg_index
>= ctx
->array_infos
[i
].range
.First
&& tgsi_reg_index
<= ctx
->array_infos
[i
].range
.Last
) {
1025 *array_base
= offset
;
1026 *array_size
= size
- 1; /* hw counts from 1 */
1036 static int tgsi_declaration(struct r600_shader_ctx
*ctx
)
1038 struct tgsi_full_declaration
*d
= &ctx
->parse
.FullToken
.FullDeclaration
;
1039 int r
, i
, j
, count
= d
->Range
.Last
- d
->Range
.First
+ 1;
1041 switch (d
->Declaration
.File
) {
1042 case TGSI_FILE_INPUT
:
1043 for (j
= 0; j
< count
; j
++) {
1044 i
= ctx
->shader
->ninput
+ j
;
1045 assert(i
< ARRAY_SIZE(ctx
->shader
->input
));
1046 ctx
->shader
->input
[i
].name
= d
->Semantic
.Name
;
1047 ctx
->shader
->input
[i
].sid
= d
->Semantic
.Index
+ j
;
1048 ctx
->shader
->input
[i
].interpolate
= d
->Interp
.Interpolate
;
1049 ctx
->shader
->input
[i
].interpolate_location
= d
->Interp
.Location
;
1050 ctx
->shader
->input
[i
].gpr
= ctx
->file_offset
[TGSI_FILE_INPUT
] + d
->Range
.First
+ j
;
1051 if (ctx
->type
== PIPE_SHADER_FRAGMENT
) {
1052 ctx
->shader
->input
[i
].spi_sid
= r600_spi_sid(&ctx
->shader
->input
[i
]);
1053 switch (ctx
->shader
->input
[i
].name
) {
1054 case TGSI_SEMANTIC_FACE
:
1055 if (ctx
->face_gpr
!= -1)
1056 ctx
->shader
->input
[i
].gpr
= ctx
->face_gpr
; /* already allocated by allocate_system_value_inputs */
1058 ctx
->face_gpr
= ctx
->shader
->input
[i
].gpr
;
1060 case TGSI_SEMANTIC_COLOR
:
1063 case TGSI_SEMANTIC_POSITION
:
1064 ctx
->fragcoord_input
= i
;
1066 case TGSI_SEMANTIC_PRIMID
:
1067 /* set this for now */
1068 ctx
->shader
->gs_prim_id_input
= true;
1069 ctx
->shader
->ps_prim_id_input
= i
;
1072 if (ctx
->bc
->chip_class
>= EVERGREEN
) {
1073 if ((r
= evergreen_interp_input(ctx
, i
)))
1076 } else if (ctx
->type
== PIPE_SHADER_GEOMETRY
) {
1077 /* FIXME probably skip inputs if they aren't passed in the ring */
1078 ctx
->shader
->input
[i
].ring_offset
= ctx
->next_ring_offset
;
1079 ctx
->next_ring_offset
+= 16;
1080 if (ctx
->shader
->input
[i
].name
== TGSI_SEMANTIC_PRIMID
)
1081 ctx
->shader
->gs_prim_id_input
= true;
1084 ctx
->shader
->ninput
+= count
;
1086 case TGSI_FILE_OUTPUT
:
1087 for (j
= 0; j
< count
; j
++) {
1088 i
= ctx
->shader
->noutput
+ j
;
1089 assert(i
< ARRAY_SIZE(ctx
->shader
->output
));
1090 ctx
->shader
->output
[i
].name
= d
->Semantic
.Name
;
1091 ctx
->shader
->output
[i
].sid
= d
->Semantic
.Index
+ j
;
1092 ctx
->shader
->output
[i
].gpr
= ctx
->file_offset
[TGSI_FILE_OUTPUT
] + d
->Range
.First
+ j
;
1093 ctx
->shader
->output
[i
].interpolate
= d
->Interp
.Interpolate
;
1094 ctx
->shader
->output
[i
].write_mask
= d
->Declaration
.UsageMask
;
1095 if (ctx
->type
== PIPE_SHADER_VERTEX
||
1096 ctx
->type
== PIPE_SHADER_GEOMETRY
||
1097 ctx
->type
== PIPE_SHADER_TESS_EVAL
) {
1098 ctx
->shader
->output
[i
].spi_sid
= r600_spi_sid(&ctx
->shader
->output
[i
]);
1099 switch (d
->Semantic
.Name
) {
1100 case TGSI_SEMANTIC_CLIPDIST
:
1102 case TGSI_SEMANTIC_PSIZE
:
1103 ctx
->shader
->vs_out_misc_write
= 1;
1104 ctx
->shader
->vs_out_point_size
= 1;
1106 case TGSI_SEMANTIC_EDGEFLAG
:
1107 ctx
->shader
->vs_out_misc_write
= 1;
1108 ctx
->shader
->vs_out_edgeflag
= 1;
1109 ctx
->edgeflag_output
= i
;
1111 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
1112 ctx
->shader
->vs_out_misc_write
= 1;
1113 ctx
->shader
->vs_out_viewport
= 1;
1115 case TGSI_SEMANTIC_LAYER
:
1116 ctx
->shader
->vs_out_misc_write
= 1;
1117 ctx
->shader
->vs_out_layer
= 1;
1119 case TGSI_SEMANTIC_CLIPVERTEX
:
1120 ctx
->clip_vertex_write
= TRUE
;
1124 if (ctx
->type
== PIPE_SHADER_GEOMETRY
) {
1125 ctx
->gs_out_ring_offset
+= 16;
1127 } else if (ctx
->type
== PIPE_SHADER_FRAGMENT
) {
1128 switch (d
->Semantic
.Name
) {
1129 case TGSI_SEMANTIC_COLOR
:
1130 ctx
->shader
->nr_ps_max_color_exports
++;
1135 ctx
->shader
->noutput
+= count
;
1137 case TGSI_FILE_TEMPORARY
:
1138 if (ctx
->info
.indirect_files
& (1 << TGSI_FILE_TEMPORARY
)) {
1139 if (d
->Array
.ArrayID
) {
1141 unsigned idx
= map_tgsi_reg_index_to_r600_gpr(ctx
,
1146 r600_add_gpr_array(ctx
->shader
, idx
,
1147 d
->Range
.Last
- d
->Range
.First
+ 1, 0x0F);
1153 case TGSI_FILE_CONSTANT
:
1154 case TGSI_FILE_SAMPLER
:
1155 case TGSI_FILE_SAMPLER_VIEW
:
1156 case TGSI_FILE_ADDRESS
:
1157 case TGSI_FILE_BUFFER
:
1158 case TGSI_FILE_IMAGE
:
1159 case TGSI_FILE_MEMORY
:
1162 case TGSI_FILE_HW_ATOMIC
:
1163 i
= ctx
->shader
->nhwatomic_ranges
;
1164 ctx
->shader
->atomics
[i
].start
= d
->Range
.First
;
1165 ctx
->shader
->atomics
[i
].end
= d
->Range
.Last
;
1166 ctx
->shader
->atomics
[i
].hw_idx
= ctx
->shader
->atomic_base
+ ctx
->shader
->nhwatomic
;
1167 ctx
->shader
->atomics
[i
].array_id
= d
->Array
.ArrayID
;
1168 ctx
->shader
->atomics
[i
].buffer_id
= d
->Dim
.Index2D
;
1169 ctx
->shader
->nhwatomic_ranges
++;
1170 ctx
->shader
->nhwatomic
+= count
;
1173 case TGSI_FILE_SYSTEM_VALUE
:
1174 if (d
->Semantic
.Name
== TGSI_SEMANTIC_SAMPLEMASK
||
1175 d
->Semantic
.Name
== TGSI_SEMANTIC_SAMPLEID
||
1176 d
->Semantic
.Name
== TGSI_SEMANTIC_SAMPLEPOS
) {
1177 break; /* Already handled from allocate_system_value_inputs */
1178 } else if (d
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
) {
1180 } else if (d
->Semantic
.Name
== TGSI_SEMANTIC_VERTEXID
)
1182 else if (d
->Semantic
.Name
== TGSI_SEMANTIC_INVOCATIONID
)
1184 else if (d
->Semantic
.Name
== TGSI_SEMANTIC_TESSINNER
||
1185 d
->Semantic
.Name
== TGSI_SEMANTIC_TESSOUTER
) {
1186 int param
= r600_get_lds_unique_index(d
->Semantic
.Name
, 0);
1187 int dreg
= d
->Semantic
.Name
== TGSI_SEMANTIC_TESSINNER
? 3 : 2;
1188 unsigned temp_reg
= r600_get_temp(ctx
);
1190 r
= get_lds_offset0(ctx
, 2, temp_reg
, true);
1194 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
1197 V_SQ_ALU_SRC_LITERAL
, param
* 16);
1201 do_lds_fetch_values(ctx
, temp_reg
, dreg
, 0xf);
1203 else if (d
->Semantic
.Name
== TGSI_SEMANTIC_TESSCOORD
) {
1207 for (i
= 0; i
< 2; i
++) {
1208 struct r600_bytecode_alu alu
;
1209 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1210 alu
.op
= ALU_OP1_MOV
;
1212 alu
.src
[0].chan
= 0 + i
;
1214 alu
.dst
.chan
= 0 + i
;
1216 alu
.last
= (i
== 1) ? 1 : 0;
1217 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
1220 /* ADD r1.z, 1.0f, -r0.x */
1221 struct r600_bytecode_alu alu
;
1222 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1223 alu
.op
= ALU_OP2_ADD
;
1224 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1226 alu
.src
[1].chan
= 0;
1232 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
1235 /* ADD r1.z, r1.z, -r1.y */
1236 alu
.op
= ALU_OP2_ADD
;
1238 alu
.src
[0].chan
= 2;
1240 alu
.src
[1].chan
= 1;
1246 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
1252 R600_ERR("unsupported file %d declaration\n", d
->Declaration
.File
);
1258 static int allocate_system_value_inputs(struct r600_shader_ctx
*ctx
, int gpr_offset
)
1260 struct tgsi_parse_context parse
;
1264 unsigned name
, alternate_name
;
1266 { false, &ctx
->face_gpr
, TGSI_SEMANTIC_SAMPLEMASK
, ~0u }, /* lives in Front Face GPR.z */
1268 { false, &ctx
->fixed_pt_position_gpr
, TGSI_SEMANTIC_SAMPLEID
, TGSI_SEMANTIC_SAMPLEPOS
} /* SAMPLEID is in Fixed Point Position GPR.w */
1273 if (tgsi_parse_init(&parse
, ctx
->tokens
) != TGSI_PARSE_OK
) {
1277 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
1278 while (!tgsi_parse_end_of_tokens(&parse
)) {
1279 tgsi_parse_token(&parse
);
1281 if (parse
.FullToken
.Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
) {
1282 const struct tgsi_full_instruction
*inst
= &parse
.FullToken
.FullInstruction
;
1283 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
||
1284 inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_OFFSET
||
1285 inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_CENTROID
)
1287 int interpolate
, location
, k
;
1289 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
) {
1290 location
= TGSI_INTERPOLATE_LOC_CENTER
;
1291 } else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_OFFSET
) {
1292 location
= TGSI_INTERPOLATE_LOC_CENTER
;
1293 /* Needs sample positions, currently those are always available */
1295 location
= TGSI_INTERPOLATE_LOC_CENTROID
;
1298 interpolate
= ctx
->info
.input_interpolate
[inst
->Src
[0].Register
.Index
];
1299 k
= eg_get_interpolator_index(interpolate
, location
);
1301 ctx
->eg_interpolators
[k
].enabled
= true;
1303 } else if (parse
.FullToken
.Token
.Type
== TGSI_TOKEN_TYPE_DECLARATION
) {
1304 struct tgsi_full_declaration
*d
= &parse
.FullToken
.FullDeclaration
;
1305 if (d
->Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1306 for (k
= 0; k
< ARRAY_SIZE(inputs
); k
++) {
1307 if (d
->Semantic
.Name
== inputs
[k
].name
||
1308 d
->Semantic
.Name
== inputs
[k
].alternate_name
) {
1309 inputs
[k
].enabled
= true;
1316 tgsi_parse_free(&parse
);
1318 if (ctx
->info
.reads_samplemask
&&
1319 (ctx
->info
.uses_linear_sample
|| ctx
->info
.uses_persp_sample
)) {
1320 inputs
[1].enabled
= true;
1323 if (ctx
->bc
->chip_class
>= EVERGREEN
) {
1325 /* assign gpr to each interpolator according to priority */
1326 for (i
= 0; i
< ARRAY_SIZE(ctx
->eg_interpolators
); i
++) {
1327 if (ctx
->eg_interpolators
[i
].enabled
) {
1328 ctx
->eg_interpolators
[i
].ij_index
= num_baryc
;
1332 num_baryc
= (num_baryc
+ 1) >> 1;
1333 gpr_offset
+= num_baryc
;
1336 for (i
= 0; i
< ARRAY_SIZE(inputs
); i
++) {
1337 boolean enabled
= inputs
[i
].enabled
;
1338 int *reg
= inputs
[i
].reg
;
1339 unsigned name
= inputs
[i
].name
;
1342 int gpr
= gpr_offset
+ num_regs
++;
1343 ctx
->shader
->nsys_inputs
++;
1345 // add to inputs, allocate a gpr
1346 k
= ctx
->shader
->ninput
++;
1347 ctx
->shader
->input
[k
].name
= name
;
1348 ctx
->shader
->input
[k
].sid
= 0;
1349 ctx
->shader
->input
[k
].interpolate
= TGSI_INTERPOLATE_CONSTANT
;
1350 ctx
->shader
->input
[k
].interpolate_location
= TGSI_INTERPOLATE_LOC_CENTER
;
1351 *reg
= ctx
->shader
->input
[k
].gpr
= gpr
;
1355 return gpr_offset
+ num_regs
;
1359 * for evergreen we need to scan the shader to find the number of GPRs we need to
1360 * reserve for interpolation and system values
1362 * we need to know if we are going to emit any sample or centroid inputs
1363 * if perspective and linear are required
1365 static int evergreen_gpr_count(struct r600_shader_ctx
*ctx
)
1369 memset(&ctx
->eg_interpolators
, 0, sizeof(ctx
->eg_interpolators
));
1372 * Could get this information from the shader info. But right now
1373 * we interpolate all declared inputs, whereas the shader info will
1374 * only contain the bits if the inputs are actually used, so it might
1377 for (i
= 0; i
< ctx
->info
.num_inputs
; i
++) {
1379 /* skip position/face/mask/sampleid */
1380 if (ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
||
1381 ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
||
1382 ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_SAMPLEMASK
||
1383 ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_SAMPLEID
)
1386 k
= eg_get_interpolator_index(
1387 ctx
->info
.input_interpolate
[i
],
1388 ctx
->info
.input_interpolate_loc
[i
]);
1390 ctx
->eg_interpolators
[k
].enabled
= TRUE
;
1393 /* XXX PULL MODEL and LINE STIPPLE */
1395 return allocate_system_value_inputs(ctx
, 0);
1398 /* sample_id_sel == NULL means fetch for current sample */
1399 static int load_sample_position(struct r600_shader_ctx
*ctx
, struct r600_shader_src
*sample_id
, int chan_sel
)
1401 struct r600_bytecode_vtx vtx
;
1404 t1
= r600_get_temp(ctx
);
1406 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
1407 vtx
.op
= FETCH_OP_VFETCH
;
1408 vtx
.buffer_id
= R600_BUFFER_INFO_CONST_BUFFER
;
1409 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
1410 if (sample_id
== NULL
) {
1411 assert(ctx
->fixed_pt_position_gpr
!= -1);
1413 vtx
.src_gpr
= ctx
->fixed_pt_position_gpr
; // SAMPLEID is in .w;
1417 struct r600_bytecode_alu alu
;
1419 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1420 alu
.op
= ALU_OP1_MOV
;
1421 r600_bytecode_src(&alu
.src
[0], sample_id
, chan_sel
);
1425 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
1432 vtx
.mega_fetch_count
= 16;
1438 vtx
.data_format
= FMT_32_32_32_32_FLOAT
;
1439 vtx
.num_format_all
= 2;
1440 vtx
.format_comp_all
= 1;
1441 vtx
.use_const_fields
= 0;
1443 vtx
.endian
= r600_endian_swap(32);
1444 vtx
.srf_mode_all
= 1; /* SRF_MODE_NO_ZERO */
1446 r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
);
1453 static int eg_load_helper_invocation(struct r600_shader_ctx
*ctx
)
1456 struct r600_bytecode_alu alu
;
1458 /* do a vtx fetch with wqm set on the vtx fetch */
1459 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1460 alu
.op
= ALU_OP1_MOV
;
1461 alu
.dst
.sel
= ctx
->helper_invoc_reg
;
1463 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
1464 alu
.src
[0].value
= 0xffffffff;
1467 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
1471 /* do a vtx fetch in VPM mode */
1472 struct r600_bytecode_vtx vtx
;
1473 memset(&vtx
, 0, sizeof(vtx
));
1474 vtx
.op
= FETCH_OP_GET_BUFFER_RESINFO
;
1475 vtx
.buffer_id
= R600_BUFFER_INFO_CONST_BUFFER
;
1476 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
1478 vtx
.mega_fetch_count
= 16; /* no idea here really... */
1479 vtx
.dst_gpr
= ctx
->helper_invoc_reg
;
1481 vtx
.dst_sel_y
= 7; /* SEL_Y */
1482 vtx
.dst_sel_z
= 7; /* SEL_Z */
1483 vtx
.dst_sel_w
= 7; /* SEL_W */
1484 vtx
.data_format
= FMT_32
;
1485 if ((r
= r600_bytecode_add_vtx_tc(ctx
->bc
, &vtx
)))
1487 ctx
->bc
->cf_last
->vpm
= 1;
1491 static int cm_load_helper_invocation(struct r600_shader_ctx
*ctx
)
1494 struct r600_bytecode_alu alu
;
1496 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1497 alu
.op
= ALU_OP1_MOV
;
1498 alu
.dst
.sel
= ctx
->helper_invoc_reg
;
1500 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
1501 alu
.src
[0].value
= 0xffffffff;
1504 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
1508 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1509 alu
.op
= ALU_OP1_MOV
;
1510 alu
.dst
.sel
= ctx
->helper_invoc_reg
;
1512 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
1515 r
= r600_bytecode_add_alu_type(ctx
->bc
, &alu
, CF_OP_ALU_VALID_PIXEL_MODE
);
1519 return ctx
->helper_invoc_reg
;
1522 static int load_block_grid_size(struct r600_shader_ctx
*ctx
, bool load_block
)
1524 struct r600_bytecode_vtx vtx
;
1527 if (ctx
->cs_block_size_loaded
)
1528 return ctx
->cs_block_size_reg
;
1529 if (ctx
->cs_grid_size_loaded
)
1530 return ctx
->cs_grid_size_reg
;
1532 t1
= load_block
? ctx
->cs_block_size_reg
: ctx
->cs_grid_size_reg
;
1533 struct r600_bytecode_alu alu
;
1534 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1535 alu
.op
= ALU_OP1_MOV
;
1536 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
1540 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
1544 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
1545 vtx
.op
= FETCH_OP_VFETCH
;
1546 vtx
.buffer_id
= R600_BUFFER_INFO_CONST_BUFFER
;
1547 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
1551 vtx
.mega_fetch_count
= 16;
1557 vtx
.data_format
= FMT_32_32_32_32
;
1558 vtx
.num_format_all
= 1;
1559 vtx
.format_comp_all
= 0;
1560 vtx
.use_const_fields
= 0;
1561 vtx
.offset
= load_block
? 0 : 16; // first element is size of buffer
1562 vtx
.endian
= r600_endian_swap(32);
1563 vtx
.srf_mode_all
= 1; /* SRF_MODE_NO_ZERO */
1565 r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
);
1570 ctx
->cs_block_size_loaded
= true;
1572 ctx
->cs_grid_size_loaded
= true;
1576 static void tgsi_src(struct r600_shader_ctx
*ctx
,
1577 const struct tgsi_full_src_register
*tgsi_src
,
1578 struct r600_shader_src
*r600_src
)
1580 memset(r600_src
, 0, sizeof(*r600_src
));
1581 r600_src
->swizzle
[0] = tgsi_src
->Register
.SwizzleX
;
1582 r600_src
->swizzle
[1] = tgsi_src
->Register
.SwizzleY
;
1583 r600_src
->swizzle
[2] = tgsi_src
->Register
.SwizzleZ
;
1584 r600_src
->swizzle
[3] = tgsi_src
->Register
.SwizzleW
;
1585 r600_src
->neg
= tgsi_src
->Register
.Negate
;
1586 r600_src
->abs
= tgsi_src
->Register
.Absolute
;
1588 if (tgsi_src
->Register
.File
== TGSI_FILE_TEMPORARY
) {
1592 idx
= map_tgsi_reg_index_to_r600_gpr(ctx
, tgsi_src
->Register
.Index
, &spilled
);
1595 int reg
= r600_get_temp(ctx
);
1598 r600_src
->sel
= reg
;
1600 if (ctx
->bc
->chip_class
< R700
) {
1601 struct r600_bytecode_output cf
;
1603 memset(&cf
, 0, sizeof(struct r600_bytecode_output
));
1604 cf
.op
= CF_OP_MEM_SCRATCH
;
1614 get_spilled_array_base_and_size(ctx
, tgsi_src
->Register
.Index
,
1615 &cf
.array_base
, &cf
.array_size
);
1617 if (tgsi_src
->Register
.Indirect
) {
1618 cf
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND
;
1619 cf
.index_gpr
= ctx
->bc
->ar_reg
;
1622 cf
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ
;
1623 cf
.array_base
+= idx
;
1627 r
= r600_bytecode_add_output(ctx
->bc
, &cf
);
1630 struct r600_bytecode_vtx vtx
;
1632 if (r600_bytecode_get_need_wait_ack(ctx
->bc
)) {
1633 r600_bytecode_need_wait_ack(ctx
->bc
, false);
1634 r
= r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_WAIT_ACK
);
1637 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
1638 vtx
.op
= FETCH_OP_READ_SCRATCH
;
1640 vtx
.uncached
= 1; // Must bypass cache since prior spill written in same invocation
1642 vtx
.data_format
= FMT_32_32_32_32
;
1643 vtx
.num_format_all
= V_038010_SQ_NUM_FORMAT_INT
;
1644 vtx
.dst_sel_x
= tgsi_src
->Register
.SwizzleX
;
1645 vtx
.dst_sel_y
= tgsi_src
->Register
.SwizzleY
;
1646 vtx
.dst_sel_z
= tgsi_src
->Register
.SwizzleZ
;
1647 vtx
.dst_sel_w
= tgsi_src
->Register
.SwizzleW
;
1649 get_spilled_array_base_and_size(ctx
, tgsi_src
->Register
.Index
,
1650 &vtx
.array_base
, &vtx
.array_size
);
1652 if (tgsi_src
->Register
.Indirect
) {
1654 vtx
.src_gpr
= ctx
->bc
->ar_reg
;
1657 vtx
.array_base
+= idx
;
1661 r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
);
1668 if (tgsi_src
->Register
.Indirect
)
1669 r600_src
->rel
= V_SQ_REL_RELATIVE
;
1671 r600_src
->sel
= idx
;
1677 if (tgsi_src
->Register
.File
== TGSI_FILE_IMMEDIATE
) {
1679 if ((tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleY
) &&
1680 (tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleZ
) &&
1681 (tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleW
)) {
1683 index
= tgsi_src
->Register
.Index
* 4 + tgsi_src
->Register
.SwizzleX
;
1684 r600_bytecode_special_constants(ctx
->literals
[index
], &r600_src
->sel
, &r600_src
->neg
, r600_src
->abs
);
1685 if (r600_src
->sel
!= V_SQ_ALU_SRC_LITERAL
)
1688 index
= tgsi_src
->Register
.Index
;
1689 r600_src
->sel
= V_SQ_ALU_SRC_LITERAL
;
1690 memcpy(r600_src
->value
, ctx
->literals
+ index
* 4, sizeof(r600_src
->value
));
1691 } else if (tgsi_src
->Register
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1692 if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_SAMPLEMASK
) {
1693 r600_src
->swizzle
[0] = 2; // Z value
1694 r600_src
->swizzle
[1] = 2;
1695 r600_src
->swizzle
[2] = 2;
1696 r600_src
->swizzle
[3] = 2;
1697 r600_src
->sel
= ctx
->face_gpr
;
1698 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_SAMPLEID
) {
1699 r600_src
->swizzle
[0] = 3; // W value
1700 r600_src
->swizzle
[1] = 3;
1701 r600_src
->swizzle
[2] = 3;
1702 r600_src
->swizzle
[3] = 3;
1703 r600_src
->sel
= ctx
->fixed_pt_position_gpr
;
1704 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_SAMPLEPOS
) {
1705 r600_src
->swizzle
[0] = 0;
1706 r600_src
->swizzle
[1] = 1;
1707 r600_src
->swizzle
[2] = 4;
1708 r600_src
->swizzle
[3] = 4;
1709 r600_src
->sel
= load_sample_position(ctx
, NULL
, -1);
1710 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_INSTANCEID
) {
1711 r600_src
->swizzle
[0] = 3;
1712 r600_src
->swizzle
[1] = 3;
1713 r600_src
->swizzle
[2] = 3;
1714 r600_src
->swizzle
[3] = 3;
1716 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_VERTEXID
) {
1717 r600_src
->swizzle
[0] = 0;
1718 r600_src
->swizzle
[1] = 0;
1719 r600_src
->swizzle
[2] = 0;
1720 r600_src
->swizzle
[3] = 0;
1722 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_THREAD_ID
) {
1724 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_BLOCK_ID
) {
1726 } else if (ctx
->type
!= PIPE_SHADER_TESS_CTRL
&& ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_INVOCATIONID
) {
1727 r600_src
->swizzle
[0] = 3;
1728 r600_src
->swizzle
[1] = 3;
1729 r600_src
->swizzle
[2] = 3;
1730 r600_src
->swizzle
[3] = 3;
1732 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_INVOCATIONID
) {
1733 r600_src
->swizzle
[0] = 2;
1734 r600_src
->swizzle
[1] = 2;
1735 r600_src
->swizzle
[2] = 2;
1736 r600_src
->swizzle
[3] = 2;
1738 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_TESSCOORD
) {
1740 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_TESSINNER
) {
1742 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_TESSOUTER
) {
1744 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_VERTICESIN
) {
1745 r600_src
->sel
= ctx
->tess_input_info
;
1746 r600_src
->swizzle
[0] = 2;
1747 r600_src
->swizzle
[1] = 2;
1748 r600_src
->swizzle
[2] = 2;
1749 r600_src
->swizzle
[3] = 2;
1750 } else if (ctx
->type
== PIPE_SHADER_TESS_CTRL
&& ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_PRIMID
) {
1752 r600_src
->swizzle
[0] = 0;
1753 r600_src
->swizzle
[1] = 0;
1754 r600_src
->swizzle
[2] = 0;
1755 r600_src
->swizzle
[3] = 0;
1756 } else if (ctx
->type
== PIPE_SHADER_TESS_EVAL
&& ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_PRIMID
) {
1758 r600_src
->swizzle
[0] = 3;
1759 r600_src
->swizzle
[1] = 3;
1760 r600_src
->swizzle
[2] = 3;
1761 r600_src
->swizzle
[3] = 3;
1762 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_GRID_SIZE
) {
1763 r600_src
->sel
= load_block_grid_size(ctx
, false);
1764 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_BLOCK_SIZE
) {
1765 r600_src
->sel
= load_block_grid_size(ctx
, true);
1766 } else if (ctx
->info
.system_value_semantic_name
[tgsi_src
->Register
.Index
] == TGSI_SEMANTIC_HELPER_INVOCATION
) {
1767 r600_src
->sel
= ctx
->helper_invoc_reg
;
1768 r600_src
->swizzle
[0] = 0;
1769 r600_src
->swizzle
[1] = 0;
1770 r600_src
->swizzle
[2] = 0;
1771 r600_src
->swizzle
[3] = 0;
1774 if (tgsi_src
->Register
.Indirect
)
1775 r600_src
->rel
= V_SQ_REL_RELATIVE
;
1776 r600_src
->sel
= tgsi_src
->Register
.Index
;
1777 r600_src
->sel
+= ctx
->file_offset
[tgsi_src
->Register
.File
];
1779 if (tgsi_src
->Register
.File
== TGSI_FILE_CONSTANT
) {
1780 if (tgsi_src
->Register
.Dimension
) {
1781 r600_src
->kc_bank
= tgsi_src
->Dimension
.Index
;
1782 if (tgsi_src
->Dimension
.Indirect
) {
1783 r600_src
->kc_rel
= 1;
1789 static int tgsi_fetch_rel_const(struct r600_shader_ctx
*ctx
,
1790 unsigned int cb_idx
, unsigned cb_rel
, unsigned int offset
, unsigned ar_chan
,
1791 unsigned int dst_reg
)
1793 struct r600_bytecode_vtx vtx
;
1794 unsigned int ar_reg
;
1798 struct r600_bytecode_alu alu
;
1800 memset(&alu
, 0, sizeof(alu
));
1802 alu
.op
= ALU_OP2_ADD_INT
;
1803 alu
.src
[0].sel
= ctx
->bc
->ar_reg
;
1804 alu
.src
[0].chan
= ar_chan
;
1806 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
1807 alu
.src
[1].value
= offset
;
1809 alu
.dst
.sel
= dst_reg
;
1810 alu
.dst
.chan
= ar_chan
;
1814 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
1819 ar_reg
= ctx
->bc
->ar_reg
;
1822 memset(&vtx
, 0, sizeof(vtx
));
1823 vtx
.buffer_id
= cb_idx
;
1824 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
1825 vtx
.src_gpr
= ar_reg
;
1826 vtx
.src_sel_x
= ar_chan
;
1827 vtx
.mega_fetch_count
= 16;
1828 vtx
.dst_gpr
= dst_reg
;
1829 vtx
.dst_sel_x
= 0; /* SEL_X */
1830 vtx
.dst_sel_y
= 1; /* SEL_Y */
1831 vtx
.dst_sel_z
= 2; /* SEL_Z */
1832 vtx
.dst_sel_w
= 3; /* SEL_W */
1833 vtx
.data_format
= FMT_32_32_32_32_FLOAT
;
1834 vtx
.num_format_all
= 2; /* NUM_FORMAT_SCALED */
1835 vtx
.format_comp_all
= 1; /* FORMAT_COMP_SIGNED */
1836 vtx
.endian
= r600_endian_swap(32);
1837 vtx
.buffer_index_mode
= cb_rel
; // cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE;
1839 if ((r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
)))
1845 static int fetch_gs_input(struct r600_shader_ctx
*ctx
, struct tgsi_full_src_register
*src
, unsigned int dst_reg
)
1847 struct r600_bytecode_vtx vtx
;
1849 unsigned index
= src
->Register
.Index
;
1850 unsigned vtx_id
= src
->Dimension
.Index
;
1851 int offset_reg
= ctx
->gs_rotated_input
[vtx_id
/ 3];
1852 int offset_chan
= vtx_id
% 3;
1855 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
1856 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
1858 if (offset_reg
== ctx
->gs_rotated_input
[0] && offset_chan
== 2)
1861 if (src
->Dimension
.Indirect
|| src
->Register
.Indirect
)
1862 t2
= r600_get_temp(ctx
);
1864 if (src
->Dimension
.Indirect
) {
1866 struct r600_bytecode_alu alu
;
1869 addr_reg
= get_address_file_reg(ctx
, src
->DimIndirect
.Index
);
1870 if (src
->DimIndirect
.Index
> 0) {
1871 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
1879 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
1880 at least this is what fglrx seems to do. */
1881 for (i
= 0; i
< 3; i
++) {
1882 treg
[i
] = r600_get_temp(ctx
);
1884 r600_add_gpr_array(ctx
->shader
, treg
[0], 3, 0x0F);
1886 for (i
= 0; i
< 3; i
++) {
1887 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1888 alu
.op
= ALU_OP1_MOV
;
1889 alu
.src
[0].sel
= ctx
->gs_rotated_input
[0];
1890 alu
.src
[0].chan
= i
== 2 ? 3 : i
;
1891 alu
.dst
.sel
= treg
[i
];
1895 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
1899 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
1900 alu
.op
= ALU_OP1_MOV
;
1901 alu
.src
[0].sel
= treg
[0];
1906 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
1913 if (src
->Register
.Indirect
) {
1915 unsigned first
= ctx
->info
.input_array_first
[src
->Indirect
.ArrayID
];
1917 addr_reg
= get_address_file_reg(ctx
, src
->Indirect
.Index
);
1919 /* pull the value from index_reg */
1920 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
1923 V_SQ_ALU_SRC_LITERAL
, first
);
1926 r
= single_alu_op3(ctx
, ALU_OP3_MULADD_UINT24
,
1929 V_SQ_ALU_SRC_LITERAL
, 4,
1930 offset_reg
, offset_chan
);
1935 index
= src
->Register
.Index
- first
;
1938 memset(&vtx
, 0, sizeof(vtx
));
1939 vtx
.buffer_id
= R600_GS_RING_CONST_BUFFER
;
1940 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
1941 vtx
.src_gpr
= offset_reg
;
1942 vtx
.src_sel_x
= offset_chan
;
1943 vtx
.offset
= index
* 16; /*bytes*/
1944 vtx
.mega_fetch_count
= 16;
1945 vtx
.dst_gpr
= dst_reg
;
1946 vtx
.dst_sel_x
= 0; /* SEL_X */
1947 vtx
.dst_sel_y
= 1; /* SEL_Y */
1948 vtx
.dst_sel_z
= 2; /* SEL_Z */
1949 vtx
.dst_sel_w
= 3; /* SEL_W */
1950 if (ctx
->bc
->chip_class
>= EVERGREEN
) {
1951 vtx
.use_const_fields
= 1;
1953 vtx
.data_format
= FMT_32_32_32_32_FLOAT
;
1956 if ((r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
)))
1962 static int tgsi_split_gs_inputs(struct r600_shader_ctx
*ctx
)
1964 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1967 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1968 struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
1970 if (src
->Register
.File
== TGSI_FILE_INPUT
) {
1971 if (ctx
->shader
->input
[src
->Register
.Index
].name
== TGSI_SEMANTIC_PRIMID
) {
1972 /* primitive id is in R0.z */
1973 ctx
->src
[i
].sel
= 0;
1974 ctx
->src
[i
].swizzle
[0] = 2;
1977 if (src
->Register
.File
== TGSI_FILE_INPUT
&& src
->Register
.Dimension
) {
1978 int treg
= r600_get_temp(ctx
);
1980 fetch_gs_input(ctx
, src
, treg
);
1981 ctx
->src
[i
].sel
= treg
;
1982 ctx
->src
[i
].rel
= 0;
1989 /* Tessellation shaders pass outputs to the next shader using LDS.
1991 * LS outputs = TCS(HS) inputs
1992 * TCS(HS) outputs = TES(DS) inputs
1994 * The LDS layout is:
1995 * - TCS inputs for patch 0
1996 * - TCS inputs for patch 1
1997 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
1999 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
2000 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
2001 * - TCS outputs for patch 1
2002 * - Per-patch TCS outputs for patch 1
2003 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
2004 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
2007 * All three shaders VS(LS), TCS, TES share the same LDS space.
2009 /* this will return with the dw address in temp_reg.x */
2010 static int r600_get_byte_address(struct r600_shader_ctx
*ctx
, int temp_reg
,
2011 const struct tgsi_full_dst_register
*dst
,
2012 const struct tgsi_full_src_register
*src
,
2013 int stride_bytes_reg
, int stride_bytes_chan
)
2015 struct tgsi_full_dst_register reg
;
2016 ubyte
*name
, *index
, *array_first
;
2019 struct tgsi_shader_info
*info
= &ctx
->info
;
2020 /* Set the register description. The address computation is the same
2021 * for sources and destinations. */
2023 reg
.Register
.File
= src
->Register
.File
;
2024 reg
.Register
.Index
= src
->Register
.Index
;
2025 reg
.Register
.Indirect
= src
->Register
.Indirect
;
2026 reg
.Register
.Dimension
= src
->Register
.Dimension
;
2027 reg
.Indirect
= src
->Indirect
;
2028 reg
.Dimension
= src
->Dimension
;
2029 reg
.DimIndirect
= src
->DimIndirect
;
2033 /* If the register is 2-dimensional (e.g. an array of vertices
2034 * in a primitive), calculate the base address of the vertex. */
2035 if (reg
.Register
.Dimension
) {
2037 if (reg
.Dimension
.Indirect
) {
2039 assert (reg
.DimIndirect
.File
== TGSI_FILE_ADDRESS
);
2041 addr_reg
= get_address_file_reg(ctx
, reg
.DimIndirect
.Index
);
2042 /* pull the value from index_reg */
2046 sel
= V_SQ_ALU_SRC_LITERAL
;
2047 chan
= reg
.Dimension
.Index
;
2050 r
= single_alu_op3(ctx
, ALU_OP3_MULADD_UINT24
,
2052 stride_bytes_reg
, stride_bytes_chan
,
2059 if (reg
.Register
.File
== TGSI_FILE_INPUT
) {
2060 name
= info
->input_semantic_name
;
2061 index
= info
->input_semantic_index
;
2062 array_first
= info
->input_array_first
;
2063 } else if (reg
.Register
.File
== TGSI_FILE_OUTPUT
) {
2064 name
= info
->output_semantic_name
;
2065 index
= info
->output_semantic_index
;
2066 array_first
= info
->output_array_first
;
2071 if (reg
.Register
.Indirect
) {
2074 /* Add the relative address of the element. */
2075 if (reg
.Indirect
.ArrayID
)
2076 first
= array_first
[reg
.Indirect
.ArrayID
];
2078 first
= reg
.Register
.Index
;
2080 addr_reg
= get_address_file_reg(ctx
, reg
.Indirect
.Index
);
2082 /* pull the value from index_reg */
2083 r
= single_alu_op3(ctx
, ALU_OP3_MULADD_UINT24
,
2085 V_SQ_ALU_SRC_LITERAL
, 16,
2091 param
= r600_get_lds_unique_index(name
[first
],
2095 param
= r600_get_lds_unique_index(name
[reg
.Register
.Index
],
2096 index
[reg
.Register
.Index
]);
2099 /* add to base_addr - passed in temp_reg.x */
2101 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
2104 V_SQ_ALU_SRC_LITERAL
, param
* 16);
2112 static int do_lds_fetch_values(struct r600_shader_ctx
*ctx
, unsigned temp_reg
,
2113 unsigned dst_reg
, unsigned mask
)
2115 struct r600_bytecode_alu alu
;
2118 if ((ctx
->bc
->cf_last
->ndw
>>1) >= 0x60)
2119 ctx
->bc
->force_add_cf
= 1;
2121 lasti
= tgsi_last_instruction(mask
);
2122 for (i
= 1; i
<= lasti
; i
++) {
2123 if (!(mask
& (1 << i
)))
2126 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
2129 V_SQ_ALU_SRC_LITERAL
, 4 * i
);
2133 for (i
= 0; i
<= lasti
; i
++) {
2134 if (!(mask
& (1 << i
)))
2137 /* emit an LDS_READ_RET */
2138 memset(&alu
, 0, sizeof(alu
));
2139 alu
.op
= LDS_OP1_LDS_READ_RET
;
2140 alu
.src
[0].sel
= temp_reg
;
2141 alu
.src
[0].chan
= i
;
2142 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
2143 alu
.src
[2].sel
= V_SQ_ALU_SRC_0
;
2145 alu
.is_lds_idx_op
= true;
2147 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
2151 for (i
= 0; i
<= lasti
; i
++) {
2152 if (!(mask
& (1 << i
)))
2155 /* then read from LDS_OQ_A_POP */
2156 memset(&alu
, 0, sizeof(alu
));
2158 alu
.op
= ALU_OP1_MOV
;
2159 alu
.src
[0].sel
= EG_V_SQ_ALU_SRC_LDS_OQ_A_POP
;
2160 alu
.src
[0].chan
= 0;
2161 alu
.dst
.sel
= dst_reg
;
2165 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
2172 static int fetch_mask(struct tgsi_src_register
*reg
)
2175 mask
|= 1 << reg
->SwizzleX
;
2176 mask
|= 1 << reg
->SwizzleY
;
2177 mask
|= 1 << reg
->SwizzleZ
;
2178 mask
|= 1 << reg
->SwizzleW
;
2182 static int fetch_tes_input(struct r600_shader_ctx
*ctx
, struct tgsi_full_src_register
*src
, unsigned int dst_reg
)
2185 unsigned temp_reg
= r600_get_temp(ctx
);
2187 r
= get_lds_offset0(ctx
, 2, temp_reg
,
2188 src
->Register
.Dimension
? false : true);
2192 /* the base address is now in temp.x */
2193 r
= r600_get_byte_address(ctx
, temp_reg
,
2194 NULL
, src
, ctx
->tess_output_info
, 1);
2198 r
= do_lds_fetch_values(ctx
, temp_reg
, dst_reg
, fetch_mask(&src
->Register
));
2204 static int fetch_tcs_input(struct r600_shader_ctx
*ctx
, struct tgsi_full_src_register
*src
, unsigned int dst_reg
)
2207 unsigned temp_reg
= r600_get_temp(ctx
);
2209 /* t.x = ips * r0.y */
2210 r
= single_alu_op2(ctx
, ALU_OP2_MUL_UINT24
,
2212 ctx
->tess_input_info
, 0,
2218 /* the base address is now in temp.x */
2219 r
= r600_get_byte_address(ctx
, temp_reg
,
2220 NULL
, src
, ctx
->tess_input_info
, 1);
2224 r
= do_lds_fetch_values(ctx
, temp_reg
, dst_reg
, fetch_mask(&src
->Register
));
2230 static int fetch_tcs_output(struct r600_shader_ctx
*ctx
, struct tgsi_full_src_register
*src
, unsigned int dst_reg
)
2233 unsigned temp_reg
= r600_get_temp(ctx
);
2235 r
= get_lds_offset0(ctx
, 1, temp_reg
,
2236 src
->Register
.Dimension
? false : true);
2239 /* the base address is now in temp.x */
2240 r
= r600_get_byte_address(ctx
, temp_reg
,
2242 ctx
->tess_output_info
, 1);
2246 r
= do_lds_fetch_values(ctx
, temp_reg
, dst_reg
, fetch_mask(&src
->Register
));
2252 static int tgsi_split_lds_inputs(struct r600_shader_ctx
*ctx
)
2254 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2257 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
2258 struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
2260 if (ctx
->type
== PIPE_SHADER_TESS_EVAL
&& src
->Register
.File
== TGSI_FILE_INPUT
) {
2261 int treg
= r600_get_temp(ctx
);
2262 fetch_tes_input(ctx
, src
, treg
);
2263 ctx
->src
[i
].sel
= treg
;
2264 ctx
->src
[i
].rel
= 0;
2266 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
&& src
->Register
.File
== TGSI_FILE_INPUT
) {
2267 int treg
= r600_get_temp(ctx
);
2268 fetch_tcs_input(ctx
, src
, treg
);
2269 ctx
->src
[i
].sel
= treg
;
2270 ctx
->src
[i
].rel
= 0;
2272 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
&& src
->Register
.File
== TGSI_FILE_OUTPUT
) {
2273 int treg
= r600_get_temp(ctx
);
2274 fetch_tcs_output(ctx
, src
, treg
);
2275 ctx
->src
[i
].sel
= treg
;
2276 ctx
->src
[i
].rel
= 0;
2282 static int tgsi_split_constant(struct r600_shader_ctx
*ctx
)
2284 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2285 struct r600_bytecode_alu alu
;
2286 int i
, j
, k
, nconst
, r
;
2288 for (i
= 0, nconst
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
2289 if (inst
->Src
[i
].Register
.File
== TGSI_FILE_CONSTANT
) {
2292 tgsi_src(ctx
, &inst
->Src
[i
], &ctx
->src
[i
]);
2294 for (i
= 0, j
= nconst
- 1; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
2295 if (inst
->Src
[i
].Register
.File
!= TGSI_FILE_CONSTANT
) {
2299 if (ctx
->src
[i
].rel
) {
2300 int chan
= inst
->Src
[i
].Indirect
.Swizzle
;
2301 int treg
= r600_get_temp(ctx
);
2302 if ((r
= tgsi_fetch_rel_const(ctx
, ctx
->src
[i
].kc_bank
, ctx
->src
[i
].kc_rel
, ctx
->src
[i
].sel
- 512, chan
, treg
)))
2305 ctx
->src
[i
].kc_bank
= 0;
2306 ctx
->src
[i
].kc_rel
= 0;
2307 ctx
->src
[i
].sel
= treg
;
2308 ctx
->src
[i
].rel
= 0;
2311 int treg
= r600_get_temp(ctx
);
2312 for (k
= 0; k
< 4; k
++) {
2313 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
2314 alu
.op
= ALU_OP1_MOV
;
2315 alu
.src
[0].sel
= ctx
->src
[i
].sel
;
2316 alu
.src
[0].chan
= k
;
2317 alu
.src
[0].rel
= ctx
->src
[i
].rel
;
2318 alu
.src
[0].kc_bank
= ctx
->src
[i
].kc_bank
;
2319 alu
.src
[0].kc_rel
= ctx
->src
[i
].kc_rel
;
2325 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
2329 ctx
->src
[i
].sel
= treg
;
2337 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
2338 static int tgsi_split_literal_constant(struct r600_shader_ctx
*ctx
)
2340 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2341 struct r600_bytecode_alu alu
;
2342 int i
, j
, k
, nliteral
, r
;
2344 for (i
= 0, nliteral
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
2345 if (ctx
->src
[i
].sel
== V_SQ_ALU_SRC_LITERAL
) {
2349 for (i
= 0, j
= nliteral
- 1; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
2350 if (j
> 0 && ctx
->src
[i
].sel
== V_SQ_ALU_SRC_LITERAL
) {
2351 int treg
= r600_get_temp(ctx
);
2352 for (k
= 0; k
< 4; k
++) {
2353 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
2354 alu
.op
= ALU_OP1_MOV
;
2355 alu
.src
[0].sel
= ctx
->src
[i
].sel
;
2356 alu
.src
[0].chan
= k
;
2357 alu
.src
[0].value
= ctx
->src
[i
].value
[k
];
2363 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
2367 ctx
->src
[i
].sel
= treg
;
2374 static int process_twoside_color_inputs(struct r600_shader_ctx
*ctx
)
2376 int i
, r
, count
= ctx
->shader
->ninput
;
2378 for (i
= 0; i
< count
; i
++) {
2379 if (ctx
->shader
->input
[i
].name
== TGSI_SEMANTIC_COLOR
) {
2380 r
= select_twoside_color(ctx
, i
, ctx
->shader
->input
[i
].back_color_input
);
2388 static int emit_streamout(struct r600_shader_ctx
*ctx
, struct pipe_stream_output_info
*so
,
2389 int stream
, unsigned *stream_item_size UNUSED
)
2391 unsigned so_gpr
[PIPE_MAX_SHADER_OUTPUTS
];
2392 unsigned start_comp
[PIPE_MAX_SHADER_OUTPUTS
];
2396 /* Sanity checking. */
2397 if (so
->num_outputs
> PIPE_MAX_SO_OUTPUTS
) {
2398 R600_ERR("Too many stream outputs: %d\n", so
->num_outputs
);
2402 for (i
= 0; i
< so
->num_outputs
; i
++) {
2403 if (so
->output
[i
].output_buffer
>= 4) {
2404 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
2405 so
->output
[i
].output_buffer
);
2411 /* Initialize locations where the outputs are stored. */
2412 for (i
= 0; i
< so
->num_outputs
; i
++) {
2414 so_gpr
[i
] = ctx
->shader
->output
[so
->output
[i
].register_index
].gpr
;
2415 start_comp
[i
] = so
->output
[i
].start_component
;
2416 /* Lower outputs with dst_offset < start_component.
2418 * We can only output 4D vectors with a write mask, e.g. we can
2419 * only output the W component at offset 3, etc. If we want
2420 * to store Y, Z, or W at buffer offset 0, we need to use MOV
2421 * to move it to X and output X. */
2422 if (so
->output
[i
].dst_offset
< so
->output
[i
].start_component
) {
2423 unsigned tmp
= r600_get_temp(ctx
);
2425 for (j
= 0; j
< so
->output
[i
].num_components
; j
++) {
2426 struct r600_bytecode_alu alu
;
2427 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
2428 alu
.op
= ALU_OP1_MOV
;
2429 alu
.src
[0].sel
= so_gpr
[i
];
2430 alu
.src
[0].chan
= so
->output
[i
].start_component
+ j
;
2435 if (j
== so
->output
[i
].num_components
- 1)
2437 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
2446 /* Write outputs to buffers. */
2447 for (i
= 0; i
< so
->num_outputs
; i
++) {
2448 struct r600_bytecode_output output
;
2450 if (stream
!= -1 && stream
!= so
->output
[i
].stream
)
2453 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
2454 output
.gpr
= so_gpr
[i
];
2455 output
.elem_size
= so
->output
[i
].num_components
- 1;
2456 if (output
.elem_size
== 2)
2457 output
.elem_size
= 3; // 3 not supported, write 4 with junk at end
2458 output
.array_base
= so
->output
[i
].dst_offset
- start_comp
[i
];
2459 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE
;
2460 output
.burst_count
= 1;
2461 /* array_size is an upper limit for the burst_count
2462 * with MEM_STREAM instructions */
2463 output
.array_size
= 0xFFF;
2464 output
.comp_mask
= ((1 << so
->output
[i
].num_components
) - 1) << start_comp
[i
];
2466 if (ctx
->bc
->chip_class
>= EVERGREEN
) {
2467 switch (so
->output
[i
].output_buffer
) {
2469 output
.op
= CF_OP_MEM_STREAM0_BUF0
;
2472 output
.op
= CF_OP_MEM_STREAM0_BUF1
;
2475 output
.op
= CF_OP_MEM_STREAM0_BUF2
;
2478 output
.op
= CF_OP_MEM_STREAM0_BUF3
;
2481 output
.op
+= so
->output
[i
].stream
* 4;
2482 assert(output
.op
>= CF_OP_MEM_STREAM0_BUF0
&& output
.op
<= CF_OP_MEM_STREAM3_BUF3
);
2483 ctx
->enabled_stream_buffers_mask
|= (1 << so
->output
[i
].output_buffer
) << so
->output
[i
].stream
* 4;
2485 switch (so
->output
[i
].output_buffer
) {
2487 output
.op
= CF_OP_MEM_STREAM0
;
2490 output
.op
= CF_OP_MEM_STREAM1
;
2493 output
.op
= CF_OP_MEM_STREAM2
;
2496 output
.op
= CF_OP_MEM_STREAM3
;
2499 ctx
->enabled_stream_buffers_mask
|= 1 << so
->output
[i
].output_buffer
;
2501 r
= r600_bytecode_add_output(ctx
->bc
, &output
);
2510 static void convert_edgeflag_to_int(struct r600_shader_ctx
*ctx
)
2512 struct r600_bytecode_alu alu
;
2515 if (!ctx
->shader
->vs_out_edgeflag
)
2518 reg
= ctx
->shader
->output
[ctx
->edgeflag_output
].gpr
;
2520 /* clamp(x, 0, 1) */
2521 memset(&alu
, 0, sizeof(alu
));
2522 alu
.op
= ALU_OP1_MOV
;
2523 alu
.src
[0].sel
= reg
;
2528 r600_bytecode_add_alu(ctx
->bc
, &alu
);
2530 memset(&alu
, 0, sizeof(alu
));
2531 alu
.op
= ALU_OP1_FLT_TO_INT
;
2532 alu
.src
[0].sel
= reg
;
2536 r600_bytecode_add_alu(ctx
->bc
, &alu
);
2539 int generate_gs_copy_shader(struct r600_context
*rctx
,
2540 struct r600_pipe_shader
*gs
,
2541 struct pipe_stream_output_info
*so
)
2543 struct r600_shader_ctx ctx
= {};
2544 struct r600_shader
*gs_shader
= &gs
->shader
;
2545 struct r600_pipe_shader
*cshader
;
2546 unsigned ocnt
= gs_shader
->noutput
;
2547 struct r600_bytecode_alu alu
;
2548 struct r600_bytecode_vtx vtx
;
2549 struct r600_bytecode_output output
;
2550 struct r600_bytecode_cf
*cf_jump
, *cf_pop
,
2551 *last_exp_pos
= NULL
, *last_exp_param
= NULL
;
2552 int next_clip_pos
= 61, next_param
= 0;
2555 bool only_ring_0
= true;
2556 cshader
= calloc(1, sizeof(struct r600_pipe_shader
));
2560 memcpy(cshader
->shader
.output
, gs_shader
->output
, ocnt
*
2561 sizeof(struct r600_shader_io
));
2563 cshader
->shader
.noutput
= ocnt
;
2565 ctx
.shader
= &cshader
->shader
;
2566 ctx
.bc
= &ctx
.shader
->bc
;
2567 ctx
.type
= ctx
.bc
->type
= PIPE_SHADER_VERTEX
;
2569 r600_bytecode_init(ctx
.bc
, rctx
->b
.chip_class
, rctx
->b
.family
,
2570 rctx
->screen
->has_compressed_msaa_texturing
);
2572 ctx
.bc
->isa
= rctx
->isa
;
2575 memset(cshader
->shader
.ring_item_sizes
, 0, sizeof(cshader
->shader
.ring_item_sizes
));
2577 /* R0.x = R0.x & 0x3fffffff */
2578 memset(&alu
, 0, sizeof(alu
));
2579 alu
.op
= ALU_OP2_AND_INT
;
2580 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
2581 alu
.src
[1].value
= 0x3fffffff;
2583 r600_bytecode_add_alu(ctx
.bc
, &alu
);
2585 /* R0.y = R0.x >> 30 */
2586 memset(&alu
, 0, sizeof(alu
));
2587 alu
.op
= ALU_OP2_LSHR_INT
;
2588 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
2589 alu
.src
[1].value
= 0x1e;
2593 r600_bytecode_add_alu(ctx
.bc
, &alu
);
2595 /* fetch vertex data from GSVS ring */
2596 for (i
= 0; i
< ocnt
; ++i
) {
2597 struct r600_shader_io
*out
= &ctx
.shader
->output
[i
];
2600 out
->ring_offset
= i
* 16;
2602 memset(&vtx
, 0, sizeof(vtx
));
2603 vtx
.op
= FETCH_OP_VFETCH
;
2604 vtx
.buffer_id
= R600_GS_RING_CONST_BUFFER
;
2605 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
2606 vtx
.mega_fetch_count
= 16;
2607 vtx
.offset
= out
->ring_offset
;
2608 vtx
.dst_gpr
= out
->gpr
;
2614 if (rctx
->b
.chip_class
>= EVERGREEN
) {
2615 vtx
.use_const_fields
= 1;
2617 vtx
.data_format
= FMT_32_32_32_32_FLOAT
;
2620 r600_bytecode_add_vtx(ctx
.bc
, &vtx
);
2622 ctx
.temp_reg
= i
+ 1;
2623 for (ring
= 3; ring
>= 0; --ring
) {
2624 bool enabled
= false;
2625 for (i
= 0; i
< so
->num_outputs
; i
++) {
2626 if (so
->output
[i
].stream
== ring
) {
2629 only_ring_0
= false;
2633 if (ring
!= 0 && !enabled
) {
2634 cshader
->shader
.ring_item_sizes
[ring
] = 0;
2639 // Patch up jump label
2640 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_POP
);
2641 cf_pop
= ctx
.bc
->cf_last
;
2643 cf_jump
->cf_addr
= cf_pop
->id
+ 2;
2644 cf_jump
->pop_count
= 1;
2645 cf_pop
->cf_addr
= cf_pop
->id
+ 2;
2646 cf_pop
->pop_count
= 1;
2649 /* PRED_SETE_INT __, R0.y, ring */
2650 memset(&alu
, 0, sizeof(alu
));
2651 alu
.op
= ALU_OP2_PRED_SETE_INT
;
2652 alu
.src
[0].chan
= 1;
2653 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
2654 alu
.src
[1].value
= ring
;
2655 alu
.execute_mask
= 1;
2656 alu
.update_pred
= 1;
2658 r600_bytecode_add_alu_type(ctx
.bc
, &alu
, CF_OP_ALU_PUSH_BEFORE
);
2660 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_JUMP
);
2661 cf_jump
= ctx
.bc
->cf_last
;
2664 emit_streamout(&ctx
, so
, only_ring_0
? -1 : ring
, &cshader
->shader
.ring_item_sizes
[ring
]);
2665 cshader
->shader
.ring_item_sizes
[ring
] = ocnt
* 16;
2668 /* bc adds nops - copy it */
2669 if (ctx
.bc
->chip_class
== R600
) {
2670 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
2671 alu
.op
= ALU_OP0_NOP
;
2673 r600_bytecode_add_alu(ctx
.bc
, &alu
);
2675 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_NOP
);
2678 /* export vertex data */
2679 /* XXX factor out common code with r600_shader_from_tgsi ? */
2680 for (i
= 0; i
< ocnt
; ++i
) {
2681 struct r600_shader_io
*out
= &ctx
.shader
->output
[i
];
2682 bool instream0
= true;
2683 if (out
->name
== TGSI_SEMANTIC_CLIPVERTEX
)
2686 for (j
= 0; j
< so
->num_outputs
; j
++) {
2687 if (so
->output
[j
].register_index
== i
) {
2688 if (so
->output
[j
].stream
== 0)
2690 if (so
->output
[j
].stream
> 0)
2696 memset(&output
, 0, sizeof(output
));
2697 output
.gpr
= out
->gpr
;
2698 output
.elem_size
= 3;
2699 output
.swizzle_x
= 0;
2700 output
.swizzle_y
= 1;
2701 output
.swizzle_z
= 2;
2702 output
.swizzle_w
= 3;
2703 output
.burst_count
= 1;
2704 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
2705 output
.op
= CF_OP_EXPORT
;
2706 switch (out
->name
) {
2707 case TGSI_SEMANTIC_POSITION
:
2708 output
.array_base
= 60;
2709 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
2712 case TGSI_SEMANTIC_PSIZE
:
2713 output
.array_base
= 61;
2714 if (next_clip_pos
== 61)
2716 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
2717 output
.swizzle_y
= 7;
2718 output
.swizzle_z
= 7;
2719 output
.swizzle_w
= 7;
2720 ctx
.shader
->vs_out_misc_write
= 1;
2721 ctx
.shader
->vs_out_point_size
= 1;
2723 case TGSI_SEMANTIC_LAYER
:
2725 /* duplicate it as PARAM to pass to the pixel shader */
2726 output
.array_base
= next_param
++;
2727 r600_bytecode_add_output(ctx
.bc
, &output
);
2728 last_exp_param
= ctx
.bc
->cf_last
;
2730 output
.array_base
= 61;
2731 if (next_clip_pos
== 61)
2733 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
2734 output
.swizzle_x
= 7;
2735 output
.swizzle_y
= 7;
2736 output
.swizzle_z
= 0;
2737 output
.swizzle_w
= 7;
2738 ctx
.shader
->vs_out_misc_write
= 1;
2739 ctx
.shader
->vs_out_layer
= 1;
2741 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
2743 /* duplicate it as PARAM to pass to the pixel shader */
2744 output
.array_base
= next_param
++;
2745 r600_bytecode_add_output(ctx
.bc
, &output
);
2746 last_exp_param
= ctx
.bc
->cf_last
;
2748 output
.array_base
= 61;
2749 if (next_clip_pos
== 61)
2751 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
2752 ctx
.shader
->vs_out_misc_write
= 1;
2753 ctx
.shader
->vs_out_viewport
= 1;
2754 output
.swizzle_x
= 7;
2755 output
.swizzle_y
= 7;
2756 output
.swizzle_z
= 7;
2757 output
.swizzle_w
= 0;
2759 case TGSI_SEMANTIC_CLIPDIST
:
2760 /* spi_sid is 0 for clipdistance outputs that were generated
2761 * for clipvertex - we don't need to pass them to PS */
2762 ctx
.shader
->clip_dist_write
= gs
->shader
.clip_dist_write
;
2763 ctx
.shader
->cull_dist_write
= gs
->shader
.cull_dist_write
;
2764 ctx
.shader
->cc_dist_mask
= gs
->shader
.cc_dist_mask
;
2766 /* duplicate it as PARAM to pass to the pixel shader */
2767 output
.array_base
= next_param
++;
2768 r600_bytecode_add_output(ctx
.bc
, &output
);
2769 last_exp_param
= ctx
.bc
->cf_last
;
2771 output
.array_base
= next_clip_pos
++;
2772 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
2774 case TGSI_SEMANTIC_FOG
:
2775 output
.swizzle_y
= 4; /* 0 */
2776 output
.swizzle_z
= 4; /* 0 */
2777 output
.swizzle_w
= 5; /* 1 */
2780 output
.array_base
= next_param
++;
2783 r600_bytecode_add_output(ctx
.bc
, &output
);
2784 if (output
.type
== V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
)
2785 last_exp_param
= ctx
.bc
->cf_last
;
2787 last_exp_pos
= ctx
.bc
->cf_last
;
2790 if (!last_exp_pos
) {
2791 memset(&output
, 0, sizeof(output
));
2793 output
.elem_size
= 3;
2794 output
.swizzle_x
= 7;
2795 output
.swizzle_y
= 7;
2796 output
.swizzle_z
= 7;
2797 output
.swizzle_w
= 7;
2798 output
.burst_count
= 1;
2800 output
.op
= CF_OP_EXPORT
;
2801 output
.array_base
= 60;
2802 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
2803 r600_bytecode_add_output(ctx
.bc
, &output
);
2804 last_exp_pos
= ctx
.bc
->cf_last
;
2807 if (!last_exp_param
) {
2808 memset(&output
, 0, sizeof(output
));
2810 output
.elem_size
= 3;
2811 output
.swizzle_x
= 7;
2812 output
.swizzle_y
= 7;
2813 output
.swizzle_z
= 7;
2814 output
.swizzle_w
= 7;
2815 output
.burst_count
= 1;
2817 output
.op
= CF_OP_EXPORT
;
2818 output
.array_base
= next_param
++;
2819 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
2820 r600_bytecode_add_output(ctx
.bc
, &output
);
2821 last_exp_param
= ctx
.bc
->cf_last
;
2824 last_exp_pos
->op
= CF_OP_EXPORT_DONE
;
2825 last_exp_param
->op
= CF_OP_EXPORT_DONE
;
2827 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_POP
);
2828 cf_pop
= ctx
.bc
->cf_last
;
2830 cf_jump
->cf_addr
= cf_pop
->id
+ 2;
2831 cf_jump
->pop_count
= 1;
2832 cf_pop
->cf_addr
= cf_pop
->id
+ 2;
2833 cf_pop
->pop_count
= 1;
2835 if (ctx
.bc
->chip_class
== CAYMAN
)
2836 cm_bytecode_add_cf_end(ctx
.bc
);
2838 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_NOP
);
2839 ctx
.bc
->cf_last
->end_of_program
= 1;
2842 gs
->gs_copy_shader
= cshader
;
2843 cshader
->enabled_stream_buffers_mask
= ctx
.enabled_stream_buffers_mask
;
2847 return r600_bytecode_build(ctx
.bc
);
2850 static int emit_inc_ring_offset(struct r600_shader_ctx
*ctx
, int idx
, bool ind
)
2853 struct r600_bytecode_alu alu
;
2856 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
2857 alu
.op
= ALU_OP2_ADD_INT
;
2858 alu
.src
[0].sel
= ctx
->gs_export_gpr_tregs
[idx
];
2859 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
2860 alu
.src
[1].value
= ctx
->gs_out_ring_offset
>> 4;
2861 alu
.dst
.sel
= ctx
->gs_export_gpr_tregs
[idx
];
2864 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
2871 static int emit_gs_ring_writes(struct r600_shader_ctx
*ctx
, const struct pipe_stream_output_info
*so UNUSED
, int stream
, bool ind
)
2873 struct r600_bytecode_output output
;
2876 int effective_stream
= stream
== -1 ? 0 : stream
;
2879 for (i
= 0; i
< ctx
->shader
->noutput
; i
++) {
2880 if (ctx
->gs_for_vs
) {
2881 /* for ES we need to lookup corresponding ring offset expected by GS
2882 * (map this output to GS input by name and sid) */
2883 /* FIXME precompute offsets */
2885 for(k
= 0; k
< ctx
->gs_for_vs
->ninput
; ++k
) {
2886 struct r600_shader_io
*in
= &ctx
->gs_for_vs
->input
[k
];
2887 struct r600_shader_io
*out
= &ctx
->shader
->output
[i
];
2888 if (in
->name
== out
->name
&& in
->sid
== out
->sid
)
2889 ring_offset
= in
->ring_offset
;
2892 if (ring_offset
== -1)
2895 ring_offset
= idx
* 16;
2899 if (stream
> 0 && ctx
->shader
->output
[i
].name
== TGSI_SEMANTIC_POSITION
)
2901 /* next_ring_offset after parsing input decls contains total size of
2902 * single vertex data, gs_next_vertex - current vertex index */
2904 ring_offset
+= ctx
->gs_out_ring_offset
* ctx
->gs_next_vertex
;
2906 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
2907 output
.gpr
= ctx
->shader
->output
[i
].gpr
;
2908 output
.elem_size
= 3;
2909 output
.comp_mask
= 0xF;
2910 output
.burst_count
= 1;
2913 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND
;
2915 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE
;
2920 output
.op
= CF_OP_MEM_RING
; break;
2922 output
.op
= CF_OP_MEM_RING1
; break;
2924 output
.op
= CF_OP_MEM_RING2
; break;
2926 output
.op
= CF_OP_MEM_RING3
; break;
2930 output
.array_base
= ring_offset
>> 2; /* in dwords */
2931 output
.array_size
= 0xfff;
2932 output
.index_gpr
= ctx
->gs_export_gpr_tregs
[effective_stream
];
2934 output
.array_base
= ring_offset
>> 2; /* in dwords */
2935 r600_bytecode_add_output(ctx
->bc
, &output
);
2938 ++ctx
->gs_next_vertex
;
2943 static int r600_fetch_tess_io_info(struct r600_shader_ctx
*ctx
)
2946 struct r600_bytecode_vtx vtx
;
2947 int temp_val
= ctx
->temp_reg
;
2948 /* need to store the TCS output somewhere */
2949 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
2951 V_SQ_ALU_SRC_LITERAL
, 0,
2956 /* used by VS/TCS */
2957 if (ctx
->tess_input_info
) {
2958 /* fetch tcs input values into resv space */
2959 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
2960 vtx
.op
= FETCH_OP_VFETCH
;
2961 vtx
.buffer_id
= R600_LDS_INFO_CONST_BUFFER
;
2962 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
2963 vtx
.mega_fetch_count
= 16;
2964 vtx
.data_format
= FMT_32_32_32_32
;
2965 vtx
.num_format_all
= 2;
2966 vtx
.format_comp_all
= 1;
2967 vtx
.use_const_fields
= 0;
2968 vtx
.endian
= r600_endian_swap(32);
2969 vtx
.srf_mode_all
= 1;
2971 vtx
.dst_gpr
= ctx
->tess_input_info
;
2976 vtx
.src_gpr
= temp_val
;
2979 r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
);
2984 /* used by TCS/TES */
2985 if (ctx
->tess_output_info
) {
2986 /* fetch tcs output values into resv space */
2987 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
2988 vtx
.op
= FETCH_OP_VFETCH
;
2989 vtx
.buffer_id
= R600_LDS_INFO_CONST_BUFFER
;
2990 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
2991 vtx
.mega_fetch_count
= 16;
2992 vtx
.data_format
= FMT_32_32_32_32
;
2993 vtx
.num_format_all
= 2;
2994 vtx
.format_comp_all
= 1;
2995 vtx
.use_const_fields
= 0;
2996 vtx
.endian
= r600_endian_swap(32);
2997 vtx
.srf_mode_all
= 1;
2999 vtx
.dst_gpr
= ctx
->tess_output_info
;
3004 vtx
.src_gpr
= temp_val
;
3007 r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
);
3014 static int emit_lds_vs_writes(struct r600_shader_ctx
*ctx
)
3020 /* fetch tcs input values into input_vals */
3021 ctx
->tess_input_info
= r600_get_temp(ctx
);
3022 ctx
->tess_output_info
= 0;
3023 r
= r600_fetch_tess_io_info(ctx
);
3027 temp_reg
= r600_get_temp(ctx
);
3028 /* dst reg contains LDS address stride * idx */
3029 /* MUL vertexID, vertex_dw_stride */
3030 r
= single_alu_op2(ctx
, ALU_OP2_MUL_UINT24
,
3032 ctx
->tess_input_info
, 1,
3033 0, 1); /* rel id in r0.y? */
3037 for (i
= 0; i
< ctx
->shader
->noutput
; i
++) {
3038 struct r600_bytecode_alu alu
;
3039 int param
= r600_get_lds_unique_index(ctx
->shader
->output
[i
].name
, ctx
->shader
->output
[i
].sid
);
3042 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
3045 V_SQ_ALU_SRC_LITERAL
, param
* 16);
3050 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
3052 temp_reg
, param
? 1 : 0,
3053 V_SQ_ALU_SRC_LITERAL
, 8);
3058 for (j
= 0; j
< 2; j
++) {
3059 int chan
= (j
== 1) ? 2 : (param
? 1 : 0);
3060 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3061 alu
.op
= LDS_OP3_LDS_WRITE_REL
;
3062 alu
.src
[0].sel
= temp_reg
;
3063 alu
.src
[0].chan
= chan
;
3064 alu
.src
[1].sel
= ctx
->shader
->output
[i
].gpr
;
3065 alu
.src
[1].chan
= j
* 2;
3066 alu
.src
[2].sel
= ctx
->shader
->output
[i
].gpr
;
3067 alu
.src
[2].chan
= (j
* 2) + 1;
3071 alu
.is_lds_idx_op
= true;
3072 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
3080 static int r600_store_tcs_output(struct r600_shader_ctx
*ctx
)
3082 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
3083 const struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
3085 int temp_reg
= r600_get_temp(ctx
);
3086 struct r600_bytecode_alu alu
;
3087 unsigned write_mask
= dst
->Register
.WriteMask
;
3089 if (inst
->Dst
[0].Register
.File
!= TGSI_FILE_OUTPUT
)
3092 r
= get_lds_offset0(ctx
, 1, temp_reg
, dst
->Register
.Dimension
? false : true);
3096 /* the base address is now in temp.x */
3097 r
= r600_get_byte_address(ctx
, temp_reg
,
3098 &inst
->Dst
[0], NULL
, ctx
->tess_output_info
, 1);
3103 lasti
= tgsi_last_instruction(write_mask
);
3104 for (i
= 1; i
<= lasti
; i
++) {
3106 if (!(write_mask
& (1 << i
)))
3108 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
3111 V_SQ_ALU_SRC_LITERAL
, 4 * i
);
3116 for (i
= 0; i
<= lasti
; i
++) {
3117 if (!(write_mask
& (1 << i
)))
3120 if ((i
== 0 && ((write_mask
& 3) == 3)) ||
3121 (i
== 2 && ((write_mask
& 0xc) == 0xc))) {
3122 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3123 alu
.op
= LDS_OP3_LDS_WRITE_REL
;
3124 alu
.src
[0].sel
= temp_reg
;
3125 alu
.src
[0].chan
= i
;
3127 alu
.src
[1].sel
= dst
->Register
.Index
;
3128 alu
.src
[1].sel
+= ctx
->file_offset
[dst
->Register
.File
];
3129 alu
.src
[1].chan
= i
;
3131 alu
.src
[2].sel
= dst
->Register
.Index
;
3132 alu
.src
[2].sel
+= ctx
->file_offset
[dst
->Register
.File
];
3133 alu
.src
[2].chan
= i
+ 1;
3137 alu
.is_lds_idx_op
= true;
3138 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
3144 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3145 alu
.op
= LDS_OP2_LDS_WRITE
;
3146 alu
.src
[0].sel
= temp_reg
;
3147 alu
.src
[0].chan
= i
;
3149 alu
.src
[1].sel
= dst
->Register
.Index
;
3150 alu
.src
[1].sel
+= ctx
->file_offset
[dst
->Register
.File
];
3151 alu
.src
[1].chan
= i
;
3153 alu
.src
[2].sel
= V_SQ_ALU_SRC_0
;
3156 alu
.is_lds_idx_op
= true;
3157 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
3164 static int r600_tess_factor_read(struct r600_shader_ctx
*ctx
,
3165 int output_idx
, int nc
)
3168 unsigned temp_reg
= r600_get_temp(ctx
);
3169 unsigned name
= ctx
->shader
->output
[output_idx
].name
;
3170 int dreg
= ctx
->shader
->output
[output_idx
].gpr
;
3173 param
= r600_get_lds_unique_index(name
, 0);
3174 r
= get_lds_offset0(ctx
, 1, temp_reg
, true);
3179 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
3182 V_SQ_ALU_SRC_LITERAL
, param
* 16);
3187 do_lds_fetch_values(ctx
, temp_reg
, dreg
, ((1u << nc
) - 1));
3191 static int r600_emit_tess_factor(struct r600_shader_ctx
*ctx
)
3193 int stride
, outer_comps
, inner_comps
;
3194 int tessinner_idx
= -1, tessouter_idx
= -1;
3197 int temp_reg
= r600_get_temp(ctx
);
3198 int treg
[3] = {-1, -1, -1};
3199 struct r600_bytecode_alu alu
;
3200 struct r600_bytecode_cf
*cf_jump
, *cf_pop
;
3202 /* only execute factor emission for invocation 0 */
3203 /* PRED_SETE_INT __, R0.x, 0 */
3204 memset(&alu
, 0, sizeof(alu
));
3205 alu
.op
= ALU_OP2_PRED_SETE_INT
;
3206 alu
.src
[0].chan
= 2;
3207 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
3208 alu
.execute_mask
= 1;
3209 alu
.update_pred
= 1;
3211 r600_bytecode_add_alu_type(ctx
->bc
, &alu
, CF_OP_ALU_PUSH_BEFORE
);
3213 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_JUMP
);
3214 cf_jump
= ctx
->bc
->cf_last
;
3216 treg
[0] = r600_get_temp(ctx
);
3217 switch (ctx
->shader
->tcs_prim_mode
) {
3218 case PIPE_PRIM_LINES
:
3219 stride
= 8; /* 2 dwords, 1 vec2 store */
3223 case PIPE_PRIM_TRIANGLES
:
3224 stride
= 16; /* 4 dwords, 1 vec4 store */
3227 treg
[1] = r600_get_temp(ctx
);
3229 case PIPE_PRIM_QUADS
:
3230 stride
= 24; /* 6 dwords, 2 stores (vec4 + vec2) */
3233 treg
[1] = r600_get_temp(ctx
);
3234 treg
[2] = r600_get_temp(ctx
);
3241 /* R0 is InvocationID, RelPatchID, PatchID, tf_base */
3242 /* TF_WRITE takes index in R.x, value in R.y */
3243 for (j
= 0; j
< ctx
->shader
->noutput
; j
++) {
3244 if (ctx
->shader
->output
[j
].name
== TGSI_SEMANTIC_TESSINNER
)
3246 if (ctx
->shader
->output
[j
].name
== TGSI_SEMANTIC_TESSOUTER
)
3250 if (tessouter_idx
== -1)
3253 if (tessinner_idx
== -1 && inner_comps
)
3256 if (tessouter_idx
!= -1) {
3257 r
= r600_tess_factor_read(ctx
, tessouter_idx
, outer_comps
);
3262 if (tessinner_idx
!= -1) {
3263 r
= r600_tess_factor_read(ctx
, tessinner_idx
, inner_comps
);
3268 /* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
3269 /* r.x = relpatchid(r0.y) * tf_stride */
3271 /* multiply incoming r0.y * stride - t.x = r0.y * stride */
3272 /* add incoming r0.w to it: t.x = t.x + r0.w */
3273 r
= single_alu_op3(ctx
, ALU_OP3_MULADD_UINT24
,
3276 V_SQ_ALU_SRC_LITERAL
, stride
,
3281 for (i
= 0; i
< outer_comps
+ inner_comps
; i
++) {
3282 int out_idx
= i
>= outer_comps
? tessinner_idx
: tessouter_idx
;
3283 int out_comp
= i
>= outer_comps
? i
- outer_comps
: i
;
3285 if (ctx
->shader
->tcs_prim_mode
== PIPE_PRIM_LINES
) {
3288 else if (out_comp
== 0)
3292 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
3293 treg
[i
/ 2], (2 * (i
% 2)),
3295 V_SQ_ALU_SRC_LITERAL
, 4 * i
);
3298 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
3299 treg
[i
/ 2], 1 + (2 * (i
%2)),
3300 ctx
->shader
->output
[out_idx
].gpr
, out_comp
,
3305 for (i
= 0; i
< outer_comps
+ inner_comps
; i
++) {
3306 struct r600_bytecode_gds gds
;
3308 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
3309 gds
.src_gpr
= treg
[i
/ 2];
3310 gds
.src_sel_x
= 2 * (i
% 2);
3311 gds
.src_sel_y
= 1 + (2 * (i
% 2));
3317 gds
.op
= FETCH_OP_TF_WRITE
;
3318 r
= r600_bytecode_add_gds(ctx
->bc
, &gds
);
3323 // Patch up jump label
3324 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_POP
);
3325 cf_pop
= ctx
->bc
->cf_last
;
3327 cf_jump
->cf_addr
= cf_pop
->id
+ 2;
3328 cf_jump
->pop_count
= 1;
3329 cf_pop
->cf_addr
= cf_pop
->id
+ 2;
3330 cf_pop
->pop_count
= 1;
3336 * We have to work out the thread ID for load and atomic
3337 * operations, which store the returned value to an index
3338 * in an intermediate buffer.
3339 * The index is calculated by taking the thread id,
3340 * calculated from the MBCNT instructions.
3341 * Then the shader engine ID is multiplied by 256,
3342 * and the wave id is added.
3343 * Then the result is multipled by 64 and thread id is
3346 static int load_thread_id_gpr(struct r600_shader_ctx
*ctx
)
3348 struct r600_bytecode_alu alu
;
3351 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3352 alu
.op
= ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT
;
3353 alu
.dst
.sel
= ctx
->temp_reg
;
3355 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
3356 alu
.src
[0].value
= 0xffffffff;
3358 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
3362 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3363 alu
.op
= ALU_OP1_MBCNT_32HI_INT
;
3364 alu
.dst
.sel
= ctx
->temp_reg
;
3366 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
3367 alu
.src
[0].value
= 0xffffffff;
3369 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
3373 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3374 alu
.op
= ALU_OP3_MULADD_UINT24
;
3375 alu
.dst
.sel
= ctx
->temp_reg
;
3377 alu
.src
[0].sel
= EG_V_SQ_ALU_SRC_SE_ID
;
3378 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
3379 alu
.src
[1].value
= 256;
3380 alu
.src
[2].sel
= EG_V_SQ_ALU_SRC_HW_WAVE_ID
;
3384 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
3388 r
= single_alu_op3(ctx
, ALU_OP3_MULADD_UINT24
,
3389 ctx
->thread_id_gpr
, 1,
3391 V_SQ_ALU_SRC_LITERAL
, 0x40,
3398 static int r600_shader_from_tgsi(struct r600_context
*rctx
,
3399 struct r600_pipe_shader
*pipeshader
,
3400 union r600_shader_key key
)
3402 struct r600_screen
*rscreen
= rctx
->screen
;
3403 struct r600_shader
*shader
= &pipeshader
->shader
;
3404 struct tgsi_token
*tokens
= pipeshader
->selector
->tokens
;
3405 struct pipe_stream_output_info so
= pipeshader
->selector
->so
;
3406 struct tgsi_full_immediate
*immediate
;
3407 struct r600_shader_ctx ctx
;
3408 struct r600_bytecode_output output
[ARRAY_SIZE(shader
->output
)];
3409 unsigned output_done
, noutput
;
3413 int next_param_base
= 0, next_clip_base
;
3414 int max_color_exports
= MAX2(key
.ps
.nr_cbufs
, 1);
3416 bool ring_outputs
= false;
3417 bool lds_outputs
= false;
3418 bool lds_inputs
= false;
3419 bool pos_emitted
= false;
3421 ctx
.bc
= &shader
->bc
;
3422 ctx
.shader
= shader
;
3424 r600_bytecode_init(ctx
.bc
, rscreen
->b
.chip_class
, rscreen
->b
.family
,
3425 rscreen
->has_compressed_msaa_texturing
);
3426 ctx
.tokens
= tokens
;
3427 tgsi_scan_shader(tokens
, &ctx
.info
);
3428 shader
->indirect_files
= ctx
.info
.indirect_files
;
3430 int narrays
= ctx
.info
.array_max
[TGSI_FILE_TEMPORARY
];
3431 ctx
.array_infos
= calloc(narrays
, sizeof(*ctx
.array_infos
));
3432 ctx
.spilled_arrays
= calloc(narrays
, sizeof(bool));
3433 tgsi_scan_arrays(tokens
, TGSI_FILE_TEMPORARY
, narrays
, ctx
.array_infos
);
3435 shader
->uses_helper_invocation
= false;
3436 shader
->uses_doubles
= ctx
.info
.uses_doubles
;
3437 shader
->uses_atomics
= ctx
.info
.file_mask
[TGSI_FILE_HW_ATOMIC
];
3438 shader
->nsys_inputs
= 0;
3440 shader
->uses_images
= ctx
.info
.file_count
[TGSI_FILE_IMAGE
] > 0 ||
3441 ctx
.info
.file_count
[TGSI_FILE_BUFFER
] > 0;
3442 indirect_gprs
= ctx
.info
.indirect_files
& ~((1 << TGSI_FILE_CONSTANT
) | (1 << TGSI_FILE_SAMPLER
));
3443 tgsi_parse_init(&ctx
.parse
, tokens
);
3444 ctx
.type
= ctx
.info
.processor
;
3445 shader
->processor_type
= ctx
.type
;
3446 ctx
.bc
->type
= shader
->processor_type
;
3449 case PIPE_SHADER_VERTEX
:
3450 shader
->vs_as_gs_a
= key
.vs
.as_gs_a
;
3451 shader
->vs_as_es
= key
.vs
.as_es
;
3452 shader
->vs_as_ls
= key
.vs
.as_ls
;
3453 shader
->atomic_base
= key
.vs
.first_atomic_counter
;
3454 if (shader
->vs_as_es
)
3455 ring_outputs
= true;
3456 if (shader
->vs_as_ls
)
3459 case PIPE_SHADER_GEOMETRY
:
3460 ring_outputs
= true;
3461 shader
->atomic_base
= key
.gs
.first_atomic_counter
;
3462 shader
->gs_tri_strip_adj_fix
= key
.gs
.tri_strip_adj_fix
;
3464 case PIPE_SHADER_TESS_CTRL
:
3465 shader
->tcs_prim_mode
= key
.tcs
.prim_mode
;
3466 shader
->atomic_base
= key
.tcs
.first_atomic_counter
;
3470 case PIPE_SHADER_TESS_EVAL
:
3471 shader
->tes_as_es
= key
.tes
.as_es
;
3472 shader
->atomic_base
= key
.tes
.first_atomic_counter
;
3474 if (shader
->tes_as_es
)
3475 ring_outputs
= true;
3477 case PIPE_SHADER_FRAGMENT
:
3478 shader
->two_side
= key
.ps
.color_two_side
;
3479 shader
->atomic_base
= key
.ps
.first_atomic_counter
;
3480 shader
->rat_base
= key
.ps
.nr_cbufs
;
3481 shader
->image_size_const_offset
= key
.ps
.image_size_const_offset
;
3483 case PIPE_SHADER_COMPUTE
:
3484 shader
->rat_base
= 0;
3485 shader
->image_size_const_offset
= ctx
.info
.file_count
[TGSI_FILE_SAMPLER
];
3491 if (shader
->vs_as_es
|| shader
->tes_as_es
) {
3492 ctx
.gs_for_vs
= &rctx
->gs_shader
->current
->shader
;
3494 ctx
.gs_for_vs
= NULL
;
3497 ctx
.next_ring_offset
= 0;
3498 ctx
.gs_out_ring_offset
= 0;
3499 ctx
.gs_next_vertex
= 0;
3500 ctx
.gs_stream_output_info
= &so
;
3502 ctx
.thread_id_gpr
= -1;
3504 ctx
.fixed_pt_position_gpr
= -1;
3505 ctx
.fragcoord_input
= -1;
3506 ctx
.colors_used
= 0;
3507 ctx
.clip_vertex_write
= 0;
3509 ctx
.helper_invoc_reg
= -1;
3510 ctx
.cs_block_size_reg
= -1;
3511 ctx
.cs_grid_size_reg
= -1;
3512 ctx
.cs_block_size_loaded
= false;
3513 ctx
.cs_grid_size_loaded
= false;
3515 shader
->nr_ps_color_exports
= 0;
3516 shader
->nr_ps_max_color_exports
= 0;
3519 /* register allocations */
3520 /* Values [0,127] correspond to GPR[0..127].
3521 * Values [128,159] correspond to constant buffer bank 0
3522 * Values [160,191] correspond to constant buffer bank 1
3523 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
3524 * Values [256,287] correspond to constant buffer bank 2 (EG)
3525 * Values [288,319] correspond to constant buffer bank 3 (EG)
3526 * Other special values are shown in the list below.
3527 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
3528 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
3529 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
3530 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
3531 * 248 SQ_ALU_SRC_0: special constant 0.0.
3532 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
3533 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
3534 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
3535 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
3536 * 253 SQ_ALU_SRC_LITERAL: literal constant.
3537 * 254 SQ_ALU_SRC_PV: previous vector result.
3538 * 255 SQ_ALU_SRC_PS: previous scalar result.
3540 for (i
= 0; i
< TGSI_FILE_COUNT
; i
++) {
3541 ctx
.file_offset
[i
] = 0;
3544 if (ctx
.type
== PIPE_SHADER_VERTEX
) {
3546 ctx
.file_offset
[TGSI_FILE_INPUT
] = 1;
3547 if (ctx
.info
.num_inputs
)
3548 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_CALL_FS
);
3550 if (ctx
.type
== PIPE_SHADER_FRAGMENT
) {
3551 if (ctx
.bc
->chip_class
>= EVERGREEN
)
3552 ctx
.file_offset
[TGSI_FILE_INPUT
] = evergreen_gpr_count(&ctx
);
3554 ctx
.file_offset
[TGSI_FILE_INPUT
] = allocate_system_value_inputs(&ctx
, ctx
.file_offset
[TGSI_FILE_INPUT
]);
3556 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; i
++) {
3557 if (ctx
.info
.system_value_semantic_name
[i
] == TGSI_SEMANTIC_HELPER_INVOCATION
) {
3558 ctx
.helper_invoc_reg
= ctx
.file_offset
[TGSI_FILE_INPUT
]++;
3559 shader
->uses_helper_invocation
= true;
3563 if (ctx
.type
== PIPE_SHADER_GEOMETRY
) {
3564 /* FIXME 1 would be enough in some cases (3 or less input vertices) */
3565 ctx
.file_offset
[TGSI_FILE_INPUT
] = 2;
3567 if (ctx
.type
== PIPE_SHADER_TESS_CTRL
)
3568 ctx
.file_offset
[TGSI_FILE_INPUT
] = 1;
3569 if (ctx
.type
== PIPE_SHADER_TESS_EVAL
) {
3570 bool add_tesscoord
= false, add_tess_inout
= false;
3571 ctx
.file_offset
[TGSI_FILE_INPUT
] = 1;
3572 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; i
++) {
3573 /* if we have tesscoord save one reg */
3574 if (ctx
.info
.system_value_semantic_name
[i
] == TGSI_SEMANTIC_TESSCOORD
)
3575 add_tesscoord
= true;
3576 if (ctx
.info
.system_value_semantic_name
[i
] == TGSI_SEMANTIC_TESSINNER
||
3577 ctx
.info
.system_value_semantic_name
[i
] == TGSI_SEMANTIC_TESSOUTER
)
3578 add_tess_inout
= true;
3580 if (add_tesscoord
|| add_tess_inout
)
3581 ctx
.file_offset
[TGSI_FILE_INPUT
]++;
3583 ctx
.file_offset
[TGSI_FILE_INPUT
]+=2;
3585 if (ctx
.type
== PIPE_SHADER_COMPUTE
) {
3586 ctx
.file_offset
[TGSI_FILE_INPUT
] = 2;
3587 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; i
++) {
3588 if (ctx
.info
.system_value_semantic_name
[i
] == TGSI_SEMANTIC_GRID_SIZE
)
3589 ctx
.cs_grid_size_reg
= ctx
.file_offset
[TGSI_FILE_INPUT
]++;
3590 if (ctx
.info
.system_value_semantic_name
[i
] == TGSI_SEMANTIC_BLOCK_SIZE
)
3591 ctx
.cs_block_size_reg
= ctx
.file_offset
[TGSI_FILE_INPUT
]++;
3595 ctx
.file_offset
[TGSI_FILE_OUTPUT
] =
3596 ctx
.file_offset
[TGSI_FILE_INPUT
] +
3597 ctx
.info
.file_max
[TGSI_FILE_INPUT
] + 1;
3598 ctx
.file_offset
[TGSI_FILE_TEMPORARY
] = ctx
.file_offset
[TGSI_FILE_OUTPUT
] +
3599 ctx
.info
.file_max
[TGSI_FILE_OUTPUT
] + 1;
3601 /* Outside the GPR range. This will be translated to one of the
3602 * kcache banks later. */
3603 ctx
.file_offset
[TGSI_FILE_CONSTANT
] = 512;
3604 ctx
.file_offset
[TGSI_FILE_IMMEDIATE
] = V_SQ_ALU_SRC_LITERAL
;
3606 pipeshader
->scratch_space_needed
= 0;
3607 int regno
= ctx
.file_offset
[TGSI_FILE_TEMPORARY
] +
3608 ctx
.info
.file_max
[TGSI_FILE_TEMPORARY
];
3610 choose_spill_arrays(&ctx
, ®no
, &pipeshader
->scratch_space_needed
);
3611 shader
->indirect_files
= ctx
.info
.indirect_files
;
3613 shader
->needs_scratch_space
= pipeshader
->scratch_space_needed
!= 0;
3615 ctx
.bc
->ar_reg
= ++regno
;
3616 ctx
.bc
->index_reg
[0] = ++regno
;
3617 ctx
.bc
->index_reg
[1] = ++regno
;
3619 if (ctx
.type
== PIPE_SHADER_TESS_CTRL
) {
3620 ctx
.tess_input_info
= ++regno
;
3621 ctx
.tess_output_info
= ++regno
;
3622 } else if (ctx
.type
== PIPE_SHADER_TESS_EVAL
) {
3623 ctx
.tess_input_info
= ++regno
;
3624 ctx
.tess_output_info
= ++regno
;
3625 } else if (ctx
.type
== PIPE_SHADER_GEOMETRY
) {
3626 ctx
.gs_export_gpr_tregs
[0] = ++regno
;
3627 ctx
.gs_export_gpr_tregs
[1] = ++regno
;
3628 ctx
.gs_export_gpr_tregs
[2] = ++regno
;
3629 ctx
.gs_export_gpr_tregs
[3] = ++regno
;
3630 if (ctx
.shader
->gs_tri_strip_adj_fix
) {
3631 ctx
.gs_rotated_input
[0] = ++regno
;
3632 ctx
.gs_rotated_input
[1] = ++regno
;
3634 ctx
.gs_rotated_input
[0] = 0;
3635 ctx
.gs_rotated_input
[1] = 1;
3639 if (shader
->uses_images
) {
3640 ctx
.thread_id_gpr
= ++regno
;
3642 ctx
.temp_reg
= ++regno
;
3644 shader
->max_arrays
= 0;
3645 shader
->num_arrays
= 0;
3646 if (indirect_gprs
) {
3648 if (ctx
.info
.indirect_files
& (1 << TGSI_FILE_INPUT
)) {
3649 r600_add_gpr_array(shader
, ctx
.file_offset
[TGSI_FILE_INPUT
],
3650 ctx
.file_offset
[TGSI_FILE_OUTPUT
] -
3651 ctx
.file_offset
[TGSI_FILE_INPUT
],
3654 if (ctx
.info
.indirect_files
& (1 << TGSI_FILE_OUTPUT
)) {
3655 r600_add_gpr_array(shader
, ctx
.file_offset
[TGSI_FILE_OUTPUT
],
3656 ctx
.file_offset
[TGSI_FILE_TEMPORARY
] -
3657 ctx
.file_offset
[TGSI_FILE_OUTPUT
],
3663 ctx
.literals
= NULL
;
3664 ctx
.max_driver_temp_used
= 0;
3666 shader
->fs_write_all
= ctx
.info
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
3667 ctx
.info
.colors_written
== 1;
3668 shader
->vs_position_window_space
= ctx
.info
.properties
[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
];
3669 shader
->ps_conservative_z
= (uint8_t)ctx
.info
.properties
[TGSI_PROPERTY_FS_DEPTH_LAYOUT
];
3671 if (ctx
.type
== PIPE_SHADER_VERTEX
||
3672 ctx
.type
== PIPE_SHADER_GEOMETRY
||
3673 ctx
.type
== PIPE_SHADER_TESS_EVAL
) {
3674 shader
->cc_dist_mask
= (1 << (ctx
.info
.properties
[TGSI_PROPERTY_NUM_CULLDIST_ENABLED
] +
3675 ctx
.info
.properties
[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
])) - 1;
3676 shader
->clip_dist_write
= (1 << ctx
.info
.properties
[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
]) - 1;
3677 shader
->cull_dist_write
= ((1 << ctx
.info
.properties
[TGSI_PROPERTY_NUM_CULLDIST_ENABLED
]) - 1) << ctx
.info
.properties
[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
];
3680 if (shader
->vs_as_gs_a
)
3681 vs_add_primid_output(&ctx
, key
.vs
.prim_id_out
);
3683 if (ctx
.thread_id_gpr
!= -1) {
3684 r
= load_thread_id_gpr(&ctx
);
3689 if (ctx
.type
== PIPE_SHADER_TESS_EVAL
)
3690 r600_fetch_tess_io_info(&ctx
);
3692 while (!tgsi_parse_end_of_tokens(&ctx
.parse
)) {
3693 tgsi_parse_token(&ctx
.parse
);
3694 switch (ctx
.parse
.FullToken
.Token
.Type
) {
3695 case TGSI_TOKEN_TYPE_IMMEDIATE
:
3696 immediate
= &ctx
.parse
.FullToken
.FullImmediate
;
3697 ctx
.literals
= realloc(ctx
.literals
, (ctx
.nliterals
+ 1) * 16);
3698 if(ctx
.literals
== NULL
) {
3702 ctx
.literals
[ctx
.nliterals
* 4 + 0] = immediate
->u
[0].Uint
;
3703 ctx
.literals
[ctx
.nliterals
* 4 + 1] = immediate
->u
[1].Uint
;
3704 ctx
.literals
[ctx
.nliterals
* 4 + 2] = immediate
->u
[2].Uint
;
3705 ctx
.literals
[ctx
.nliterals
* 4 + 3] = immediate
->u
[3].Uint
;
3708 case TGSI_TOKEN_TYPE_DECLARATION
:
3709 r
= tgsi_declaration(&ctx
);
3713 case TGSI_TOKEN_TYPE_INSTRUCTION
:
3714 case TGSI_TOKEN_TYPE_PROPERTY
:
3717 R600_ERR("unsupported token type %d\n", ctx
.parse
.FullToken
.Token
.Type
);
3723 shader
->ring_item_sizes
[0] = ctx
.next_ring_offset
;
3724 shader
->ring_item_sizes
[1] = 0;
3725 shader
->ring_item_sizes
[2] = 0;
3726 shader
->ring_item_sizes
[3] = 0;
3728 /* Process two side if needed */
3729 if (shader
->two_side
&& ctx
.colors_used
) {
3730 int i
, count
= ctx
.shader
->ninput
;
3731 unsigned next_lds_loc
= ctx
.shader
->nlds
;
3733 /* additional inputs will be allocated right after the existing inputs,
3734 * we won't need them after the color selection, so we don't need to
3735 * reserve these gprs for the rest of the shader code and to adjust
3736 * output offsets etc. */
3737 int gpr
= ctx
.file_offset
[TGSI_FILE_INPUT
] +
3738 ctx
.info
.file_max
[TGSI_FILE_INPUT
] + 1;
3740 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */
3741 if (ctx
.face_gpr
== -1) {
3742 i
= ctx
.shader
->ninput
++;
3743 ctx
.shader
->input
[i
].name
= TGSI_SEMANTIC_FACE
;
3744 ctx
.shader
->input
[i
].spi_sid
= 0;
3745 ctx
.shader
->input
[i
].gpr
= gpr
++;
3746 ctx
.face_gpr
= ctx
.shader
->input
[i
].gpr
;
3749 for (i
= 0; i
< count
; i
++) {
3750 if (ctx
.shader
->input
[i
].name
== TGSI_SEMANTIC_COLOR
) {
3751 int ni
= ctx
.shader
->ninput
++;
3752 memcpy(&ctx
.shader
->input
[ni
],&ctx
.shader
->input
[i
], sizeof(struct r600_shader_io
));
3753 ctx
.shader
->input
[ni
].name
= TGSI_SEMANTIC_BCOLOR
;
3754 ctx
.shader
->input
[ni
].spi_sid
= r600_spi_sid(&ctx
.shader
->input
[ni
]);
3755 ctx
.shader
->input
[ni
].gpr
= gpr
++;
3756 // TGSI to LLVM needs to know the lds position of inputs.
3757 // Non LLVM path computes it later (in process_twoside_color)
3758 ctx
.shader
->input
[ni
].lds_pos
= next_lds_loc
++;
3759 ctx
.shader
->input
[i
].back_color_input
= ni
;
3760 if (ctx
.bc
->chip_class
>= EVERGREEN
) {
3761 if ((r
= evergreen_interp_input(&ctx
, ni
)))
3768 if (shader
->fs_write_all
&& rscreen
->b
.chip_class
>= EVERGREEN
)
3769 shader
->nr_ps_max_color_exports
= 8;
3771 if (ctx
.shader
->uses_helper_invocation
) {
3772 if (ctx
.bc
->chip_class
== CAYMAN
)
3773 r
= cm_load_helper_invocation(&ctx
);
3775 r
= eg_load_helper_invocation(&ctx
);
3781 * XXX this relies on fixed_pt_position_gpr only being present when
3782 * this shader should be executed per sample. Should be the case for now...
3784 if (ctx
.fixed_pt_position_gpr
!= -1 && ctx
.info
.reads_samplemask
) {
3786 * Fix up sample mask. The hw always gives us coverage mask for
3787 * the pixel. However, for per-sample shading, we need the
3788 * coverage for the shader invocation only.
3789 * Also, with disabled msaa, only the first bit should be set
3790 * (luckily the same fixup works for both problems).
3791 * For now, we can only do it if we know this shader is always
3792 * executed per sample (due to usage of bits in the shader
3793 * forcing per-sample execution).
3794 * If the fb is not multisampled, we'd do unnecessary work but
3795 * it should still be correct.
3796 * It will however do nothing for sample shading according
3797 * to MinSampleShading.
3799 struct r600_bytecode_alu alu
;
3800 int tmp
= r600_get_temp(&ctx
);
3801 assert(ctx
.face_gpr
!= -1);
3802 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3804 alu
.op
= ALU_OP2_LSHL_INT
;
3805 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
3806 alu
.src
[0].value
= 0x1;
3807 alu
.src
[1].sel
= ctx
.fixed_pt_position_gpr
;
3808 alu
.src
[1].chan
= 3;
3813 if ((r
= r600_bytecode_add_alu(ctx
.bc
, &alu
)))
3816 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3817 alu
.op
= ALU_OP2_AND_INT
;
3818 alu
.src
[0].sel
= tmp
;
3819 alu
.src
[1].sel
= ctx
.face_gpr
;
3820 alu
.src
[1].chan
= 2;
3821 alu
.dst
.sel
= ctx
.face_gpr
;
3825 if ((r
= r600_bytecode_add_alu(ctx
.bc
, &alu
)))
3829 if (ctx
.fragcoord_input
>= 0) {
3830 if (ctx
.bc
->chip_class
== CAYMAN
) {
3831 for (j
= 0 ; j
< 4; j
++) {
3832 struct r600_bytecode_alu alu
;
3833 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3834 alu
.op
= ALU_OP1_RECIP_IEEE
;
3835 alu
.src
[0].sel
= shader
->input
[ctx
.fragcoord_input
].gpr
;
3836 alu
.src
[0].chan
= 3;
3838 alu
.dst
.sel
= shader
->input
[ctx
.fragcoord_input
].gpr
;
3840 alu
.dst
.write
= (j
== 3);
3841 alu
.last
= (j
== 3);
3842 if ((r
= r600_bytecode_add_alu(ctx
.bc
, &alu
)))
3846 struct r600_bytecode_alu alu
;
3847 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3848 alu
.op
= ALU_OP1_RECIP_IEEE
;
3849 alu
.src
[0].sel
= shader
->input
[ctx
.fragcoord_input
].gpr
;
3850 alu
.src
[0].chan
= 3;
3852 alu
.dst
.sel
= shader
->input
[ctx
.fragcoord_input
].gpr
;
3856 if ((r
= r600_bytecode_add_alu(ctx
.bc
, &alu
)))
3861 if (ctx
.type
== PIPE_SHADER_GEOMETRY
) {
3862 struct r600_bytecode_alu alu
;
3865 /* GS thread with no output workaround - emit a cut at start of GS */
3866 if (ctx
.bc
->chip_class
== R600
)
3867 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_CUT_VERTEX
);
3869 for (j
= 0; j
< 4; j
++) {
3870 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
3871 alu
.op
= ALU_OP1_MOV
;
3872 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
3873 alu
.src
[0].value
= 0;
3874 alu
.dst
.sel
= ctx
.gs_export_gpr_tregs
[j
];
3877 r
= r600_bytecode_add_alu(ctx
.bc
, &alu
);
3882 if (ctx
.shader
->gs_tri_strip_adj_fix
) {
3883 r
= single_alu_op2(&ctx
, ALU_OP2_AND_INT
,
3884 ctx
.gs_rotated_input
[0], 2,
3886 V_SQ_ALU_SRC_LITERAL
, 1);
3890 for (i
= 0; i
< 6; i
++) {
3891 int rotated
= (i
+ 4) % 6;
3892 int offset_reg
= i
/ 3;
3893 int offset_chan
= i
% 3;
3894 int rotated_offset_reg
= rotated
/ 3;
3895 int rotated_offset_chan
= rotated
% 3;
3897 if (offset_reg
== 0 && offset_chan
== 2)
3899 if (rotated_offset_reg
== 0 && rotated_offset_chan
== 2)
3900 rotated_offset_chan
= 3;
3902 r
= single_alu_op3(&ctx
, ALU_OP3_CNDE_INT
,
3903 ctx
.gs_rotated_input
[offset_reg
], offset_chan
,
3904 ctx
.gs_rotated_input
[0], 2,
3905 offset_reg
, offset_chan
,
3906 rotated_offset_reg
, rotated_offset_chan
);
3913 if (ctx
.type
== PIPE_SHADER_TESS_CTRL
)
3914 r600_fetch_tess_io_info(&ctx
);
3916 if (shader
->two_side
&& ctx
.colors_used
) {
3917 if ((r
= process_twoside_color_inputs(&ctx
)))
3921 tgsi_parse_init(&ctx
.parse
, tokens
);
3922 while (!tgsi_parse_end_of_tokens(&ctx
.parse
)) {
3923 tgsi_parse_token(&ctx
.parse
);
3924 switch (ctx
.parse
.FullToken
.Token
.Type
) {
3925 case TGSI_TOKEN_TYPE_INSTRUCTION
:
3926 r
= tgsi_is_supported(&ctx
);
3929 ctx
.max_driver_temp_used
= 0;
3930 /* reserve first tmp for everyone */
3931 r600_get_temp(&ctx
);
3933 opcode
= ctx
.parse
.FullToken
.FullInstruction
.Instruction
.Opcode
;
3934 if ((r
= tgsi_split_constant(&ctx
)))
3936 if ((r
= tgsi_split_literal_constant(&ctx
)))
3938 if (ctx
.type
== PIPE_SHADER_GEOMETRY
) {
3939 if ((r
= tgsi_split_gs_inputs(&ctx
)))
3941 } else if (lds_inputs
) {
3942 if ((r
= tgsi_split_lds_inputs(&ctx
)))
3945 if (ctx
.bc
->chip_class
== CAYMAN
)
3946 ctx
.inst_info
= &cm_shader_tgsi_instruction
[opcode
];
3947 else if (ctx
.bc
->chip_class
>= EVERGREEN
)
3948 ctx
.inst_info
= &eg_shader_tgsi_instruction
[opcode
];
3950 ctx
.inst_info
= &r600_shader_tgsi_instruction
[opcode
];
3952 ctx
.bc
->precise
|= ctx
.parse
.FullToken
.FullInstruction
.Instruction
.Precise
;
3954 r
= ctx
.inst_info
->process(&ctx
);
3958 if (ctx
.type
== PIPE_SHADER_TESS_CTRL
) {
3959 r
= r600_store_tcs_output(&ctx
);
3969 /* Reset the temporary register counter. */
3970 ctx
.max_driver_temp_used
= 0;
3972 noutput
= shader
->noutput
;
3974 if (!ring_outputs
&& ctx
.clip_vertex_write
) {
3975 unsigned clipdist_temp
[2];
3977 clipdist_temp
[0] = r600_get_temp(&ctx
);
3978 clipdist_temp
[1] = r600_get_temp(&ctx
);
3980 /* need to convert a clipvertex write into clipdistance writes and not export
3981 the clip vertex anymore */
3983 memset(&shader
->output
[noutput
], 0, 2*sizeof(struct r600_shader_io
));
3984 shader
->output
[noutput
].name
= TGSI_SEMANTIC_CLIPDIST
;
3985 shader
->output
[noutput
].gpr
= clipdist_temp
[0];
3987 shader
->output
[noutput
].name
= TGSI_SEMANTIC_CLIPDIST
;
3988 shader
->output
[noutput
].gpr
= clipdist_temp
[1];
3991 /* reset spi_sid for clipvertex output to avoid confusing spi */
3992 shader
->output
[ctx
.cv_output
].spi_sid
= 0;
3994 shader
->clip_dist_write
= 0xFF;
3995 shader
->cc_dist_mask
= 0xFF;
3997 for (i
= 0; i
< 8; i
++) {
4001 for (j
= 0; j
< 4; j
++) {
4002 struct r600_bytecode_alu alu
;
4003 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4004 alu
.op
= ALU_OP2_DOT4
;
4005 alu
.src
[0].sel
= shader
->output
[ctx
.cv_output
].gpr
;
4006 alu
.src
[0].chan
= j
;
4008 alu
.src
[1].sel
= 512 + i
;
4009 alu
.src
[1].kc_bank
= R600_BUFFER_INFO_CONST_BUFFER
;
4010 alu
.src
[1].chan
= j
;
4012 alu
.dst
.sel
= clipdist_temp
[oreg
];
4014 alu
.dst
.write
= (j
== ochan
);
4017 r
= r600_bytecode_add_alu(ctx
.bc
, &alu
);
4024 /* Add stream outputs. */
4025 if (so
.num_outputs
) {
4027 if (!lds_outputs
&& !ring_outputs
&& ctx
.type
== PIPE_SHADER_VERTEX
)
4029 if (!ring_outputs
&& ctx
.type
== PIPE_SHADER_TESS_EVAL
)
4032 emit_streamout(&ctx
, &so
, -1, NULL
);
4034 pipeshader
->enabled_stream_buffers_mask
= ctx
.enabled_stream_buffers_mask
;
4035 convert_edgeflag_to_int(&ctx
);
4037 if (ctx
.type
== PIPE_SHADER_TESS_CTRL
)
4038 r600_emit_tess_factor(&ctx
);
4041 if (ctx
.type
== PIPE_SHADER_VERTEX
) {
4042 if (ctx
.shader
->noutput
)
4043 emit_lds_vs_writes(&ctx
);
4045 } else if (ring_outputs
) {
4046 if (shader
->vs_as_es
|| shader
->tes_as_es
) {
4047 ctx
.gs_export_gpr_tregs
[0] = r600_get_temp(&ctx
);
4048 ctx
.gs_export_gpr_tregs
[1] = -1;
4049 ctx
.gs_export_gpr_tregs
[2] = -1;
4050 ctx
.gs_export_gpr_tregs
[3] = -1;
4052 emit_gs_ring_writes(&ctx
, &so
, -1, FALSE
);
4056 next_clip_base
= shader
->vs_out_misc_write
? 62 : 61;
4058 for (i
= 0, j
= 0; i
< noutput
; i
++, j
++) {
4059 memset(&output
[j
], 0, sizeof(struct r600_bytecode_output
));
4060 output
[j
].gpr
= shader
->output
[i
].gpr
;
4061 output
[j
].elem_size
= 3;
4062 output
[j
].swizzle_x
= 0;
4063 output
[j
].swizzle_y
= 1;
4064 output
[j
].swizzle_z
= 2;
4065 output
[j
].swizzle_w
= 3;
4066 output
[j
].burst_count
= 1;
4067 output
[j
].type
= 0xffffffff;
4068 output
[j
].op
= CF_OP_EXPORT
;
4070 case PIPE_SHADER_VERTEX
:
4071 case PIPE_SHADER_TESS_EVAL
:
4072 switch (shader
->output
[i
].name
) {
4073 case TGSI_SEMANTIC_POSITION
:
4074 output
[j
].array_base
= 60;
4075 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
4079 case TGSI_SEMANTIC_PSIZE
:
4080 output
[j
].array_base
= 61;
4081 output
[j
].swizzle_y
= 7;
4082 output
[j
].swizzle_z
= 7;
4083 output
[j
].swizzle_w
= 7;
4084 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
4087 case TGSI_SEMANTIC_EDGEFLAG
:
4088 output
[j
].array_base
= 61;
4089 output
[j
].swizzle_x
= 7;
4090 output
[j
].swizzle_y
= 0;
4091 output
[j
].swizzle_z
= 7;
4092 output
[j
].swizzle_w
= 7;
4093 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
4096 case TGSI_SEMANTIC_LAYER
:
4097 /* spi_sid is 0 for outputs that are
4098 * not consumed by PS */
4099 if (shader
->output
[i
].spi_sid
) {
4100 output
[j
].array_base
= next_param_base
++;
4101 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
4103 memcpy(&output
[j
], &output
[j
-1], sizeof(struct r600_bytecode_output
));
4105 output
[j
].array_base
= 61;
4106 output
[j
].swizzle_x
= 7;
4107 output
[j
].swizzle_y
= 7;
4108 output
[j
].swizzle_z
= 0;
4109 output
[j
].swizzle_w
= 7;
4110 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
4113 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
4114 /* spi_sid is 0 for outputs that are
4115 * not consumed by PS */
4116 if (shader
->output
[i
].spi_sid
) {
4117 output
[j
].array_base
= next_param_base
++;
4118 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
4120 memcpy(&output
[j
], &output
[j
-1], sizeof(struct r600_bytecode_output
));
4122 output
[j
].array_base
= 61;
4123 output
[j
].swizzle_x
= 7;
4124 output
[j
].swizzle_y
= 7;
4125 output
[j
].swizzle_z
= 7;
4126 output
[j
].swizzle_w
= 0;
4127 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
4130 case TGSI_SEMANTIC_CLIPVERTEX
:
4133 case TGSI_SEMANTIC_CLIPDIST
:
4134 output
[j
].array_base
= next_clip_base
++;
4135 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
4137 /* spi_sid is 0 for clipdistance outputs that were generated
4138 * for clipvertex - we don't need to pass them to PS */
4139 if (shader
->output
[i
].spi_sid
) {
4141 /* duplicate it as PARAM to pass to the pixel shader */
4142 memcpy(&output
[j
], &output
[j
-1], sizeof(struct r600_bytecode_output
));
4143 output
[j
].array_base
= next_param_base
++;
4144 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
4147 case TGSI_SEMANTIC_FOG
:
4148 output
[j
].swizzle_y
= 4; /* 0 */
4149 output
[j
].swizzle_z
= 4; /* 0 */
4150 output
[j
].swizzle_w
= 5; /* 1 */
4152 case TGSI_SEMANTIC_PRIMID
:
4153 output
[j
].swizzle_x
= 2;
4154 output
[j
].swizzle_y
= 4; /* 0 */
4155 output
[j
].swizzle_z
= 4; /* 0 */
4156 output
[j
].swizzle_w
= 4; /* 0 */
4161 case PIPE_SHADER_FRAGMENT
:
4162 if (shader
->output
[i
].name
== TGSI_SEMANTIC_COLOR
) {
4163 /* never export more colors than the number of CBs */
4164 if (shader
->output
[i
].sid
>= max_color_exports
) {
4169 output
[j
].swizzle_w
= key
.ps
.alpha_to_one
? 5 : 3;
4170 output
[j
].array_base
= shader
->output
[i
].sid
;
4171 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
4172 shader
->nr_ps_color_exports
++;
4173 shader
->ps_color_export_mask
|= (0xf << (shader
->output
[i
].sid
* 4));
4175 /* If the i-th target format is set, all previous target formats must
4176 * be non-zero to avoid hangs. - from radeonsi, seems to apply to eg as well.
4178 if (shader
->output
[i
].sid
> 0)
4179 for (unsigned x
= 0; x
< shader
->output
[i
].sid
; x
++)
4180 shader
->ps_color_export_mask
|= (1 << (x
*4));
4182 if (shader
->output
[i
].sid
> shader
->ps_export_highest
)
4183 shader
->ps_export_highest
= shader
->output
[i
].sid
;
4184 if (shader
->fs_write_all
&& (rscreen
->b
.chip_class
>= EVERGREEN
)) {
4185 for (k
= 1; k
< max_color_exports
; k
++) {
4187 memset(&output
[j
], 0, sizeof(struct r600_bytecode_output
));
4188 output
[j
].gpr
= shader
->output
[i
].gpr
;
4189 output
[j
].elem_size
= 3;
4190 output
[j
].swizzle_x
= 0;
4191 output
[j
].swizzle_y
= 1;
4192 output
[j
].swizzle_z
= 2;
4193 output
[j
].swizzle_w
= key
.ps
.alpha_to_one
? 5 : 3;
4194 output
[j
].burst_count
= 1;
4195 output
[j
].array_base
= k
;
4196 output
[j
].op
= CF_OP_EXPORT
;
4197 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
4198 shader
->nr_ps_color_exports
++;
4199 if (k
> shader
->ps_export_highest
)
4200 shader
->ps_export_highest
= k
;
4201 shader
->ps_color_export_mask
|= (0xf << (j
* 4));
4204 } else if (shader
->output
[i
].name
== TGSI_SEMANTIC_POSITION
) {
4205 output
[j
].array_base
= 61;
4206 output
[j
].swizzle_x
= 2;
4207 output
[j
].swizzle_y
= 7;
4208 output
[j
].swizzle_z
= output
[j
].swizzle_w
= 7;
4209 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
4210 } else if (shader
->output
[i
].name
== TGSI_SEMANTIC_STENCIL
) {
4211 output
[j
].array_base
= 61;
4212 output
[j
].swizzle_x
= 7;
4213 output
[j
].swizzle_y
= 1;
4214 output
[j
].swizzle_z
= output
[j
].swizzle_w
= 7;
4215 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
4216 } else if (shader
->output
[i
].name
== TGSI_SEMANTIC_SAMPLEMASK
) {
4217 output
[j
].array_base
= 61;
4218 output
[j
].swizzle_x
= 7;
4219 output
[j
].swizzle_y
= 7;
4220 output
[j
].swizzle_z
= 0;
4221 output
[j
].swizzle_w
= 7;
4222 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
4224 R600_ERR("unsupported fragment output name %d\n", shader
->output
[i
].name
);
4229 case PIPE_SHADER_TESS_CTRL
:
4232 R600_ERR("unsupported processor type %d\n", ctx
.type
);
4237 if (output
[j
].type
== 0xffffffff) {
4238 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
4239 output
[j
].array_base
= next_param_base
++;
4243 /* add fake position export */
4244 if ((ctx
.type
== PIPE_SHADER_VERTEX
|| ctx
.type
== PIPE_SHADER_TESS_EVAL
) && pos_emitted
== false) {
4245 memset(&output
[j
], 0, sizeof(struct r600_bytecode_output
));
4247 output
[j
].elem_size
= 3;
4248 output
[j
].swizzle_x
= 7;
4249 output
[j
].swizzle_y
= 7;
4250 output
[j
].swizzle_z
= 7;
4251 output
[j
].swizzle_w
= 7;
4252 output
[j
].burst_count
= 1;
4253 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
4254 output
[j
].array_base
= 60;
4255 output
[j
].op
= CF_OP_EXPORT
;
4259 /* add fake param output for vertex shader if no param is exported */
4260 if ((ctx
.type
== PIPE_SHADER_VERTEX
|| ctx
.type
== PIPE_SHADER_TESS_EVAL
) && next_param_base
== 0) {
4261 memset(&output
[j
], 0, sizeof(struct r600_bytecode_output
));
4263 output
[j
].elem_size
= 3;
4264 output
[j
].swizzle_x
= 7;
4265 output
[j
].swizzle_y
= 7;
4266 output
[j
].swizzle_z
= 7;
4267 output
[j
].swizzle_w
= 7;
4268 output
[j
].burst_count
= 1;
4269 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
4270 output
[j
].array_base
= 0;
4271 output
[j
].op
= CF_OP_EXPORT
;
4275 /* add fake pixel export */
4276 if (ctx
.type
== PIPE_SHADER_FRAGMENT
&& shader
->nr_ps_color_exports
== 0) {
4277 memset(&output
[j
], 0, sizeof(struct r600_bytecode_output
));
4279 output
[j
].elem_size
= 3;
4280 output
[j
].swizzle_x
= 7;
4281 output
[j
].swizzle_y
= 7;
4282 output
[j
].swizzle_z
= 7;
4283 output
[j
].swizzle_w
= 7;
4284 output
[j
].burst_count
= 1;
4285 output
[j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
4286 output
[j
].array_base
= 0;
4287 output
[j
].op
= CF_OP_EXPORT
;
4289 shader
->nr_ps_color_exports
++;
4290 shader
->ps_color_export_mask
= 0xf;
4295 /* set export done on last export of each type */
4296 for (k
= noutput
- 1, output_done
= 0; k
>= 0; k
--) {
4297 if (!(output_done
& (1 << output
[k
].type
))) {
4298 output_done
|= (1 << output
[k
].type
);
4299 output
[k
].op
= CF_OP_EXPORT_DONE
;
4302 /* add output to bytecode */
4303 for (i
= 0; i
< noutput
; i
++) {
4304 r
= r600_bytecode_add_output(ctx
.bc
, &output
[i
]);
4310 /* add program end */
4311 if (ctx
.bc
->chip_class
== CAYMAN
)
4312 cm_bytecode_add_cf_end(ctx
.bc
);
4314 const struct cf_op_info
*last
= NULL
;
4316 if (ctx
.bc
->cf_last
)
4317 last
= r600_isa_cf(ctx
.bc
->cf_last
->op
);
4319 /* alu clause instructions don't have EOP bit, so add NOP */
4320 if (!last
|| last
->flags
& CF_ALU
|| ctx
.bc
->cf_last
->op
== CF_OP_LOOP_END
|| ctx
.bc
->cf_last
->op
== CF_OP_POP
)
4321 r600_bytecode_add_cfinst(ctx
.bc
, CF_OP_NOP
);
4323 ctx
.bc
->cf_last
->end_of_program
= 1;
4326 /* check GPR limit - we have 124 = 128 - 4
4327 * (4 are reserved as alu clause temporary registers) */
4328 if (ctx
.bc
->ngpr
> 124) {
4329 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx
.bc
->ngpr
);
4334 if (ctx
.type
== PIPE_SHADER_GEOMETRY
) {
4335 if ((r
= generate_gs_copy_shader(rctx
, pipeshader
, &so
)))
4339 free(ctx
.spilled_arrays
);
4340 free(ctx
.array_infos
);
4342 tgsi_parse_free(&ctx
.parse
);
4345 free(ctx
.spilled_arrays
);
4346 free(ctx
.array_infos
);
4348 tgsi_parse_free(&ctx
.parse
);
4352 static int tgsi_unsupported(struct r600_shader_ctx
*ctx
)
4354 const unsigned tgsi_opcode
=
4355 ctx
->parse
.FullToken
.FullInstruction
.Instruction
.Opcode
;
4356 R600_ERR("%s tgsi opcode unsupported\n",
4357 tgsi_get_opcode_name(tgsi_opcode
));
4361 static int tgsi_end(struct r600_shader_ctx
*ctx UNUSED
)
4366 static void r600_bytecode_src(struct r600_bytecode_alu_src
*bc_src
,
4367 const struct r600_shader_src
*shader_src
,
4370 bc_src
->sel
= shader_src
->sel
;
4371 bc_src
->chan
= shader_src
->swizzle
[chan
];
4372 bc_src
->neg
= shader_src
->neg
;
4373 bc_src
->abs
= shader_src
->abs
;
4374 bc_src
->rel
= shader_src
->rel
;
4375 bc_src
->value
= shader_src
->value
[bc_src
->chan
];
4376 bc_src
->kc_bank
= shader_src
->kc_bank
;
4377 bc_src
->kc_rel
= shader_src
->kc_rel
;
/* Force the absolute-value modifier on an ALU source operand.
 * NOTE(review): only the (line-wrapped) signature survived extraction;
 * the function body is missing from this view. Upstream this helper
 * sets the abs flag and clears neg -- confirm against the full file. */
4380 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src
*bc_src
)
4386 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src
*bc_src
)
4388 bc_src
->neg
= !bc_src
->neg
;
/* Translate a TGSI destination register into an r600 bytecode ALU
 * destination. Writes to spilled temporaries are redirected to a temp
 * GPR and a pending CF_OP_MEM_SCRATCH output is queued to flush that
 * GPR to the scratch array; indirect spill writes go through the
 * address register (ctx->bc->ar_reg).
 * NOTE(review): the text below is line-wrapped by extraction and some
 * original lines (braces, else branches, error paths) are missing;
 * tokens shown are unchanged. */
4391 static void tgsi_dst(struct r600_shader_ctx
*ctx
,
4392 const struct tgsi_full_dst_register
*tgsi_dst
,
4394 struct r600_bytecode_alu_dst
*r600_dst
)
4396 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
/* Temporaries may have been spilled: map the TGSI index to a GPR and
 * learn whether this particular register lives in scratch memory. */
4398 if (tgsi_dst
->Register
.File
== TGSI_FILE_TEMPORARY
) {
4402 idx
= map_tgsi_reg_index_to_r600_gpr(ctx
, tgsi_dst
->Register
.Index
, &spilled
);
4405 struct r600_bytecode_output cf
;
4408 bool add_pending_output
= true;
/* Build the scratch-write CF descriptor for the spilled array slice. */
4410 memset(&cf
, 0, sizeof(struct r600_bytecode_output
));
4411 get_spilled_array_base_and_size(ctx
, tgsi_dst
->Register
.Index
,
4412 &cf
.array_base
, &cf
.array_size
);
4414 /* If no component has spilled, reserve a register and add the spill code
4415 * ctx->bc->n_pending_outputs is cleared after each instruction group */
4416 if (ctx
->bc
->n_pending_outputs
== 0) {
4417 reg
= r600_get_temp(ctx
);
4419 /* If we are already spilling and the output address is the same like
4420 * before then just reuse the same slot */
4421 struct r600_bytecode_output
*tmpl
= &ctx
->bc
->pending_outputs
[ctx
->bc
->n_pending_outputs
-1];
4422 if ((cf
.array_base
+ idx
== tmpl
->array_base
) ||
4423 (cf
.array_base
== tmpl
->array_base
&&
4424 tmpl
->index_gpr
== ctx
->bc
->ar_reg
&&
4425 tgsi_dst
->Register
.Indirect
)) {
4426 reg
= ctx
->bc
->pending_outputs
[0].gpr
;
4427 add_pending_output
= false;
4429 reg
= r600_get_temp(ctx
);
/* Redirect the ALU write to the reserved temp GPR instead of the
 * (nonexistent) spilled GPR. */
4433 r600_dst
->sel
= reg
;
4434 r600_dst
->chan
= swizzle
;
4435 r600_dst
->write
= 1;
4436 if (inst
->Instruction
.Saturate
) {
4437 r600_dst
->clamp
= 1;
4440 /* Add new outputs as pending */
4441 if (add_pending_output
) {
4442 cf
.op
= CF_OP_MEM_SCRATCH
;
4445 cf
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE
;
4447 cf
.comp_mask
= inst
->Dst
[0].Register
.WriteMask
;
/* Indirect spill writes index through the address register; pre-R700
 * lacks the ACK'd write variant. */
4454 if (tgsi_dst
->Register
.Indirect
) {
4455 if (ctx
->bc
->chip_class
< R700
)
4456 cf
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND
;
4458 cf
.type
= 3; // V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK;
4459 cf
.index_gpr
= ctx
->bc
->ar_reg
;
4462 cf
.array_base
+= idx
;
4466 r
= r600_bytecode_add_pending_output(ctx
->bc
, &cf
);
/* R700+ must wait for the write acknowledgement before reuse. */
4470 if (ctx
->bc
->chip_class
>= R700
)
4471 r600_bytecode_need_wait_ack(ctx
->bc
, true);
/* Non-spilled temporary: use the mapped GPR directly. */
4476 r600_dst
->sel
= idx
;
/* Non-temporary files: direct addressing via the per-file GPR offsets. */
4480 r600_dst
->sel
= tgsi_dst
->Register
.Index
;
4481 r600_dst
->sel
+= ctx
->file_offset
[tgsi_dst
->Register
.File
];
4483 r600_dst
->chan
= swizzle
;
4484 r600_dst
->write
= 1;
4485 if (inst
->Instruction
.Saturate
) {
4486 r600_dst
->clamp
= 1;
/* NOTE(review): TESS_CTRL outputs are handled specially here; the body
 * of this branch is missing from this extracted view. */
4488 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
) {
4489 if (tgsi_dst
->Register
.File
== TGSI_FILE_OUTPUT
) {
4493 if (tgsi_dst
->Register
.Indirect
)
4494 r600_dst
->rel
= V_SQ_REL_RELATIVE
;
/* Emit a two-source 64-bit (double) ALU operation. Doubles occupy
 * channel pairs (xy, zw), so sources are read with fp64_switch() to
 * pair low/high words correctly.
 *   singledest  - op produces one 64-bit result rather than two
 *   swap        - swap src[0] and src[1] when emitting
 *   dest_temp   - if nonzero, write the result to this GPR
 *   op_override - if nonzero, use this ALU opcode instead of inst_info->op
 * NOTE(review): the text below is line-wrapped by extraction and a
 * number of original lines (switch case labels, else branches, returns)
 * are missing; tokens shown are unchanged. */
4498 static int tgsi_op2_64_params(struct r600_shader_ctx
*ctx
, bool singledest
, bool swap
, int dest_temp
, int op_override
)
4500 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4501 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
4502 struct r600_bytecode_alu alu
;
4503 int i
, j
, r
, lasti
= tgsi_last_instruction(write_mask
);
4505 int swizzle_x
= inst
->Src
[0].Register
.SwizzleX
;
/* Select use_tmp / adjust the writemask based on which channel pair is
 * written and the X swizzle. NOTE(review): the case labels of this
 * switch were lost in extraction. */
4508 switch (write_mask
) {
4510 if (swizzle_x
== 2) {
4517 if (swizzle_x
== 2) {
4526 if (swizzle_x
== 0) {
4533 if (swizzle_x
== 0) {
/* Emit one ALU instruction per enabled destination channel. */
4544 lasti
= tgsi_last_instruction(write_mask
);
4545 for (i
= 0; i
<= lasti
; i
++) {
4547 if (!(write_mask
& (1 << i
)))
4550 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
/* Route the result: explicit temp (use_tmp/dest_temp) or the real dst. */
4553 if (use_tmp
|| dest_temp
) {
4554 alu
.dst
.sel
= use_tmp
? ctx
->temp_reg
: dest_temp
;
4558 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4560 if (i
== 1 || i
== 3)
4563 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4565 alu
.op
= op_override
? op_override
: ctx
->inst_info
->op
;
/* DABS reads only src[0]; other ops read all sources with the channel
 * pair swap applied by fp64_switch(). */
4566 if (ctx
->parse
.FullToken
.FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_DABS
) {
4567 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
4569 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
4570 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], fp64_switch(i
));
4573 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], fp64_switch(i
));
4574 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], fp64_switch(i
));
4577 /* handle some special cases */
4578 if (i
== 1 || i
== 3) {
4579 switch (ctx
->parse
.FullToken
.FullInstruction
.Instruction
.Opcode
) {
4580 case TGSI_OPCODE_DABS
:
4581 r600_bytecode_src_set_abs(&alu
.src
[0]);
4590 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
/* Single-dest path: copy the result from the temp to the real dst. */
4596 write_mask
= inst
->Dst
[0].Register
.WriteMask
;
4598 lasti
= tgsi_last_instruction(write_mask
);
4599 /* move result from temp to dst */
4600 for (i
= 0; i
<= lasti
; i
++) {
4601 if (!(write_mask
& (1 << i
)))
4604 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4605 alu
.op
= ALU_OP1_MOV
;
4608 alu
.dst
.sel
= dest_temp
;
4612 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4613 alu
.src
[0].sel
= ctx
->temp_reg
;
4614 alu
.src
[0].chan
= use_tmp
- 1;
4615 alu
.last
= (i
== lasti
);
4617 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4625 static int tgsi_op2_64(struct r600_shader_ctx
*ctx
)
4627 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4628 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
4629 /* confirm writemasking */
4630 if ((write_mask
& 0x3) != 0x3 &&
4631 (write_mask
& 0xc) != 0xc) {
4632 fprintf(stderr
, "illegal writemask for 64-bit: 0x%x\n", write_mask
);
4635 return tgsi_op2_64_params(ctx
, false, false, 0, 0);
/* 64-bit two-source op producing a single result, sources in order. */
static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, /*singledest=*/true, /*swap=*/false, 0, 0);
}
/* 64-bit two-source op producing a single result, sources swapped. */
static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, /*singledest=*/true, /*swap=*/true, 0, 0);
}
/* Emit a three-source 64-bit ALU operation. Sources are read with
 * swizzle 1 (high word) for channels 0-2 and swizzle 0 (low word) for
 * channel 3, per the visible 'i == 3 ? 0 : 1' selection.
 * NOTE(review): the text below is line-wrapped by extraction and
 * several original lines (loop bound setup, the use of 'tmp' as a
 * fallback destination, dst.chan/last handling) are missing from this
 * view; tokens shown are unchanged. */
4648 static int tgsi_op3_64(struct r600_shader_ctx
*ctx
)
4650 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4651 struct r600_bytecode_alu alu
;
/* Scratch GPR -- presumably the destination for channels the writemask
 * skips; confirm against the full file. */
4654 int tmp
= r600_get_temp(ctx
);
4656 for (i
= 0; i
< lasti
+ 1; i
++) {
4658 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4659 alu
.op
= ctx
->inst_info
->op
;
4660 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
4661 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], i
== 3 ? 0 : 1);
/* Only channels enabled in the writemask target the real destination. */
4664 if (inst
->Dst
[0].Register
.WriteMask
& (1 << i
))
4665 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4674 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4681 static int tgsi_op2_s(struct r600_shader_ctx
*ctx
, int swap
, int trans_only
)
4683 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4684 struct r600_bytecode_alu alu
;
4685 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
4686 int i
, j
, r
, lasti
= tgsi_last_instruction(write_mask
);
4687 /* use temp register if trans_only and more than one dst component */
4688 int use_tmp
= trans_only
&& (write_mask
^ (1 << lasti
));
4689 unsigned op
= ctx
->inst_info
->op
;
4691 if (op
== ALU_OP2_MUL_IEEE
&&
4692 ctx
->info
.properties
[TGSI_PROPERTY_MUL_ZERO_WINS
])
4695 for (i
= 0; i
<= lasti
; i
++) {
4696 if (!(write_mask
& (1 << i
)))
4699 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4701 alu
.dst
.sel
= ctx
->temp_reg
;
4705 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4709 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
4710 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
4713 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
4714 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
4716 if (i
== lasti
|| trans_only
) {
4719 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4725 /* move result from temp to dst */
4726 for (i
= 0; i
<= lasti
; i
++) {
4727 if (!(write_mask
& (1 << i
)))
4730 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4731 alu
.op
= ALU_OP1_MOV
;
4732 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4733 alu
.src
[0].sel
= ctx
->temp_reg
;
4734 alu
.src
[0].chan
= i
;
4735 alu
.last
= (i
== lasti
);
4737 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
/* Plain two-operand ALU op: no operand swap, not restricted to the
 * trans slot.  Delegates to tgsi_op2_s(). */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 0);
}
/* Two-operand ALU op with src0/src1 swapped before emission — for
 * hardware ops whose operand order is reversed relative to TGSI. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1, 0);
}
/* Two-operand ALU op that may only execute in the transcendental
 * (t) slot; tgsi_op2_s() then routes each component through a temp
 * so every result still lands in the right channel. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 1);
}
4760 static int tgsi_ineg(struct r600_shader_ctx
*ctx
)
4762 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4763 struct r600_bytecode_alu alu
;
4765 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
4767 for (i
= 0; i
< lasti
+ 1; i
++) {
4769 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
4771 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4772 alu
.op
= ctx
->inst_info
->op
;
4774 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
4776 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
4778 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4783 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4791 static int tgsi_dneg(struct r600_shader_ctx
*ctx
)
4793 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4794 struct r600_bytecode_alu alu
;
4796 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
4798 for (i
= 0; i
< lasti
+ 1; i
++) {
4800 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
4802 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4803 alu
.op
= ALU_OP1_MOV
;
4805 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
4807 if (i
== 1 || i
== 3)
4808 r600_bytecode_src_toggle_neg(&alu
.src
[0]);
4809 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4814 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4822 static int tgsi_dfracexp(struct r600_shader_ctx
*ctx
)
4824 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4825 struct r600_bytecode_alu alu
;
4826 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
4829 for (i
= 0; i
<= 3; i
++) {
4830 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4831 alu
.op
= ctx
->inst_info
->op
;
4833 alu
.dst
.sel
= ctx
->temp_reg
;
4836 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
4837 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], fp64_switch(i
));
4843 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4848 /* Replicate significand result across channels. */
4849 for (i
= 0; i
<= 3; i
++) {
4850 if (!(write_mask
& (1 << i
)))
4853 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4854 alu
.op
= ALU_OP1_MOV
;
4855 alu
.src
[0].chan
= (i
& 1) + 2;
4856 alu
.src
[0].sel
= ctx
->temp_reg
;
4858 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
4861 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4866 for (i
= 0; i
<= 3; i
++) {
4867 if (inst
->Dst
[1].Register
.WriteMask
& (1 << i
)) {
4868 /* MOV third channels to writemask dst1 */
4869 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4870 alu
.op
= ALU_OP1_MOV
;
4871 alu
.src
[0].chan
= 1;
4872 alu
.src
[0].sel
= ctx
->temp_reg
;
4874 tgsi_dst(ctx
, &inst
->Dst
[1], i
, &alu
.dst
);
4876 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4886 static int egcm_int_to_double(struct r600_shader_ctx
*ctx
)
4888 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
4889 struct r600_bytecode_alu alu
;
4891 int write_mask
= inst
->Dst
[0].Register
.WriteMask
;
4892 int temp_reg
= r600_get_temp(ctx
);
4894 assert(inst
->Instruction
.Opcode
== TGSI_OPCODE_I2D
||
4895 inst
->Instruction
.Opcode
== TGSI_OPCODE_U2D
);
4897 for (c
= 0; c
< 2; c
++) {
4899 if (write_mask
& (0x3 << dchan
)) {
4900 /* split into 24-bit int and 8-bit int */
4901 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4902 alu
.op
= ALU_OP2_AND_INT
;
4903 alu
.dst
.sel
= temp_reg
;
4904 alu
.dst
.chan
= dchan
;
4905 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], c
);
4906 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
4907 alu
.src
[1].value
= 0xffffff00;
4909 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4913 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4914 alu
.op
= ALU_OP2_AND_INT
;
4915 alu
.dst
.sel
= temp_reg
;
4916 alu
.dst
.chan
= dchan
+ 1;
4917 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], c
);
4918 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
4919 alu
.src
[1].value
= 0xff;
4922 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4928 for (c
= 0; c
< 2; c
++) {
4930 if (write_mask
& (0x3 << dchan
)) {
4931 for (i
= dchan
; i
<= dchan
+ 1; i
++) {
4932 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4933 alu
.op
= i
== dchan
? ctx
->inst_info
->op
: ALU_OP1_UINT_TO_FLT
;
4935 alu
.src
[0].sel
= temp_reg
;
4936 alu
.src
[0].chan
= i
;
4937 alu
.dst
.sel
= temp_reg
;
4940 if (ctx
->bc
->chip_class
== CAYMAN
)
4941 alu
.last
= i
== dchan
+ 1;
4943 alu
.last
= 1; /* trans only ops on evergreen */
4945 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4952 for (c
= 0; c
< 2; c
++) {
4954 if (write_mask
& (0x3 << dchan
)) {
4955 for (i
= 0; i
< 4; i
++) {
4956 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4957 alu
.op
= ALU_OP1_FLT32_TO_FLT64
;
4959 alu
.src
[0].chan
= dchan
+ (i
/ 2);
4960 if (i
== 0 || i
== 2)
4961 alu
.src
[0].sel
= temp_reg
;
4963 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
4964 alu
.src
[0].value
= 0x0;
4966 alu
.dst
.sel
= ctx
->temp_reg
;
4971 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4976 for (i
= 0; i
<= 1; i
++) {
4977 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
4978 alu
.op
= ALU_OP2_ADD_64
;
4980 alu
.src
[0].chan
= fp64_switch(i
);
4981 alu
.src
[0].sel
= ctx
->temp_reg
;
4983 alu
.src
[1].chan
= fp64_switch(i
+ 2);
4984 alu
.src
[1].sel
= ctx
->temp_reg
;
4985 tgsi_dst(ctx
, &inst
->Dst
[0], dchan
+ i
, &alu
.dst
);
4988 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
4998 static int egcm_double_to_int(struct r600_shader_ctx
*ctx
)
5000 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5001 struct r600_bytecode_alu alu
;
5003 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
5004 int treg
= r600_get_temp(ctx
);
5005 assert(inst
->Instruction
.Opcode
== TGSI_OPCODE_D2I
||
5006 inst
->Instruction
.Opcode
== TGSI_OPCODE_D2U
);
5008 /* do a 64->32 into a temp register */
5009 r
= tgsi_op2_64_params(ctx
, true, false, treg
, ALU_OP1_FLT64_TO_FLT32
);
5013 for (i
= 0; i
<= lasti
; i
++) {
5014 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
5016 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5017 alu
.op
= ctx
->inst_info
->op
;
5019 alu
.src
[0].chan
= i
;
5020 alu
.src
[0].sel
= treg
;
5021 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5022 alu
.last
= (i
== lasti
);
5024 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5032 static int cayman_emit_unary_double_raw(struct r600_bytecode
*bc
,
5035 struct r600_shader_src
*src
,
5038 struct r600_bytecode_alu alu
;
5039 const int last_slot
= 3;
5042 /* these have to write the result to X/Y by the looks of it */
5043 for (int i
= 0 ; i
< last_slot
; i
++) {
5044 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5047 r600_bytecode_src(&alu
.src
[0], src
, 1);
5048 r600_bytecode_src(&alu
.src
[1], src
, 0);
5051 r600_bytecode_src_set_abs(&alu
.src
[1]);
5053 alu
.dst
.sel
= dst_reg
;
5055 alu
.dst
.write
= (i
== 0 || i
== 1);
5057 if (bc
->chip_class
!= CAYMAN
|| i
== last_slot
- 1)
5059 r
= r600_bytecode_add_alu(bc
, &alu
);
5067 static int cayman_emit_double_instr(struct r600_shader_ctx
*ctx
)
5069 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5071 struct r600_bytecode_alu alu
;
5072 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
5073 int t1
= ctx
->temp_reg
;
5075 /* should only be one src regs */
5076 assert(inst
->Instruction
.NumSrcRegs
== 1);
5078 /* only support one double at a time */
5079 assert(inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_XY
||
5080 inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_ZW
);
5082 r
= cayman_emit_unary_double_raw(
5083 ctx
->bc
, ctx
->inst_info
->op
, t1
,
5085 ctx
->parse
.FullToken
.FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_DRSQ
||
5086 ctx
->parse
.FullToken
.FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_DSQRT
);
5090 for (i
= 0 ; i
<= lasti
; i
++) {
5091 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
5093 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5094 alu
.op
= ALU_OP1_MOV
;
5095 alu
.src
[0].sel
= t1
;
5096 alu
.src
[0].chan
= (i
== 0 || i
== 2) ? 0 : 1;
5097 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5101 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5108 static int cayman_emit_float_instr(struct r600_shader_ctx
*ctx
)
5110 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5112 struct r600_bytecode_alu alu
;
5113 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
5115 for (i
= 0 ; i
< last_slot
; i
++) {
5116 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5117 alu
.op
= ctx
->inst_info
->op
;
5118 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
5119 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], 0);
5121 /* RSQ should take the absolute value of src */
5122 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_RSQ
) {
5123 r600_bytecode_src_set_abs(&alu
.src
[j
]);
5126 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5127 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
5129 if (i
== last_slot
- 1)
5131 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5138 static int cayman_mul_int_instr(struct r600_shader_ctx
*ctx
)
5140 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5142 struct r600_bytecode_alu alu
;
5143 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
5144 int t1
= ctx
->temp_reg
;
5146 for (k
= 0; k
<= lasti
; k
++) {
5147 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << k
)))
5150 for (i
= 0 ; i
< 4; i
++) {
5151 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5152 alu
.op
= ctx
->inst_info
->op
;
5153 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
5154 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], k
);
5158 alu
.dst
.write
= (i
== k
);
5161 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5167 for (i
= 0 ; i
<= lasti
; i
++) {
5168 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
5170 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5171 alu
.op
= ALU_OP1_MOV
;
5172 alu
.src
[0].sel
= t1
;
5173 alu
.src
[0].chan
= i
;
5174 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5178 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5187 static int cayman_mul_double_instr(struct r600_shader_ctx
*ctx
)
5189 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5191 struct r600_bytecode_alu alu
;
5192 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
5193 int t1
= ctx
->temp_reg
;
5195 /* t1 would get overwritten below if we actually tried to
5196 * multiply two pairs of doubles at a time. */
5197 assert(inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_XY
||
5198 inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_ZW
);
5200 k
= inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_XY
? 0 : 1;
5202 for (i
= 0; i
< 4; i
++) {
5203 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5204 alu
.op
= ctx
->inst_info
->op
;
5205 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
5206 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], k
* 2 + ((i
== 3) ? 0 : 1));
5213 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5218 for (i
= 0; i
<= lasti
; i
++) {
5219 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
5221 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5222 alu
.op
= ALU_OP1_MOV
;
5223 alu
.src
[0].sel
= t1
;
5224 alu
.src
[0].chan
= i
;
5225 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5229 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5238 * Emit RECIP_64 + MUL_64 to implement division.
5240 static int cayman_ddiv_instr(struct r600_shader_ctx
*ctx
)
5242 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5244 struct r600_bytecode_alu alu
;
5245 int t1
= ctx
->temp_reg
;
5248 /* Only support one double at a time. This is the same constraint as
5249 * in DMUL lowering. */
5250 assert(inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_XY
||
5251 inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_ZW
);
5253 k
= inst
->Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_XY
? 0 : 1;
5255 r
= cayman_emit_unary_double_raw(ctx
->bc
, ALU_OP2_RECIP_64
, t1
, &ctx
->src
[1], false);
5259 for (int i
= 0; i
< 4; i
++) {
5260 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5261 alu
.op
= ALU_OP2_MUL_64
;
5263 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], k
* 2 + ((i
== 3) ? 0 : 1));
5265 alu
.src
[1].sel
= t1
;
5266 alu
.src
[1].chan
= (i
== 3) ? 0 : 1;
5273 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5278 for (int i
= 0; i
< 2; i
++) {
5279 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5280 alu
.op
= ALU_OP1_MOV
;
5281 alu
.src
[0].sel
= t1
;
5282 alu
.src
[0].chan
= i
;
5283 tgsi_dst(ctx
, &inst
->Dst
[0], k
* 2 + i
, &alu
.dst
);
5287 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5295 * r600 - trunc to -PI..PI range
5296 * r700 - normalize by dividing by 2PI
5299 static int tgsi_setup_trig(struct r600_shader_ctx
*ctx
)
5302 struct r600_bytecode_alu alu
;
5304 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5305 alu
.op
= ALU_OP3_MULADD
;
5309 alu
.dst
.sel
= ctx
->temp_reg
;
5312 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
5314 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
5315 alu
.src
[1].chan
= 0;
5316 alu
.src
[1].value
= u_bitcast_f2u(0.5f
* M_1_PI
);
5317 alu
.src
[2].sel
= V_SQ_ALU_SRC_0_5
;
5318 alu
.src
[2].chan
= 0;
5320 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5324 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5325 alu
.op
= ALU_OP1_FRACT
;
5328 alu
.dst
.sel
= ctx
->temp_reg
;
5331 alu
.src
[0].sel
= ctx
->temp_reg
;
5332 alu
.src
[0].chan
= 0;
5334 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5338 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5339 alu
.op
= ALU_OP3_MULADD
;
5343 alu
.dst
.sel
= ctx
->temp_reg
;
5346 alu
.src
[0].sel
= ctx
->temp_reg
;
5347 alu
.src
[0].chan
= 0;
5349 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
5350 alu
.src
[1].chan
= 0;
5351 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
5352 alu
.src
[2].chan
= 0;
5354 if (ctx
->bc
->chip_class
== R600
) {
5355 alu
.src
[1].value
= u_bitcast_f2u(2.0f
* M_PI
);
5356 alu
.src
[2].value
= u_bitcast_f2u(-M_PI
);
5358 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
5359 alu
.src
[2].sel
= V_SQ_ALU_SRC_0_5
;
5364 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5370 static int cayman_trig(struct r600_shader_ctx
*ctx
)
5372 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5373 struct r600_bytecode_alu alu
;
5374 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
5377 r
= tgsi_setup_trig(ctx
);
5382 for (i
= 0; i
< last_slot
; i
++) {
5383 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5384 alu
.op
= ctx
->inst_info
->op
;
5387 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5388 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
5390 alu
.src
[0].sel
= ctx
->temp_reg
;
5391 alu
.src
[0].chan
= 0;
5392 if (i
== last_slot
- 1)
5394 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5401 static int tgsi_trig(struct r600_shader_ctx
*ctx
)
5403 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5404 struct r600_bytecode_alu alu
;
5406 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
5408 r
= tgsi_setup_trig(ctx
);
5412 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5413 alu
.op
= ctx
->inst_info
->op
;
5415 alu
.dst
.sel
= ctx
->temp_reg
;
5418 alu
.src
[0].sel
= ctx
->temp_reg
;
5419 alu
.src
[0].chan
= 0;
5421 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5425 /* replicate result */
5426 for (i
= 0; i
< lasti
+ 1; i
++) {
5427 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
5430 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5431 alu
.op
= ALU_OP1_MOV
;
5433 alu
.src
[0].sel
= ctx
->temp_reg
;
5434 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5437 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5444 static int tgsi_kill(struct r600_shader_ctx
*ctx
)
5446 const struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5447 struct r600_bytecode_alu alu
;
5450 for (i
= 0; i
< 4; i
++) {
5451 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5452 alu
.op
= ctx
->inst_info
->op
;
5456 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
5458 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_KILL
) {
5459 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
5462 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
5467 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5472 /* kill must be last in ALU */
5473 ctx
->bc
->force_add_cf
= 1;
5474 ctx
->shader
->uses_kill
= TRUE
;
5478 static int tgsi_lit(struct r600_shader_ctx
*ctx
)
5480 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5481 struct r600_bytecode_alu alu
;
5484 /* tmp.x = max(src.y, 0.0) */
5485 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5486 alu
.op
= ALU_OP2_MAX
;
5487 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 1);
5488 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
; /*0.0*/
5489 alu
.src
[1].chan
= 1;
5491 alu
.dst
.sel
= ctx
->temp_reg
;
5496 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5500 if (inst
->Dst
[0].Register
.WriteMask
& (1 << 2))
5506 if (ctx
->bc
->chip_class
== CAYMAN
) {
5507 for (i
= 0; i
< 3; i
++) {
5508 /* tmp.z = log(tmp.x) */
5509 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5510 alu
.op
= ALU_OP1_LOG_CLAMPED
;
5511 alu
.src
[0].sel
= ctx
->temp_reg
;
5512 alu
.src
[0].chan
= 0;
5513 alu
.dst
.sel
= ctx
->temp_reg
;
5521 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5526 /* tmp.z = log(tmp.x) */
5527 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5528 alu
.op
= ALU_OP1_LOG_CLAMPED
;
5529 alu
.src
[0].sel
= ctx
->temp_reg
;
5530 alu
.src
[0].chan
= 0;
5531 alu
.dst
.sel
= ctx
->temp_reg
;
5535 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5540 chan
= alu
.dst
.chan
;
5543 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
5544 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5545 alu
.op
= ALU_OP3_MUL_LIT
;
5546 alu
.src
[0].sel
= sel
;
5547 alu
.src
[0].chan
= chan
;
5548 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], 3);
5549 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[0], 0);
5550 alu
.dst
.sel
= ctx
->temp_reg
;
5555 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5559 if (ctx
->bc
->chip_class
== CAYMAN
) {
5560 for (i
= 0; i
< 3; i
++) {
5561 /* dst.z = exp(tmp.x) */
5562 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5563 alu
.op
= ALU_OP1_EXP_IEEE
;
5564 alu
.src
[0].sel
= ctx
->temp_reg
;
5565 alu
.src
[0].chan
= 0;
5566 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5572 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5577 /* dst.z = exp(tmp.x) */
5578 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5579 alu
.op
= ALU_OP1_EXP_IEEE
;
5580 alu
.src
[0].sel
= ctx
->temp_reg
;
5581 alu
.src
[0].chan
= 0;
5582 tgsi_dst(ctx
, &inst
->Dst
[0], 2, &alu
.dst
);
5584 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5591 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5592 alu
.op
= ALU_OP1_MOV
;
5593 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
; /*1.0*/
5594 alu
.src
[0].chan
= 0;
5595 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
5596 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 0) & 1;
5597 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5601 /* dst.y = max(src.x, 0.0) */
5602 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5603 alu
.op
= ALU_OP2_MAX
;
5604 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
5605 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
; /*0.0*/
5606 alu
.src
[1].chan
= 0;
5607 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
5608 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 1) & 1;
5609 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5614 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5615 alu
.op
= ALU_OP1_MOV
;
5616 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
5617 alu
.src
[0].chan
= 0;
5618 tgsi_dst(ctx
, &inst
->Dst
[0], 3, &alu
.dst
);
5619 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 3) & 1;
5621 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5628 static int tgsi_rsq(struct r600_shader_ctx
*ctx
)
5630 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5631 struct r600_bytecode_alu alu
;
5634 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5636 alu
.op
= ALU_OP1_RECIPSQRT_IEEE
;
5638 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
5639 r600_bytecode_src(&alu
.src
[i
], &ctx
->src
[i
], 0);
5640 r600_bytecode_src_set_abs(&alu
.src
[i
]);
5642 alu
.dst
.sel
= ctx
->temp_reg
;
5645 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5648 /* replicate result */
5649 return tgsi_helper_tempx_replicate(ctx
);
5652 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx
*ctx
)
5654 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5655 struct r600_bytecode_alu alu
;
5658 for (i
= 0; i
< 4; i
++) {
5659 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5660 alu
.src
[0].sel
= ctx
->temp_reg
;
5661 alu
.op
= ALU_OP1_MOV
;
5663 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5664 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
5667 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5674 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx
*ctx
)
5676 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5677 struct r600_bytecode_alu alu
;
5680 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5681 alu
.op
= ctx
->inst_info
->op
;
5682 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
5683 r600_bytecode_src(&alu
.src
[i
], &ctx
->src
[i
], 0);
5685 alu
.dst
.sel
= ctx
->temp_reg
;
5688 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5691 /* replicate result */
5692 return tgsi_helper_tempx_replicate(ctx
);
5695 static int cayman_pow(struct r600_shader_ctx
*ctx
)
5697 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5699 struct r600_bytecode_alu alu
;
5700 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
5702 for (i
= 0; i
< 3; i
++) {
5703 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5704 alu
.op
= ALU_OP1_LOG_IEEE
;
5705 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
5706 alu
.dst
.sel
= ctx
->temp_reg
;
5711 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5717 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5718 alu
.op
= ALU_OP2_MUL
;
5719 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], 0);
5720 alu
.src
[1].sel
= ctx
->temp_reg
;
5721 alu
.dst
.sel
= ctx
->temp_reg
;
5724 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5728 for (i
= 0; i
< last_slot
; i
++) {
5729 /* POW(a,b) = EXP2(b * LOG2(a))*/
5730 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5731 alu
.op
= ALU_OP1_EXP_IEEE
;
5732 alu
.src
[0].sel
= ctx
->temp_reg
;
5734 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
5735 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
5736 if (i
== last_slot
- 1)
5738 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5745 static int tgsi_pow(struct r600_shader_ctx
*ctx
)
5747 struct r600_bytecode_alu alu
;
5751 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5752 alu
.op
= ALU_OP1_LOG_IEEE
;
5753 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
5754 alu
.dst
.sel
= ctx
->temp_reg
;
5757 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5761 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5762 alu
.op
= ALU_OP2_MUL
;
5763 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], 0);
5764 alu
.src
[1].sel
= ctx
->temp_reg
;
5765 alu
.dst
.sel
= ctx
->temp_reg
;
5768 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5771 /* POW(a,b) = EXP2(b * LOG2(a))*/
5772 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5773 alu
.op
= ALU_OP1_EXP_IEEE
;
5774 alu
.src
[0].sel
= ctx
->temp_reg
;
5775 alu
.dst
.sel
= ctx
->temp_reg
;
5778 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
5781 return tgsi_helper_tempx_replicate(ctx
);
5784 static int emit_mul_int_op(struct r600_bytecode
*bc
,
5785 struct r600_bytecode_alu
*alu_src
)
5787 struct r600_bytecode_alu alu
;
5790 if (bc
->chip_class
== CAYMAN
) {
5791 for (i
= 0; i
< 4; i
++) {
5793 alu
.dst
.write
= (i
== alu_src
->dst
.chan
);
5794 alu
.last
= (i
== 3);
5796 r
= r600_bytecode_add_alu(bc
, &alu
);
5802 r
= r600_bytecode_add_alu(bc
, &alu
);
5809 static int tgsi_divmod(struct r600_shader_ctx
*ctx
, int mod
, int signed_op
)
5811 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
5812 struct r600_bytecode_alu alu
;
5814 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
5815 int lasti
= tgsi_last_instruction(write_mask
);
5816 int tmp0
= ctx
->temp_reg
;
5817 int tmp1
= r600_get_temp(ctx
);
5818 int tmp2
= r600_get_temp(ctx
);
5819 int tmp3
= r600_get_temp(ctx
);
5822 /* Use additional temp if dst register and src register are the same */
5823 if (inst
->Src
[0].Register
.Index
== inst
->Dst
[0].Register
.Index
||
5824 inst
->Src
[1].Register
.Index
== inst
->Dst
[0].Register
.Index
) {
5825 tmp4
= r600_get_temp(ctx
);
5830 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
5832 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
5833 * 2. tmp0.z = lo (tmp0.x * src2)
5834 * 3. tmp0.w = -tmp0.z
5835 * 4. tmp0.y = hi (tmp0.x * src2)
5836 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
5837 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
5838 * 7. tmp1.x = tmp0.x - tmp0.w
5839 * 8. tmp1.y = tmp0.x + tmp0.w
5840 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
5841 * 10. tmp0.z = hi(tmp0.x * src1) = q
5842 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
5844 * 12. tmp0.w = src1 - tmp0.y = r
5845 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
5846 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
5850 * 15. tmp1.z = tmp0.z + 1 = q + 1
5851 * 16. tmp1.w = tmp0.z - 1 = q - 1
5855 * 15. tmp1.z = tmp0.w - src2 = r - src2
5856 * 16. tmp1.w = tmp0.w + src2 = r + src2
5860 * 17. tmp1.x = tmp1.x & tmp1.y
5862 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
5863 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
5865 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
5866 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
5870 * Same as unsigned, using abs values of the operands,
5871 * and fixing the sign of the result in the end.
5874 for (i
= 0; i
< 4; i
++) {
5875 if (!(write_mask
& (1<<i
)))
5880 /* tmp2.x = -src0 */
5881 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5882 alu
.op
= ALU_OP2_SUB_INT
;
5888 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
5890 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
5893 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
5896 /* tmp2.y = -src1 */
5897 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5898 alu
.op
= ALU_OP2_SUB_INT
;
5904 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
5906 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
5909 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
5912 /* tmp2.z sign bit is set if src0 and src2 signs are different */
5913 /* it will be a sign of the quotient */
5916 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5917 alu
.op
= ALU_OP2_XOR_INT
;
5923 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
5924 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
5927 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
5931 /* tmp2.x = |src0| */
5932 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5933 alu
.op
= ALU_OP3_CNDGE_INT
;
5940 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
5941 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
5942 alu
.src
[2].sel
= tmp2
;
5943 alu
.src
[2].chan
= 0;
5946 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
5949 /* tmp2.y = |src1| */
5950 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5951 alu
.op
= ALU_OP3_CNDGE_INT
;
5958 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
5959 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
5960 alu
.src
[2].sel
= tmp2
;
5961 alu
.src
[2].chan
= 1;
5964 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
5969 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
5970 if (ctx
->bc
->chip_class
== CAYMAN
) {
5971 /* tmp3.x = u2f(src2) */
5972 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5973 alu
.op
= ALU_OP1_UINT_TO_FLT
;
5980 alu
.src
[0].sel
= tmp2
;
5981 alu
.src
[0].chan
= 1;
5983 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
5987 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
5990 /* tmp0.x = recip(tmp3.x) */
5991 for (j
= 0 ; j
< 3; j
++) {
5992 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
5993 alu
.op
= ALU_OP1_RECIP_IEEE
;
5997 alu
.dst
.write
= (j
== 0);
5999 alu
.src
[0].sel
= tmp3
;
6000 alu
.src
[0].chan
= 0;
6004 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6008 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6009 alu
.op
= ALU_OP2_MUL
;
6011 alu
.src
[0].sel
= tmp0
;
6012 alu
.src
[0].chan
= 0;
6014 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
6015 alu
.src
[1].value
= 0x4f800000;
6020 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6024 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6025 alu
.op
= ALU_OP1_FLT_TO_UINT
;
6031 alu
.src
[0].sel
= tmp3
;
6032 alu
.src
[0].chan
= 0;
6035 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6039 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6040 alu
.op
= ALU_OP1_RECIP_UINT
;
6047 alu
.src
[0].sel
= tmp2
;
6048 alu
.src
[0].chan
= 1;
6050 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
6054 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6058 /* 2. tmp0.z = lo (tmp0.x * src2) */
6059 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6060 alu
.op
= ALU_OP2_MULLO_UINT
;
6066 alu
.src
[0].sel
= tmp0
;
6067 alu
.src
[0].chan
= 0;
6069 alu
.src
[1].sel
= tmp2
;
6070 alu
.src
[1].chan
= 1;
6072 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
6075 if ((r
= emit_mul_int_op(ctx
->bc
, &alu
)))
6078 /* 3. tmp0.w = -tmp0.z */
6079 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6080 alu
.op
= ALU_OP2_SUB_INT
;
6086 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
6087 alu
.src
[1].sel
= tmp0
;
6088 alu
.src
[1].chan
= 2;
6091 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6094 /* 4. tmp0.y = hi (tmp0.x * src2) */
6095 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6096 alu
.op
= ALU_OP2_MULHI_UINT
;
6102 alu
.src
[0].sel
= tmp0
;
6103 alu
.src
[0].chan
= 0;
6106 alu
.src
[1].sel
= tmp2
;
6107 alu
.src
[1].chan
= 1;
6109 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
6112 if ((r
= emit_mul_int_op(ctx
->bc
, &alu
)))
6115 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
6116 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6117 alu
.op
= ALU_OP3_CNDE_INT
;
6124 alu
.src
[0].sel
= tmp0
;
6125 alu
.src
[0].chan
= 1;
6126 alu
.src
[1].sel
= tmp0
;
6127 alu
.src
[1].chan
= 3;
6128 alu
.src
[2].sel
= tmp0
;
6129 alu
.src
[2].chan
= 2;
6132 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6135 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
6136 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6137 alu
.op
= ALU_OP2_MULHI_UINT
;
6143 alu
.src
[0].sel
= tmp0
;
6144 alu
.src
[0].chan
= 2;
6146 alu
.src
[1].sel
= tmp0
;
6147 alu
.src
[1].chan
= 0;
6149 if ((r
= emit_mul_int_op(ctx
->bc
, &alu
)))
6152 /* 7. tmp1.x = tmp0.x - tmp0.w */
6153 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6154 alu
.op
= ALU_OP2_SUB_INT
;
6160 alu
.src
[0].sel
= tmp0
;
6161 alu
.src
[0].chan
= 0;
6162 alu
.src
[1].sel
= tmp0
;
6163 alu
.src
[1].chan
= 3;
6166 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6169 /* 8. tmp1.y = tmp0.x + tmp0.w */
6170 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6171 alu
.op
= ALU_OP2_ADD_INT
;
6177 alu
.src
[0].sel
= tmp0
;
6178 alu
.src
[0].chan
= 0;
6179 alu
.src
[1].sel
= tmp0
;
6180 alu
.src
[1].chan
= 3;
6183 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6186 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
6187 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6188 alu
.op
= ALU_OP3_CNDE_INT
;
6195 alu
.src
[0].sel
= tmp0
;
6196 alu
.src
[0].chan
= 1;
6197 alu
.src
[1].sel
= tmp1
;
6198 alu
.src
[1].chan
= 1;
6199 alu
.src
[2].sel
= tmp1
;
6200 alu
.src
[2].chan
= 0;
6203 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6206 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
6207 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6208 alu
.op
= ALU_OP2_MULHI_UINT
;
6214 alu
.src
[0].sel
= tmp0
;
6215 alu
.src
[0].chan
= 0;
6218 alu
.src
[1].sel
= tmp2
;
6219 alu
.src
[1].chan
= 0;
6221 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
6224 if ((r
= emit_mul_int_op(ctx
->bc
, &alu
)))
6227 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
6228 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6229 alu
.op
= ALU_OP2_MULLO_UINT
;
6236 alu
.src
[0].sel
= tmp2
;
6237 alu
.src
[0].chan
= 1;
6239 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
6242 alu
.src
[1].sel
= tmp0
;
6243 alu
.src
[1].chan
= 2;
6245 if ((r
= emit_mul_int_op(ctx
->bc
, &alu
)))
6248 /* 12. tmp0.w = src1 - tmp0.y = r */
6249 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6250 alu
.op
= ALU_OP2_SUB_INT
;
6257 alu
.src
[0].sel
= tmp2
;
6258 alu
.src
[0].chan
= 0;
6260 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6263 alu
.src
[1].sel
= tmp0
;
6264 alu
.src
[1].chan
= 1;
6267 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6270 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
6271 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6272 alu
.op
= ALU_OP2_SETGE_UINT
;
6278 alu
.src
[0].sel
= tmp0
;
6279 alu
.src
[0].chan
= 3;
6281 alu
.src
[1].sel
= tmp2
;
6282 alu
.src
[1].chan
= 1;
6284 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
6288 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6291 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
6292 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6293 alu
.op
= ALU_OP2_SETGE_UINT
;
6300 alu
.src
[0].sel
= tmp2
;
6301 alu
.src
[0].chan
= 0;
6303 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6306 alu
.src
[1].sel
= tmp0
;
6307 alu
.src
[1].chan
= 1;
6310 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6313 if (mod
) { /* UMOD */
6315 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
6316 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6317 alu
.op
= ALU_OP2_SUB_INT
;
6323 alu
.src
[0].sel
= tmp0
;
6324 alu
.src
[0].chan
= 3;
6327 alu
.src
[1].sel
= tmp2
;
6328 alu
.src
[1].chan
= 1;
6330 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
6334 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6337 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
6338 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6339 alu
.op
= ALU_OP2_ADD_INT
;
6345 alu
.src
[0].sel
= tmp0
;
6346 alu
.src
[0].chan
= 3;
6348 alu
.src
[1].sel
= tmp2
;
6349 alu
.src
[1].chan
= 1;
6351 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
6355 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6360 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
6361 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6362 alu
.op
= ALU_OP2_ADD_INT
;
6368 alu
.src
[0].sel
= tmp0
;
6369 alu
.src
[0].chan
= 2;
6370 alu
.src
[1].sel
= V_SQ_ALU_SRC_1_INT
;
6373 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6376 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
6377 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6378 alu
.op
= ALU_OP2_ADD_INT
;
6384 alu
.src
[0].sel
= tmp0
;
6385 alu
.src
[0].chan
= 2;
6386 alu
.src
[1].sel
= V_SQ_ALU_SRC_M_1_INT
;
6389 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6394 /* 17. tmp1.x = tmp1.x & tmp1.y */
6395 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6396 alu
.op
= ALU_OP2_AND_INT
;
6402 alu
.src
[0].sel
= tmp1
;
6403 alu
.src
[0].chan
= 0;
6404 alu
.src
[1].sel
= tmp1
;
6405 alu
.src
[1].chan
= 1;
6408 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6411 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
6412 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
6413 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6414 alu
.op
= ALU_OP3_CNDE_INT
;
6421 alu
.src
[0].sel
= tmp1
;
6422 alu
.src
[0].chan
= 0;
6423 alu
.src
[1].sel
= tmp0
;
6424 alu
.src
[1].chan
= mod
? 3 : 2;
6425 alu
.src
[2].sel
= tmp1
;
6426 alu
.src
[2].chan
= 2;
6429 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6432 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
6433 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6434 alu
.op
= ALU_OP3_CNDE_INT
;
6447 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6451 alu
.src
[0].sel
= tmp1
;
6452 alu
.src
[0].chan
= 1;
6453 alu
.src
[1].sel
= tmp1
;
6454 alu
.src
[1].chan
= 3;
6455 alu
.src
[2].sel
= tmp0
;
6456 alu
.src
[2].chan
= 2;
6459 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6464 /* fix the sign of the result */
6468 /* tmp0.x = -tmp0.z */
6469 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6470 alu
.op
= ALU_OP2_SUB_INT
;
6476 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
6477 alu
.src
[1].sel
= tmp0
;
6478 alu
.src
[1].chan
= 2;
6481 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6484 /* sign of the remainder is the same as the sign of src0 */
6485 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
6486 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6487 alu
.op
= ALU_OP3_CNDGE_INT
;
6495 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6498 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6499 alu
.src
[1].sel
= tmp0
;
6500 alu
.src
[1].chan
= 2;
6501 alu
.src
[2].sel
= tmp0
;
6502 alu
.src
[2].chan
= 0;
6505 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6510 /* tmp0.x = -tmp0.z */
6511 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6512 alu
.op
= ALU_OP2_SUB_INT
;
6518 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
6519 alu
.src
[1].sel
= tmp0
;
6520 alu
.src
[1].chan
= 2;
6523 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6526 /* fix the quotient sign (same as the sign of src0*src1) */
6527 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
6528 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6529 alu
.op
= ALU_OP3_CNDGE_INT
;
6537 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6540 alu
.src
[0].sel
= tmp2
;
6541 alu
.src
[0].chan
= 2;
6542 alu
.src
[1].sel
= tmp0
;
6543 alu
.src
[1].chan
= 2;
6544 alu
.src
[2].sel
= tmp0
;
6545 alu
.src
[2].chan
= 0;
6548 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
6555 for (i
= 0; i
<= lasti
; ++i
) {
6556 if (!(write_mask
& (1<<i
)))
6559 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6560 alu
.op
= ALU_OP1_MOV
;
6561 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6562 alu
.src
[0].sel
= tmp4
;
6563 alu
.src
[0].chan
= i
;
6567 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
/* TGSI UDIV: unsigned 32-bit division, dst = src0 / src1.
 * Lowered by the shared tgsi_divmod() helper; second argument selects
 * remainder-vs-quotient (0 = quotient), third selects signedness (0 = unsigned). */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}
/* TGSI UMOD: unsigned 32-bit remainder, dst = src0 % src1.
 * Shared lowering in tgsi_divmod(); mod = 1 (remainder), unsigned. */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}
/* TGSI IDIV: signed 32-bit division, dst = src0 / src1.
 * Shared lowering in tgsi_divmod(); mod = 0 (quotient), signed. */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}
/* TGSI IMOD: signed 32-bit remainder, dst = src0 % src1.
 * Shared lowering in tgsi_divmod(); mod = 1 (remainder), signed. */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}
6596 static int tgsi_f2i(struct r600_shader_ctx
*ctx
)
6598 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
6599 struct r600_bytecode_alu alu
;
6601 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
6602 int last_inst
= tgsi_last_instruction(write_mask
);
6604 for (i
= 0; i
< 4; i
++) {
6605 if (!(write_mask
& (1<<i
)))
6608 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6609 alu
.op
= ALU_OP1_TRUNC
;
6611 alu
.dst
.sel
= ctx
->temp_reg
;
6615 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6618 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6623 for (i
= 0; i
< 4; i
++) {
6624 if (!(write_mask
& (1<<i
)))
6627 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6628 alu
.op
= ctx
->inst_info
->op
;
6630 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6632 alu
.src
[0].sel
= ctx
->temp_reg
;
6633 alu
.src
[0].chan
= i
;
6635 if (i
== last_inst
|| alu
.op
== ALU_OP1_FLT_TO_UINT
)
6637 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6645 static int tgsi_iabs(struct r600_shader_ctx
*ctx
)
6647 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
6648 struct r600_bytecode_alu alu
;
6650 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
6651 int last_inst
= tgsi_last_instruction(write_mask
);
6654 for (i
= 0; i
< 4; i
++) {
6655 if (!(write_mask
& (1<<i
)))
6658 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6659 alu
.op
= ALU_OP2_SUB_INT
;
6661 alu
.dst
.sel
= ctx
->temp_reg
;
6665 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
6666 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
6670 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6675 /* dst = (src >= 0 ? src : tmp) */
6676 for (i
= 0; i
< 4; i
++) {
6677 if (!(write_mask
& (1<<i
)))
6680 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6681 alu
.op
= ALU_OP3_CNDGE_INT
;
6685 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6687 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6688 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
6689 alu
.src
[2].sel
= ctx
->temp_reg
;
6690 alu
.src
[2].chan
= i
;
6694 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6701 static int tgsi_issg(struct r600_shader_ctx
*ctx
)
6703 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
6704 struct r600_bytecode_alu alu
;
6706 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
6707 int last_inst
= tgsi_last_instruction(write_mask
);
6709 /* tmp = (src >= 0 ? src : -1) */
6710 for (i
= 0; i
< 4; i
++) {
6711 if (!(write_mask
& (1<<i
)))
6714 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6715 alu
.op
= ALU_OP3_CNDGE_INT
;
6718 alu
.dst
.sel
= ctx
->temp_reg
;
6722 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6723 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
6724 alu
.src
[2].sel
= V_SQ_ALU_SRC_M_1_INT
;
6728 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6733 /* dst = (tmp > 0 ? 1 : tmp) */
6734 for (i
= 0; i
< 4; i
++) {
6735 if (!(write_mask
& (1<<i
)))
6738 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6739 alu
.op
= ALU_OP3_CNDGT_INT
;
6743 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6745 alu
.src
[0].sel
= ctx
->temp_reg
;
6746 alu
.src
[0].chan
= i
;
6748 alu
.src
[1].sel
= V_SQ_ALU_SRC_1_INT
;
6750 alu
.src
[2].sel
= ctx
->temp_reg
;
6751 alu
.src
[2].chan
= i
;
6755 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6764 static int tgsi_ssg(struct r600_shader_ctx
*ctx
)
6766 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
6767 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
6768 int last_inst
= tgsi_last_instruction(write_mask
);
6769 struct r600_bytecode_alu alu
;
6772 /* tmp = (src > 0 ? 1 : src) */
6773 for (i
= 0; i
<= last_inst
; i
++) {
6774 if (!(write_mask
& (1 << i
)))
6776 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6777 alu
.op
= ALU_OP3_CNDGT
;
6780 alu
.dst
.sel
= ctx
->temp_reg
;
6783 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6784 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
6785 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[0], i
);
6789 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6794 /* dst = (-tmp > 0 ? -1 : tmp) */
6795 for (i
= 0; i
<= last_inst
; i
++) {
6796 if (!(write_mask
& (1 << i
)))
6798 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6799 alu
.op
= ALU_OP3_CNDGT
;
6801 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6803 alu
.src
[0].sel
= ctx
->temp_reg
;
6804 alu
.src
[0].chan
= i
;
6807 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
6810 alu
.src
[2].sel
= ctx
->temp_reg
;
6811 alu
.src
[2].chan
= i
;
6815 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6822 static int tgsi_bfi(struct r600_shader_ctx
*ctx
)
6824 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
6825 struct r600_bytecode_alu alu
;
6828 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
6829 int last_inst
= tgsi_last_instruction(write_mask
);
6831 t1
= r600_get_temp(ctx
);
6833 for (i
= 0; i
< 4; i
++) {
6834 if (!(write_mask
& (1<<i
)))
6837 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6838 alu
.op
= ALU_OP2_SETGE_INT
;
6839 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[3], i
);
6840 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
6841 alu
.src
[1].value
= 32;
6842 alu
.dst
.sel
= ctx
->temp_reg
;
6845 alu
.last
= i
== last_inst
;
6846 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6851 for (i
= 0; i
< 4; i
++) {
6852 if (!(write_mask
& (1<<i
)))
6855 /* create mask tmp */
6856 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6857 alu
.op
= ALU_OP2_BFM_INT
;
6861 alu
.last
= i
== last_inst
;
6863 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[3], i
);
6864 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[2], i
);
6866 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6871 t2
= r600_get_temp(ctx
);
6873 for (i
= 0; i
< 4; i
++) {
6874 if (!(write_mask
& (1<<i
)))
6877 /* shift insert left */
6878 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6879 alu
.op
= ALU_OP2_LSHL_INT
;
6883 alu
.last
= i
== last_inst
;
6885 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
6886 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[2], i
);
6888 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6893 for (i
= 0; i
< 4; i
++) {
6894 if (!(write_mask
& (1<<i
)))
6897 /* actual bitfield insert */
6898 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6899 alu
.op
= ALU_OP3_BFI_INT
;
6901 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6904 alu
.last
= i
== last_inst
;
6906 alu
.src
[0].sel
= t1
;
6907 alu
.src
[0].chan
= i
;
6908 alu
.src
[1].sel
= t2
;
6909 alu
.src
[1].chan
= i
;
6910 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[0], i
);
6912 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6917 for (i
= 0; i
< 4; i
++) {
6918 if (!(write_mask
& (1<<i
)))
6920 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6921 alu
.op
= ALU_OP3_CNDE_INT
;
6923 alu
.src
[0].sel
= ctx
->temp_reg
;
6924 alu
.src
[0].chan
= i
;
6925 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[1], i
);
6927 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
6929 alu
.src
[1].sel
= alu
.dst
.sel
;
6930 alu
.src
[1].chan
= i
;
6932 alu
.last
= i
== last_inst
;
6933 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6940 static int tgsi_msb(struct r600_shader_ctx
*ctx
)
6942 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
6943 struct r600_bytecode_alu alu
;
6946 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
6947 int last_inst
= tgsi_last_instruction(write_mask
);
6949 assert(ctx
->inst_info
->op
== ALU_OP1_FFBH_INT
||
6950 ctx
->inst_info
->op
== ALU_OP1_FFBH_UINT
);
6954 /* bit position is indexed from lsb by TGSI, and from msb by the hardware */
6955 for (i
= 0; i
< 4; i
++) {
6956 if (!(write_mask
& (1<<i
)))
6959 /* t1 = FFBH_INT / FFBH_UINT */
6960 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6961 alu
.op
= ctx
->inst_info
->op
;
6965 alu
.last
= i
== last_inst
;
6967 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
6969 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6974 t2
= r600_get_temp(ctx
);
6976 for (i
= 0; i
< 4; i
++) {
6977 if (!(write_mask
& (1<<i
)))
6981 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
6982 alu
.op
= ALU_OP2_SUB_INT
;
6986 alu
.last
= i
== last_inst
;
6988 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
6989 alu
.src
[0].value
= 31;
6990 alu
.src
[1].sel
= t1
;
6991 alu
.src
[1].chan
= i
;
6993 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
6998 for (i
= 0; i
< 4; i
++) {
6999 if (!(write_mask
& (1<<i
)))
7002 /* result = t1 >= 0 ? t2 : t1 */
7003 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7004 alu
.op
= ALU_OP3_CNDGE_INT
;
7006 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
7009 alu
.last
= i
== last_inst
;
7011 alu
.src
[0].sel
= t1
;
7012 alu
.src
[0].chan
= i
;
7013 alu
.src
[1].sel
= t2
;
7014 alu
.src
[1].chan
= i
;
7015 alu
.src
[2].sel
= t1
;
7016 alu
.src
[2].chan
= i
;
7018 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7026 static int tgsi_interp_egcm(struct r600_shader_ctx
*ctx
)
7028 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7029 struct r600_bytecode_alu alu
;
7030 int r
, i
= 0, k
, interp_gpr
, interp_base_chan
, tmp
, lasti
;
7032 const int input
= inst
->Src
[0].Register
.Index
+ ctx
->shader
->nsys_inputs
;
7034 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
);
7036 /* Interpolators have been marked for use already by allocate_system_value_inputs */
7037 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_OFFSET
||
7038 inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
) {
7039 location
= TGSI_INTERPOLATE_LOC_CENTER
; /* sample offset will be added explicitly */
7042 location
= TGSI_INTERPOLATE_LOC_CENTROID
;
7043 ctx
->shader
->input
[input
].uses_interpolate_at_centroid
= 1;
7046 k
= eg_get_interpolator_index(ctx
->shader
->input
[input
].interpolate
, location
);
7049 interp_gpr
= ctx
->eg_interpolators
[k
].ij_index
/ 2;
7050 interp_base_chan
= 2 * (ctx
->eg_interpolators
[k
].ij_index
% 2);
7052 /* NOTE: currently offset is not perspective correct */
7053 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_OFFSET
||
7054 inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
) {
7055 int sample_gpr
= -1;
7056 int gradientsH
, gradientsV
;
7057 struct r600_bytecode_tex tex
;
7059 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
) {
7060 sample_gpr
= load_sample_position(ctx
, &ctx
->src
[1], ctx
->src
[1].swizzle
[0]);
7063 gradientsH
= r600_get_temp(ctx
);
7064 gradientsV
= r600_get_temp(ctx
);
7065 for (i
= 0; i
< 2; i
++) {
7066 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
7067 tex
.op
= i
== 0 ? FETCH_OP_GET_GRADIENTS_H
: FETCH_OP_GET_GRADIENTS_V
;
7068 tex
.src_gpr
= interp_gpr
;
7069 tex
.src_sel_x
= interp_base_chan
+ 0;
7070 tex
.src_sel_y
= interp_base_chan
+ 1;
7073 tex
.dst_gpr
= i
== 0 ? gradientsH
: gradientsV
;
7078 tex
.inst_mod
= 1; // Use per pixel gradient calculation
7080 tex
.resource_id
= tex
.sampler_id
;
7081 r
= r600_bytecode_add_tex(ctx
->bc
, &tex
);
7086 for (i
= 0; i
< 2; i
++) {
7087 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7088 alu
.op
= ALU_OP3_MULADD
;
7090 alu
.src
[0].sel
= gradientsH
;
7091 alu
.src
[0].chan
= i
;
7092 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
) {
7093 alu
.src
[1].sel
= sample_gpr
;
7094 alu
.src
[1].chan
= 2;
7097 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 0);
7099 alu
.src
[2].sel
= interp_gpr
;
7100 alu
.src
[2].chan
= interp_base_chan
+ i
;
7101 alu
.dst
.sel
= ctx
->temp_reg
;
7105 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7110 for (i
= 0; i
< 2; i
++) {
7111 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7112 alu
.op
= ALU_OP3_MULADD
;
7114 alu
.src
[0].sel
= gradientsV
;
7115 alu
.src
[0].chan
= i
;
7116 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
) {
7117 alu
.src
[1].sel
= sample_gpr
;
7118 alu
.src
[1].chan
= 3;
7121 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 1);
7123 alu
.src
[2].sel
= ctx
->temp_reg
;
7124 alu
.src
[2].chan
= i
;
7125 alu
.dst
.sel
= ctx
->temp_reg
;
7129 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7135 tmp
= r600_get_temp(ctx
);
7136 for (i
= 0; i
< 8; i
++) {
7137 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7138 alu
.op
= i
< 4 ? ALU_OP2_INTERP_ZW
: ALU_OP2_INTERP_XY
;
7141 if ((i
> 1 && i
< 6)) {
7147 alu
.dst
.chan
= i
% 4;
7149 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_OFFSET
||
7150 inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_SAMPLE
) {
7151 alu
.src
[0].sel
= ctx
->temp_reg
;
7152 alu
.src
[0].chan
= 1 - (i
% 2);
7154 alu
.src
[0].sel
= interp_gpr
;
7155 alu
.src
[0].chan
= interp_base_chan
+ 1 - (i
% 2);
7157 alu
.src
[1].sel
= V_SQ_ALU_SRC_PARAM_BASE
+ ctx
->shader
->input
[input
].lds_pos
;
7158 alu
.src
[1].chan
= 0;
7160 alu
.last
= i
% 4 == 3;
7161 alu
.bank_swizzle_force
= SQ_ALU_VEC_210
;
7163 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7168 // INTERP can't swizzle dst
7169 lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
7170 for (i
= 0; i
<= lasti
; i
++) {
7171 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
7174 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7175 alu
.op
= ALU_OP1_MOV
;
7176 alu
.src
[0].sel
= tmp
;
7177 alu
.src
[0].chan
= ctx
->src
[0].swizzle
[i
];
7178 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
7180 alu
.last
= i
== lasti
;
7181 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7190 static int tgsi_helper_copy(struct r600_shader_ctx
*ctx
, struct tgsi_full_instruction
*inst
)
7192 struct r600_bytecode_alu alu
;
7195 for (i
= 0; i
< 4; i
++) {
7196 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7197 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
))) {
7198 alu
.op
= ALU_OP0_NOP
;
7201 alu
.op
= ALU_OP1_MOV
;
7202 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
7203 alu
.src
[0].sel
= ctx
->temp_reg
;
7204 alu
.src
[0].chan
= i
;
7209 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7216 static int tgsi_make_src_for_op3(struct r600_shader_ctx
*ctx
,
7218 struct r600_bytecode_alu_src
*bc_src
,
7219 const struct r600_shader_src
*shader_src
)
7221 struct r600_bytecode_alu alu
;
7223 int lasti
= tgsi_last_instruction(writemask
);
7226 r600_bytecode_src(&bc_src
[0], shader_src
, 0);
7227 r600_bytecode_src(&bc_src
[1], shader_src
, 1);
7228 r600_bytecode_src(&bc_src
[2], shader_src
, 2);
7229 r600_bytecode_src(&bc_src
[3], shader_src
, 3);
7232 temp_reg
= r600_get_temp(ctx
);
7234 for (i
= 0; i
< lasti
+ 1; i
++) {
7235 if (!(writemask
& (1 << i
)))
7237 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7238 alu
.op
= ALU_OP1_MOV
;
7239 alu
.dst
.sel
= temp_reg
;
7242 alu
.src
[0] = bc_src
[i
];
7246 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7249 memset(&bc_src
[i
], 0, sizeof(*bc_src
));
7250 bc_src
[i
].sel
= temp_reg
;
7257 static int tgsi_op3_dst(struct r600_shader_ctx
*ctx
, int dst
)
7259 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7260 struct r600_bytecode_alu alu
;
7261 struct r600_bytecode_alu_src srcs
[4][4];
7263 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
7264 unsigned op
= ctx
->inst_info
->op
;
7266 if (op
== ALU_OP3_MULADD_IEEE
&&
7267 ctx
->info
.properties
[TGSI_PROPERTY_MUL_ZERO_WINS
])
7268 op
= ALU_OP3_MULADD
;
7270 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
7271 r
= tgsi_make_src_for_op3(ctx
, inst
->Dst
[0].Register
.WriteMask
,
7272 srcs
[j
], &ctx
->src
[j
]);
7277 for (i
= 0; i
< lasti
+ 1; i
++) {
7278 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
7281 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7283 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
7284 alu
.src
[j
] = srcs
[j
][i
];
7288 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
7298 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
/* Plain 3-source op writing the instruction's own destination. */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	return tgsi_op3_dst(ctx, -1);
}
7310 static int tgsi_dp(struct r600_shader_ctx
*ctx
)
7312 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7313 struct r600_bytecode_alu alu
;
7315 unsigned op
= ctx
->inst_info
->op
;
7316 if (op
== ALU_OP2_DOT4_IEEE
&&
7317 ctx
->info
.properties
[TGSI_PROPERTY_MUL_ZERO_WINS
])
7320 for (i
= 0; i
< 4; i
++) {
7321 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7323 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
7324 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
7327 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
7329 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
7330 /* handle some special cases */
7331 switch (inst
->Instruction
.Opcode
) {
7332 case TGSI_OPCODE_DP2
:
7334 alu
.src
[0].sel
= alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
7335 alu
.src
[0].chan
= alu
.src
[1].chan
= 0;
7338 case TGSI_OPCODE_DP3
:
7340 alu
.src
[0].sel
= alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
7341 alu
.src
[0].chan
= alu
.src
[1].chan
= 0;
7350 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7357 static inline boolean
tgsi_tex_src_requires_loading(struct r600_shader_ctx
*ctx
,
7360 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7361 return (inst
->Src
[index
].Register
.File
!= TGSI_FILE_TEMPORARY
&&
7362 inst
->Src
[index
].Register
.File
!= TGSI_FILE_INPUT
&&
7363 inst
->Src
[index
].Register
.File
!= TGSI_FILE_OUTPUT
) ||
7364 ctx
->src
[index
].neg
|| ctx
->src
[index
].abs
||
7365 (inst
->Src
[index
].Register
.File
== TGSI_FILE_INPUT
&& ctx
->type
== PIPE_SHADER_GEOMETRY
);
7368 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx
*ctx
,
7371 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7372 return ctx
->file_offset
[inst
->Src
[index
].Register
.File
] + inst
->Src
[index
].Register
.Index
;
7375 static int do_vtx_fetch_inst(struct r600_shader_ctx
*ctx
, boolean src_requires_loading
)
7377 struct r600_bytecode_vtx vtx
;
7378 struct r600_bytecode_alu alu
;
7379 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7381 int id
= tgsi_tex_get_src_gpr(ctx
, 1);
7382 int sampler_index_mode
= inst
->Src
[1].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
7384 src_gpr
= tgsi_tex_get_src_gpr(ctx
, 0);
7385 if (src_requires_loading
) {
7386 for (i
= 0; i
< 4; i
++) {
7387 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7388 alu
.op
= ALU_OP1_MOV
;
7389 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
7390 alu
.dst
.sel
= ctx
->temp_reg
;
7395 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7399 src_gpr
= ctx
->temp_reg
;
7402 memset(&vtx
, 0, sizeof(vtx
));
7403 vtx
.op
= FETCH_OP_VFETCH
;
7404 vtx
.buffer_id
= id
+ R600_MAX_CONST_BUFFERS
;
7405 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
7406 vtx
.src_gpr
= src_gpr
;
7407 vtx
.mega_fetch_count
= 16;
7408 vtx
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
7409 vtx
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7; /* SEL_X */
7410 vtx
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7; /* SEL_Y */
7411 vtx
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 2 : 7; /* SEL_Z */
7412 vtx
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7; /* SEL_W */
7413 vtx
.use_const_fields
= 1;
7414 vtx
.buffer_index_mode
= sampler_index_mode
;
7416 if ((r
= r600_bytecode_add_vtx(ctx
->bc
, &vtx
)))
7419 if (ctx
->bc
->chip_class
>= EVERGREEN
)
7422 for (i
= 0; i
< 4; i
++) {
7423 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
7424 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
7427 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7428 alu
.op
= ALU_OP2_AND_INT
;
7431 alu
.dst
.sel
= vtx
.dst_gpr
;
7434 alu
.src
[0].sel
= vtx
.dst_gpr
;
7435 alu
.src
[0].chan
= i
;
7437 alu
.src
[1].sel
= R600_SHADER_BUFFER_INFO_SEL
;
7438 alu
.src
[1].sel
+= (id
* 2);
7439 alu
.src
[1].chan
= i
% 4;
7440 alu
.src
[1].kc_bank
= R600_BUFFER_INFO_CONST_BUFFER
;
7444 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7449 if (inst
->Dst
[0].Register
.WriteMask
& 3) {
7450 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7451 alu
.op
= ALU_OP2_OR_INT
;
7454 alu
.dst
.sel
= vtx
.dst_gpr
;
7457 alu
.src
[0].sel
= vtx
.dst_gpr
;
7458 alu
.src
[0].chan
= 3;
7460 alu
.src
[1].sel
= R600_SHADER_BUFFER_INFO_SEL
+ (id
* 2) + 1;
7461 alu
.src
[1].chan
= 0;
7462 alu
.src
[1].kc_bank
= R600_BUFFER_INFO_CONST_BUFFER
;
7465 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7472 static int r600_do_buffer_txq(struct r600_shader_ctx
*ctx
, int reg_idx
, int offset
, int eg_buffer_base
)
7474 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7476 int id
= tgsi_tex_get_src_gpr(ctx
, reg_idx
) + offset
;
7477 int sampler_index_mode
= inst
->Src
[reg_idx
].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
7479 if (ctx
->bc
->chip_class
< EVERGREEN
) {
7480 struct r600_bytecode_alu alu
;
7481 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7482 alu
.op
= ALU_OP1_MOV
;
7483 alu
.src
[0].sel
= R600_SHADER_BUFFER_INFO_SEL
;
7484 /* r600 we have them at channel 2 of the second dword */
7485 alu
.src
[0].sel
+= (id
* 2) + 1;
7486 alu
.src
[0].chan
= 1;
7487 alu
.src
[0].kc_bank
= R600_BUFFER_INFO_CONST_BUFFER
;
7488 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
7490 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7495 struct r600_bytecode_vtx vtx
;
7496 memset(&vtx
, 0, sizeof(vtx
));
7497 vtx
.op
= FETCH_OP_GET_BUFFER_RESINFO
;
7498 vtx
.buffer_id
= id
+ eg_buffer_base
;
7499 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
7501 vtx
.mega_fetch_count
= 16; /* no idea here really... */
7502 vtx
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
7503 vtx
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7; /* SEL_X */
7504 vtx
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 4 : 7; /* SEL_Y */
7505 vtx
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 4 : 7; /* SEL_Z */
7506 vtx
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 4 : 7; /* SEL_W */
7507 vtx
.data_format
= FMT_32_32_32_32
;
7508 vtx
.buffer_index_mode
= sampler_index_mode
;
7510 if ((r
= r600_bytecode_add_vtx_tc(ctx
->bc
, &vtx
)))
7517 static int tgsi_tex(struct r600_shader_ctx
*ctx
)
7519 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
7520 struct r600_bytecode_tex tex
;
7521 struct r600_bytecode_tex grad_offs
[3];
7522 struct r600_bytecode_alu alu
;
7524 int r
, i
, j
, n_grad_offs
= 0;
7526 bool read_compressed_msaa
= ctx
->bc
->has_compressed_msaa_texturing
&&
7527 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
&&
7528 (inst
->Texture
.Texture
== TGSI_TEXTURE_2D_MSAA
||
7529 inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY_MSAA
);
7531 bool txf_add_offsets
= inst
->Texture
.NumOffsets
&&
7532 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
&&
7533 inst
->Texture
.Texture
!= TGSI_TEXTURE_BUFFER
;
7535 /* Texture fetch instructions can only use gprs as source.
7536 * Also they cannot negate the source or take the absolute value */
7537 const boolean src_requires_loading
= (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXQS
&&
7538 tgsi_tex_src_requires_loading(ctx
, 0)) ||
7539 read_compressed_msaa
|| txf_add_offsets
;
7541 boolean src_loaded
= FALSE
;
7542 unsigned sampler_src_reg
= 1;
7543 int8_t offset_x
= 0, offset_y
= 0, offset_z
= 0;
7544 boolean has_txq_cube_array_z
= false;
7545 unsigned sampler_index_mode
;
7546 int array_index_offset_channel
= -1;
7548 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXQ
&&
7549 ((inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
7550 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
)))
7551 if (inst
->Dst
[0].Register
.WriteMask
& 4) {
7552 ctx
->shader
->has_txq_cube_array_z_comp
= true;
7553 has_txq_cube_array_z
= true;
7556 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX2
||
7557 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
||
7558 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL2
||
7559 inst
->Instruction
.Opcode
== TGSI_OPCODE_TG4
)
7560 sampler_src_reg
= 2;
7562 /* TGSI moves the sampler to src reg 3 for TXD */
7563 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
)
7564 sampler_src_reg
= 3;
7566 sampler_index_mode
= inst
->Src
[sampler_src_reg
].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
7568 src_gpr
= tgsi_tex_get_src_gpr(ctx
, 0);
7570 if (inst
->Texture
.Texture
== TGSI_TEXTURE_BUFFER
) {
7571 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXQ
) {
7572 if (ctx
->bc
->chip_class
< EVERGREEN
)
7573 ctx
->shader
->uses_tex_buffers
= true;
7574 return r600_do_buffer_txq(ctx
, 1, 0, R600_MAX_CONST_BUFFERS
);
7576 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
) {
7577 if (ctx
->bc
->chip_class
< EVERGREEN
)
7578 ctx
->shader
->uses_tex_buffers
= true;
7579 return do_vtx_fetch_inst(ctx
, src_requires_loading
);
7583 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
7585 /* Add perspective divide */
7586 if (ctx
->bc
->chip_class
== CAYMAN
) {
7588 for (i
= 0; i
< 3; i
++) {
7589 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7590 alu
.op
= ALU_OP1_RECIP_IEEE
;
7591 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 3);
7593 alu
.dst
.sel
= ctx
->temp_reg
;
7599 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7606 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7607 alu
.op
= ALU_OP1_RECIP_IEEE
;
7608 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 3);
7610 alu
.dst
.sel
= ctx
->temp_reg
;
7611 alu
.dst
.chan
= out_chan
;
7614 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7619 for (i
= 0; i
< 3; i
++) {
7620 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7621 alu
.op
= ALU_OP2_MUL
;
7622 alu
.src
[0].sel
= ctx
->temp_reg
;
7623 alu
.src
[0].chan
= out_chan
;
7624 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
7625 alu
.dst
.sel
= ctx
->temp_reg
;
7628 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7632 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7633 alu
.op
= ALU_OP1_MOV
;
7634 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
7635 alu
.src
[0].chan
= 0;
7636 alu
.dst
.sel
= ctx
->temp_reg
;
7640 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7644 src_gpr
= ctx
->temp_reg
;
7648 if ((inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
||
7649 inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
7650 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
7651 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) &&
7652 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXQ
) {
7654 static const unsigned src0_swizzle
[] = {2, 2, 0, 1};
7655 static const unsigned src1_swizzle
[] = {1, 0, 2, 2};
7657 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
7658 for (i
= 0; i
< 4; i
++) {
7659 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7660 alu
.op
= ALU_OP2_CUBE
;
7661 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], src0_swizzle
[i
]);
7662 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], src1_swizzle
[i
]);
7663 alu
.dst
.sel
= ctx
->temp_reg
;
7668 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7673 /* tmp1.z = RCP_e(|tmp1.z|) */
7674 if (ctx
->bc
->chip_class
== CAYMAN
) {
7675 for (i
= 0; i
< 3; i
++) {
7676 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7677 alu
.op
= ALU_OP1_RECIP_IEEE
;
7678 alu
.src
[0].sel
= ctx
->temp_reg
;
7679 alu
.src
[0].chan
= 2;
7681 alu
.dst
.sel
= ctx
->temp_reg
;
7687 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7692 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7693 alu
.op
= ALU_OP1_RECIP_IEEE
;
7694 alu
.src
[0].sel
= ctx
->temp_reg
;
7695 alu
.src
[0].chan
= 2;
7697 alu
.dst
.sel
= ctx
->temp_reg
;
7701 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7706 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
7707 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
7708 * muladd has no writemask, have to use another temp
7710 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7711 alu
.op
= ALU_OP3_MULADD
;
7714 alu
.src
[0].sel
= ctx
->temp_reg
;
7715 alu
.src
[0].chan
= 0;
7716 alu
.src
[1].sel
= ctx
->temp_reg
;
7717 alu
.src
[1].chan
= 2;
7719 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
7720 alu
.src
[2].chan
= 0;
7721 alu
.src
[2].value
= u_bitcast_f2u(1.5f
);
7723 alu
.dst
.sel
= ctx
->temp_reg
;
7727 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7731 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7732 alu
.op
= ALU_OP3_MULADD
;
7735 alu
.src
[0].sel
= ctx
->temp_reg
;
7736 alu
.src
[0].chan
= 1;
7737 alu
.src
[1].sel
= ctx
->temp_reg
;
7738 alu
.src
[1].chan
= 2;
7740 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
7741 alu
.src
[2].chan
= 0;
7742 alu
.src
[2].value
= u_bitcast_f2u(1.5f
);
7744 alu
.dst
.sel
= ctx
->temp_reg
;
7749 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7752 /* write initial compare value into Z component
7753 - W src 0 for shadow cube
7754 - X src 1 for shadow cube array */
7755 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
7756 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
7757 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7758 alu
.op
= ALU_OP1_MOV
;
7759 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
)
7760 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], 0);
7762 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 3);
7763 alu
.dst
.sel
= ctx
->temp_reg
;
7767 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7772 if (inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
7773 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
7774 if (ctx
->bc
->chip_class
>= EVERGREEN
) {
7775 int mytmp
= r600_get_temp(ctx
);
7776 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7777 alu
.op
= ALU_OP1_MOV
;
7778 alu
.src
[0].sel
= ctx
->temp_reg
;
7779 alu
.src
[0].chan
= 3;
7780 alu
.dst
.sel
= mytmp
;
7784 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7788 /* Evaluate the array index according to floor(idx + 0.5). This
7789 * needs to be done before merging the face select value, because
7790 * otherwise the fractional part of the array index will interfere
7791 * with the face select value */
7792 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7793 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 3);
7794 alu
.op
= ALU_OP1_RNDNE
;
7795 alu
.dst
.sel
= ctx
->temp_reg
;
7799 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7803 /* Because the array slice index and the cube face index are merged
7804 * into one value we have to make sure the array slice index is >= 0,
7805 * otherwise the face selection will fail */
7806 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7807 alu
.op
= ALU_OP2_MAX
;
7808 alu
.src
[0].sel
= ctx
->temp_reg
;
7809 alu
.src
[0].chan
= 3;
7810 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
7811 alu
.dst
.sel
= ctx
->temp_reg
;
7815 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7819 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */
7820 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7821 alu
.op
= ALU_OP3_MULADD
;
7823 alu
.src
[0].sel
= ctx
->temp_reg
;
7824 alu
.src
[0].chan
= 3;
7825 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
7826 alu
.src
[1].chan
= 0;
7827 alu
.src
[1].value
= u_bitcast_f2u(8.0f
);
7828 alu
.src
[2].sel
= mytmp
;
7829 alu
.src
[2].chan
= 0;
7830 alu
.dst
.sel
= ctx
->temp_reg
;
7834 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7837 } else if (ctx
->bc
->chip_class
< EVERGREEN
) {
7838 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
7839 tex
.op
= FETCH_OP_SET_CUBEMAP_INDEX
;
7840 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
7841 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
7842 tex
.src_gpr
= r600_get_temp(ctx
);
7847 tex
.dst_sel_x
= tex
.dst_sel_y
= tex
.dst_sel_z
= tex
.dst_sel_w
= 7;
7848 tex
.coord_type_x
= 1;
7849 tex
.coord_type_y
= 1;
7850 tex
.coord_type_z
= 1;
7851 tex
.coord_type_w
= 1;
7852 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7853 alu
.op
= ALU_OP1_MOV
;
7854 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 3);
7855 alu
.dst
.sel
= tex
.src_gpr
;
7859 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7863 r
= r600_bytecode_add_tex(ctx
->bc
, &tex
);
7870 /* for cube forms of lod and bias we need to route things */
7871 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB
||
7872 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL
||
7873 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
||
7874 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL2
) {
7875 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7876 alu
.op
= ALU_OP1_MOV
;
7877 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
||
7878 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL2
)
7879 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], 0);
7881 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 3);
7882 alu
.dst
.sel
= ctx
->temp_reg
;
7886 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7892 src_gpr
= ctx
->temp_reg
;
7895 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
) {
7896 int temp_h
= 0, temp_v
= 0;
7899 /* if we've already loaded the src (i.e. CUBE don't reload it). */
7900 if (src_loaded
== TRUE
)
7904 for (i
= start_val
; i
< 3; i
++) {
7905 int treg
= r600_get_temp(ctx
);
7914 for (j
= 0; j
< 4; j
++) {
7915 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7916 alu
.op
= ALU_OP1_MOV
;
7917 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[i
], j
);
7923 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7928 for (i
= 1; i
< 3; i
++) {
7929 /* set gradients h/v */
7930 struct r600_bytecode_tex
*t
= &grad_offs
[n_grad_offs
++];
7931 memset(t
, 0, sizeof(struct r600_bytecode_tex
));
7932 t
->op
= (i
== 1) ? FETCH_OP_SET_GRADIENTS_H
:
7933 FETCH_OP_SET_GRADIENTS_V
;
7934 t
->sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
7935 t
->sampler_index_mode
= sampler_index_mode
;
7936 t
->resource_id
= t
->sampler_id
+ R600_MAX_CONST_BUFFERS
;
7937 t
->resource_index_mode
= sampler_index_mode
;
7939 t
->src_gpr
= (i
== 1) ? temp_h
: temp_v
;
7945 t
->dst_gpr
= r600_get_temp(ctx
); /* just to avoid confusing the asm scheduler */
7946 t
->dst_sel_x
= t
->dst_sel_y
= t
->dst_sel_z
= t
->dst_sel_w
= 7;
7947 if (inst
->Texture
.Texture
!= TGSI_TEXTURE_RECT
) {
7948 t
->coord_type_x
= 1;
7949 t
->coord_type_y
= 1;
7950 t
->coord_type_z
= 1;
7951 t
->coord_type_w
= 1;
7956 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TG4
) {
7957 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
7958 * incorrectly forces nearest filtering if the texture format is integer.
7959 * The only effect it has on Gather4, which always returns 4 texels for
7960 * bilinear filtering, is that the final coordinates are off by 0.5 of
7963 * The workaround is to subtract 0.5 from the unnormalized coordinates,
7964 * or (0.5 / size) from the normalized coordinates.
7966 if (inst
->Texture
.ReturnType
== TGSI_RETURN_TYPE_SINT
||
7967 inst
->Texture
.ReturnType
== TGSI_RETURN_TYPE_UINT
) {
7968 int treg
= r600_get_temp(ctx
);
7970 /* mov array and comparison oordinate to temp_reg if needed */
7971 if ((inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
||
7972 inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY
||
7973 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
) && !src_loaded
) {
7974 int end
= inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
? 3 : 2;
7975 for (i
= 2; i
<= end
; i
++) {
7976 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7977 alu
.op
= ALU_OP1_MOV
;
7978 alu
.dst
.sel
= ctx
->temp_reg
;
7981 alu
.last
= (i
== end
);
7982 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
7983 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
7989 if (inst
->Texture
.Texture
== TGSI_TEXTURE_RECT
||
7990 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWRECT
) {
7991 for (i
= 0; i
< 2; i
++) {
7992 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
7993 alu
.op
= ALU_OP2_ADD
;
7994 alu
.dst
.sel
= ctx
->temp_reg
;
7999 alu
.src
[0].sel
= ctx
->temp_reg
;
8000 alu
.src
[0].chan
= i
;
8002 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
8003 alu
.src
[1].sel
= V_SQ_ALU_SRC_0_5
;
8005 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8011 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
8012 tex
.op
= FETCH_OP_GET_TEXTURE_RESINFO
;
8013 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
8014 tex
.sampler_index_mode
= sampler_index_mode
;
8015 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
8016 tex
.resource_index_mode
= sampler_index_mode
;
8026 r
= r600_bytecode_add_tex(ctx
->bc
, &tex
);
8030 /* coord.xy = -0.5 * (1.0/int_to_flt(size)) + coord.xy */
8031 if (ctx
->bc
->chip_class
== CAYMAN
) {
8033 for (i
= 0; i
< 2; i
++) {
8034 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8035 alu
.op
= ALU_OP1_INT_TO_FLT
;
8039 alu
.src
[0].sel
= treg
;
8040 alu
.src
[0].chan
= i
;
8041 alu
.last
= (i
== 1) ? 1 : 0;
8042 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8046 for (j
= 0; j
< 2; j
++) {
8047 for (i
= 0; i
< 3; i
++) {
8048 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8049 alu
.op
= ALU_OP1_RECIP_IEEE
;
8050 alu
.src
[0].sel
= treg
;
8051 alu
.src
[0].chan
= j
;
8058 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8064 for (i
= 0; i
< 2; i
++) {
8065 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8066 alu
.op
= ALU_OP1_INT_TO_FLT
;
8070 alu
.src
[0].sel
= treg
;
8071 alu
.src
[0].chan
= i
;
8073 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8077 for (i
= 0; i
< 2; i
++) {
8078 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8079 alu
.op
= ALU_OP1_RECIP_IEEE
;
8080 alu
.src
[0].sel
= treg
;
8081 alu
.src
[0].chan
= i
;
8086 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8091 for (i
= 0; i
< 2; i
++) {
8092 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8093 alu
.op
= ALU_OP3_MULADD
;
8095 alu
.dst
.sel
= ctx
->temp_reg
;
8099 alu
.src
[0].sel
= treg
;
8100 alu
.src
[0].chan
= i
;
8101 alu
.src
[1].sel
= V_SQ_ALU_SRC_0_5
;
8104 alu
.src
[2].sel
= ctx
->temp_reg
;
8105 alu
.src
[2].chan
= i
;
8107 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[0], i
);
8108 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8114 src_gpr
= ctx
->temp_reg
;
8118 if (src_requires_loading
&& !src_loaded
) {
8119 for (i
= 0; i
< 4; i
++) {
8120 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8121 alu
.op
= ALU_OP1_MOV
;
8122 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
8123 alu
.dst
.sel
= ctx
->temp_reg
;
8128 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8133 src_gpr
= ctx
->temp_reg
;
8136 /* get offset values */
8137 if (inst
->Texture
.NumOffsets
) {
8138 assert(inst
->Texture
.NumOffsets
== 1);
8140 /* The texture offset feature doesn't work with the TXF instruction
8141 * and must be emulated by adding the offset to the texture coordinates. */
8142 if (txf_add_offsets
) {
8143 const struct tgsi_texture_offset
*off
= inst
->TexOffsets
;
8145 switch (inst
->Texture
.Texture
) {
8146 case TGSI_TEXTURE_3D
:
8147 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8148 alu
.op
= ALU_OP2_ADD_INT
;
8149 alu
.src
[0].sel
= src_gpr
;
8150 alu
.src
[0].chan
= 2;
8151 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
8152 alu
.src
[1].value
= ctx
->literals
[4 * off
[0].Index
+ off
[0].SwizzleZ
];
8153 alu
.dst
.sel
= src_gpr
;
8157 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8162 case TGSI_TEXTURE_2D
:
8163 case TGSI_TEXTURE_SHADOW2D
:
8164 case TGSI_TEXTURE_RECT
:
8165 case TGSI_TEXTURE_SHADOWRECT
:
8166 case TGSI_TEXTURE_2D_ARRAY
:
8167 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
8168 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8169 alu
.op
= ALU_OP2_ADD_INT
;
8170 alu
.src
[0].sel
= src_gpr
;
8171 alu
.src
[0].chan
= 1;
8172 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
8173 alu
.src
[1].value
= ctx
->literals
[4 * off
[0].Index
+ off
[0].SwizzleY
];
8174 alu
.dst
.sel
= src_gpr
;
8178 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8183 case TGSI_TEXTURE_1D
:
8184 case TGSI_TEXTURE_SHADOW1D
:
8185 case TGSI_TEXTURE_1D_ARRAY
:
8186 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
8187 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8188 alu
.op
= ALU_OP2_ADD_INT
;
8189 alu
.src
[0].sel
= src_gpr
;
8190 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
8191 alu
.src
[1].value
= ctx
->literals
[4 * off
[0].Index
+ off
[0].SwizzleX
];
8192 alu
.dst
.sel
= src_gpr
;
8195 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8199 /* texture offsets do not apply to other texture targets */
8202 switch (inst
->Texture
.Texture
) {
8203 case TGSI_TEXTURE_3D
:
8204 offset_z
= ctx
->literals
[4 * inst
->TexOffsets
[0].Index
+ inst
->TexOffsets
[0].SwizzleZ
] << 1;
8206 case TGSI_TEXTURE_2D
:
8207 case TGSI_TEXTURE_SHADOW2D
:
8208 case TGSI_TEXTURE_RECT
:
8209 case TGSI_TEXTURE_SHADOWRECT
:
8210 case TGSI_TEXTURE_2D_ARRAY
:
8211 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
8212 offset_y
= ctx
->literals
[4 * inst
->TexOffsets
[0].Index
+ inst
->TexOffsets
[0].SwizzleY
] << 1;
8214 case TGSI_TEXTURE_1D
:
8215 case TGSI_TEXTURE_SHADOW1D
:
8216 case TGSI_TEXTURE_1D_ARRAY
:
8217 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
8218 offset_x
= ctx
->literals
[4 * inst
->TexOffsets
[0].Index
+ inst
->TexOffsets
[0].SwizzleX
] << 1;
8223 /* Obtain the sample index for reading a compressed MSAA color texture.
8224 * To read the FMASK, we use the ldfptr instruction, which tells us
8225 * where the samples are stored.
8226 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
8227 * which is the identity mapping. Each nibble says which physical sample
8228 * should be fetched to get that sample.
8230 * Assume src.z contains the sample index. It should be modified like this:
8231 * src.z = (ldfptr() >> (src.z * 4)) & 0xF;
8232 * Then fetch the texel with src.
8234 if (read_compressed_msaa
) {
8235 unsigned sample_chan
= 3;
8236 unsigned temp
= r600_get_temp(ctx
);
8239 /* temp.w = ldfptr() */
8240 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
8241 tex
.op
= FETCH_OP_LD
;
8242 tex
.inst_mod
= 1; /* to indicate this is ldfptr */
8243 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
8244 tex
.sampler_index_mode
= sampler_index_mode
;
8245 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
8246 tex
.resource_index_mode
= sampler_index_mode
;
8247 tex
.src_gpr
= src_gpr
;
8249 tex
.dst_sel_x
= 7; /* mask out these components */
8252 tex
.dst_sel_w
= 0; /* store X */
8257 tex
.offset_x
= offset_x
;
8258 tex
.offset_y
= offset_y
;
8259 tex
.offset_z
= offset_z
;
8260 r
= r600_bytecode_add_tex(ctx
->bc
, &tex
);
8264 /* temp.x = sample_index*4 */
8265 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8266 alu
.op
= ALU_OP2_MULLO_INT
;
8267 alu
.src
[0].sel
= src_gpr
;
8268 alu
.src
[0].chan
= sample_chan
;
8269 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
8270 alu
.src
[1].value
= 4;
8274 r
= emit_mul_int_op(ctx
->bc
, &alu
);
8278 /* sample_index = temp.w >> temp.x */
8279 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8280 alu
.op
= ALU_OP2_LSHR_INT
;
8281 alu
.src
[0].sel
= temp
;
8282 alu
.src
[0].chan
= 3;
8283 alu
.src
[1].sel
= temp
;
8284 alu
.src
[1].chan
= 0;
8285 alu
.dst
.sel
= src_gpr
;
8286 alu
.dst
.chan
= sample_chan
;
8289 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8293 /* sample_index & 0xF */
8294 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8295 alu
.op
= ALU_OP2_AND_INT
;
8296 alu
.src
[0].sel
= src_gpr
;
8297 alu
.src
[0].chan
= sample_chan
;
8298 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
8299 alu
.src
[1].value
= 0xF;
8300 alu
.dst
.sel
= src_gpr
;
8301 alu
.dst
.chan
= sample_chan
;
8304 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8308 /* visualize the FMASK */
8309 for (i
= 0; i
< 4; i
++) {
8310 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8311 alu
.op
= ALU_OP1_INT_TO_FLT
;
8312 alu
.src
[0].sel
= src_gpr
;
8313 alu
.src
[0].chan
= sample_chan
;
8314 alu
.dst
.sel
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
8318 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8326 /* does this shader want a num layers from TXQ for a cube array? */
8327 if (has_txq_cube_array_z
) {
8328 int id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
8330 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8331 alu
.op
= ALU_OP1_MOV
;
8333 alu
.src
[0].sel
= R600_SHADER_BUFFER_INFO_SEL
;
8334 if (ctx
->bc
->chip_class
>= EVERGREEN
) {
8335 /* with eg each dword is number of cubes */
8336 alu
.src
[0].sel
+= id
/ 4;
8337 alu
.src
[0].chan
= id
% 4;
8339 /* r600 we have them at channel 2 of the second dword */
8340 alu
.src
[0].sel
+= (id
* 2) + 1;
8341 alu
.src
[0].chan
= 2;
8343 alu
.src
[0].kc_bank
= R600_BUFFER_INFO_CONST_BUFFER
;
8344 tgsi_dst(ctx
, &inst
->Dst
[0], 2, &alu
.dst
);
8346 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8349 /* disable writemask from texture instruction */
8350 inst
->Dst
[0].Register
.WriteMask
&= ~4;
8353 opcode
= ctx
->inst_info
->op
;
8354 if (opcode
== FETCH_OP_GATHER4
&&
8355 inst
->TexOffsets
[0].File
!= TGSI_FILE_NULL
&&
8356 inst
->TexOffsets
[0].File
!= TGSI_FILE_IMMEDIATE
) {
8357 struct r600_bytecode_tex
*t
;
8358 opcode
= FETCH_OP_GATHER4_O
;
8360 /* GATHER4_O/GATHER4_C_O use offset values loaded by
8361 SET_TEXTURE_OFFSETS instruction. The immediate offset values
8362 encoded in the instruction are ignored. */
8363 t
= &grad_offs
[n_grad_offs
++];
8364 memset(t
, 0, sizeof(struct r600_bytecode_tex
));
8365 t
->op
= FETCH_OP_SET_TEXTURE_OFFSETS
;
8366 t
->sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
8367 t
->sampler_index_mode
= sampler_index_mode
;
8368 t
->resource_id
= t
->sampler_id
+ R600_MAX_CONST_BUFFERS
;
8369 t
->resource_index_mode
= sampler_index_mode
;
8371 t
->src_gpr
= ctx
->file_offset
[inst
->TexOffsets
[0].File
] + inst
->TexOffsets
[0].Index
;
8372 t
->src_sel_x
= inst
->TexOffsets
[0].SwizzleX
;
8373 t
->src_sel_y
= inst
->TexOffsets
[0].SwizzleY
;
8374 if (inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY
||
8375 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
)
8376 /* make sure array index selector is 0, this is just a safety
8377 * precausion because TGSI seems to emit something strange here */
8380 t
->src_sel_z
= inst
->TexOffsets
[0].SwizzleZ
;
8390 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
||
8391 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
||
8392 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWRECT
||
8393 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
8394 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D_ARRAY
||
8395 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
||
8396 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
8398 case FETCH_OP_SAMPLE
:
8399 opcode
= FETCH_OP_SAMPLE_C
;
8401 case FETCH_OP_SAMPLE_L
:
8402 opcode
= FETCH_OP_SAMPLE_C_L
;
8404 case FETCH_OP_SAMPLE_LB
:
8405 opcode
= FETCH_OP_SAMPLE_C_LB
;
8407 case FETCH_OP_SAMPLE_G
:
8408 opcode
= FETCH_OP_SAMPLE_C_G
;
8410 /* Texture gather variants */
8411 case FETCH_OP_GATHER4
:
8412 opcode
= FETCH_OP_GATHER4_C
;
8414 case FETCH_OP_GATHER4_O
:
8415 opcode
= FETCH_OP_GATHER4_C_O
;
8420 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
8423 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
8424 tex
.sampler_index_mode
= sampler_index_mode
;
8425 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
8426 tex
.resource_index_mode
= sampler_index_mode
;
8427 tex
.src_gpr
= src_gpr
;
8428 tex
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
8430 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_DDX_FINE
||
8431 inst
->Instruction
.Opcode
== TGSI_OPCODE_DDY_FINE
) {
8432 tex
.inst_mod
= 1; /* per pixel gradient calculation instead of per 2x2 quad */
8435 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TG4
) {
8436 int8_t texture_component_select
= ctx
->literals
[4 * inst
->Src
[1].Register
.Index
+ inst
->Src
[1].Register
.SwizzleX
];
8437 tex
.inst_mod
= texture_component_select
;
8439 if (ctx
->bc
->chip_class
== CAYMAN
) {
8440 tex
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7;
8441 tex
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7;
8442 tex
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 2 : 7;
8443 tex
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7;
8445 /* GATHER4 result order is different from TGSI TG4 */
8446 tex
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 1 : 7;
8447 tex
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 2 : 7;
8448 tex
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 0 : 7;
8449 tex
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7;
8452 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_LODQ
) {
8453 tex
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7;
8454 tex
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7;
8458 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXQS
) {
8465 tex
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7;
8466 tex
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7;
8467 tex
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 2 : 7;
8468 tex
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7;
8472 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXQS
) {
8477 } else if (src_loaded
) {
8483 tex
.src_sel_x
= ctx
->src
[0].swizzle
[0];
8484 tex
.src_sel_y
= ctx
->src
[0].swizzle
[1];
8485 tex
.src_sel_z
= ctx
->src
[0].swizzle
[2];
8486 tex
.src_sel_w
= ctx
->src
[0].swizzle
[3];
8487 tex
.src_rel
= ctx
->src
[0].rel
;
8490 if (inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
||
8491 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
8492 inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
8493 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
8497 tex
.src_sel_w
= 2; /* route Z compare or Lod value into W */
8500 if (inst
->Texture
.Texture
!= TGSI_TEXTURE_RECT
&&
8501 inst
->Texture
.Texture
!= TGSI_TEXTURE_SHADOWRECT
) {
8502 tex
.coord_type_x
= 1;
8503 tex
.coord_type_y
= 1;
8505 tex
.coord_type_z
= 1;
8506 tex
.coord_type_w
= 1;
8508 tex
.offset_x
= offset_x
;
8509 tex
.offset_y
= offset_y
;
8510 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TG4
&&
8511 (inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY
||
8512 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
)) {
8516 tex
.offset_z
= offset_z
;
8519 /* Put the depth for comparison in W.
8520 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
8521 * Some instructions expect the depth in Z. */
8522 if ((inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
||
8523 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
||
8524 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWRECT
||
8525 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D_ARRAY
) &&
8526 opcode
!= FETCH_OP_SAMPLE_C_L
&&
8527 opcode
!= FETCH_OP_SAMPLE_C_LB
) {
8528 tex
.src_sel_w
= tex
.src_sel_z
;
8531 if (inst
->Texture
.Texture
== TGSI_TEXTURE_1D_ARRAY
||
8532 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D_ARRAY
) {
8533 if (opcode
== FETCH_OP_SAMPLE_C_L
||
8534 opcode
== FETCH_OP_SAMPLE_C_LB
) {
8535 /* the array index is read from Y */
8536 tex
.coord_type_y
= 0;
8537 array_index_offset_channel
= tex
.src_sel_y
;
8539 /* the array index is read from Z */
8540 tex
.coord_type_z
= 0;
8541 tex
.src_sel_z
= tex
.src_sel_y
;
8542 array_index_offset_channel
= tex
.src_sel_z
;
8544 } else if (inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY
||
8545 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
) {
8546 tex
.coord_type_z
= 0;
8547 array_index_offset_channel
= tex
.src_sel_z
;
8548 } else if ((inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
8549 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) &&
8550 (ctx
->bc
->chip_class
>= EVERGREEN
))
8551 /* the array index is read from Z, coordinate will be corrected elsewhere */
8552 tex
.coord_type_z
= 0;
8554 /* We have array access to 1D or 2D ARRAY, the coordinates are not int ->
8555 * evaluate the array index */
8556 if (array_index_offset_channel
>= 0 &&
8557 opcode
!= FETCH_OP_LD
&&
8558 opcode
!= FETCH_OP_GET_TEXTURE_RESINFO
) {
8559 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8560 alu
.src
[0].sel
= tex
.src_gpr
;
8561 alu
.src
[0].chan
= array_index_offset_channel
;
8562 alu
.src
[0].rel
= tex
.src_rel
;
8563 alu
.op
= ALU_OP1_RNDNE
;
8564 alu
.dst
.sel
= tex
.src_gpr
;
8565 alu
.dst
.chan
= array_index_offset_channel
;
8566 alu
.dst
.rel
= tex
.src_rel
;
8569 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8574 /* mask unused source components */
8575 if (opcode
== FETCH_OP_SAMPLE
|| opcode
== FETCH_OP_GATHER4
) {
8576 switch (inst
->Texture
.Texture
) {
8577 case TGSI_TEXTURE_2D
:
8578 case TGSI_TEXTURE_RECT
:
8582 case TGSI_TEXTURE_1D_ARRAY
:
8586 case TGSI_TEXTURE_1D
:
8594 /* Emit set gradient and offset instructions. */
8595 for (i
= 0; i
< n_grad_offs
; ++i
) {
8596 r
= r600_bytecode_add_tex(ctx
->bc
, &grad_offs
[i
]);
8601 r
= r600_bytecode_add_tex(ctx
->bc
, &tex
);
8605 /* add shadow ambient support - gallium doesn't do it yet */
8609 static int find_hw_atomic_counter(struct r600_shader_ctx
*ctx
,
8610 struct tgsi_full_src_register
*src
)
8614 if (src
->Register
.Indirect
) {
8615 for (i
= 0; i
< ctx
->shader
->nhwatomic_ranges
; i
++) {
8616 if (src
->Indirect
.ArrayID
== ctx
->shader
->atomics
[i
].array_id
)
8617 return ctx
->shader
->atomics
[i
].hw_idx
;
8620 uint32_t index
= src
->Register
.Index
;
8621 for (i
= 0; i
< ctx
->shader
->nhwatomic_ranges
; i
++) {
8622 if (ctx
->shader
->atomics
[i
].buffer_id
!= (unsigned)src
->Dimension
.Index
)
8624 if (index
> ctx
->shader
->atomics
[i
].end
)
8626 if (index
< ctx
->shader
->atomics
[i
].start
)
8628 uint32_t offset
= (index
- ctx
->shader
->atomics
[i
].start
);
8629 return ctx
->shader
->atomics
[i
].hw_idx
+ offset
;
8636 static int tgsi_set_gds_temp(struct r600_shader_ctx
*ctx
,
8637 int *uav_id_p
, int *uav_index_mode_p
)
8639 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8640 int uav_id
, uav_index_mode
= 0;
8642 bool is_cm
= (ctx
->bc
->chip_class
== CAYMAN
);
8644 uav_id
= find_hw_atomic_counter(ctx
, &inst
->Src
[0]);
8646 if (inst
->Src
[0].Register
.Indirect
) {
8648 struct r600_bytecode_alu alu
;
8649 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8650 alu
.op
= ALU_OP2_LSHL_INT
;
8651 alu
.src
[0].sel
= get_address_file_reg(ctx
, inst
->Src
[0].Indirect
.Index
);
8652 alu
.src
[0].chan
= 0;
8653 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
8654 alu
.src
[1].value
= 2;
8655 alu
.dst
.sel
= ctx
->temp_reg
;
8659 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8663 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
8666 V_SQ_ALU_SRC_LITERAL
, uav_id
* 4);
8672 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
8674 V_SQ_ALU_SRC_LITERAL
, uav_id
* 4,
8680 *uav_index_mode_p
= uav_index_mode
;
8684 static int tgsi_load_gds(struct r600_shader_ctx
*ctx
)
8686 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8688 struct r600_bytecode_gds gds
;
8690 int uav_index_mode
= 0;
8691 bool is_cm
= (ctx
->bc
->chip_class
== CAYMAN
);
8693 r
= tgsi_set_gds_temp(ctx
, &uav_id
, &uav_index_mode
);
8697 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
8698 gds
.op
= FETCH_OP_GDS_READ_RET
;
8699 gds
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
8700 gds
.uav_id
= is_cm
? 0 : uav_id
;
8701 gds
.uav_index_mode
= is_cm
? 0 : uav_index_mode
;
8702 gds
.src_gpr
= ctx
->temp_reg
;
8703 gds
.src_sel_x
= (is_cm
) ? 0 : 4;
8711 gds
.alloc_consume
= !is_cm
;
8712 r
= r600_bytecode_add_gds(ctx
->bc
, &gds
);
8716 ctx
->bc
->cf_last
->vpm
= 1;
8720 /* this fixes up 1D arrays properly */
8721 static int load_index_src(struct r600_shader_ctx
*ctx
, int src_index
, int *idx_gpr
)
8723 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8725 struct r600_bytecode_alu alu
;
8726 int temp_reg
= r600_get_temp(ctx
);
8728 for (i
= 0; i
< 4; i
++) {
8729 bool def_val
= true, write_zero
= false;
8730 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8731 alu
.op
= ALU_OP1_MOV
;
8732 alu
.dst
.sel
= temp_reg
;
8735 switch (inst
->Memory
.Texture
) {
8736 case TGSI_TEXTURE_BUFFER
:
8737 case TGSI_TEXTURE_1D
:
8738 if (i
== 1 || i
== 2 || i
== 3) {
8742 case TGSI_TEXTURE_1D_ARRAY
:
8743 if (i
== 1 || i
== 3)
8746 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[src_index
], 1);
8750 case TGSI_TEXTURE_2D
:
8751 if (i
== 2 || i
== 3)
8761 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
8762 alu
.src
[0].value
= 0;
8763 } else if (def_val
) {
8764 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[src_index
], i
);
8770 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8774 *idx_gpr
= temp_reg
;
8778 static int load_buffer_coord(struct r600_shader_ctx
*ctx
, int src_idx
,
8781 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8783 if (inst
->Src
[src_idx
].Register
.File
== TGSI_FILE_IMMEDIATE
) {
8784 int value
= (ctx
->literals
[4 * inst
->Src
[src_idx
].Register
.Index
+ inst
->Src
[src_idx
].Register
.SwizzleX
]);
8785 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
8787 V_SQ_ALU_SRC_LITERAL
, value
>> 2,
8792 struct r600_bytecode_alu alu
;
8793 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8794 alu
.op
= ALU_OP2_LSHR_INT
;
8795 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[src_idx
], 0);
8796 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
8797 alu
.src
[1].value
= 2;
8798 alu
.dst
.sel
= temp_reg
;
8801 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8808 static int tgsi_load_buffer(struct r600_shader_ctx
*ctx
)
8810 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8811 /* have to work out the offset into the RAT immediate return buffer */
8812 struct r600_bytecode_vtx vtx
;
8813 struct r600_bytecode_cf
*cf
;
8815 int temp_reg
= r600_get_temp(ctx
);
8816 unsigned rat_index_mode
;
8819 rat_index_mode
= inst
->Src
[0].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
8820 base
= R600_IMAGE_REAL_RESOURCE_OFFSET
+ ctx
->info
.file_count
[TGSI_FILE_IMAGE
];
8822 r
= load_buffer_coord(ctx
, 1, temp_reg
);
8825 ctx
->bc
->cf_last
->barrier
= 1;
8826 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
8827 vtx
.op
= FETCH_OP_VFETCH
;
8828 vtx
.buffer_id
= inst
->Src
[0].Register
.Index
+ base
;
8829 vtx
.buffer_index_mode
= rat_index_mode
;
8830 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
8831 vtx
.src_gpr
= temp_reg
;
8833 vtx
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
8834 vtx
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7; /* SEL_X */
8835 vtx
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7; /* SEL_Y */
8836 vtx
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 2 : 7; /* SEL_Z */
8837 vtx
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7; /* SEL_W */
8838 vtx
.num_format_all
= 1;
8839 vtx
.format_comp_all
= 1;
8840 vtx
.srf_mode_all
= 0;
8842 if (inst
->Dst
[0].Register
.WriteMask
& 8) {
8843 vtx
.data_format
= FMT_32_32_32_32
;
8844 vtx
.use_const_fields
= 0;
8845 } else if (inst
->Dst
[0].Register
.WriteMask
& 4) {
8846 vtx
.data_format
= FMT_32_32_32
;
8847 vtx
.use_const_fields
= 0;
8848 } else if (inst
->Dst
[0].Register
.WriteMask
& 2) {
8849 vtx
.data_format
= FMT_32_32
;
8850 vtx
.use_const_fields
= 0;
8852 vtx
.data_format
= FMT_32
;
8853 vtx
.use_const_fields
= 0;
8856 r
= r600_bytecode_add_vtx_tc(ctx
->bc
, &vtx
);
8859 cf
= ctx
->bc
->cf_last
;
8864 static int tgsi_load_rat(struct r600_shader_ctx
*ctx
)
8866 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8867 /* have to work out the offset into the RAT immediate return buffer */
8868 struct r600_bytecode_vtx vtx
;
8869 struct r600_bytecode_cf
*cf
;
8872 unsigned format
, num_format
, format_comp
, endian
;
8873 const struct util_format_description
*desc
;
8874 unsigned rat_index_mode
;
8875 unsigned immed_base
;
8877 rat_index_mode
= inst
->Src
[0].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
8879 immed_base
= R600_IMAGE_IMMED_RESOURCE_OFFSET
;
8880 r
= load_index_src(ctx
, 1, &idx_gpr
);
8885 egcm_load_index_reg(ctx
->bc
, 1, false);
8887 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_MEM_RAT
);
8888 cf
= ctx
->bc
->cf_last
;
8890 cf
->rat
.id
= ctx
->shader
->rat_base
+ inst
->Src
[0].Register
.Index
;
8891 cf
->rat
.inst
= V_RAT_INST_NOP_RTN
;
8892 cf
->rat
.index_mode
= rat_index_mode
;
8893 cf
->output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND
;
8894 cf
->output
.gpr
= ctx
->thread_id_gpr
;
8895 cf
->output
.index_gpr
= idx_gpr
;
8896 cf
->output
.comp_mask
= 0xf;
8897 cf
->output
.burst_count
= 1;
8901 cf
->output
.elem_size
= 0;
8903 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_WAIT_ACK
);
8904 cf
= ctx
->bc
->cf_last
;
8907 desc
= util_format_description(inst
->Memory
.Format
);
8908 r600_vertex_data_type(inst
->Memory
.Format
,
8909 &format
, &num_format
, &format_comp
, &endian
);
8910 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
8911 vtx
.op
= FETCH_OP_VFETCH
;
8912 vtx
.buffer_id
= immed_base
+ inst
->Src
[0].Register
.Index
;
8913 vtx
.buffer_index_mode
= rat_index_mode
;
8914 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
8915 vtx
.src_gpr
= ctx
->thread_id_gpr
;
8917 vtx
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
8918 vtx
.dst_sel_x
= desc
->swizzle
[0];
8919 vtx
.dst_sel_y
= desc
->swizzle
[1];
8920 vtx
.dst_sel_z
= desc
->swizzle
[2];
8921 vtx
.dst_sel_w
= desc
->swizzle
[3];
8922 vtx
.srf_mode_all
= 1;
8923 vtx
.data_format
= format
;
8924 vtx
.num_format_all
= num_format
;
8925 vtx
.format_comp_all
= format_comp
;
8926 vtx
.endian
= endian
;
8928 vtx
.mega_fetch_count
= 3;
8929 r
= r600_bytecode_add_vtx_tc(ctx
->bc
, &vtx
);
8932 cf
= ctx
->bc
->cf_last
;
8937 static int tgsi_load_lds(struct r600_shader_ctx
*ctx
)
8939 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8940 struct r600_bytecode_alu alu
;
8942 int temp_reg
= r600_get_temp(ctx
);
8944 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8945 alu
.op
= ALU_OP1_MOV
;
8946 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], 0);
8947 alu
.dst
.sel
= temp_reg
;
8950 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
8954 r
= do_lds_fetch_values(ctx
, temp_reg
,
8955 ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
, inst
->Dst
[0].Register
.WriteMask
);
8961 static int tgsi_load(struct r600_shader_ctx
*ctx
)
8963 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8964 if (inst
->Src
[0].Register
.File
== TGSI_FILE_IMAGE
)
8965 return tgsi_load_rat(ctx
);
8966 if (inst
->Src
[0].Register
.File
== TGSI_FILE_HW_ATOMIC
)
8967 return tgsi_load_gds(ctx
);
8968 if (inst
->Src
[0].Register
.File
== TGSI_FILE_BUFFER
)
8969 return tgsi_load_buffer(ctx
);
8970 if (inst
->Src
[0].Register
.File
== TGSI_FILE_MEMORY
)
8971 return tgsi_load_lds(ctx
);
8975 static int tgsi_store_buffer_rat(struct r600_shader_ctx
*ctx
)
8977 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
8978 struct r600_bytecode_cf
*cf
;
8980 unsigned rat_index_mode
;
8982 int temp_reg
= r600_get_temp(ctx
), treg2
= r600_get_temp(ctx
);
8984 r
= load_buffer_coord(ctx
, 0, treg2
);
8988 rat_index_mode
= inst
->Dst
[0].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
8990 egcm_load_index_reg(ctx
->bc
, 1, false);
8992 for (i
= 0; i
<= 3; i
++) {
8993 struct r600_bytecode_alu alu
;
8994 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
8995 alu
.op
= ALU_OP1_MOV
;
8996 alu
.dst
.sel
= temp_reg
;
8998 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
8999 alu
.last
= (i
== 3);
9001 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9006 lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
9007 for (i
= 0; i
<= lasti
; i
++) {
9008 struct r600_bytecode_alu alu
;
9009 if (!((1 << i
) & inst
->Dst
[0].Register
.WriteMask
))
9012 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
9015 V_SQ_ALU_SRC_LITERAL
, i
);
9019 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9020 alu
.op
= ALU_OP1_MOV
;
9021 alu
.dst
.sel
= ctx
->temp_reg
;
9024 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
9027 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9031 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_MEM_RAT
);
9032 cf
= ctx
->bc
->cf_last
;
9034 cf
->rat
.id
= ctx
->shader
->rat_base
+ inst
->Dst
[0].Register
.Index
+ ctx
->info
.file_count
[TGSI_FILE_IMAGE
];
9035 cf
->rat
.inst
= V_RAT_INST_STORE_TYPED
;
9036 cf
->rat
.index_mode
= rat_index_mode
;
9037 cf
->output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND
;
9038 cf
->output
.gpr
= ctx
->temp_reg
;
9039 cf
->output
.index_gpr
= temp_reg
;
9040 cf
->output
.comp_mask
= 1;
9041 cf
->output
.burst_count
= 1;
9044 cf
->output
.elem_size
= 0;
9049 static int tgsi_store_rat(struct r600_shader_ctx
*ctx
)
9051 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9052 struct r600_bytecode_cf
*cf
;
9053 bool src_requires_loading
= false;
9054 int val_gpr
, idx_gpr
;
9056 unsigned rat_index_mode
;
9058 rat_index_mode
= inst
->Dst
[0].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
9060 r
= load_index_src(ctx
, 0, &idx_gpr
);
9064 if (inst
->Src
[1].Register
.File
!= TGSI_FILE_TEMPORARY
)
9065 src_requires_loading
= true;
9067 if (src_requires_loading
) {
9068 struct r600_bytecode_alu alu
;
9069 for (i
= 0; i
< 4; i
++) {
9070 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9071 alu
.op
= ALU_OP1_MOV
;
9072 alu
.dst
.sel
= ctx
->temp_reg
;
9075 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
9079 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9083 val_gpr
= ctx
->temp_reg
;
9085 val_gpr
= tgsi_tex_get_src_gpr(ctx
, 1);
9087 egcm_load_index_reg(ctx
->bc
, 1, false);
9089 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_MEM_RAT
);
9090 cf
= ctx
->bc
->cf_last
;
9092 cf
->rat
.id
= ctx
->shader
->rat_base
+ inst
->Dst
[0].Register
.Index
;
9093 cf
->rat
.inst
= V_RAT_INST_STORE_TYPED
;
9094 cf
->rat
.index_mode
= rat_index_mode
;
9095 cf
->output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND
;
9096 cf
->output
.gpr
= val_gpr
;
9097 cf
->output
.index_gpr
= idx_gpr
;
9098 cf
->output
.comp_mask
= 0xf;
9099 cf
->output
.burst_count
= 1;
9102 cf
->output
.elem_size
= 0;
9106 static int tgsi_store_lds(struct r600_shader_ctx
*ctx
)
9108 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9109 struct r600_bytecode_alu alu
;
9111 int write_mask
= inst
->Dst
[0].Register
.WriteMask
;
9112 int temp_reg
= r600_get_temp(ctx
);
9115 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9116 alu
.op
= ALU_OP1_MOV
;
9117 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9118 alu
.dst
.sel
= temp_reg
;
9121 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9125 lasti
= tgsi_last_instruction(write_mask
);
9126 for (i
= 1; i
<= lasti
; i
++) {
9127 if (!(write_mask
& (1 << i
)))
9129 r
= single_alu_op2(ctx
, ALU_OP2_ADD_INT
,
9132 V_SQ_ALU_SRC_LITERAL
, 4 * i
);
9136 for (i
= 0; i
<= lasti
; i
++) {
9137 if (!(write_mask
& (1 << i
)))
9140 if ((i
== 0 && ((write_mask
& 3) == 3)) ||
9141 (i
== 2 && ((write_mask
& 0xc) == 0xc))) {
9142 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9143 alu
.op
= LDS_OP3_LDS_WRITE_REL
;
9145 alu
.src
[0].sel
= temp_reg
;
9146 alu
.src
[0].chan
= i
;
9147 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
9148 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[1], i
+ 1);
9150 alu
.is_lds_idx_op
= true;
9152 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9158 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9159 alu
.op
= LDS_OP2_LDS_WRITE
;
9161 alu
.src
[0].sel
= temp_reg
;
9162 alu
.src
[0].chan
= i
;
9163 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
9166 alu
.is_lds_idx_op
= true;
9168 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9175 static int tgsi_store(struct r600_shader_ctx
*ctx
)
9177 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9178 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_BUFFER
)
9179 return tgsi_store_buffer_rat(ctx
);
9180 else if (inst
->Dst
[0].Register
.File
== TGSI_FILE_MEMORY
)
9181 return tgsi_store_lds(ctx
);
9183 return tgsi_store_rat(ctx
);
9186 static int tgsi_atomic_op_rat(struct r600_shader_ctx
*ctx
)
9188 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9189 /* have to work out the offset into the RAT immediate return buffer */
9190 struct r600_bytecode_alu alu
;
9191 struct r600_bytecode_vtx vtx
;
9192 struct r600_bytecode_cf
*cf
;
9195 unsigned format
, num_format
, format_comp
, endian
;
9196 const struct util_format_description
*desc
;
9197 unsigned rat_index_mode
;
9198 unsigned immed_base
;
9201 immed_base
= R600_IMAGE_IMMED_RESOURCE_OFFSET
;
9202 rat_base
= ctx
->shader
->rat_base
;
9204 if (inst
->Src
[0].Register
.File
== TGSI_FILE_BUFFER
) {
9205 immed_base
+= ctx
->info
.file_count
[TGSI_FILE_IMAGE
];
9206 rat_base
+= ctx
->info
.file_count
[TGSI_FILE_IMAGE
];
9208 r
= load_buffer_coord(ctx
, 1, ctx
->temp_reg
);
9211 idx_gpr
= ctx
->temp_reg
;
9213 r
= load_index_src(ctx
, 1, &idx_gpr
);
9218 rat_index_mode
= inst
->Src
[0].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
9220 if (ctx
->inst_info
->op
== V_RAT_INST_CMPXCHG_INT_RTN
) {
9221 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9222 alu
.op
= ALU_OP1_MOV
;
9223 alu
.dst
.sel
= ctx
->thread_id_gpr
;
9226 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[3], 0);
9228 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9232 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9233 alu
.op
= ALU_OP1_MOV
;
9234 alu
.dst
.sel
= ctx
->thread_id_gpr
;
9235 if (ctx
->bc
->chip_class
== CAYMAN
)
9240 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[2], 0);
9242 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9246 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9247 alu
.op
= ALU_OP1_MOV
;
9248 alu
.dst
.sel
= ctx
->thread_id_gpr
;
9251 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[2], 0);
9253 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9259 egcm_load_index_reg(ctx
->bc
, 1, false);
9260 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_MEM_RAT
);
9261 cf
= ctx
->bc
->cf_last
;
9263 cf
->rat
.id
= rat_base
+ inst
->Src
[0].Register
.Index
;
9264 cf
->rat
.inst
= ctx
->inst_info
->op
;
9265 cf
->rat
.index_mode
= rat_index_mode
;
9266 cf
->output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND
;
9267 cf
->output
.gpr
= ctx
->thread_id_gpr
;
9268 cf
->output
.index_gpr
= idx_gpr
;
9269 cf
->output
.comp_mask
= 0xf;
9270 cf
->output
.burst_count
= 1;
9274 cf
->output
.elem_size
= 0;
9275 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_WAIT_ACK
);
9276 cf
= ctx
->bc
->cf_last
;
9280 memset(&vtx
, 0, sizeof(struct r600_bytecode_vtx
));
9281 if (inst
->Src
[0].Register
.File
== TGSI_FILE_IMAGE
) {
9282 desc
= util_format_description(inst
->Memory
.Format
);
9283 r600_vertex_data_type(inst
->Memory
.Format
,
9284 &format
, &num_format
, &format_comp
, &endian
);
9285 vtx
.dst_sel_x
= desc
->swizzle
[0];
9293 vtx
.op
= FETCH_OP_VFETCH
;
9294 vtx
.buffer_id
= immed_base
+ inst
->Src
[0].Register
.Index
;
9295 vtx
.buffer_index_mode
= rat_index_mode
;
9296 vtx
.fetch_type
= SQ_VTX_FETCH_NO_INDEX_OFFSET
;
9297 vtx
.src_gpr
= ctx
->thread_id_gpr
;
9299 vtx
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
9303 vtx
.use_const_fields
= 0;
9304 vtx
.srf_mode_all
= 1;
9305 vtx
.data_format
= format
;
9306 vtx
.num_format_all
= num_format
;
9307 vtx
.format_comp_all
= format_comp
;
9308 vtx
.endian
= endian
;
9310 vtx
.mega_fetch_count
= 0xf;
9311 r
= r600_bytecode_add_vtx_tc(ctx
->bc
, &vtx
);
9314 cf
= ctx
->bc
->cf_last
;
9320 static int get_gds_op(int opcode
)
9323 case TGSI_OPCODE_ATOMUADD
:
9324 return FETCH_OP_GDS_ADD_RET
;
9325 case TGSI_OPCODE_ATOMAND
:
9326 return FETCH_OP_GDS_AND_RET
;
9327 case TGSI_OPCODE_ATOMOR
:
9328 return FETCH_OP_GDS_OR_RET
;
9329 case TGSI_OPCODE_ATOMXOR
:
9330 return FETCH_OP_GDS_XOR_RET
;
9331 case TGSI_OPCODE_ATOMUMIN
:
9332 return FETCH_OP_GDS_MIN_UINT_RET
;
9333 case TGSI_OPCODE_ATOMUMAX
:
9334 return FETCH_OP_GDS_MAX_UINT_RET
;
9335 case TGSI_OPCODE_ATOMXCHG
:
9336 return FETCH_OP_GDS_XCHG_RET
;
9337 case TGSI_OPCODE_ATOMCAS
:
9338 return FETCH_OP_GDS_CMP_XCHG_RET
;
9344 static int tgsi_atomic_op_gds(struct r600_shader_ctx
*ctx
)
9346 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9347 struct r600_bytecode_gds gds
;
9348 struct r600_bytecode_alu alu
;
9349 int gds_op
= get_gds_op(inst
->Instruction
.Opcode
);
9352 int uav_index_mode
= 0;
9353 bool is_cm
= (ctx
->bc
->chip_class
== CAYMAN
);
9356 fprintf(stderr
, "unknown GDS op for opcode %d\n", inst
->Instruction
.Opcode
);
9360 r
= tgsi_set_gds_temp(ctx
, &uav_id
, &uav_index_mode
);
9364 if (gds_op
== FETCH_OP_GDS_CMP_XCHG_RET
) {
9365 if (inst
->Src
[3].Register
.File
== TGSI_FILE_IMMEDIATE
) {
9366 int value
= (ctx
->literals
[4 * inst
->Src
[3].Register
.Index
+ inst
->Src
[3].Register
.SwizzleX
]);
9367 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9368 alu
.op
= ALU_OP1_MOV
;
9369 alu
.dst
.sel
= ctx
->temp_reg
;
9370 alu
.dst
.chan
= is_cm
? 2 : 1;
9371 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
9372 alu
.src
[0].value
= value
;
9375 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9379 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9380 alu
.op
= ALU_OP1_MOV
;
9381 alu
.dst
.sel
= ctx
->temp_reg
;
9382 alu
.dst
.chan
= is_cm
? 2 : 1;
9383 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[3], 0);
9386 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9391 if (inst
->Src
[2].Register
.File
== TGSI_FILE_IMMEDIATE
) {
9392 int value
= (ctx
->literals
[4 * inst
->Src
[2].Register
.Index
+ inst
->Src
[2].Register
.SwizzleX
]);
9393 int abs_value
= abs(value
);
9394 if (abs_value
!= value
&& gds_op
== FETCH_OP_GDS_ADD_RET
)
9395 gds_op
= FETCH_OP_GDS_SUB_RET
;
9396 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9397 alu
.op
= ALU_OP1_MOV
;
9398 alu
.dst
.sel
= ctx
->temp_reg
;
9399 alu
.dst
.chan
= is_cm
? 1 : 0;
9400 alu
.src
[0].sel
= V_SQ_ALU_SRC_LITERAL
;
9401 alu
.src
[0].value
= abs_value
;
9404 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9408 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9409 alu
.op
= ALU_OP1_MOV
;
9410 alu
.dst
.sel
= ctx
->temp_reg
;
9411 alu
.dst
.chan
= is_cm
? 1 : 0;
9412 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[2], 0);
9415 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9421 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
9423 gds
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
9424 gds
.uav_id
= is_cm
? 0 : uav_id
;
9425 gds
.uav_index_mode
= is_cm
? 0 : uav_index_mode
;
9426 gds
.src_gpr
= ctx
->temp_reg
;
9428 gds
.src_sel_x
= is_cm
? 0 : 4;
9429 gds
.src_sel_y
= is_cm
? 1 : 0;
9430 if (gds_op
== FETCH_OP_GDS_CMP_XCHG_RET
)
9431 gds
.src_sel_z
= is_cm
? 2 : 1;
9438 gds
.alloc_consume
= !is_cm
;
9440 r
= r600_bytecode_add_gds(ctx
->bc
, &gds
);
9443 ctx
->bc
->cf_last
->vpm
= 1;
9447 static int get_lds_op(int opcode
)
9450 case TGSI_OPCODE_ATOMUADD
:
9451 return LDS_OP2_LDS_ADD_RET
;
9452 case TGSI_OPCODE_ATOMAND
:
9453 return LDS_OP2_LDS_AND_RET
;
9454 case TGSI_OPCODE_ATOMOR
:
9455 return LDS_OP2_LDS_OR_RET
;
9456 case TGSI_OPCODE_ATOMXOR
:
9457 return LDS_OP2_LDS_XOR_RET
;
9458 case TGSI_OPCODE_ATOMUMIN
:
9459 return LDS_OP2_LDS_MIN_UINT_RET
;
9460 case TGSI_OPCODE_ATOMUMAX
:
9461 return LDS_OP2_LDS_MAX_UINT_RET
;
9462 case TGSI_OPCODE_ATOMIMIN
:
9463 return LDS_OP2_LDS_MIN_INT_RET
;
9464 case TGSI_OPCODE_ATOMIMAX
:
9465 return LDS_OP2_LDS_MAX_INT_RET
;
9466 case TGSI_OPCODE_ATOMXCHG
:
9467 return LDS_OP2_LDS_XCHG_RET
;
9468 case TGSI_OPCODE_ATOMCAS
:
9469 return LDS_OP3_LDS_CMP_XCHG_RET
;
9475 static int tgsi_atomic_op_lds(struct r600_shader_ctx
*ctx
)
9477 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9478 int lds_op
= get_lds_op(inst
->Instruction
.Opcode
);
9481 struct r600_bytecode_alu alu
;
9482 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9484 alu
.is_lds_idx_op
= true;
9486 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], 0);
9487 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[2], 0);
9488 if (lds_op
== LDS_OP3_LDS_CMP_XCHG_RET
)
9489 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[3], 0);
9491 alu
.src
[2].sel
= V_SQ_ALU_SRC_0
;
9492 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9496 /* then read from LDS_OQ_A_POP */
9497 memset(&alu
, 0, sizeof(alu
));
9499 alu
.op
= ALU_OP1_MOV
;
9500 alu
.src
[0].sel
= EG_V_SQ_ALU_SRC_LDS_OQ_A_POP
;
9501 alu
.src
[0].chan
= 0;
9502 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
9505 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9512 static int tgsi_atomic_op(struct r600_shader_ctx
*ctx
)
9514 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9515 if (inst
->Src
[0].Register
.File
== TGSI_FILE_IMAGE
)
9516 return tgsi_atomic_op_rat(ctx
);
9517 if (inst
->Src
[0].Register
.File
== TGSI_FILE_HW_ATOMIC
)
9518 return tgsi_atomic_op_gds(ctx
);
9519 if (inst
->Src
[0].Register
.File
== TGSI_FILE_BUFFER
)
9520 return tgsi_atomic_op_rat(ctx
);
9521 if (inst
->Src
[0].Register
.File
== TGSI_FILE_MEMORY
)
9522 return tgsi_atomic_op_lds(ctx
);
9526 static int tgsi_resq(struct r600_shader_ctx
*ctx
)
9528 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9529 unsigned sampler_index_mode
;
9530 struct r600_bytecode_tex tex
;
9532 boolean has_txq_cube_array_z
= false;
9534 if (inst
->Src
[0].Register
.File
== TGSI_FILE_BUFFER
||
9535 (inst
->Src
[0].Register
.File
== TGSI_FILE_IMAGE
&& inst
->Memory
.Texture
== TGSI_TEXTURE_BUFFER
)) {
9536 if (ctx
->bc
->chip_class
< EVERGREEN
)
9537 ctx
->shader
->uses_tex_buffers
= true;
9538 unsigned eg_buffer_base
= 0;
9539 eg_buffer_base
= R600_IMAGE_REAL_RESOURCE_OFFSET
;
9540 if (inst
->Src
[0].Register
.File
== TGSI_FILE_BUFFER
)
9541 eg_buffer_base
+= ctx
->info
.file_count
[TGSI_FILE_IMAGE
];
9542 return r600_do_buffer_txq(ctx
, 0, ctx
->shader
->image_size_const_offset
, eg_buffer_base
);
9545 if (inst
->Memory
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
&&
9546 inst
->Dst
[0].Register
.WriteMask
& 4) {
9547 ctx
->shader
->has_txq_cube_array_z_comp
= true;
9548 has_txq_cube_array_z
= true;
9551 sampler_index_mode
= inst
->Src
[0].Indirect
.Index
== 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
9552 if (sampler_index_mode
)
9553 egcm_load_index_reg(ctx
->bc
, 1, false);
9556 /* does this shader want a num layers from TXQ for a cube array? */
9557 if (has_txq_cube_array_z
) {
9558 int id
= tgsi_tex_get_src_gpr(ctx
, 0) + ctx
->shader
->image_size_const_offset
;
9559 struct r600_bytecode_alu alu
;
9561 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9562 alu
.op
= ALU_OP1_MOV
;
9564 alu
.src
[0].sel
= R600_SHADER_BUFFER_INFO_SEL
;
9565 /* with eg each dword is either number of cubes */
9566 alu
.src
[0].sel
+= id
/ 4;
9567 alu
.src
[0].chan
= id
% 4;
9568 alu
.src
[0].kc_bank
= R600_BUFFER_INFO_CONST_BUFFER
;
9569 tgsi_dst(ctx
, &inst
->Dst
[0], 2, &alu
.dst
);
9571 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9574 /* disable writemask from texture instruction */
9575 inst
->Dst
[0].Register
.WriteMask
&= ~4;
9577 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
9578 tex
.op
= ctx
->inst_info
->op
;
9579 tex
.sampler_id
= R600_IMAGE_REAL_RESOURCE_OFFSET
+ inst
->Src
[0].Register
.Index
;
9580 tex
.sampler_index_mode
= sampler_index_mode
;
9581 tex
.resource_id
= tex
.sampler_id
;
9582 tex
.resource_index_mode
= sampler_index_mode
;
9587 tex
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7;
9588 tex
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7;
9589 tex
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 2 : 7;
9590 tex
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7;
9591 tex
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
9592 r
= r600_bytecode_add_tex(ctx
->bc
, &tex
);
9599 static int tgsi_lrp(struct r600_shader_ctx
*ctx
)
9601 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9602 struct r600_bytecode_alu alu
;
9603 unsigned lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
9604 struct r600_bytecode_alu_src srcs
[2][4];
9608 /* optimize if it's just an equal balance */
9609 if (ctx
->src
[0].sel
== V_SQ_ALU_SRC_0_5
) {
9610 for (i
= 0; i
< lasti
+ 1; i
++) {
9611 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
9614 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9615 alu
.op
= ALU_OP2_ADD
;
9616 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[1], i
);
9617 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[2], i
);
9619 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
9624 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9632 for (i
= 0; i
< lasti
+ 1; i
++) {
9633 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
9636 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9637 alu
.op
= ALU_OP2_ADD
;
9638 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
9639 alu
.src
[0].chan
= 0;
9640 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[0], i
);
9641 r600_bytecode_src_toggle_neg(&alu
.src
[1]);
9642 alu
.dst
.sel
= ctx
->temp_reg
;
9648 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9653 /* (1 - src0) * src2 */
9654 for (i
= 0; i
< lasti
+ 1; i
++) {
9655 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
9658 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9659 alu
.op
= ALU_OP2_MUL
;
9660 alu
.src
[0].sel
= ctx
->temp_reg
;
9661 alu
.src
[0].chan
= i
;
9662 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[2], i
);
9663 alu
.dst
.sel
= ctx
->temp_reg
;
9669 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9674 /* src0 * src1 + (1 - src0) * src2 */
9676 for (i
= 0; i
< 2; i
++) {
9677 r
= tgsi_make_src_for_op3(ctx
, inst
->Dst
[0].Register
.WriteMask
,
9678 srcs
[i
], &ctx
->src
[i
]);
9683 for (i
= 0; i
< lasti
+ 1; i
++) {
9684 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
9687 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9688 alu
.op
= ALU_OP3_MULADD
;
9690 alu
.src
[0] = srcs
[0][i
];
9691 alu
.src
[1] = srcs
[1][i
];
9692 alu
.src
[2].sel
= ctx
->temp_reg
;
9693 alu
.src
[2].chan
= i
;
9695 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
9700 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9707 static int tgsi_cmp(struct r600_shader_ctx
*ctx
)
9709 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9710 struct r600_bytecode_alu alu
;
9712 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
9713 struct r600_bytecode_alu_src srcs
[3][4];
9717 if (ctx
->src
[0].abs
&& ctx
->src
[0].neg
) {
9719 ctx
->src
[0].abs
= 0;
9720 ctx
->src
[0].neg
= 0;
9725 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
9726 r
= tgsi_make_src_for_op3(ctx
, inst
->Dst
[0].Register
.WriteMask
,
9727 srcs
[j
], &ctx
->src
[j
]);
9732 for (i
= 0; i
< lasti
+ 1; i
++) {
9733 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
9736 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9738 alu
.src
[0] = srcs
[0][i
];
9739 alu
.src
[1] = srcs
[2][i
];
9740 alu
.src
[2] = srcs
[1][i
];
9742 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
9748 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9755 static int tgsi_ucmp(struct r600_shader_ctx
*ctx
)
9757 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9758 struct r600_bytecode_alu alu
;
9760 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
9762 for (i
= 0; i
< lasti
+ 1; i
++) {
9763 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
9766 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9767 alu
.op
= ALU_OP3_CNDE_INT
;
9768 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
9769 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[2], i
);
9770 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[1], i
);
9771 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
9777 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9784 static int tgsi_exp(struct r600_shader_ctx
*ctx
)
9786 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9787 struct r600_bytecode_alu alu
;
9791 /* result.x = 2^floor(src); */
9792 if (inst
->Dst
[0].Register
.WriteMask
& 1) {
9793 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9795 alu
.op
= ALU_OP1_FLOOR
;
9796 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9798 alu
.dst
.sel
= ctx
->temp_reg
;
9802 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9806 if (ctx
->bc
->chip_class
== CAYMAN
) {
9807 for (i
= 0; i
< 3; i
++) {
9808 alu
.op
= ALU_OP1_EXP_IEEE
;
9809 alu
.src
[0].sel
= ctx
->temp_reg
;
9810 alu
.src
[0].chan
= 0;
9812 alu
.dst
.sel
= ctx
->temp_reg
;
9814 alu
.dst
.write
= i
== 0;
9816 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9821 alu
.op
= ALU_OP1_EXP_IEEE
;
9822 alu
.src
[0].sel
= ctx
->temp_reg
;
9823 alu
.src
[0].chan
= 0;
9825 alu
.dst
.sel
= ctx
->temp_reg
;
9829 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9835 /* result.y = tmp - floor(tmp); */
9836 if ((inst
->Dst
[0].Register
.WriteMask
>> 1) & 1) {
9837 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9839 alu
.op
= ALU_OP1_FRACT
;
9840 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9842 alu
.dst
.sel
= ctx
->temp_reg
;
9844 r
= tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
9853 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9858 /* result.z = RoughApprox2ToX(tmp);*/
9859 if ((inst
->Dst
[0].Register
.WriteMask
>> 2) & 0x1) {
9860 if (ctx
->bc
->chip_class
== CAYMAN
) {
9861 for (i
= 0; i
< 3; i
++) {
9862 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9863 alu
.op
= ALU_OP1_EXP_IEEE
;
9864 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9866 alu
.dst
.sel
= ctx
->temp_reg
;
9873 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9878 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9879 alu
.op
= ALU_OP1_EXP_IEEE
;
9880 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9882 alu
.dst
.sel
= ctx
->temp_reg
;
9888 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9894 /* result.w = 1.0;*/
9895 if ((inst
->Dst
[0].Register
.WriteMask
>> 3) & 0x1) {
9896 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9898 alu
.op
= ALU_OP1_MOV
;
9899 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
9900 alu
.src
[0].chan
= 0;
9902 alu
.dst
.sel
= ctx
->temp_reg
;
9906 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9910 return tgsi_helper_copy(ctx
, inst
);
9913 static int tgsi_log(struct r600_shader_ctx
*ctx
)
9915 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
9916 struct r600_bytecode_alu alu
;
9920 /* result.x = floor(log2(|src|)); */
9921 if (inst
->Dst
[0].Register
.WriteMask
& 1) {
9922 if (ctx
->bc
->chip_class
== CAYMAN
) {
9923 for (i
= 0; i
< 3; i
++) {
9924 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9926 alu
.op
= ALU_OP1_LOG_IEEE
;
9927 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9928 r600_bytecode_src_set_abs(&alu
.src
[0]);
9930 alu
.dst
.sel
= ctx
->temp_reg
;
9936 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9942 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9944 alu
.op
= ALU_OP1_LOG_IEEE
;
9945 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9946 r600_bytecode_src_set_abs(&alu
.src
[0]);
9948 alu
.dst
.sel
= ctx
->temp_reg
;
9952 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9957 alu
.op
= ALU_OP1_FLOOR
;
9958 alu
.src
[0].sel
= ctx
->temp_reg
;
9959 alu
.src
[0].chan
= 0;
9961 alu
.dst
.sel
= ctx
->temp_reg
;
9966 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9971 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
9972 if ((inst
->Dst
[0].Register
.WriteMask
>> 1) & 1) {
9974 if (ctx
->bc
->chip_class
== CAYMAN
) {
9975 for (i
= 0; i
< 3; i
++) {
9976 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9978 alu
.op
= ALU_OP1_LOG_IEEE
;
9979 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9980 r600_bytecode_src_set_abs(&alu
.src
[0]);
9982 alu
.dst
.sel
= ctx
->temp_reg
;
9989 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
9994 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
9996 alu
.op
= ALU_OP1_LOG_IEEE
;
9997 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
9998 r600_bytecode_src_set_abs(&alu
.src
[0]);
10000 alu
.dst
.sel
= ctx
->temp_reg
;
10005 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10010 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10012 alu
.op
= ALU_OP1_FLOOR
;
10013 alu
.src
[0].sel
= ctx
->temp_reg
;
10014 alu
.src
[0].chan
= 1;
10016 alu
.dst
.sel
= ctx
->temp_reg
;
10021 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10025 if (ctx
->bc
->chip_class
== CAYMAN
) {
10026 for (i
= 0; i
< 3; i
++) {
10027 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10028 alu
.op
= ALU_OP1_EXP_IEEE
;
10029 alu
.src
[0].sel
= ctx
->temp_reg
;
10030 alu
.src
[0].chan
= 1;
10032 alu
.dst
.sel
= ctx
->temp_reg
;
10039 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10044 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10045 alu
.op
= ALU_OP1_EXP_IEEE
;
10046 alu
.src
[0].sel
= ctx
->temp_reg
;
10047 alu
.src
[0].chan
= 1;
10049 alu
.dst
.sel
= ctx
->temp_reg
;
10054 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10059 if (ctx
->bc
->chip_class
== CAYMAN
) {
10060 for (i
= 0; i
< 3; i
++) {
10061 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10062 alu
.op
= ALU_OP1_RECIP_IEEE
;
10063 alu
.src
[0].sel
= ctx
->temp_reg
;
10064 alu
.src
[0].chan
= 1;
10066 alu
.dst
.sel
= ctx
->temp_reg
;
10073 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10078 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10079 alu
.op
= ALU_OP1_RECIP_IEEE
;
10080 alu
.src
[0].sel
= ctx
->temp_reg
;
10081 alu
.src
[0].chan
= 1;
10083 alu
.dst
.sel
= ctx
->temp_reg
;
10088 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10093 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10095 alu
.op
= ALU_OP2_MUL
;
10097 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
10098 r600_bytecode_src_set_abs(&alu
.src
[0]);
10100 alu
.src
[1].sel
= ctx
->temp_reg
;
10101 alu
.src
[1].chan
= 1;
10103 alu
.dst
.sel
= ctx
->temp_reg
;
10108 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10113 /* result.z = log2(|src|);*/
10114 if ((inst
->Dst
[0].Register
.WriteMask
>> 2) & 1) {
10115 if (ctx
->bc
->chip_class
== CAYMAN
) {
10116 for (i
= 0; i
< 3; i
++) {
10117 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10119 alu
.op
= ALU_OP1_LOG_IEEE
;
10120 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
10121 r600_bytecode_src_set_abs(&alu
.src
[0]);
10123 alu
.dst
.sel
= ctx
->temp_reg
;
10130 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10135 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10137 alu
.op
= ALU_OP1_LOG_IEEE
;
10138 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
10139 r600_bytecode_src_set_abs(&alu
.src
[0]);
10141 alu
.dst
.sel
= ctx
->temp_reg
;
10146 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10152 /* result.w = 1.0; */
10153 if ((inst
->Dst
[0].Register
.WriteMask
>> 3) & 1) {
10154 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10156 alu
.op
= ALU_OP1_MOV
;
10157 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
10158 alu
.src
[0].chan
= 0;
10160 alu
.dst
.sel
= ctx
->temp_reg
;
10165 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10170 return tgsi_helper_copy(ctx
, inst
);
10173 static int tgsi_eg_arl(struct r600_shader_ctx
*ctx
)
10175 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10176 struct r600_bytecode_alu alu
;
10178 int i
, lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
10179 unsigned reg
= get_address_file_reg(ctx
, inst
->Dst
[0].Register
.Index
);
10181 assert(inst
->Dst
[0].Register
.Index
< 3);
10182 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10184 switch (inst
->Instruction
.Opcode
) {
10185 case TGSI_OPCODE_ARL
:
10186 alu
.op
= ALU_OP1_FLT_TO_INT_FLOOR
;
10188 case TGSI_OPCODE_ARR
:
10189 alu
.op
= ALU_OP1_FLT_TO_INT
;
10191 case TGSI_OPCODE_UARL
:
10192 alu
.op
= ALU_OP1_MOV
;
10199 for (i
= 0; i
<= lasti
; ++i
) {
10200 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
10202 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
10203 alu
.last
= i
== lasti
;
10207 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10212 if (inst
->Dst
[0].Register
.Index
> 0)
10213 ctx
->bc
->index_loaded
[inst
->Dst
[0].Register
.Index
- 1] = 0;
10215 ctx
->bc
->ar_loaded
= 0;
10219 static int tgsi_r600_arl(struct r600_shader_ctx
*ctx
)
10221 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10222 struct r600_bytecode_alu alu
;
10224 int i
, lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
10226 switch (inst
->Instruction
.Opcode
) {
10227 case TGSI_OPCODE_ARL
:
10228 memset(&alu
, 0, sizeof(alu
));
10229 alu
.op
= ALU_OP1_FLOOR
;
10230 alu
.dst
.sel
= ctx
->bc
->ar_reg
;
10232 for (i
= 0; i
<= lasti
; ++i
) {
10233 if (inst
->Dst
[0].Register
.WriteMask
& (1 << i
)) {
10235 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
10236 alu
.last
= i
== lasti
;
10237 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
10242 memset(&alu
, 0, sizeof(alu
));
10243 alu
.op
= ALU_OP1_FLT_TO_INT
;
10244 alu
.src
[0].sel
= ctx
->bc
->ar_reg
;
10245 alu
.dst
.sel
= ctx
->bc
->ar_reg
;
10247 /* FLT_TO_INT is trans-only on r600/r700 */
10249 for (i
= 0; i
<= lasti
; ++i
) {
10251 alu
.src
[0].chan
= i
;
10252 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
10256 case TGSI_OPCODE_ARR
:
10257 memset(&alu
, 0, sizeof(alu
));
10258 alu
.op
= ALU_OP1_FLT_TO_INT
;
10259 alu
.dst
.sel
= ctx
->bc
->ar_reg
;
10261 /* FLT_TO_INT is trans-only on r600/r700 */
10263 for (i
= 0; i
<= lasti
; ++i
) {
10264 if (inst
->Dst
[0].Register
.WriteMask
& (1 << i
)) {
10266 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
10267 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
10272 case TGSI_OPCODE_UARL
:
10273 memset(&alu
, 0, sizeof(alu
));
10274 alu
.op
= ALU_OP1_MOV
;
10275 alu
.dst
.sel
= ctx
->bc
->ar_reg
;
10277 for (i
= 0; i
<= lasti
; ++i
) {
10278 if (inst
->Dst
[0].Register
.WriteMask
& (1 << i
)) {
10280 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
10281 alu
.last
= i
== lasti
;
10282 if ((r
= r600_bytecode_add_alu(ctx
->bc
, &alu
)))
10292 ctx
->bc
->ar_loaded
= 0;
10296 static int tgsi_opdst(struct r600_shader_ctx
*ctx
)
10298 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10299 struct r600_bytecode_alu alu
;
10302 for (i
= 0; i
< 4; i
++) {
10303 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10305 alu
.op
= ALU_OP2_MUL
;
10306 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
10308 if (i
== 0 || i
== 3) {
10309 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
10311 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], i
);
10314 if (i
== 0 || i
== 2) {
10315 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
10317 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], i
);
10321 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10328 static int emit_logic_pred(struct r600_shader_ctx
*ctx
, int opcode
, int alu_type
,
10329 struct r600_bytecode_alu_src
*src
)
10331 struct r600_bytecode_alu alu
;
10334 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10336 alu
.execute_mask
= 1;
10337 alu
.update_pred
= 1;
10339 alu
.dst
.sel
= ctx
->temp_reg
;
10344 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
10345 alu
.src
[1].chan
= 0;
10349 r
= r600_bytecode_add_alu_type(ctx
->bc
, &alu
, alu_type
);
10355 static int pops(struct r600_shader_ctx
*ctx
, int pops
)
10357 unsigned force_pop
= ctx
->bc
->force_add_cf
;
10361 if (ctx
->bc
->cf_last
) {
10362 if (ctx
->bc
->cf_last
->op
== CF_OP_ALU
)
10364 else if (ctx
->bc
->cf_last
->op
== CF_OP_ALU_POP_AFTER
)
10368 if (alu_pop
== 1) {
10369 ctx
->bc
->cf_last
->op
= CF_OP_ALU_POP_AFTER
;
10370 ctx
->bc
->force_add_cf
= 1;
10371 } else if (alu_pop
== 2) {
10372 ctx
->bc
->cf_last
->op
= CF_OP_ALU_POP2_AFTER
;
10373 ctx
->bc
->force_add_cf
= 1;
10380 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_POP
);
10381 ctx
->bc
->cf_last
->pop_count
= pops
;
10382 ctx
->bc
->cf_last
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
10388 static inline int callstack_update_max_depth(struct r600_shader_ctx
*ctx
,
10391 struct r600_stack_info
*stack
= &ctx
->bc
->stack
;
10395 unsigned entry_size
= stack
->entry_size
;
10397 elements
= (stack
->loop
+ stack
->push_wqm
) * entry_size
;
10398 elements
+= stack
->push
;
10400 switch (ctx
->bc
->chip_class
) {
10403 /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
10404 * the stack must be reserved to hold the current active/continue
10406 if (reason
== FC_PUSH_VPM
|| stack
->push
> 0) {
10412 /* r9xx: any stack operation on empty stack consumes 2 additional
10417 /* FIXME: do the two elements added above cover the cases for the
10421 /* r8xx+: 2 extra elements are not always required, but one extra
10422 * element must be added for each of the following cases:
10423 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
10425 * (Currently we don't use ALU_ELSE_AFTER.)
10426 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
10427 * PUSH instruction executed.
10429 * NOTE: it seems we also need to reserve additional element in some
10430 * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
10431 * then STACK_SIZE should be 2 instead of 1 */
10432 if (reason
== FC_PUSH_VPM
|| stack
->push
> 0) {
10442 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4
10443 * for all chips, so we use 4 in the final formula, not the real entry_size
10447 entries
= (elements
+ (entry_size
- 1)) / entry_size
;
10449 if (entries
> stack
->max_entries
)
10450 stack
->max_entries
= entries
;
10454 static inline void callstack_pop(struct r600_shader_ctx
*ctx
, unsigned reason
)
10458 --ctx
->bc
->stack
.push
;
10459 assert(ctx
->bc
->stack
.push
>= 0);
10462 --ctx
->bc
->stack
.push_wqm
;
10463 assert(ctx
->bc
->stack
.push_wqm
>= 0);
10466 --ctx
->bc
->stack
.loop
;
10467 assert(ctx
->bc
->stack
.loop
>= 0);
10475 static inline int callstack_push(struct r600_shader_ctx
*ctx
, unsigned reason
)
10479 ++ctx
->bc
->stack
.push
;
10482 ++ctx
->bc
->stack
.push_wqm
;
10485 ++ctx
->bc
->stack
.loop
;
10491 return callstack_update_max_depth(ctx
, reason
);
10494 static void fc_set_mid(struct r600_shader_ctx
*ctx
, int fc_sp
)
10496 struct r600_cf_stack_entry
*sp
= &ctx
->bc
->fc_stack
[fc_sp
];
10498 sp
->mid
= realloc((void *)sp
->mid
,
10499 sizeof(struct r600_bytecode_cf
*) * (sp
->num_mid
+ 1));
10500 sp
->mid
[sp
->num_mid
] = ctx
->bc
->cf_last
;
10504 static void fc_pushlevel(struct r600_shader_ctx
*ctx
, int type
)
10506 assert(ctx
->bc
->fc_sp
< ARRAY_SIZE(ctx
->bc
->fc_stack
));
10507 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].type
= type
;
10508 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
= ctx
->bc
->cf_last
;
10512 static void fc_poplevel(struct r600_shader_ctx
*ctx
)
10514 struct r600_cf_stack_entry
*sp
= &ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1];
10524 static int emit_return(struct r600_shader_ctx
*ctx
)
10526 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_RETURN
));
10530 static int emit_jump_to_offset(struct r600_shader_ctx
*ctx
, int pops
, int offset
)
10533 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_JUMP
));
10534 ctx
->bc
->cf_last
->pop_count
= pops
;
10535 /* XXX work out offset */
/* Placeholder for setting a "returned inside loop" flag.
 * NOTE(review): no body statements are visible in this extraction; this is
 * assumed to be a stub — confirm against the original file. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}
/* Placeholder for testing the in-loop return flag.
 * NOTE(review): no body statements are visible in this extraction; this is
 * assumed to be an empty stub — confirm against the original file. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}
10549 static void emit_return_on_flag(struct r600_shader_ctx
*ctx
, unsigned ifidx
)
10551 emit_testflag(ctx
);
10552 emit_jump_to_offset(ctx
, 1, 4);
10553 emit_setret_in_loop_flag(ctx
, V_SQ_ALU_SRC_0
);
10554 pops(ctx
, ifidx
+ 1);
10558 static void break_loop_on_flag(struct r600_shader_ctx
*ctx
, unsigned fc_sp
)
10560 emit_testflag(ctx
);
10562 r600_bytecode_add_cfinst(ctx
->bc
, ctx
->inst_info
->op
);
10563 ctx
->bc
->cf_last
->pop_count
= 1;
10565 fc_set_mid(ctx
, fc_sp
);
10571 static int emit_if(struct r600_shader_ctx
*ctx
, int opcode
,
10572 struct r600_bytecode_alu_src
*src
)
10574 int alu_type
= CF_OP_ALU_PUSH_BEFORE
;
10575 bool needs_workaround
= false;
10576 int elems
= callstack_push(ctx
, FC_PUSH_VPM
);
10578 if (ctx
->bc
->chip_class
== CAYMAN
&& ctx
->bc
->stack
.loop
> 1)
10579 needs_workaround
= true;
10581 if (ctx
->bc
->chip_class
== EVERGREEN
&& ctx_needs_stack_workaround_8xx(ctx
)) {
10582 unsigned dmod1
= (elems
- 1) % ctx
->bc
->stack
.entry_size
;
10583 unsigned dmod2
= (elems
) % ctx
->bc
->stack
.entry_size
;
10585 if (elems
&& (!dmod1
|| !dmod2
))
10586 needs_workaround
= true;
10589 /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
10590 * LOOP_STARTxxx for nested loops may put the branch stack into a state
10591 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
10592 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
10593 if (needs_workaround
) {
10594 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_PUSH
);
10595 ctx
->bc
->cf_last
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
10596 alu_type
= CF_OP_ALU
;
10599 emit_logic_pred(ctx
, opcode
, alu_type
, src
);
10601 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_JUMP
);
10603 fc_pushlevel(ctx
, FC_IF
);
10608 static int tgsi_if(struct r600_shader_ctx
*ctx
)
10610 struct r600_bytecode_alu_src alu_src
;
10611 r600_bytecode_src(&alu_src
, &ctx
->src
[0], 0);
10613 return emit_if(ctx
, ALU_OP2_PRED_SETNE
, &alu_src
);
10616 static int tgsi_uif(struct r600_shader_ctx
*ctx
)
10618 struct r600_bytecode_alu_src alu_src
;
10619 r600_bytecode_src(&alu_src
, &ctx
->src
[0], 0);
10620 return emit_if(ctx
, ALU_OP2_PRED_SETNE_INT
, &alu_src
);
10623 static int tgsi_else(struct r600_shader_ctx
*ctx
)
10625 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_ELSE
);
10626 ctx
->bc
->cf_last
->pop_count
= 1;
10628 fc_set_mid(ctx
, ctx
->bc
->fc_sp
- 1);
10629 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].start
->cf_addr
= ctx
->bc
->cf_last
->id
;
10633 static int tgsi_endif(struct r600_shader_ctx
*ctx
)
10637 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].type
!= FC_IF
) {
10638 R600_ERR("if/endif unbalanced in shader\n");
10642 /* ALU_EXTENDED needs 4 DWords instead of two, adjust jump target offset accordingly */
10643 if (ctx
->bc
->cf_last
->eg_alu_extended
)
10646 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].mid
== NULL
) {
10647 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].start
->cf_addr
= ctx
->bc
->cf_last
->id
+ offset
;
10648 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].start
->pop_count
= 1;
10650 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].mid
[0]->cf_addr
= ctx
->bc
->cf_last
->id
+ offset
;
10654 callstack_pop(ctx
, FC_PUSH_VPM
);
10658 static int tgsi_bgnloop(struct r600_shader_ctx
*ctx
)
10660 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
10661 * limited to 4096 iterations, like the other LOOP_* instructions. */
10662 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_LOOP_START_DX10
);
10664 fc_pushlevel(ctx
, FC_LOOP
);
10666 /* check stack depth */
10667 callstack_push(ctx
, FC_LOOP
);
10671 static int tgsi_endloop(struct r600_shader_ctx
*ctx
)
10675 r600_bytecode_add_cfinst(ctx
->bc
, CF_OP_LOOP_END
);
10677 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].type
!= FC_LOOP
) {
10678 R600_ERR("loop/endloop in shader code are not paired.\n");
10682 /* fixup loop pointers - from r600isa
10683 LOOP END points to CF after LOOP START,
10684 LOOP START point to CF after LOOP END
10685 BRK/CONT point to LOOP END CF
10687 ctx
->bc
->cf_last
->cf_addr
= ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].start
->id
+ 2;
10689 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].start
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
10691 for (i
= 0; i
< ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].num_mid
; i
++) {
10692 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
- 1].mid
[i
]->cf_addr
= ctx
->bc
->cf_last
->id
;
10694 /* XXX add LOOPRET support */
10696 callstack_pop(ctx
, FC_LOOP
);
10700 static int tgsi_loop_brk_cont(struct r600_shader_ctx
*ctx
)
10704 for (fscp
= ctx
->bc
->fc_sp
; fscp
> 0; fscp
--)
10706 if (FC_LOOP
== ctx
->bc
->fc_stack
[fscp
- 1].type
)
10711 R600_ERR("Break not inside loop/endloop pair\n");
10715 r600_bytecode_add_cfinst(ctx
->bc
, ctx
->inst_info
->op
);
10717 fc_set_mid(ctx
, fscp
- 1);
10722 static int tgsi_gs_emit(struct r600_shader_ctx
*ctx
)
10724 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10725 int stream
= ctx
->literals
[inst
->Src
[0].Register
.Index
* 4 + inst
->Src
[0].Register
.SwizzleX
];
10728 if (ctx
->inst_info
->op
== CF_OP_EMIT_VERTEX
)
10729 emit_gs_ring_writes(ctx
, ctx
->gs_stream_output_info
, stream
, TRUE
);
10731 r
= r600_bytecode_add_cfinst(ctx
->bc
, ctx
->inst_info
->op
);
10733 ctx
->bc
->cf_last
->count
= stream
; // Count field for CUT/EMIT_VERTEX indicates which stream
10734 if (ctx
->inst_info
->op
== CF_OP_EMIT_VERTEX
)
10735 return emit_inc_ring_offset(ctx
, stream
, TRUE
);
10740 static int tgsi_umad(struct r600_shader_ctx
*ctx
)
10742 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10743 struct r600_bytecode_alu alu
;
10745 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
10748 for (i
= 0; i
< lasti
+ 1; i
++) {
10749 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
10752 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10755 alu
.dst
.sel
= ctx
->temp_reg
;
10758 alu
.op
= ALU_OP2_MULLO_UINT
;
10759 for (j
= 0; j
< 2; j
++) {
10760 r600_bytecode_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
10764 r
= emit_mul_int_op(ctx
->bc
, &alu
);
10770 for (i
= 0; i
< lasti
+ 1; i
++) {
10771 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
10774 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10775 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
10777 alu
.op
= ALU_OP2_ADD_INT
;
10779 alu
.src
[0].sel
= ctx
->temp_reg
;
10780 alu
.src
[0].chan
= i
;
10782 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[2], i
);
10786 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10793 static int tgsi_pk2h(struct r600_shader_ctx
*ctx
)
10795 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10796 struct r600_bytecode_alu alu
;
10798 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
10800 /* temp.xy = f32_to_f16(src) */
10801 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10802 alu
.op
= ALU_OP1_FLT32_TO_FLT16
;
10804 alu
.dst
.sel
= ctx
->temp_reg
;
10806 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
10807 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10811 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 1);
10813 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10817 /* dst.x = temp.y * 0x10000 + temp.x */
10818 for (i
= 0; i
< lasti
+ 1; i
++) {
10819 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
10822 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10823 alu
.op
= ALU_OP3_MULADD_UINT24
;
10825 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
10826 alu
.last
= i
== lasti
;
10827 alu
.src
[0].sel
= ctx
->temp_reg
;
10828 alu
.src
[0].chan
= 1;
10829 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
10830 alu
.src
[1].value
= 0x10000;
10831 alu
.src
[2].sel
= ctx
->temp_reg
;
10832 alu
.src
[2].chan
= 0;
10833 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10841 static int tgsi_up2h(struct r600_shader_ctx
*ctx
)
10843 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10844 struct r600_bytecode_alu alu
;
10846 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
10848 /* temp.x = src.x */
10849 /* note: no need to mask out the high bits */
10850 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10851 alu
.op
= ALU_OP1_MOV
;
10853 alu
.dst
.sel
= ctx
->temp_reg
;
10855 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
10856 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10860 /* temp.y = src.x >> 16 */
10861 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10862 alu
.op
= ALU_OP2_LSHR_INT
;
10864 alu
.dst
.sel
= ctx
->temp_reg
;
10866 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
10867 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
10868 alu
.src
[1].value
= 16;
10870 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10874 /* dst.wz = dst.xy = f16_to_f32(temp.xy) */
10875 for (i
= 0; i
< lasti
+ 1; i
++) {
10876 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
10878 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10879 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
10880 alu
.op
= ALU_OP1_FLT16_TO_FLT32
;
10881 alu
.src
[0].sel
= ctx
->temp_reg
;
10882 alu
.src
[0].chan
= i
% 2;
10883 alu
.last
= i
== lasti
;
10884 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10892 static int tgsi_bfe(struct r600_shader_ctx
*ctx
)
10894 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10895 struct r600_bytecode_alu alu
;
10896 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
10900 if ((inst
->Src
[0].Register
.File
== inst
->Dst
[0].Register
.File
&&
10901 inst
->Src
[0].Register
.Index
== inst
->Dst
[0].Register
.Index
) ||
10902 (inst
->Src
[2].Register
.File
== inst
->Dst
[0].Register
.File
&&
10903 inst
->Src
[2].Register
.Index
== inst
->Dst
[0].Register
.Index
))
10904 dst
= r600_get_temp(ctx
);
10906 r
= tgsi_op3_dst(ctx
, dst
);
10910 for (i
= 0; i
< lasti
+ 1; i
++) {
10911 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10912 alu
.op
= ALU_OP2_SETGE_INT
;
10913 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[2], i
);
10914 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
10915 alu
.src
[1].value
= 32;
10916 alu
.dst
.sel
= ctx
->temp_reg
;
10921 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10926 for (i
= 0; i
< lasti
+ 1; i
++) {
10927 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10928 alu
.op
= ALU_OP3_CNDE_INT
;
10930 alu
.src
[0].sel
= ctx
->temp_reg
;
10931 alu
.src
[0].chan
= i
;
10933 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
10935 alu
.src
[1].sel
= dst
;
10937 alu
.src
[1].sel
= alu
.dst
.sel
;
10938 alu
.src
[1].chan
= i
;
10939 r600_bytecode_src(&alu
.src
[2], &ctx
->src
[0], i
);
10943 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10951 static int tgsi_clock(struct r600_shader_ctx
*ctx
)
10953 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
10954 struct r600_bytecode_alu alu
;
10957 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10958 alu
.op
= ALU_OP1_MOV
;
10959 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
10960 alu
.src
[0].sel
= EG_V_SQ_ALU_SRC_TIME_LO
;
10961 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10964 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10965 alu
.op
= ALU_OP1_MOV
;
10966 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
10967 alu
.src
[0].sel
= EG_V_SQ_ALU_SRC_TIME_HI
;
10969 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
10975 static int emit_u64add(struct r600_shader_ctx
*ctx
, int op
,
10977 int src0_sel
, int src0_chan
,
10978 int src1_sel
, int src1_chan
)
10980 struct r600_bytecode_alu alu
;
10984 if (op
== ALU_OP2_ADD_INT
)
10985 opc
= ALU_OP2_ADDC_UINT
;
10987 opc
= ALU_OP2_SUBB_UINT
;
10989 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
10991 alu
.dst
.sel
= treg
;
10994 alu
.src
[0].sel
= src0_sel
;
10995 alu
.src
[0].chan
= src0_chan
+ 0;
10996 alu
.src
[1].sel
= src1_sel
;
10997 alu
.src
[1].chan
= src1_chan
+ 0;
10998 alu
.src
[1].neg
= 0;
10999 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11003 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11005 alu
.dst
.sel
= treg
;
11008 alu
.src
[0].sel
= src0_sel
;
11009 alu
.src
[0].chan
= src0_chan
+ 1;
11010 alu
.src
[1].sel
= src1_sel
;
11011 alu
.src
[1].chan
= src1_chan
+ 1;
11012 alu
.src
[1].neg
= 0;
11013 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11017 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11019 alu
.dst
.sel
= treg
;
11023 alu
.src
[0].sel
= src0_sel
;
11024 alu
.src
[0].chan
= src0_chan
+ 0;
11025 alu
.src
[1].sel
= src1_sel
;
11026 alu
.src
[1].chan
= src1_chan
+ 0;
11027 alu
.src
[1].neg
= 0;
11028 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11032 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11034 alu
.dst
.sel
= treg
;
11037 alu
.src
[0].sel
= treg
;
11038 alu
.src
[0].chan
= 1;
11039 alu
.src
[1].sel
= treg
;
11040 alu
.src
[1].chan
= 2;
11042 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11048 static int egcm_u64add(struct r600_shader_ctx
*ctx
)
11050 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
11051 struct r600_bytecode_alu alu
;
11053 int treg
= ctx
->temp_reg
;
11054 int op
= ALU_OP2_ADD_INT
, opc
= ALU_OP2_ADDC_UINT
;
11056 if (ctx
->src
[1].neg
) {
11057 op
= ALU_OP2_SUB_INT
;
11058 opc
= ALU_OP2_SUBB_UINT
;
11060 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11062 alu
.dst
.sel
= treg
;
11065 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
11066 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 0);
11067 alu
.src
[1].neg
= 0;
11068 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11072 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11074 alu
.dst
.sel
= treg
;
11077 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 1);
11078 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 1);
11079 alu
.src
[1].neg
= 0;
11080 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11084 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11086 alu
.dst
.sel
= treg
;
11090 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
11091 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 0);
11092 alu
.src
[1].neg
= 0;
11093 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11097 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11099 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
11100 alu
.src
[0].sel
= treg
;
11101 alu
.src
[0].chan
= 1;
11102 alu
.src
[1].sel
= treg
;
11103 alu
.src
[1].chan
= 2;
11105 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11108 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11109 alu
.op
= ALU_OP1_MOV
;
11110 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
11111 alu
.src
[0].sel
= treg
;
11112 alu
.src
[0].chan
= 0;
11114 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11120 /* result.y = mul_high a, b
11122 result.y += a.x * b.y + a.y * b.x;
11124 static int egcm_u64mul(struct r600_shader_ctx
*ctx
)
11126 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
11127 struct r600_bytecode_alu alu
;
11129 int treg
= ctx
->temp_reg
;
11131 /* temp.x = mul_lo a.x, b.x */
11132 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11133 alu
.op
= ALU_OP2_MULLO_UINT
;
11134 alu
.dst
.sel
= treg
;
11137 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
11138 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 0);
11139 r
= emit_mul_int_op(ctx
->bc
, &alu
);
11143 /* temp.y = mul_hi a.x, b.x */
11144 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11145 alu
.op
= ALU_OP2_MULHI_UINT
;
11146 alu
.dst
.sel
= treg
;
11149 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
11150 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 0);
11151 r
= emit_mul_int_op(ctx
->bc
, &alu
);
11155 /* temp.z = mul a.x, b.y */
11156 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11157 alu
.op
= ALU_OP2_MULLO_UINT
;
11158 alu
.dst
.sel
= treg
;
11161 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
11162 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 1);
11163 r
= emit_mul_int_op(ctx
->bc
, &alu
);
11167 /* temp.w = mul a.y, b.x */
11168 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11169 alu
.op
= ALU_OP2_MULLO_UINT
;
11170 alu
.dst
.sel
= treg
;
11173 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 1);
11174 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 0);
11175 r
= emit_mul_int_op(ctx
->bc
, &alu
);
11179 /* temp.z = temp.z + temp.w */
11180 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11181 alu
.op
= ALU_OP2_ADD_INT
;
11182 alu
.dst
.sel
= treg
;
11185 alu
.src
[0].sel
= treg
;
11186 alu
.src
[0].chan
= 2;
11187 alu
.src
[1].sel
= treg
;
11188 alu
.src
[1].chan
= 3;
11190 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11194 /* temp.y = temp.y + temp.z */
11195 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11196 alu
.op
= ALU_OP2_ADD_INT
;
11197 alu
.dst
.sel
= treg
;
11200 alu
.src
[0].sel
= treg
;
11201 alu
.src
[0].chan
= 1;
11202 alu
.src
[1].sel
= treg
;
11203 alu
.src
[1].chan
= 2;
11205 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11209 /* dst.x = temp.x */
11210 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11211 alu
.op
= ALU_OP1_MOV
;
11212 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
11213 alu
.src
[0].sel
= treg
;
11214 alu
.src
[0].chan
= 0;
11215 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11219 /* dst.y = temp.y */
11220 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11221 alu
.op
= ALU_OP1_MOV
;
11222 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
11223 alu
.src
[0].sel
= treg
;
11224 alu
.src
[0].chan
= 1;
11226 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11233 static int emit_u64sge(struct r600_shader_ctx
*ctx
,
11235 int src0_sel
, int src0_base_chan
,
11236 int src1_sel
, int src1_base_chan
)
11239 /* for 64-bit sge */
11240 /* result = (src0.y > src1.y) || ((src0.y == src1.y) && src0.x >= src1.x)) */
11241 r
= single_alu_op2(ctx
, ALU_OP2_SETGT_UINT
,
11243 src0_sel
, src0_base_chan
+ 1,
11244 src1_sel
, src1_base_chan
+ 1);
11248 r
= single_alu_op2(ctx
, ALU_OP2_SETGE_UINT
,
11250 src0_sel
, src0_base_chan
,
11251 src1_sel
, src1_base_chan
);
11255 r
= single_alu_op2(ctx
, ALU_OP2_SETE_INT
,
11257 src0_sel
, src0_base_chan
+ 1,
11258 src1_sel
, src1_base_chan
+ 1);
11262 r
= single_alu_op2(ctx
, ALU_OP2_AND_INT
,
11269 r
= single_alu_op2(ctx
, ALU_OP2_OR_INT
,
/*
 * NOTE(review): this span is an extraction-mangled copy of egcm_u64div():
 * statements are split across physical lines, the leading numbers are the
 * original file's line numbers, and gaps in that numbering (e.g. 11307 ->
 * 11309 -> 11313) show that whole lines were dropped — ALU destination
 * operands of the single_alu_op2() calls, "if (r) return r;" checks, the
 * function's braces and final return.  The tokens below are kept
 * byte-identical; only comments were added.  Restore the dropped lines from
 * the canonical source before attempting to compile this.
 *
 * Visible algorithm: unsigned 64-bit / 32-bit division, implemented only as
 * far as the query-buffer-object shader needs (see the original comment
 * below).  The divisor must be a literal with a zero high dword and exactly
 * dst.xy may be written.  The numerator is copied into tmp_num.xy, the
 * quotient accumulates in tmp_num.zw, and a classic shift-subtract long
 * division is emitted fully unrolled: two 31-iteration loops plus a
 * manually peeled final step for each (the shift amounts and the shifted
 * divisor values are computed on the CPU since the divisor is a literal).
 */
11278 /* this isn't a complete div it's just enough for qbo shader to work */
11279 static int egcm_u64div(struct r600_shader_ctx
*ctx
)
11281 struct r600_bytecode_alu alu
;
11282 struct r600_bytecode_alu_src alu_num_hi
, alu_num_lo
, alu_denom_hi
, alu_denom_lo
, alu_src
;
11284 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
/* NOTE(review): the three guards below bail out (dropped lines presumably
 * "return -1;") unless: divisor is a literal, its high dword is 0, and the
 * instruction writes exactly dst.xy (WriteMask == 0x3). */
11286 /* make sure we are dividing my a const with 0 in the high bits */
11287 if (ctx
->src
[1].sel
!= V_SQ_ALU_SRC_LITERAL
)
11289 if (ctx
->src
[1].value
[ctx
->src
[1].swizzle
[1]] != 0)
11291 /* make sure we are doing one division */
11292 if (inst
->Dst
[0].Register
.WriteMask
!= 0x3)
11295 /* emit_if uses ctx->temp_reg so we can't */
11296 int treg
= r600_get_temp(ctx
);
11297 int tmp_num
= r600_get_temp(ctx
);
11298 int sub_tmp
= r600_get_temp(ctx
);
11300 /* tmp quot are tmp_num.zw */
11301 r600_bytecode_src(&alu_num_lo
, &ctx
->src
[0], 0);
11302 r600_bytecode_src(&alu_num_hi
, &ctx
->src
[0], 1);
11303 r600_bytecode_src(&alu_denom_lo
, &ctx
->src
[1], 0);
11304 r600_bytecode_src(&alu_denom_hi
, &ctx
->src
[1], 1);
/* NOTE(review): the next four MOVs initialise tmp_num = (num_lo, num_hi,
 * 0, 0); the destination arguments of each call were dropped by the
 * extraction (numbering gaps 11308, 11310-11312, ...). */
11306 /* MOV tmp_num.xy, numerator */
11307 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11309 alu_num_lo
.sel
, alu_num_lo
.chan
,
11313 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11315 alu_num_hi
.sel
, alu_num_hi
.chan
,
11320 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11322 V_SQ_ALU_SRC_LITERAL
, 0,
11327 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11329 V_SQ_ALU_SRC_LITERAL
, 0,
11334 /* treg 0 is log2_denom */
11335 /* normally this gets the MSB for the denom high value
11336 - however we know this will always be 0 here. */
11337 r
= single_alu_op2(ctx
,
11340 V_SQ_ALU_SRC_LITERAL
, 32,
11345 /* normally check demon hi for 0, but we know it is already */
11346 /* t0.z = num_hi >= denom_lo */
11347 r
= single_alu_op2(ctx
,
11348 ALU_OP2_SETGE_UINT
,
11350 alu_num_hi
.sel
, alu_num_hi
.chan
,
11351 V_SQ_ALU_SRC_LITERAL
, alu_denom_lo
.value
);
11355 memset(&alu_src
, 0, sizeof(alu_src
));
11356 alu_src
.sel
= treg
;
/* NOTE(review): predicated "if" on the SETGE result; the matching
 * tgsi_endif() calls appear further down (11419/11454/11458). */
11358 r
= emit_if(ctx
, ALU_OP2_PRED_SETNE_INT
, &alu_src
);
11362 /* for loops in here */
11363 /* get msb t0.x = msb(src[1].x) first */
11364 int msb_lo
= util_last_bit(alu_denom_lo
.value
);
11365 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11367 V_SQ_ALU_SRC_LITERAL
, msb_lo
,
/* NOTE(review): first unrolled shift-subtract loop — 32-bit numerator
 * path.  Each iteration compares against the CPU-computed shifted divisor,
 * conditionally subtracts it and ORs the quotient bit 1U << (31 - i). */
11372 /* unroll the asm here */
11373 for (i
= 0; i
< 31; i
++) {
11374 r
= single_alu_op2(ctx
, ALU_OP2_SETGE_UINT
,
11376 V_SQ_ALU_SRC_LITERAL
, i
,
11381 /* we can do this on the CPU */
11382 uint32_t denom_lo_shl
= alu_denom_lo
.value
<< (31 - i
);
11383 /* t0.z = tmp_num.y >= t0.z */
11384 r
= single_alu_op2(ctx
, ALU_OP2_SETGE_UINT
,
11387 V_SQ_ALU_SRC_LITERAL
, denom_lo_shl
);
11391 r
= single_alu_op2(ctx
, ALU_OP2_AND_INT
,
11398 memset(&alu_src
, 0, sizeof(alu_src
));
11399 alu_src
.sel
= treg
;
11401 r
= emit_if(ctx
, ALU_OP2_PRED_SETNE_INT
, &alu_src
);
11405 r
= single_alu_op2(ctx
, ALU_OP2_SUB_INT
,
11408 V_SQ_ALU_SRC_LITERAL
, denom_lo_shl
);
11412 r
= single_alu_op2(ctx
, ALU_OP2_OR_INT
,
11415 V_SQ_ALU_SRC_LITERAL
, 1U << (31 - i
));
11419 r
= tgsi_endif(ctx
);
11424 /* log2_denom is always <= 31, so manually peel the last loop
11427 r
= single_alu_op2(ctx
, ALU_OP2_SETGE_UINT
,
11430 V_SQ_ALU_SRC_LITERAL
, alu_denom_lo
.value
);
11434 memset(&alu_src
, 0, sizeof(alu_src
));
11435 alu_src
.sel
= treg
;
11437 r
= emit_if(ctx
, ALU_OP2_PRED_SETNE_INT
, &alu_src
);
11441 r
= single_alu_op2(ctx
, ALU_OP2_SUB_INT
,
11444 V_SQ_ALU_SRC_LITERAL
, alu_denom_lo
.value
);
11448 r
= single_alu_op2(ctx
, ALU_OP2_OR_INT
,
11451 V_SQ_ALU_SRC_LITERAL
, 1U);
11454 r
= tgsi_endif(ctx
);
11458 r
= tgsi_endif(ctx
);
/* NOTE(review): second unrolled loop — full 64-bit path using the
 * emit_u64sge()/emit_u64add() helpers with the 64-bit shifted divisor
 * split into low/high dwords computed on the CPU. */
11462 /* onto the second loop to unroll */
11463 for (i
= 0; i
< 31; i
++) {
11464 r
= single_alu_op2(ctx
, ALU_OP2_SETGE_UINT
,
11466 V_SQ_ALU_SRC_LITERAL
, (63 - (31 - i
)),
11471 uint64_t denom_shl
= (uint64_t)alu_denom_lo
.value
<< (31 - i
);
11472 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11474 V_SQ_ALU_SRC_LITERAL
, (denom_shl
& 0xffffffff),
11479 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11481 V_SQ_ALU_SRC_LITERAL
, (denom_shl
>> 32),
11486 r
= emit_u64sge(ctx
, sub_tmp
,
11492 r
= single_alu_op2(ctx
, ALU_OP2_AND_INT
,
11499 memset(&alu_src
, 0, sizeof(alu_src
));
11500 alu_src
.sel
= treg
;
11502 r
= emit_if(ctx
, ALU_OP2_PRED_SETNE_INT
, &alu_src
);
11507 r
= emit_u64add(ctx
, ALU_OP2_SUB_INT
,
11514 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11521 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11528 r
= single_alu_op2(ctx
, ALU_OP2_OR_INT
,
11531 V_SQ_ALU_SRC_LITERAL
, 1U << (31 - i
));
11535 r
= tgsi_endif(ctx
);
11540 /* log2_denom is always <= 63, so manually peel the last loop
11543 uint64_t denom_shl
= (uint64_t)alu_denom_lo
.value
;
11544 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11546 V_SQ_ALU_SRC_LITERAL
, (denom_shl
& 0xffffffff),
11551 r
= single_alu_op2(ctx
, ALU_OP1_MOV
,
11553 V_SQ_ALU_SRC_LITERAL
, (denom_shl
>> 32),
11558 r
= emit_u64sge(ctx
, sub_tmp
,
11564 memset(&alu_src
, 0, sizeof(alu_src
));
11565 alu_src
.sel
= sub_tmp
;
11567 r
= emit_if(ctx
, ALU_OP2_PRED_SETNE_INT
, &alu_src
);
11571 r
= emit_u64add(ctx
, ALU_OP2_SUB_INT
,
11578 r
= single_alu_op2(ctx
, ALU_OP2_OR_INT
,
11581 V_SQ_ALU_SRC_LITERAL
, 1U);
11584 r
= tgsi_endif(ctx
);
/* NOTE(review): epilogue — copy the accumulated quotient tmp_num.zw out
 * to dst.x / dst.y via two plain MOVs. */
11588 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11589 alu
.op
= ALU_OP1_MOV
;
11590 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
11591 alu
.src
[0].sel
= tmp_num
;
11592 alu
.src
[0].chan
= 2;
11593 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11597 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11598 alu
.op
= ALU_OP1_MOV
;
11599 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
11600 alu
.src
[0].sel
= tmp_num
;
11601 alu
.src
[0].chan
= 3;
11603 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11609 static int egcm_u64sne(struct r600_shader_ctx
*ctx
)
11611 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
11612 struct r600_bytecode_alu alu
;
11614 int treg
= ctx
->temp_reg
;
11616 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11617 alu
.op
= ALU_OP2_SETNE_INT
;
11618 alu
.dst
.sel
= treg
;
11621 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 0);
11622 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 0);
11623 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11627 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11628 alu
.op
= ALU_OP2_SETNE_INT
;
11629 alu
.dst
.sel
= treg
;
11632 r600_bytecode_src(&alu
.src
[0], &ctx
->src
[0], 1);
11633 r600_bytecode_src(&alu
.src
[1], &ctx
->src
[1], 1);
11635 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11639 memset(&alu
, 0, sizeof(struct r600_bytecode_alu
));
11640 alu
.op
= ALU_OP2_OR_INT
;
11641 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
11642 alu
.src
[0].sel
= treg
;
11643 alu
.src
[0].chan
= 0;
11644 alu
.src
[1].sel
= treg
;
11645 alu
.src
[1].chan
= 1;
11647 r
= r600_bytecode_add_alu(ctx
->bc
, &alu
);
11653 static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction
[] = {
11654 [TGSI_OPCODE_ARL
] = { ALU_OP0_NOP
, tgsi_r600_arl
},
11655 [TGSI_OPCODE_MOV
] = { ALU_OP1_MOV
, tgsi_op2
},
11656 [TGSI_OPCODE_LIT
] = { ALU_OP0_NOP
, tgsi_lit
},
11658 [TGSI_OPCODE_RCP
] = { ALU_OP1_RECIP_IEEE
, tgsi_trans_srcx_replicate
},
11660 [TGSI_OPCODE_RSQ
] = { ALU_OP0_NOP
, tgsi_rsq
},
11661 [TGSI_OPCODE_EXP
] = { ALU_OP0_NOP
, tgsi_exp
},
11662 [TGSI_OPCODE_LOG
] = { ALU_OP0_NOP
, tgsi_log
},
11663 [TGSI_OPCODE_MUL
] = { ALU_OP2_MUL_IEEE
, tgsi_op2
},
11664 [TGSI_OPCODE_ADD
] = { ALU_OP2_ADD
, tgsi_op2
},
11665 [TGSI_OPCODE_DP3
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
11666 [TGSI_OPCODE_DP4
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
11667 [TGSI_OPCODE_DST
] = { ALU_OP0_NOP
, tgsi_opdst
},
11668 /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
11669 [TGSI_OPCODE_MIN
] = { ALU_OP2_MIN_DX10
, tgsi_op2
},
11670 [TGSI_OPCODE_MAX
] = { ALU_OP2_MAX_DX10
, tgsi_op2
},
11671 [TGSI_OPCODE_SLT
] = { ALU_OP2_SETGT
, tgsi_op2_swap
},
11672 [TGSI_OPCODE_SGE
] = { ALU_OP2_SETGE
, tgsi_op2
},
11673 [TGSI_OPCODE_MAD
] = { ALU_OP3_MULADD_IEEE
, tgsi_op3
},
11674 [TGSI_OPCODE_LRP
] = { ALU_OP0_NOP
, tgsi_lrp
},
11675 [TGSI_OPCODE_FMA
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11676 [TGSI_OPCODE_SQRT
] = { ALU_OP1_SQRT_IEEE
, tgsi_trans_srcx_replicate
},
11677 [21] = { ALU_OP0_NOP
, tgsi_unsupported
},
11678 [22] = { ALU_OP0_NOP
, tgsi_unsupported
},
11679 [23] = { ALU_OP0_NOP
, tgsi_unsupported
},
11680 [TGSI_OPCODE_FRC
] = { ALU_OP1_FRACT
, tgsi_op2
},
11681 [25] = { ALU_OP0_NOP
, tgsi_unsupported
},
11682 [TGSI_OPCODE_FLR
] = { ALU_OP1_FLOOR
, tgsi_op2
},
11683 [TGSI_OPCODE_ROUND
] = { ALU_OP1_RNDNE
, tgsi_op2
},
11684 [TGSI_OPCODE_EX2
] = { ALU_OP1_EXP_IEEE
, tgsi_trans_srcx_replicate
},
11685 [TGSI_OPCODE_LG2
] = { ALU_OP1_LOG_IEEE
, tgsi_trans_srcx_replicate
},
11686 [TGSI_OPCODE_POW
] = { ALU_OP0_NOP
, tgsi_pow
},
11687 [31] = { ALU_OP0_NOP
, tgsi_unsupported
},
11688 [32] = { ALU_OP0_NOP
, tgsi_unsupported
},
11689 [TGSI_OPCODE_CLOCK
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11690 [34] = { ALU_OP0_NOP
, tgsi_unsupported
},
11691 [35] = { ALU_OP0_NOP
, tgsi_unsupported
},
11692 [TGSI_OPCODE_COS
] = { ALU_OP1_COS
, tgsi_trig
},
11693 [TGSI_OPCODE_DDX
] = { FETCH_OP_GET_GRADIENTS_H
, tgsi_tex
},
11694 [TGSI_OPCODE_DDY
] = { FETCH_OP_GET_GRADIENTS_V
, tgsi_tex
},
11695 [TGSI_OPCODE_KILL
] = { ALU_OP2_KILLGT
, tgsi_kill
}, /* unconditional kill */
11696 [TGSI_OPCODE_PK2H
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11697 [TGSI_OPCODE_PK2US
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11698 [TGSI_OPCODE_PK4B
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11699 [TGSI_OPCODE_PK4UB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11700 [44] = { ALU_OP0_NOP
, tgsi_unsupported
},
11701 [TGSI_OPCODE_SEQ
] = { ALU_OP2_SETE
, tgsi_op2
},
11702 [46] = { ALU_OP0_NOP
, tgsi_unsupported
},
11703 [TGSI_OPCODE_SGT
] = { ALU_OP2_SETGT
, tgsi_op2
},
11704 [TGSI_OPCODE_SIN
] = { ALU_OP1_SIN
, tgsi_trig
},
11705 [TGSI_OPCODE_SLE
] = { ALU_OP2_SETGE
, tgsi_op2_swap
},
11706 [TGSI_OPCODE_SNE
] = { ALU_OP2_SETNE
, tgsi_op2
},
11707 [51] = { ALU_OP0_NOP
, tgsi_unsupported
},
11708 [TGSI_OPCODE_TEX
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
11709 [TGSI_OPCODE_TXD
] = { FETCH_OP_SAMPLE_G
, tgsi_tex
},
11710 [TGSI_OPCODE_TXP
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
11711 [TGSI_OPCODE_UP2H
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11712 [TGSI_OPCODE_UP2US
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11713 [TGSI_OPCODE_UP4B
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11714 [TGSI_OPCODE_UP4UB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11715 [59] = { ALU_OP0_NOP
, tgsi_unsupported
},
11716 [60] = { ALU_OP0_NOP
, tgsi_unsupported
},
11717 [TGSI_OPCODE_ARR
] = { ALU_OP0_NOP
, tgsi_r600_arl
},
11718 [62] = { ALU_OP0_NOP
, tgsi_unsupported
},
11719 [TGSI_OPCODE_CAL
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11720 [TGSI_OPCODE_RET
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11721 [TGSI_OPCODE_SSG
] = { ALU_OP0_NOP
, tgsi_ssg
},
11722 [TGSI_OPCODE_CMP
] = { ALU_OP0_NOP
, tgsi_cmp
},
11723 [67] = { ALU_OP0_NOP
, tgsi_unsupported
},
11724 [TGSI_OPCODE_TXB
] = { FETCH_OP_SAMPLE_LB
, tgsi_tex
},
11725 [69] = { ALU_OP0_NOP
, tgsi_unsupported
},
11726 [TGSI_OPCODE_DIV
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11727 [TGSI_OPCODE_DP2
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
11728 [TGSI_OPCODE_TXL
] = { FETCH_OP_SAMPLE_L
, tgsi_tex
},
11729 [TGSI_OPCODE_BRK
] = { CF_OP_LOOP_BREAK
, tgsi_loop_brk_cont
},
11730 [TGSI_OPCODE_IF
] = { ALU_OP0_NOP
, tgsi_if
},
11731 [TGSI_OPCODE_UIF
] = { ALU_OP0_NOP
, tgsi_uif
},
11732 [76] = { ALU_OP0_NOP
, tgsi_unsupported
},
11733 [TGSI_OPCODE_ELSE
] = { ALU_OP0_NOP
, tgsi_else
},
11734 [TGSI_OPCODE_ENDIF
] = { ALU_OP0_NOP
, tgsi_endif
},
11735 [TGSI_OPCODE_DDX_FINE
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11736 [TGSI_OPCODE_DDY_FINE
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11737 [81] = { ALU_OP0_NOP
, tgsi_unsupported
},
11738 [82] = { ALU_OP0_NOP
, tgsi_unsupported
},
11739 [TGSI_OPCODE_CEIL
] = { ALU_OP1_CEIL
, tgsi_op2
},
11740 [TGSI_OPCODE_I2F
] = { ALU_OP1_INT_TO_FLT
, tgsi_op2_trans
},
11741 [TGSI_OPCODE_NOT
] = { ALU_OP1_NOT_INT
, tgsi_op2
},
11742 [TGSI_OPCODE_TRUNC
] = { ALU_OP1_TRUNC
, tgsi_op2
},
11743 [TGSI_OPCODE_SHL
] = { ALU_OP2_LSHL_INT
, tgsi_op2_trans
},
11744 [88] = { ALU_OP0_NOP
, tgsi_unsupported
},
11745 [TGSI_OPCODE_AND
] = { ALU_OP2_AND_INT
, tgsi_op2
},
11746 [TGSI_OPCODE_OR
] = { ALU_OP2_OR_INT
, tgsi_op2
},
11747 [TGSI_OPCODE_MOD
] = { ALU_OP0_NOP
, tgsi_imod
},
11748 [TGSI_OPCODE_XOR
] = { ALU_OP2_XOR_INT
, tgsi_op2
},
11749 [93] = { ALU_OP0_NOP
, tgsi_unsupported
},
11750 [TGSI_OPCODE_TXF
] = { FETCH_OP_LD
, tgsi_tex
},
11751 [TGSI_OPCODE_TXQ
] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_tex
},
11752 [TGSI_OPCODE_CONT
] = { CF_OP_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
11753 [TGSI_OPCODE_EMIT
] = { CF_OP_EMIT_VERTEX
, tgsi_gs_emit
},
11754 [TGSI_OPCODE_ENDPRIM
] = { CF_OP_CUT_VERTEX
, tgsi_gs_emit
},
11755 [TGSI_OPCODE_BGNLOOP
] = { ALU_OP0_NOP
, tgsi_bgnloop
},
11756 [TGSI_OPCODE_BGNSUB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11757 [TGSI_OPCODE_ENDLOOP
] = { ALU_OP0_NOP
, tgsi_endloop
},
11758 [TGSI_OPCODE_ENDSUB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11759 [103] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_tex
},
11760 [TGSI_OPCODE_TXQS
] = { FETCH_OP_GET_NUMBER_OF_SAMPLES
, tgsi_tex
},
11761 [TGSI_OPCODE_RESQ
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11762 [106] = { ALU_OP0_NOP
, tgsi_unsupported
},
11763 [TGSI_OPCODE_NOP
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11764 [TGSI_OPCODE_FSEQ
] = { ALU_OP2_SETE_DX10
, tgsi_op2
},
11765 [TGSI_OPCODE_FSGE
] = { ALU_OP2_SETGE_DX10
, tgsi_op2
},
11766 [TGSI_OPCODE_FSLT
] = { ALU_OP2_SETGT_DX10
, tgsi_op2_swap
},
11767 [TGSI_OPCODE_FSNE
] = { ALU_OP2_SETNE_DX10
, tgsi_op2_swap
},
11768 [TGSI_OPCODE_MEMBAR
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11769 [113] = { ALU_OP0_NOP
, tgsi_unsupported
},
11770 [114] = { ALU_OP0_NOP
, tgsi_unsupported
},
11771 [115] = { ALU_OP0_NOP
, tgsi_unsupported
},
11772 [TGSI_OPCODE_KILL_IF
] = { ALU_OP2_KILLGT
, tgsi_kill
}, /* conditional kill */
11773 [TGSI_OPCODE_END
] = { ALU_OP0_NOP
, tgsi_end
}, /* aka HALT */
11774 [TGSI_OPCODE_DFMA
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11775 [TGSI_OPCODE_F2I
] = { ALU_OP1_FLT_TO_INT
, tgsi_op2_trans
},
11776 [TGSI_OPCODE_IDIV
] = { ALU_OP0_NOP
, tgsi_idiv
},
11777 [TGSI_OPCODE_IMAX
] = { ALU_OP2_MAX_INT
, tgsi_op2
},
11778 [TGSI_OPCODE_IMIN
] = { ALU_OP2_MIN_INT
, tgsi_op2
},
11779 [TGSI_OPCODE_INEG
] = { ALU_OP2_SUB_INT
, tgsi_ineg
},
11780 [TGSI_OPCODE_ISGE
] = { ALU_OP2_SETGE_INT
, tgsi_op2
},
11781 [TGSI_OPCODE_ISHR
] = { ALU_OP2_ASHR_INT
, tgsi_op2_trans
},
11782 [TGSI_OPCODE_ISLT
] = { ALU_OP2_SETGT_INT
, tgsi_op2_swap
},
11783 [TGSI_OPCODE_F2U
] = { ALU_OP1_FLT_TO_UINT
, tgsi_op2_trans
},
11784 [TGSI_OPCODE_U2F
] = { ALU_OP1_UINT_TO_FLT
, tgsi_op2_trans
},
11785 [TGSI_OPCODE_UADD
] = { ALU_OP2_ADD_INT
, tgsi_op2
},
11786 [TGSI_OPCODE_UDIV
] = { ALU_OP0_NOP
, tgsi_udiv
},
11787 [TGSI_OPCODE_UMAD
] = { ALU_OP0_NOP
, tgsi_umad
},
11788 [TGSI_OPCODE_UMAX
] = { ALU_OP2_MAX_UINT
, tgsi_op2
},
11789 [TGSI_OPCODE_UMIN
] = { ALU_OP2_MIN_UINT
, tgsi_op2
},
11790 [TGSI_OPCODE_UMOD
] = { ALU_OP0_NOP
, tgsi_umod
},
11791 [TGSI_OPCODE_UMUL
] = { ALU_OP2_MULLO_UINT
, tgsi_op2_trans
},
11792 [TGSI_OPCODE_USEQ
] = { ALU_OP2_SETE_INT
, tgsi_op2
},
11793 [TGSI_OPCODE_USGE
] = { ALU_OP2_SETGE_UINT
, tgsi_op2
},
11794 [TGSI_OPCODE_USHR
] = { ALU_OP2_LSHR_INT
, tgsi_op2_trans
},
11795 [TGSI_OPCODE_USLT
] = { ALU_OP2_SETGT_UINT
, tgsi_op2_swap
},
11796 [TGSI_OPCODE_USNE
] = { ALU_OP2_SETNE_INT
, tgsi_op2_swap
},
11797 [TGSI_OPCODE_SWITCH
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11798 [TGSI_OPCODE_CASE
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11799 [TGSI_OPCODE_DEFAULT
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11800 [TGSI_OPCODE_ENDSWITCH
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11801 [TGSI_OPCODE_SAMPLE
] = { 0, tgsi_unsupported
},
11802 [TGSI_OPCODE_SAMPLE_I
] = { 0, tgsi_unsupported
},
11803 [TGSI_OPCODE_SAMPLE_I_MS
] = { 0, tgsi_unsupported
},
11804 [TGSI_OPCODE_SAMPLE_B
] = { 0, tgsi_unsupported
},
11805 [TGSI_OPCODE_SAMPLE_C
] = { 0, tgsi_unsupported
},
11806 [TGSI_OPCODE_SAMPLE_C_LZ
] = { 0, tgsi_unsupported
},
11807 [TGSI_OPCODE_SAMPLE_D
] = { 0, tgsi_unsupported
},
11808 [TGSI_OPCODE_SAMPLE_L
] = { 0, tgsi_unsupported
},
11809 [TGSI_OPCODE_GATHER4
] = { 0, tgsi_unsupported
},
11810 [TGSI_OPCODE_SVIEWINFO
] = { 0, tgsi_unsupported
},
11811 [TGSI_OPCODE_SAMPLE_POS
] = { 0, tgsi_unsupported
},
11812 [TGSI_OPCODE_SAMPLE_INFO
] = { 0, tgsi_unsupported
},
11813 [TGSI_OPCODE_UARL
] = { ALU_OP1_MOVA_INT
, tgsi_r600_arl
},
11814 [TGSI_OPCODE_UCMP
] = { ALU_OP0_NOP
, tgsi_ucmp
},
11815 [TGSI_OPCODE_IABS
] = { 0, tgsi_iabs
},
11816 [TGSI_OPCODE_ISSG
] = { 0, tgsi_issg
},
11817 [TGSI_OPCODE_LOAD
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11818 [TGSI_OPCODE_STORE
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11819 [163] = { ALU_OP0_NOP
, tgsi_unsupported
},
11820 [164] = { ALU_OP0_NOP
, tgsi_unsupported
},
11821 [165] = { ALU_OP0_NOP
, tgsi_unsupported
},
11822 [TGSI_OPCODE_BARRIER
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11823 [TGSI_OPCODE_ATOMUADD
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11824 [TGSI_OPCODE_ATOMXCHG
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11825 [TGSI_OPCODE_ATOMCAS
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11826 [TGSI_OPCODE_ATOMAND
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11827 [TGSI_OPCODE_ATOMOR
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11828 [TGSI_OPCODE_ATOMXOR
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11829 [TGSI_OPCODE_ATOMUMIN
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11830 [TGSI_OPCODE_ATOMUMAX
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11831 [TGSI_OPCODE_ATOMIMIN
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11832 [TGSI_OPCODE_ATOMIMAX
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11833 [TGSI_OPCODE_TEX2
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
11834 [TGSI_OPCODE_TXB2
] = { FETCH_OP_SAMPLE_LB
, tgsi_tex
},
11835 [TGSI_OPCODE_TXL2
] = { FETCH_OP_SAMPLE_L
, tgsi_tex
},
11836 [TGSI_OPCODE_IMUL_HI
] = { ALU_OP2_MULHI_INT
, tgsi_op2_trans
},
11837 [TGSI_OPCODE_UMUL_HI
] = { ALU_OP2_MULHI_UINT
, tgsi_op2_trans
},
11838 [TGSI_OPCODE_TG4
] = { FETCH_OP_GATHER4
, tgsi_unsupported
},
11839 [TGSI_OPCODE_LODQ
] = { FETCH_OP_GET_LOD
, tgsi_unsupported
},
11840 [TGSI_OPCODE_IBFE
] = { ALU_OP3_BFE_INT
, tgsi_unsupported
},
11841 [TGSI_OPCODE_UBFE
] = { ALU_OP3_BFE_UINT
, tgsi_unsupported
},
11842 [TGSI_OPCODE_BFI
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11843 [TGSI_OPCODE_BREV
] = { ALU_OP1_BFREV_INT
, tgsi_unsupported
},
11844 [TGSI_OPCODE_POPC
] = { ALU_OP1_BCNT_INT
, tgsi_unsupported
},
11845 [TGSI_OPCODE_LSB
] = { ALU_OP1_FFBL_INT
, tgsi_unsupported
},
11846 [TGSI_OPCODE_IMSB
] = { ALU_OP1_FFBH_INT
, tgsi_unsupported
},
11847 [TGSI_OPCODE_UMSB
] = { ALU_OP1_FFBH_UINT
, tgsi_unsupported
},
11848 [TGSI_OPCODE_INTERP_CENTROID
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11849 [TGSI_OPCODE_INTERP_SAMPLE
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11850 [TGSI_OPCODE_INTERP_OFFSET
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11851 [TGSI_OPCODE_LAST
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11854 static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction
[] = {
11855 [TGSI_OPCODE_ARL
] = { ALU_OP0_NOP
, tgsi_eg_arl
},
11856 [TGSI_OPCODE_MOV
] = { ALU_OP1_MOV
, tgsi_op2
},
11857 [TGSI_OPCODE_LIT
] = { ALU_OP0_NOP
, tgsi_lit
},
11858 [TGSI_OPCODE_RCP
] = { ALU_OP1_RECIP_IEEE
, tgsi_trans_srcx_replicate
},
11859 [TGSI_OPCODE_RSQ
] = { ALU_OP0_NOP
, tgsi_rsq
},
11860 [TGSI_OPCODE_EXP
] = { ALU_OP0_NOP
, tgsi_exp
},
11861 [TGSI_OPCODE_LOG
] = { ALU_OP0_NOP
, tgsi_log
},
11862 [TGSI_OPCODE_MUL
] = { ALU_OP2_MUL_IEEE
, tgsi_op2
},
11863 [TGSI_OPCODE_ADD
] = { ALU_OP2_ADD
, tgsi_op2
},
11864 [TGSI_OPCODE_DP3
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
11865 [TGSI_OPCODE_DP4
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
11866 [TGSI_OPCODE_DST
] = { ALU_OP0_NOP
, tgsi_opdst
},
11867 [TGSI_OPCODE_MIN
] = { ALU_OP2_MIN_DX10
, tgsi_op2
},
11868 [TGSI_OPCODE_MAX
] = { ALU_OP2_MAX_DX10
, tgsi_op2
},
11869 [TGSI_OPCODE_SLT
] = { ALU_OP2_SETGT
, tgsi_op2_swap
},
11870 [TGSI_OPCODE_SGE
] = { ALU_OP2_SETGE
, tgsi_op2
},
11871 [TGSI_OPCODE_MAD
] = { ALU_OP3_MULADD_IEEE
, tgsi_op3
},
11872 [TGSI_OPCODE_LRP
] = { ALU_OP0_NOP
, tgsi_lrp
},
11873 [TGSI_OPCODE_FMA
] = { ALU_OP3_FMA
, tgsi_op3
},
11874 [TGSI_OPCODE_SQRT
] = { ALU_OP1_SQRT_IEEE
, tgsi_trans_srcx_replicate
},
11875 [21] = { ALU_OP0_NOP
, tgsi_unsupported
},
11876 [22] = { ALU_OP0_NOP
, tgsi_unsupported
},
11877 [23] = { ALU_OP0_NOP
, tgsi_unsupported
},
11878 [TGSI_OPCODE_FRC
] = { ALU_OP1_FRACT
, tgsi_op2
},
11879 [25] = { ALU_OP0_NOP
, tgsi_unsupported
},
11880 [TGSI_OPCODE_FLR
] = { ALU_OP1_FLOOR
, tgsi_op2
},
11881 [TGSI_OPCODE_ROUND
] = { ALU_OP1_RNDNE
, tgsi_op2
},
11882 [TGSI_OPCODE_EX2
] = { ALU_OP1_EXP_IEEE
, tgsi_trans_srcx_replicate
},
11883 [TGSI_OPCODE_LG2
] = { ALU_OP1_LOG_IEEE
, tgsi_trans_srcx_replicate
},
11884 [TGSI_OPCODE_POW
] = { ALU_OP0_NOP
, tgsi_pow
},
11885 [31] = { ALU_OP0_NOP
, tgsi_unsupported
},
11886 [32] = { ALU_OP0_NOP
, tgsi_unsupported
},
11887 [TGSI_OPCODE_CLOCK
] = { ALU_OP0_NOP
, tgsi_clock
},
11888 [34] = { ALU_OP0_NOP
, tgsi_unsupported
},
11889 [35] = { ALU_OP0_NOP
, tgsi_unsupported
},
11890 [TGSI_OPCODE_COS
] = { ALU_OP1_COS
, tgsi_trig
},
11891 [TGSI_OPCODE_DDX
] = { FETCH_OP_GET_GRADIENTS_H
, tgsi_tex
},
11892 [TGSI_OPCODE_DDY
] = { FETCH_OP_GET_GRADIENTS_V
, tgsi_tex
},
11893 [TGSI_OPCODE_KILL
] = { ALU_OP2_KILLGT
, tgsi_kill
}, /* unconditional kill */
11894 [TGSI_OPCODE_PK2H
] = { ALU_OP0_NOP
, tgsi_pk2h
},
11895 [TGSI_OPCODE_PK2US
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11896 [TGSI_OPCODE_PK4B
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11897 [TGSI_OPCODE_PK4UB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11898 [44] = { ALU_OP0_NOP
, tgsi_unsupported
},
11899 [TGSI_OPCODE_SEQ
] = { ALU_OP2_SETE
, tgsi_op2
},
11900 [46] = { ALU_OP0_NOP
, tgsi_unsupported
},
11901 [TGSI_OPCODE_SGT
] = { ALU_OP2_SETGT
, tgsi_op2
},
11902 [TGSI_OPCODE_SIN
] = { ALU_OP1_SIN
, tgsi_trig
},
11903 [TGSI_OPCODE_SLE
] = { ALU_OP2_SETGE
, tgsi_op2_swap
},
11904 [TGSI_OPCODE_SNE
] = { ALU_OP2_SETNE
, tgsi_op2
},
11905 [51] = { ALU_OP0_NOP
, tgsi_unsupported
},
11906 [TGSI_OPCODE_TEX
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
11907 [TGSI_OPCODE_TXD
] = { FETCH_OP_SAMPLE_G
, tgsi_tex
},
11908 [TGSI_OPCODE_TXP
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
11909 [TGSI_OPCODE_UP2H
] = { ALU_OP0_NOP
, tgsi_up2h
},
11910 [TGSI_OPCODE_UP2US
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11911 [TGSI_OPCODE_UP4B
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11912 [TGSI_OPCODE_UP4UB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11913 [59] = { ALU_OP0_NOP
, tgsi_unsupported
},
11914 [60] = { ALU_OP0_NOP
, tgsi_unsupported
},
11915 [TGSI_OPCODE_ARR
] = { ALU_OP0_NOP
, tgsi_eg_arl
},
11916 [62] = { ALU_OP0_NOP
, tgsi_unsupported
},
11917 [TGSI_OPCODE_CAL
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11918 [TGSI_OPCODE_RET
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11919 [TGSI_OPCODE_SSG
] = { ALU_OP0_NOP
, tgsi_ssg
},
11920 [TGSI_OPCODE_CMP
] = { ALU_OP0_NOP
, tgsi_cmp
},
11921 [67] = { ALU_OP0_NOP
, tgsi_unsupported
},
11922 [TGSI_OPCODE_TXB
] = { FETCH_OP_SAMPLE_LB
, tgsi_tex
},
11923 [69] = { ALU_OP0_NOP
, tgsi_unsupported
},
11924 [TGSI_OPCODE_DIV
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11925 [TGSI_OPCODE_DP2
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
11926 [TGSI_OPCODE_TXL
] = { FETCH_OP_SAMPLE_L
, tgsi_tex
},
11927 [TGSI_OPCODE_BRK
] = { CF_OP_LOOP_BREAK
, tgsi_loop_brk_cont
},
11928 [TGSI_OPCODE_IF
] = { ALU_OP0_NOP
, tgsi_if
},
11929 [TGSI_OPCODE_UIF
] = { ALU_OP0_NOP
, tgsi_uif
},
11930 [76] = { ALU_OP0_NOP
, tgsi_unsupported
},
11931 [TGSI_OPCODE_ELSE
] = { ALU_OP0_NOP
, tgsi_else
},
11932 [TGSI_OPCODE_ENDIF
] = { ALU_OP0_NOP
, tgsi_endif
},
11933 [TGSI_OPCODE_DDX_FINE
] = { FETCH_OP_GET_GRADIENTS_H
, tgsi_tex
},
11934 [TGSI_OPCODE_DDY_FINE
] = { FETCH_OP_GET_GRADIENTS_V
, tgsi_tex
},
11935 [82] = { ALU_OP0_NOP
, tgsi_unsupported
},
11936 [TGSI_OPCODE_CEIL
] = { ALU_OP1_CEIL
, tgsi_op2
},
11937 [TGSI_OPCODE_I2F
] = { ALU_OP1_INT_TO_FLT
, tgsi_op2_trans
},
11938 [TGSI_OPCODE_NOT
] = { ALU_OP1_NOT_INT
, tgsi_op2
},
11939 [TGSI_OPCODE_TRUNC
] = { ALU_OP1_TRUNC
, tgsi_op2
},
11940 [TGSI_OPCODE_SHL
] = { ALU_OP2_LSHL_INT
, tgsi_op2
},
11941 [88] = { ALU_OP0_NOP
, tgsi_unsupported
},
11942 [TGSI_OPCODE_AND
] = { ALU_OP2_AND_INT
, tgsi_op2
},
11943 [TGSI_OPCODE_OR
] = { ALU_OP2_OR_INT
, tgsi_op2
},
11944 [TGSI_OPCODE_MOD
] = { ALU_OP0_NOP
, tgsi_imod
},
11945 [TGSI_OPCODE_XOR
] = { ALU_OP2_XOR_INT
, tgsi_op2
},
11946 [93] = { ALU_OP0_NOP
, tgsi_unsupported
},
11947 [TGSI_OPCODE_TXF
] = { FETCH_OP_LD
, tgsi_tex
},
11948 [TGSI_OPCODE_TXQ
] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_tex
},
11949 [TGSI_OPCODE_CONT
] = { CF_OP_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
11950 [TGSI_OPCODE_EMIT
] = { CF_OP_EMIT_VERTEX
, tgsi_gs_emit
},
11951 [TGSI_OPCODE_ENDPRIM
] = { CF_OP_CUT_VERTEX
, tgsi_gs_emit
},
11952 [TGSI_OPCODE_BGNLOOP
] = { ALU_OP0_NOP
, tgsi_bgnloop
},
11953 [TGSI_OPCODE_BGNSUB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11954 [TGSI_OPCODE_ENDLOOP
] = { ALU_OP0_NOP
, tgsi_endloop
},
11955 [TGSI_OPCODE_ENDSUB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11956 [103] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_tex
},
11957 [TGSI_OPCODE_TXQS
] = { FETCH_OP_GET_NUMBER_OF_SAMPLES
, tgsi_tex
},
11958 [TGSI_OPCODE_RESQ
] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_resq
},
11959 [106] = { ALU_OP0_NOP
, tgsi_unsupported
},
11960 [TGSI_OPCODE_NOP
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11961 [TGSI_OPCODE_FSEQ
] = { ALU_OP2_SETE_DX10
, tgsi_op2
},
11962 [TGSI_OPCODE_FSGE
] = { ALU_OP2_SETGE_DX10
, tgsi_op2
},
11963 [TGSI_OPCODE_FSLT
] = { ALU_OP2_SETGT_DX10
, tgsi_op2_swap
},
11964 [TGSI_OPCODE_FSNE
] = { ALU_OP2_SETNE_DX10
, tgsi_op2_swap
},
11965 [TGSI_OPCODE_MEMBAR
] = { ALU_OP0_GROUP_BARRIER
, tgsi_barrier
},
11966 [113] = { ALU_OP0_NOP
, tgsi_unsupported
},
11967 [114] = { ALU_OP0_NOP
, tgsi_unsupported
},
11968 [115] = { ALU_OP0_NOP
, tgsi_unsupported
},
11969 [TGSI_OPCODE_KILL_IF
] = { ALU_OP2_KILLGT
, tgsi_kill
}, /* conditional kill */
11970 [TGSI_OPCODE_END
] = { ALU_OP0_NOP
, tgsi_end
}, /* aka HALT */
11971 /* Refer below for TGSI_OPCODE_DFMA */
11972 [TGSI_OPCODE_F2I
] = { ALU_OP1_FLT_TO_INT
, tgsi_f2i
},
11973 [TGSI_OPCODE_IDIV
] = { ALU_OP0_NOP
, tgsi_idiv
},
11974 [TGSI_OPCODE_IMAX
] = { ALU_OP2_MAX_INT
, tgsi_op2
},
11975 [TGSI_OPCODE_IMIN
] = { ALU_OP2_MIN_INT
, tgsi_op2
},
11976 [TGSI_OPCODE_INEG
] = { ALU_OP2_SUB_INT
, tgsi_ineg
},
11977 [TGSI_OPCODE_ISGE
] = { ALU_OP2_SETGE_INT
, tgsi_op2
},
11978 [TGSI_OPCODE_ISHR
] = { ALU_OP2_ASHR_INT
, tgsi_op2
},
11979 [TGSI_OPCODE_ISLT
] = { ALU_OP2_SETGT_INT
, tgsi_op2_swap
},
11980 [TGSI_OPCODE_F2U
] = { ALU_OP1_FLT_TO_UINT
, tgsi_f2i
},
11981 [TGSI_OPCODE_U2F
] = { ALU_OP1_UINT_TO_FLT
, tgsi_op2_trans
},
11982 [TGSI_OPCODE_UADD
] = { ALU_OP2_ADD_INT
, tgsi_op2
},
11983 [TGSI_OPCODE_UDIV
] = { ALU_OP0_NOP
, tgsi_udiv
},
11984 [TGSI_OPCODE_UMAD
] = { ALU_OP0_NOP
, tgsi_umad
},
11985 [TGSI_OPCODE_UMAX
] = { ALU_OP2_MAX_UINT
, tgsi_op2
},
11986 [TGSI_OPCODE_UMIN
] = { ALU_OP2_MIN_UINT
, tgsi_op2
},
11987 [TGSI_OPCODE_UMOD
] = { ALU_OP0_NOP
, tgsi_umod
},
11988 [TGSI_OPCODE_UMUL
] = { ALU_OP2_MULLO_UINT
, tgsi_op2_trans
},
11989 [TGSI_OPCODE_USEQ
] = { ALU_OP2_SETE_INT
, tgsi_op2
},
11990 [TGSI_OPCODE_USGE
] = { ALU_OP2_SETGE_UINT
, tgsi_op2
},
11991 [TGSI_OPCODE_USHR
] = { ALU_OP2_LSHR_INT
, tgsi_op2
},
11992 [TGSI_OPCODE_USLT
] = { ALU_OP2_SETGT_UINT
, tgsi_op2_swap
},
11993 [TGSI_OPCODE_USNE
] = { ALU_OP2_SETNE_INT
, tgsi_op2
},
11994 [TGSI_OPCODE_SWITCH
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11995 [TGSI_OPCODE_CASE
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11996 [TGSI_OPCODE_DEFAULT
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11997 [TGSI_OPCODE_ENDSWITCH
] = { ALU_OP0_NOP
, tgsi_unsupported
},
11998 [TGSI_OPCODE_SAMPLE
] = { 0, tgsi_unsupported
},
11999 [TGSI_OPCODE_SAMPLE_I
] = { 0, tgsi_unsupported
},
12000 [TGSI_OPCODE_SAMPLE_I_MS
] = { 0, tgsi_unsupported
},
12001 [TGSI_OPCODE_SAMPLE_B
] = { 0, tgsi_unsupported
},
12002 [TGSI_OPCODE_SAMPLE_C
] = { 0, tgsi_unsupported
},
12003 [TGSI_OPCODE_SAMPLE_C_LZ
] = { 0, tgsi_unsupported
},
12004 [TGSI_OPCODE_SAMPLE_D
] = { 0, tgsi_unsupported
},
12005 [TGSI_OPCODE_SAMPLE_L
] = { 0, tgsi_unsupported
},
12006 [TGSI_OPCODE_GATHER4
] = { 0, tgsi_unsupported
},
12007 [TGSI_OPCODE_SVIEWINFO
] = { 0, tgsi_unsupported
},
12008 [TGSI_OPCODE_SAMPLE_POS
] = { 0, tgsi_unsupported
},
12009 [TGSI_OPCODE_SAMPLE_INFO
] = { 0, tgsi_unsupported
},
12010 [TGSI_OPCODE_UARL
] = { ALU_OP1_MOVA_INT
, tgsi_eg_arl
},
12011 [TGSI_OPCODE_UCMP
] = { ALU_OP0_NOP
, tgsi_ucmp
},
12012 [TGSI_OPCODE_IABS
] = { 0, tgsi_iabs
},
12013 [TGSI_OPCODE_ISSG
] = { 0, tgsi_issg
},
12014 [TGSI_OPCODE_LOAD
] = { ALU_OP0_NOP
, tgsi_load
},
12015 [TGSI_OPCODE_STORE
] = { ALU_OP0_NOP
, tgsi_store
},
12016 [163] = { ALU_OP0_NOP
, tgsi_unsupported
},
12017 [164] = { ALU_OP0_NOP
, tgsi_unsupported
},
12018 [165] = { ALU_OP0_NOP
, tgsi_unsupported
},
12019 [TGSI_OPCODE_BARRIER
] = { ALU_OP0_GROUP_BARRIER
, tgsi_barrier
},
12020 [TGSI_OPCODE_ATOMUADD
] = { V_RAT_INST_ADD_RTN
, tgsi_atomic_op
},
12021 [TGSI_OPCODE_ATOMXCHG
] = { V_RAT_INST_XCHG_RTN
, tgsi_atomic_op
},
12022 [TGSI_OPCODE_ATOMCAS
] = { V_RAT_INST_CMPXCHG_INT_RTN
, tgsi_atomic_op
},
12023 [TGSI_OPCODE_ATOMAND
] = { V_RAT_INST_AND_RTN
, tgsi_atomic_op
},
12024 [TGSI_OPCODE_ATOMOR
] = { V_RAT_INST_OR_RTN
, tgsi_atomic_op
},
12025 [TGSI_OPCODE_ATOMXOR
] = { V_RAT_INST_XOR_RTN
, tgsi_atomic_op
},
12026 [TGSI_OPCODE_ATOMUMIN
] = { V_RAT_INST_MIN_UINT_RTN
, tgsi_atomic_op
},
12027 [TGSI_OPCODE_ATOMUMAX
] = { V_RAT_INST_MAX_UINT_RTN
, tgsi_atomic_op
},
12028 [TGSI_OPCODE_ATOMIMIN
] = { V_RAT_INST_MIN_INT_RTN
, tgsi_atomic_op
},
12029 [TGSI_OPCODE_ATOMIMAX
] = { V_RAT_INST_MAX_INT_RTN
, tgsi_atomic_op
},
12030 [TGSI_OPCODE_TEX2
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
12031 [TGSI_OPCODE_TXB2
] = { FETCH_OP_SAMPLE_LB
, tgsi_tex
},
12032 [TGSI_OPCODE_TXL2
] = { FETCH_OP_SAMPLE_L
, tgsi_tex
},
12033 [TGSI_OPCODE_IMUL_HI
] = { ALU_OP2_MULHI_INT
, tgsi_op2_trans
},
12034 [TGSI_OPCODE_UMUL_HI
] = { ALU_OP2_MULHI_UINT
, tgsi_op2_trans
},
12035 [TGSI_OPCODE_TG4
] = { FETCH_OP_GATHER4
, tgsi_tex
},
12036 [TGSI_OPCODE_LODQ
] = { FETCH_OP_GET_LOD
, tgsi_tex
},
12037 [TGSI_OPCODE_IBFE
] = { ALU_OP3_BFE_INT
, tgsi_bfe
},
12038 [TGSI_OPCODE_UBFE
] = { ALU_OP3_BFE_UINT
, tgsi_bfe
},
12039 [TGSI_OPCODE_BFI
] = { ALU_OP0_NOP
, tgsi_bfi
},
12040 [TGSI_OPCODE_BREV
] = { ALU_OP1_BFREV_INT
, tgsi_op2
},
12041 [TGSI_OPCODE_POPC
] = { ALU_OP1_BCNT_INT
, tgsi_op2
},
12042 [TGSI_OPCODE_LSB
] = { ALU_OP1_FFBL_INT
, tgsi_op2
},
12043 [TGSI_OPCODE_IMSB
] = { ALU_OP1_FFBH_INT
, tgsi_msb
},
12044 [TGSI_OPCODE_UMSB
] = { ALU_OP1_FFBH_UINT
, tgsi_msb
},
12045 [TGSI_OPCODE_INTERP_CENTROID
] = { ALU_OP0_NOP
, tgsi_interp_egcm
},
12046 [TGSI_OPCODE_INTERP_SAMPLE
] = { ALU_OP0_NOP
, tgsi_interp_egcm
},
12047 [TGSI_OPCODE_INTERP_OFFSET
] = { ALU_OP0_NOP
, tgsi_interp_egcm
},
12048 [TGSI_OPCODE_F2D
] = { ALU_OP1_FLT32_TO_FLT64
, tgsi_op2_64
},
12049 [TGSI_OPCODE_D2F
] = { ALU_OP1_FLT64_TO_FLT32
, tgsi_op2_64_single_dest
},
12050 [TGSI_OPCODE_DABS
] = { ALU_OP1_MOV
, tgsi_op2_64
},
12051 [TGSI_OPCODE_DNEG
] = { ALU_OP2_ADD_64
, tgsi_dneg
},
12052 [TGSI_OPCODE_DADD
] = { ALU_OP2_ADD_64
, tgsi_op2_64
},
12053 [TGSI_OPCODE_DMUL
] = { ALU_OP2_MUL_64
, cayman_mul_double_instr
},
12054 [TGSI_OPCODE_DDIV
] = { 0, cayman_ddiv_instr
},
12055 [TGSI_OPCODE_DMAX
] = { ALU_OP2_MAX_64
, tgsi_op2_64
},
12056 [TGSI_OPCODE_DMIN
] = { ALU_OP2_MIN_64
, tgsi_op2_64
},
12057 [TGSI_OPCODE_DSLT
] = { ALU_OP2_SETGT_64
, tgsi_op2_64_single_dest_s
},
12058 [TGSI_OPCODE_DSGE
] = { ALU_OP2_SETGE_64
, tgsi_op2_64_single_dest
},
12059 [TGSI_OPCODE_DSEQ
] = { ALU_OP2_SETE_64
, tgsi_op2_64_single_dest
},
12060 [TGSI_OPCODE_DSNE
] = { ALU_OP2_SETNE_64
, tgsi_op2_64_single_dest
},
12061 [TGSI_OPCODE_DRCP
] = { ALU_OP2_RECIP_64
, cayman_emit_double_instr
},
12062 [TGSI_OPCODE_DSQRT
] = { ALU_OP2_SQRT_64
, cayman_emit_double_instr
},
12063 [TGSI_OPCODE_DMAD
] = { ALU_OP3_FMA_64
, tgsi_op3_64
},
12064 [TGSI_OPCODE_DFMA
] = { ALU_OP3_FMA_64
, tgsi_op3_64
},
12065 [TGSI_OPCODE_DFRAC
] = { ALU_OP1_FRACT_64
, tgsi_op2_64
},
12066 [TGSI_OPCODE_DLDEXP
] = { ALU_OP2_LDEXP_64
, tgsi_op2_64
},
12067 [TGSI_OPCODE_DFRACEXP
] = { ALU_OP1_FREXP_64
, tgsi_dfracexp
},
12068 [TGSI_OPCODE_D2I
] = { ALU_OP1_FLT_TO_INT
, egcm_double_to_int
},
12069 [TGSI_OPCODE_I2D
] = { ALU_OP1_INT_TO_FLT
, egcm_int_to_double
},
12070 [TGSI_OPCODE_D2U
] = { ALU_OP1_FLT_TO_UINT
, egcm_double_to_int
},
12071 [TGSI_OPCODE_U2D
] = { ALU_OP1_UINT_TO_FLT
, egcm_int_to_double
},
12072 [TGSI_OPCODE_DRSQ
] = { ALU_OP2_RECIPSQRT_64
, cayman_emit_double_instr
},
12073 [TGSI_OPCODE_U64SNE
] = { ALU_OP0_NOP
, egcm_u64sne
},
12074 [TGSI_OPCODE_U64ADD
] = { ALU_OP0_NOP
, egcm_u64add
},
12075 [TGSI_OPCODE_U64MUL
] = { ALU_OP0_NOP
, egcm_u64mul
},
12076 [TGSI_OPCODE_U64DIV
] = { ALU_OP0_NOP
, egcm_u64div
},
12077 [TGSI_OPCODE_LAST
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12080 static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction
[] = {
12081 [TGSI_OPCODE_ARL
] = { ALU_OP0_NOP
, tgsi_eg_arl
},
12082 [TGSI_OPCODE_MOV
] = { ALU_OP1_MOV
, tgsi_op2
},
12083 [TGSI_OPCODE_LIT
] = { ALU_OP0_NOP
, tgsi_lit
},
12084 [TGSI_OPCODE_RCP
] = { ALU_OP1_RECIP_IEEE
, cayman_emit_float_instr
},
12085 [TGSI_OPCODE_RSQ
] = { ALU_OP1_RECIPSQRT_IEEE
, cayman_emit_float_instr
},
12086 [TGSI_OPCODE_EXP
] = { ALU_OP0_NOP
, tgsi_exp
},
12087 [TGSI_OPCODE_LOG
] = { ALU_OP0_NOP
, tgsi_log
},
12088 [TGSI_OPCODE_MUL
] = { ALU_OP2_MUL_IEEE
, tgsi_op2
},
12089 [TGSI_OPCODE_ADD
] = { ALU_OP2_ADD
, tgsi_op2
},
12090 [TGSI_OPCODE_DP3
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
12091 [TGSI_OPCODE_DP4
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
12092 [TGSI_OPCODE_DST
] = { ALU_OP0_NOP
, tgsi_opdst
},
12093 [TGSI_OPCODE_MIN
] = { ALU_OP2_MIN_DX10
, tgsi_op2
},
12094 [TGSI_OPCODE_MAX
] = { ALU_OP2_MAX_DX10
, tgsi_op2
},
12095 [TGSI_OPCODE_SLT
] = { ALU_OP2_SETGT
, tgsi_op2_swap
},
12096 [TGSI_OPCODE_SGE
] = { ALU_OP2_SETGE
, tgsi_op2
},
12097 [TGSI_OPCODE_MAD
] = { ALU_OP3_MULADD_IEEE
, tgsi_op3
},
12098 [TGSI_OPCODE_LRP
] = { ALU_OP0_NOP
, tgsi_lrp
},
12099 [TGSI_OPCODE_FMA
] = { ALU_OP3_FMA
, tgsi_op3
},
12100 [TGSI_OPCODE_SQRT
] = { ALU_OP1_SQRT_IEEE
, cayman_emit_float_instr
},
12101 [21] = { ALU_OP0_NOP
, tgsi_unsupported
},
12102 [22] = { ALU_OP0_NOP
, tgsi_unsupported
},
12103 [23] = { ALU_OP0_NOP
, tgsi_unsupported
},
12104 [TGSI_OPCODE_FRC
] = { ALU_OP1_FRACT
, tgsi_op2
},
12105 [25] = { ALU_OP0_NOP
, tgsi_unsupported
},
12106 [TGSI_OPCODE_FLR
] = { ALU_OP1_FLOOR
, tgsi_op2
},
12107 [TGSI_OPCODE_ROUND
] = { ALU_OP1_RNDNE
, tgsi_op2
},
12108 [TGSI_OPCODE_EX2
] = { ALU_OP1_EXP_IEEE
, cayman_emit_float_instr
},
12109 [TGSI_OPCODE_LG2
] = { ALU_OP1_LOG_IEEE
, cayman_emit_float_instr
},
12110 [TGSI_OPCODE_POW
] = { ALU_OP0_NOP
, cayman_pow
},
12111 [31] = { ALU_OP0_NOP
, tgsi_unsupported
},
12112 [32] = { ALU_OP0_NOP
, tgsi_unsupported
},
12113 [TGSI_OPCODE_CLOCK
] = { ALU_OP0_NOP
, tgsi_clock
},
12114 [34] = { ALU_OP0_NOP
, tgsi_unsupported
},
12115 [35] = { ALU_OP0_NOP
, tgsi_unsupported
},
12116 [TGSI_OPCODE_COS
] = { ALU_OP1_COS
, cayman_trig
},
12117 [TGSI_OPCODE_DDX
] = { FETCH_OP_GET_GRADIENTS_H
, tgsi_tex
},
12118 [TGSI_OPCODE_DDY
] = { FETCH_OP_GET_GRADIENTS_V
, tgsi_tex
},
12119 [TGSI_OPCODE_KILL
] = { ALU_OP2_KILLGT
, tgsi_kill
}, /* unconditional kill */
12120 [TGSI_OPCODE_PK2H
] = { ALU_OP0_NOP
, tgsi_pk2h
},
12121 [TGSI_OPCODE_PK2US
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12122 [TGSI_OPCODE_PK4B
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12123 [TGSI_OPCODE_PK4UB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12124 [44] = { ALU_OP0_NOP
, tgsi_unsupported
},
12125 [TGSI_OPCODE_SEQ
] = { ALU_OP2_SETE
, tgsi_op2
},
12126 [46] = { ALU_OP0_NOP
, tgsi_unsupported
},
12127 [TGSI_OPCODE_SGT
] = { ALU_OP2_SETGT
, tgsi_op2
},
12128 [TGSI_OPCODE_SIN
] = { ALU_OP1_SIN
, cayman_trig
},
12129 [TGSI_OPCODE_SLE
] = { ALU_OP2_SETGE
, tgsi_op2_swap
},
12130 [TGSI_OPCODE_SNE
] = { ALU_OP2_SETNE
, tgsi_op2
},
12131 [51] = { ALU_OP0_NOP
, tgsi_unsupported
},
12132 [TGSI_OPCODE_TEX
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
12133 [TGSI_OPCODE_TXD
] = { FETCH_OP_SAMPLE_G
, tgsi_tex
},
12134 [TGSI_OPCODE_TXP
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
12135 [TGSI_OPCODE_UP2H
] = { ALU_OP0_NOP
, tgsi_up2h
},
12136 [TGSI_OPCODE_UP2US
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12137 [TGSI_OPCODE_UP4B
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12138 [TGSI_OPCODE_UP4UB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12139 [59] = { ALU_OP0_NOP
, tgsi_unsupported
},
12140 [60] = { ALU_OP0_NOP
, tgsi_unsupported
},
12141 [TGSI_OPCODE_ARR
] = { ALU_OP0_NOP
, tgsi_eg_arl
},
12142 [62] = { ALU_OP0_NOP
, tgsi_unsupported
},
12143 [TGSI_OPCODE_CAL
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12144 [TGSI_OPCODE_RET
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12145 [TGSI_OPCODE_SSG
] = { ALU_OP0_NOP
, tgsi_ssg
},
12146 [TGSI_OPCODE_CMP
] = { ALU_OP0_NOP
, tgsi_cmp
},
12147 [67] = { ALU_OP0_NOP
, tgsi_unsupported
},
12148 [TGSI_OPCODE_TXB
] = { FETCH_OP_SAMPLE_LB
, tgsi_tex
},
12149 [69] = { ALU_OP0_NOP
, tgsi_unsupported
},
12150 [TGSI_OPCODE_DIV
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12151 [TGSI_OPCODE_DP2
] = { ALU_OP2_DOT4_IEEE
, tgsi_dp
},
12152 [TGSI_OPCODE_TXL
] = { FETCH_OP_SAMPLE_L
, tgsi_tex
},
12153 [TGSI_OPCODE_BRK
] = { CF_OP_LOOP_BREAK
, tgsi_loop_brk_cont
},
12154 [TGSI_OPCODE_IF
] = { ALU_OP0_NOP
, tgsi_if
},
12155 [TGSI_OPCODE_UIF
] = { ALU_OP0_NOP
, tgsi_uif
},
12156 [76] = { ALU_OP0_NOP
, tgsi_unsupported
},
12157 [TGSI_OPCODE_ELSE
] = { ALU_OP0_NOP
, tgsi_else
},
12158 [TGSI_OPCODE_ENDIF
] = { ALU_OP0_NOP
, tgsi_endif
},
12159 [TGSI_OPCODE_DDX_FINE
] = { FETCH_OP_GET_GRADIENTS_H
, tgsi_tex
},
12160 [TGSI_OPCODE_DDY_FINE
] = { FETCH_OP_GET_GRADIENTS_V
, tgsi_tex
},
12161 [82] = { ALU_OP0_NOP
, tgsi_unsupported
},
12162 [TGSI_OPCODE_CEIL
] = { ALU_OP1_CEIL
, tgsi_op2
},
12163 [TGSI_OPCODE_I2F
] = { ALU_OP1_INT_TO_FLT
, tgsi_op2
},
12164 [TGSI_OPCODE_NOT
] = { ALU_OP1_NOT_INT
, tgsi_op2
},
12165 [TGSI_OPCODE_TRUNC
] = { ALU_OP1_TRUNC
, tgsi_op2
},
12166 [TGSI_OPCODE_SHL
] = { ALU_OP2_LSHL_INT
, tgsi_op2
},
12167 [88] = { ALU_OP0_NOP
, tgsi_unsupported
},
12168 [TGSI_OPCODE_AND
] = { ALU_OP2_AND_INT
, tgsi_op2
},
12169 [TGSI_OPCODE_OR
] = { ALU_OP2_OR_INT
, tgsi_op2
},
12170 [TGSI_OPCODE_MOD
] = { ALU_OP0_NOP
, tgsi_imod
},
12171 [TGSI_OPCODE_XOR
] = { ALU_OP2_XOR_INT
, tgsi_op2
},
12172 [93] = { ALU_OP0_NOP
, tgsi_unsupported
},
12173 [TGSI_OPCODE_TXF
] = { FETCH_OP_LD
, tgsi_tex
},
12174 [TGSI_OPCODE_TXQ
] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_tex
},
12175 [TGSI_OPCODE_CONT
] = { CF_OP_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
12176 [TGSI_OPCODE_EMIT
] = { CF_OP_EMIT_VERTEX
, tgsi_gs_emit
},
12177 [TGSI_OPCODE_ENDPRIM
] = { CF_OP_CUT_VERTEX
, tgsi_gs_emit
},
12178 [TGSI_OPCODE_BGNLOOP
] = { ALU_OP0_NOP
, tgsi_bgnloop
},
12179 [TGSI_OPCODE_BGNSUB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12180 [TGSI_OPCODE_ENDLOOP
] = { ALU_OP0_NOP
, tgsi_endloop
},
12181 [TGSI_OPCODE_ENDSUB
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12182 [103] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_tex
},
12183 [TGSI_OPCODE_TXQS
] = { FETCH_OP_GET_NUMBER_OF_SAMPLES
, tgsi_tex
},
12184 [TGSI_OPCODE_RESQ
] = { FETCH_OP_GET_TEXTURE_RESINFO
, tgsi_resq
},
12185 [106] = { ALU_OP0_NOP
, tgsi_unsupported
},
12186 [TGSI_OPCODE_NOP
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12187 [TGSI_OPCODE_FSEQ
] = { ALU_OP2_SETE_DX10
, tgsi_op2
},
12188 [TGSI_OPCODE_FSGE
] = { ALU_OP2_SETGE_DX10
, tgsi_op2
},
12189 [TGSI_OPCODE_FSLT
] = { ALU_OP2_SETGT_DX10
, tgsi_op2_swap
},
12190 [TGSI_OPCODE_FSNE
] = { ALU_OP2_SETNE_DX10
, tgsi_op2_swap
},
12191 [TGSI_OPCODE_MEMBAR
] = { ALU_OP0_GROUP_BARRIER
, tgsi_barrier
},
12192 [113] = { ALU_OP0_NOP
, tgsi_unsupported
},
12193 [114] = { ALU_OP0_NOP
, tgsi_unsupported
},
12194 [115] = { ALU_OP0_NOP
, tgsi_unsupported
},
12195 [TGSI_OPCODE_KILL_IF
] = { ALU_OP2_KILLGT
, tgsi_kill
}, /* conditional kill */
12196 [TGSI_OPCODE_END
] = { ALU_OP0_NOP
, tgsi_end
}, /* aka HALT */
12197 /* Refer below for TGSI_OPCODE_DFMA */
12198 [TGSI_OPCODE_F2I
] = { ALU_OP1_FLT_TO_INT
, tgsi_op2
},
12199 [TGSI_OPCODE_IDIV
] = { ALU_OP0_NOP
, tgsi_idiv
},
12200 [TGSI_OPCODE_IMAX
] = { ALU_OP2_MAX_INT
, tgsi_op2
},
12201 [TGSI_OPCODE_IMIN
] = { ALU_OP2_MIN_INT
, tgsi_op2
},
12202 [TGSI_OPCODE_INEG
] = { ALU_OP2_SUB_INT
, tgsi_ineg
},
12203 [TGSI_OPCODE_ISGE
] = { ALU_OP2_SETGE_INT
, tgsi_op2
},
12204 [TGSI_OPCODE_ISHR
] = { ALU_OP2_ASHR_INT
, tgsi_op2
},
12205 [TGSI_OPCODE_ISLT
] = { ALU_OP2_SETGT_INT
, tgsi_op2_swap
},
12206 [TGSI_OPCODE_F2U
] = { ALU_OP1_FLT_TO_UINT
, tgsi_op2
},
12207 [TGSI_OPCODE_U2F
] = { ALU_OP1_UINT_TO_FLT
, tgsi_op2
},
12208 [TGSI_OPCODE_UADD
] = { ALU_OP2_ADD_INT
, tgsi_op2
},
12209 [TGSI_OPCODE_UDIV
] = { ALU_OP0_NOP
, tgsi_udiv
},
12210 [TGSI_OPCODE_UMAD
] = { ALU_OP0_NOP
, tgsi_umad
},
12211 [TGSI_OPCODE_UMAX
] = { ALU_OP2_MAX_UINT
, tgsi_op2
},
12212 [TGSI_OPCODE_UMIN
] = { ALU_OP2_MIN_UINT
, tgsi_op2
},
12213 [TGSI_OPCODE_UMOD
] = { ALU_OP0_NOP
, tgsi_umod
},
12214 [TGSI_OPCODE_UMUL
] = { ALU_OP2_MULLO_INT
, cayman_mul_int_instr
},
12215 [TGSI_OPCODE_USEQ
] = { ALU_OP2_SETE_INT
, tgsi_op2
},
12216 [TGSI_OPCODE_USGE
] = { ALU_OP2_SETGE_UINT
, tgsi_op2
},
12217 [TGSI_OPCODE_USHR
] = { ALU_OP2_LSHR_INT
, tgsi_op2
},
12218 [TGSI_OPCODE_USLT
] = { ALU_OP2_SETGT_UINT
, tgsi_op2_swap
},
12219 [TGSI_OPCODE_USNE
] = { ALU_OP2_SETNE_INT
, tgsi_op2
},
12220 [TGSI_OPCODE_SWITCH
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12221 [TGSI_OPCODE_CASE
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12222 [TGSI_OPCODE_DEFAULT
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12223 [TGSI_OPCODE_ENDSWITCH
] = { ALU_OP0_NOP
, tgsi_unsupported
},
12224 [TGSI_OPCODE_SAMPLE
] = { 0, tgsi_unsupported
},
12225 [TGSI_OPCODE_SAMPLE_I
] = { 0, tgsi_unsupported
},
12226 [TGSI_OPCODE_SAMPLE_I_MS
] = { 0, tgsi_unsupported
},
12227 [TGSI_OPCODE_SAMPLE_B
] = { 0, tgsi_unsupported
},
12228 [TGSI_OPCODE_SAMPLE_C
] = { 0, tgsi_unsupported
},
12229 [TGSI_OPCODE_SAMPLE_C_LZ
] = { 0, tgsi_unsupported
},
12230 [TGSI_OPCODE_SAMPLE_D
] = { 0, tgsi_unsupported
},
12231 [TGSI_OPCODE_SAMPLE_L
] = { 0, tgsi_unsupported
},
12232 [TGSI_OPCODE_GATHER4
] = { 0, tgsi_unsupported
},
12233 [TGSI_OPCODE_SVIEWINFO
] = { 0, tgsi_unsupported
},
12234 [TGSI_OPCODE_SAMPLE_POS
] = { 0, tgsi_unsupported
},
12235 [TGSI_OPCODE_SAMPLE_INFO
] = { 0, tgsi_unsupported
},
12236 [TGSI_OPCODE_UARL
] = { ALU_OP1_MOVA_INT
, tgsi_eg_arl
},
12237 [TGSI_OPCODE_UCMP
] = { ALU_OP0_NOP
, tgsi_ucmp
},
12238 [TGSI_OPCODE_IABS
] = { 0, tgsi_iabs
},
12239 [TGSI_OPCODE_ISSG
] = { 0, tgsi_issg
},
12240 [TGSI_OPCODE_LOAD
] = { ALU_OP0_NOP
, tgsi_load
},
12241 [TGSI_OPCODE_STORE
] = { ALU_OP0_NOP
, tgsi_store
},
12242 [163] = { ALU_OP0_NOP
, tgsi_unsupported
},
12243 [164] = { ALU_OP0_NOP
, tgsi_unsupported
},
12244 [165] = { ALU_OP0_NOP
, tgsi_unsupported
},
12245 [TGSI_OPCODE_BARRIER
] = { ALU_OP0_GROUP_BARRIER
, tgsi_barrier
},
12246 [TGSI_OPCODE_ATOMUADD
] = { V_RAT_INST_ADD_RTN
, tgsi_atomic_op
},
12247 [TGSI_OPCODE_ATOMXCHG
] = { V_RAT_INST_XCHG_RTN
, tgsi_atomic_op
},
12248 [TGSI_OPCODE_ATOMCAS
] = { V_RAT_INST_CMPXCHG_INT_RTN
, tgsi_atomic_op
},
12249 [TGSI_OPCODE_ATOMAND
] = { V_RAT_INST_AND_RTN
, tgsi_atomic_op
},
12250 [TGSI_OPCODE_ATOMOR
] = { V_RAT_INST_OR_RTN
, tgsi_atomic_op
},
12251 [TGSI_OPCODE_ATOMXOR
] = { V_RAT_INST_XOR_RTN
, tgsi_atomic_op
},
12252 [TGSI_OPCODE_ATOMUMIN
] = { V_RAT_INST_MIN_UINT_RTN
, tgsi_atomic_op
},
12253 [TGSI_OPCODE_ATOMUMAX
] = { V_RAT_INST_MAX_UINT_RTN
, tgsi_atomic_op
},
12254 [TGSI_OPCODE_ATOMIMIN
] = { V_RAT_INST_MIN_INT_RTN
, tgsi_atomic_op
},
12255 [TGSI_OPCODE_ATOMIMAX
] = { V_RAT_INST_MAX_INT_RTN
, tgsi_atomic_op
},
12256 [TGSI_OPCODE_TEX2
] = { FETCH_OP_SAMPLE
, tgsi_tex
},
12257 [TGSI_OPCODE_TXB2
] = { FETCH_OP_SAMPLE_LB
, tgsi_tex
},
12258 [TGSI_OPCODE_TXL2
] = { FETCH_OP_SAMPLE_L
, tgsi_tex
},
12259 [TGSI_OPCODE_IMUL_HI
] = { ALU_OP2_MULHI_INT
, cayman_mul_int_instr
},
12260 [TGSI_OPCODE_UMUL_HI
] = { ALU_OP2_MULHI_UINT
, cayman_mul_int_instr
},
12261 [TGSI_OPCODE_TG4
] = { FETCH_OP_GATHER4
, tgsi_tex
},
12262 [TGSI_OPCODE_LODQ
] = { FETCH_OP_GET_LOD
, tgsi_tex
},
12263 [TGSI_OPCODE_IBFE
] = { ALU_OP3_BFE_INT
, tgsi_bfe
},
12264 [TGSI_OPCODE_UBFE
] = { ALU_OP3_BFE_UINT
, tgsi_bfe
},
12265 [TGSI_OPCODE_BFI
] = { ALU_OP0_NOP
, tgsi_bfi
},
12266 [TGSI_OPCODE_BREV
] = { ALU_OP1_BFREV_INT
, tgsi_op2
},
12267 [TGSI_OPCODE_POPC
] = { ALU_OP1_BCNT_INT
, tgsi_op2
},
12268 [TGSI_OPCODE_LSB
] = { ALU_OP1_FFBL_INT
, tgsi_op2
},
12269 [TGSI_OPCODE_IMSB
] = { ALU_OP1_FFBH_INT
, tgsi_msb
},
12270 [TGSI_OPCODE_UMSB
] = { ALU_OP1_FFBH_UINT
, tgsi_msb
},
12271 [TGSI_OPCODE_INTERP_CENTROID
] = { ALU_OP0_NOP
, tgsi_interp_egcm
},
12272 [TGSI_OPCODE_INTERP_SAMPLE
] = { ALU_OP0_NOP
, tgsi_interp_egcm
},
12273 [TGSI_OPCODE_INTERP_OFFSET
] = { ALU_OP0_NOP
, tgsi_interp_egcm
},
12274 [TGSI_OPCODE_F2D
] = { ALU_OP1_FLT32_TO_FLT64
, tgsi_op2_64
},
12275 [TGSI_OPCODE_D2F
] = { ALU_OP1_FLT64_TO_FLT32
, tgsi_op2_64_single_dest
},
12276 [TGSI_OPCODE_DABS
] = { ALU_OP1_MOV
, tgsi_op2_64
},
12277 [TGSI_OPCODE_DNEG
] = { ALU_OP2_ADD_64
, tgsi_dneg
},
12278 [TGSI_OPCODE_DADD
] = { ALU_OP2_ADD_64
, tgsi_op2_64
},
12279 [TGSI_OPCODE_DMUL
] = { ALU_OP2_MUL_64
, cayman_mul_double_instr
},
12280 [TGSI_OPCODE_DDIV
] = { 0, cayman_ddiv_instr
},
12281 [TGSI_OPCODE_DMAX
] = { ALU_OP2_MAX_64
, tgsi_op2_64
},
12282 [TGSI_OPCODE_DMIN
] = { ALU_OP2_MIN_64
, tgsi_op2_64
},
12283 [TGSI_OPCODE_DSLT
] = { ALU_OP2_SETGT_64
, tgsi_op2_64_single_dest_s
},
12284 [TGSI_OPCODE_DSGE
] = { ALU_OP2_SETGE_64
, tgsi_op2_64_single_dest
},
12285 [TGSI_OPCODE_DSEQ
] = { ALU_OP2_SETE_64
, tgsi_op2_64_single_dest
},
12286 [TGSI_OPCODE_DSNE
] = { ALU_OP2_SETNE_64
, tgsi_op2_64_single_dest
},
12287 [TGSI_OPCODE_DRCP
] = { ALU_OP2_RECIP_64
, cayman_emit_double_instr
},
12288 [TGSI_OPCODE_DSQRT
] = { ALU_OP2_SQRT_64
, cayman_emit_double_instr
},
12289 [TGSI_OPCODE_DMAD
] = { ALU_OP3_FMA_64
, tgsi_op3_64
},
12290 [TGSI_OPCODE_DFMA
] = { ALU_OP3_FMA_64
, tgsi_op3_64
},
12291 [TGSI_OPCODE_DFRAC
] = { ALU_OP1_FRACT_64
, tgsi_op2_64
},
12292 [TGSI_OPCODE_DLDEXP
] = { ALU_OP2_LDEXP_64
, tgsi_op2_64
},
12293 [TGSI_OPCODE_DFRACEXP
] = { ALU_OP1_FREXP_64
, tgsi_dfracexp
},
12294 [TGSI_OPCODE_D2I
] = { ALU_OP1_FLT_TO_INT
, egcm_double_to_int
},
12295 [TGSI_OPCODE_I2D
] = { ALU_OP1_INT_TO_FLT
, egcm_int_to_double
},
12296 [TGSI_OPCODE_D2U
] = { ALU_OP1_FLT_TO_UINT
, egcm_double_to_int
},
12297 [TGSI_OPCODE_U2D
] = { ALU_OP1_UINT_TO_FLT
, egcm_int_to_double
},
12298 [TGSI_OPCODE_DRSQ
] = { ALU_OP2_RECIPSQRT_64
, cayman_emit_double_instr
},
12299 [TGSI_OPCODE_U64SNE
] = { ALU_OP0_NOP
, egcm_u64sne
},
12300 [TGSI_OPCODE_U64ADD
] = { ALU_OP0_NOP
, egcm_u64add
},
12301 [TGSI_OPCODE_U64MUL
] = { ALU_OP0_NOP
, egcm_u64mul
},
12302 [TGSI_OPCODE_U64DIV
] = { ALU_OP0_NOP
, egcm_u64div
},
12303 [TGSI_OPCODE_LAST
] = { ALU_OP0_NOP
, tgsi_unsupported
},