2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
/*
 * Why CAYMAN got loops for lots of instructions is explained here.
 *
 * - These 8xx t-slot only ops are implemented in all vector slots.
 *   MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 *
 * - These 8xx t-slot only opcodes become vector ops, with all four
 *   slots expecting the arguments on sources a and b. Result is
 *   broadcast to all channels.
 *   MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
 *
 * - These 8xx t-slot only opcodes become vector ops in the z, y, and
 *   x slots.
 *   EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
 *   RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
 *   The w slot may have an independent co-issued operation, or if the
 *   result is required to be in the w slot, the opcode above may be
 *   issued in the w slot as well.
 *   The compiler must issue the source argument to slots z, y, and x.
 */
61 int r600_find_vs_semantic_index(struct r600_shader
*vs
,
62 struct r600_shader
*ps
, int id
)
64 struct r600_shader_io
*input
= &ps
->input
[id
];
66 for (int i
= 0; i
< vs
->noutput
; i
++) {
67 if (input
->name
== vs
->output
[i
].name
&&
68 input
->sid
== vs
->output
[i
].sid
) {
75 static int r600_pipe_shader(struct pipe_context
*ctx
, struct r600_pipe_shader
*shader
)
77 struct r600_pipe_context
*rctx
= (struct r600_pipe_context
*)ctx
;
78 struct r600_shader
*rshader
= &shader
->shader
;
83 if (shader
->bo
== NULL
) {
84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85 shader
->bo
= r600_bo(rctx
->radeon
, rshader
->bc
.ndw
* 4, 4096, PIPE_BIND_VERTEX_BUFFER
, PIPE_USAGE_IMMUTABLE
);
86 if (shader
->bo
== NULL
) {
89 ptr
= (uint32_t*)r600_bo_map(rctx
->radeon
, shader
->bo
, rctx
->ctx
.cs
, PIPE_TRANSFER_WRITE
);
90 if (R600_BIG_ENDIAN
) {
91 for (i
= 0; i
< rshader
->bc
.ndw
; ++i
) {
92 ptr
[i
] = bswap_32(rshader
->bc
.bytecode
[i
]);
95 memcpy(ptr
, rshader
->bc
.bytecode
, rshader
->bc
.ndw
* sizeof(*ptr
));
97 r600_bo_unmap(rctx
->radeon
, shader
->bo
);
100 switch (rshader
->processor_type
) {
101 case TGSI_PROCESSOR_VERTEX
:
102 if (rctx
->chip_class
>= EVERGREEN
) {
103 evergreen_pipe_shader_vs(ctx
, shader
);
105 r600_pipe_shader_vs(ctx
, shader
);
108 case TGSI_PROCESSOR_FRAGMENT
:
109 if (rctx
->chip_class
>= EVERGREEN
) {
110 evergreen_pipe_shader_ps(ctx
, shader
);
112 r600_pipe_shader_ps(ctx
, shader
);
/*
 * Forward declaration: r600_pipe_shader_create() below calls the TGSI
 * translator before its definition appears in the file.
 */
static int r600_shader_from_tgsi(struct r600_pipe_context *rctx,
				 struct r600_pipe_shader *pipeshader);
123 int r600_pipe_shader_create(struct pipe_context
*ctx
, struct r600_pipe_shader
*shader
)
125 static int dump_shaders
= -1;
126 struct r600_pipe_context
*rctx
= (struct r600_pipe_context
*)ctx
;
	/* Would like some magic "get_bool_option_once" routine. */
131 if (dump_shaders
== -1)
132 dump_shaders
= debug_get_bool_option("R600_DUMP_SHADERS", FALSE
);
135 fprintf(stderr
, "--------------------------------------------------------------\n");
136 tgsi_dump(shader
->tokens
, 0);
138 r
= r600_shader_from_tgsi(rctx
, shader
);
140 R600_ERR("translation from TGSI failed !\n");
143 r
= r600_bc_build(&shader
->shader
.bc
);
145 R600_ERR("building bytecode failed !\n");
149 r600_bc_dump(&shader
->shader
.bc
);
150 fprintf(stderr
, "______________________________________________________________\n");
152 return r600_pipe_shader(ctx
, shader
);
155 void r600_pipe_shader_destroy(struct pipe_context
*ctx
, struct r600_pipe_shader
*shader
)
157 struct r600_pipe_context
*rctx
= (struct r600_pipe_context
*)ctx
;
159 r600_bo_reference(rctx
->radeon
, &shader
->bo
, NULL
);
160 r600_bc_clear(&shader
->shader
.bc
);
162 memset(&shader
->shader
,0,sizeof(struct r600_shader
));
/*
 * tgsi -> r600 shader
 */
/*
 * Forward declaration: struct r600_shader_ctx stores a pointer to this type
 * (inst_info) before the full definition is given.
 */
struct r600_shader_tgsi_instruction;
170 struct r600_shader_src
{
179 struct r600_shader_ctx
{
180 struct tgsi_shader_info info
;
181 struct tgsi_parse_context parse
;
182 const struct tgsi_token
*tokens
;
184 unsigned file_offset
[TGSI_FILE_COUNT
];
187 struct r600_shader_tgsi_instruction
*inst_info
;
189 struct r600_shader
*shader
;
190 struct r600_shader_src src
[4];
193 u32 max_driver_temp_used
;
194 /* needed for evergreen interpolation */
195 boolean input_centroid
;
196 boolean input_linear
;
197 boolean input_perspective
;
201 struct r600_shader_tgsi_instruction
{
202 unsigned tgsi_opcode
;
204 unsigned r600_opcode
;
205 int (*process
)(struct r600_shader_ctx
*ctx
);
208 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction
[], eg_shader_tgsi_instruction
[], cm_shader_tgsi_instruction
[];
209 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx
*ctx
);
211 static int tgsi_is_supported(struct r600_shader_ctx
*ctx
)
213 struct tgsi_full_instruction
*i
= &ctx
->parse
.FullToken
.FullInstruction
;
216 if (i
->Instruction
.NumDstRegs
> 1) {
217 R600_ERR("too many dst (%d)\n", i
->Instruction
.NumDstRegs
);
220 if (i
->Instruction
.Predicate
) {
221 R600_ERR("predicate unsupported\n");
225 if (i
->Instruction
.Label
) {
226 R600_ERR("label unsupported\n");
230 for (j
= 0; j
< i
->Instruction
.NumSrcRegs
; j
++) {
231 if (i
->Src
[j
].Register
.Dimension
) {
232 R600_ERR("unsupported src %d (dimension %d)\n", j
,
233 i
->Src
[j
].Register
.Dimension
);
237 for (j
= 0; j
< i
->Instruction
.NumDstRegs
; j
++) {
238 if (i
->Dst
[j
].Register
.Dimension
) {
239 R600_ERR("unsupported dst (dimension)\n");
246 static int evergreen_interp_alu(struct r600_shader_ctx
*ctx
, int input
)
249 struct r600_bc_alu alu
;
250 int gpr
= 0, base_chan
= 0;
253 if (ctx
->shader
->input
[input
].interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
) {
255 if (ctx
->shader
->input
[input
].centroid
)
257 } else if (ctx
->shader
->input
[input
].interpolate
== TGSI_INTERPOLATE_LINEAR
) {
259 /* if we have perspective add one */
260 if (ctx
->input_perspective
) {
262 /* if we have perspective centroid */
263 if (ctx
->input_centroid
)
266 if (ctx
->shader
->input
[input
].centroid
)
270 /* work out gpr and base_chan from index */
272 base_chan
= (2 * (ij_index
% 2)) + 1;
274 for (i
= 0; i
< 8; i
++) {
275 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
278 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW
;
280 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY
;
282 if ((i
> 1) && (i
< 6)) {
283 alu
.dst
.sel
= ctx
->shader
->input
[input
].gpr
;
287 alu
.dst
.chan
= i
% 4;
289 alu
.src
[0].sel
= gpr
;
290 alu
.src
[0].chan
= (base_chan
- (i
% 2));
292 alu
.src
[1].sel
= V_SQ_ALU_SRC_PARAM_BASE
+ ctx
->shader
->input
[input
].lds_pos
;
294 alu
.bank_swizzle_force
= SQ_ALU_VEC_210
;
297 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
305 static int tgsi_declaration(struct r600_shader_ctx
*ctx
)
307 struct tgsi_full_declaration
*d
= &ctx
->parse
.FullToken
.FullDeclaration
;
311 switch (d
->Declaration
.File
) {
312 case TGSI_FILE_INPUT
:
313 i
= ctx
->shader
->ninput
++;
314 ctx
->shader
->input
[i
].name
= d
->Semantic
.Name
;
315 ctx
->shader
->input
[i
].sid
= d
->Semantic
.Index
;
316 ctx
->shader
->input
[i
].interpolate
= d
->Declaration
.Interpolate
;
317 ctx
->shader
->input
[i
].centroid
= d
->Declaration
.Centroid
;
318 ctx
->shader
->input
[i
].gpr
= ctx
->file_offset
[TGSI_FILE_INPUT
] + i
;
319 if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
->bc
->chip_class
>= EVERGREEN
) {
320 /* turn input into interpolate on EG */
321 if (ctx
->shader
->input
[i
].name
!= TGSI_SEMANTIC_POSITION
) {
322 if (ctx
->shader
->input
[i
].interpolate
> 0) {
323 ctx
->shader
->input
[i
].lds_pos
= ctx
->shader
->nlds
++;
324 evergreen_interp_alu(ctx
, i
);
329 case TGSI_FILE_OUTPUT
:
330 i
= ctx
->shader
->noutput
++;
331 ctx
->shader
->output
[i
].name
= d
->Semantic
.Name
;
332 ctx
->shader
->output
[i
].sid
= d
->Semantic
.Index
;
333 ctx
->shader
->output
[i
].gpr
= ctx
->file_offset
[TGSI_FILE_OUTPUT
] + i
;
334 ctx
->shader
->output
[i
].interpolate
= d
->Declaration
.Interpolate
;
335 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
336 /* these don't count as vertex param exports */
337 if ((ctx
->shader
->output
[i
].name
== TGSI_SEMANTIC_POSITION
) ||
338 (ctx
->shader
->output
[i
].name
== TGSI_SEMANTIC_PSIZE
))
342 case TGSI_FILE_CONSTANT
:
343 case TGSI_FILE_TEMPORARY
:
344 case TGSI_FILE_SAMPLER
:
345 case TGSI_FILE_ADDRESS
:
348 case TGSI_FILE_SYSTEM_VALUE
:
349 if (d
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
) {
350 struct r600_bc_alu alu
;
351 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
353 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT
);
362 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
368 R600_ERR("unsupported file %d declaration\n", d
->Declaration
.File
);
374 static int r600_get_temp(struct r600_shader_ctx
*ctx
)
376 return ctx
->temp_reg
+ ctx
->max_driver_temp_used
++;
/*
 * For evergreen we need to scan the shader to find the number of GPRs we
 * need to reserve for interpolation.
 *
 * To do that we need to know:
 *  - whether we are going to emit any centroid inputs
 *  - whether perspective and/or linear interpolation are required
 */
387 static int evergreen_gpr_count(struct r600_shader_ctx
*ctx
)
392 ctx
->input_linear
= FALSE
;
393 ctx
->input_perspective
= FALSE
;
394 ctx
->input_centroid
= FALSE
;
395 ctx
->num_interp_gpr
= 1;
397 /* any centroid inputs */
398 for (i
= 0; i
< ctx
->info
.num_inputs
; i
++) {
399 /* skip position/face */
400 if (ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
||
401 ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
403 if (ctx
->info
.input_interpolate
[i
] == TGSI_INTERPOLATE_LINEAR
)
404 ctx
->input_linear
= TRUE
;
405 if (ctx
->info
.input_interpolate
[i
] == TGSI_INTERPOLATE_PERSPECTIVE
)
406 ctx
->input_perspective
= TRUE
;
407 if (ctx
->info
.input_centroid
[i
])
408 ctx
->input_centroid
= TRUE
;
412 /* ignoring sample for now */
413 if (ctx
->input_perspective
)
415 if (ctx
->input_linear
)
417 if (ctx
->input_centroid
)
420 ctx
->num_interp_gpr
+= (num_baryc
+ 1) >> 1;
422 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
423 return ctx
->num_interp_gpr
;
426 static void tgsi_src(struct r600_shader_ctx
*ctx
,
427 const struct tgsi_full_src_register
*tgsi_src
,
428 struct r600_shader_src
*r600_src
)
430 memset(r600_src
, 0, sizeof(*r600_src
));
431 r600_src
->swizzle
[0] = tgsi_src
->Register
.SwizzleX
;
432 r600_src
->swizzle
[1] = tgsi_src
->Register
.SwizzleY
;
433 r600_src
->swizzle
[2] = tgsi_src
->Register
.SwizzleZ
;
434 r600_src
->swizzle
[3] = tgsi_src
->Register
.SwizzleW
;
435 r600_src
->neg
= tgsi_src
->Register
.Negate
;
436 r600_src
->abs
= tgsi_src
->Register
.Absolute
;
438 if (tgsi_src
->Register
.File
== TGSI_FILE_IMMEDIATE
) {
440 if ((tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleY
) &&
441 (tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleZ
) &&
442 (tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleW
)) {
444 index
= tgsi_src
->Register
.Index
* 4 + tgsi_src
->Register
.SwizzleX
;
445 r600_bc_special_constants(ctx
->literals
[index
], &r600_src
->sel
, &r600_src
->neg
);
446 if (r600_src
->sel
!= V_SQ_ALU_SRC_LITERAL
)
449 index
= tgsi_src
->Register
.Index
;
450 r600_src
->sel
= V_SQ_ALU_SRC_LITERAL
;
451 memcpy(r600_src
->value
, ctx
->literals
+ index
* 4, sizeof(r600_src
->value
));
452 } else if (tgsi_src
->Register
.File
== TGSI_FILE_SYSTEM_VALUE
) {
		/* assume we want TGSI_SEMANTIC_INSTANCEID here */
454 r600_src
->swizzle
[0] = 3;
455 r600_src
->swizzle
[1] = 3;
456 r600_src
->swizzle
[2] = 3;
457 r600_src
->swizzle
[3] = 3;
460 if (tgsi_src
->Register
.Indirect
)
461 r600_src
->rel
= V_SQ_REL_RELATIVE
;
462 r600_src
->sel
= tgsi_src
->Register
.Index
;
463 r600_src
->sel
+= ctx
->file_offset
[tgsi_src
->Register
.File
];
467 static int tgsi_fetch_rel_const(struct r600_shader_ctx
*ctx
, unsigned int offset
, unsigned int dst_reg
)
469 struct r600_bc_vtx vtx
;
474 struct r600_bc_alu alu
;
476 memset(&alu
, 0, sizeof(alu
));
478 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT
);
479 alu
.src
[0].sel
= ctx
->ar_reg
;
481 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
482 alu
.src
[1].value
= offset
;
484 alu
.dst
.sel
= dst_reg
;
488 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
493 ar_reg
= ctx
->ar_reg
;
496 memset(&vtx
, 0, sizeof(vtx
));
497 vtx
.fetch_type
= 2; /* VTX_FETCH_NO_INDEX_OFFSET */
498 vtx
.src_gpr
= ar_reg
;
499 vtx
.mega_fetch_count
= 16;
500 vtx
.dst_gpr
= dst_reg
;
501 vtx
.dst_sel_x
= 0; /* SEL_X */
502 vtx
.dst_sel_y
= 1; /* SEL_Y */
503 vtx
.dst_sel_z
= 2; /* SEL_Z */
504 vtx
.dst_sel_w
= 3; /* SEL_W */
505 vtx
.data_format
= FMT_32_32_32_32_FLOAT
;
506 vtx
.num_format_all
= 2; /* NUM_FORMAT_SCALED */
507 vtx
.format_comp_all
= 1; /* FORMAT_COMP_SIGNED */
508 vtx
.srf_mode_all
= 1; /* SRF_MODE_NO_ZERO */
509 vtx
.endian
= r600_endian_swap(32);
511 if ((r
= r600_bc_add_vtx(ctx
->bc
, &vtx
)))
517 static int tgsi_split_constant(struct r600_shader_ctx
*ctx
)
519 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
520 struct r600_bc_alu alu
;
521 int i
, j
, k
, nconst
, r
;
523 for (i
= 0, nconst
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
524 if (inst
->Src
[i
].Register
.File
== TGSI_FILE_CONSTANT
) {
527 tgsi_src(ctx
, &inst
->Src
[i
], &ctx
->src
[i
]);
529 for (i
= 0, j
= nconst
- 1; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
530 if (inst
->Src
[i
].Register
.File
!= TGSI_FILE_CONSTANT
) {
534 if (ctx
->src
[i
].rel
) {
535 int treg
= r600_get_temp(ctx
);
536 if ((r
= tgsi_fetch_rel_const(ctx
, ctx
->src
[i
].sel
- 512, treg
)))
539 ctx
->src
[i
].sel
= treg
;
543 int treg
= r600_get_temp(ctx
);
544 for (k
= 0; k
< 4; k
++) {
545 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
546 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
547 alu
.src
[0].sel
= ctx
->src
[i
].sel
;
549 alu
.src
[0].rel
= ctx
->src
[i
].rel
;
555 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
559 ctx
->src
[i
].sel
= treg
;
567 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
568 static int tgsi_split_literal_constant(struct r600_shader_ctx
*ctx
)
570 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
571 struct r600_bc_alu alu
;
572 int i
, j
, k
, nliteral
, r
;
574 for (i
= 0, nliteral
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
575 if (ctx
->src
[i
].sel
== V_SQ_ALU_SRC_LITERAL
) {
579 for (i
= 0, j
= nliteral
- 1; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
580 if (j
> 0 && ctx
->src
[i
].sel
== V_SQ_ALU_SRC_LITERAL
) {
581 int treg
= r600_get_temp(ctx
);
582 for (k
= 0; k
< 4; k
++) {
583 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
584 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
585 alu
.src
[0].sel
= ctx
->src
[i
].sel
;
587 alu
.src
[0].value
= ctx
->src
[i
].value
[k
];
593 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
597 ctx
->src
[i
].sel
= treg
;
604 static int r600_shader_from_tgsi(struct r600_pipe_context
* rctx
, struct r600_pipe_shader
*pipeshader
)
606 struct r600_shader
*shader
= &pipeshader
->shader
;
607 struct tgsi_token
*tokens
= pipeshader
->tokens
;
608 struct tgsi_full_immediate
*immediate
;
609 struct tgsi_full_property
*property
;
610 struct r600_shader_ctx ctx
;
611 struct r600_bc_output output
[32];
612 unsigned output_done
, noutput
;
614 int i
, j
, r
= 0, pos0
;
616 ctx
.bc
= &shader
->bc
;
618 r600_bc_init(ctx
.bc
, rctx
->chip_class
);
620 tgsi_scan_shader(tokens
, &ctx
.info
);
621 tgsi_parse_init(&ctx
.parse
, tokens
);
622 ctx
.type
= ctx
.parse
.FullHeader
.Processor
.Processor
;
623 shader
->processor_type
= ctx
.type
;
624 ctx
.bc
->type
= shader
->processor_type
;
626 shader
->clamp_color
= (((ctx
.type
== TGSI_PROCESSOR_FRAGMENT
) && rctx
->clamp_fragment_color
) ||
627 ((ctx
.type
== TGSI_PROCESSOR_VERTEX
) && rctx
->clamp_vertex_color
));
629 shader
->nr_cbufs
= rctx
->nr_cbufs
;
	/* register allocations */
	/* Values [0,127] correspond to GPR[0..127].
	 * Values [128,159] correspond to constant buffer bank 0
	 * Values [160,191] correspond to constant buffer bank 1
	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
	 * Values [256,287] correspond to constant buffer bank 2 (EG)
	 * Values [288,319] correspond to constant buffer bank 3 (EG)
	 * Other special values are shown in the list below.
	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
	 * 248  SQ_ALU_SRC_0: special constant 0.0.
	 * 249  SQ_ALU_SRC_1: special constant 1.0 float.
	 * 250  SQ_ALU_SRC_1_INT: special constant 1 integer.
	 * 251  SQ_ALU_SRC_M_1_INT: special constant -1 integer.
	 * 252  SQ_ALU_SRC_0_5: special constant 0.5 float.
	 * 253  SQ_ALU_SRC_LITERAL: literal constant.
	 * 254  SQ_ALU_SRC_PV: previous vector result.
	 * 255  SQ_ALU_SRC_PS: previous scalar result.
	 */
652 for (i
= 0; i
< TGSI_FILE_COUNT
; i
++) {
653 ctx
.file_offset
[i
] = 0;
655 if (ctx
.type
== TGSI_PROCESSOR_VERTEX
) {
656 ctx
.file_offset
[TGSI_FILE_INPUT
] = 1;
657 if (ctx
.bc
->chip_class
>= EVERGREEN
) {
658 r600_bc_add_cfinst(ctx
.bc
, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS
);
660 r600_bc_add_cfinst(ctx
.bc
, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS
);
663 if (ctx
.type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
.bc
->chip_class
>= EVERGREEN
) {
664 ctx
.file_offset
[TGSI_FILE_INPUT
] = evergreen_gpr_count(&ctx
);
666 ctx
.file_offset
[TGSI_FILE_OUTPUT
] = ctx
.file_offset
[TGSI_FILE_INPUT
] +
667 ctx
.info
.file_max
[TGSI_FILE_INPUT
] + 1;
668 ctx
.file_offset
[TGSI_FILE_TEMPORARY
] = ctx
.file_offset
[TGSI_FILE_OUTPUT
] +
669 ctx
.info
.file_max
[TGSI_FILE_OUTPUT
] + 1;
671 /* Outside the GPR range. This will be translated to one of the
672 * kcache banks later. */
673 ctx
.file_offset
[TGSI_FILE_CONSTANT
] = 512;
675 ctx
.file_offset
[TGSI_FILE_IMMEDIATE
] = V_SQ_ALU_SRC_LITERAL
;
676 ctx
.ar_reg
= ctx
.file_offset
[TGSI_FILE_TEMPORARY
] +
677 ctx
.info
.file_max
[TGSI_FILE_TEMPORARY
] + 1;
678 ctx
.temp_reg
= ctx
.ar_reg
+ 1;
682 shader
->fs_write_all
= FALSE
;
683 while (!tgsi_parse_end_of_tokens(&ctx
.parse
)) {
684 tgsi_parse_token(&ctx
.parse
);
685 switch (ctx
.parse
.FullToken
.Token
.Type
) {
686 case TGSI_TOKEN_TYPE_IMMEDIATE
:
687 immediate
= &ctx
.parse
.FullToken
.FullImmediate
;
688 ctx
.literals
= realloc(ctx
.literals
, (ctx
.nliterals
+ 1) * 16);
689 if(ctx
.literals
== NULL
) {
693 ctx
.literals
[ctx
.nliterals
* 4 + 0] = immediate
->u
[0].Uint
;
694 ctx
.literals
[ctx
.nliterals
* 4 + 1] = immediate
->u
[1].Uint
;
695 ctx
.literals
[ctx
.nliterals
* 4 + 2] = immediate
->u
[2].Uint
;
696 ctx
.literals
[ctx
.nliterals
* 4 + 3] = immediate
->u
[3].Uint
;
699 case TGSI_TOKEN_TYPE_DECLARATION
:
700 r
= tgsi_declaration(&ctx
);
704 case TGSI_TOKEN_TYPE_INSTRUCTION
:
705 r
= tgsi_is_supported(&ctx
);
708 ctx
.max_driver_temp_used
= 0;
709 /* reserve first tmp for everyone */
712 opcode
= ctx
.parse
.FullToken
.FullInstruction
.Instruction
.Opcode
;
713 if ((r
= tgsi_split_constant(&ctx
)))
715 if ((r
= tgsi_split_literal_constant(&ctx
)))
717 if (ctx
.bc
->chip_class
== CAYMAN
)
718 ctx
.inst_info
= &cm_shader_tgsi_instruction
[opcode
];
719 else if (ctx
.bc
->chip_class
>= EVERGREEN
)
720 ctx
.inst_info
= &eg_shader_tgsi_instruction
[opcode
];
722 ctx
.inst_info
= &r600_shader_tgsi_instruction
[opcode
];
723 r
= ctx
.inst_info
->process(&ctx
);
727 case TGSI_TOKEN_TYPE_PROPERTY
:
728 property
= &ctx
.parse
.FullToken
.FullProperty
;
729 if (property
->Property
.PropertyName
== TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
) {
730 if (property
->u
[0].Data
== 1)
731 shader
->fs_write_all
= TRUE
;
735 R600_ERR("unsupported token type %d\n", ctx
.parse
.FullToken
.Token
.Type
);
741 noutput
= shader
->noutput
;
743 /* clamp color outputs */
744 if (shader
->clamp_color
) {
745 for (i
= 0; i
< noutput
; i
++) {
746 if (shader
->output
[i
].name
== TGSI_SEMANTIC_COLOR
||
747 shader
->output
[i
].name
== TGSI_SEMANTIC_BCOLOR
) {
750 for (j
= 0; j
< 4; j
++) {
751 struct r600_bc_alu alu
;
752 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
755 alu
.inst
= BC_INST(ctx
.bc
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
756 alu
.dst
.sel
= shader
->output
[i
].gpr
;
760 alu
.src
[0].sel
= alu
.dst
.sel
;
766 r
= r600_bc_add_alu(ctx
.bc
, &alu
);
776 for (i
= 0, pos0
= 0; i
< noutput
; i
++) {
777 memset(&output
[i
], 0, sizeof(struct r600_bc_output
));
778 output
[i
+ j
].gpr
= shader
->output
[i
].gpr
;
779 output
[i
+ j
].elem_size
= 3;
780 output
[i
+ j
].swizzle_x
= 0;
781 output
[i
+ j
].swizzle_y
= 1;
782 output
[i
+ j
].swizzle_z
= 2;
783 output
[i
+ j
].swizzle_w
= 3;
784 output
[i
+ j
].burst_count
= 1;
785 output
[i
+ j
].barrier
= 1;
786 output
[i
+ j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
787 output
[i
+ j
].array_base
= i
- pos0
;
788 output
[i
+ j
].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
);
790 case TGSI_PROCESSOR_VERTEX
:
791 if (shader
->output
[i
].name
== TGSI_SEMANTIC_POSITION
) {
792 output
[i
+ j
].array_base
= 60;
793 output
[i
+ j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
794 /* position doesn't count in array_base */
797 if (shader
->output
[i
].name
== TGSI_SEMANTIC_PSIZE
) {
798 output
[i
+ j
].array_base
= 61;
799 output
[i
+ j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
800 /* position doesn't count in array_base */
804 case TGSI_PROCESSOR_FRAGMENT
:
805 if (shader
->output
[i
].name
== TGSI_SEMANTIC_COLOR
) {
806 output
[i
+ j
].array_base
= shader
->output
[i
].sid
;
807 output
[i
+ j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
808 if (shader
->fs_write_all
&& (rctx
->chip_class
>= EVERGREEN
)) {
809 for (j
= 1; j
< shader
->nr_cbufs
; j
++) {
810 memset(&output
[i
+ j
], 0, sizeof(struct r600_bc_output
));
811 output
[i
+ j
].gpr
= shader
->output
[i
].gpr
;
812 output
[i
+ j
].elem_size
= 3;
813 output
[i
+ j
].swizzle_x
= 0;
814 output
[i
+ j
].swizzle_y
= 1;
815 output
[i
+ j
].swizzle_z
= 2;
816 output
[i
+ j
].swizzle_w
= 3;
817 output
[i
+ j
].burst_count
= 1;
818 output
[i
+ j
].barrier
= 1;
819 output
[i
+ j
].array_base
= shader
->output
[i
].sid
+ j
;
820 output
[i
+ j
].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
);
821 output
[i
+ j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
825 } else if (shader
->output
[i
].name
== TGSI_SEMANTIC_POSITION
) {
826 output
[i
+ j
].array_base
= 61;
827 output
[i
+ j
].swizzle_x
= 2;
828 output
[i
+ j
].swizzle_y
= 7;
829 output
[i
+ j
].swizzle_z
= output
[i
+ j
].swizzle_w
= 7;
830 output
[i
+ j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
831 } else if (shader
->output
[i
].name
== TGSI_SEMANTIC_STENCIL
) {
832 output
[i
+ j
].array_base
= 61;
833 output
[i
+ j
].swizzle_x
= 7;
834 output
[i
+ j
].swizzle_y
= 1;
835 output
[i
+ j
].swizzle_z
= output
[i
+ j
].swizzle_w
= 7;
836 output
[i
+ j
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
838 R600_ERR("unsupported fragment output name %d\n", shader
->output
[i
].name
);
844 R600_ERR("unsupported processor type %d\n", ctx
.type
);
850 /* add fake param output for vertex shader if no param is exported */
851 if (ctx
.type
== TGSI_PROCESSOR_VERTEX
) {
852 for (i
= 0, pos0
= 0; i
< noutput
; i
++) {
853 if (output
[i
].type
== V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
) {
859 memset(&output
[i
], 0, sizeof(struct r600_bc_output
));
861 output
[i
].elem_size
= 3;
862 output
[i
].swizzle_x
= 0;
863 output
[i
].swizzle_y
= 1;
864 output
[i
].swizzle_z
= 2;
865 output
[i
].swizzle_w
= 3;
866 output
[i
].burst_count
= 1;
867 output
[i
].barrier
= 1;
868 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
869 output
[i
].array_base
= 0;
870 output
[i
].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
);
874 /* add fake pixel export */
875 if (ctx
.type
== TGSI_PROCESSOR_FRAGMENT
&& !noutput
) {
876 memset(&output
[0], 0, sizeof(struct r600_bc_output
));
878 output
[0].elem_size
= 3;
879 output
[0].swizzle_x
= 7;
880 output
[0].swizzle_y
= 7;
881 output
[0].swizzle_z
= 7;
882 output
[0].swizzle_w
= 7;
883 output
[0].burst_count
= 1;
884 output
[0].barrier
= 1;
885 output
[0].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
886 output
[0].array_base
= 0;
887 output
[0].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
);
890 /* set export done on last export of each type */
891 for (i
= noutput
- 1, output_done
= 0; i
>= 0; i
--) {
892 if (ctx
.bc
->chip_class
< CAYMAN
) {
893 if (i
== (noutput
- 1)) {
894 output
[i
].end_of_program
= 1;
897 if (!(output_done
& (1 << output
[i
].type
))) {
898 output_done
|= (1 << output
[i
].type
);
899 output
[i
].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
);
902 /* add output to bytecode */
903 for (i
= 0; i
< noutput
; i
++) {
904 r
= r600_bc_add_output(ctx
.bc
, &output
[i
]);
908 /* add program end */
909 if (ctx
.bc
->chip_class
== CAYMAN
)
910 cm_bc_add_cf_end(ctx
.bc
);
913 tgsi_parse_free(&ctx
.parse
);
917 tgsi_parse_free(&ctx
.parse
);
921 static int tgsi_unsupported(struct r600_shader_ctx
*ctx
)
923 R600_ERR("%s tgsi opcode unsupported\n",
924 tgsi_get_opcode_name(ctx
->inst_info
->tgsi_opcode
));
928 static int tgsi_end(struct r600_shader_ctx
*ctx
)
933 static void r600_bc_src(struct r600_bc_alu_src
*bc_src
,
934 const struct r600_shader_src
*shader_src
,
937 bc_src
->sel
= shader_src
->sel
;
938 bc_src
->chan
= shader_src
->swizzle
[chan
];
939 bc_src
->neg
= shader_src
->neg
;
940 bc_src
->abs
= shader_src
->abs
;
941 bc_src
->rel
= shader_src
->rel
;
942 bc_src
->value
= shader_src
->value
[bc_src
->chan
];
945 static void r600_bc_src_set_abs(struct r600_bc_alu_src
*bc_src
)
951 static void r600_bc_src_toggle_neg(struct r600_bc_alu_src
*bc_src
)
953 bc_src
->neg
= !bc_src
->neg
;
956 static void tgsi_dst(struct r600_shader_ctx
*ctx
,
957 const struct tgsi_full_dst_register
*tgsi_dst
,
959 struct r600_bc_alu_dst
*r600_dst
)
961 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
963 r600_dst
->sel
= tgsi_dst
->Register
.Index
;
964 r600_dst
->sel
+= ctx
->file_offset
[tgsi_dst
->Register
.File
];
965 r600_dst
->chan
= swizzle
;
967 if (tgsi_dst
->Register
.Indirect
)
968 r600_dst
->rel
= V_SQ_REL_RELATIVE
;
969 if (inst
->Instruction
.Saturate
) {
974 static int tgsi_last_instruction(unsigned writemask
)
978 for (i
= 0; i
< 4; i
++) {
979 if (writemask
& (1 << i
)) {
986 static int tgsi_op2_s(struct r600_shader_ctx
*ctx
, int swap
)
988 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
989 struct r600_bc_alu alu
;
991 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
993 for (i
= 0; i
< lasti
+ 1; i
++) {
994 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
997 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
998 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1000 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1002 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
1003 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
1006 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], i
);
1007 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
1009 /* handle some special cases */
1010 switch (ctx
->inst_info
->tgsi_opcode
) {
1011 case TGSI_OPCODE_SUB
:
1012 r600_bc_src_toggle_neg(&alu
.src
[1]);
1014 case TGSI_OPCODE_ABS
:
1015 r600_bc_src_set_abs(&alu
.src
[0]);
1023 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Emit the current TGSI instruction through tgsi_op2_s() with the swap flag
 * cleared, i.e. presumably with the sources kept in their TGSI order.
 *
 * Returns whatever tgsi_op2_s() returns.
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	enum { KEEP_SRC_ORDER = 0 };

	return tgsi_op2_s(ctx, KEEP_SRC_ORDER);
}
/*
 * Emit the current TGSI instruction through tgsi_op2_s() with the swap flag
 * set; tgsi_op2_s() has a path that feeds TGSI source 1 into ALU source 0
 * and TGSI source 0 into ALU source 1, presumably selected by this flag.
 *
 * Returns whatever tgsi_op2_s() returns.
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	enum { SWAP_SRC_ORDER = 1 };

	return tgsi_op2_s(ctx, SWAP_SRC_ORDER);
}
1040 static int cayman_emit_float_instr(struct r600_shader_ctx
*ctx
)
1042 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1044 struct r600_bc_alu alu
;
1045 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
1047 for (i
= 0 ; i
< last_slot
; i
++) {
1048 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1049 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1050 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
1051 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], 0);
1053 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1054 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1056 if (i
== last_slot
- 1)
1058 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * r600 - truncate the angle to the -PI..PI range
 * r700 - normalize the angle by dividing by 2*PI
 */
1070 static int tgsi_setup_trig(struct r600_shader_ctx
*ctx
)
1072 static float half_inv_pi
= 1.0 /(3.1415926535 * 2);
1073 static float double_pi
= 3.1415926535 * 2;
1074 static float neg_pi
= -3.1415926535;
1077 struct r600_bc_alu alu
;
1079 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1080 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
1084 alu
.dst
.sel
= ctx
->temp_reg
;
1087 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1089 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
1090 alu
.src
[1].chan
= 0;
1091 alu
.src
[1].value
= *(uint32_t *)&half_inv_pi
;
1092 alu
.src
[2].sel
= V_SQ_ALU_SRC_0_5
;
1093 alu
.src
[2].chan
= 0;
1095 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1099 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1100 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
);
1103 alu
.dst
.sel
= ctx
->temp_reg
;
1106 alu
.src
[0].sel
= ctx
->temp_reg
;
1107 alu
.src
[0].chan
= 0;
1109 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1113 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1114 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
1118 alu
.dst
.sel
= ctx
->temp_reg
;
1121 alu
.src
[0].sel
= ctx
->temp_reg
;
1122 alu
.src
[0].chan
= 0;
1124 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
1125 alu
.src
[1].chan
= 0;
1126 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
1127 alu
.src
[2].chan
= 0;
1129 if (ctx
->bc
->chip_class
== R600
) {
1130 alu
.src
[1].value
= *(uint32_t *)&double_pi
;
1131 alu
.src
[2].value
= *(uint32_t *)&neg_pi
;
1133 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1134 alu
.src
[2].sel
= V_SQ_ALU_SRC_0_5
;
1139 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * cayman_trig - emit the current instruction's r600 opcode once per slot.
 *
 * Calls tgsi_setup_trig() first, then loops over last_slot slots (4 when the
 * W bit, 0x8, of the destination write mask is set, otherwise 3).  Every
 * emitted op reads channel 0 of ctx->temp_reg, targets destination component
 * i via tgsi_dst(), and takes dst.write from bit i of the write mask.
 *
 * NOTE(review): the leading numbers are listing line numbers carried over
 * from the extraction; they skip values (e.g. 1149 -> 1152), so some original
 * lines (declarations, error checks, braces) are not present in this text.
 * Confirm details against the complete source.
 */
1145 static int cayman_trig(struct r600_shader_ctx
*ctx
)
1147 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1148 struct r600_bc_alu alu
;
/* 3 slots, or 4 when the W component is part of the write mask */
1149 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
1152 r
= tgsi_setup_trig(ctx
);
1157 for (i
= 0; i
< last_slot
; i
++) {
1158 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1159 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1162 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1163 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1165 alu
.src
[0].sel
= ctx
->temp_reg
;
1166 alu
.src
[0].chan
= 0;
/* NOTE(review): listing line 1168, the statement guarded by this test, is absent from this extract */
1167 if (i
== last_slot
- 1)
1169 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_trig - emit the instruction's r600 opcode once, then copy the result
 * to the destination.
 *
 * Calls tgsi_setup_trig(), emits one ALU op (ctx->inst_info->r600_opcode)
 * that reads channel 0 of ctx->temp_reg and targets ctx->temp_reg, then
 * walks channels 0..lasti and emits a MOV from ctx->temp_reg into
 * destination component i.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1183 -> 1187), so some original lines are
 * not present in this text.  Confirm details against the complete source.
 */
1176 static int tgsi_trig(struct r600_shader_ctx
*ctx
)
1178 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1179 struct r600_bc_alu alu
;
1181 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
1183 r
= tgsi_setup_trig(ctx
);
1187 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1188 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1190 alu
.dst
.sel
= ctx
->temp_reg
;
1193 alu
.src
[0].sel
= ctx
->temp_reg
;
1194 alu
.src
[0].chan
= 0;
1196 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1200 /* replicate result */
1201 for (i
= 0; i
< lasti
+ 1; i
++) {
/* NOTE(review): the statement guarded by this write-mask test (listing line 1203) is absent here; presumably it skips unwritten channels - confirm */
1202 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
1205 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1206 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1208 alu
.src
[0].sel
= ctx
->temp_reg
;
1209 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1212 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_scs - emit COS/SIN into X/Y and constants into Z/W, per write mask.
 *
 * Visible steps:
 *  - tgsi_setup_trig() is called only when the write mask touches X or Y.
 *  - X: a COS op reading channel 0 of ctx->temp_reg.  On CAYMAN it is issued
 *    in a 3-iteration loop targeting destination component i; otherwise once,
 *    targeting component 0.
 *  - Y: same shape with SIN; the non-CAYMAN op targets component 1.
 *  - Z: MOV of V_SQ_ALU_SRC_0 into component 2.
 *  - W: MOV of V_SQ_ALU_SRC_1 into component 3.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1240 -> 1246), so some original lines
 * (including the else branches and per-slot write/last setup) are not
 * present in this text.  Confirm details against the complete source.
 */
1219 static int tgsi_scs(struct r600_shader_ctx
*ctx
)
1221 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1222 struct r600_bc_alu alu
;
1225 /* We'll only need the trig stuff if we are going to write to the
1226 * X or Y components of the destination vector.
1228 if (likely(inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
)) {
1229 r
= tgsi_setup_trig(ctx
);
1235 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
1236 if (ctx
->bc
->chip_class
== CAYMAN
) {
1237 for (i
= 0 ; i
< 3; i
++) {
1238 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1239 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
);
1240 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1246 alu
.src
[0].sel
= ctx
->temp_reg
;
1247 alu
.src
[0].chan
= 0;
1250 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1255 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1256 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
);
1257 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
1259 alu
.src
[0].sel
= ctx
->temp_reg
;
1260 alu
.src
[0].chan
= 0;
1262 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1269 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
1270 if (ctx
->bc
->chip_class
== CAYMAN
) {
1271 for (i
= 0 ; i
< 3; i
++) {
1272 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1273 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
);
1274 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1279 alu
.src
[0].sel
= ctx
->temp_reg
;
1280 alu
.src
[0].chan
= 0;
1283 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1288 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1289 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
);
1290 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
1292 alu
.src
[0].sel
= ctx
->temp_reg
;
1293 alu
.src
[0].chan
= 0;
1295 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1302 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
1303 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1305 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1307 tgsi_dst(ctx
, &inst
->Dst
[0], 2, &alu
.dst
);
1309 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
1310 alu
.src
[0].chan
= 0;
1314 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1320 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
1321 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1323 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1325 tgsi_dst(ctx
, &inst
->Dst
[0], 3, &alu
.dst
);
1327 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1328 alu
.src
[0].chan
= 0;
1332 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_kill - emit the kill opcode once per channel (4 iterations).
 *
 * Each op uses ctx->inst_info->r600_opcode with src[0] set to the constant
 * V_SQ_ALU_SRC_0.  For TGSI_OPCODE_KILP src[1] is the constant
 * V_SQ_ALU_SRC_1; a setup of src[1] from channel i of ctx->src[0] is also
 * visible.  Afterwards the function sets ctx->bc->force_add_cf and marks the
 * shader as using kill.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1354 -> 1357), so some original lines
 * (including the else that presumably separates the two src[1] setups) are
 * not present in this text.  Confirm against the complete source.
 */
1340 static int tgsi_kill(struct r600_shader_ctx
*ctx
)
1342 struct r600_bc_alu alu
;
1345 for (i
= 0; i
< 4; i
++) {
1346 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1347 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1351 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
1353 if (ctx
->inst_info
->tgsi_opcode
== TGSI_OPCODE_KILP
) {
1354 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1357 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
1362 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1367 /* kill must be last in ALU */
1368 ctx
->bc
->force_add_cf
= 1;
1369 ctx
->shader
->uses_kill
= TRUE
;
/*
 * tgsi_lit - emit the ALU sequence for the TGSI LIT instruction.
 *
 * Visible steps, in order:
 *  1. MAX(src.y, 0.0) into ctx->temp_reg.
 *  2. Following a test of write-mask bit 2 (Z):
 *     - LOG_CLAMPED of temp_reg channel 0 into temp_reg (a 3-iteration loop
 *       on CAYMAN, a single op otherwise);
 *     - MUL_LIT with src[1] = src.w and src[2] = src.x into temp_reg;
 *     - EXP_IEEE of temp_reg channel 0 into the destination (a 3-iteration
 *       loop over component i on CAYMAN, a single op to component 2
 *       otherwise).
 *  3. MOV of constant 1.0 into dst.x, MAX(src.x, 0.0) into dst.y and MOV of
 *     constant 1.0 into dst.w, each with dst.write taken from the write mask.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1395 -> 1401), so some original lines
 * (braces, else branches, the assignment of `sel`, channel/last setup) are
 * not present in this text.  Confirm details against the complete source.
 */
1373 static int tgsi_lit(struct r600_shader_ctx
*ctx
)
1375 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1376 struct r600_bc_alu alu
;
1379 /* tmp.x = max(src.y, 0.0) */
1380 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1381 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
);
1382 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 1);
1383 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
; /*0.0*/
1384 alu
.src
[1].chan
= 1;
1386 alu
.dst
.sel
= ctx
->temp_reg
;
1391 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* bit 2 of the write mask is the Z component */
1395 if (inst
->Dst
[0].Register
.WriteMask
& (1 << 2))
1401 if (ctx
->bc
->chip_class
== CAYMAN
) {
1402 for (i
= 0; i
< 3; i
++) {
1403 /* tmp.z = log(tmp.x) */
1404 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1405 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED
);
1406 alu
.src
[0].sel
= ctx
->temp_reg
;
1407 alu
.src
[0].chan
= 0;
1408 alu
.dst
.sel
= ctx
->temp_reg
;
1416 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1421 /* tmp.z = log(tmp.x) */
1422 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1423 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED
);
1424 alu
.src
[0].sel
= ctx
->temp_reg
;
1425 alu
.src
[0].chan
= 0;
1426 alu
.dst
.sel
= ctx
->temp_reg
;
1430 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* remember which channel the log result was written to; it feeds MUL_LIT below */
1435 chan
= alu
.dst
.chan
;
1438 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1439 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1440 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT
);
1441 alu
.src
[0].sel
= sel
;
1442 alu
.src
[0].chan
= chan
;
1443 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], 3);
1444 r600_bc_src(&alu
.src
[2], &ctx
->src
[0], 0);
1445 alu
.dst
.sel
= ctx
->temp_reg
;
1450 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1454 if (ctx
->bc
->chip_class
== CAYMAN
) {
1455 for (i
= 0; i
< 3; i
++) {
1456 /* dst.z = exp(tmp.x) */
1457 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1458 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1459 alu
.src
[0].sel
= ctx
->temp_reg
;
1460 alu
.src
[0].chan
= 0;
1461 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1467 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1472 /* dst.z = exp(tmp.x) */
1473 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1474 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1475 alu
.src
[0].sel
= ctx
->temp_reg
;
1476 alu
.src
[0].chan
= 0;
1477 tgsi_dst(ctx
, &inst
->Dst
[0], 2, &alu
.dst
);
1479 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* dst.x = 1.0 */
1486 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1487 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1488 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
; /*1.0*/
1489 alu
.src
[0].chan
= 0;
1490 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
1491 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 0) & 1;
1492 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1496 /* dst.y = max(src.x, 0.0) */
1497 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1498 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
);
1499 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1500 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
; /*0.0*/
1501 alu
.src
[1].chan
= 0;
1502 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
1503 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 1) & 1;
1504 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* dst.w = 1.0 */
1509 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1510 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1511 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1512 alu
.src
[0].chan
= 0;
1513 tgsi_dst(ctx
, &inst
->Dst
[0], 3, &alu
.dst
);
1514 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 3) & 1;
1516 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_rsq - emit RECIPSQRT_CLAMPED on |src.x| and replicate the result.
 *
 * Every source register of the instruction is loaded from channel 0 of the
 * matching ctx->src[] entry and has its absolute-value modifier set via
 * r600_bc_src_set_abs().  The op targets ctx->temp_reg, and the function
 * returns whatever tgsi_helper_tempx_replicate() returns.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1529 -> 1532), so some original lines are
 * not present in this text; the opener and terminator of the "state
 * trackers" comment are among them.  Confirm against the complete source.
 */
1523 static int tgsi_rsq(struct r600_shader_ctx
*ctx
)
1525 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1526 struct r600_bc_alu alu
;
1529 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1532 * For state trackers other than OpenGL, we'll want to use
1533 * _RECIPSQRT_IEEE instead.
1535 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED
);
1537 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1538 r600_bc_src(&alu
.src
[i
], &ctx
->src
[i
], 0);
1539 r600_bc_src_set_abs(&alu
.src
[i
]);
1541 alu
.dst
.sel
= ctx
->temp_reg
;
1544 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1547 /* replicate result */
1548 return tgsi_helper_tempx_replicate(ctx
);
/*
 * tgsi_helper_tempx_replicate - copy ctx->temp_reg to all four destination
 * components.
 *
 * Emits four MOV ops, one per component i of Dst[0].  Each reads
 * ctx->temp_reg (the source channel is left at the memset value, 0) and
 * takes dst.write from bit i of the write mask.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1563 -> 1566), so some original lines are
 * not present in this text.  Confirm against the complete source.
 */
1551 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx
*ctx
)
1553 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1554 struct r600_bc_alu alu
;
1557 for (i
= 0; i
< 4; i
++) {
1558 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1559 alu
.src
[0].sel
= ctx
->temp_reg
;
1560 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1562 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1563 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1566 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_trans_srcx_replicate - emit the instruction's r600 opcode on the X
 * channel of its sources and replicate the result.
 *
 * Every source register is loaded from channel 0 of the matching ctx->src[]
 * entry.  The single op targets ctx->temp_reg, and the function returns
 * whatever tgsi_helper_tempx_replicate() returns.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1584 -> 1587), so some original lines are
 * not present in this text.  Confirm against the complete source.
 */
1573 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx
*ctx
)
1575 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1576 struct r600_bc_alu alu
;
1579 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1580 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1581 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1582 r600_bc_src(&alu
.src
[i
], &ctx
->src
[i
], 0);
1584 alu
.dst
.sel
= ctx
->temp_reg
;
1587 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1590 /* replicate result */
1591 return tgsi_helper_tempx_replicate(ctx
);
/*
 * cayman_pow - emit POW as EXP2(b * LOG2(a)) using per-slot loops.
 *
 * Visible steps:
 *  1. LOG_IEEE of src0.x into ctx->temp_reg, issued in a 3-iteration loop.
 *  2. MUL of src1.x by ctx->temp_reg into ctx->temp_reg.
 *  3. EXP_IEEE of ctx->temp_reg into destination component i, issued
 *     last_slot times (4 when the W bit, 0x8, of the write mask is set,
 *     otherwise 3), with dst.write taken from bit i of the write mask.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1605 -> 1610), so some original lines are
 * not present in this text.  Confirm against the complete source.
 */
1594 static int cayman_pow(struct r600_shader_ctx
*ctx
)
1596 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1598 struct r600_bc_alu alu
;
1599 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
1601 for (i
= 0; i
< 3; i
++) {
1602 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1603 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
1604 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1605 alu
.dst
.sel
= ctx
->temp_reg
;
1610 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* b * LOG2(a) */
1616 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1617 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
1618 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], 0);
1619 alu
.src
[1].sel
= ctx
->temp_reg
;
1620 alu
.dst
.sel
= ctx
->temp_reg
;
1623 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1627 for (i
= 0; i
< last_slot
; i
++) {
1628 /* POW(a,b) = EXP2(b * LOG2(a))*/
1629 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1630 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1631 alu
.src
[0].sel
= ctx
->temp_reg
;
1633 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1634 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
/* NOTE(review): listing line 1636, the statement guarded by this test, is absent from this extract */
1635 if (i
== last_slot
- 1)
1637 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_pow - emit POW as EXP2(b * LOG2(a)) and replicate the result.
 *
 * Visible steps: LOG_IEEE of src0.x into ctx->temp_reg, MUL of src1.x by
 * ctx->temp_reg into ctx->temp_reg, EXP_IEEE of ctx->temp_reg into
 * ctx->temp_reg, then a return through tgsi_helper_tempx_replicate().
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1646 -> 1650), so some original lines are
 * not present in this text.  Confirm against the complete source.
 */
1644 static int tgsi_pow(struct r600_shader_ctx
*ctx
)
1646 struct r600_bc_alu alu
;
/* LOG2(a) */
1650 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1651 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
1652 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1653 alu
.dst
.sel
= ctx
->temp_reg
;
1656 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* b * LOG2(a) */
1660 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1661 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
1662 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], 0);
1663 alu
.src
[1].sel
= ctx
->temp_reg
;
1664 alu
.dst
.sel
= ctx
->temp_reg
;
1667 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1670 /* POW(a,b) = EXP2(b * LOG2(a))*/
1671 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1672 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1673 alu
.src
[0].sel
= ctx
->temp_reg
;
1674 alu
.dst
.sel
= ctx
->temp_reg
;
1677 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1680 return tgsi_helper_tempx_replicate(ctx
);
/*
 * tgsi_ssg - emit the sign function as two passes of CNDGT per channel.
 *
 * Pass 1 (4 iterations): CNDGT with src[0] = src channel i, src[1] = the
 * constant V_SQ_ALU_SRC_1 and src[2] = src channel i, targeting
 * ctx->temp_reg; the in-code comment gives this as
 * tmp = (src > 0 ? 1 : src).
 * Pass 2 (4 iterations): CNDGT with src[0] and src[2] = temp_reg channel i
 * and src[1] = V_SQ_ALU_SRC_1, targeting destination component i; the
 * in-code comment gives this as dst = (-tmp > 0 ? -1 : tmp).
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1717 -> 1720), so some original lines are
 * not present in this text; the negate modifiers implied by the pass-2
 * comment are not visible here.  Confirm against the complete source.
 */
1683 static int tgsi_ssg(struct r600_shader_ctx
*ctx
)
1685 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1686 struct r600_bc_alu alu
;
1689 /* tmp = (src > 0 ? 1 : src) */
1690 for (i
= 0; i
< 4; i
++) {
1691 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1692 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT
);
1695 alu
.dst
.sel
= ctx
->temp_reg
;
1698 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
1699 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1700 r600_bc_src(&alu
.src
[2], &ctx
->src
[0], i
);
1704 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1709 /* dst = (-tmp > 0 ? -1 : tmp) */
1710 for (i
= 0; i
< 4; i
++) {
1711 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1712 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT
);
1714 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1716 alu
.src
[0].sel
= ctx
->temp_reg
;
1717 alu
.src
[0].chan
= i
;
1720 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1723 alu
.src
[2].sel
= ctx
->temp_reg
;
1724 alu
.src
[2].chan
= i
;
1728 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_helper_copy - copy ctx->temp_reg to the destination, channel by
 * channel.
 *
 * For each of the four channels: when bit i of the write mask is clear the
 * op is a NOP; a MOV from channel i of ctx->temp_reg into destination
 * component i is also set up.
 *
 * ctx  - shader translation context
 * inst - instruction whose Dst[0] receives the copy
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1743 -> 1746), so some original lines
 * (including the else that presumably separates NOP from MOV) are not
 * present in this text.  Confirm against the complete source.
 */
1735 static int tgsi_helper_copy(struct r600_shader_ctx
*ctx
, struct tgsi_full_instruction
*inst
)
1737 struct r600_bc_alu alu
;
1740 for (i
= 0; i
< 4; i
++) {
1741 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1742 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
))) {
1743 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
);
1746 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1747 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1748 alu
.src
[0].sel
= ctx
->temp_reg
;
1749 alu
.src
[0].chan
= i
;
1754 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_op3 - emit the instruction's r600 opcode per channel, with all
 * sources taken from that channel.
 *
 * Walks channels 0..lasti (lasti comes from tgsi_last_instruction() on the
 * write mask).  For each channel it loads every source register from
 * channel i of the matching ctx->src[] entry and targets destination
 * component i.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1778 -> 1785), so some original lines are
 * not present in this text.  Confirm against the complete source.
 */
1761 static int tgsi_op3(struct r600_shader_ctx
*ctx
)
1763 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1764 struct r600_bc_alu alu
;
1766 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
1768 for (i
= 0; i
< lasti
+ 1; i
++) {
/* NOTE(review): the statement guarded by this write-mask test (listing line 1770) is absent here; presumably it skips unwritten channels - confirm */
1769 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
1772 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1773 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1774 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
1775 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
1778 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1785 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_dp - emit the dot-product opcode across all four channels.
 *
 * Each of the four ops uses ctx->inst_info->r600_opcode, loads every source
 * register from channel i, targets destination component i and takes
 * dst.write from bit i of the write mask.  A switch on the TGSI opcode then
 * overrides sources:
 *  - DP2 and DP3: src[0] and src[1] both become the constant V_SQ_ALU_SRC_0;
 *  - DPH: src[0] becomes the constant V_SQ_ALU_SRC_1.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1810 -> 1812), so some original lines are
 * not present in this text; the per-case channel conditions that decide
 * when these overrides apply are among them.  Confirm against the complete
 * source.
 */
1792 static int tgsi_dp(struct r600_shader_ctx
*ctx
)
1794 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1795 struct r600_bc_alu alu
;
1798 for (i
= 0; i
< 4; i
++) {
1799 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1800 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1801 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
1802 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
1805 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1807 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1808 /* handle some special cases */
1809 switch (ctx
->inst_info
->tgsi_opcode
) {
1810 case TGSI_OPCODE_DP2
:
1812 alu
.src
[0].sel
= alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
1813 alu
.src
[0].chan
= alu
.src
[1].chan
= 0;
1816 case TGSI_OPCODE_DP3
:
1818 alu
.src
[0].sel
= alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
1819 alu
.src
[0].chan
= alu
.src
[1].chan
= 0;
1822 case TGSI_OPCODE_DPH
:
1824 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1825 alu
.src
[0].chan
= 0;
1835 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_tex_src_requires_loading - tell whether a texture source operand
 * must be handled specially before the fetch.
 *
 * Returns true when source `index` of the current instruction lives in a
 * register file other than TGSI_FILE_TEMPORARY and TGSI_FILE_INPUT, or when
 * the matching ctx->src[] entry has its neg or abs flag set.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (1842 -> 1845), so the line declaring the
 * `index` parameter is not present in this text.  Confirm against the
 * complete source.
 */
1842 static inline boolean
tgsi_tex_src_requires_loading(struct r600_shader_ctx
*ctx
,
1845 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1846 return (inst
->Src
[index
].Register
.File
!= TGSI_FILE_TEMPORARY
&&
1847 inst
->Src
[index
].Register
.File
!= TGSI_FILE_INPUT
) ||
1848 ctx
->src
[index
].neg
|| ctx
->src
[index
].abs
;
/*
 * tgsi_tex_get_src_gpr - compute the register number of a source operand.
 *
 * Returns ctx->file_offset[] for the register file of source `index` plus
 * that source's register Index.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (1851 -> 1854), so the line declaring the
 * `index` parameter is not present in this text.  Confirm against the
 * complete source.
 */
1851 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx
*ctx
,
1854 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1855 return ctx
->file_offset
[inst
->Src
[index
].Register
.File
] + inst
->Src
[index
].Register
.Index
;
/*
 * tgsi_tex - translate a TGSI texture instruction into ALU setup ops plus
 * texture fetch ops.
 *
 * Visible stages, in order:
 *  - TXD: the sampler moves to source register 3.  For i = 1 and 2 a
 *    SET_GRADIENTS_H / SET_GRADIENTS_V fetch is emitted.  When
 *    tgsi_tex_src_requires_loading() is true for source i, a register from
 *    r600_get_temp() is used as tex.src_gpr and four MOVs targeting it are
 *    emitted; a direct setup from tgsi_tex_get_src_gpr() with the source's
 *    swizzle and rel is also visible.
 *  - TXP: RECIP_IEEE of src.w into ctx->temp_reg (a 3-iteration loop on
 *    CAYMAN, a single op otherwise), three MULs of that value by src
 *    channels 0..2, and a MOV of the constant V_SQ_ALU_SRC_1, all targeting
 *    ctx->temp_reg, which then becomes src_gpr.
 *  - CUBE textures: four CUBE ops with swizzled sources, RECIP_IEEE of
 *    temp_reg channel 2, and two MULADDs that add the literal 1.5f;
 *    ctx->temp_reg becomes src_gpr.
 *  - If the source still requires loading, four MOVs copy it into
 *    ctx->temp_reg, which becomes src_gpr.
 *  - For SHADOW1D / SHADOW2D the SAMPLE, SAMPLE_L and SAMPLE_G opcodes are
 *    replaced by their _C variants.
 *  - The final fetch takes sampler_id from the sampler source register,
 *    resource_id = sampler_id + R600_MAX_CONST_BUFFERS, destination selects
 *    from the write mask (7 for an unwritten component), coord_type_* = 1
 *    for non-RECT textures, coord_type_z = 0 for 1D_ARRAY and 2D_ARRAY
 *    (1D_ARRAY also copies src_sel_y into src_sel_z), and for shadow
 *    textures src_sel_w = src_sel_z.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 1863 -> 1867), so many original lines
 * (declarations, braces, else branches, channel/write/last setup, the
 * assignment of tex.inst) are not present in this text.  Confirm details
 * against the complete source.
 */
1858 static int tgsi_tex(struct r600_shader_ctx
*ctx
)
1860 static float one_point_five
= 1.5f
;
1861 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1862 struct r600_bc_tex tex
;
1863 struct r600_bc_alu alu
;
1867 /* Texture fetch instructions can only use gprs as source.
1868 * Also they cannot negate the source or take the absolute value */
1869 const boolean src_requires_loading
= tgsi_tex_src_requires_loading(ctx
, 0);
1870 boolean src_loaded
= FALSE
;
1871 unsigned sampler_src_reg
= 1;
1873 src_gpr
= tgsi_tex_get_src_gpr(ctx
, 0);
1875 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
) {
1876 /* TGSI moves the sampler to src reg 3 for TXD */
1877 sampler_src_reg
= 3;
1879 for (i
= 1; i
< 3; i
++) {
1880 /* set gradients h/v */
1881 memset(&tex
, 0, sizeof(struct r600_bc_tex
));
1882 tex
.inst
= (i
== 1) ? SQ_TEX_INST_SET_GRADIENTS_H
:
1883 SQ_TEX_INST_SET_GRADIENTS_V
;
1884 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
1885 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
1887 if (tgsi_tex_src_requires_loading(ctx
, i
)) {
1888 tex
.src_gpr
= r600_get_temp(ctx
);
1894 for (j
= 0; j
< 4; j
++) {
1895 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1896 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1897 r600_bc_src(&alu
.src
[0], &ctx
->src
[i
], j
);
1898 alu
.dst
.sel
= tex
.src_gpr
;
1903 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1909 tex
.src_gpr
= tgsi_tex_get_src_gpr(ctx
, i
);
1910 tex
.src_sel_x
= ctx
->src
[i
].swizzle
[0];
1911 tex
.src_sel_y
= ctx
->src
[i
].swizzle
[1];
1912 tex
.src_sel_z
= ctx
->src
[i
].swizzle
[2];
1913 tex
.src_sel_w
= ctx
->src
[i
].swizzle
[3];
1914 tex
.src_rel
= ctx
->src
[i
].rel
;
1916 tex
.dst_gpr
= ctx
->temp_reg
; /* just to avoid confusing the asm scheduler */
1917 tex
.dst_sel_x
= tex
.dst_sel_y
= tex
.dst_sel_z
= tex
.dst_sel_w
= 7;
1918 if (inst
->Texture
.Texture
!= TGSI_TEXTURE_RECT
) {
1919 tex
.coord_type_x
= 1;
1920 tex
.coord_type_y
= 1;
1921 tex
.coord_type_z
= 1;
1922 tex
.coord_type_w
= 1;
1924 r
= r600_bc_add_tex(ctx
->bc
, &tex
);
1928 } else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
1930 /* Add perspective divide */
1931 if (ctx
->bc
->chip_class
== CAYMAN
) {
1933 for (i
= 0; i
< 3; i
++) {
1934 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1935 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
1936 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 3);
1938 alu
.dst
.sel
= ctx
->temp_reg
;
1944 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1951 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1952 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
1953 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 3);
1955 alu
.dst
.sel
= ctx
->temp_reg
;
1956 alu
.dst
.chan
= out_chan
;
1959 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1964 for (i
= 0; i
< 3; i
++) {
1965 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1966 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
1967 alu
.src
[0].sel
= ctx
->temp_reg
;
1968 alu
.src
[0].chan
= out_chan
;
1969 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
1970 alu
.dst
.sel
= ctx
->temp_reg
;
1973 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1977 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1978 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1979 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1980 alu
.src
[0].chan
= 0;
1981 alu
.dst
.sel
= ctx
->temp_reg
;
1985 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1989 src_gpr
= ctx
->temp_reg
;
1992 if (inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) {
1993 static const unsigned src0_swizzle
[] = {2, 2, 0, 1};
1994 static const unsigned src1_swizzle
[] = {1, 0, 2, 2};
1996 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1997 for (i
= 0; i
< 4; i
++) {
1998 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1999 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE
);
2000 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], src0_swizzle
[i
]);
2001 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], src1_swizzle
[i
]);
2002 alu
.dst
.sel
= ctx
->temp_reg
;
2007 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2012 /* tmp1.z = RCP_e(|tmp1.z|) */
2013 if (ctx
->bc
->chip_class
== CAYMAN
) {
2014 for (i
= 0; i
< 3; i
++) {
2015 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2016 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
2017 alu
.src
[0].sel
= ctx
->temp_reg
;
2018 alu
.src
[0].chan
= 2;
2020 alu
.dst
.sel
= ctx
->temp_reg
;
2026 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2031 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2032 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
2033 alu
.src
[0].sel
= ctx
->temp_reg
;
2034 alu
.src
[0].chan
= 2;
2036 alu
.dst
.sel
= ctx
->temp_reg
;
2040 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2045 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
2046 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
2047 * muladd has no writemask, have to use another temp
2049 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2050 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2053 alu
.src
[0].sel
= ctx
->temp_reg
;
2054 alu
.src
[0].chan
= 0;
2055 alu
.src
[1].sel
= ctx
->temp_reg
;
2056 alu
.src
[1].chan
= 2;
2058 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
2059 alu
.src
[2].chan
= 0;
2060 alu
.src
[2].value
= *(uint32_t *)&one_point_five
;
2062 alu
.dst
.sel
= ctx
->temp_reg
;
2066 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2070 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2071 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2074 alu
.src
[0].sel
= ctx
->temp_reg
;
2075 alu
.src
[0].chan
= 1;
2076 alu
.src
[1].sel
= ctx
->temp_reg
;
2077 alu
.src
[1].chan
= 2;
2079 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
2080 alu
.src
[2].chan
= 0;
2081 alu
.src
[2].value
= *(uint32_t *)&one_point_five
;
2083 alu
.dst
.sel
= ctx
->temp_reg
;
2088 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2093 src_gpr
= ctx
->temp_reg
;
2096 if (src_requires_loading
&& !src_loaded
) {
2097 for (i
= 0; i
< 4; i
++) {
2098 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2099 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
2100 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
2101 alu
.dst
.sel
= ctx
->temp_reg
;
2106 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2111 src_gpr
= ctx
->temp_reg
;
2114 opcode
= ctx
->inst_info
->r600_opcode
;
2115 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
|| inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
) {
2117 case SQ_TEX_INST_SAMPLE
:
2118 opcode
= SQ_TEX_INST_SAMPLE_C
;
2120 case SQ_TEX_INST_SAMPLE_L
:
2121 opcode
= SQ_TEX_INST_SAMPLE_C_L
;
2123 case SQ_TEX_INST_SAMPLE_G
:
2124 opcode
= SQ_TEX_INST_SAMPLE_C_G
;
2129 memset(&tex
, 0, sizeof(struct r600_bc_tex
));
2132 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
2133 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
2134 tex
.src_gpr
= src_gpr
;
2135 tex
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
2136 tex
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7;
2137 tex
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7;
2138 tex
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 2 : 7;
2139 tex
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7;
2146 tex
.src_sel_x
= ctx
->src
[0].swizzle
[0];
2147 tex
.src_sel_y
= ctx
->src
[0].swizzle
[1];
2148 tex
.src_sel_z
= ctx
->src
[0].swizzle
[2];
2149 tex
.src_sel_w
= ctx
->src
[0].swizzle
[3];
2150 tex
.src_rel
= ctx
->src
[0].rel
;
2153 if (inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) {
2160 if (inst
->Texture
.Texture
!= TGSI_TEXTURE_RECT
) {
2161 tex
.coord_type_x
= 1;
2162 tex
.coord_type_y
= 1;
2163 tex
.coord_type_z
= 1;
2164 tex
.coord_type_w
= 1;
2167 if (inst
->Texture
.Texture
== TGSI_TEXTURE_1D_ARRAY
) {
2168 tex
.coord_type_z
= 0;
2169 tex
.src_sel_z
= tex
.src_sel_y
;
2170 } else if (inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY
)
2171 tex
.coord_type_z
= 0;
2173 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
|| inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
)
2174 tex
.src_sel_w
= tex
.src_sel_z
;
2176 r
= r600_bc_add_tex(ctx
->bc
, &tex
);
2180 /* add shadow ambient support - gallium doesn't do it yet */
/*
 * tgsi_lrp - emit linear interpolation: src0 * src1 + (1 - src0) * src2.
 *
 * All loops walk channels 0..lasti (lasti comes from
 * tgsi_last_instruction() on the write mask).
 *
 * Fast path, taken when ctx->src[0].sel is the constant V_SQ_ALU_SRC_0_5:
 * a single ADD of src1 and src2 per channel into the destination.
 *
 * General path, three passes:
 *  1. ADD of the constant V_SQ_ALU_SRC_1 and negated src0 into
 *     ctx->temp_reg, i.e. (1 - src0);
 *  2. MUL of temp_reg channel i by src2 into ctx->temp_reg;
 *  3. MULADD of src0, src1 and temp_reg channel i into the destination.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 2188 -> 2192), so some original lines
 * (write-mask skips, output modifiers, the fast path's return) are not
 * present in this text.  Confirm against the complete source.
 */
2184 static int tgsi_lrp(struct r600_shader_ctx
*ctx
)
2186 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2187 struct r600_bc_alu alu
;
2188 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
2192 /* optimize if it's just an equal balance */
2193 if (ctx
->src
[0].sel
== V_SQ_ALU_SRC_0_5
) {
2194 for (i
= 0; i
< lasti
+ 1; i
++) {
2195 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2198 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2199 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
);
2200 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], i
);
2201 r600_bc_src(&alu
.src
[1], &ctx
->src
[2], i
);
2203 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2208 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* 1 - src0 */
2216 for (i
= 0; i
< lasti
+ 1; i
++) {
2217 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2220 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2221 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
);
2222 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2223 alu
.src
[0].chan
= 0;
2224 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
2225 r600_bc_src_toggle_neg(&alu
.src
[1]);
2226 alu
.dst
.sel
= ctx
->temp_reg
;
2232 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2237 /* (1 - src0) * src2 */
2238 for (i
= 0; i
< lasti
+ 1; i
++) {
2239 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2242 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2243 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2244 alu
.src
[0].sel
= ctx
->temp_reg
;
2245 alu
.src
[0].chan
= i
;
2246 r600_bc_src(&alu
.src
[1], &ctx
->src
[2], i
);
2247 alu
.dst
.sel
= ctx
->temp_reg
;
2253 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2258 /* src0 * src1 + (1 - src0) * src2 */
2259 for (i
= 0; i
< lasti
+ 1; i
++) {
2260 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2263 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2264 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2266 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
2267 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], i
);
2268 alu
.src
[2].sel
= ctx
->temp_reg
;
2269 alu
.src
[2].chan
= i
;
2271 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2276 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_cmp - emit a per-channel conditional select using CNDGE.
 *
 * Walks channels 0..lasti (lasti comes from tgsi_last_instruction() on the
 * write mask).  Each op is CNDGE with src[0] = TGSI src0, src[1] = TGSI src2
 * and src[2] = TGSI src1 (note the swapped order of the last two), targeting
 * destination component i.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 2299 -> 2305), so some original lines are
 * not present in this text.  Confirm against the complete source.
 */
2283 static int tgsi_cmp(struct r600_shader_ctx
*ctx
)
2285 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2286 struct r600_bc_alu alu
;
2288 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
2290 for (i
= 0; i
< lasti
+ 1; i
++) {
/* NOTE(review): the statement guarded by this write-mask test (listing line 2292) is absent here; presumably it skips unwritten channels - confirm */
2291 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2294 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2295 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE
);
2296 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
2297 r600_bc_src(&alu
.src
[1], &ctx
->src
[2], i
);
2298 r600_bc_src(&alu
.src
[2], &ctx
->src
[1], i
);
2299 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2305 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_xpd - emit the cross product as a MUL pass followed by a MULADD pass.
 *
 * Pass 1 (4 iterations): MUL of src0 swizzled by src0_swizzle[i] and src1
 * swizzled by src1_swizzle[i], targeting ctx->temp_reg.
 * Pass 2 (4 iterations): MULADD of src0 swizzled by src1_swizzle[i], src1
 * swizzled by src0_swizzle[i] and temp_reg channel i.  Both a ctx->temp_reg
 * destination and a tgsi_dst() destination are visible.
 * The function returns whatever tgsi_helper_copy() returns.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 2329 -> 2331), so some original lines are
 * not present in this text.  In both passes a swizzled-source setup and a
 * constant V_SQ_ALU_SRC_0 setup are visible, but the condition choosing
 * between them is absent.  The swizzle tables hold three entries while the
 * loops run four times, so presumably the fourth iteration uses the zero
 * constant - confirm.  The statement guarded by the WriteMask != 0xf test
 * and the condition selecting the pass-2 destination are likewise absent;
 * presumably they involve use_temp - confirm against the complete source.
 */
2312 static int tgsi_xpd(struct r600_shader_ctx
*ctx
)
2314 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2315 static const unsigned int src0_swizzle
[] = {2, 0, 1};
2316 static const unsigned int src1_swizzle
[] = {1, 2, 0};
2317 struct r600_bc_alu alu
;
2318 uint32_t use_temp
= 0;
2321 if (inst
->Dst
[0].Register
.WriteMask
!= 0xf)
2324 for (i
= 0; i
< 4; i
++) {
2325 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2326 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2328 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], src0_swizzle
[i
]);
2329 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], src1_swizzle
[i
]);
2331 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
2332 alu
.src
[0].chan
= i
;
2333 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
2334 alu
.src
[1].chan
= i
;
2337 alu
.dst
.sel
= ctx
->temp_reg
;
2343 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2348 for (i
= 0; i
< 4; i
++) {
2349 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2350 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2353 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], src1_swizzle
[i
]);
2354 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], src0_swizzle
[i
]);
2356 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
2357 alu
.src
[0].chan
= i
;
2358 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
2359 alu
.src
[1].chan
= i
;
2362 alu
.src
[2].sel
= ctx
->temp_reg
;
2364 alu
.src
[2].chan
= i
;
2367 alu
.dst
.sel
= ctx
->temp_reg
;
2369 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2375 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2380 return tgsi_helper_copy(ctx
, inst
);
/*
 * tgsi_exp - emit the TGSI EXP instruction, building each result component
 * in ctx->temp_reg and copying it out at the end.
 *
 * Visible steps, each gated on the matching write-mask bit:
 *  - X: FLOOR of src.x into ctx->temp_reg, then EXP_IEEE of temp_reg
 *    channel 0 into ctx->temp_reg (a 3-iteration loop on CAYMAN, a single
 *    op otherwise); the in-code comment gives this as 2^floor(src).
 *  - Y: FRACT of src.x into ctx->temp_reg.
 *  - Z: EXP_IEEE of src.x into ctx->temp_reg (a 3-iteration loop on CAYMAN,
 *    a single op otherwise).
 *  - W: MOV of the constant V_SQ_ALU_SRC_1 into ctx->temp_reg.
 * The function returns whatever tgsi_helper_copy() returns.
 *
 * NOTE(review): the leading numbers are listing line numbers from the
 * extraction and skip values (e.g. 2398 -> 2402), so some original lines
 * (destination channel/write/last setup, else branches, any preprocessor
 * guards) are not present in this text.  Confirm against the complete
 * source.
 */
2384 static int tgsi_exp(struct r600_shader_ctx
*ctx
)
2386 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2387 struct r600_bc_alu alu
;
2391 /* result.x = 2^floor(src); */
2392 if (inst
->Dst
[0].Register
.WriteMask
& 1) {
2393 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2395 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
);
2396 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2398 alu
.dst
.sel
= ctx
->temp_reg
;
2402 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2406 if (ctx
->bc
->chip_class
== CAYMAN
) {
2407 for (i
= 0; i
< 3; i
++) {
2408 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2409 alu
.src
[0].sel
= ctx
->temp_reg
;
2410 alu
.src
[0].chan
= 0;
2412 alu
.dst
.sel
= ctx
->temp_reg
;
2418 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2423 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2424 alu
.src
[0].sel
= ctx
->temp_reg
;
2425 alu
.src
[0].chan
= 0;
2427 alu
.dst
.sel
= ctx
->temp_reg
;
2431 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2437 /* result.y = tmp - floor(tmp); */
2438 if ((inst
->Dst
[0].Register
.WriteMask
>> 1) & 1) {
2439 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2441 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
);
2442 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2444 alu
.dst
.sel
= ctx
->temp_reg
;
2446 r
= tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2455 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2460 /* result.z = RoughApprox2ToX(tmp);*/
2461 if ((inst
->Dst
[0].Register
.WriteMask
>> 2) & 0x1) {
2462 if (ctx
->bc
->chip_class
== CAYMAN
) {
2463 for (i
= 0; i
< 3; i
++) {
2464 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2465 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2466 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2468 alu
.dst
.sel
= ctx
->temp_reg
;
2475 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2480 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2481 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2482 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2484 alu
.dst
.sel
= ctx
->temp_reg
;
2490 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2496 /* result.w = 1.0;*/
2497 if ((inst
->Dst
[0].Register
.WriteMask
>> 3) & 0x1) {
2498 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2500 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
2501 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2502 alu
.src
[0].chan
= 0;
2504 alu
.dst
.sel
= ctx
->temp_reg
;
2508 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2512 return tgsi_helper_copy(ctx
, inst
);
/*
 * Emit the ALU instructions for TGSI LOG.
 *
 * Each destination component enabled in inst->Dst[0].Register.WriteMask is
 * computed into ctx->temp_reg (see the per-component comments below); the
 * function finishes by returning tgsi_helper_copy(ctx, inst).
 *
 * When the chip class is CAYMAN, every LOG_IEEE / EXP_IEEE / RECIP_IEEE is
 * issued from a three-iteration loop; each loop is followed by a
 * single-instruction variant of the same operation (presumably the path
 * taken by the other chip classes).
 *
 * Every LOG_IEEE operand, and the MUL operand taken from ctx->src[0], gets
 * the absolute-value modifier through r600_bc_src_set_abs().
 */
2515 static int tgsi_log(struct r600_shader_ctx
*ctx
)
2517 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2518 struct r600_bc_alu alu
;
2522 /* result.x = floor(log2(|src|)); */
2523 if (inst
->Dst
[0].Register
.WriteMask
& 1) {
2524 if (ctx
->bc
->chip_class
== CAYMAN
) {
2525 for (i
= 0; i
< 3; i
++) {
2526 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2528 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2529 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2530 r600_bc_src_set_abs(&alu
.src
[0]);
2532 alu
.dst
.sel
= ctx
->temp_reg
;
2538 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2544 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2546 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2547 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2548 r600_bc_src_set_abs(&alu
.src
[0]);
2550 alu
.dst
.sel
= ctx
->temp_reg
;
2554 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* FLOOR, reading temp_reg channel 0. */
/* NOTE(review): unlike the result.y path, no memset of `alu` is visible
 * before this FLOOR, so fields left over from the LOG_IEEE above (for
 * example the abs modifier set by r600_bc_src_set_abs) would carry over
 * into this instruction -- confirm whether that is intended. */
2559 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
);
2560 alu
.src
[0].sel
= ctx
->temp_reg
;
2561 alu
.src
[0].chan
= 0;
2563 alu
.dst
.sel
= ctx
->temp_reg
;
2568 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2573 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2574 if ((inst
->Dst
[0].Register
.WriteMask
>> 1) & 1) {
2576 if (ctx
->bc
->chip_class
== CAYMAN
) {
2577 for (i
= 0; i
< 3; i
++) {
2578 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2580 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2581 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2582 r600_bc_src_set_abs(&alu
.src
[0]);
2584 alu
.dst
.sel
= ctx
->temp_reg
;
2591 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2596 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2598 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2599 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2600 r600_bc_src_set_abs(&alu
.src
[0]);
2602 alu
.dst
.sel
= ctx
->temp_reg
;
2607 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* FLOOR, reading temp_reg channel 1. */
2612 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2614 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
);
2615 alu
.src
[0].sel
= ctx
->temp_reg
;
2616 alu
.src
[0].chan
= 1;
2618 alu
.dst
.sel
= ctx
->temp_reg
;
2623 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* EXP_IEEE, reading temp_reg channel 1. */
2627 if (ctx
->bc
->chip_class
== CAYMAN
) {
2628 for (i
= 0; i
< 3; i
++) {
2629 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2630 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2631 alu
.src
[0].sel
= ctx
->temp_reg
;
2632 alu
.src
[0].chan
= 1;
2634 alu
.dst
.sel
= ctx
->temp_reg
;
2641 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2646 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2647 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2648 alu
.src
[0].sel
= ctx
->temp_reg
;
2649 alu
.src
[0].chan
= 1;
2651 alu
.dst
.sel
= ctx
->temp_reg
;
2656 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* RECIP_IEEE, reading temp_reg channel 1. */
2661 if (ctx
->bc
->chip_class
== CAYMAN
) {
2662 for (i
= 0; i
< 3; i
++) {
2663 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2664 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
2665 alu
.src
[0].sel
= ctx
->temp_reg
;
2666 alu
.src
[0].chan
= 1;
2668 alu
.dst
.sel
= ctx
->temp_reg
;
2675 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2680 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2681 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
2682 alu
.src
[0].sel
= ctx
->temp_reg
;
2683 alu
.src
[0].chan
= 1;
2685 alu
.dst
.sel
= ctx
->temp_reg
;
2690 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* MUL: |src| (operand 0) times temp_reg channel 1 (operand 1). */
2695 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2697 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2699 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2700 r600_bc_src_set_abs(&alu
.src
[0]);
2702 alu
.src
[1].sel
= ctx
->temp_reg
;
2703 alu
.src
[1].chan
= 1;
2705 alu
.dst
.sel
= ctx
->temp_reg
;
2710 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2715 /* result.z = log2(|src|);*/
2716 if ((inst
->Dst
[0].Register
.WriteMask
>> 2) & 1) {
2717 if (ctx
->bc
->chip_class
== CAYMAN
) {
2718 for (i
= 0; i
< 3; i
++) {
2719 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2721 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2722 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2723 r600_bc_src_set_abs(&alu
.src
[0]);
2725 alu
.dst
.sel
= ctx
->temp_reg
;
2732 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2737 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2739 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2740 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2741 r600_bc_src_set_abs(&alu
.src
[0]);
2743 alu
.dst
.sel
= ctx
->temp_reg
;
2748 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2754 /* result.w = 1.0; */
2755 if ((inst
->Dst
[0].Register
.WriteMask
>> 3) & 1) {
2756 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2758 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
2759 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2760 alu
.src
[0].chan
= 0;
2762 alu
.dst
.sel
= ctx
->temp_reg
;
2767 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2772 return tgsi_helper_copy(ctx
, inst
);
/*
 * Address-register load using the EG_-prefixed opcodes (handles both
 * TGSI_OPCODE_ARL and TGSI_OPCODE_ARR).
 *
 * First instruction: converts ctx->src[0] (component 0) to an integer and
 * targets ctx->ar_reg, using FLT_TO_INT_FLOOR for ARL and FLT_TO_INT for ARR.
 * Second instruction: MOVA_INT reading ctx->ar_reg channel 0.
 */
2775 static int tgsi_eg_arl(struct r600_shader_ctx
*ctx
)
2777 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2778 struct r600_bc_alu alu
;
2781 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
/* Pick the float-to-int conversion that matches the TGSI opcode. */
2783 switch (inst
->Instruction
.Opcode
) {
2784 case TGSI_OPCODE_ARL
:
2785 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR
;
2787 case TGSI_OPCODE_ARR
:
2788 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
;
2795 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2797 alu
.dst
.sel
= ctx
->ar_reg
;
2799 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2803 /* TODO: Note that the MOVA can be avoided if we never use AR for
2804 * indexing non-CB registers in the current ALU clause. Similarly, we
2805 * need to load AR from ar_reg again if we started a new clause
2806 * between ARL and AR usage. The easy way to do that is to remove
2807 * the MOVA here, and load it for the first AR access after ar_reg
2808 * has been modified in each clause. */
2809 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2810 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT
;
2811 alu
.src
[0].sel
= ctx
->ar_reg
;
2812 alu
.src
[0].chan
= 0;
2814 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Address-register load using the un-prefixed V_SQ_* opcodes (handles both
 * TGSI_OPCODE_ARL and TGSI_OPCODE_ARR).
 *
 *   ARL: FLOOR of ctx->src[0] into ctx->ar_reg, then FLT_TO_INT from
 *        ctx->ar_reg back into ctx->ar_reg.
 *   ARR: a single FLT_TO_INT of ctx->src[0] into ctx->ar_reg.
 *
 * Afterwards a MOVA_INT reading ctx->ar_reg is added and the current CF
 * instruction is flagged with r6xx_uses_waterfall = 1.
 */
2819 static int tgsi_r600_arl(struct r600_shader_ctx
*ctx
)
2821 /* TODO from r600c, ar values don't persist between clauses */
2822 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2823 struct r600_bc_alu alu
;
2826 switch (inst
->Instruction
.Opcode
) {
2827 case TGSI_OPCODE_ARL
:
/* Step 1 of ARL: floor the source value. */
2828 memset(&alu
, 0, sizeof(alu
));
2829 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
;
2830 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2831 alu
.dst
.sel
= ctx
->ar_reg
;
2835 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
/* Step 2 of ARL: convert the floored value in ar_reg to integer. */
2838 memset(&alu
, 0, sizeof(alu
));
2839 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
;
2840 alu
.src
[0].sel
= ctx
->ar_reg
;
2841 alu
.dst
.sel
= ctx
->ar_reg
;
2845 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
2848 case TGSI_OPCODE_ARR
:
/* ARR: convert the source directly, without the FLOOR step. */
2849 memset(&alu
, 0, sizeof(alu
));
2850 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
;
2851 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2852 alu
.dst
.sel
= ctx
->ar_reg
;
2856 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
/* MOVA_INT, reading the integer value held in ar_reg. */
2864 memset(&alu
, 0, sizeof(alu
));
2865 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT
;
2866 alu
.src
[0].sel
= ctx
->ar_reg
;
2869 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2872 ctx
->bc
->cf_last
->r6xx_uses_waterfall
= 1;
/*
 * Emit TGSI DST as four MUL instructions, one per channel i = 0..3, each
 * writing through tgsi_dst() for channel i of inst->Dst[0].
 *
 * Operand 0 is the constant V_SQ_ALU_SRC_1 for channels 0 and 3, and operand 1
 * is V_SQ_ALU_SRC_1 for channels 0 and 2; component i of ctx->src[0] and
 * ctx->src[1] respectively is loaded with r600_bc_src() (presumably for the
 * remaining channels).
 */
2876 static int tgsi_opdst(struct r600_shader_ctx
*ctx
)
2878 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2879 struct r600_bc_alu alu
;
2882 for (i
= 0; i
< 4; i
++) {
2883 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2885 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2886 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
/* Operand 0 selection. */
2888 if (i
== 0 || i
== 3) {
2889 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2891 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
/* Operand 1 selection. */
2894 if (i
== 0 || i
== 2) {
2895 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
2897 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], i
);
2901 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Build one ALU instruction comparing component 0 of ctx->src[0] (operand 0)
 * against the constant V_SQ_ALU_SRC_0 (operand 1), targeting ctx->temp_reg,
 * and add it with r600_bc_add_alu_type() using the ALU_PUSH_BEFORE CF type.
 *
 * opcode: ALU opcode requested by the caller (tgsi_if passes PRED_SETNE).
 */
2908 static int emit_logic_pred(struct r600_shader_ctx
*ctx
, int opcode
)
2910 struct r600_bc_alu alu
;
2913 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2917 alu
.dst
.sel
= ctx
->temp_reg
;
2921 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2922 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
2923 alu
.src
[1].chan
= 0;
2927 r
= r600_bc_add_alu_type(ctx
->bc
, &alu
, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
));
/*
 * Emit the stack pop(s) requested by `pops`.
 *
 * The last CF instruction is inspected: its opcode is compared against ALU
 * and ALU_POP_AFTER (both shifted left by 3), and it may be rewritten in
 * place to ALU_POP_AFTER or ALU_POP2_AFTER, in which case force_add_cf is
 * set to 1. Otherwise an explicit POP CF instruction is added with
 * pop_count = pops and cf_addr = its own id + 2.
 *
 * Note that the parameter `pops` has the same name as the function.
 */
2933 static int pops(struct r600_shader_ctx
*ctx
, int pops
)
/* Snapshot of force_add_cf taken before cf_last is examined. */
2935 unsigned force_pop
= ctx
->bc
->force_add_cf
;
2939 if (ctx
->bc
->cf_last
) {
2940 if (ctx
->bc
->cf_last
->inst
== CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
) << 3)
2942 else if (ctx
->bc
->cf_last
->inst
== CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER
) << 3)
/* Fold the pop into the preceding ALU clause. */
2947 ctx
->bc
->cf_last
->inst
= CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER
) << 3;
2948 ctx
->bc
->force_add_cf
= 1;
2949 } else if (alu_pop
== 2) {
2950 ctx
->bc
->cf_last
->inst
= CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER
) << 3;
2951 ctx
->bc
->force_add_cf
= 1;
/* Stand-alone POP instruction. */
2958 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP
));
2959 ctx
->bc
->cf_last
->pop_count
= pops
;
2960 ctx
->bc
->cf_last
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
/*
 * Lower the `current` counter of the call-stack entry selected by
 * ctx->bc->call_sp. The visible adjustments are -1, -4 and -1; which one
 * applies presumably depends on `reason` (callers in this file pass
 * FC_PUSH_VPM and FC_LOOP).
 */
2966 static inline void callstack_decrease_current(struct r600_shader_ctx
*ctx
, unsigned reason
)
2970 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
--;
2974 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
-= 4;
2977 /* TODO : for 16 vp asic should -= 2; */
2978 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
--;
/*
 * Track call-stack usage for the entry selected by ctx->bc->call_sp.
 *
 * With check_max_only set, only `max` is raised, to current + diff, when that
 * sum exceeds it. Otherwise `current` is raised (the visible adjustments are
 * +1, +4 and +1, presumably chosen by `reason`) and `max` is then raised to
 * the new `current` if it was exceeded.
 */
2983 static inline void callstack_check_depth(struct r600_shader_ctx
*ctx
, unsigned reason
, unsigned check_max_only
)
2985 if (check_max_only
) {
2998 if ((ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
+ diff
) >
2999 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
) {
3000 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
=
3001 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
+ diff
;
3007 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
++;
3011 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
+= 4;
3014 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
++;
/* Record a new high-water mark. */
3018 if ((ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
) >
3019 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
) {
3020 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
=
3021 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
;
3025 static void fc_set_mid(struct r600_shader_ctx
*ctx
, int fc_sp
)
3027 struct r600_cf_stack_entry
*sp
= &ctx
->bc
->fc_stack
[fc_sp
];
3029 sp
->mid
= (struct r600_bc_cf
**)realloc((void *)sp
->mid
,
3030 sizeof(struct r600_bc_cf
*) * (sp
->num_mid
+ 1));
3031 sp
->mid
[sp
->num_mid
] = ctx
->bc
->cf_last
;
/*
 * Fill in the flow-control stack entry at ctx->bc->fc_sp: store `type` and
 * remember the current last CF instruction as the entry's `start`.
 */
3035 static void fc_pushlevel(struct r600_shader_ctx
*ctx
, int type
)
3038 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].type
= type
;
3039 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
= ctx
->bc
->cf_last
;
/*
 * Operates on the flow-control stack entry at ctx->bc->fc_sp.
 * NOTE(review): by its name this is the counterpart of fc_pushlevel() --
 * confirm what state it releases.
 */
3042 static void fc_poplevel(struct r600_shader_ctx
*ctx
)
3044 struct r600_cf_stack_entry
*sp
= &ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
];
/* Add a RETURN control-flow instruction to ctx->bc. */
3056 static int emit_return(struct r600_shader_ctx
*ctx
)
3058 r600_bc_add_cfinst(ctx
->bc
, V_SQ_CF_WORD1_SQ_CF_INST_RETURN
);
/*
 * Add a JUMP control-flow instruction and set its pop_count to `pops`.
 * The TODO below indicates that `offset` has not been worked out yet.
 */
3062 static int emit_jump_to_offset(struct r600_shader_ctx
*ctx
, int pops
, int offset
)
3065 r600_bc_add_cfinst(ctx
->bc
, V_SQ_CF_WORD1_SQ_CF_INST_JUMP
);
3066 ctx
->bc
->cf_last
->pop_count
= pops
;
3067 /* TODO work out offset */
/*
 * NOTE(review): no emitted instruction is visible for this helper; it looks
 * like a placeholder -- confirm. emit_return_on_flag() calls it with
 * V_SQ_ALU_SRC_0 as flag_value.
 */
3071 static int emit_setret_in_loop_flag(struct r600_shader_ctx
*ctx
, unsigned flag_value
)
/*
 * NOTE(review): no emitted instruction is visible for this helper; it looks
 * like a placeholder -- confirm.
 */
3076 static void emit_testflag(struct r600_shader_ctx
*ctx
)
/*
 * Chain the flag helpers: emit_jump_to_offset(ctx, 1, 4), then
 * emit_setret_in_loop_flag() with V_SQ_ALU_SRC_0, then pops() with
 * ifidx + 1 levels. The return values of these calls are not used.
 */
3081 static void emit_return_on_flag(struct r600_shader_ctx
*ctx
, unsigned ifidx
)
3084 emit_jump_to_offset(ctx
, 1, 4);
3085 emit_setret_in_loop_flag(ctx
, V_SQ_ALU_SRC_0
);
3086 pops(ctx
, ifidx
+ 1);
/*
 * Add a CF instruction using the current table entry's hardware opcode
 * (ctx->inst_info->r600_opcode), give it pop_count 1, and register it in the
 * "mid" list of flow-control stack entry `fc_sp` via fc_set_mid().
 */
3090 static void break_loop_on_flag(struct r600_shader_ctx
*ctx
, unsigned fc_sp
)
3094 r600_bc_add_cfinst(ctx
->bc
, ctx
->inst_info
->r600_opcode
);
3095 ctx
->bc
->cf_last
->pop_count
= 1;
3097 fc_set_mid(ctx
, fc_sp
);
/*
 * TGSI IF: emit the PRED_SETNE predicate through emit_logic_pred(), add a
 * JUMP CF instruction, open an FC_IF flow-control level (the JUMP becomes its
 * `start`), and account for the push with callstack_check_depth().
 * The return values of emit_logic_pred() and r600_bc_add_cfinst() are not
 * checked here.
 */
3103 static int tgsi_if(struct r600_shader_ctx
*ctx
)
3105 emit_logic_pred(ctx
, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE
));
3107 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP
));
3109 fc_pushlevel(ctx
, FC_IF
);
3111 callstack_check_depth(ctx
, FC_PUSH_VPM
, 0);
/*
 * TGSI ELSE: add an ELSE CF instruction with pop_count 1, record it in the
 * "mid" list of the current flow-control level, and point the level's start
 * instruction at it (start->cf_addr = id of the ELSE).
 */
3115 static int tgsi_else(struct r600_shader_ctx
*ctx
)
3117 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE
));
3118 ctx
->bc
->cf_last
->pop_count
= 1;
3120 fc_set_mid(ctx
, ctx
->bc
->fc_sp
);
3121 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->cf_addr
= ctx
->bc
->cf_last
->id
;
/*
 * TGSI ENDIF: close the current flow-control level, which must be FC_IF
 * (otherwise "if/endif unbalanced in shader" is reported).
 *
 * If no mid entry was recorded (mid == NULL, i.e. tgsi_else() did not run for
 * this level), the start instruction gets cf_addr = last CF id + 2 and
 * pop_count = 1. The first mid entry's cf_addr is set to last CF id + 2
 * (presumably in the alternative branch, when an ELSE exists).
 * Finally the call-stack accounting for FC_PUSH_VPM is undone.
 */
3125 static int tgsi_endif(struct r600_shader_ctx
*ctx
)
3128 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].type
!= FC_IF
) {
3129 R600_ERR("if/endif unbalanced in shader\n");
3133 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].mid
== NULL
) {
3134 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
3135 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->pop_count
= 1;
3137 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].mid
[0]->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
3141 callstack_decrease_current(ctx
, FC_PUSH_VPM
);
/*
 * TGSI BGNLOOP: add a LOOP_START_NO_AL CF instruction, open an FC_LOOP
 * flow-control level with it as `start`, and update call-stack accounting.
 */
3145 static int tgsi_bgnloop(struct r600_shader_ctx
*ctx
)
3147 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
));
3149 fc_pushlevel(ctx
, FC_LOOP
);
3151 /* check stack depth */
3152 callstack_check_depth(ctx
, FC_LOOP
, 0);
/*
 * TGSI ENDLOOP: add a LOOP_END CF instruction and patch the addresses of the
 * enclosing loop level, which must be FC_LOOP (otherwise
 * "loop/endloop in shader code are not paired." is reported):
 *   - LOOP_END.cf_addr   = id of the level's start instruction + 2
 *   - start.cf_addr      = id of LOOP_END + 2
 *   - every mid[i].cf_addr (entries recorded through fc_set_mid)
 *                        = id of LOOP_END
 * Finally the call-stack accounting for FC_LOOP is undone.
 */
3156 static int tgsi_endloop(struct r600_shader_ctx
*ctx
)
3160 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
));
3162 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].type
!= FC_LOOP
) {
3163 R600_ERR("loop/endloop in shader code are not paired.\n");
3167 /* fixup loop pointers - from r600isa
3168 LOOP END points to CF after LOOP START,
3169 LOOP START point to CF after LOOP END
3170 BRK/CONT point to LOOP END CF
3172 ctx
->bc
->cf_last
->cf_addr
= ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->id
+ 2;
3174 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
3176 for (i
= 0; i
< ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].num_mid
; i
++) {
3177 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].mid
[i
]->cf_addr
= ctx
->bc
->cf_last
->id
;
3179 /* TODO add LOOPRET support */
3181 callstack_decrease_current(ctx
, FC_LOOP
);
/*
 * Shared handler for TGSI BRK and CONT (both table entries point here).
 *
 * Walks the flow-control stack from ctx->bc->fc_sp downwards, stopping above
 * index 0, looking for an FC_LOOP level; "Break not inside loop/endloop pair"
 * is reported (presumably when none is found). A CF instruction with the
 * table entry's hardware opcode (ctx->inst_info->r600_opcode) and pop_count 1
 * is then added and registered in the "mid" list of level fscp, so that
 * tgsi_endloop() can patch its address. Only the call-stack maximum is
 * updated (check_max_only = 1).
 */
3185 static int tgsi_loop_brk_cont(struct r600_shader_ctx
*ctx
)
3189 for (fscp
= ctx
->bc
->fc_sp
; fscp
> 0; fscp
--)
3191 if (FC_LOOP
== ctx
->bc
->fc_stack
[fscp
].type
)
3196 R600_ERR("Break not inside loop/endloop pair\n");
3200 r600_bc_add_cfinst(ctx
->bc
, ctx
->inst_info
->r600_opcode
);
3201 ctx
->bc
->cf_last
->pop_count
= 1;
3203 fc_set_mid(ctx
, fscp
);
3206 callstack_check_depth(ctx
, FC_PUSH_VPM
, 1);
/*
 * TGSI translation table built from the un-prefixed V_SQ_* hardware opcodes
 * (ARL/ARR are routed to tgsi_r600_arl).
 *
 * Each entry is { TGSI opcode, flag, hardware opcode, emit callback }.
 * The flag is 1 only for MAD, the single entry carrying an OP3 opcode
 * (presumably an "is OP3" marker -- confirm against the struct definition).
 * Entries written with a bare number instead of a TGSI_OPCODE_* name, and
 * every opcode without an implementation, point at tgsi_unsupported.
 *
 * NOTE(review): entries appear to be listed in opcode-value order, which
 * suggests the table is indexed by opcode -- confirm before reordering.
 * NOTE(review): TXB and TXL both map to SQ_TEX_INST_SAMPLE_L; confirm that
 * the bias variant is really meant to share the explicit-LOD opcode.
 * NOTE(review): TGSI_OPCODE_NOP is routed to tgsi_unsupported -- confirm.
 */
3210 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction
[] = {
3211 {TGSI_OPCODE_ARL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_r600_arl
},
3212 {TGSI_OPCODE_MOV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3213 {TGSI_OPCODE_LIT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lit
},
3216 * For state trackers other than OpenGL, we'll want to use
3217 * _RECIP_IEEE instead.
3219 {TGSI_OPCODE_RCP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED
, tgsi_trans_srcx_replicate
},
3221 {TGSI_OPCODE_RSQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_rsq
},
3222 {TGSI_OPCODE_EXP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_exp
},
3223 {TGSI_OPCODE_LOG
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_log
},
3224 {TGSI_OPCODE_MUL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
, tgsi_op2
},
3225 {TGSI_OPCODE_ADD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3226 {TGSI_OPCODE_DP3
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3227 {TGSI_OPCODE_DP4
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3228 {TGSI_OPCODE_DST
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_opdst
},
3229 {TGSI_OPCODE_MIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
, tgsi_op2
},
3230 {TGSI_OPCODE_MAX
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
, tgsi_op2
},
3231 {TGSI_OPCODE_SLT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2_swap
},
3232 {TGSI_OPCODE_SGE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2
},
3233 {TGSI_OPCODE_MAD
, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
, tgsi_op3
},
3234 {TGSI_OPCODE_SUB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3235 {TGSI_OPCODE_LRP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lrp
},
3236 {TGSI_OPCODE_CND
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3238 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3239 {TGSI_OPCODE_DP2A
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3241 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3242 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3243 {TGSI_OPCODE_FRC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
, tgsi_op2
},
3244 {TGSI_OPCODE_CLAMP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3245 {TGSI_OPCODE_FLR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
, tgsi_op2
},
3246 {TGSI_OPCODE_ROUND
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE
, tgsi_op2
},
3247 {TGSI_OPCODE_EX2
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
, tgsi_trans_srcx_replicate
},
3248 {TGSI_OPCODE_LG2
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
, tgsi_trans_srcx_replicate
},
3249 {TGSI_OPCODE_POW
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_pow
},
3250 {TGSI_OPCODE_XPD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_xpd
},
3252 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3253 {TGSI_OPCODE_ABS
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3254 {TGSI_OPCODE_RCC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3255 {TGSI_OPCODE_DPH
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3256 {TGSI_OPCODE_COS
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
, tgsi_trig
},
3257 {TGSI_OPCODE_DDX
, 0, SQ_TEX_INST_GET_GRADIENTS_H
, tgsi_tex
},
3258 {TGSI_OPCODE_DDY
, 0, SQ_TEX_INST_GET_GRADIENTS_V
, tgsi_tex
},
3259 {TGSI_OPCODE_KILP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* predicated kill */
3260 {TGSI_OPCODE_PK2H
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3261 {TGSI_OPCODE_PK2US
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3262 {TGSI_OPCODE_PK4B
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3263 {TGSI_OPCODE_PK4UB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3264 {TGSI_OPCODE_RFL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3265 {TGSI_OPCODE_SEQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
, tgsi_op2
},
3266 {TGSI_OPCODE_SFL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3267 {TGSI_OPCODE_SGT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2
},
3268 {TGSI_OPCODE_SIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
, tgsi_trig
},
3269 {TGSI_OPCODE_SLE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2_swap
},
3270 {TGSI_OPCODE_SNE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
, tgsi_op2
},
3271 {TGSI_OPCODE_STR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3272 {TGSI_OPCODE_TEX
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3273 {TGSI_OPCODE_TXD
, 0, SQ_TEX_INST_SAMPLE_G
, tgsi_tex
},
3274 {TGSI_OPCODE_TXP
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3275 {TGSI_OPCODE_UP2H
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3276 {TGSI_OPCODE_UP2US
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3277 {TGSI_OPCODE_UP4B
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3278 {TGSI_OPCODE_UP4UB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3279 {TGSI_OPCODE_X2D
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3280 {TGSI_OPCODE_ARA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3281 {TGSI_OPCODE_ARR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_r600_arl
},
3282 {TGSI_OPCODE_BRA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3283 {TGSI_OPCODE_CAL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3284 {TGSI_OPCODE_RET
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3285 {TGSI_OPCODE_SSG
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_ssg
},
3286 {TGSI_OPCODE_CMP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_cmp
},
3287 {TGSI_OPCODE_SCS
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_scs
},
3288 {TGSI_OPCODE_TXB
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3289 {TGSI_OPCODE_NRM
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3290 {TGSI_OPCODE_DIV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3291 {TGSI_OPCODE_DP2
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3292 {TGSI_OPCODE_TXL
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3293 {TGSI_OPCODE_BRK
, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
, tgsi_loop_brk_cont
},
3294 {TGSI_OPCODE_IF
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_if
},
3296 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3297 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3298 {TGSI_OPCODE_ELSE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_else
},
3299 {TGSI_OPCODE_ENDIF
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endif
},
3301 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3302 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3303 {TGSI_OPCODE_PUSHA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3304 {TGSI_OPCODE_POPA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3305 {TGSI_OPCODE_CEIL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3306 {TGSI_OPCODE_I2F
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3307 {TGSI_OPCODE_NOT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3308 {TGSI_OPCODE_TRUNC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
, tgsi_op2
},
3309 {TGSI_OPCODE_SHL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3311 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3312 {TGSI_OPCODE_AND
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3313 {TGSI_OPCODE_OR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3314 {TGSI_OPCODE_MOD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3315 {TGSI_OPCODE_XOR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3316 {TGSI_OPCODE_SAD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3317 {TGSI_OPCODE_TXF
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3318 {TGSI_OPCODE_TXQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3319 {TGSI_OPCODE_CONT
, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
3320 {TGSI_OPCODE_EMIT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3321 {TGSI_OPCODE_ENDPRIM
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3322 {TGSI_OPCODE_BGNLOOP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_bgnloop
},
3323 {TGSI_OPCODE_BGNSUB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3324 {TGSI_OPCODE_ENDLOOP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endloop
},
3325 {TGSI_OPCODE_ENDSUB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3327 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3328 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3329 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3330 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3331 {TGSI_OPCODE_NOP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3333 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3334 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3335 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3336 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3337 {TGSI_OPCODE_NRM4
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3338 {TGSI_OPCODE_CALLNZ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3339 {TGSI_OPCODE_IFC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3340 {TGSI_OPCODE_BREAKC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3341 {TGSI_OPCODE_KIL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* conditional kill */
3342 {TGSI_OPCODE_END
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_end
}, /* aka HALT */
3344 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3345 {TGSI_OPCODE_F2I
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3346 {TGSI_OPCODE_IDIV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3347 {TGSI_OPCODE_IMAX
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3348 {TGSI_OPCODE_IMIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3349 {TGSI_OPCODE_INEG
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3350 {TGSI_OPCODE_ISGE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3351 {TGSI_OPCODE_ISHR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3352 {TGSI_OPCODE_ISLT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3353 {TGSI_OPCODE_F2U
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3354 {TGSI_OPCODE_U2F
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3355 {TGSI_OPCODE_UADD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3356 {TGSI_OPCODE_UDIV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3357 {TGSI_OPCODE_UMAD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3358 {TGSI_OPCODE_UMAX
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3359 {TGSI_OPCODE_UMIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3360 {TGSI_OPCODE_UMOD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3361 {TGSI_OPCODE_UMUL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3362 {TGSI_OPCODE_USEQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3363 {TGSI_OPCODE_USGE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3364 {TGSI_OPCODE_USHR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3365 {TGSI_OPCODE_USLT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3366 {TGSI_OPCODE_USNE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3367 {TGSI_OPCODE_SWITCH
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3368 {TGSI_OPCODE_CASE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3369 {TGSI_OPCODE_DEFAULT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3370 {TGSI_OPCODE_ENDSWITCH
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3371 {TGSI_OPCODE_LAST
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3374 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction
[] = {
3375 {TGSI_OPCODE_ARL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3376 {TGSI_OPCODE_MOV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3377 {TGSI_OPCODE_LIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lit
},
3378 {TGSI_OPCODE_RCP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
, tgsi_trans_srcx_replicate
},
3379 {TGSI_OPCODE_RSQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE
, tgsi_rsq
},
3380 {TGSI_OPCODE_EXP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_exp
},
3381 {TGSI_OPCODE_LOG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_log
},
3382 {TGSI_OPCODE_MUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
, tgsi_op2
},
3383 {TGSI_OPCODE_ADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3384 {TGSI_OPCODE_DP3
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3385 {TGSI_OPCODE_DP4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3386 {TGSI_OPCODE_DST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_opdst
},
3387 {TGSI_OPCODE_MIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
, tgsi_op2
},
3388 {TGSI_OPCODE_MAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
, tgsi_op2
},
3389 {TGSI_OPCODE_SLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2_swap
},
3390 {TGSI_OPCODE_SGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2
},
3391 {TGSI_OPCODE_MAD
, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
, tgsi_op3
},
3392 {TGSI_OPCODE_SUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3393 {TGSI_OPCODE_LRP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lrp
},
3394 {TGSI_OPCODE_CND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3396 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3397 {TGSI_OPCODE_DP2A
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3399 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3400 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3401 {TGSI_OPCODE_FRC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
, tgsi_op2
},
3402 {TGSI_OPCODE_CLAMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3403 {TGSI_OPCODE_FLR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
, tgsi_op2
},
3404 {TGSI_OPCODE_ROUND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE
, tgsi_op2
},
3405 {TGSI_OPCODE_EX2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
, tgsi_trans_srcx_replicate
},
3406 {TGSI_OPCODE_LG2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
, tgsi_trans_srcx_replicate
},
3407 {TGSI_OPCODE_POW
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_pow
},
3408 {TGSI_OPCODE_XPD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_xpd
},
3410 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3411 {TGSI_OPCODE_ABS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3412 {TGSI_OPCODE_RCC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3413 {TGSI_OPCODE_DPH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3414 {TGSI_OPCODE_COS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
, tgsi_trig
},
3415 {TGSI_OPCODE_DDX
, 0, SQ_TEX_INST_GET_GRADIENTS_H
, tgsi_tex
},
3416 {TGSI_OPCODE_DDY
, 0, SQ_TEX_INST_GET_GRADIENTS_V
, tgsi_tex
},
3417 {TGSI_OPCODE_KILP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* predicated kill */
3418 {TGSI_OPCODE_PK2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3419 {TGSI_OPCODE_PK2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3420 {TGSI_OPCODE_PK4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3421 {TGSI_OPCODE_PK4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3422 {TGSI_OPCODE_RFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3423 {TGSI_OPCODE_SEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
, tgsi_op2
},
3424 {TGSI_OPCODE_SFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3425 {TGSI_OPCODE_SGT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2
},
3426 {TGSI_OPCODE_SIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
, tgsi_trig
},
3427 {TGSI_OPCODE_SLE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2_swap
},
3428 {TGSI_OPCODE_SNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
, tgsi_op2
},
3429 {TGSI_OPCODE_STR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3430 {TGSI_OPCODE_TEX
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3431 {TGSI_OPCODE_TXD
, 0, SQ_TEX_INST_SAMPLE_G
, tgsi_tex
},
3432 {TGSI_OPCODE_TXP
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3433 {TGSI_OPCODE_UP2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3434 {TGSI_OPCODE_UP2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3435 {TGSI_OPCODE_UP4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3436 {TGSI_OPCODE_UP4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3437 {TGSI_OPCODE_X2D
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3438 {TGSI_OPCODE_ARA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3439 {TGSI_OPCODE_ARR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3440 {TGSI_OPCODE_BRA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3441 {TGSI_OPCODE_CAL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3442 {TGSI_OPCODE_RET
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3443 {TGSI_OPCODE_SSG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_ssg
},
3444 {TGSI_OPCODE_CMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_cmp
},
3445 {TGSI_OPCODE_SCS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_scs
},
3446 {TGSI_OPCODE_TXB
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3447 {TGSI_OPCODE_NRM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3448 {TGSI_OPCODE_DIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3449 {TGSI_OPCODE_DP2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3450 {TGSI_OPCODE_TXL
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3451 {TGSI_OPCODE_BRK
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
, tgsi_loop_brk_cont
},
3452 {TGSI_OPCODE_IF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_if
},
3454 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3455 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3456 {TGSI_OPCODE_ELSE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_else
},
3457 {TGSI_OPCODE_ENDIF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endif
},
3459 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3460 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3461 {TGSI_OPCODE_PUSHA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3462 {TGSI_OPCODE_POPA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3463 {TGSI_OPCODE_CEIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3464 {TGSI_OPCODE_I2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3465 {TGSI_OPCODE_NOT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3466 {TGSI_OPCODE_TRUNC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
, tgsi_op2
},
3467 {TGSI_OPCODE_SHL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3469 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3470 {TGSI_OPCODE_AND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3471 {TGSI_OPCODE_OR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3472 {TGSI_OPCODE_MOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3473 {TGSI_OPCODE_XOR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3474 {TGSI_OPCODE_SAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3475 {TGSI_OPCODE_TXF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3476 {TGSI_OPCODE_TXQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3477 {TGSI_OPCODE_CONT
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
3478 {TGSI_OPCODE_EMIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3479 {TGSI_OPCODE_ENDPRIM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3480 {TGSI_OPCODE_BGNLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_bgnloop
},
3481 {TGSI_OPCODE_BGNSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3482 {TGSI_OPCODE_ENDLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endloop
},
3483 {TGSI_OPCODE_ENDSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3485 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3486 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3487 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3488 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3489 {TGSI_OPCODE_NOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3491 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3492 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3493 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3494 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3495 {TGSI_OPCODE_NRM4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3496 {TGSI_OPCODE_CALLNZ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3497 {TGSI_OPCODE_IFC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3498 {TGSI_OPCODE_BREAKC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3499 {TGSI_OPCODE_KIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* conditional kill */
3500 {TGSI_OPCODE_END
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_end
}, /* aka HALT */
3502 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3503 {TGSI_OPCODE_F2I
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3504 {TGSI_OPCODE_IDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3505 {TGSI_OPCODE_IMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3506 {TGSI_OPCODE_IMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3507 {TGSI_OPCODE_INEG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3508 {TGSI_OPCODE_ISGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3509 {TGSI_OPCODE_ISHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3510 {TGSI_OPCODE_ISLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3511 {TGSI_OPCODE_F2U
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3512 {TGSI_OPCODE_U2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3513 {TGSI_OPCODE_UADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3514 {TGSI_OPCODE_UDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3515 {TGSI_OPCODE_UMAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3516 {TGSI_OPCODE_UMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3517 {TGSI_OPCODE_UMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3518 {TGSI_OPCODE_UMOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3519 {TGSI_OPCODE_UMUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3520 {TGSI_OPCODE_USEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3521 {TGSI_OPCODE_USGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3522 {TGSI_OPCODE_USHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3523 {TGSI_OPCODE_USLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3524 {TGSI_OPCODE_USNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3525 {TGSI_OPCODE_SWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3526 {TGSI_OPCODE_CASE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3527 {TGSI_OPCODE_DEFAULT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3528 {TGSI_OPCODE_ENDSWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3529 {TGSI_OPCODE_LAST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3532 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction
[] = {
3533 {TGSI_OPCODE_ARL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3534 {TGSI_OPCODE_MOV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3535 {TGSI_OPCODE_LIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lit
},
3536 {TGSI_OPCODE_RCP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
, cayman_emit_float_instr
},
3537 {TGSI_OPCODE_RSQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE
, cayman_emit_float_instr
},
3538 {TGSI_OPCODE_EXP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_exp
},
3539 {TGSI_OPCODE_LOG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_log
},
3540 {TGSI_OPCODE_MUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
, tgsi_op2
},
3541 {TGSI_OPCODE_ADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3542 {TGSI_OPCODE_DP3
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3543 {TGSI_OPCODE_DP4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3544 {TGSI_OPCODE_DST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_opdst
},
3545 {TGSI_OPCODE_MIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
, tgsi_op2
},
3546 {TGSI_OPCODE_MAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
, tgsi_op2
},
3547 {TGSI_OPCODE_SLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2_swap
},
3548 {TGSI_OPCODE_SGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2
},
3549 {TGSI_OPCODE_MAD
, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
, tgsi_op3
},
3550 {TGSI_OPCODE_SUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3551 {TGSI_OPCODE_LRP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lrp
},
3552 {TGSI_OPCODE_CND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3554 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3555 {TGSI_OPCODE_DP2A
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3557 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3558 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3559 {TGSI_OPCODE_FRC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
, tgsi_op2
},
3560 {TGSI_OPCODE_CLAMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3561 {TGSI_OPCODE_FLR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
, tgsi_op2
},
3562 {TGSI_OPCODE_ROUND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE
, tgsi_op2
},
3563 {TGSI_OPCODE_EX2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
, cayman_emit_float_instr
},
3564 {TGSI_OPCODE_LG2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
, cayman_emit_float_instr
},
3565 {TGSI_OPCODE_POW
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, cayman_pow
},
3566 {TGSI_OPCODE_XPD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_xpd
},
3568 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3569 {TGSI_OPCODE_ABS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3570 {TGSI_OPCODE_RCC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3571 {TGSI_OPCODE_DPH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3572 {TGSI_OPCODE_COS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
, cayman_trig
},
3573 {TGSI_OPCODE_DDX
, 0, SQ_TEX_INST_GET_GRADIENTS_H
, tgsi_tex
},
3574 {TGSI_OPCODE_DDY
, 0, SQ_TEX_INST_GET_GRADIENTS_V
, tgsi_tex
},
3575 {TGSI_OPCODE_KILP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* predicated kill */
3576 {TGSI_OPCODE_PK2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3577 {TGSI_OPCODE_PK2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3578 {TGSI_OPCODE_PK4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3579 {TGSI_OPCODE_PK4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3580 {TGSI_OPCODE_RFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3581 {TGSI_OPCODE_SEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
, tgsi_op2
},
3582 {TGSI_OPCODE_SFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3583 {TGSI_OPCODE_SGT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2
},
3584 {TGSI_OPCODE_SIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
, cayman_trig
},
3585 {TGSI_OPCODE_SLE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2_swap
},
3586 {TGSI_OPCODE_SNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
, tgsi_op2
},
3587 {TGSI_OPCODE_STR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3588 {TGSI_OPCODE_TEX
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3589 {TGSI_OPCODE_TXD
, 0, SQ_TEX_INST_SAMPLE_G
, tgsi_tex
},
3590 {TGSI_OPCODE_TXP
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3591 {TGSI_OPCODE_UP2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3592 {TGSI_OPCODE_UP2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3593 {TGSI_OPCODE_UP4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3594 {TGSI_OPCODE_UP4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3595 {TGSI_OPCODE_X2D
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3596 {TGSI_OPCODE_ARA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3597 {TGSI_OPCODE_ARR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3598 {TGSI_OPCODE_BRA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3599 {TGSI_OPCODE_CAL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3600 {TGSI_OPCODE_RET
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3601 {TGSI_OPCODE_SSG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_ssg
},
3602 {TGSI_OPCODE_CMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_cmp
},
3603 {TGSI_OPCODE_SCS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_scs
},
3604 {TGSI_OPCODE_TXB
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3605 {TGSI_OPCODE_NRM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3606 {TGSI_OPCODE_DIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3607 {TGSI_OPCODE_DP2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3608 {TGSI_OPCODE_TXL
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3609 {TGSI_OPCODE_BRK
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
, tgsi_loop_brk_cont
},
3610 {TGSI_OPCODE_IF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_if
},
3612 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3613 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3614 {TGSI_OPCODE_ELSE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_else
},
3615 {TGSI_OPCODE_ENDIF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endif
},
3617 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3618 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3619 {TGSI_OPCODE_PUSHA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3620 {TGSI_OPCODE_POPA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3621 {TGSI_OPCODE_CEIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3622 {TGSI_OPCODE_I2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3623 {TGSI_OPCODE_NOT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3624 {TGSI_OPCODE_TRUNC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
, tgsi_op2
},
3625 {TGSI_OPCODE_SHL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3627 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3628 {TGSI_OPCODE_AND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3629 {TGSI_OPCODE_OR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3630 {TGSI_OPCODE_MOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3631 {TGSI_OPCODE_XOR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3632 {TGSI_OPCODE_SAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3633 {TGSI_OPCODE_TXF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3634 {TGSI_OPCODE_TXQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3635 {TGSI_OPCODE_CONT
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
3636 {TGSI_OPCODE_EMIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3637 {TGSI_OPCODE_ENDPRIM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3638 {TGSI_OPCODE_BGNLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_bgnloop
},
3639 {TGSI_OPCODE_BGNSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3640 {TGSI_OPCODE_ENDLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endloop
},
3641 {TGSI_OPCODE_ENDSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3643 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3644 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3645 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3646 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3647 {TGSI_OPCODE_NOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3649 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3650 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3651 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3652 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3653 {TGSI_OPCODE_NRM4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3654 {TGSI_OPCODE_CALLNZ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3655 {TGSI_OPCODE_IFC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3656 {TGSI_OPCODE_BREAKC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3657 {TGSI_OPCODE_KIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* conditional kill */
3658 {TGSI_OPCODE_END
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_end
}, /* aka HALT */
3660 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3661 {TGSI_OPCODE_F2I
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3662 {TGSI_OPCODE_IDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3663 {TGSI_OPCODE_IMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3664 {TGSI_OPCODE_IMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3665 {TGSI_OPCODE_INEG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3666 {TGSI_OPCODE_ISGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3667 {TGSI_OPCODE_ISHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3668 {TGSI_OPCODE_ISLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3669 {TGSI_OPCODE_F2U
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3670 {TGSI_OPCODE_U2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3671 {TGSI_OPCODE_UADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3672 {TGSI_OPCODE_UDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3673 {TGSI_OPCODE_UMAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3674 {TGSI_OPCODE_UMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3675 {TGSI_OPCODE_UMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3676 {TGSI_OPCODE_UMOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3677 {TGSI_OPCODE_UMUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3678 {TGSI_OPCODE_USEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3679 {TGSI_OPCODE_USGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3680 {TGSI_OPCODE_USHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3681 {TGSI_OPCODE_USLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3682 {TGSI_OPCODE_USNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3683 {TGSI_OPCODE_SWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3684 {TGSI_OPCODE_CASE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3685 {TGSI_OPCODE_DEFAULT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3686 {TGSI_OPCODE_ENDSWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3687 {TGSI_OPCODE_LAST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},