2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
40 Why CAYMAN got loops for lots of instructions is explained here.
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
/*
 * Search the outputs of vertex shader vs for the entry whose semantic name
 * and semantic index (sid) both match input number id of fragment shader ps.
 *
 * NOTE(review): the residue line numbers jump (62 -> 64, 68 -> 75), so the
 * braces and the return statements are not visible in this listing; the value
 * returned on a match and on a miss cannot be confirmed from here.
 */
61 int r600_find_vs_semantic_index(struct r600_shader
*vs
,
62 struct r600_shader
*ps
, int id
)
64 struct r600_shader_io
*input
= &ps
->input
[id
];
/* linear scan over every vertex shader output */
66 for (int i
= 0; i
< vs
->noutput
; i
++) {
67 if (input
->name
== vs
->output
[i
].name
&&
68 input
->sid
== vs
->output
[i
].sid
) {
/*
 * Upload the already built bytecode of shader into a buffer object and build
 * the hardware state for it.
 *
 * When shader->bo is NULL a buffer of bc.ndw * 4 bytes is created with
 * r600_bo(), mapped, filled with the bytecode and unmapped.  The state builder
 * is then chosen from processor_type (vertex or fragment) and from the chip
 * family (CHIP_CEDAR and newer use the evergreen_* variants).
 *
 * NOTE(review): the declarations of ptr and i, the error returns and the
 * else / break / default lines are missing from this listing (residue line
 * numbers jump), so the exact return values cannot be confirmed from here.
 */
75 static int r600_pipe_shader(struct pipe_context
*ctx
, struct r600_pipe_shader
*shader
)
77 struct r600_pipe_context
*rctx
= (struct r600_pipe_context
*)ctx
;
78 struct r600_shader
*rshader
= &shader
->shader
;
83 if (shader
->bo
== NULL
) {
84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85 shader
->bo
= r600_bo(rctx
->radeon
, rshader
->bc
.ndw
* 4, 4096, PIPE_BIND_VERTEX_BUFFER
, PIPE_USAGE_IMMUTABLE
);
86 if (shader
->bo
== NULL
) {
89 ptr
= (uint32_t*)r600_bo_map(rctx
->radeon
, shader
->bo
, 0, NULL
);
/* when R600_BIG_ENDIAN is set every dword is byte-swapped while copying */
90 if (R600_BIG_ENDIAN
) {
91 for (i
= 0; i
< rshader
->bc
.ndw
; ++i
) {
92 ptr
[i
] = bswap_32(rshader
->bc
.bytecode
[i
]);
/* plain copy of ndw dwords; presumably the else branch of the test above */
95 memcpy(ptr
, rshader
->bc
.bytecode
, rshader
->bc
.ndw
* sizeof(*ptr
));
97 r600_bo_unmap(rctx
->radeon
, shader
->bo
);
/* pick the state builder by shader stage and chip family */
100 switch (rshader
->processor_type
) {
101 case TGSI_PROCESSOR_VERTEX
:
102 if (rshader
->family
>= CHIP_CEDAR
) {
103 evergreen_pipe_shader_vs(ctx
, shader
);
105 r600_pipe_shader_vs(ctx
, shader
);
108 case TGSI_PROCESSOR_FRAGMENT
:
109 if (rshader
->family
>= CHIP_CEDAR
) {
110 evergreen_pipe_shader_ps(ctx
, shader
);
112 r600_pipe_shader_ps(ctx
, shader
);
/* Forward declaration: the TGSI to r600 bytecode translator is defined further down in this file. */
static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
/*
 * Create the hardware shader for shader->tokens.
 *
 * Steps visible here: read the R600_DUMP_SHADERS debug option once (cached in
 * the static dump_shaders, -1 meaning not read yet), dump the TGSI tokens,
 * record the chip family, translate the tokens with r600_shader_from_tgsi(),
 * build the bytecode with r600_bc_build(), dump the bytecode, and finally hand
 * over to r600_pipe_shader().
 *
 * NOTE(review): the declaration of r, the conditions guarding the dumps and
 * the error returns after the two R600_ERR calls are missing from this listing
 * (residue line numbers jump), so which steps are conditional and what is
 * returned on failure cannot be confirmed from here.
 */
123 int r600_pipe_shader_create(struct pipe_context
*ctx
, struct r600_pipe_shader
*shader
)
125 static int dump_shaders
= -1;
126 struct r600_pipe_context
*rctx
= (struct r600_pipe_context
*)ctx
;
129 /* Would like some magic "get_bool_option_once" routine.
131 if (dump_shaders
== -1)
132 dump_shaders
= debug_get_bool_option("R600_DUMP_SHADERS", FALSE
);
135 fprintf(stderr
, "--------------------------------------------------------------\n");
136 tgsi_dump(shader
->tokens
, 0);
138 shader
->shader
.family
= r600_get_family(rctx
->radeon
);
139 r
= r600_shader_from_tgsi(rctx
, shader
);
141 R600_ERR("translation from TGSI failed !\n");
144 r
= r600_bc_build(&shader
->shader
.bc
);
146 R600_ERR("building bytecode failed !\n");
150 r600_bc_dump(&shader
->shader
.bc
);
151 fprintf(stderr
, "______________________________________________________________\n");
153 return r600_pipe_shader(ctx
, shader
);
156 void r600_pipe_shader_destroy(struct pipe_context
*ctx
, struct r600_pipe_shader
*shader
)
158 struct r600_pipe_context
*rctx
= (struct r600_pipe_context
*)ctx
;
160 r600_bo_reference(rctx
->radeon
, &shader
->bo
, NULL
);
161 r600_bc_clear(&shader
->shader
.bc
);
163 memset(&shader
->shader
,0,sizeof(struct r600_shader
));
/*
 * tgsi -> r600 shader
 */
/* Forward declaration so r600_shader_ctx can hold a pointer to the dispatch entry type. */
struct r600_shader_tgsi_instruction;
/*
 * One source operand as filled in by tgsi_src() from a TGSI source register
 * and later copied into a bytecode ALU source by r600_bc_src().
 *
 * NOTE(review): the member list (residue lines 172-179) is missing from this
 * listing.  Code in this file accesses the members sel, swizzle[], neg, abs,
 * rel and value[]; their declared types cannot be confirmed from here.
 */
171 struct r600_shader_src
{
/*
 * Working state of one TGSI to r600 translation.
 *
 * NOTE(review): several members are missing from this listing (residue line
 * numbers jump).  Code in this file also accesses type, bc, literals,
 * nliterals, temp_reg, ar_reg and num_interp_gpr on this struct.
 */
180 struct r600_shader_ctx
{
/* result of tgsi_scan_shader() on the token stream */
181 struct tgsi_shader_info info
;
/* parser state; FullToken holds the token currently being processed */
182 struct tgsi_parse_context parse
;
183 const struct tgsi_token
*tokens
;
/* value added to a TGSI register index of each file to form the hardware selector */
185 unsigned file_offset
[TGSI_FILE_COUNT
];
/* dispatch table entry of the instruction currently being translated */
188 struct r600_shader_tgsi_instruction
*inst_info
;
190 struct r600_shader
*shader
;
/* converted source operands of the current instruction */
191 struct r600_shader_src src
[4];
/* number of driver temporaries handed out by r600_get_temp(); reset per instruction */
194 u32 max_driver_temp_used
;
195 /* needed for evergreen interpolation */
196 boolean input_centroid
;
197 boolean input_linear
;
198 boolean input_perspective
;
/*
 * One entry of the per-chip dispatch tables: maps a TGSI opcode to the
 * hardware opcode and to the handler that emits it.
 *
 * NOTE(review): residue line 204 is missing from this listing, so one member
 * between tgsi_opcode and r600_opcode is not visible here.
 */
202 struct r600_shader_tgsi_instruction
{
203 unsigned tgsi_opcode
;
205 unsigned r600_opcode
;
/* handler invoked as inst_info->process(ctx); its result is stored in r by the caller */
206 int (*process
)(struct r600_shader_ctx
*ctx
);
209 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction
[], eg_shader_tgsi_instruction
[], cm_shader_tgsi_instruction
[];
210 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx
*ctx
);
/*
 * Check whether the current TGSI instruction can be translated.
 *
 * Logged as unsupported through R600_ERR: more than one destination register,
 * a predicate, a label, and any source or destination register with the
 * Dimension flag set.
 *
 * NOTE(review): the declaration of j and all return statements are missing
 * from this listing (residue line numbers jump), so the exact return values
 * cannot be confirmed from here.
 */
212 static int tgsi_is_supported(struct r600_shader_ctx
*ctx
)
214 struct tgsi_full_instruction
*i
= &ctx
->parse
.FullToken
.FullInstruction
;
217 if (i
->Instruction
.NumDstRegs
> 1) {
218 R600_ERR("too many dst (%d)\n", i
->Instruction
.NumDstRegs
);
221 if (i
->Instruction
.Predicate
) {
222 R600_ERR("predicate unsupported\n");
226 if (i
->Instruction
.Label
) {
227 R600_ERR("label unsupported\n");
/* every source register is checked for the Dimension flag */
231 for (j
= 0; j
< i
->Instruction
.NumSrcRegs
; j
++) {
232 if (i
->Src
[j
].Register
.Dimension
) {
233 R600_ERR("unsupported src %d (dimension %d)\n", j
,
234 i
->Src
[j
].Register
.Dimension
);
/* same check for every destination register */
238 for (j
= 0; j
< i
->Instruction
.NumDstRegs
; j
++) {
239 if (i
->Dst
[j
].Register
.Dimension
) {
240 R600_ERR("unsupported dst (dimension)\n");
/*
 * Emit the evergreen interpolation ALU code for fragment shader input number
 * input.
 *
 * The interpolation mode and centroid flag of the input, together with the
 * input_perspective / input_centroid flags of the context, select an ij_index;
 * base_chan is derived from it as 2 * (ij_index % 2) + 1.  Eight ALU
 * instructions are then added, using the INTERP_ZW and INTERP_XY opcodes.
 * Iterations with 1 < i < 6 target the gpr of the input; the destination
 * channel is i % 4; source 0 reads gpr at channel base_chan - (i % 2); source 1
 * is the parameter base plus the lds_pos of the input.
 *
 * NOTE(review): the declarations of i, r and ij_index, the assignments to
 * ij_index and gpr, the condition choosing between the two opcodes and the
 * return statements are missing from this listing (residue line numbers jump).
 */
247 static int evergreen_interp_alu(struct r600_shader_ctx
*ctx
, int input
)
250 struct r600_bc_alu alu
;
251 int gpr
= 0, base_chan
= 0;
254 if (ctx
->shader
->input
[input
].interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
) {
256 if (ctx
->shader
->input
[input
].centroid
)
258 } else if (ctx
->shader
->input
[input
].interpolate
== TGSI_INTERPOLATE_LINEAR
) {
260 /* if we have perspective add one */
261 if (ctx
->input_perspective
) {
263 /* if we have perspective centroid */
264 if (ctx
->input_centroid
)
267 if (ctx
->shader
->input
[input
].centroid
)
271 /* work out gpr and base_chan from index */
273 base_chan
= (2 * (ij_index
% 2)) + 1;
275 for (i
= 0; i
< 8; i
++) {
276 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
279 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW
;
281 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY
;
/* only the four middle iterations are given the gpr of the input as destination */
283 if ((i
> 1) && (i
< 6)) {
284 alu
.dst
.sel
= ctx
->shader
->input
[input
].gpr
;
288 alu
.dst
.chan
= i
% 4;
290 alu
.src
[0].sel
= gpr
;
291 alu
.src
[0].chan
= (base_chan
- (i
% 2));
293 alu
.src
[1].sel
= V_SQ_ALU_SRC_PARAM_BASE
+ ctx
->shader
->input
[input
].lds_pos
;
295 alu
.bank_swizzle_force
= SQ_ALU_VEC_210
;
298 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Process the current TGSI declaration token.
 *
 * - INPUT: appends an entry to shader->input with semantic name, semantic
 *   index, interpolation mode, centroid flag and gpr (input file offset plus
 *   the entry index).  For fragment shaders on evergreen and newer, inputs
 *   other than POSITION with interpolate > 0 get the next LDS position and
 *   evergreen_interp_alu() is called for them.
 * - OUTPUT: appends an entry to shader->output with semantic name, semantic
 *   index, gpr and interpolation mode.
 * - CONSTANT, TEMPORARY, SAMPLER, ADDRESS: listed as cases with no visible
 *   statements.
 * - SYSTEM_VALUE with INSTANCEID semantic: adds an INT_TO_FLT ALU instruction.
 * - anything else reaching the R600_ERR call is logged as unsupported.
 *
 * NOTE(review): the declarations of i and r, the break / return lines, the
 * remaining ALU field assignments for INSTANCEID and the default label are
 * missing from this listing (residue line numbers jump).
 */
306 static int tgsi_declaration(struct r600_shader_ctx
*ctx
)
308 struct tgsi_full_declaration
*d
= &ctx
->parse
.FullToken
.FullDeclaration
;
312 switch (d
->Declaration
.File
) {
313 case TGSI_FILE_INPUT
:
/* the new input takes the next free slot; ninput is post-incremented */
314 i
= ctx
->shader
->ninput
++;
315 ctx
->shader
->input
[i
].name
= d
->Semantic
.Name
;
316 ctx
->shader
->input
[i
].sid
= d
->Semantic
.Index
;
317 ctx
->shader
->input
[i
].interpolate
= d
->Declaration
.Interpolate
;
318 ctx
->shader
->input
[i
].centroid
= d
->Declaration
.Centroid
;
319 ctx
->shader
->input
[i
].gpr
= ctx
->file_offset
[TGSI_FILE_INPUT
] + i
;
320 if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
->bc
->chiprev
>= CHIPREV_EVERGREEN
) {
321 /* turn input into interpolate on EG */
322 if (ctx
->shader
->input
[i
].name
!= TGSI_SEMANTIC_POSITION
) {
323 if (ctx
->shader
->input
[i
].interpolate
> 0) {
324 ctx
->shader
->input
[i
].lds_pos
= ctx
->shader
->nlds
++;
325 evergreen_interp_alu(ctx
, i
);
330 case TGSI_FILE_OUTPUT
:
/* the new output takes the next free slot; noutput is post-incremented */
331 i
= ctx
->shader
->noutput
++;
332 ctx
->shader
->output
[i
].name
= d
->Semantic
.Name
;
333 ctx
->shader
->output
[i
].sid
= d
->Semantic
.Index
;
334 ctx
->shader
->output
[i
].gpr
= ctx
->file_offset
[TGSI_FILE_OUTPUT
] + i
;
335 ctx
->shader
->output
[i
].interpolate
= d
->Declaration
.Interpolate
;
337 case TGSI_FILE_CONSTANT
:
338 case TGSI_FILE_TEMPORARY
:
339 case TGSI_FILE_SAMPLER
:
340 case TGSI_FILE_ADDRESS
:
343 case TGSI_FILE_SYSTEM_VALUE
:
344 if (d
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
) {
345 struct r600_bc_alu alu
;
346 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
/* integer to float conversion; the operand setup lines are not visible here */
348 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT
);
357 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
363 R600_ERR("unsupported file %d declaration\n", d
->Declaration
.File
);
369 static int r600_get_temp(struct r600_shader_ctx
*ctx
)
371 return ctx
->temp_reg
+ ctx
->max_driver_temp_used
++;
/*
 * For evergreen we need to scan the shader to find the number of GPRs we need
 * to reserve for interpolation.
 *
 * We need to know:
 *  - whether we are going to emit any centroid inputs
 *  - whether perspective and linear interpolation are required
 */
/*
 * Work out how many GPRs must be reserved for interpolation on evergreen.
 *
 * Clears the three input_* flags, starts num_interp_gpr at 1, then scans all
 * inputs reported by the TGSI scan (testing for the POSITION and FACE
 * semantics first) and sets input_linear, input_perspective and
 * input_centroid as the corresponding modes are seen.  The flags feed
 * num_baryc, and (num_baryc + 1) >> 1 is added to num_interp_gpr, which is
 * returned.
 *
 * NOTE(review): the declarations of i and num_baryc, the statement following
 * the position/face test and the statements updating num_baryc are missing
 * from this listing (residue line numbers jump).
 */
382 static int evergreen_gpr_count(struct r600_shader_ctx
*ctx
)
387 ctx
->input_linear
= FALSE
;
388 ctx
->input_perspective
= FALSE
;
389 ctx
->input_centroid
= FALSE
;
390 ctx
->num_interp_gpr
= 1;
392 /* any centroid inputs */
393 for (i
= 0; i
< ctx
->info
.num_inputs
; i
++) {
394 /* skip position/face */
395 if (ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
||
396 ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
398 if (ctx
->info
.input_interpolate
[i
] == TGSI_INTERPOLATE_LINEAR
)
399 ctx
->input_linear
= TRUE
;
400 if (ctx
->info
.input_interpolate
[i
] == TGSI_INTERPOLATE_PERSPECTIVE
)
401 ctx
->input_perspective
= TRUE
;
402 if (ctx
->info
.input_centroid
[i
])
403 ctx
->input_centroid
= TRUE
;
407 /* ignoring sample for now */
408 if (ctx
->input_perspective
)
410 if (ctx
->input_linear
)
412 if (ctx
->input_centroid
)
/* rounds num_baryc up to an even count and halves it */
415 ctx
->num_interp_gpr
+= (num_baryc
+ 1) >> 1;
417 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
418 return ctx
->num_interp_gpr
;
/*
 * Convert a TGSI source register into an r600_shader_src.
 *
 * The destination is zeroed, then the four swizzles and the negate / absolute
 * flags are copied.  After that:
 * - IMMEDIATE file: when all four swizzles are equal, the single literal value
 *   is offered to r600_bc_special_constants(), which may replace it with an
 *   inline constant selector; the operand is also set up as a literal, with the
 *   four dwords of the immediate copied into value[].
 * - SYSTEM_VALUE file: all four swizzles are forced to 3.
 * - the remaining visible statements set rel for indirect registers and compute
 *   sel as register index plus the file offset.
 *
 * NOTE(review): the declaration of index, the statement after the
 * special-constant test, and the lines between the system value branch and
 * the generic sel computation (presumably an else) are missing from this
 * listing (residue line numbers jump).
 */
421 static void tgsi_src(struct r600_shader_ctx
*ctx
,
422 const struct tgsi_full_src_register
*tgsi_src
,
423 struct r600_shader_src
*r600_src
)
425 memset(r600_src
, 0, sizeof(*r600_src
));
426 r600_src
->swizzle
[0] = tgsi_src
->Register
.SwizzleX
;
427 r600_src
->swizzle
[1] = tgsi_src
->Register
.SwizzleY
;
428 r600_src
->swizzle
[2] = tgsi_src
->Register
.SwizzleZ
;
429 r600_src
->swizzle
[3] = tgsi_src
->Register
.SwizzleW
;
430 r600_src
->neg
= tgsi_src
->Register
.Negate
;
431 r600_src
->abs
= tgsi_src
->Register
.Absolute
;
433 if (tgsi_src
->Register
.File
== TGSI_FILE_IMMEDIATE
) {
/* all four swizzles equal: a single scalar of the immediate is read */
435 if ((tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleY
) &&
436 (tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleZ
) &&
437 (tgsi_src
->Register
.SwizzleX
== tgsi_src
->Register
.SwizzleW
)) {
/* literals holds four dwords per immediate, hence Index * 4 plus the channel */
439 index
= tgsi_src
->Register
.Index
* 4 + tgsi_src
->Register
.SwizzleX
;
440 r600_bc_special_constants(ctx
->literals
[index
], &r600_src
->sel
, &r600_src
->neg
);
441 if (r600_src
->sel
!= V_SQ_ALU_SRC_LITERAL
)
444 index
= tgsi_src
->Register
.Index
;
445 r600_src
->sel
= V_SQ_ALU_SRC_LITERAL
;
446 memcpy(r600_src
->value
, ctx
->literals
+ index
* 4, sizeof(r600_src
->value
));
447 } else if (tgsi_src
->Register
.File
== TGSI_FILE_SYSTEM_VALUE
) {
448 /* assume we want TGSI_SEMANTIC_INSTANCEID here */
449 r600_src
->swizzle
[0] = 3;
450 r600_src
->swizzle
[1] = 3;
451 r600_src
->swizzle
[2] = 3;
452 r600_src
->swizzle
[3] = 3;
455 if (tgsi_src
->Register
.Indirect
)
456 r600_src
->rel
= V_SQ_REL_RELATIVE
;
457 r600_src
->sel
= tgsi_src
->Register
.Index
;
458 r600_src
->sel
+= ctx
->file_offset
[tgsi_src
->Register
.File
];
/*
 * Load a relatively addressed constant into dst_reg with a vertex fetch.
 *
 * Visible steps: an ADD_INT ALU instruction with ar_reg as source 0 and the
 * literal offset as source 1, written to dst_reg; then a vertex fetch with
 * fetch_type 2, mega_fetch_count 16 and data format FMT_32_32_32_32_FLOAT
 * that reads through ar_reg and writes x, y, z, w of dst_reg.
 *
 * NOTE(review): the declarations of ar_reg and r, the condition deciding
 * whether the ADD_INT is emitted, the lines that set ar_reg in that case and
 * the return statements are missing from this listing (residue line numbers
 * jump).
 */
462 static int tgsi_fetch_rel_const(struct r600_shader_ctx
*ctx
, unsigned int offset
, unsigned int dst_reg
)
464 struct r600_bc_vtx vtx
;
469 struct r600_bc_alu alu
;
471 memset(&alu
, 0, sizeof(alu
));
473 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT
);
474 alu
.src
[0].sel
= ctx
->ar_reg
;
476 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
477 alu
.src
[1].value
= offset
;
479 alu
.dst
.sel
= dst_reg
;
483 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
488 ar_reg
= ctx
->ar_reg
;
491 memset(&vtx
, 0, sizeof(vtx
));
492 vtx
.fetch_type
= 2; /* VTX_FETCH_NO_INDEX_OFFSET */
493 vtx
.src_gpr
= ar_reg
;
494 vtx
.mega_fetch_count
= 16;
495 vtx
.dst_gpr
= dst_reg
;
496 vtx
.dst_sel_x
= 0; /* SEL_X */
497 vtx
.dst_sel_y
= 1; /* SEL_Y */
498 vtx
.dst_sel_z
= 2; /* SEL_Z */
499 vtx
.dst_sel_w
= 3; /* SEL_W */
500 vtx
.data_format
= FMT_32_32_32_32_FLOAT
;
501 vtx
.num_format_all
= 2; /* NUM_FORMAT_SCALED */
502 vtx
.format_comp_all
= 1; /* FORMAT_COMP_SIGNED */
503 vtx
.srf_mode_all
= 1; /* SRF_MODE_NO_ZERO */
504 vtx
.endian
= r600_endian_swap(32);
506 if ((r
= r600_bc_add_vtx(ctx
->bc
, &vtx
)))
/*
 * Convert the sources of the current instruction and move constant-file
 * operands into temporaries where needed.
 *
 * First pass: every source is converted with tgsi_src() into ctx->src[], and
 * constant-file sources are detected (nconst starts at 0).  Second pass, with
 * j starting at nconst - 1: a constant source with rel set is fetched with
 * tgsi_fetch_rel_const() into a fresh temporary (offset sel - 512, 512 being
 * the constant file offset); another path copies the source into a fresh
 * temporary with four MOV instructions.  In both cases the source selector is
 * redirected to the temporary.
 *
 * NOTE(review): the statement incrementing nconst, the condition guarding the
 * MOV path, the updates of j, the remaining ALU field assignments and all
 * return statements are missing from this listing (residue line numbers jump).
 */
512 static int tgsi_split_constant(struct r600_shader_ctx
*ctx
)
514 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
515 struct r600_bc_alu alu
;
516 int i
, j
, k
, nconst
, r
;
518 for (i
= 0, nconst
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
519 if (inst
->Src
[i
].Register
.File
== TGSI_FILE_CONSTANT
) {
522 tgsi_src(ctx
, &inst
->Src
[i
], &ctx
->src
[i
]);
524 for (i
= 0, j
= nconst
- 1; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
525 if (inst
->Src
[i
].Register
.File
!= TGSI_FILE_CONSTANT
) {
/* relatively addressed constant: fetch it into a temporary */
529 if (ctx
->src
[i
].rel
) {
530 int treg
= r600_get_temp(ctx
);
531 if ((r
= tgsi_fetch_rel_const(ctx
, ctx
->src
[i
].sel
- 512, treg
)))
534 ctx
->src
[i
].sel
= treg
;
/* copy path: one MOV per channel into a fresh temporary */
538 int treg
= r600_get_temp(ctx
);
539 for (k
= 0; k
< 4; k
++) {
540 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
541 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
542 alu
.src
[0].sel
= ctx
->src
[i
].sel
;
544 alu
.src
[0].rel
= ctx
->src
[i
].rel
;
550 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
554 ctx
->src
[i
].sel
= treg
;
562 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
/*
 * Move literal operands of the current instruction into temporaries.
 *
 * First pass: literal sources (sel == V_SQ_ALU_SRC_LITERAL) are detected
 * (nliteral starts at 0).  Second pass, with j starting at nliteral - 1: while
 * j > 0, a literal source is copied into a fresh temporary with four MOV
 * instructions, each taking value[k] as its literal, and the source selector
 * is redirected to that temporary.
 *
 * NOTE(review): the statement incrementing nliteral, the updates of j, the
 * remaining ALU field assignments and all return statements are missing from
 * this listing (residue line numbers jump).
 */
563 static int tgsi_split_literal_constant(struct r600_shader_ctx
*ctx
)
565 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
566 struct r600_bc_alu alu
;
567 int i
, j
, k
, nliteral
, r
;
569 for (i
= 0, nliteral
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
570 if (ctx
->src
[i
].sel
== V_SQ_ALU_SRC_LITERAL
) {
574 for (i
= 0, j
= nliteral
- 1; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
575 if (j
> 0 && ctx
->src
[i
].sel
== V_SQ_ALU_SRC_LITERAL
) {
576 int treg
= r600_get_temp(ctx
);
577 for (k
= 0; k
< 4; k
++) {
578 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
579 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
580 alu
.src
[0].sel
= ctx
->src
[i
].sel
;
582 alu
.src
[0].value
= ctx
->src
[i
].value
[k
];
588 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
592 ctx
->src
[i
].sel
= treg
;
/*
 * Translate the TGSI tokens of pipeshader into r600 bytecode.
 *
 * Outline of the visible code:
 *  1. Initialise the bytecode container for the chip family, scan and start
 *     parsing the tokens, and record the processor type and color clamping.
 *  2. Lay out the register files: inputs (offset 1 for vertex shaders, the
 *     evergreen interpolation GPR count for evergreen fragment shaders), then
 *     outputs, then temporaries; constants live at 512 and immediates map to
 *     the literal selector.  ar_reg follows the temporaries and temp_reg
 *     follows ar_reg.  Vertex shaders also get a CALL_FS control flow
 *     instruction.
 *  3. Walk the token stream: immediates are appended to ctx.literals (four
 *     dwords each), declarations go to tgsi_declaration(), instructions are
 *     checked, have their constants and literals split, and are dispatched
 *     through the cayman, evergreen or r600 opcode table; the
 *     FS_COLOR0_WRITES_ALL_CBUFS property sets fs_write_all.
 *  4. When clamp_color is set, emit MOVs over COLOR and BCOLOR outputs.
 *  5. Build one export per output (parameter, position or pixel export
 *     depending on stage and semantic), add a fake parameter or pixel export
 *     where needed, mark the last export of each type as EXPORT_DONE, add the
 *     exports to the bytecode and, on cayman, append the CF end instruction.
 *
 * NOTE(review): many lines are missing from this listing (residue line numbers
 * jump): declarations of i, j, pos0, opcode and r, initialisation of
 * ctx.literals / ctx.nliterals, the increment of nliterals, the error checks
 * and gotos, break statements, else branches, and the final returns.  The
 * failure path of the realloc of ctx.literals is therefore not visible; since
 * the result is assigned straight back to ctx.literals, confirm the old buffer
 * is not leaked on failure.  Also confirm that noutput can never exceed the
 * 32 entries of the local output array.
 */
599 static int r600_shader_from_tgsi(struct r600_pipe_context
* rctx
, struct r600_pipe_shader
*pipeshader
)
601 struct r600_shader
*shader
= &pipeshader
->shader
;
602 struct tgsi_token
*tokens
= pipeshader
->tokens
;
603 struct tgsi_full_immediate
*immediate
;
604 struct tgsi_full_property
*property
;
605 struct r600_shader_ctx ctx
;
606 struct r600_bc_output output
[32];
607 unsigned output_done
, noutput
;
611 ctx
.bc
= &shader
->bc
;
613 r
= r600_bc_init(ctx
.bc
, shader
->family
);
617 tgsi_scan_shader(tokens
, &ctx
.info
);
618 tgsi_parse_init(&ctx
.parse
, tokens
);
619 ctx
.type
= ctx
.parse
.FullHeader
.Processor
.Processor
;
620 shader
->processor_type
= ctx
.type
;
621 ctx
.bc
->type
= shader
->processor_type
;
623 shader
->clamp_color
= (((ctx
.type
== TGSI_PROCESSOR_FRAGMENT
) && rctx
->clamp_fragment_color
) ||
624 ((ctx
.type
== TGSI_PROCESSOR_VERTEX
) && rctx
->clamp_vertex_color
));
626 /* register allocations */
627 /* Values [0,127] correspond to GPR[0..127].
628 * Values [128,159] correspond to constant buffer bank 0
629 * Values [160,191] correspond to constant buffer bank 1
630 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
631 * Values [256,287] correspond to constant buffer bank 2 (EG)
632 * Values [288,319] correspond to constant buffer bank 3 (EG)
633 * Other special values are shown in the list below.
634 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
635 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
636 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
637 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
638 * 248 SQ_ALU_SRC_0: special constant 0.0.
639 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
640 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
641 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
642 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
643 * 253 SQ_ALU_SRC_LITERAL: literal constant.
644 * 254 SQ_ALU_SRC_PV: previous vector result.
645 * 255 SQ_ALU_SRC_PS: previous scalar result.
647 for (i
= 0; i
< TGSI_FILE_COUNT
; i
++) {
648 ctx
.file_offset
[i
] = 0;
650 if (ctx
.type
== TGSI_PROCESSOR_VERTEX
) {
651 ctx
.file_offset
[TGSI_FILE_INPUT
] = 1;
652 if (ctx
.bc
->chiprev
>= CHIPREV_EVERGREEN
) {
653 r600_bc_add_cfinst(ctx
.bc
, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS
);
655 r600_bc_add_cfinst(ctx
.bc
, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS
);
658 if (ctx
.type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
.bc
->chiprev
>= CHIPREV_EVERGREEN
) {
659 ctx
.file_offset
[TGSI_FILE_INPUT
] = evergreen_gpr_count(&ctx
);
661 ctx
.file_offset
[TGSI_FILE_OUTPUT
] = ctx
.file_offset
[TGSI_FILE_INPUT
] +
662 ctx
.info
.file_count
[TGSI_FILE_INPUT
];
663 ctx
.file_offset
[TGSI_FILE_TEMPORARY
] = ctx
.file_offset
[TGSI_FILE_OUTPUT
] +
664 ctx
.info
.file_count
[TGSI_FILE_OUTPUT
];
666 /* Outside the GPR range. This will be translated to one of the
667 * kcache banks later. */
668 ctx
.file_offset
[TGSI_FILE_CONSTANT
] = 512;
670 ctx
.file_offset
[TGSI_FILE_IMMEDIATE
] = V_SQ_ALU_SRC_LITERAL
;
671 ctx
.ar_reg
= ctx
.file_offset
[TGSI_FILE_TEMPORARY
] +
672 ctx
.info
.file_count
[TGSI_FILE_TEMPORARY
];
673 ctx
.temp_reg
= ctx
.ar_reg
+ 1;
677 shader
->fs_write_all
= FALSE
;
678 while (!tgsi_parse_end_of_tokens(&ctx
.parse
)) {
679 tgsi_parse_token(&ctx
.parse
);
680 switch (ctx
.parse
.FullToken
.Token
.Type
) {
681 case TGSI_TOKEN_TYPE_IMMEDIATE
:
682 immediate
= &ctx
.parse
.FullToken
.FullImmediate
;
683 ctx
.literals
= realloc(ctx
.literals
, (ctx
.nliterals
+ 1) * 16);
684 if(ctx
.literals
== NULL
) {
688 ctx
.literals
[ctx
.nliterals
* 4 + 0] = immediate
->u
[0].Uint
;
689 ctx
.literals
[ctx
.nliterals
* 4 + 1] = immediate
->u
[1].Uint
;
690 ctx
.literals
[ctx
.nliterals
* 4 + 2] = immediate
->u
[2].Uint
;
691 ctx
.literals
[ctx
.nliterals
* 4 + 3] = immediate
->u
[3].Uint
;
694 case TGSI_TOKEN_TYPE_DECLARATION
:
695 r
= tgsi_declaration(&ctx
);
699 case TGSI_TOKEN_TYPE_INSTRUCTION
:
700 r
= tgsi_is_supported(&ctx
);
703 ctx
.max_driver_temp_used
= 0;
704 /* reserve first tmp for everyone */
707 opcode
= ctx
.parse
.FullToken
.FullInstruction
.Instruction
.Opcode
;
708 if ((r
= tgsi_split_constant(&ctx
)))
710 if ((r
= tgsi_split_literal_constant(&ctx
)))
712 if (ctx
.bc
->chiprev
== CHIPREV_CAYMAN
)
713 ctx
.inst_info
= &cm_shader_tgsi_instruction
[opcode
];
714 else if (ctx
.bc
->chiprev
>= CHIPREV_EVERGREEN
)
715 ctx
.inst_info
= &eg_shader_tgsi_instruction
[opcode
];
717 ctx
.inst_info
= &r600_shader_tgsi_instruction
[opcode
];
718 r
= ctx
.inst_info
->process(&ctx
);
722 case TGSI_TOKEN_TYPE_PROPERTY
:
723 property
= &ctx
.parse
.FullToken
.FullProperty
;
724 if (property
->Property
.PropertyName
== TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
) {
725 if (property
->u
[0].Data
== 1)
726 shader
->fs_write_all
= TRUE
;
730 R600_ERR("unsupported token type %d\n", ctx
.parse
.FullToken
.Token
.Type
);
736 noutput
= shader
->noutput
;
738 /* clamp color outputs */
739 if (shader
->clamp_color
) {
740 for (i
= 0; i
< noutput
; i
++) {
741 if (shader
->output
[i
].name
== TGSI_SEMANTIC_COLOR
||
742 shader
->output
[i
].name
== TGSI_SEMANTIC_BCOLOR
) {
745 for (j
= 0; j
< 4; j
++) {
746 struct r600_bc_alu alu
;
747 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
750 alu
.inst
= BC_INST(ctx
.bc
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
751 alu
.dst
.sel
= shader
->output
[i
].gpr
;
755 alu
.src
[0].sel
= alu
.dst
.sel
;
761 r
= r600_bc_add_alu(ctx
.bc
, &alu
);
770 for (i
= 0, pos0
= 0; i
< noutput
; i
++) {
771 memset(&output
[i
], 0, sizeof(struct r600_bc_output
));
772 output
[i
].gpr
= shader
->output
[i
].gpr
;
773 output
[i
].elem_size
= 3;
774 output
[i
].swizzle_x
= 0;
775 output
[i
].swizzle_y
= 1;
776 output
[i
].swizzle_z
= 2;
777 output
[i
].swizzle_w
= 3;
778 output
[i
].burst_count
= 1;
779 output
[i
].barrier
= 1;
780 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
781 output
[i
].array_base
= i
- pos0
;
782 output
[i
].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
);
784 case TGSI_PROCESSOR_VERTEX
:
785 if (shader
->output
[i
].name
== TGSI_SEMANTIC_POSITION
) {
786 output
[i
].array_base
= 60;
787 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
788 /* position doesn't count in array_base */
791 if (shader
->output
[i
].name
== TGSI_SEMANTIC_PSIZE
) {
792 output
[i
].array_base
= 61;
793 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
;
794 /* position doesn't count in array_base */
798 case TGSI_PROCESSOR_FRAGMENT
:
799 if (shader
->output
[i
].name
== TGSI_SEMANTIC_COLOR
) {
800 output
[i
].array_base
= shader
->output
[i
].sid
;
801 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
802 } else if (shader
->output
[i
].name
== TGSI_SEMANTIC_POSITION
) {
803 output
[i
].array_base
= 61;
804 output
[i
].swizzle_x
= 2;
805 output
[i
].swizzle_y
= 7;
806 output
[i
].swizzle_z
= output
[i
].swizzle_w
= 7;
807 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
808 } else if (shader
->output
[i
].name
== TGSI_SEMANTIC_STENCIL
) {
809 output
[i
].array_base
= 61;
810 output
[i
].swizzle_x
= 7;
811 output
[i
].swizzle_y
= 1;
812 output
[i
].swizzle_z
= output
[i
].swizzle_w
= 7;
813 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
815 R600_ERR("unsupported fragment output name %d\n", shader
->output
[i
].name
);
821 R600_ERR("unsupported processor type %d\n", ctx
.type
);
826 /* add fake param output for vertex shader if no param is exported */
827 if (ctx
.type
== TGSI_PROCESSOR_VERTEX
) {
828 for (i
= 0, pos0
= 0; i
< noutput
; i
++) {
829 if (output
[i
].type
== V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
) {
835 memset(&output
[i
], 0, sizeof(struct r600_bc_output
));
837 output
[i
].elem_size
= 3;
838 output
[i
].swizzle_x
= 0;
839 output
[i
].swizzle_y
= 1;
840 output
[i
].swizzle_z
= 2;
841 output
[i
].swizzle_w
= 3;
842 output
[i
].burst_count
= 1;
843 output
[i
].barrier
= 1;
844 output
[i
].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
;
845 output
[i
].array_base
= 0;
846 output
[i
].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
);
850 /* add fake pixel export */
851 if (ctx
.type
== TGSI_PROCESSOR_FRAGMENT
&& !noutput
) {
852 memset(&output
[0], 0, sizeof(struct r600_bc_output
));
854 output
[0].elem_size
= 3;
855 output
[0].swizzle_x
= 7;
856 output
[0].swizzle_y
= 7;
857 output
[0].swizzle_z
= 7;
858 output
[0].swizzle_w
= 7;
859 output
[0].burst_count
= 1;
860 output
[0].barrier
= 1;
861 output
[0].type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
;
862 output
[0].array_base
= 0;
863 output
[0].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
);
866 /* set export done on last export of each type */
867 for (i
= noutput
- 1, output_done
= 0; i
>= 0; i
--) {
868 if (ctx
.bc
->chiprev
< CHIPREV_CAYMAN
) {
869 if (i
== (noutput
- 1)) {
870 output
[i
].end_of_program
= 1;
873 if (!(output_done
& (1 << output
[i
].type
))) {
874 output_done
|= (1 << output
[i
].type
);
875 output
[i
].inst
= BC_INST(ctx
.bc
, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
);
878 /* add output to bytecode */
879 for (i
= 0; i
< noutput
; i
++) {
880 r
= r600_bc_add_output(ctx
.bc
, &output
[i
]);
884 /* add program end */
885 if (ctx
.bc
->chiprev
== CHIPREV_CAYMAN
)
886 cm_bc_add_cf_end(ctx
.bc
);
889 tgsi_parse_free(&ctx
.parse
);
893 tgsi_parse_free(&ctx
.parse
);
/*
 * Handler for TGSI opcodes the translator does not implement: logs the opcode
 * name through R600_ERR.
 *
 * NOTE(review): the return statement is missing from this listing, so the
 * returned error code cannot be confirmed from here.
 */
897 static int tgsi_unsupported(struct r600_shader_ctx
*ctx
)
899 R600_ERR("%s tgsi opcode unsupported\n",
900 tgsi_get_opcode_name(ctx
->inst_info
->tgsi_opcode
));
/*
 * Instruction handler named for the TGSI END opcode.
 *
 * NOTE(review): the body is missing from this listing; what it does and
 * returns cannot be confirmed from here.
 */
904 static int tgsi_end(struct r600_shader_ctx
*ctx
)
/*
 * Fill a bytecode ALU source from a translator source operand for one
 * channel: copies sel, neg, abs and rel, sets chan to swizzle[chan], and picks
 * the literal value belonging to that swizzled channel.
 *
 * NOTE(review): the line declaring the third parameter (chan, residue line
 * 911) is missing from this listing; callers pass a channel index.
 */
909 static void r600_bc_src(struct r600_bc_alu_src
*bc_src
,
910 const struct r600_shader_src
*shader_src
,
913 bc_src
->sel
= shader_src
->sel
;
914 bc_src
->chan
= shader_src
->swizzle
[chan
];
915 bc_src
->neg
= shader_src
->neg
;
916 bc_src
->abs
= shader_src
->abs
;
917 bc_src
->rel
= shader_src
->rel
;
/* indexed with the swizzled channel just stored, not with the raw chan argument */
918 bc_src
->value
= shader_src
->value
[bc_src
->chan
];
/*
 * Fill a bytecode ALU destination from a TGSI destination register: sel is
 * the register index plus the file offset, chan is the requested channel, and
 * rel is set for indirect registers.  The Saturate flag of the current
 * instruction is tested as well.
 *
 * NOTE(review): the line declaring the swizzle parameter (residue line 923)
 * and the statement executed for Saturate are missing from this listing.
 */
921 static void tgsi_dst(struct r600_shader_ctx
*ctx
,
922 const struct tgsi_full_dst_register
*tgsi_dst
,
924 struct r600_bc_alu_dst
*r600_dst
)
926 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
928 r600_dst
->sel
= tgsi_dst
->Register
.Index
;
929 r600_dst
->sel
+= ctx
->file_offset
[tgsi_dst
->Register
.File
];
930 r600_dst
->chan
= swizzle
;
932 if (tgsi_dst
->Register
.Indirect
)
933 r600_dst
->rel
= V_SQ_REL_RELATIVE
;
934 if (inst
->Instruction
.Saturate
) {
/*
 * Scan the four bits of a destination write mask.
 *
 * NOTE(review): the declarations, the statement executed for a set bit and the
 * return are missing from this listing.  Callers loop up to the result plus
 * one over the channels, so this presumably returns the index of the highest
 * set bit; confirm against the full source.
 */
939 static int tgsi_last_instruction(unsigned writemask
)
943 for (i
= 0; i
< 4; i
++) {
944 if (writemask
& (1 << i
)) {
/*
 * Generic emitter for per-channel ALU instructions.
 *
 * For every channel up to the last one in the write mask that is enabled, one
 * ALU instruction with the hardware opcode of the current dispatch entry is
 * added.  Sources are taken in order, or with source 0 and 1 exchanged (the
 * swap argument presumably selects this).  SUB and ABS get special treatment.
 *
 * NOTE(review): the declarations of i, j and r, the statement after the write
 * mask test, the condition on swap, the bodies of the SUB / ABS cases, the
 * marking of the last instruction and the return statements are missing from
 * this listing (residue line numbers jump).
 */
951 static int tgsi_op2_s(struct r600_shader_ctx
*ctx
, int swap
)
953 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
954 struct r600_bc_alu alu
;
956 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
958 for (i
= 0; i
< lasti
+ 1; i
++) {
959 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
962 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
963 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
965 alu
.inst
= ctx
->inst_info
->r600_opcode
;
/* sources in declaration order */
967 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
968 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
/* sources 0 and 1 exchanged */
971 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], i
);
972 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
974 /* handle some special cases */
975 switch (ctx
->inst_info
->tgsi_opcode
) {
976 case TGSI_OPCODE_SUB
:
979 case TGSI_OPCODE_ABS
:
990 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Emit a per-channel ALU instruction with the sources in their TGSI order (swap = 0). */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0);
}
/* Emit a per-channel ALU instruction with swap = 1, i.e. with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1);
}
/*
 * Cayman emitter for opcodes that must be issued in several vector slots.
 *
 * The instruction is issued in slots 0..2, plus slot 3 when the write mask
 * includes W (bit 0x8).  Every slot reads channel 0 of each source, and
 * dst.write is enabled only for slots whose write mask bit is set.
 *
 * NOTE(review): the declarations of i, j and r, the statement executed for the
 * final slot (i == last_slot - 1) and the return statements are missing from
 * this listing (residue line numbers jump).
 */
1007 static int cayman_emit_float_instr(struct r600_shader_ctx
*ctx
)
1009 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1011 struct r600_bc_alu alu
;
1012 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
1014 for (i
= 0 ; i
< last_slot
; i
++) {
1015 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1016 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1017 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
1018 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], 0);
1020 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
/* the slot is always issued; the result is kept only where the mask asks for it */
1021 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1023 if (i
== last_slot
- 1)
1025 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * r600 - trunc to -PI..PI range
 * r700 - normalize by dividing by 2PI
 */
/*
 * Range-reduce the argument of a trigonometric instruction into temp_reg.
 *
 * Three ALU instructions are added, each writing temp_reg:
 *  1. MULADD: channel 0 of source 0 times the literal 1 / (2 * PI), plus the
 *     inline constant 0.5.
 *  2. FRACT of temp_reg channel 0.
 *  3. MULADD of temp_reg channel 0 with two further operands: on CHIPREV_R600
 *     the literals 2 * PI and -PI; the other visible assignments select the
 *     inline constants 1.0 and 0.5 (presumably the else branch for later
 *     chips; a negate flag may be on a line that is not visible).
 *
 * NOTE(review): the declaration of r, several ALU field assignments (is_op3,
 * dst.write, last, ...) and the return statements are missing from this
 * listing (residue line numbers jump).  The float constants are reinterpreted
 * as uint32_t through a pointer cast; memcpy would be the aliasing-safe form.
 */
1037 static int tgsi_setup_trig(struct r600_shader_ctx
*ctx
)
1039 static float half_inv_pi
= 1.0 /(3.1415926535 * 2);
1040 static float double_pi
= 3.1415926535 * 2;
1041 static float neg_pi
= -3.1415926535;
1044 struct r600_bc_alu alu
;
1046 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1047 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
1051 alu
.dst
.sel
= ctx
->temp_reg
;
1054 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1056 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
1057 alu
.src
[1].chan
= 0;
1058 alu
.src
[1].value
= *(uint32_t *)&half_inv_pi
;
1059 alu
.src
[2].sel
= V_SQ_ALU_SRC_0_5
;
1060 alu
.src
[2].chan
= 0;
1062 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1066 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1067 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
);
1070 alu
.dst
.sel
= ctx
->temp_reg
;
1073 alu
.src
[0].sel
= ctx
->temp_reg
;
1074 alu
.src
[0].chan
= 0;
1076 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1080 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1081 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
1085 alu
.dst
.sel
= ctx
->temp_reg
;
1088 alu
.src
[0].sel
= ctx
->temp_reg
;
1089 alu
.src
[0].chan
= 0;
1091 alu
.src
[1].sel
= V_SQ_ALU_SRC_LITERAL
;
1092 alu
.src
[1].chan
= 0;
1093 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
1094 alu
.src
[2].chan
= 0;
1096 if (ctx
->bc
->chiprev
== CHIPREV_R600
) {
1097 alu
.src
[1].value
= *(uint32_t *)&double_pi
;
1098 alu
.src
[2].value
= *(uint32_t *)&neg_pi
;
1100 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1101 alu
.src
[2].sel
= V_SQ_ALU_SRC_0_5
;
1106 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Cayman emitter for trigonometric opcodes.
 *
 * After tgsi_setup_trig() has reduced the argument into temp_reg, the hardware
 * opcode of the current dispatch entry is issued in slots 0..2, plus slot 3
 * when the write mask includes W.  Every slot reads temp_reg channel 0, and
 * dst.write follows the write mask bit of the slot.
 *
 * NOTE(review): the declarations of i and r, the error check after
 * tgsi_setup_trig(), the statement for the final slot and the return
 * statements are missing from this listing (residue line numbers jump).
 */
1112 static int cayman_trig(struct r600_shader_ctx
*ctx
)
1114 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1115 struct r600_bc_alu alu
;
1116 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
1119 r
= tgsi_setup_trig(ctx
);
1124 for (i
= 0; i
< last_slot
; i
++) {
1125 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1126 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1129 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1130 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1132 alu
.src
[0].sel
= ctx
->temp_reg
;
1133 alu
.src
[0].chan
= 0;
1134 if (i
== last_slot
- 1)
1136 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Emitter for trigonometric opcodes on chips that use this handler.
 *
 * After tgsi_setup_trig() has reduced the argument into temp_reg, the hardware
 * opcode of the current dispatch entry is applied once to temp_reg channel 0
 * with temp_reg as destination.  The result is then copied with MOV
 * instructions to every destination channel enabled in the write mask.
 *
 * NOTE(review): the declarations of i and r, the error checks, several ALU
 * field assignments (dst.write, last, ...), the statement after the write mask
 * test and the return statements are missing from this listing (residue line
 * numbers jump).
 */
1143 static int tgsi_trig(struct r600_shader_ctx
*ctx
)
1145 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1146 struct r600_bc_alu alu
;
1148 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
1150 r
= tgsi_setup_trig(ctx
);
1154 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1155 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1157 alu
.dst
.sel
= ctx
->temp_reg
;
1160 alu
.src
[0].sel
= ctx
->temp_reg
;
1161 alu
.src
[0].chan
= 0;
1163 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1167 /* replicate result */
1168 for (i
= 0; i
< lasti
+ 1; i
++) {
1169 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
1172 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1173 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1175 alu
.src
[0].sel
= ctx
->temp_reg
;
1176 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1179 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * SCS lowering, driven by the destination write mask:
 *
 *   X written: COS of ctx->temp_reg channel 0
 *   Y written: SIN of ctx->temp_reg channel 0
 *   Z written: MOV from V_SQ_ALU_SRC_0
 *   W written: MOV from V_SQ_ALU_SRC_1
 *
 * tgsi_setup_trig() is only called when X or Y is in the write mask.
 *
 * For X and Y there are two emission sequences.  When
 * ctx->bc->chiprev == CHIPREV_CAYMAN the COS/SIN is issued from a
 * three-iteration loop with tgsi_dst() called for channel i.  The second
 * sequence issues one instruction whose destination comes from tgsi_dst()
 * for channel 0 (COS) or channel 1 (SIN).
 *
 * NOTE(review): the statements that choose which Cayman slot actually
 * writes its result are not visible in this view -- confirm before relying
 * on the per-slot write behaviour.
 */
1186 static int tgsi_scs(struct r600_shader_ctx
*ctx
)
1188 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1189 struct r600_bc_alu alu
;
1192 /* We'll only need the trig stuff if we are going to write to the
1193 * X or Y components of the destination vector.
1195 if (likely(inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
)) {
1196 r
= tgsi_setup_trig(ctx
);
1202 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
1203 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
1204 for (i
= 0 ; i
< 3; i
++) {
1205 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1206 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
);
1207 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1213 alu
.src
[0].sel
= ctx
->temp_reg
;
1214 alu
.src
[0].chan
= 0;
1217 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1222 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1223 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
);
1224 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
1226 alu
.src
[0].sel
= ctx
->temp_reg
;
1227 alu
.src
[0].chan
= 0;
1229 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1236 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
1237 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
1238 for (i
= 0 ; i
< 3; i
++) {
1239 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1240 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
);
1241 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1246 alu
.src
[0].sel
= ctx
->temp_reg
;
1247 alu
.src
[0].chan
= 0;
1250 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1255 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1256 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
);
1257 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
1259 alu
.src
[0].sel
= ctx
->temp_reg
;
1260 alu
.src
[0].chan
= 0;
1262 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1269 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
1270 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1272 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1274 tgsi_dst(ctx
, &inst
->Dst
[0], 2, &alu
.dst
);
1276 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
1277 alu
.src
[0].chan
= 0;
1281 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1287 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
1288 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1290 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1292 tgsi_dst(ctx
, &inst
->Dst
[0], 3, &alu
.dst
);
1294 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1295 alu
.src
[0].chan
= 0;
1299 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Emit the kill opcode (ctx->inst_info->r600_opcode) once for each of the
 * four channels.
 *
 * src[0] is always V_SQ_ALU_SRC_0.  For TGSI_OPCODE_KILP src[1] is set to
 * V_SQ_ALU_SRC_1; the other visible assignment fills src[1] from channel i
 * of ctx->src[0].
 *
 * After the loop a new CF clause is forced and the shader is flagged as
 * using kill.
 *
 * NOTE(review): the branch separating the KILP assignment from the
 * r600_bc_src() call is not visible in this view; presumably they are the
 * two arms of an if/else -- confirm.
 */
1307 static int tgsi_kill(struct r600_shader_ctx
*ctx
)
1309 struct r600_bc_alu alu
;
1312 for (i
= 0; i
< 4; i
++) {
1313 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1314 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1318 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
1320 if (ctx
->inst_info
->tgsi_opcode
== TGSI_OPCODE_KILP
) {
1321 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1324 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
1329 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1334 /* kill must be last in ALU */
1335 ctx
->bc
->force_add_cf
= 1;
/* Record on the shader that a kill instruction was emitted. */
1336 ctx
->shader
->uses_kill
= TRUE
;
/*
 * LIT lowering.
 *
 * When Z (bit 2) is in the write mask the Z result is built in three steps:
 *   1. LOG_CLAMPED of src.y
 *   2. MUL_LIT(src.w, <log result>, src.x) into ctx->temp_reg
 *   3. EXP_IEEE of ctx->temp_reg channel 0 into the destination
 * Steps 1 and 3 each have a CHIPREV_CAYMAN form that issues the opcode from
 * a three-iteration loop, and a single-instruction form.
 *
 * The remaining channels are emitted afterwards, each gated by its own
 * write-mask bit through dst.write:
 *   X = 1.0 (MOV V_SQ_ALU_SRC_1)
 *   Y = max(src.x, 0.0)
 *   W = MOV V_SQ_ALU_SRC_1
 *
 * NOTE(review): `sel` and `chan` used as MUL_LIT's second source are
 * assigned partly outside this view (only `chan = alu.dst.chan` is
 * visible); presumably they name the register holding the log result --
 * confirm.
 */
1340 static int tgsi_lit(struct r600_shader_ctx
*ctx
)
1342 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1343 struct r600_bc_alu alu
;
/* The log / MUL_LIT / exp chain is only needed for the Z channel. */
1346 if (inst
->Dst
[0].Register
.WriteMask
& (1 << 2))
1352 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
1353 for (i
= 0; i
< 3; i
++) {
1354 /* dst.z = log(src.y) */
1355 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1356 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED
);
1357 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 1);
1358 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1365 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1370 /* dst.z = log(src.y) */
1371 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1372 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED
);
1373 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 1);
1374 alu
.dst
.sel
= ctx
->temp_reg
;
1378 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Remember which channel the log instruction targeted for MUL_LIT below. */
1383 chan
= alu
.dst
.chan
;
1386 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1387 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1388 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT
);
1389 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 3);
1390 alu
.src
[1].sel
= sel
;
1391 alu
.src
[1].chan
= chan
;
1393 r600_bc_src(&alu
.src
[2], &ctx
->src
[0], 0);
1394 alu
.dst
.sel
= ctx
->temp_reg
;
1399 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1403 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
1404 for (i
= 0; i
< 3; i
++) {
1405 /* dst.z = exp(tmp.x) */
1406 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1407 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1408 alu
.src
[0].sel
= ctx
->temp_reg
;
1409 alu
.src
[0].chan
= 0;
1410 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1416 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1421 /* dst.z = exp(tmp.x) */
1422 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1423 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1424 alu
.src
[0].sel
= ctx
->temp_reg
;
1425 alu
.src
[0].chan
= 0;
1426 tgsi_dst(ctx
, &inst
->Dst
[0], 2, &alu
.dst
);
1428 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* dst.x = 1.0, written only if X is in the write mask. */
1435 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1436 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1437 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
; /*1.0*/
1438 alu
.src
[0].chan
= 0;
1439 tgsi_dst(ctx
, &inst
->Dst
[0], 0, &alu
.dst
);
1440 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 0) & 1;
1441 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1445 /* dst.y = max(src.x, 0.0) */
1446 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1447 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
);
1448 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1449 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
; /*0.0*/
1450 alu
.src
[1].chan
= 0;
1451 tgsi_dst(ctx
, &inst
->Dst
[0], 1, &alu
.dst
);
1452 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 1) & 1;
1453 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* dst.w = MOV of V_SQ_ALU_SRC_1, written only if W is in the write mask. */
1458 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1459 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1460 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1461 alu
.src
[0].chan
= 0;
1462 tgsi_dst(ctx
, &inst
->Dst
[0], 3, &alu
.dst
);
1463 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> 3) & 1;
1465 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * RSQ lowering: emit RECIPSQRT_CLAMPED with each TGSI source taken from
 * channel 0, targeting ctx->temp_reg, then hand over to
 * tgsi_helper_tempx_replicate() to copy the result into the destination.
 *
 * Returns whatever tgsi_helper_tempx_replicate() returns on the visible
 * path.
 */
1472 static int tgsi_rsq(struct r600_shader_ctx
*ctx
)
1474 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1475 struct r600_bc_alu alu
;
1478 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1481 * For state trackers other than OpenGL, we'll want to use
1482 * _RECIPSQRT_IEEE instead.
1484 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED
);
/* Every declared source register is read from its channel 0. */
1486 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1487 r600_bc_src(&alu
.src
[i
], &ctx
->src
[i
], 0);
1490 alu
.dst
.sel
= ctx
->temp_reg
;
1493 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1496 /* replicate result */
1497 return tgsi_helper_tempx_replicate(ctx
);
/*
 * Copy a scalar result held in ctx->temp_reg to the instruction's
 * destination.
 *
 * Emits one MOV per channel (four in total).  The source selector is
 * ctx->temp_reg; no source channel assignment is visible, so it stays at 0
 * from the memset.  The destination is obtained from tgsi_dst() for channel
 * i, and dst.write follows write-mask bit i, so masked channels are emitted
 * without storing.
 */
1500 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx
*ctx
)
1502 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1503 struct r600_bc_alu alu
;
1506 for (i
= 0; i
< 4; i
++) {
1507 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1508 alu
.src
[0].sel
= ctx
->temp_reg
;
1509 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1511 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1512 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1515 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Generic scalar lowering: emit ctx->inst_info->r600_opcode once with every
 * TGSI source read from channel 0, targeting ctx->temp_reg, and then let
 * tgsi_helper_tempx_replicate() copy the result to the destination.
 */
1522 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx
*ctx
)
1524 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1525 struct r600_bc_alu alu
;
1528 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1529 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1530 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1531 r600_bc_src(&alu
.src
[i
], &ctx
->src
[i
], 0);
1533 alu
.dst
.sel
= ctx
->temp_reg
;
1536 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1539 /* replicate result */
1540 return tgsi_helper_tempx_replicate(ctx
);
/*
 * Cayman variant of POW, computed as EXP2(b * LOG2(a)):
 *
 *   1. LOG_IEEE of src0 channel 0, issued from a three-iteration loop,
 *      targeting ctx->temp_reg
 *   2. MUL of src1 channel 0 by ctx->temp_reg, back into ctx->temp_reg
 *   3. EXP_IEEE of ctx->temp_reg, issued once per slot with the destination
 *      from tgsi_dst() for channel i and dst.write taken from write-mask
 *      bit i
 *
 * last_slot is 4 when W (0x8) is in the write mask and 3 otherwise.
 */
1543 static int cayman_pow(struct r600_shader_ctx
*ctx
)
1545 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1547 struct r600_bc_alu alu
;
1548 int last_slot
= (inst
->Dst
[0].Register
.WriteMask
& 0x8) ? 4 : 3;
/* Step 1: LOG2(a), issued three times. */
1550 for (i
= 0; i
< 3; i
++) {
1551 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1552 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
1553 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1554 alu
.dst
.sel
= ctx
->temp_reg
;
1559 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Step 2: b * LOG2(a). */
1565 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1566 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
1567 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], 0);
1568 alu
.src
[1].sel
= ctx
->temp_reg
;
1569 alu
.dst
.sel
= ctx
->temp_reg
;
1572 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1576 for (i
= 0; i
< last_slot
; i
++) {
1577 /* POW(a,b) = EXP2(b * LOG2(a))*/
1578 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1579 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1580 alu
.src
[0].sel
= ctx
->temp_reg
;
1582 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1583 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1584 if (i
== last_slot
- 1)
1586 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * POW lowering, computed as EXP2(b * LOG2(a)) with three scalar
 * instructions that all target ctx->temp_reg:
 *
 *   LOG_IEEE(src0.x) -> MUL(src1.x, temp) -> EXP_IEEE(temp)
 *
 * The result is then copied to the destination by
 * tgsi_helper_tempx_replicate(), whose return value is returned.
 */
1593 static int tgsi_pow(struct r600_shader_ctx
*ctx
)
1595 struct r600_bc_alu alu
;
/* LOG2(a) */
1599 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1600 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
1601 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
1602 alu
.dst
.sel
= ctx
->temp_reg
;
1605 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* b * LOG2(a) */
1609 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1610 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
1611 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], 0);
1612 alu
.src
[1].sel
= ctx
->temp_reg
;
1613 alu
.dst
.sel
= ctx
->temp_reg
;
1616 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1619 /* POW(a,b) = EXP2(b * LOG2(a))*/
1620 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1621 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
1622 alu
.src
[0].sel
= ctx
->temp_reg
;
1623 alu
.dst
.sel
= ctx
->temp_reg
;
1626 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1629 return tgsi_helper_tempx_replicate(ctx
);
/*
 * SSG (set sign) lowering using two CNDGT passes over all four channels:
 *
 *   pass 1: tmp = (src > 0 ? 1 : src)      -- result in ctx->temp_reg
 *   pass 2: dst = (-tmp > 0 ? -1 : tmp)    -- result via tgsi_dst()
 *
 * In both passes src[1] is V_SQ_ALU_SRC_1.
 *
 * NOTE(review): the negations implied by the pass-2 formula are not visible
 * in this view; presumably they are set on src[0]/src[1] by lines omitted
 * here -- confirm.
 */
1632 static int tgsi_ssg(struct r600_shader_ctx
*ctx
)
1634 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1635 struct r600_bc_alu alu
;
1638 /* tmp = (src > 0 ? 1 : src) */
1639 for (i
= 0; i
< 4; i
++) {
1640 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1641 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT
);
1644 alu
.dst
.sel
= ctx
->temp_reg
;
/* Condition and fallback value are both the source channel itself. */
1647 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
1648 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1649 r600_bc_src(&alu
.src
[2], &ctx
->src
[0], i
);
1653 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1658 /* dst = (-tmp > 0 ? -1 : tmp) */
1659 for (i
= 0; i
< 4; i
++) {
1660 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1661 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT
);
1663 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1665 alu
.src
[0].sel
= ctx
->temp_reg
;
1666 alu
.src
[0].chan
= i
;
1669 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
1672 alu
.src
[2].sel
= ctx
->temp_reg
;
1673 alu
.src
[2].chan
= i
;
1677 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Copy ctx->temp_reg to the destination of `inst`, channel by channel.
 *
 * For each of the four channels: if the channel is not in the write mask a
 * NOP is selected; otherwise a MOV is selected that reads ctx->temp_reg
 * channel i and uses the destination from tgsi_dst() for channel i.  One
 * ALU instruction is added per channel.
 *
 * ctx:  shader translation context
 * inst: instruction whose Dst[0] receives the copy
 */
1684 static int tgsi_helper_copy(struct r600_shader_ctx
*ctx
, struct tgsi_full_instruction
*inst
)
1686 struct r600_bc_alu alu
;
1689 for (i
= 0; i
< 4; i
++) {
1690 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
/* Masked-out channels get a NOP instead of a MOV. */
1691 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
))) {
1692 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
);
1695 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1696 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1697 alu
.src
[0].sel
= ctx
->temp_reg
;
1698 alu
.src
[0].chan
= i
;
1703 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Per-channel lowering for three-source opcodes.
 *
 * Iterates channels 0..lasti (lasti from tgsi_last_instruction() on the
 * write mask), testing the write-mask bit of each.  For an emitted channel
 * it issues ctx->inst_info->r600_opcode with every TGSI source read from
 * channel i and the destination from tgsi_dst() for channel i.
 */
1710 static int tgsi_op3(struct r600_shader_ctx
*ctx
)
1712 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1713 struct r600_bc_alu alu
;
1715 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
1717 for (i
= 0; i
< lasti
+ 1; i
++) {
1718 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
1721 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1722 alu
.inst
= ctx
->inst_info
->r600_opcode
;
/* Same channel i is used for every source operand. */
1723 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
1724 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
1727 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1734 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Dot-product lowering (DP2, DP3, DPH and whatever else maps here).
 *
 * Emits ctx->inst_info->r600_opcode for all four channels, each with the
 * TGSI sources read from channel i, the destination from tgsi_dst() for
 * channel i, and dst.write taken from write-mask bit i.
 *
 * Some opcodes then override sources:
 *   DP2, DP3: src[0] and src[1] are both replaced by V_SQ_ALU_SRC_0
 *   DPH:      src[0] is replaced by V_SQ_ALU_SRC_1
 *
 * NOTE(review): the per-channel conditions guarding those overrides are not
 * visible in this view; presumably they apply only to the channels the
 * opcode does not use -- confirm.
 */
1741 static int tgsi_dp(struct r600_shader_ctx
*ctx
)
1743 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1744 struct r600_bc_alu alu
;
1747 for (i
= 0; i
< 4; i
++) {
1748 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1749 alu
.inst
= ctx
->inst_info
->r600_opcode
;
1750 for (j
= 0; j
< inst
->Instruction
.NumSrcRegs
; j
++) {
1751 r600_bc_src(&alu
.src
[j
], &ctx
->src
[j
], i
);
1754 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
1756 alu
.dst
.write
= (inst
->Dst
[0].Register
.WriteMask
>> i
) & 1;
1757 /* handle some special cases */
1758 switch (ctx
->inst_info
->tgsi_opcode
) {
1759 case TGSI_OPCODE_DP2
:
1761 alu
.src
[0].sel
= alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
1762 alu
.src
[0].chan
= alu
.src
[1].chan
= 0;
1765 case TGSI_OPCODE_DP3
:
1767 alu
.src
[0].sel
= alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
1768 alu
.src
[0].chan
= alu
.src
[1].chan
= 0;
1771 case TGSI_OPCODE_DPH
:
1773 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1774 alu
.src
[0].chan
= 0;
1784 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * Decide whether source operand `index` of the current instruction has to
 * be copied into a register before a texture instruction can use it.
 *
 * Returns true when the operand's register file is neither
 * TGSI_FILE_TEMPORARY nor TGSI_FILE_INPUT, or when the translated source
 * (ctx->src[index]) carries a neg or abs modifier.
 */
1791 static inline boolean
tgsi_tex_src_requires_loading(struct r600_shader_ctx
*ctx
,
1794 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1795 return (inst
->Src
[index
].Register
.File
!= TGSI_FILE_TEMPORARY
&&
1796 inst
->Src
[index
].Register
.File
!= TGSI_FILE_INPUT
) ||
1797 ctx
->src
[index
].neg
|| ctx
->src
[index
].abs
;
/*
 * Return the register number for source operand `index` of the current
 * instruction: the base offset of the operand's register file
 * (ctx->file_offset[File]) plus the operand's register index.
 */
1800 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx
*ctx
,
1803 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1804 return ctx
->file_offset
[inst
->Src
[index
].Register
.File
] + inst
->Src
[index
].Register
.Index
;
/*
 * Texture instruction lowering.  Visible stages, in order:
 *
 *  - TXD: the sampler operand is source 3 instead of source 1.  For
 *    sources 1 and 2 a SET_GRADIENTS_H / SET_GRADIENTS_V texture
 *    instruction is emitted.  A source that
 *    tgsi_tex_src_requires_loading() flags is first copied into a register
 *    from r600_get_temp() with four MOVs; the other visible assignments
 *    take the register from tgsi_tex_get_src_gpr() and the swizzle/rel
 *    from ctx->src[i].
 *  - TXP: perspective divide.  RECIP_IEEE of src0.w (three-iteration loop
 *    on CHIPREV_CAYMAN, single instruction otherwise), a MUL of each of
 *    the first three channels by that reciprocal, and a MOV of
 *    V_SQ_ALU_SRC_1, all targeting ctx->temp_reg, which becomes the
 *    texture source register.
 *  - TGSI_TEXTURE_CUBE: a four-channel CUBE instruction with the zzxy /
 *    yxzz swizzles, RECIP_IEEE of temp channel 2, and two MULADDs against
 *    the literal 1.5f; ctx->temp_reg becomes the texture source register.
 *  - If source 0 requires loading and has not been loaded yet, four MOVs
 *    copy it into ctx->temp_reg.
 *  - For SHADOW1D / SHADOW2D targets the sample opcode is switched to its
 *    _C form (SAMPLE -> SAMPLE_C, SAMPLE_L -> SAMPLE_C_L,
 *    SAMPLE_G -> SAMPLE_C_G).
 *  - The final texture instruction: resource_id is sampler_id plus
 *    R600_MAX_CONST_BUFFERS, each dst_sel is the channel number when that
 *    channel is in the write mask and 7 otherwise, coord_type_* is 1
 *    except for TGSI_TEXTURE_RECT, array targets clear coord_type_z
 *    (1D_ARRAY also takes src_sel_z from src_sel_y), and shadow targets
 *    take src_sel_w from src_sel_z.
 *
 * NOTE(review): several locals (src_gpr, opcode, out_chan, i, j, r) are
 * declared on lines not visible in this view, as are the statements that
 * update src_loaded.
 */
1807 static int tgsi_tex(struct r600_shader_ctx
*ctx
)
/*
 * NOTE(review): this float is later reinterpreted through a uint32_t
 * pointer cast to obtain its bit pattern, which is a strict-aliasing
 * violation in ISO C; a memcpy or union would be the conforming form.
 */
1809 static float one_point_five
= 1.5f
;
1810 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
1811 struct r600_bc_tex tex
;
1812 struct r600_bc_alu alu
;
1816 /* Texture fetch instructions can only use gprs as source.
1817 * Also they cannot negate the source or take the absolute value */
1818 const boolean src_requires_loading
= tgsi_tex_src_requires_loading(ctx
, 0);
1819 boolean src_loaded
= FALSE
;
1820 unsigned sampler_src_reg
= 1;
1822 src_gpr
= tgsi_tex_get_src_gpr(ctx
, 0);
1824 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
) {
1825 /* TGSI moves the sampler to src reg 3 for TXD */
1826 sampler_src_reg
= 3;
1828 for (i
= 1; i
< 3; i
++) {
1829 /* set gradients h/v */
1830 memset(&tex
, 0, sizeof(struct r600_bc_tex
));
1831 tex
.inst
= (i
== 1) ? SQ_TEX_INST_SET_GRADIENTS_H
:
1832 SQ_TEX_INST_SET_GRADIENTS_V
;
1833 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
1834 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
/* Gradient operand not directly usable: copy it into a fresh temp. */
1836 if (tgsi_tex_src_requires_loading(ctx
, i
)) {
1837 tex
.src_gpr
= r600_get_temp(ctx
);
1843 for (j
= 0; j
< 4; j
++) {
1844 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1845 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1846 r600_bc_src(&alu
.src
[0], &ctx
->src
[i
], j
);
1847 alu
.dst
.sel
= tex
.src_gpr
;
1852 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1858 tex
.src_gpr
= tgsi_tex_get_src_gpr(ctx
, i
);
1859 tex
.src_sel_x
= ctx
->src
[i
].swizzle
[0];
1860 tex
.src_sel_y
= ctx
->src
[i
].swizzle
[1];
1861 tex
.src_sel_z
= ctx
->src
[i
].swizzle
[2];
1862 tex
.src_sel_w
= ctx
->src
[i
].swizzle
[3];
1863 tex
.src_rel
= ctx
->src
[i
].rel
;
1865 tex
.dst_gpr
= ctx
->temp_reg
; /* just to avoid confusing the asm scheduler */
1866 tex
.dst_sel_x
= tex
.dst_sel_y
= tex
.dst_sel_z
= tex
.dst_sel_w
= 7;
1867 if (inst
->Texture
.Texture
!= TGSI_TEXTURE_RECT
) {
1868 tex
.coord_type_x
= 1;
1869 tex
.coord_type_y
= 1;
1870 tex
.coord_type_z
= 1;
1871 tex
.coord_type_w
= 1;
1873 r
= r600_bc_add_tex(ctx
->bc
, &tex
);
1877 } else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
1879 /* Add perspective divide */
1880 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
1882 for (i
= 0; i
< 3; i
++) {
1883 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1884 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
1885 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 3);
1887 alu
.dst
.sel
= ctx
->temp_reg
;
1893 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1900 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1901 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
1902 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 3);
1904 alu
.dst
.sel
= ctx
->temp_reg
;
1905 alu
.dst
.chan
= out_chan
;
1908 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Scale the first three coordinates by the reciprocal held in out_chan. */
1913 for (i
= 0; i
< 3; i
++) {
1914 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1915 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
1916 alu
.src
[0].sel
= ctx
->temp_reg
;
1917 alu
.src
[0].chan
= out_chan
;
1918 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
1919 alu
.dst
.sel
= ctx
->temp_reg
;
1922 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1926 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1927 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
1928 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
1929 alu
.src
[0].chan
= 0;
1930 alu
.dst
.sel
= ctx
->temp_reg
;
1934 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1938 src_gpr
= ctx
->temp_reg
;
1941 if (inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) {
1942 static const unsigned src0_swizzle
[] = {2, 2, 0, 1};
1943 static const unsigned src1_swizzle
[] = {1, 0, 2, 2};
1945 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1946 for (i
= 0; i
< 4; i
++) {
1947 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1948 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE
);
1949 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], src0_swizzle
[i
]);
1950 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], src1_swizzle
[i
]);
1951 alu
.dst
.sel
= ctx
->temp_reg
;
1956 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1961 /* tmp1.z = RCP_e(|tmp1.z|) */
1962 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
1963 for (i
= 0; i
< 3; i
++) {
1964 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1965 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
1966 alu
.src
[0].sel
= ctx
->temp_reg
;
1967 alu
.src
[0].chan
= 2;
1969 alu
.dst
.sel
= ctx
->temp_reg
;
1975 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1980 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1981 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
1982 alu
.src
[0].sel
= ctx
->temp_reg
;
1983 alu
.src
[0].chan
= 2;
1985 alu
.dst
.sel
= ctx
->temp_reg
;
1989 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
1994 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1995 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1996 * muladd has no writemask, have to use another temp
1998 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
1999 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2002 alu
.src
[0].sel
= ctx
->temp_reg
;
2003 alu
.src
[0].chan
= 0;
2004 alu
.src
[1].sel
= ctx
->temp_reg
;
2005 alu
.src
[1].chan
= 2;
2007 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
2008 alu
.src
[2].chan
= 0;
2009 alu
.src
[2].value
= *(uint32_t *)&one_point_five
;
2011 alu
.dst
.sel
= ctx
->temp_reg
;
2015 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2019 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2020 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2023 alu
.src
[0].sel
= ctx
->temp_reg
;
2024 alu
.src
[0].chan
= 1;
2025 alu
.src
[1].sel
= ctx
->temp_reg
;
2026 alu
.src
[1].chan
= 2;
2028 alu
.src
[2].sel
= V_SQ_ALU_SRC_LITERAL
;
2029 alu
.src
[2].chan
= 0;
2030 alu
.src
[2].value
= *(uint32_t *)&one_point_five
;
2032 alu
.dst
.sel
= ctx
->temp_reg
;
2037 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2042 src_gpr
= ctx
->temp_reg
;
2045 if (src_requires_loading
&& !src_loaded
) {
2046 for (i
= 0; i
< 4; i
++) {
2047 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2048 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
2049 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
2050 alu
.dst
.sel
= ctx
->temp_reg
;
2055 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2060 src_gpr
= ctx
->temp_reg
;
2063 opcode
= ctx
->inst_info
->r600_opcode
;
2064 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
|| inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
) {
2066 case SQ_TEX_INST_SAMPLE
:
2067 opcode
= SQ_TEX_INST_SAMPLE_C
;
2069 case SQ_TEX_INST_SAMPLE_L
:
2070 opcode
= SQ_TEX_INST_SAMPLE_C_L
;
2072 case SQ_TEX_INST_SAMPLE_G
:
2073 opcode
= SQ_TEX_INST_SAMPLE_C_G
;
2078 memset(&tex
, 0, sizeof(struct r600_bc_tex
));
2081 tex
.sampler_id
= tgsi_tex_get_src_gpr(ctx
, sampler_src_reg
);
2082 tex
.resource_id
= tex
.sampler_id
+ R600_MAX_CONST_BUFFERS
;
2083 tex
.src_gpr
= src_gpr
;
2084 tex
.dst_gpr
= ctx
->file_offset
[inst
->Dst
[0].Register
.File
] + inst
->Dst
[0].Register
.Index
;
2085 tex
.dst_sel_x
= (inst
->Dst
[0].Register
.WriteMask
& 1) ? 0 : 7;
2086 tex
.dst_sel_y
= (inst
->Dst
[0].Register
.WriteMask
& 2) ? 1 : 7;
2087 tex
.dst_sel_z
= (inst
->Dst
[0].Register
.WriteMask
& 4) ? 2 : 7;
2088 tex
.dst_sel_w
= (inst
->Dst
[0].Register
.WriteMask
& 8) ? 3 : 7;
2095 tex
.src_sel_x
= ctx
->src
[0].swizzle
[0];
2096 tex
.src_sel_y
= ctx
->src
[0].swizzle
[1];
2097 tex
.src_sel_z
= ctx
->src
[0].swizzle
[2];
2098 tex
.src_sel_w
= ctx
->src
[0].swizzle
[3];
2099 tex
.src_rel
= ctx
->src
[0].rel
;
2102 if (inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) {
2109 if (inst
->Texture
.Texture
!= TGSI_TEXTURE_RECT
) {
2110 tex
.coord_type_x
= 1;
2111 tex
.coord_type_y
= 1;
2112 tex
.coord_type_z
= 1;
2113 tex
.coord_type_w
= 1;
2116 if (inst
->Texture
.Texture
== TGSI_TEXTURE_1D_ARRAY
) {
2117 tex
.coord_type_z
= 0;
2118 tex
.src_sel_z
= tex
.src_sel_y
;
2119 } else if (inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY
)
2120 tex
.coord_type_z
= 0;
2122 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
|| inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
)
2123 tex
.src_sel_w
= tex
.src_sel_z
;
2125 r
= r600_bc_add_tex(ctx
->bc
, &tex
);
2129 /* add shadow ambient support - gallium doesn't do it yet */
/*
 * LRP (linear interpolate) lowering: src0 * src1 + (1 - src0) * src2.
 *
 * All loops cover channels 0..lasti (lasti from tgsi_last_instruction() on
 * the write mask) and test each write-mask bit.
 *
 * Fast path: when src0's selector is V_SQ_ALU_SRC_0_5 a single ADD of src1
 * and src2 is emitted per channel.
 *
 * General path, three passes through ctx->temp_reg:
 *   1. ADD of V_SQ_ALU_SRC_1 and src0 into temp
 *   2. MUL of temp by src2 into temp      -- (1 - src0) * src2
 *   3. MULADD of src0, src1 and temp into the destination
 *
 * NOTE(review): the negation of src0 in pass 1 and the scaling that turns
 * the fast-path sum into an average are not visible in this view --
 * confirm they are applied by the omitted lines.
 */
2133 static int tgsi_lrp(struct r600_shader_ctx
*ctx
)
2135 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2136 struct r600_bc_alu alu
;
2137 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
2141 /* optimize if it's just an equal balance */
2142 if (ctx
->src
[0].sel
== V_SQ_ALU_SRC_0_5
) {
2143 for (i
= 0; i
< lasti
+ 1; i
++) {
2144 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2147 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2148 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
);
2149 r600_bc_src(&alu
.src
[0], &ctx
->src
[1], i
);
2150 r600_bc_src(&alu
.src
[1], &ctx
->src
[2], i
);
2152 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2157 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Pass 1: ADD of constant 1 and src0 into temp. */
2165 for (i
= 0; i
< lasti
+ 1; i
++) {
2166 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2169 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2170 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
);
2171 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2172 alu
.src
[0].chan
= 0;
2173 r600_bc_src(&alu
.src
[1], &ctx
->src
[0], i
);
2175 alu
.dst
.sel
= ctx
->temp_reg
;
2181 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2186 /* (1 - src0) * src2 */
2187 for (i
= 0; i
< lasti
+ 1; i
++) {
2188 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2191 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2192 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2193 alu
.src
[0].sel
= ctx
->temp_reg
;
2194 alu
.src
[0].chan
= i
;
2195 r600_bc_src(&alu
.src
[1], &ctx
->src
[2], i
);
2196 alu
.dst
.sel
= ctx
->temp_reg
;
2202 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2207 /* src0 * src1 + (1 - src0) * src2 */
2208 for (i
= 0; i
< lasti
+ 1; i
++) {
2209 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2212 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2213 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2215 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
2216 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], i
);
2217 alu
.src
[2].sel
= ctx
->temp_reg
;
2218 alu
.src
[2].chan
= i
;
2220 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2225 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * CMP lowering via CNDGE.
 *
 * For channels 0..lasti (lasti from tgsi_last_instruction() on the write
 * mask), testing each write-mask bit, emits CNDGE with
 *   src[0] = TGSI src0, src[1] = TGSI src2, src[2] = TGSI src1
 * i.e. the second and third TGSI operands are swapped when handed to the
 * hardware instruction.  The destination comes from tgsi_dst() for
 * channel i.
 */
2232 static int tgsi_cmp(struct r600_shader_ctx
*ctx
)
2234 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2235 struct r600_bc_alu alu
;
2237 int lasti
= tgsi_last_instruction(inst
->Dst
[0].Register
.WriteMask
);
2239 for (i
= 0; i
< lasti
+ 1; i
++) {
2240 if (!(inst
->Dst
[0].Register
.WriteMask
& (1 << i
)))
2243 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2244 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE
);
/* Note the operand order: TGSI src2 goes to slot 1, TGSI src1 to slot 2. */
2245 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
2246 r600_bc_src(&alu
.src
[1], &ctx
->src
[2], i
);
2247 r600_bc_src(&alu
.src
[2], &ctx
->src
[1], i
);
2248 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2254 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * XPD (cross product) lowering in two four-channel passes:
 *
 *   pass 1: MUL    of src0[src0_swizzle[i]] and src1[src1_swizzle[i]]
 *                  into ctx->temp_reg
 *   pass 2: MULADD of src0[src1_swizzle[i]], src1[src0_swizzle[i]] and
 *                  temp channel i
 *
 * Both passes also contain assignments that set src[0] and src[1] to
 * V_SQ_ALU_SRC_0.  The write mask is compared against 0xf, and the visible
 * return path goes through tgsi_helper_copy(), which copies ctx->temp_reg
 * to the destination.
 *
 * NOTE(review): the swizzle tables hold three entries while both loops run
 * i = 0..3.  The guard that routes i == 3 to the V_SQ_ALU_SRC_0
 * assignments instead of indexing the tables is not visible in this view
 * -- confirm it exists, otherwise the tables are read out of bounds.
 * NOTE(review): how use_temp selects between the two visible destination
 * setups in pass 2 (ctx->temp_reg vs tgsi_dst()) is likewise not visible.
 */
2261 static int tgsi_xpd(struct r600_shader_ctx
*ctx
)
2263 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2264 static const unsigned int src0_swizzle
[] = {2, 0, 1};
2265 static const unsigned int src1_swizzle
[] = {1, 2, 0};
2266 struct r600_bc_alu alu
;
2267 uint32_t use_temp
= 0;
2270 if (inst
->Dst
[0].Register
.WriteMask
!= 0xf)
/* Pass 1: first product of the cross product, into temp. */
2273 for (i
= 0; i
< 4; i
++) {
2274 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2275 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2277 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], src0_swizzle
[i
]);
2278 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], src1_swizzle
[i
]);
2280 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
2281 alu
.src
[0].chan
= i
;
2282 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
2283 alu
.src
[1].chan
= i
;
2286 alu
.dst
.sel
= ctx
->temp_reg
;
2292 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Pass 2: second product combined with the pass-1 result via MULADD. */
2297 for (i
= 0; i
< 4; i
++) {
2298 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2299 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
);
2302 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], src1_swizzle
[i
]);
2303 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], src0_swizzle
[i
]);
2305 alu
.src
[0].sel
= V_SQ_ALU_SRC_0
;
2306 alu
.src
[0].chan
= i
;
2307 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
2308 alu
.src
[1].chan
= i
;
2311 alu
.src
[2].sel
= ctx
->temp_reg
;
2313 alu
.src
[2].chan
= i
;
2316 alu
.dst
.sel
= ctx
->temp_reg
;
2318 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2324 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2329 return tgsi_helper_copy(ctx
, inst
);
/*
 * EXP lowering.  Each result channel is computed into ctx->temp_reg only if
 * its write-mask bit is set; tgsi_helper_copy() then moves the temp to the
 * destination.
 *
 *   x: 2^floor(src)  -- FLOOR of src.x, then EXP_IEEE of temp channel 0
 *   y: FRACT of src.x
 *   z: EXP_IEEE of src.x
 *   w: 1.0           -- MOV of V_SQ_ALU_SRC_1
 *
 * Each EXP_IEEE has two forms: a three-iteration loop when
 * ctx->bc->chiprev == CHIPREV_CAYMAN and a single instruction otherwise.
 *
 * NOTE(review): the destination channel and write-enable assignments for
 * these instructions are on lines not visible in this view.
 */
2333 static int tgsi_exp(struct r600_shader_ctx
*ctx
)
2335 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2336 struct r600_bc_alu alu
;
2340 /* result.x = 2^floor(src); */
2341 if (inst
->Dst
[0].Register
.WriteMask
& 1) {
2342 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2344 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
);
2345 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2347 alu
.dst
.sel
= ctx
->temp_reg
;
2351 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Exponentiate the floored value held in temp channel 0. */
2355 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
2356 for (i
= 0; i
< 3; i
++) {
2357 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2358 alu
.src
[0].sel
= ctx
->temp_reg
;
2359 alu
.src
[0].chan
= 0;
2361 alu
.dst
.sel
= ctx
->temp_reg
;
2367 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2372 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2373 alu
.src
[0].sel
= ctx
->temp_reg
;
2374 alu
.src
[0].chan
= 0;
2376 alu
.dst
.sel
= ctx
->temp_reg
;
2380 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2386 /* result.y = tmp - floor(tmp); */
2387 if ((inst
->Dst
[0].Register
.WriteMask
>> 1) & 1) {
2388 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2390 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
);
2391 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2393 alu
.dst
.sel
= ctx
->temp_reg
;
2395 r
= tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
2404 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2409 /* result.z = RoughApprox2ToX(tmp);*/
2410 if ((inst
->Dst
[0].Register
.WriteMask
>> 2) & 0x1) {
2411 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
2412 for (i
= 0; i
< 3; i
++) {
2413 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2414 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2415 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2417 alu
.dst
.sel
= ctx
->temp_reg
;
2424 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2429 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2430 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2431 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2433 alu
.dst
.sel
= ctx
->temp_reg
;
2439 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2445 /* result.w = 1.0;*/
2446 if ((inst
->Dst
[0].Register
.WriteMask
>> 3) & 0x1) {
2447 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2449 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
2450 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2451 alu
.src
[0].chan
= 0;
2453 alu
.dst
.sel
= ctx
->temp_reg
;
2457 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Move the per-channel results gathered in temp to the real destination. */
2461 return tgsi_helper_copy(ctx
, inst
);
/*
 * LOG lowering.  Each result channel is computed into ctx->temp_reg only if
 * its write-mask bit is set; tgsi_helper_copy() then moves the temp to the
 * destination.
 *
 *   x: floor(log2(src))  -- LOG_IEEE of src.x, then FLOOR of temp channel 0
 *   y: src.x / 2^floor(log2(src.x)), built as the chain
 *        LOG_IEEE(src.x) -> FLOOR -> EXP_IEEE -> RECIP_IEEE
 *      with the last three reading temp channel 1, followed by
 *        MUL(src.x, temp channel 1)
 *   z: log2(src)         -- LOG_IEEE of src.x
 *   w: 1.0               -- MOV of V_SQ_ALU_SRC_1
 *
 * Every LOG_IEEE / EXP_IEEE / RECIP_IEEE has two forms: a three-iteration
 * loop when ctx->bc->chiprev == CHIPREV_CAYMAN and a single instruction
 * otherwise.
 *
 * NOTE(review): the destination channel and write-enable assignments for
 * these instructions are on lines not visible in this view.
 */
2464 static int tgsi_log(struct r600_shader_ctx
*ctx
)
2466 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2467 struct r600_bc_alu alu
;
2471 /* result.x = floor(log2(src)); */
2472 if (inst
->Dst
[0].Register
.WriteMask
& 1) {
2473 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
2474 for (i
= 0; i
< 3; i
++) {
2475 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2477 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2478 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2480 alu
.dst
.sel
= ctx
->temp_reg
;
2486 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2492 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2494 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2495 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2497 alu
.dst
.sel
= ctx
->temp_reg
;
2501 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Floor the log2 value held in temp channel 0. */
2506 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
);
2507 alu
.src
[0].sel
= ctx
->temp_reg
;
2508 alu
.src
[0].chan
= 0;
2510 alu
.dst
.sel
= ctx
->temp_reg
;
2515 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2520 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2521 if ((inst
->Dst
[0].Register
.WriteMask
>> 1) & 1) {
2523 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
2524 for (i
= 0; i
< 3; i
++) {
2525 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2527 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2528 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2530 alu
.dst
.sel
= ctx
->temp_reg
;
2537 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2542 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2544 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2545 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2547 alu
.dst
.sel
= ctx
->temp_reg
;
2552 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* floor() of temp channel 1 */
2557 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2559 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
);
2560 alu
.src
[0].sel
= ctx
->temp_reg
;
2561 alu
.src
[0].chan
= 1;
2563 alu
.dst
.sel
= ctx
->temp_reg
;
2568 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* 2 ^ (temp channel 1) */
2572 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
2573 for (i
= 0; i
< 3; i
++) {
2574 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2575 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2576 alu
.src
[0].sel
= ctx
->temp_reg
;
2577 alu
.src
[0].chan
= 1;
2579 alu
.dst
.sel
= ctx
->temp_reg
;
2586 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2591 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2592 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
);
2593 alu
.src
[0].sel
= ctx
->temp_reg
;
2594 alu
.src
[0].chan
= 1;
2596 alu
.dst
.sel
= ctx
->temp_reg
;
2601 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Reciprocal of temp channel 1, so the division becomes a multiply. */
2606 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
2607 for (i
= 0; i
< 3; i
++) {
2608 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2609 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
2610 alu
.src
[0].sel
= ctx
->temp_reg
;
2611 alu
.src
[0].chan
= 1;
2613 alu
.dst
.sel
= ctx
->temp_reg
;
2620 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2625 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2626 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
);
2627 alu
.src
[0].sel
= ctx
->temp_reg
;
2628 alu
.src
[0].chan
= 1;
2630 alu
.dst
.sel
= ctx
->temp_reg
;
2635 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* src.x * (temp channel 1) */
2640 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2642 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2644 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2646 alu
.src
[1].sel
= ctx
->temp_reg
;
2647 alu
.src
[1].chan
= 1;
2649 alu
.dst
.sel
= ctx
->temp_reg
;
2654 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2659 /* result.z = log2(src);*/
2660 if ((inst
->Dst
[0].Register
.WriteMask
>> 2) & 1) {
2661 if (ctx
->bc
->chiprev
== CHIPREV_CAYMAN
) {
2662 for (i
= 0; i
< 3; i
++) {
2663 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2665 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2666 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2668 alu
.dst
.sel
= ctx
->temp_reg
;
2675 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2680 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2682 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
);
2683 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2685 alu
.dst
.sel
= ctx
->temp_reg
;
2690 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2696 /* result.w = 1.0; */
2697 if ((inst
->Dst
[0].Register
.WriteMask
>> 3) & 1) {
2698 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2700 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
);
2701 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2702 alu
.src
[0].chan
= 0;
2704 alu
.dst
.sel
= ctx
->temp_reg
;
2709 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Move the per-channel results gathered in temp to the real destination. */
2714 return tgsi_helper_copy(ctx
, inst
);
/*
 * tgsi_eg_arl - handler registered for TGSI_OPCODE_ARL and TGSI_OPCODE_ARR
 * in eg_shader_tgsi_instruction[].
 *
 * Visible steps: select an EG float-to-int opcode from the TGSI opcode,
 * convert source operand 0 into ctx->ar_reg, then issue a MOVA_INT that
 * reads ctx->ar_reg.
 *
 * NOTE(review): several statements of the body (declaration of r, case
 * terminators, error returns) are not visible in this listing.
 */
2717 static int tgsi_eg_arl(struct r600_shader_ctx
*ctx
)
2719 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2720 struct r600_bc_alu alu
;
2723 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
/* ARL selects the flooring conversion, ARR the plain FLT_TO_INT. */
2725 switch (inst
->Instruction
.Opcode
) {
2726 case TGSI_OPCODE_ARL
:
2727 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR
;
2729 case TGSI_OPCODE_ARR
:
2730 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
;
/* Source operand 0 (last argument 0, presumably the channel) feeds the
 * conversion; the result register is ctx->ar_reg. */
2737 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2739 alu
.dst
.sel
= ctx
->ar_reg
;
2741 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
2745 /* TODO: Note that the MOVA can be avoided if we never use AR for
2746 * indexing non-CB registers in the current ALU clause. Similarly, we
2747 * need to load AR from ar_reg again if we started a new clause
2748 * between ARL and AR usage. The easy way to do that is to remove
2749 * the MOVA here, and load it for the first AR access after ar_reg
2750 * has been modified in each clause. */
2751 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2752 alu
.inst
= EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT
;
2753 alu
.src
[0].sel
= ctx
->ar_reg
;
2754 alu
.src
[0].chan
= 0;
2756 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * tgsi_r600_arl - handler registered for TGSI_OPCODE_ARL and TGSI_OPCODE_ARR
 * in r600_shader_tgsi_instruction[].
 *
 * ARL: FLOOR of source operand 0 into ctx->ar_reg, followed by FLT_TO_INT
 * reading and writing ctx->ar_reg.
 * ARR: a single FLT_TO_INT of source operand 0 into ctx->ar_reg.
 * Afterwards a MOVA_INT reads ctx->ar_reg and the last CF entry gets
 * r6xx_uses_waterfall set to 1.
 *
 * NOTE(review): declarations, error returns and case terminators are not
 * visible in this listing.
 */
2761 static int tgsi_r600_arl(struct r600_shader_ctx
*ctx
)
2763 /* TODO from r600c, ar values don't persist between clauses */
2764 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2765 struct r600_bc_alu alu
;
2768 switch (inst
->Instruction
.Opcode
) {
2769 case TGSI_OPCODE_ARL
:
/* Step 1 of ARL: floor the source into ar_reg. */
2770 memset(&alu
, 0, sizeof(alu
));
2771 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
;
2772 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2773 alu
.dst
.sel
= ctx
->ar_reg
;
2777 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
/* Step 2 of ARL: convert the floored value in ar_reg to integer in place. */
2780 memset(&alu
, 0, sizeof(alu
));
2781 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
;
2782 alu
.src
[0].sel
= ctx
->ar_reg
;
2783 alu
.dst
.sel
= ctx
->ar_reg
;
2787 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
2790 case TGSI_OPCODE_ARR
:
/* ARR: direct float-to-int conversion of the source into ar_reg. */
2791 memset(&alu
, 0, sizeof(alu
));
2792 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
;
2793 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2794 alu
.dst
.sel
= ctx
->ar_reg
;
2798 if ((r
= r600_bc_add_alu(ctx
->bc
, &alu
)))
/* Common tail: MOVA_INT with ar_reg as its source. */
2806 memset(&alu
, 0, sizeof(alu
));
2807 alu
.inst
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT
;
2808 alu
.src
[0].sel
= ctx
->ar_reg
;
2811 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/* Flag the CF entry that received the MOVA. */
2814 ctx
->bc
->cf_last
->r6xx_uses_waterfall
= 1;
/*
 * tgsi_opdst - handler registered for TGSI_OPCODE_DST in the opcode tables.
 *
 * Emits one MUL per channel i in 0..3, with the destination filled in by
 * tgsi_dst() from Dst[0]. For i == 0 or i == 3 operand 0 is the constant
 * V_SQ_ALU_SRC_1; for i == 0 or i == 2 operand 1 is V_SQ_ALU_SRC_1. The
 * r600_bc_src() calls supply channel i of ctx->src[0] / ctx->src[1].
 *
 * NOTE(review): the branch structure joining the constant and the
 * r600_bc_src() alternatives is not visible in this listing; presumably
 * they are if/else arms.
 */
2818 static int tgsi_opdst(struct r600_shader_ctx
*ctx
)
2820 struct tgsi_full_instruction
*inst
= &ctx
->parse
.FullToken
.FullInstruction
;
2821 struct r600_bc_alu alu
;
2824 for (i
= 0; i
< 4; i
++) {
2825 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2827 alu
.inst
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
);
2828 tgsi_dst(ctx
, &inst
->Dst
[0], i
, &alu
.dst
);
/* First multiplicand. */
2830 if (i
== 0 || i
== 3) {
2831 alu
.src
[0].sel
= V_SQ_ALU_SRC_1
;
2833 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], i
);
/* Second multiplicand. */
2836 if (i
== 0 || i
== 2) {
2837 alu
.src
[1].sel
= V_SQ_ALU_SRC_1
;
2839 r600_bc_src(&alu
.src
[1], &ctx
->src
[1], i
);
2843 r
= r600_bc_add_alu(ctx
->bc
, &alu
);
/*
 * emit_logic_pred - build one ALU instruction comparing channel 0 of
 * ctx->src[0] with the constant V_SQ_ALU_SRC_0, targeting ctx->temp_reg,
 * and add it with the ALU_PUSH_BEFORE CF type.
 *
 * tgsi_if() calls this with the PRED_SETNE opcode.
 * NOTE(review): the statement that stores the opcode argument into the
 * instruction is not visible in this listing.
 */
2850 static int emit_logic_pred(struct r600_shader_ctx
*ctx
, int opcode
)
2852 struct r600_bc_alu alu
;
2855 memset(&alu
, 0, sizeof(struct r600_bc_alu
));
2859 alu
.dst
.sel
= ctx
->temp_reg
;
/* Operand 0: the TGSI source; operand 1: the hardware constant 0. */
2863 r600_bc_src(&alu
.src
[0], &ctx
->src
[0], 0);
2864 alu
.src
[1].sel
= V_SQ_ALU_SRC_0
;
2865 alu
.src
[1].chan
= 0;
2869 r
= r600_bc_add_alu_type(ctx
->bc
, &alu
, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
));
/*
 * pops - emit a pop of the given count, either by rewriting the last ALU
 * CF entry into a POP_AFTER / POP2_AFTER variant or by adding an explicit
 * CF POP instruction.
 *
 * Visible logic: the last CF instruction is compared against ALU and
 * ALU_POP_AFTER (both shifted left by 3). When the rewrite path is taken,
 * force_add_cf is set to 1. The explicit POP gets pop_count = pops and
 * cf_addr = its own id + 2.
 *
 * NOTE(review): the computation of alu_pop and the first branch condition
 * are not visible in this listing.
 */
2875 static int pops(struct r600_shader_ctx
*ctx
, int pops
)
2878 if (ctx
->bc
->cf_last
) {
2879 if (ctx
->bc
->cf_last
->inst
== CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
) << 3)
2881 else if (ctx
->bc
->cf_last
->inst
== CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER
) << 3)
/* Fold a single pop into the preceding ALU clause. */
2886 ctx
->bc
->cf_last
->inst
= CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER
) << 3;
2887 ctx
->bc
->force_add_cf
= 1;
2888 } else if (alu_pop
== 2) {
/* Fold two pops into the preceding ALU clause. */
2889 ctx
->bc
->cf_last
->inst
= CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER
) << 3;
2890 ctx
->bc
->force_add_cf
= 1;
/* Otherwise emit a standalone CF POP. */
2892 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP
));
2893 ctx
->bc
->cf_last
->pop_count
= pops
;
2894 ctx
->bc
->cf_last
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
/*
 * callstack_decrease_current - lower the "current" depth counter of the
 * callstack entry selected by ctx->bc->call_sp.
 *
 * Three decrements are visible: by 1, by 4, and by 1 again.
 * NOTE(review): the selection on "reason" that picks one of them is not
 * visible in this listing; callers pass FC_PUSH_VPM and FC_LOOP.
 */
2899 static inline void callstack_decrease_current(struct r600_shader_ctx
*ctx
, unsigned reason
)
2903 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
--;
2907 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
-= 4;
2910 /* TODO: for 16 vp asic should -= 2; */
2911 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
--;
/*
 * callstack_check_depth - track the maximum depth of the callstack entry
 * selected by ctx->bc->call_sp.
 *
 * With check_max_only set, "max" is raised to current + diff when that sum
 * exceeds it, and "current" is left alone in the visible code. Otherwise
 * "current" is increased (by 1, 4 or 1 in the three visible statements)
 * and "max" is raised to the new "current" if it was exceeded.
 *
 * NOTE(review): the code that derives diff and selects an increment from
 * "reason" is not visible in this listing.
 */
2916 static inline void callstack_check_depth(struct r600_shader_ctx
*ctx
, unsigned reason
, unsigned check_max_only
)
2918 if (check_max_only
) {
/* Peek only: would current + diff exceed the recorded maximum? */
2931 if ((ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
+ diff
) >
2932 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
) {
2933 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
=
2934 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
+ diff
;
/* Non-peek path: bump the live counter. */
2940 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
++;
2944 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
+= 4;
2947 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
++;
/* Record a new high-water mark if the bump exceeded it. */
2951 if ((ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
) >
2952 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
) {
2953 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].max
=
2954 ctx
->bc
->callstack
[ctx
->bc
->call_sp
].current
;
/*
 * fc_set_mid - append ctx->bc->cf_last to the "mid" pointer array of the
 * flow-control stack entry at index fc_sp.
 *
 * The array is grown with realloc() to hold num_mid + 1 pointers and the
 * new slot at index num_mid receives the last CF instruction.
 *
 * NOTE(review): the realloc() result overwrites sp->mid directly and is
 * dereferenced without a NULL check, so an allocation failure would lose
 * the old array and crash on the store.
 * NOTE(review): the update of num_mid is not visible in this listing.
 */
2958 static void fc_set_mid(struct r600_shader_ctx
*ctx
, int fc_sp
)
2960 struct r600_cf_stack_entry
*sp
= &ctx
->bc
->fc_stack
[fc_sp
];
2962 sp
->mid
= (struct r600_bc_cf
**)realloc((void *)sp
->mid
,
2963 sizeof(struct r600_bc_cf
*) * (sp
->num_mid
+ 1));
2964 sp
->mid
[sp
->num_mid
] = ctx
->bc
->cf_last
;
/*
 * fc_pushlevel - initialise the flow-control stack entry at ctx->bc->fc_sp:
 * store the given type and remember the last CF instruction as its start.
 *
 * NOTE(review): the adjustment of fc_sp itself is not visible in this
 * listing. Callers pass FC_IF and FC_LOOP.
 */
2968 static void fc_pushlevel(struct r600_shader_ctx
*ctx
, int type
)
2971 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].type
= type
;
2972 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
= ctx
->bc
->cf_last
;
/*
 * fc_poplevel - operate on the flow-control stack entry at ctx->bc->fc_sp.
 *
 * NOTE(review): only the lookup of the entry is visible in this listing;
 * presumably the rest releases the entry and lowers fc_sp - confirm
 * against the full source.
 */
2975 static void fc_poplevel(struct r600_shader_ctx
*ctx
)
2977 struct r600_cf_stack_entry
*sp
= &ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
];
/*
 * emit_return - add a CF RETURN instruction to the bytecode.
 *
 * The opcode constant is passed without the CTX_INST() wrapper that the
 * tgsi_* handlers in this file use.
 */
2989 static int emit_return(struct r600_shader_ctx
*ctx
)
2991 r600_bc_add_cfinst(ctx
->bc
, V_SQ_CF_WORD1_SQ_CF_INST_RETURN
);
/*
 * emit_jump_to_offset - add a CF JUMP instruction and set its pop_count
 * to pops.
 *
 * The offset argument is not used in the visible code; the TODO below
 * marks the jump target as not yet computed.
 */
2995 static int emit_jump_to_offset(struct r600_shader_ctx
*ctx
, int pops
, int offset
)
2998 r600_bc_add_cfinst(ctx
->bc
, V_SQ_CF_WORD1_SQ_CF_INST_JUMP
);
2999 ctx
->bc
->cf_last
->pop_count
= pops
;
3000 /* TODO work out offset */
/*
 * emit_setret_in_loop_flag - NOTE(review): only the signature is visible
 * in this listing. emit_return_on_flag() calls it with V_SQ_ALU_SRC_0 as
 * flag_value.
 */
3004 static int emit_setret_in_loop_flag(struct r600_shader_ctx
*ctx
, unsigned flag_value
)
/*
 * emit_testflag - NOTE(review): only the signature is visible in this
 * listing; the body cannot be described from here.
 */
3009 static void emit_testflag(struct r600_shader_ctx
*ctx
)
/*
 * emit_return_on_flag - visible sequence: a jump with pop count 1 and
 * offset 4, a call to emit_setret_in_loop_flag() with V_SQ_ALU_SRC_0,
 * then pops(ifidx + 1).
 *
 * NOTE(review): other statements of the body may be missing from this
 * listing.
 */
3014 static void emit_return_on_flag(struct r600_shader_ctx
*ctx
, unsigned ifidx
)
3017 emit_jump_to_offset(ctx
, 1, 4);
3018 emit_setret_in_loop_flag(ctx
, V_SQ_ALU_SRC_0
);
3019 pops(ctx
, ifidx
+ 1);
/*
 * break_loop_on_flag - add the CF instruction named by the current
 * instruction info (r600_opcode), give it pop_count 1, and register it as
 * a "mid" entry of the flow-control stack level fc_sp.
 *
 * NOTE(review): other statements of the body may be missing from this
 * listing.
 */
3023 static void break_loop_on_flag(struct r600_shader_ctx
*ctx
, unsigned fc_sp
)
3027 r600_bc_add_cfinst(ctx
->bc
, ctx
->inst_info
->r600_opcode
);
3028 ctx
->bc
->cf_last
->pop_count
= 1;
3030 fc_set_mid(ctx
, fc_sp
);
/*
 * tgsi_if - handler registered for TGSI_OPCODE_IF.
 *
 * Emits a PRED_SETNE predicate via emit_logic_pred(), adds a CF JUMP,
 * pushes an FC_IF level whose start is that JUMP, and accounts for the
 * push in the callstack depth (FC_PUSH_VPM, not max-only).
 */
3036 static int tgsi_if(struct r600_shader_ctx
*ctx
)
3038 emit_logic_pred(ctx
, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE
));
3040 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP
));
3042 fc_pushlevel(ctx
, FC_IF
);
3044 callstack_check_depth(ctx
, FC_PUSH_VPM
, 0);
/*
 * tgsi_else - handler registered for TGSI_OPCODE_ELSE.
 *
 * Adds a CF ELSE with pop_count 1, records it as a "mid" entry of the
 * current flow-control level, and points the level's start instruction
 * (cf_addr) at the id of the new ELSE.
 */
3048 static int tgsi_else(struct r600_shader_ctx
*ctx
)
3050 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE
));
3051 ctx
->bc
->cf_last
->pop_count
= 1;
3053 fc_set_mid(ctx
, ctx
->bc
->fc_sp
);
3054 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->cf_addr
= ctx
->bc
->cf_last
->id
;
/*
 * tgsi_endif - handler registered for TGSI_OPCODE_ENDIF.
 *
 * Reports an error when the current flow-control level is not FC_IF.
 * Without a "mid" entry (no ELSE was recorded) the start instruction gets
 * cf_addr = last CF id + 2 and pop_count 1; the other visible assignment
 * gives mid[0] the address last CF id + 2. Finally the callstack depth is
 * decreased for FC_PUSH_VPM.
 *
 * NOTE(review): the pops/poplevel calls and return statements are not
 * visible in this listing.
 */
3058 static int tgsi_endif(struct r600_shader_ctx
*ctx
)
3061 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].type
!= FC_IF
) {
3062 R600_ERR("if/endif unbalanced in shader\n");
/* No ELSE recorded: patch the start instruction. */
3066 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].mid
== NULL
) {
3067 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
3068 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->pop_count
= 1;
/* Patch the first recorded mid entry. */
3070 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].mid
[0]->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
3074 callstack_decrease_current(ctx
, FC_PUSH_VPM
);
/*
 * tgsi_bgnloop - handler registered for TGSI_OPCODE_BGNLOOP.
 *
 * Adds a CF LOOP_START_NO_AL, pushes an FC_LOOP level starting at it, and
 * accounts for the loop in the callstack depth.
 */
3078 static int tgsi_bgnloop(struct r600_shader_ctx
*ctx
)
3080 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
));
3082 fc_pushlevel(ctx
, FC_LOOP
);
3084 /* check stack depth */
3085 callstack_check_depth(ctx
, FC_LOOP
, 0);
/*
 * tgsi_endloop - handler registered for TGSI_OPCODE_ENDLOOP.
 *
 * Adds a CF LOOP_END and reports an error when the current flow-control
 * level is not FC_LOOP. The visible assignments then set:
 *   - LOOP_END cf_addr   = id of the level's start instruction + 2
 *   - start cf_addr      = id of the LOOP_END + 2
 *   - every mid[] entry  = id of the LOOP_END
 * and the callstack depth is decreased for FC_LOOP.
 *
 * NOTE(review): declarations, the error return and the level pop are not
 * visible in this listing.
 */
3089 static int tgsi_endloop(struct r600_shader_ctx
*ctx
)
3093 r600_bc_add_cfinst(ctx
->bc
, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
));
3095 if (ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].type
!= FC_LOOP
) {
3096 R600_ERR("loop/endloop in shader code are not paired.\n");
3100 /* fixup loop pointers - from r600isa
3101 LOOP END points to CF after LOOP START,
3102 LOOP START point to CF after LOOP END
3103 BRK/CONT point to LOOP END CF
3105 ctx
->bc
->cf_last
->cf_addr
= ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->id
+ 2;
3107 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].start
->cf_addr
= ctx
->bc
->cf_last
->id
+ 2;
3109 for (i
= 0; i
< ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].num_mid
; i
++) {
3110 ctx
->bc
->fc_stack
[ctx
->bc
->fc_sp
].mid
[i
]->cf_addr
= ctx
->bc
->cf_last
->id
;
3112 /* TODO add LOOPRET support */
3114 callstack_decrease_current(ctx
, FC_LOOP
);
/*
 * tgsi_loop_brk_cont - handler registered for TGSI_OPCODE_BRK and
 * TGSI_OPCODE_CONT.
 *
 * Walks the flow-control stack downward from ctx->bc->fc_sp (index 0 is
 * not examined) looking for an FC_LOOP level, and reports an error when
 * the break is not inside a loop. It then adds the CF instruction named by
 * the current instruction info with pop_count 1, records it as a "mid"
 * entry of the level at fscp, and checks the callstack maximum for
 * FC_PUSH_VPM without changing the current depth.
 *
 * NOTE(review): the loop exit and the error return are not visible in
 * this listing.
 */
3118 static int tgsi_loop_brk_cont(struct r600_shader_ctx
*ctx
)
3122 for (fscp
= ctx
->bc
->fc_sp
; fscp
> 0; fscp
--)
3124 if (FC_LOOP
== ctx
->bc
->fc_stack
[fscp
].type
)
3129 R600_ERR("Break not inside loop/endloop pair\n");
3133 r600_bc_add_cfinst(ctx
->bc
, ctx
->inst_info
->r600_opcode
);
3134 ctx
->bc
->cf_last
->pop_count
= 1;
3136 fc_set_mid(ctx
, fscp
);
3139 callstack_check_depth(ctx
, FC_PUSH_VPM
, 1);
/*
 * r600_shader_tgsi_instruction - opcode dispatch table built from the
 * non-EG hardware opcode constants.
 *
 * Each entry holds, in order: a TGSI opcode (or a bare number where no
 * TGSI_OPCODE_* name is used), an integer that is 1 only for MAD in this
 * table (presumably an op3 marker - confirm against the struct
 * definition), the hardware opcode constant handed to the handler, and
 * the handler function. Entries routed to tgsi_unsupported carry the NOP
 * opcode.
 *
 * NOTE(review): the entries appear in ascending TGSI opcode order, which
 * suggests the table is indexed by opcode; confirm at the lookup site
 * before inserting or reordering entries.
 */
3143 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction
[] = {
3144 {TGSI_OPCODE_ARL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_r600_arl
},
3145 {TGSI_OPCODE_MOV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3146 {TGSI_OPCODE_LIT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lit
},
3149 * For state trackers other than OpenGL, we'll want to use
3150 * _RECIP_IEEE instead.
3152 {TGSI_OPCODE_RCP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED
, tgsi_trans_srcx_replicate
},
3154 {TGSI_OPCODE_RSQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_rsq
},
3155 {TGSI_OPCODE_EXP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_exp
},
3156 {TGSI_OPCODE_LOG
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_log
},
3157 {TGSI_OPCODE_MUL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
, tgsi_op2
},
3158 {TGSI_OPCODE_ADD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3159 {TGSI_OPCODE_DP3
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3160 {TGSI_OPCODE_DP4
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3161 {TGSI_OPCODE_DST
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_opdst
},
3162 {TGSI_OPCODE_MIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
, tgsi_op2
},
3163 {TGSI_OPCODE_MAX
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
, tgsi_op2
},
3164 {TGSI_OPCODE_SLT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2_swap
},
3165 {TGSI_OPCODE_SGE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2
},
3166 {TGSI_OPCODE_MAD
, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
, tgsi_op3
},
3167 {TGSI_OPCODE_SUB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3168 {TGSI_OPCODE_LRP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lrp
},
3169 {TGSI_OPCODE_CND
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3171 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3172 {TGSI_OPCODE_DP2A
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3174 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3175 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3176 {TGSI_OPCODE_FRC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
, tgsi_op2
},
3177 {TGSI_OPCODE_CLAMP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3178 {TGSI_OPCODE_FLR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
, tgsi_op2
},
3179 {TGSI_OPCODE_ROUND
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3180 {TGSI_OPCODE_EX2
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
, tgsi_trans_srcx_replicate
},
3181 {TGSI_OPCODE_LG2
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
, tgsi_trans_srcx_replicate
},
3182 {TGSI_OPCODE_POW
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_pow
},
3183 {TGSI_OPCODE_XPD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_xpd
},
3185 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3186 {TGSI_OPCODE_ABS
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3187 {TGSI_OPCODE_RCC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3188 {TGSI_OPCODE_DPH
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3189 {TGSI_OPCODE_COS
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
, tgsi_trig
},
3190 {TGSI_OPCODE_DDX
, 0, SQ_TEX_INST_GET_GRADIENTS_H
, tgsi_tex
},
3191 {TGSI_OPCODE_DDY
, 0, SQ_TEX_INST_GET_GRADIENTS_V
, tgsi_tex
},
3192 {TGSI_OPCODE_KILP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* predicated kill */
3193 {TGSI_OPCODE_PK2H
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3194 {TGSI_OPCODE_PK2US
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3195 {TGSI_OPCODE_PK4B
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3196 {TGSI_OPCODE_PK4UB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3197 {TGSI_OPCODE_RFL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3198 {TGSI_OPCODE_SEQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
, tgsi_op2
},
3199 {TGSI_OPCODE_SFL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3200 {TGSI_OPCODE_SGT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2
},
3201 {TGSI_OPCODE_SIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
, tgsi_trig
},
3202 {TGSI_OPCODE_SLE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2_swap
},
3203 {TGSI_OPCODE_SNE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
, tgsi_op2
},
3204 {TGSI_OPCODE_STR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3205 {TGSI_OPCODE_TEX
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3206 {TGSI_OPCODE_TXD
, 0, SQ_TEX_INST_SAMPLE_G
, tgsi_tex
},
3207 {TGSI_OPCODE_TXP
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3208 {TGSI_OPCODE_UP2H
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3209 {TGSI_OPCODE_UP2US
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3210 {TGSI_OPCODE_UP4B
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3211 {TGSI_OPCODE_UP4UB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3212 {TGSI_OPCODE_X2D
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3213 {TGSI_OPCODE_ARA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3214 {TGSI_OPCODE_ARR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_r600_arl
},
3215 {TGSI_OPCODE_BRA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3216 {TGSI_OPCODE_CAL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3217 {TGSI_OPCODE_RET
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3218 {TGSI_OPCODE_SSG
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_ssg
},
3219 {TGSI_OPCODE_CMP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_cmp
},
3220 {TGSI_OPCODE_SCS
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_scs
},
3221 {TGSI_OPCODE_TXB
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3222 {TGSI_OPCODE_NRM
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3223 {TGSI_OPCODE_DIV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3224 {TGSI_OPCODE_DP2
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3225 {TGSI_OPCODE_TXL
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3226 {TGSI_OPCODE_BRK
, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
, tgsi_loop_brk_cont
},
3227 {TGSI_OPCODE_IF
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_if
},
3229 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3230 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3231 {TGSI_OPCODE_ELSE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_else
},
3232 {TGSI_OPCODE_ENDIF
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endif
},
3234 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3235 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3236 {TGSI_OPCODE_PUSHA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3237 {TGSI_OPCODE_POPA
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3238 {TGSI_OPCODE_CEIL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3239 {TGSI_OPCODE_I2F
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3240 {TGSI_OPCODE_NOT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3241 {TGSI_OPCODE_TRUNC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
, tgsi_op2
},
3242 {TGSI_OPCODE_SHL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3244 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3245 {TGSI_OPCODE_AND
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3246 {TGSI_OPCODE_OR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3247 {TGSI_OPCODE_MOD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3248 {TGSI_OPCODE_XOR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3249 {TGSI_OPCODE_SAD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3250 {TGSI_OPCODE_TXF
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3251 {TGSI_OPCODE_TXQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3252 {TGSI_OPCODE_CONT
, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
3253 {TGSI_OPCODE_EMIT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3254 {TGSI_OPCODE_ENDPRIM
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3255 {TGSI_OPCODE_BGNLOOP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_bgnloop
},
3256 {TGSI_OPCODE_BGNSUB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3257 {TGSI_OPCODE_ENDLOOP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endloop
},
3258 {TGSI_OPCODE_ENDSUB
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3260 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3261 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3262 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3263 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3264 {TGSI_OPCODE_NOP
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3266 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3267 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3268 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3269 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3270 {TGSI_OPCODE_NRM4
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3271 {TGSI_OPCODE_CALLNZ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3272 {TGSI_OPCODE_IFC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3273 {TGSI_OPCODE_BREAKC
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3274 {TGSI_OPCODE_KIL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* conditional kill */
3275 {TGSI_OPCODE_END
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_end
}, /* aka HALT */
3277 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3278 {TGSI_OPCODE_F2I
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3279 {TGSI_OPCODE_IDIV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3280 {TGSI_OPCODE_IMAX
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3281 {TGSI_OPCODE_IMIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3282 {TGSI_OPCODE_INEG
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3283 {TGSI_OPCODE_ISGE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3284 {TGSI_OPCODE_ISHR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3285 {TGSI_OPCODE_ISLT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3286 {TGSI_OPCODE_F2U
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3287 {TGSI_OPCODE_U2F
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3288 {TGSI_OPCODE_UADD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3289 {TGSI_OPCODE_UDIV
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3290 {TGSI_OPCODE_UMAD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3291 {TGSI_OPCODE_UMAX
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3292 {TGSI_OPCODE_UMIN
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3293 {TGSI_OPCODE_UMOD
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3294 {TGSI_OPCODE_UMUL
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3295 {TGSI_OPCODE_USEQ
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3296 {TGSI_OPCODE_USGE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3297 {TGSI_OPCODE_USHR
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3298 {TGSI_OPCODE_USLT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3299 {TGSI_OPCODE_USNE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3300 {TGSI_OPCODE_SWITCH
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3301 {TGSI_OPCODE_CASE
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3302 {TGSI_OPCODE_DEFAULT
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3303 {TGSI_OPCODE_ENDSWITCH
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3304 {TGSI_OPCODE_LAST
, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3307 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction
[] = {
3308 {TGSI_OPCODE_ARL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3309 {TGSI_OPCODE_MOV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3310 {TGSI_OPCODE_LIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lit
},
3311 {TGSI_OPCODE_RCP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
, tgsi_trans_srcx_replicate
},
3312 {TGSI_OPCODE_RSQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE
, tgsi_rsq
},
3313 {TGSI_OPCODE_EXP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_exp
},
3314 {TGSI_OPCODE_LOG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_log
},
3315 {TGSI_OPCODE_MUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
, tgsi_op2
},
3316 {TGSI_OPCODE_ADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3317 {TGSI_OPCODE_DP3
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3318 {TGSI_OPCODE_DP4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3319 {TGSI_OPCODE_DST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_opdst
},
3320 {TGSI_OPCODE_MIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
, tgsi_op2
},
3321 {TGSI_OPCODE_MAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
, tgsi_op2
},
3322 {TGSI_OPCODE_SLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2_swap
},
3323 {TGSI_OPCODE_SGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2
},
3324 {TGSI_OPCODE_MAD
, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
, tgsi_op3
},
3325 {TGSI_OPCODE_SUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3326 {TGSI_OPCODE_LRP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lrp
},
3327 {TGSI_OPCODE_CND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3329 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3330 {TGSI_OPCODE_DP2A
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3332 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3333 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3334 {TGSI_OPCODE_FRC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
, tgsi_op2
},
3335 {TGSI_OPCODE_CLAMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3336 {TGSI_OPCODE_FLR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
, tgsi_op2
},
3337 {TGSI_OPCODE_ROUND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3338 {TGSI_OPCODE_EX2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
, tgsi_trans_srcx_replicate
},
3339 {TGSI_OPCODE_LG2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
, tgsi_trans_srcx_replicate
},
3340 {TGSI_OPCODE_POW
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_pow
},
3341 {TGSI_OPCODE_XPD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_xpd
},
3343 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3344 {TGSI_OPCODE_ABS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3345 {TGSI_OPCODE_RCC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3346 {TGSI_OPCODE_DPH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3347 {TGSI_OPCODE_COS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
, tgsi_trig
},
3348 {TGSI_OPCODE_DDX
, 0, SQ_TEX_INST_GET_GRADIENTS_H
, tgsi_tex
},
3349 {TGSI_OPCODE_DDY
, 0, SQ_TEX_INST_GET_GRADIENTS_V
, tgsi_tex
},
3350 {TGSI_OPCODE_KILP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* predicated kill */
3351 {TGSI_OPCODE_PK2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3352 {TGSI_OPCODE_PK2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3353 {TGSI_OPCODE_PK4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3354 {TGSI_OPCODE_PK4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3355 {TGSI_OPCODE_RFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3356 {TGSI_OPCODE_SEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
, tgsi_op2
},
3357 {TGSI_OPCODE_SFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3358 {TGSI_OPCODE_SGT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2
},
3359 {TGSI_OPCODE_SIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
, tgsi_trig
},
3360 {TGSI_OPCODE_SLE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2_swap
},
3361 {TGSI_OPCODE_SNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
, tgsi_op2
},
3362 {TGSI_OPCODE_STR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3363 {TGSI_OPCODE_TEX
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3364 {TGSI_OPCODE_TXD
, 0, SQ_TEX_INST_SAMPLE_G
, tgsi_tex
},
3365 {TGSI_OPCODE_TXP
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3366 {TGSI_OPCODE_UP2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3367 {TGSI_OPCODE_UP2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3368 {TGSI_OPCODE_UP4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3369 {TGSI_OPCODE_UP4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3370 {TGSI_OPCODE_X2D
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3371 {TGSI_OPCODE_ARA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3372 {TGSI_OPCODE_ARR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3373 {TGSI_OPCODE_BRA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3374 {TGSI_OPCODE_CAL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3375 {TGSI_OPCODE_RET
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3376 {TGSI_OPCODE_SSG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_ssg
},
3377 {TGSI_OPCODE_CMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_cmp
},
3378 {TGSI_OPCODE_SCS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_scs
},
3379 {TGSI_OPCODE_TXB
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3380 {TGSI_OPCODE_NRM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3381 {TGSI_OPCODE_DIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3382 {TGSI_OPCODE_DP2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3383 {TGSI_OPCODE_TXL
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3384 {TGSI_OPCODE_BRK
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
, tgsi_loop_brk_cont
},
3385 {TGSI_OPCODE_IF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_if
},
3387 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3388 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3389 {TGSI_OPCODE_ELSE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_else
},
3390 {TGSI_OPCODE_ENDIF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endif
},
3392 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3393 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3394 {TGSI_OPCODE_PUSHA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3395 {TGSI_OPCODE_POPA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3396 {TGSI_OPCODE_CEIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3397 {TGSI_OPCODE_I2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3398 {TGSI_OPCODE_NOT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3399 {TGSI_OPCODE_TRUNC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
, tgsi_op2
},
3400 {TGSI_OPCODE_SHL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3402 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3403 {TGSI_OPCODE_AND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3404 {TGSI_OPCODE_OR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3405 {TGSI_OPCODE_MOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3406 {TGSI_OPCODE_XOR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3407 {TGSI_OPCODE_SAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3408 {TGSI_OPCODE_TXF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3409 {TGSI_OPCODE_TXQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3410 {TGSI_OPCODE_CONT
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
3411 {TGSI_OPCODE_EMIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3412 {TGSI_OPCODE_ENDPRIM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3413 {TGSI_OPCODE_BGNLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_bgnloop
},
3414 {TGSI_OPCODE_BGNSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3415 {TGSI_OPCODE_ENDLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endloop
},
3416 {TGSI_OPCODE_ENDSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3418 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3419 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3420 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3421 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3422 {TGSI_OPCODE_NOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3424 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3425 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3426 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3427 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3428 {TGSI_OPCODE_NRM4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3429 {TGSI_OPCODE_CALLNZ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3430 {TGSI_OPCODE_IFC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3431 {TGSI_OPCODE_BREAKC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3432 {TGSI_OPCODE_KIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* conditional kill */
3433 {TGSI_OPCODE_END
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_end
}, /* aka HALT */
3435 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3436 {TGSI_OPCODE_F2I
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3437 {TGSI_OPCODE_IDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3438 {TGSI_OPCODE_IMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3439 {TGSI_OPCODE_IMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3440 {TGSI_OPCODE_INEG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3441 {TGSI_OPCODE_ISGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3442 {TGSI_OPCODE_ISHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3443 {TGSI_OPCODE_ISLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3444 {TGSI_OPCODE_F2U
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3445 {TGSI_OPCODE_U2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3446 {TGSI_OPCODE_UADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3447 {TGSI_OPCODE_UDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3448 {TGSI_OPCODE_UMAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3449 {TGSI_OPCODE_UMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3450 {TGSI_OPCODE_UMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3451 {TGSI_OPCODE_UMOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3452 {TGSI_OPCODE_UMUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3453 {TGSI_OPCODE_USEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3454 {TGSI_OPCODE_USGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3455 {TGSI_OPCODE_USHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3456 {TGSI_OPCODE_USLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3457 {TGSI_OPCODE_USNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3458 {TGSI_OPCODE_SWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3459 {TGSI_OPCODE_CASE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3460 {TGSI_OPCODE_DEFAULT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3461 {TGSI_OPCODE_ENDSWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3462 {TGSI_OPCODE_LAST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3465 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction
[] = {
3466 {TGSI_OPCODE_ARL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3467 {TGSI_OPCODE_MOV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3468 {TGSI_OPCODE_LIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lit
},
3469 {TGSI_OPCODE_RCP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
, cayman_emit_float_instr
},
3470 {TGSI_OPCODE_RSQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE
, cayman_emit_float_instr
},
3471 {TGSI_OPCODE_EXP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_exp
},
3472 {TGSI_OPCODE_LOG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_log
},
3473 {TGSI_OPCODE_MUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
, tgsi_op2
},
3474 {TGSI_OPCODE_ADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3475 {TGSI_OPCODE_DP3
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3476 {TGSI_OPCODE_DP4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3477 {TGSI_OPCODE_DST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_opdst
},
3478 {TGSI_OPCODE_MIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
, tgsi_op2
},
3479 {TGSI_OPCODE_MAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
, tgsi_op2
},
3480 {TGSI_OPCODE_SLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2_swap
},
3481 {TGSI_OPCODE_SGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2
},
3482 {TGSI_OPCODE_MAD
, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
, tgsi_op3
},
3483 {TGSI_OPCODE_SUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, tgsi_op2
},
3484 {TGSI_OPCODE_LRP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_lrp
},
3485 {TGSI_OPCODE_CND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3487 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3488 {TGSI_OPCODE_DP2A
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3490 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3491 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3492 {TGSI_OPCODE_FRC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
, tgsi_op2
},
3493 {TGSI_OPCODE_CLAMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3494 {TGSI_OPCODE_FLR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
, tgsi_op2
},
3495 {TGSI_OPCODE_ROUND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3496 {TGSI_OPCODE_EX2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
, cayman_emit_float_instr
},
3497 {TGSI_OPCODE_LG2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
, cayman_emit_float_instr
},
3498 {TGSI_OPCODE_POW
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, cayman_pow
},
3499 {TGSI_OPCODE_XPD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_xpd
},
3501 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3502 {TGSI_OPCODE_ABS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, tgsi_op2
},
3503 {TGSI_OPCODE_RCC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3504 {TGSI_OPCODE_DPH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3505 {TGSI_OPCODE_COS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
, cayman_trig
},
3506 {TGSI_OPCODE_DDX
, 0, SQ_TEX_INST_GET_GRADIENTS_H
, tgsi_tex
},
3507 {TGSI_OPCODE_DDY
, 0, SQ_TEX_INST_GET_GRADIENTS_V
, tgsi_tex
},
3508 {TGSI_OPCODE_KILP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* predicated kill */
3509 {TGSI_OPCODE_PK2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3510 {TGSI_OPCODE_PK2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3511 {TGSI_OPCODE_PK4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3512 {TGSI_OPCODE_PK4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3513 {TGSI_OPCODE_RFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3514 {TGSI_OPCODE_SEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
, tgsi_op2
},
3515 {TGSI_OPCODE_SFL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3516 {TGSI_OPCODE_SGT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, tgsi_op2
},
3517 {TGSI_OPCODE_SIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
, cayman_trig
},
3518 {TGSI_OPCODE_SLE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, tgsi_op2_swap
},
3519 {TGSI_OPCODE_SNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
, tgsi_op2
},
3520 {TGSI_OPCODE_STR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3521 {TGSI_OPCODE_TEX
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3522 {TGSI_OPCODE_TXD
, 0, SQ_TEX_INST_SAMPLE_G
, tgsi_tex
},
3523 {TGSI_OPCODE_TXP
, 0, SQ_TEX_INST_SAMPLE
, tgsi_tex
},
3524 {TGSI_OPCODE_UP2H
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3525 {TGSI_OPCODE_UP2US
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3526 {TGSI_OPCODE_UP4B
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3527 {TGSI_OPCODE_UP4UB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3528 {TGSI_OPCODE_X2D
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3529 {TGSI_OPCODE_ARA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3530 {TGSI_OPCODE_ARR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_eg_arl
},
3531 {TGSI_OPCODE_BRA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3532 {TGSI_OPCODE_CAL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3533 {TGSI_OPCODE_RET
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3534 {TGSI_OPCODE_SSG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_ssg
},
3535 {TGSI_OPCODE_CMP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_cmp
},
3536 {TGSI_OPCODE_SCS
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_scs
},
3537 {TGSI_OPCODE_TXB
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3538 {TGSI_OPCODE_NRM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3539 {TGSI_OPCODE_DIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3540 {TGSI_OPCODE_DP2
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, tgsi_dp
},
3541 {TGSI_OPCODE_TXL
, 0, SQ_TEX_INST_SAMPLE_L
, tgsi_tex
},
3542 {TGSI_OPCODE_BRK
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
, tgsi_loop_brk_cont
},
3543 {TGSI_OPCODE_IF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_if
},
3545 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3546 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3547 {TGSI_OPCODE_ELSE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_else
},
3548 {TGSI_OPCODE_ENDIF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endif
},
3550 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3551 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3552 {TGSI_OPCODE_PUSHA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3553 {TGSI_OPCODE_POPA
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3554 {TGSI_OPCODE_CEIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3555 {TGSI_OPCODE_I2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3556 {TGSI_OPCODE_NOT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3557 {TGSI_OPCODE_TRUNC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
, tgsi_op2
},
3558 {TGSI_OPCODE_SHL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3560 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3561 {TGSI_OPCODE_AND
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3562 {TGSI_OPCODE_OR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3563 {TGSI_OPCODE_MOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3564 {TGSI_OPCODE_XOR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3565 {TGSI_OPCODE_SAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3566 {TGSI_OPCODE_TXF
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3567 {TGSI_OPCODE_TXQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3568 {TGSI_OPCODE_CONT
, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
, tgsi_loop_brk_cont
},
3569 {TGSI_OPCODE_EMIT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3570 {TGSI_OPCODE_ENDPRIM
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3571 {TGSI_OPCODE_BGNLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_bgnloop
},
3572 {TGSI_OPCODE_BGNSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3573 {TGSI_OPCODE_ENDLOOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_endloop
},
3574 {TGSI_OPCODE_ENDSUB
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3576 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3577 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3578 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3579 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3580 {TGSI_OPCODE_NOP
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3582 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3583 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3584 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3585 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3586 {TGSI_OPCODE_NRM4
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3587 {TGSI_OPCODE_CALLNZ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3588 {TGSI_OPCODE_IFC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3589 {TGSI_OPCODE_BREAKC
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3590 {TGSI_OPCODE_KIL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, tgsi_kill
}, /* conditional kill */
3591 {TGSI_OPCODE_END
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_end
}, /* aka HALT */
3593 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3594 {TGSI_OPCODE_F2I
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3595 {TGSI_OPCODE_IDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3596 {TGSI_OPCODE_IMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3597 {TGSI_OPCODE_IMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3598 {TGSI_OPCODE_INEG
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3599 {TGSI_OPCODE_ISGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3600 {TGSI_OPCODE_ISHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3601 {TGSI_OPCODE_ISLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3602 {TGSI_OPCODE_F2U
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3603 {TGSI_OPCODE_U2F
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3604 {TGSI_OPCODE_UADD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3605 {TGSI_OPCODE_UDIV
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3606 {TGSI_OPCODE_UMAD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3607 {TGSI_OPCODE_UMAX
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3608 {TGSI_OPCODE_UMIN
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3609 {TGSI_OPCODE_UMOD
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3610 {TGSI_OPCODE_UMUL
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3611 {TGSI_OPCODE_USEQ
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3612 {TGSI_OPCODE_USGE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3613 {TGSI_OPCODE_USHR
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3614 {TGSI_OPCODE_USLT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3615 {TGSI_OPCODE_USNE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3616 {TGSI_OPCODE_SWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3617 {TGSI_OPCODE_CASE
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3618 {TGSI_OPCODE_DEFAULT
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3619 {TGSI_OPCODE_ENDSWITCH
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},
3620 {TGSI_OPCODE_LAST
, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, tgsi_unsupported
},