2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include <util/u_format.h>
29 #include "r600_screen.h"
30 #include "r600_context.h"
/*
 * One entry of the r600_alu_instruction[] table declared further down.
 * Only the `instruction` member is visible here; it is the value that
 * r600_shader_alu_translate() uses as an index into r600_instruction_info[].
 */
34 struct r600_alu_instruction
{
36 enum r600_instruction instruction
;
/*
 * Forward declarations: r600_shader_alu_translate() is called from
 * r600_node_translate() before its definition, and both lookup tables are
 * read by r600_shader_alu_translate() before they are defined at the end of
 * the file.
 */
39 static int r600_shader_alu_translate(struct r600_shader
*rshader
,
40 struct r600_shader_node
*node
,
41 struct c_instruction
*instruction
);
/* Table with C_OPCODE_LAST entries, indexed by the c_instruction opcode. */
42 struct r600_alu_instruction r600_alu_instruction
[C_OPCODE_LAST
];
/* Table indexed by the `instruction` member of the entry above. */
43 struct r600_instruction_info r600_instruction_info
[];
/*
 * r600_shader_insert_fetch - make vertex fetches explicit in a c_shader.
 *
 * The shader type is first compared against C_PROGRAM_TYPE_VS (the action
 * taken on a mismatch is on a line not shown here).  A new C_FILE_INPUT
 * vector with the C_SEMANTIC_VERTEXID semantic is created, then for every
 * vector of the input file:
 *   - a new C_FILE_RESOURCE vector is created,
 *   - a c_instruction is zeroed and its four ops are set to
 *     C_OPCODE_VFETCH, each reading (vi, vr) and writing the input vector,
 *     one op per X/Y/Z/W output component,
 *   - the instruction is handed to c_node_add_new_instruction_head() on
 *     shader->entry,
 *   - the vector is counted out of C_FILE_INPUT, added to the
 *     C_FILE_TEMPORARY list and relabelled as C_FILE_TEMPORARY.
 *
 * shader: shader that is rewritten in place.
 * Returns an int status; the return statements are not shown here.
 */
45 int r600_shader_insert_fetch(struct c_shader
*shader
)
47 struct c_vector
*vi
, *vr
, *v
, *nv
;
48 struct c_instruction instruction
;
51 if (shader
->type
!= C_PROGRAM_TYPE_VS
)
/* vi is the vertex-id vector used as first input of every fetch below. */
53 vi
= c_shader_vector_new(shader
, C_FILE_INPUT
, C_SEMANTIC_VERTEXID
, -1);
/*
 * NOTE(review): vi was just created in C_FILE_INPUT, the file this loop
 * walks; whether vi itself is skipped is decided on lines not shown here.
 */
56 c_list_for_each_safe(v
, nv
, &shader
->files
[C_FILE_INPUT
].vectors
) {
/* vr is the per-input resource vector used as second input of the fetch. */
59 vr
= c_shader_vector_new(shader
, C_FILE_RESOURCE
, C_SEMANTIC_GENERIC
, -1);
/* Start from an all-zero instruction before filling the four ops. */
62 memset(&instruction
, 0, sizeof(struct c_instruction
));
64 instruction
.op
[0].opcode
= C_OPCODE_VFETCH
;
65 instruction
.op
[1].opcode
= C_OPCODE_VFETCH
;
66 instruction
.op
[2].opcode
= C_OPCODE_VFETCH
;
67 instruction
.op
[3].opcode
= C_OPCODE_VFETCH
;
68 instruction
.op
[0].ninput
= 2;
69 instruction
.op
[1].ninput
= 2;
70 instruction
.op
[2].ninput
= 2;
71 instruction
.op
[3].ninput
= 2;
72 instruction
.op
[0].output
.vector
= v
;
73 instruction
.op
[1].output
.vector
= v
;
74 instruction
.op
[2].output
.vector
= v
;
75 instruction
.op
[3].output
.vector
= v
;
76 instruction
.op
[0].input
[0].vector
= vi
;
77 instruction
.op
[0].input
[1].vector
= vr
;
78 instruction
.op
[1].input
[0].vector
= vi
;
79 instruction
.op
[1].input
[1].vector
= vr
;
80 instruction
.op
[2].input
[0].vector
= vi
;
81 instruction
.op
[2].input
[1].vector
= vr
;
82 instruction
.op
[3].input
[0].vector
= vi
;
83 instruction
.op
[3].input
[1].vector
= vr
;
/* Op n writes output component n (X, Y, Z, W). */
84 instruction
.op
[0].output
.swizzle
= C_SWIZZLE_X
;
85 instruction
.op
[1].output
.swizzle
= C_SWIZZLE_Y
;
86 instruction
.op
[2].output
.swizzle
= C_SWIZZLE_Z
;
87 instruction
.op
[3].output
.swizzle
= C_SWIZZLE_W
;
88 r
= c_node_add_new_instruction_head(&shader
->entry
, &instruction
);
/* The fetched vector stops being an input and becomes a temporary. */
92 shader
->files
[C_FILE_INPUT
].nvectors
--;
93 c_list_add_tail(v
, &shader
->files
[C_FILE_TEMPORARY
].vectors
);
94 shader
->files
[C_FILE_TEMPORARY
].nvectors
++;
95 v
->file
= C_FILE_TEMPORARY
;
/*
 * r600_shader_cleanup - release what an r600_shader owns.
 *
 * Frees every rshader->gpr[i] for i < rshader->nvector, walks all nodes and
 * each node's vfetch and alu lists with the removal-safe iterator (the loop
 * bodies are on lines not shown here), and finally frees rshader->bcode.
 *
 * rshader: shader to tear down.
 */
100 void r600_shader_cleanup(struct r600_shader
*rshader
)
102 struct r600_shader_node
*n
, *nn
;
103 struct r600_shader_vfetch
*vf
, *nvf
;
104 struct r600_shader_alu
*alu
, *nalu
;
/* Per-vector register copies stored by r600_shader_register(). */
110 for (i
= 0; i
< rshader
->nvector
; i
++) {
111 free(rshader
->gpr
[i
]);
116 c_list_for_each_safe(n
, nn
, &rshader
->nodes
) {
118 c_list_for_each_safe(vf
, nvf
, &n
->vfetch
) {
122 c_list_for_each_safe(alu
, nalu
, &n
->alu
) {
128 free(rshader
->bcode
);
/*
 * r600_shader_vfetch_bytecode - emit the four dwords of one vertex fetch.
 *
 * The current value of `id` is stored in vfetch->cf_addr, then four
 * consecutive entries of rshader->bcode are written:
 *   word 0: buffer id from src[1].sel, source GPR from src[0].sel,
 *           SRC_SEL_X, and a mega fetch count of 0x1F;
 *   word 1: the four destination selects from dst[0..3].chan,
 *           USE_CONST_FIELDS = 1 and the destination GPR from dst[0].sel;
 *   word 2: MEGA_FETCH = 1;
 *   word 3: zero.
 * r600_shader_update() later rewrites the DST_SEL fields of word 1.
 *
 * The end of the parameter list and the declaration of `id` are on lines
 * not shown here.
 *
 * NOTE(review): SRC_SEL_X is built from vfetch->src[0].sel, the same value
 * used for SRC_GPR, while the DST_SEL fields use .chan; confirm whether
 * src[0].chan was intended.
 */
132 int r600_shader_vfetch_bytecode(struct r600_shader
*rshader
,
133 struct r600_shader_node
*rnode
,
134 struct r600_shader_vfetch
*vfetch
,
139 vfetch
->cf_addr
= id
;
140 rshader
->bcode
[id
++] = S_SQ_VTX_WORD0_BUFFER_ID(vfetch
->src
[1].sel
) |
141 S_SQ_VTX_WORD0_SRC_GPR(vfetch
->src
[0].sel
) |
142 S_SQ_VTX_WORD0_SRC_SEL_X(vfetch
->src
[0].sel
) |
143 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
144 rshader
->bcode
[id
++] = S_SQ_VTX_WORD1_DST_SEL_X(vfetch
->dst
[0].chan
) |
145 S_SQ_VTX_WORD1_DST_SEL_Y(vfetch
->dst
[1].chan
) |
146 S_SQ_VTX_WORD1_DST_SEL_Z(vfetch
->dst
[2].chan
) |
147 S_SQ_VTX_WORD1_DST_SEL_W(vfetch
->dst
[3].chan
) |
148 S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) |
149 S_SQ_VTX_WORD1_GPR_DST_GPR(vfetch
->dst
[0].sel
);
150 rshader
->bcode
[id
++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
151 rshader
->bcode
[id
++] = 0;
/*
 * r600_shader_update - re-patch vertex fetch swizzles for new formats.
 *
 * Copies rshader->nresource entries of resource_format into
 * rshader->resource_format, then for every vfetch of every node rewrites
 * the bytecode dword at vfetch->cf_addr + 1: the four DST_SEL fields are
 * cleared and then set from the swizzle[0..3] of the format description
 * looked up for resource_format[vfetch->src[1].sel].
 *
 * rshader:         shader whose bytecode is patched in place.
 * resource_format: array indexed by resource id; must hold at least
 *                  rshader->nresource entries, since that many are copied.
 * Returns an int status; the return statements are not shown here.
 */
156 int r600_shader_update(struct r600_shader
*rshader
, enum pipe_format
*resource_format
)
158 struct r600_shader_node
*rnode
;
159 struct r600_shader_vfetch
*vfetch
;
162 memcpy(rshader
->resource_format
, resource_format
,
163 rshader
->nresource
* sizeof(enum pipe_format
));
164 c_list_for_each(rnode
, &rshader
->nodes
) {
165 c_list_for_each(vfetch
, &rnode
->vfetch
) {
166 const struct util_format_description
*desc
;
/* Word 1 of the fetch, as laid out by r600_shader_vfetch_bytecode(). */
167 i
= vfetch
->cf_addr
+ 1;
/* Clear the four destination selects before writing the new ones. */
168 rshader
->bcode
[i
] &= C_SQ_VTX_WORD1_DST_SEL_X
;
169 rshader
->bcode
[i
] &= C_SQ_VTX_WORD1_DST_SEL_Y
;
170 rshader
->bcode
[i
] &= C_SQ_VTX_WORD1_DST_SEL_Z
;
171 rshader
->bcode
[i
] &= C_SQ_VTX_WORD1_DST_SEL_W
;
172 desc
= util_format_description(resource_format
[vfetch
->src
[1].sel
]);
/* The condition guarding this message is on a line not shown here. */
174 fprintf(stderr
, "%s unknown format %d\n", __func__
, resource_format
[vfetch
->src
[1].sel
]);
177 /* WARNING so far TGSI swizzle match R600 ones */
178 rshader
->bcode
[i
] |= S_SQ_VTX_WORD1_DST_SEL_X(desc
->swizzle
[0]);
179 rshader
->bcode
[i
] |= S_SQ_VTX_WORD1_DST_SEL_Y(desc
->swizzle
[1]);
180 rshader
->bcode
[i
] |= S_SQ_VTX_WORD1_DST_SEL_Z(desc
->swizzle
[2]);
181 rshader
->bcode
[i
] |= S_SQ_VTX_WORD1_DST_SEL_W(desc
->swizzle
[3]);
/*
 * r600_shader_register - build the per-vector register table.
 *
 * rshader->nvector is taken from rshader->cshader.nvectors and
 * rshader->gpr is allocated as a zeroed array of that many pointers.
 * Vectors of the input file are handled first; afterwards every other file
 * except C_FILE_INPUT and C_FILE_IMMEDIATE is walked.  For each handled
 * vector a copy `nv` is filled with memcpy() and stored at
 * rshader->gpr[v->id].  Constant-file copies get the id (cid++) + 256.
 * Files without a case produce the "unsupported file" message.  Finally
 * rshader->nconstant and rshader->nresource are set from cid and rid.
 *
 * How nv is allocated, and the ids given to the non-constant files, are on
 * lines not shown here.
 *
 * NOTE(review): gpr[] has nvector slots and is indexed by v->id; this
 * assumes every v->id is below cshader.nvectors - TODO confirm.
 */
187 int r600_shader_register(struct r600_shader
*rshader
)
189 struct c_vector
*v
, *nv
;
190 unsigned tid
, cid
, rid
, i
;
192 rshader
->nvector
= rshader
->cshader
.nvectors
;
193 rshader
->gpr
= calloc(rshader
->nvector
, sizeof(void*));
194 if (rshader
->gpr
== NULL
)
199 /* alloc input first */
200 c_list_for_each(v
, &rshader
->cshader
.files
[C_FILE_INPUT
].vectors
) {
205 memcpy(nv
, v
, sizeof(struct c_vector
));
207 rshader
->gpr
[v
->id
] = nv
;
/* Remaining files; inputs were done above and immediates are skipped. */
209 for (i
= 0; i
< C_FILE_COUNT
; i
++) {
210 if (i
== C_FILE_INPUT
|| i
== C_FILE_IMMEDIATE
)
212 c_list_for_each(v
, &rshader
->cshader
.files
[i
].vectors
) {
215 case C_FILE_TEMPORARY
:
220 memcpy(nv
, v
, sizeof(struct c_vector
));
222 rshader
->gpr
[v
->id
] = nv
;
224 case C_FILE_CONSTANT
:
229 memcpy(nv
, v
, sizeof(struct c_vector
));
/* Constants are numbered from 256 upwards, one per constant vector. */
230 nv
->id
= (cid
++) + 256;
231 rshader
->gpr
[v
->id
] = nv
;
233 case C_FILE_RESOURCE
:
238 memcpy(nv
, v
, sizeof(struct c_vector
));
240 rshader
->gpr
[v
->id
] = nv
;
243 fprintf(stderr
, "%s:%d unsupported file %d\n", __func__
, __LINE__
, v
->file
);
249 rshader
->nconstant
= cid
;
250 rshader
->nresource
= rid
;
/*
 * r600_shader_find_gpr - resolve a c_vector and swizzle into an operand.
 *
 * Vectors of C_FILE_IMMEDIATE take a dedicated branch (its body is on
 * lines not shown here).  For other vectors the entry stored at
 * rshader->gpr[v->id] supplies operand->sel (its id) and the swizzle is
 * stored in operand->chan.  "unknown register" and "invalid swizzle"
 * messages are printed on error paths whose conditions are not shown.
 *
 * rshader: shader whose gpr[] table is consulted.
 * v:       vector to resolve; v->id indexes rshader->gpr.
 * swizzle: component selector copied to operand->chan.
 * operand: output, receives sel and chan.
 * Returns an int status; the return statements are not shown here.
 */
254 int r600_shader_find_gpr(struct r600_shader
*rshader
, struct c_vector
*v
, unsigned swizzle
,
255 struct r600_shader_operand
*operand
)
257 struct c_vector
*tmp
;
259 /* Values [0,127] correspond to GPR[0..127].
260 * Values [256,511] correspond to cfile constants c[0..255].
261 * Other special values are shown in the list below.
262 * 248 SQ_ALU_SRC_0: special constant 0.0.
263 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
264 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
265 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
266 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
267 * 253 SQ_ALU_SRC_LITERAL: literal constant.
268 * 254 SQ_ALU_SRC_PV: previous vector result.
269 * 255 SQ_ALU_SRC_PS: previous scalar result.
278 if (v
->file
== C_FILE_IMMEDIATE
) {
281 tmp
= rshader
->gpr
[v
->id
];
283 fprintf(stderr
, "%s %d unknown register\n", __FILE__
, __LINE__
);
286 operand
->sel
= tmp
->id
;
288 operand
->chan
= swizzle
;
304 fprintf(stderr
, "%s %d invalid swizzle %d\n", __FILE__
, __LINE__
, swizzle
);
/*
 * r600_shader_new_node - allocate an r600_shader_node and queue it.
 *
 * The node is allocated with CALLOC_STRUCT, its vfetch and alu lists are
 * initialised, and it is appended to rshader->nodes.
 *
 * rshader: shader that receives the node.
 * node:    c_node the new node is created for (callers later read it back
 *          through the node's `node` member; the assignment is not shown).
 * Returns a pointer to the new node; the failure path is not shown here.
 */
310 static struct r600_shader_node
*r600_shader_new_node(struct r600_shader
*rshader
, struct c_node
*node
)
312 struct r600_shader_node
*rnode
;
314 rnode
= CALLOC_STRUCT(r600_shader_node
);
318 c_list_init(&rnode
->vfetch
);
319 c_list_init(&rnode
->alu
);
320 c_list_add_tail(rnode
, &rshader
->nodes
);
/*
 * r600_shader_add_vfetch - record a vertex fetch for a VFETCH instruction.
 *
 * instruction is tested against NULL and its first op against
 * C_OPCODE_VFETCH before anything else happens.  When the target node
 * already holds ALU entries, a fresh node is requested from
 * r600_shader_new_node().  A zeroed r600_shader_vfetch is then allocated;
 * its dst[0], src[0] and src[1] operands are resolved with
 * r600_shader_find_gpr() from op[0]'s output and its two inputs; the four
 * destination channels are set to X/Y/Z/W; and the fetch is appended to
 * node->vfetch.
 *
 * rshader:     shader being built.
 * node:        node that should receive the fetch.
 * instruction: instruction to translate.
 * Returns an int status; the return statements are not shown here.
 *
 * NOTE(review): vfetch is heap-allocated before the three
 * r600_shader_find_gpr() calls; confirm it is released if one of them
 * fails, since the error handling is on lines not shown here.
 */
324 static int r600_shader_add_vfetch(struct r600_shader
*rshader
,
325 struct r600_shader_node
*node
,
326 struct c_instruction
*instruction
)
328 struct r600_shader_vfetch
*vfetch
;
329 struct r600_shader_node
*rnode
;
332 if (instruction
== NULL
)
334 if (instruction
->op
[0].opcode
!= C_OPCODE_VFETCH
)
/* A node that already has ALU entries gets a fresh node requested for it. */
336 if (!c_list_empty(&node
->alu
)) {
337 rnode
= r600_shader_new_node(rshader
, node
->node
);
342 vfetch
= calloc(1, sizeof(struct r600_shader_vfetch
));
345 r
= r600_shader_find_gpr(rshader
, instruction
->op
[0].output
.vector
, 0, &vfetch
->dst
[0]);
348 r
= r600_shader_find_gpr(rshader
, instruction
->op
[0].input
[0].vector
, 0, &vfetch
->src
[0]);
351 r
= r600_shader_find_gpr(rshader
, instruction
->op
[0].input
[1].vector
, 0, &vfetch
->src
[1]);
354 vfetch
->dst
[0].chan
= C_SWIZZLE_X
;
355 vfetch
->dst
[1].chan
= C_SWIZZLE_Y
;
356 vfetch
->dst
[2].chan
= C_SWIZZLE_Z
;
357 vfetch
->dst
[3].chan
= C_SWIZZLE_W
;
358 c_list_add_tail(vfetch
, &node
->vfetch
);
/*
 * r600_node_translate - translate the instructions of one c_node.
 *
 * A new r600_shader_node is created for `node`, then every instruction of
 * node->insts is dispatched on the opcode of its first op:
 * C_OPCODE_VFETCH goes to r600_shader_add_vfetch(), and the other visible
 * branch goes to r600_shader_alu_translate().  A message is printed to
 * stderr when a translation step fails (the tests on `r` are on lines not
 * shown here).
 *
 * rshader: shader being built.
 * node:    source node to translate.
 * Returns an int status; the return statements are not shown here.
 */
363 static int r600_node_translate(struct r600_shader
*rshader
, struct c_node
*node
)
365 struct c_instruction
*instruction
;
366 struct r600_shader_node
*rnode
;
369 rnode
= r600_shader_new_node(rshader
, node
);
372 c_list_for_each(instruction
, &node
->insts
) {
373 switch (instruction
->op
[0].opcode
) {
374 case C_OPCODE_VFETCH
:
375 r
= r600_shader_add_vfetch(rshader
, rnode
, instruction
);
377 fprintf(stderr
, "%s %d vfetch failed\n", __func__
, __LINE__
);
382 r
= r600_shader_alu_translate(rshader
, rnode
, instruction
);
384 fprintf(stderr
, "%s %d alu failed\n", __func__
, __LINE__
);
/*
 * r600_shader_translate_rec - translate a node and then its children.
 *
 * node->opcode is compared against C_OPCODE_END first (the action taken is
 * on a line not shown here).  The node itself is translated with
 * r600_node_translate(), then the function recurses into link->node for
 * every link of node->childs.
 *
 * rshader: shader being built.
 * node:    root of the sub-graph to translate.
 * Returns an int status; the return statements are not shown here.
 */
393 int r600_shader_translate_rec(struct r600_shader
*rshader
, struct c_node
*node
)
395 struct c_node_link
*link
;
398 if (node
->opcode
== C_OPCODE_END
)
400 r
= r600_node_translate(rshader
, node
);
403 c_list_for_each(link
, &node
->childs
) {
404 r
= r600_shader_translate_rec(rshader
, link
->node
);
/*
 * r600_shader_insert_alu - append an empty ALU group to a node.
 *
 * Allocates an r600_shader_alu with CALLOC_STRUCT, marks its slots
 * alu[0..4] as INST_NOP with the matching hardware NOP opcode, and appends
 * it to node->alu.
 *
 * rshader: shader being built (not referenced on the lines shown here).
 * node:    node whose alu list receives the group.
 * Returns a pointer to the new group; the failure path is not shown here.
 */
411 static struct r600_shader_alu
*r600_shader_insert_alu(struct r600_shader
*rshader
, struct r600_shader_node
*node
)
413 struct r600_shader_alu
*alu
;
415 alu
= CALLOC_STRUCT(r600_shader_alu
);
/* Every slot starts out as a NOP; the translator overwrites used ones. */
418 alu
->alu
[0].inst
= INST_NOP
;
419 alu
->alu
[1].inst
= INST_NOP
;
420 alu
->alu
[2].inst
= INST_NOP
;
421 alu
->alu
[3].inst
= INST_NOP
;
422 alu
->alu
[4].inst
= INST_NOP
;
423 alu
->alu
[0].opcode
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
;
424 alu
->alu
[1].opcode
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
;
425 alu
->alu
[2].opcode
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
;
426 alu
->alu
[3].opcode
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
;
427 alu
->alu
[4].opcode
= V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
;
428 c_list_add_tail(alu
, &node
->alu
);
/*
 * r600_shader_alu_translate - turn one c_instruction into an ALU group.
 *
 * When the target node already holds vertex fetches a new node is
 * requested.  A fresh ALU group is inserted, then for every op of the
 * instruction:
 *   - the op's opcode selects an r600_alu_instruction entry, whose
 *     `instruction` member selects an r600_instruction_info entry;
 *   - the op's output swizzle picks the slot `comp` of the group, which
 *     receives inst, opcode and is_op3;
 *   - each input is resolved with r600_shader_find_gpr() into src[j];
 *     inputs from C_FILE_IMMEDIATE also copy the selected channel value
 *     into alu->literal[] and update the highest literal component seen;
 *   - the output is resolved with r600_shader_find_gpr() into dst.
 * Afterwards a switch on the highest literal component runs (its cases are
 * on lines not shown here), alu->nliteral is printed, and the slots are
 * scanned from index instruction->nop down to 0, setting `last` on slots
 * that are not INST_NOP (whether the scan stops at the first hit is not
 * shown).
 *
 * rshader:     shader being built.
 * node:        node that should receive the group.
 * instruction: instruction to translate.
 * Returns an int status; the return statements are not shown here.
 *
 * NOTE(review): the r600_alu_instruction table in this file maps
 * C_OPCODE_SUB and C_OPCODE_ABS to INST_COUNT, and that value is used
 * below as an index into r600_instruction_info[]; if INST_COUNT is the
 * entry count this reads past the end of the table - confirm a guard
 * exists on the lines not shown.
 * NOTE(review): the "nliteral" printf writes to stdout on every call and
 * looks like leftover debug output.
 * NOTE(review): the final scan starts at index instruction->nop rather
 * than nop - 1; confirm alu->alu[] always has that slot.
 */
432 static int r600_shader_alu_translate(struct r600_shader
*rshader
,
433 struct r600_shader_node
*node
,
434 struct c_instruction
*instruction
)
436 struct r600_shader_node
*rnode
;
437 struct r600_shader_alu
*alu
;
438 int i
, j
, r
, comp
, litteral_lastcomp
= -1;
/* A node that already has vertex fetches gets a fresh node requested. */
440 if (!c_list_empty(&node
->vfetch
)) {
441 rnode
= r600_shader_new_node(rshader
, node
->node
);
443 fprintf(stderr
, "%s %d new node failed\n", __func__
, __LINE__
);
450 alu
= r600_shader_insert_alu(rshader
, node
);
452 /* check special operation like lit */
454 /* go through operation */
455 for (i
= 0; i
< instruction
->nop
; i
++) {
456 struct r600_alu_instruction
*ainfo
= &r600_alu_instruction
[instruction
->op
[i
].opcode
];
457 struct r600_instruction_info
*iinfo
= &r600_instruction_info
[ainfo
->instruction
];
460 /* check that output is a valid component */
461 comp
= instruction
->op
[i
].output
.swizzle
;
471 fprintf(stderr
, "%s %d invalid output\n", __func__
, __LINE__
);
/* The output component doubles as the slot index inside the group. */
474 alu
->alu
[comp
].inst
= ainfo
->instruction
;
475 alu
->alu
[comp
].opcode
= iinfo
->opcode
;
476 alu
->alu
[comp
].is_op3
= iinfo
->is_op3
;
477 for (j
= 0; j
< instruction
->op
[i
].ninput
; j
++) {
478 r
= r600_shader_find_gpr(rshader
, instruction
->op
[i
].input
[j
].vector
,
479 instruction
->op
[i
].input
[j
].swizzle
, &alu
->alu
[comp
].src
[j
]);
481 fprintf(stderr
, "%s %d register failed\n", __FILE__
, __LINE__
);
/* Immediate inputs also store their value in the group's literal[]. */
484 if (instruction
->op
[i
].input
[j
].vector
->file
== C_FILE_IMMEDIATE
) {
/* r is reused here as the literal component index. */
485 r
= instruction
->op
[i
].input
[j
].swizzle
;
495 fprintf(stderr
, "%s %d invalid input\n", __func__
, __LINE__
);
498 alu
->literal
[r
] = instruction
->op
[i
].input
[j
].vector
->channel
[r
]->value
;
/* Remember the highest literal component used by this group. */
499 if (r
> litteral_lastcomp
) {
500 litteral_lastcomp
= r
;
504 r
= r600_shader_find_gpr(rshader
, instruction
->op
[i
].output
.vector
,
505 instruction
->op
[i
].output
.swizzle
, &alu
->alu
[comp
].dst
);
507 fprintf(stderr
, "%s %d register failed\n", __FILE__
, __LINE__
);
511 switch (litteral_lastcomp
) {
524 printf("nliteral: %d\n", alu
->nliteral
);
525 for (i
= instruction
->nop
; i
>= 0; i
--) {
526 if (alu
->alu
[i
].inst
!= INST_NOP
) {
527 alu
->alu
[i
].last
= 1;
/*
 * r600_shader_node_place - assign bytecode addresses to nodes.
 *
 * First pass over rshader->nodes: each ALU group adds its nalu and half of
 * its nliteral (nliteral >> 1) to node->nslot; the vfetch list is walked
 * as well (loop body not shown here); nodes holding vertex fetches get
 * cf_addr adjusted with the 0xFFFFFFFE mask; node->cf_addr is set to the
 * running cf_addr, which then advances by node->nslot * 2.
 * rshader->nslot receives the final cf_addr.  Second pass: every
 * node->cf_addr is shifted by cf_id * 2.  Finally rshader->ncf grows by
 * the number of output vectors and rshader->ndw is computed as
 * ncf * 2 + nslot * 2.
 *
 * rshader: shader whose nodes are placed.
 *
 * NOTE(review): cf_addr advances by nslot * 2 per node and is stored in
 * rshader->nslot, which is multiplied by 2 again when computing ndw;
 * confirm the units unless this is adjusted on lines not shown here.
 */
535 void r600_shader_node_place(struct r600_shader
*rshader
)
537 struct r600_shader_node
*node
, *nnode
;
538 struct r600_shader_alu
*alu
, *nalu
;
539 struct r600_shader_vfetch
*vfetch
, *nvfetch
;
540 unsigned cf_id
= 0, cf_addr
= 0;
544 c_list_for_each_safe(node
, nnode
, &rshader
->nodes
) {
545 c_list_for_each_safe(alu
, nalu
, &node
->alu
) {
546 node
->nslot
+= alu
->nalu
;
547 node
->nslot
+= alu
->nliteral
>> 1;
550 c_list_for_each_safe(vfetch
, nvfetch
, &node
->vfetch
) {
554 if (!c_list_empty(&node
->vfetch
)) {
555 /* fetch node need to be 16 bytes aligned*/
/* Clearing bit 0 makes the address a multiple of two. */
557 cf_addr
&= 0xFFFFFFFEUL
;
560 node
->cf_addr
= cf_addr
;
562 cf_addr
+= node
->nslot
* 2;
565 rshader
->nslot
= cf_addr
;
566 c_list_for_each_safe(node
, nnode
, &rshader
->nodes
) {
567 node
->cf_addr
+= cf_id
* 2;
569 rshader
->ncf
+= rshader
->cshader
.files
[C_FILE_OUTPUT
].nvectors
;
570 rshader
->ndw
= rshader
->ncf
* 2 + rshader
->nslot
* 2;
/*
 * r600_shader_legalize - legalization entry point for an r600_shader.
 * NOTE(review): only the signature is shown here; the body's behaviour is
 * not documented because it cannot be read from these lines.
 */
573 int r600_shader_legalize(struct r600_shader
*rshader
)
/*
 * r600_cshader_legalize_rec - rewrite ops in a node and its children.
 *
 * Every op of every instruction in node->insts is examined with a switch
 * on its opcode.  In the branch shown here the opcode is replaced by
 * C_OPCODE_SGT and input[0] and input[1] are exchanged through a temporary
 * c_operand (the case label is on a line not shown here; presumably the
 * opposite comparison - TODO confirm).  The function then recurses into
 * link->node for every link of node->childs.
 *
 * shader: shader being legalized, passed through to the recursive calls.
 * node:   root of the sub-graph to process.
 * Returns an int status; the return statements are not shown here.
 */
579 static int r600_cshader_legalize_rec(struct c_shader
*shader
, struct c_node
*node
)
581 struct c_node_link
*link
;
582 struct c_instruction
*i
;
583 struct c_operand operand
;
587 c_list_for_each(i
, &node
->insts
) {
588 for (k
= 0; k
< i
->nop
; k
++) {
589 switch (i
->op
[k
].opcode
) {
591 i
->op
[k
].opcode
= C_OPCODE_SGT
;
/* Swap the two inputs through `operand`. */
592 memcpy(&operand
, &i
->op
[k
].input
[0], sizeof(struct c_operand
));
593 memcpy(&i
->op
[k
].input
[0], &i
->op
[k
].input
[1], sizeof(struct c_operand
));
594 memcpy(&i
->op
[k
].input
[1], &operand
, sizeof(struct c_operand
));
601 c_list_for_each(link
, &node
->childs
) {
602 r
= r600_cshader_legalize_rec(shader
, link
->node
);
610 int r600_cshader_legalize(struct c_shader
*shader
)
612 return r600_cshader_legalize_rec(shader
, &shader
->entry
);
616 struct r600_instruction_info r600_instruction_info
[] = {
617 {INST_ADD
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
, 0, 0},
618 {INST_MUL
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
, 0, 0},
619 {INST_MUL_IEEE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE
, 0, 0},
620 {INST_MAX
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
, 0, 0},
621 {INST_MIN
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
, 0, 0},
622 {INST_MAX_DX10
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10
, 0, 0},
623 {INST_MIN_DX10
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10
, 0, 0},
624 {INST_SETE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
, 0, 0},
625 {INST_SETGT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
, 0, 0},
626 {INST_SETGE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
, 0, 0},
627 {INST_SETNE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
, 0, 0},
628 {INST_SETE_DX10
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10
, 0, 0},
629 {INST_SETGT_DX10
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10
, 0, 0},
630 {INST_SETGE_DX10
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10
, 0, 0},
631 {INST_SETNE_DX10
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10
, 0, 0},
632 {INST_FRACT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
, 0, 0},
633 {INST_TRUNC
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
, 0, 0},
634 {INST_CEIL
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL
, 0, 0},
635 {INST_RNDNE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE
, 0, 0},
636 {INST_FLOOR
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
, 0, 0},
637 {INST_MOVA
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA
, 0, 0},
638 {INST_MOVA_FLOOR
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR
, 0, 0},
639 {INST_MOVA_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT
, 0, 0},
640 {INST_MOV
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
, 0, 0},
641 {INST_NOP
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
, 0, 0},
642 {INST_PRED_SETGT_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT
, 0, 0},
643 {INST_PRED_SETGE_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT
, 0, 0},
644 {INST_PRED_SETE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE
, 0, 0},
645 {INST_PRED_SETGT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT
, 0, 0},
646 {INST_PRED_SETGE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE
, 0, 0},
647 {INST_PRED_SETNE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE
, 0, 0},
648 {INST_PRED_SET_INV
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV
, 0, 0},
649 {INST_PRED_SET_POP
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP
, 0, 0},
650 {INST_PRED_SET_CLR
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR
, 0, 0},
651 {INST_PRED_SET_RESTORE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE
, 0, 0},
652 {INST_PRED_SETE_PUSH
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH
, 0, 0},
653 {INST_PRED_SETGT_PUSH
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH
, 0, 0},
654 {INST_PRED_SETGE_PUSH
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH
, 0, 0},
655 {INST_PRED_SETNE_PUSH
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH
, 0, 0},
656 {INST_KILLE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE
, 0, 0},
657 {INST_KILLGT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
, 0, 0},
658 {INST_KILLGE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE
, 0, 0},
659 {INST_KILLNE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE
, 0, 0},
660 {INST_AND_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT
, 0, 0},
661 {INST_OR_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT
, 0, 0},
662 {INST_XOR_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT
, 0, 0},
663 {INST_NOT_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT
, 0, 0},
664 {INST_ADD_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT
, 0, 0},
665 {INST_SUB_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT
, 0, 0},
666 {INST_MAX_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT
, 0, 0},
667 {INST_MIN_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT
, 0, 0},
668 {INST_MAX_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT
, 0, 0},
669 {INST_MIN_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT
, 0, 0},
670 {INST_SETE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT
, 0, 0},
671 {INST_SETGT_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT
, 0, 0},
672 {INST_SETGE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT
, 0, 0},
673 {INST_SETNE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT
, 0, 0},
674 {INST_SETGT_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT
, 0, 0},
675 {INST_SETGE_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT
, 0, 0},
676 {INST_KILLGT_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT
, 0, 0},
677 {INST_KILLGE_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT
, 0, 0},
678 {INST_PRED_SETE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT
, 0, 0},
679 {INST_PRED_SETGT_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT
, 0, 0},
680 {INST_PRED_SETGE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT
, 0, 0},
681 {INST_PRED_SETNE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT
, 0, 0},
682 {INST_KILLE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT
, 0, 0},
683 {INST_KILLGT_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT
, 0, 0},
684 {INST_KILLGE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT
, 0, 0},
685 {INST_KILLNE_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT
, 0, 0},
686 {INST_PRED_SETE_PUSH_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT
, 0, 0},
687 {INST_PRED_SETGT_PUSH_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT
, 0, 0},
688 {INST_PRED_SETGE_PUSH_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT
, 0, 0},
689 {INST_PRED_SETNE_PUSH_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT
, 0, 0},
690 {INST_PRED_SETLT_PUSH_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT
, 0, 0},
691 {INST_PRED_SETLE_PUSH_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT
, 0, 0},
692 {INST_DOT4
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
, 0, 0},
693 {INST_DOT4_IEEE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE
, 0, 0},
694 {INST_CUBE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE
, 0, 0},
695 {INST_MAX4
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4
, 0, 0},
696 {INST_MOVA_GPR_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT
, 0, 0},
697 {INST_EXP_IEEE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
, 1, 0},
698 {INST_LOG_CLAMPED
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED
, 1, 0},
699 {INST_LOG_IEEE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
, 1, 0},
700 {INST_RECIP_CLAMPED
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED
, 1, 0},
701 {INST_RECIP_FF
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF
, 1, 0},
702 {INST_RECIP_IEEE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
, 1, 0},
703 {INST_RECIPSQRT_CLAMPED
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED
, 1, 0},
704 {INST_RECIPSQRT_FF
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF
, 1, 0},
705 {INST_RECIPSQRT_IEEE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE
, 1, 0},
706 {INST_SQRT_IEEE
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE
, 1, 0},
707 {INST_FLT_TO_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
, 1, 0},
708 {INST_INT_TO_FLT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT
, 1, 0},
709 {INST_UINT_TO_FLT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT
, 1, 0},
710 {INST_SIN
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
, 1, 0},
711 {INST_COS
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
, 1, 0},
712 {INST_ASHR_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT
, 1, 0},
713 {INST_LSHR_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT
, 1, 0},
714 {INST_LSHL_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT
, 1, 0},
715 {INST_MULLO_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT
, 1, 0},
716 {INST_MULHI_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT
, 1, 0},
717 {INST_MULLO_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT
, 1, 0},
718 {INST_MULHI_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT
, 1, 0},
719 {INST_RECIP_INT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT
, 1, 0},
720 {INST_RECIP_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT
, 1, 0},
721 {INST_FLT_TO_UINT
, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT
, 1, 0},
722 {INST_MUL_LIT
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT
, 1, 1},
723 {INST_MUL_LIT_M2
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2
, 1, 1},
724 {INST_MUL_LIT_M4
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4
, 1, 1},
725 {INST_MUL_LIT_D2
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2
, 1, 1},
726 {INST_MULADD
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD
, 0, 1},
727 {INST_MULADD_M2
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2
, 0, 1},
728 {INST_MULADD_M4
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4
, 0, 1},
729 {INST_MULADD_D2
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2
, 0, 1},
730 {INST_MULADD_IEEE
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE
, 0, 1},
731 {INST_MULADD_IEEE_M2
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2
, 0, 1},
732 {INST_MULADD_IEEE_M4
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4
, 0, 1},
733 {INST_MULADD_IEEE_D2
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2
, 0, 1},
734 {INST_CNDE
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE
, 0, 1},
735 {INST_CNDGT
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT
, 0, 1},
736 {INST_CNDGE
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE
, 0, 1},
737 {INST_CNDE_INT
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT
, 0, 1},
738 {INST_CNDGT_INT
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT
, 0, 1},
739 {INST_CNDGE_INT
, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT
, 0, 1},
742 struct r600_alu_instruction r600_alu_instruction
[C_OPCODE_LAST
] = {
743 {C_OPCODE_NOP
, INST_NOP
},
744 {C_OPCODE_MOV
, INST_MOV
},
745 {C_OPCODE_LIT
, INST_NOP
},
746 {C_OPCODE_RCP
, INST_RECIP_IEEE
},
747 {C_OPCODE_RSQ
, INST_RECIPSQRT_IEEE
},
748 {C_OPCODE_EXP
, INST_EXP_IEEE
},
749 {C_OPCODE_LOG
, INST_LOG_IEEE
},
750 {C_OPCODE_MUL
, INST_MUL
},
751 {C_OPCODE_ADD
, INST_ADD
},
752 {C_OPCODE_DP3
, INST_DOT4
},
753 {C_OPCODE_DP4
, INST_DOT4
},
754 {C_OPCODE_DST
, INST_NOP
},
755 {C_OPCODE_MIN
, INST_MIN
},
756 {C_OPCODE_MAX
, INST_MAX
},
757 {C_OPCODE_SLT
, INST_NOP
},
758 {C_OPCODE_SGE
, INST_NOP
},
759 {C_OPCODE_MAD
, INST_MULADD
},
760 {C_OPCODE_SUB
, INST_COUNT
},
761 {C_OPCODE_LRP
, INST_NOP
},
762 {C_OPCODE_CND
, INST_NOP
},
764 {C_OPCODE_DP2A
, INST_NOP
},
767 {C_OPCODE_FRC
, INST_NOP
},
768 {C_OPCODE_CLAMP
, INST_NOP
},
769 {C_OPCODE_FLR
, INST_NOP
},
770 {C_OPCODE_ROUND
, INST_NOP
},
771 {C_OPCODE_EX2
, INST_NOP
},
772 {C_OPCODE_LG2
, INST_NOP
},
773 {C_OPCODE_POW
, INST_NOP
},
774 {C_OPCODE_XPD
, INST_NOP
},
776 {C_OPCODE_ABS
, INST_COUNT
},
777 {C_OPCODE_RCC
, INST_NOP
},
778 {C_OPCODE_DPH
, INST_NOP
},
779 {C_OPCODE_COS
, INST_COS
},
780 {C_OPCODE_DDX
, INST_NOP
},
781 {C_OPCODE_DDY
, INST_NOP
},
782 {C_OPCODE_KILP
, INST_NOP
},
783 {C_OPCODE_PK2H
, INST_NOP
},
784 {C_OPCODE_PK2US
, INST_NOP
},
785 {C_OPCODE_PK4B
, INST_NOP
},
786 {C_OPCODE_PK4UB
, INST_NOP
},
787 {C_OPCODE_RFL
, INST_NOP
},
788 {C_OPCODE_SEQ
, INST_NOP
},
789 {C_OPCODE_SFL
, INST_NOP
},
790 {C_OPCODE_SGT
, INST_SETGT
},
791 {C_OPCODE_SIN
, INST_SIN
},
792 {C_OPCODE_SLE
, INST_NOP
},
793 {C_OPCODE_SNE
, INST_NOP
},
794 {C_OPCODE_STR
, INST_NOP
},
795 {C_OPCODE_TEX
, INST_NOP
},
796 {C_OPCODE_TXD
, INST_NOP
},
797 {C_OPCODE_TXP
, INST_NOP
},
798 {C_OPCODE_UP2H
, INST_NOP
},
799 {C_OPCODE_UP2US
, INST_NOP
},
800 {C_OPCODE_UP4B
, INST_NOP
},
801 {C_OPCODE_UP4UB
, INST_NOP
},
802 {C_OPCODE_X2D
, INST_NOP
},
803 {C_OPCODE_ARA
, INST_NOP
},
804 {C_OPCODE_ARR
, INST_NOP
},
805 {C_OPCODE_BRA
, INST_NOP
},
806 {C_OPCODE_CAL
, INST_NOP
},
807 {C_OPCODE_RET
, INST_NOP
},
808 {C_OPCODE_SSG
, INST_NOP
},
809 {C_OPCODE_CMP
, INST_NOP
},
810 {C_OPCODE_SCS
, INST_NOP
},
811 {C_OPCODE_TXB
, INST_NOP
},
812 {C_OPCODE_NRM
, INST_NOP
},
813 {C_OPCODE_DIV
, INST_NOP
},
814 {C_OPCODE_DP2
, INST_NOP
},
815 {C_OPCODE_TXL
, INST_NOP
},
816 {C_OPCODE_BRK
, INST_NOP
},
817 {C_OPCODE_IF
, INST_NOP
},
818 {C_OPCODE_BGNFOR
, INST_NOP
},
819 {C_OPCODE_REP
, INST_NOP
},
820 {C_OPCODE_ELSE
, INST_NOP
},
821 {C_OPCODE_ENDIF
, INST_NOP
},
822 {C_OPCODE_ENDFOR
, INST_NOP
},
823 {C_OPCODE_ENDREP
, INST_NOP
},
824 {C_OPCODE_PUSHA
, INST_NOP
},
825 {C_OPCODE_POPA
, INST_NOP
},
826 {C_OPCODE_CEIL
, INST_NOP
},
827 {C_OPCODE_I2F
, INST_NOP
},
828 {C_OPCODE_NOT
, INST_NOP
},
829 {C_OPCODE_TRUNC
, INST_NOP
},
830 {C_OPCODE_SHL
, INST_NOP
},
832 {C_OPCODE_AND
, INST_NOP
},
833 {C_OPCODE_OR
, INST_NOP
},
834 {C_OPCODE_MOD
, INST_NOP
},
835 {C_OPCODE_XOR
, INST_NOP
},
836 {C_OPCODE_SAD
, INST_NOP
},
837 {C_OPCODE_TXF
, INST_NOP
},
838 {C_OPCODE_TXQ
, INST_NOP
},
839 {C_OPCODE_CONT
, INST_NOP
},
840 {C_OPCODE_EMIT
, INST_NOP
},
841 {C_OPCODE_ENDPRIM
, INST_NOP
},
842 {C_OPCODE_BGNLOOP
, INST_NOP
},
843 {C_OPCODE_BGNSUB
, INST_NOP
},
844 {C_OPCODE_ENDLOOP
, INST_NOP
},
845 {C_OPCODE_ENDSUB
, INST_NOP
},
855 {C_OPCODE_NRM4
, INST_NOP
},
856 {C_OPCODE_CALLNZ
, INST_NOP
},
857 {C_OPCODE_IFC
, INST_NOP
},
858 {C_OPCODE_BREAKC
, INST_NOP
},
859 {C_OPCODE_KIL
, INST_NOP
},
860 {C_OPCODE_END
, INST_NOP
},
862 {C_OPCODE_F2I
, INST_NOP
},
863 {C_OPCODE_IDIV
, INST_NOP
},
864 {C_OPCODE_IMAX
, INST_NOP
},
865 {C_OPCODE_IMIN
, INST_NOP
},
866 {C_OPCODE_INEG
, INST_NOP
},
867 {C_OPCODE_ISGE
, INST_NOP
},
868 {C_OPCODE_ISHR
, INST_NOP
},
869 {C_OPCODE_ISLT
, INST_NOP
},
870 {C_OPCODE_F2U
, INST_NOP
},
871 {C_OPCODE_U2F
, INST_NOP
},
872 {C_OPCODE_UADD
, INST_NOP
},
873 {C_OPCODE_UDIV
, INST_NOP
},
874 {C_OPCODE_UMAD
, INST_NOP
},
875 {C_OPCODE_UMAX
, INST_NOP
},
876 {C_OPCODE_UMIN
, INST_NOP
},
877 {C_OPCODE_UMOD
, INST_NOP
},
878 {C_OPCODE_UMUL
, INST_NOP
},
879 {C_OPCODE_USEQ
, INST_NOP
},
880 {C_OPCODE_USGE
, INST_NOP
},
881 {C_OPCODE_USHR
, INST_NOP
},
882 {C_OPCODE_USLT
, INST_NOP
},
883 {C_OPCODE_USNE
, INST_NOP
},
884 {C_OPCODE_SWITCH
, INST_NOP
},
885 {C_OPCODE_CASE
, INST_NOP
},
886 {C_OPCODE_DEFAULT
, INST_NOP
},
887 {C_OPCODE_ENDSWITCH
, INST_NOP
},
888 {C_OPCODE_VFETCH
, INST_NOP
},
889 {C_OPCODE_ENTRY
, INST_NOP
},
890 {C_OPCODE_ARL
, INST_NOP
},