0b45d4f6e019d1ba1f9d973a91ef5a1570b5c22a
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_context *rctx = (struct r600_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
107 struct r600_pipe_shader *pipeshader,
108 struct r600_shader_key key);
109
110 int r600_pipe_shader_create(struct pipe_context *ctx,
111 struct r600_pipe_shader *shader,
112 struct r600_shader_key key)
113 {
114 static int dump_shaders = -1;
115 struct r600_context *rctx = (struct r600_context *)ctx;
116 struct r600_pipe_shader_selector *sel = shader->selector;
117 int r;
118
119 /* Would like some magic "get_bool_option_once" routine.
120 */
121 if (dump_shaders == -1)
122 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
123
124 if (dump_shaders) {
125 fprintf(stderr, "--------------------------------------------------------------\n");
126 tgsi_dump(sel->tokens, 0);
127
128 if (sel->so.num_outputs) {
129 unsigned i;
130 fprintf(stderr, "STREAMOUT\n");
131 for (i = 0; i < sel->so.num_outputs; i++) {
132 unsigned mask = ((1 << sel->so.output[i].num_components) - 1) <<
133 sel->so.output[i].start_component;
134 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
135 sel->so.output[i].output_buffer, sel->so.output[i].register_index,
136 mask & 1 ? "x" : "_",
137 (mask >> 1) & 1 ? "y" : "_",
138 (mask >> 2) & 1 ? "z" : "_",
139 (mask >> 3) & 1 ? "w" : "_");
140 }
141 }
142 }
143 r = r600_shader_from_tgsi(rctx->screen, shader, key);
144 if (r) {
145 R600_ERR("translation from TGSI failed !\n");
146 return r;
147 }
148 r = r600_bytecode_build(&shader->shader.bc);
149 if (r) {
150 R600_ERR("building bytecode failed !\n");
151 return r;
152 }
153 if (dump_shaders) {
154 r600_bytecode_dump(&shader->shader.bc);
155 fprintf(stderr, "______________________________________________________________\n");
156 }
157 return r600_pipe_shader(ctx, shader);
158 }
159
160 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
161 {
162 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
163 r600_bytecode_clear(&shader->shader.bc);
164 }
165
166 /*
167 * tgsi -> r600 shader
168 */
169 struct r600_shader_tgsi_instruction;
170
/* One source operand of a TGSI instruction, translated by tgsi_src(). */
struct r600_shader_src {
	unsigned sel;		/* register / constant selector */
	unsigned swizzle[4];	/* source channel for each of x, y, z, w */
	unsigned neg;		/* negate modifier */
	unsigned abs;		/* absolute value modifier */
	unsigned rel;		/* V_SQ_REL_RELATIVE for indirect addressing */
	uint32_t value[4];	/* literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
};
179
180 struct r600_shader_ctx {
181 struct tgsi_shader_info info;
182 struct tgsi_parse_context parse;
183 const struct tgsi_token *tokens;
184 unsigned type;
185 unsigned file_offset[TGSI_FILE_COUNT];
186 unsigned temp_reg;
187 struct r600_shader_tgsi_instruction *inst_info;
188 struct r600_bytecode *bc;
189 struct r600_shader *shader;
190 struct r600_shader_src src[4];
191 uint32_t *literals;
192 uint32_t nliterals;
193 uint32_t max_driver_temp_used;
194 /* needed for evergreen interpolation */
195 boolean input_centroid;
196 boolean input_linear;
197 boolean input_perspective;
198 int num_interp_gpr;
199 int face_gpr;
200 int colors_used;
201 boolean clip_vertex_write;
202 unsigned cv_output;
203 int fragcoord_input;
204 int native_integers;
205 };
206
/*
 * One entry of the per-chip opcode tables, which are indexed by TGSI opcode.
 * The member order is part of the layout and is kept as is.
 */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;
	unsigned is_op3;
	unsigned r600_opcode;
	/* emits the bytecode for the instruction held in ctx */
	int (*process)(struct r600_shader_ctx *ctx);
};
213
214 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
215 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
216 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
217 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
218 static int tgsi_else(struct r600_shader_ctx *ctx);
219 static int tgsi_endif(struct r600_shader_ctx *ctx);
220 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
221 static int tgsi_endloop(struct r600_shader_ctx *ctx);
222 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
223
224 /*
225 * bytestream -> r600 shader
226 *
227 * These functions are used to transform the output of the LLVM backend into
228 * struct r600_bytecode.
229 */
230
231 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
232 unsigned char * bytes, unsigned num_bytes);
233
234 #ifdef HAVE_OPENCL
235 int r600_compute_shader_create(struct pipe_context * ctx,
236 LLVMModuleRef mod, struct r600_bytecode * bytecode)
237 {
238 struct r600_context *r600_ctx = (struct r600_context *)ctx;
239 unsigned char * bytes;
240 unsigned byte_count;
241 struct r600_shader_ctx shader_ctx;
242 unsigned dump = 0;
243
244 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
245 dump = 1;
246 }
247
248 r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
249 shader_ctx.bc = bytecode;
250 r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
251 shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
252 r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
253 if (shader_ctx.bc->chip_class == CAYMAN) {
254 cm_bytecode_add_cf_end(shader_ctx.bc);
255 }
256 r600_bytecode_build(shader_ctx.bc);
257 if (dump) {
258 r600_bytecode_dump(shader_ctx.bc);
259 }
260 free(bytes);
261 return 1;
262 }
263
264 #endif /* HAVE_OPENCL */
265
/*
 * Read a little-endian 32-bit value from "bytes" starting at *bytes_read
 * and advance *bytes_read by four.
 */
static uint32_t i32_from_byte_stream(unsigned char * bytes,
	unsigned * bytes_read)
{
	unsigned i;
	uint32_t out = 0;
	for (i = 0; i < 4; i++) {
		/* widen first: shifting the int-promoted byte by 24 overflows
		 * a signed int when the byte is >= 0x80 */
		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
	}
	return out;
}
276
277 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
278 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
279 {
280 unsigned i;
281 unsigned sel0, sel1;
282 sel0 = bytes[bytes_read++];
283 sel1 = bytes[bytes_read++];
284 alu->src[src_idx].sel = sel0 | (sel1 << 8);
285 alu->src[src_idx].chan = bytes[bytes_read++];
286 alu->src[src_idx].neg = bytes[bytes_read++];
287 alu->src[src_idx].abs = bytes[bytes_read++];
288 alu->src[src_idx].rel = bytes[bytes_read++];
289 alu->src[src_idx].kc_bank = bytes[bytes_read++];
290 for (i = 0; i < 4; i++) {
291 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
292 }
293 return bytes_read;
294 }
295
296 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
297 unsigned char * bytes, unsigned bytes_read)
298 {
299 unsigned src_idx;
300 struct r600_bytecode_alu alu;
301 unsigned src_const_reg[3];
302 uint32_t word0, word1;
303
304 memset(&alu, 0, sizeof(alu));
305 for(src_idx = 0; src_idx < 3; src_idx++) {
306 unsigned i;
307 src_const_reg[src_idx] = bytes[bytes_read++];
308 for (i = 0; i < 4; i++) {
309 alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8);
310 }
311 }
312
313 word0 = i32_from_byte_stream(bytes, &bytes_read);
314 word1 = i32_from_byte_stream(bytes, &bytes_read);
315
316 switch(ctx->bc->chip_class) {
317 case R600:
318 r600_bytecode_alu_read(&alu, word0, word1);
319 break;
320 case R700:
321 case EVERGREEN:
322 case CAYMAN:
323 r700_bytecode_alu_read(&alu, word0, word1);
324 break;
325 }
326
327 for(src_idx = 0; src_idx < 3; src_idx++) {
328 if (src_const_reg[src_idx])
329 alu.src[src_idx].sel += 512;
330 }
331
332 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
333 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
334 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) ||
335 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) {
336 alu.update_pred = 1;
337 alu.dst.write = 0;
338 alu.src[1].sel = V_SQ_ALU_SRC_0;
339 alu.src[1].chan = 0;
340 alu.last = 1;
341 }
342
343 if (alu.execute_mask) {
344 alu.pred_sel = 0;
345 r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
346 } else {
347 r600_bytecode_add_alu(ctx->bc, &alu);
348 }
349
350 /* XXX: Handle other KILL instructions */
351 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
352 ctx->shader->uses_kill = 1;
353 /* XXX: This should be enforced in the LLVM backend. */
354 ctx->bc->force_add_cf = 1;
355 }
356 return bytes_read;
357 }
358
359 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
360 unsigned pred_inst)
361 {
362 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
363 fc_pushlevel(ctx, FC_IF);
364 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
365 }
366
367 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
368 struct r600_bytecode_alu *alu, unsigned compare_opcode)
369 {
370 unsigned opcode = TGSI_OPCODE_BRK;
371 if (ctx->bc->chip_class == CAYMAN)
372 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
373 else if (ctx->bc->chip_class >= EVERGREEN)
374 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
375 else
376 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
377 llvm_if(ctx, alu, compare_opcode);
378 tgsi_loop_brk_cont(ctx);
379 tgsi_endif(ctx);
380 }
381
382 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
383 unsigned char * bytes, unsigned bytes_read)
384 {
385 struct r600_bytecode_alu alu;
386 unsigned inst;
387 memset(&alu, 0, sizeof(alu));
388 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
389 inst = bytes[bytes_read++];
390 switch (inst) {
391 case 0: /* FC_IF */
392 llvm_if(ctx, &alu,
393 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
394 break;
395 case 1: /* FC_IF_INT */
396 llvm_if(ctx, &alu,
397 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
398 break;
399 case 2: /* FC_ELSE */
400 tgsi_else(ctx);
401 break;
402 case 3: /* FC_ENDIF */
403 tgsi_endif(ctx);
404 break;
405 case 4: /* FC_BGNLOOP */
406 tgsi_bgnloop(ctx);
407 break;
408 case 5: /* FC_ENDLOOP */
409 tgsi_endloop(ctx);
410 break;
411 case 6: /* FC_BREAK */
412 r600_break_from_byte_stream(ctx, &alu,
413 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
414 break;
415 case 7: /* FC_BREAK_NZ_INT */
416 r600_break_from_byte_stream(ctx, &alu,
417 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
418 break;
419 case 8: /* FC_CONTINUE */
420 {
421 unsigned opcode = TGSI_OPCODE_CONT;
422 if (ctx->bc->chip_class == CAYMAN) {
423 ctx->inst_info =
424 &cm_shader_tgsi_instruction[opcode];
425 } else if (ctx->bc->chip_class >= EVERGREEN) {
426 ctx->inst_info =
427 &eg_shader_tgsi_instruction[opcode];
428 } else {
429 ctx->inst_info =
430 &r600_shader_tgsi_instruction[opcode];
431 }
432 tgsi_loop_brk_cont(ctx);
433 }
434 break;
435 case 9: /* FC_BREAK_Z_INT */
436 r600_break_from_byte_stream(ctx, &alu,
437 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
438 break;
439 case 10: /* FC_BREAK_NZ */
440 r600_break_from_byte_stream(ctx, &alu,
441 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
442 break;
443 }
444
445 return bytes_read;
446 }
447
448 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
449 unsigned char * bytes, unsigned bytes_read)
450 {
451 struct r600_bytecode_tex tex;
452
453 tex.inst = bytes[bytes_read++];
454 tex.resource_id = bytes[bytes_read++];
455 tex.src_gpr = bytes[bytes_read++];
456 tex.src_rel = bytes[bytes_read++];
457 tex.dst_gpr = bytes[bytes_read++];
458 tex.dst_rel = bytes[bytes_read++];
459 tex.dst_sel_x = bytes[bytes_read++];
460 tex.dst_sel_y = bytes[bytes_read++];
461 tex.dst_sel_z = bytes[bytes_read++];
462 tex.dst_sel_w = bytes[bytes_read++];
463 tex.lod_bias = bytes[bytes_read++];
464 tex.coord_type_x = bytes[bytes_read++];
465 tex.coord_type_y = bytes[bytes_read++];
466 tex.coord_type_z = bytes[bytes_read++];
467 tex.coord_type_w = bytes[bytes_read++];
468 tex.offset_x = bytes[bytes_read++];
469 tex.offset_y = bytes[bytes_read++];
470 tex.offset_z = bytes[bytes_read++];
471 tex.sampler_id = bytes[bytes_read++];
472 tex.src_sel_x = bytes[bytes_read++];
473 tex.src_sel_y = bytes[bytes_read++];
474 tex.src_sel_z = bytes[bytes_read++];
475 tex.src_sel_w = bytes[bytes_read++];
476
477 r600_bytecode_add_tex(ctx->bc, &tex);
478
479 return bytes_read;
480 }
481
482 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
483 unsigned char * bytes, unsigned bytes_read)
484 {
485 struct r600_bytecode_vtx vtx;
486
487 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
488 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
489 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
490
491 memset(&vtx, 0, sizeof(vtx));
492
493 /* WORD0 */
494 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
495 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
496 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
497 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
498 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
499 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
500
501 /* WORD1 */
502 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
503 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
504 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
505 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
506 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
507 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
508 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
509 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
510 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
511 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
512
513 /* WORD 2*/
514 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
515 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
516
517 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
518 fprintf(stderr, "Error adding vtx\n");
519 }
520 /* Use the Texture Cache */
521 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
522 return bytes_read;
523 }
524
525 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
526 unsigned char * bytes, unsigned num_bytes)
527 {
528 unsigned bytes_read = 0;
529 unsigned i, byte;
530 while (bytes_read < num_bytes) {
531 char inst_type = bytes[bytes_read++];
532 switch (inst_type) {
533 case 0:
534 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
535 bytes_read);
536 break;
537 case 1:
538 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
539 bytes_read);
540 break;
541 case 2:
542 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
543 bytes_read);
544 break;
545 case 3:
546 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
547 for (i = 0; i < 2; i++) {
548 for (byte = 0 ; byte < 4; byte++) {
549 ctx->bc->cf_last->isa[i] |=
550 (bytes[bytes_read++] << (byte * 8));
551 }
552 }
553 break;
554
555 case 4:
556 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
557 bytes_read);
558 break;
559 default:
560 /* XXX: Error here */
561 break;
562 }
563 }
564 }
565
566 /* End bytestream -> r600 shader functions*/
567
568 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
569 {
570 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
571 int j;
572
573 if (i->Instruction.NumDstRegs > 1) {
574 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
575 return -EINVAL;
576 }
577 if (i->Instruction.Predicate) {
578 R600_ERR("predicate unsupported\n");
579 return -EINVAL;
580 }
581 #if 0
582 if (i->Instruction.Label) {
583 R600_ERR("label unsupported\n");
584 return -EINVAL;
585 }
586 #endif
587 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
588 if (i->Src[j].Register.Dimension) {
589 R600_ERR("unsupported src %d (dimension %d)\n", j,
590 i->Src[j].Register.Dimension);
591 return -EINVAL;
592 }
593 }
594 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
595 if (i->Dst[j].Register.Dimension) {
596 R600_ERR("unsupported dst (dimension)\n");
597 return -EINVAL;
598 }
599 }
600 return 0;
601 }
602
603 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
604 {
605 int i, r;
606 struct r600_bytecode_alu alu;
607 int gpr = 0, base_chan = 0;
608 int ij_index = 0;
609
610 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
611 ij_index = 0;
612 if (ctx->shader->input[input].centroid)
613 ij_index++;
614 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
615 ij_index = 0;
616 /* if we have perspective add one */
617 if (ctx->input_perspective) {
618 ij_index++;
619 /* if we have perspective centroid */
620 if (ctx->input_centroid)
621 ij_index++;
622 }
623 if (ctx->shader->input[input].centroid)
624 ij_index++;
625 }
626
627 /* work out gpr and base_chan from index */
628 gpr = ij_index / 2;
629 base_chan = (2 * (ij_index % 2)) + 1;
630
631 for (i = 0; i < 8; i++) {
632 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
633
634 if (i < 4)
635 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
636 else
637 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
638
639 if ((i > 1) && (i < 6)) {
640 alu.dst.sel = ctx->shader->input[input].gpr;
641 alu.dst.write = 1;
642 }
643
644 alu.dst.chan = i % 4;
645
646 alu.src[0].sel = gpr;
647 alu.src[0].chan = (base_chan - (i % 2));
648
649 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
650
651 alu.bank_swizzle_force = SQ_ALU_VEC_210;
652 if ((i % 4) == 3)
653 alu.last = 1;
654 r = r600_bytecode_add_alu(ctx->bc, &alu);
655 if (r)
656 return r;
657 }
658 return 0;
659 }
660
661 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
662 {
663 int i, r;
664 struct r600_bytecode_alu alu;
665
666 for (i = 0; i < 4; i++) {
667 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
668
669 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
670
671 alu.dst.sel = ctx->shader->input[input].gpr;
672 alu.dst.write = 1;
673
674 alu.dst.chan = i;
675
676 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
677 alu.src[0].chan = i;
678
679 if (i == 3)
680 alu.last = 1;
681 r = r600_bytecode_add_alu(ctx->bc, &alu);
682 if (r)
683 return r;
684 }
685 return 0;
686 }
687
688 /*
689 * Special export handling in shaders
690 *
691 * shader export ARRAY_BASE for EXPORT_POS:
692 * 60 is position
693 * 61 is misc vector
694 * 62, 63 are clip distance vectors
695 *
696 * The use of the values exported in 61-63 is controlled by PA_CL_VS_OUT_CNTL:
697 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
698 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
699 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
700 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
701 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
702 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
703 * exclusive with render target index)
704 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
705 *
706 *
707 * shader export ARRAY_BASE for EXPORT_PIXEL:
708 * 0-7 CB targets
709 * 61 computed Z vector
710 *
711 * The use of the values exported in the computed Z vector is controlled
712 * by DB_SHADER_CONTROL:
713 * Z_EXPORT_ENABLE - Z as a float in RED
714 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
715 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
716 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
717 * DB_SOURCE_FORMAT - export control restrictions
718 *
719 */
720
721
722 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
723 static int r600_spi_sid(struct r600_shader_io * io)
724 {
725 int index, name = io->name;
726
727 /* These params are handled differently, they don't need
728 * semantic indices, so we'll use 0 for them.
729 */
730 if (name == TGSI_SEMANTIC_POSITION ||
731 name == TGSI_SEMANTIC_PSIZE ||
732 name == TGSI_SEMANTIC_FACE)
733 index = 0;
734 else {
735 if (name == TGSI_SEMANTIC_GENERIC) {
736 /* For generic params simply use sid from tgsi */
737 index = io->sid;
738 } else {
739 /* For non-generic params - pack name and sid into 8 bits */
740 index = 0x80 | (name<<3) | (io->sid);
741 }
742
743 /* Make sure that all really used indices have nonzero value, so
744 * we can just compare it to 0 later instead of comparing the name
745 * with different values to detect special cases. */
746 index++;
747 }
748
749 return index;
750 };
751
752 /* turn input into interpolate on EG */
753 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
754 {
755 int r = 0;
756
757 if (ctx->shader->input[index].spi_sid) {
758 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
759 if (ctx->shader->input[index].interpolate > 0) {
760 r = evergreen_interp_alu(ctx, index);
761 } else {
762 r = evergreen_interp_flat(ctx, index);
763 }
764 }
765 return r;
766 }
767
768 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
769 {
770 struct r600_bytecode_alu alu;
771 int i, r;
772 int gpr_front = ctx->shader->input[front].gpr;
773 int gpr_back = ctx->shader->input[back].gpr;
774
775 for (i = 0; i < 4; i++) {
776 memset(&alu, 0, sizeof(alu));
777 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
778 alu.is_op3 = 1;
779 alu.dst.write = 1;
780 alu.dst.sel = gpr_front;
781 alu.src[0].sel = ctx->face_gpr;
782 alu.src[1].sel = gpr_front;
783 alu.src[2].sel = gpr_back;
784
785 alu.dst.chan = i;
786 alu.src[1].chan = i;
787 alu.src[2].chan = i;
788 alu.last = (i==3);
789
790 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
791 return r;
792 }
793
794 return 0;
795 }
796
797 static int tgsi_declaration(struct r600_shader_ctx *ctx)
798 {
799 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
800 unsigned i;
801 int r;
802
803 switch (d->Declaration.File) {
804 case TGSI_FILE_INPUT:
805 i = ctx->shader->ninput++;
806 ctx->shader->input[i].name = d->Semantic.Name;
807 ctx->shader->input[i].sid = d->Semantic.Index;
808 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
809 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
810 ctx->shader->input[i].centroid = d->Interp.Centroid;
811 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
812 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
813 switch (ctx->shader->input[i].name) {
814 case TGSI_SEMANTIC_FACE:
815 ctx->face_gpr = ctx->shader->input[i].gpr;
816 break;
817 case TGSI_SEMANTIC_COLOR:
818 ctx->colors_used++;
819 break;
820 case TGSI_SEMANTIC_POSITION:
821 ctx->fragcoord_input = i;
822 break;
823 }
824 if (ctx->bc->chip_class >= EVERGREEN) {
825 if ((r = evergreen_interp_input(ctx, i)))
826 return r;
827 }
828 }
829 break;
830 case TGSI_FILE_OUTPUT:
831 i = ctx->shader->noutput++;
832 ctx->shader->output[i].name = d->Semantic.Name;
833 ctx->shader->output[i].sid = d->Semantic.Index;
834 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
835 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
836 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
837 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
838 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
839 switch (d->Semantic.Name) {
840 case TGSI_SEMANTIC_CLIPDIST:
841 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
842 break;
843 case TGSI_SEMANTIC_PSIZE:
844 ctx->shader->vs_out_misc_write = 1;
845 ctx->shader->vs_out_point_size = 1;
846 break;
847 case TGSI_SEMANTIC_CLIPVERTEX:
848 ctx->clip_vertex_write = TRUE;
849 ctx->cv_output = i;
850 break;
851 }
852 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
853 switch (d->Semantic.Name) {
854 case TGSI_SEMANTIC_COLOR:
855 ctx->shader->nr_ps_max_color_exports++;
856 break;
857 }
858 }
859 break;
860 case TGSI_FILE_CONSTANT:
861 case TGSI_FILE_TEMPORARY:
862 case TGSI_FILE_SAMPLER:
863 case TGSI_FILE_ADDRESS:
864 break;
865
866 case TGSI_FILE_SYSTEM_VALUE:
867 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
868 if (!ctx->native_integers) {
869 struct r600_bytecode_alu alu;
870 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
871
872 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
873 alu.src[0].sel = 0;
874 alu.src[0].chan = 3;
875
876 alu.dst.sel = 0;
877 alu.dst.chan = 3;
878 alu.dst.write = 1;
879 alu.last = 1;
880
881 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
882 return r;
883 }
884 break;
885 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
886 break;
887 default:
888 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
889 return -EINVAL;
890 }
891 return 0;
892 }
893
894 static int r600_get_temp(struct r600_shader_ctx *ctx)
895 {
896 return ctx->temp_reg + ctx->max_driver_temp_used++;
897 }
898
899 /*
900 * for evergreen we need to scan the shader to find the number of GPRs we need to
901 * reserve for interpolation.
902 *
903 * we need to know if we are going to emit
904 * any centroid inputs
905 * if perspective and linear are required
906 */
907 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
908 {
909 int i;
910 int num_baryc;
911
912 ctx->input_linear = FALSE;
913 ctx->input_perspective = FALSE;
914 ctx->input_centroid = FALSE;
915 ctx->num_interp_gpr = 1;
916
917 /* any centroid inputs */
918 for (i = 0; i < ctx->info.num_inputs; i++) {
919 /* skip position/face */
920 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
921 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
922 continue;
923 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
924 ctx->input_linear = TRUE;
925 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
926 ctx->input_perspective = TRUE;
927 if (ctx->info.input_centroid[i])
928 ctx->input_centroid = TRUE;
929 }
930
931 num_baryc = 0;
932 /* ignoring sample for now */
933 if (ctx->input_perspective)
934 num_baryc++;
935 if (ctx->input_linear)
936 num_baryc++;
937 if (ctx->input_centroid)
938 num_baryc *= 2;
939
940 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
941
942 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
943 return ctx->num_interp_gpr;
944 }
945
946 static void tgsi_src(struct r600_shader_ctx *ctx,
947 const struct tgsi_full_src_register *tgsi_src,
948 struct r600_shader_src *r600_src)
949 {
950 memset(r600_src, 0, sizeof(*r600_src));
951 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
952 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
953 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
954 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
955 r600_src->neg = tgsi_src->Register.Negate;
956 r600_src->abs = tgsi_src->Register.Absolute;
957
958 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
959 int index;
960 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
961 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
962 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
963
964 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
965 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
966 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
967 return;
968 }
969 index = tgsi_src->Register.Index;
970 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
971 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
972 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
973 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
974 r600_src->swizzle[0] = 3;
975 r600_src->swizzle[1] = 3;
976 r600_src->swizzle[2] = 3;
977 r600_src->swizzle[3] = 3;
978 r600_src->sel = 0;
979 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
980 r600_src->swizzle[0] = 0;
981 r600_src->swizzle[1] = 0;
982 r600_src->swizzle[2] = 0;
983 r600_src->swizzle[3] = 0;
984 r600_src->sel = 0;
985 }
986 } else {
987 if (tgsi_src->Register.Indirect)
988 r600_src->rel = V_SQ_REL_RELATIVE;
989 r600_src->sel = tgsi_src->Register.Index;
990 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
991 }
992 }
993
994 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
995 {
996 struct r600_bytecode_vtx vtx;
997 unsigned int ar_reg;
998 int r;
999
1000 if (offset) {
1001 struct r600_bytecode_alu alu;
1002
1003 memset(&alu, 0, sizeof(alu));
1004
1005 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
1006 alu.src[0].sel = ctx->bc->ar_reg;
1007
1008 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1009 alu.src[1].value = offset;
1010
1011 alu.dst.sel = dst_reg;
1012 alu.dst.write = 1;
1013 alu.last = 1;
1014
1015 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1016 return r;
1017
1018 ar_reg = dst_reg;
1019 } else {
1020 ar_reg = ctx->bc->ar_reg;
1021 }
1022
1023 memset(&vtx, 0, sizeof(vtx));
1024 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1025 vtx.src_gpr = ar_reg;
1026 vtx.mega_fetch_count = 16;
1027 vtx.dst_gpr = dst_reg;
1028 vtx.dst_sel_x = 0; /* SEL_X */
1029 vtx.dst_sel_y = 1; /* SEL_Y */
1030 vtx.dst_sel_z = 2; /* SEL_Z */
1031 vtx.dst_sel_w = 3; /* SEL_W */
1032 vtx.data_format = FMT_32_32_32_32_FLOAT;
1033 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1034 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1035 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1036 vtx.endian = r600_endian_swap(32);
1037
1038 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1039 return r;
1040
1041 return 0;
1042 }
1043
1044 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1045 {
1046 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1047 struct r600_bytecode_alu alu;
1048 int i, j, k, nconst, r;
1049
1050 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1051 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1052 nconst++;
1053 }
1054 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1055 }
1056 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1057 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1058 continue;
1059 }
1060
1061 if (ctx->src[i].rel) {
1062 int treg = r600_get_temp(ctx);
1063 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1064 return r;
1065
1066 ctx->src[i].sel = treg;
1067 ctx->src[i].rel = 0;
1068 j--;
1069 } else if (j > 0) {
1070 int treg = r600_get_temp(ctx);
1071 for (k = 0; k < 4; k++) {
1072 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1073 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1074 alu.src[0].sel = ctx->src[i].sel;
1075 alu.src[0].chan = k;
1076 alu.src[0].rel = ctx->src[i].rel;
1077 alu.dst.sel = treg;
1078 alu.dst.chan = k;
1079 alu.dst.write = 1;
1080 if (k == 3)
1081 alu.last = 1;
1082 r = r600_bytecode_add_alu(ctx->bc, &alu);
1083 if (r)
1084 return r;
1085 }
1086 ctx->src[i].sel = treg;
1087 ctx->src[i].rel =0;
1088 j--;
1089 }
1090 }
1091 return 0;
1092 }
1093
1094 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1095 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1096 {
1097 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1098 struct r600_bytecode_alu alu;
1099 int i, j, k, nliteral, r;
1100
1101 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1102 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1103 nliteral++;
1104 }
1105 }
1106 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1107 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1108 int treg = r600_get_temp(ctx);
1109 for (k = 0; k < 4; k++) {
1110 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1111 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1112 alu.src[0].sel = ctx->src[i].sel;
1113 alu.src[0].chan = k;
1114 alu.src[0].value = ctx->src[i].value[k];
1115 alu.dst.sel = treg;
1116 alu.dst.chan = k;
1117 alu.dst.write = 1;
1118 if (k == 3)
1119 alu.last = 1;
1120 r = r600_bytecode_add_alu(ctx->bc, &alu);
1121 if (r)
1122 return r;
1123 }
1124 ctx->src[i].sel = treg;
1125 j--;
1126 }
1127 }
1128 return 0;
1129 }
1130
1131 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx, unsigned use_llvm)
1132 {
1133 int i, r, count = ctx->shader->ninput;
1134
1135 for (i = 0; i < count; i++) {
1136 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1137 unsigned back_facing_reg = ctx->shader->input[i].potential_back_facing_reg;
1138 if (ctx->bc->chip_class >= EVERGREEN) {
1139 if ((r = evergreen_interp_input(ctx, back_facing_reg)))
1140 return r;
1141 }
1142
1143 if (!use_llvm) {
1144 r = select_twoside_color(ctx, i, back_facing_reg);
1145 if (r)
1146 return r;
1147 }
1148 }
1149 }
1150 return 0;
1151 }
1152
1153 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
1154 struct r600_pipe_shader *pipeshader,
1155 struct r600_shader_key key)
1156 {
1157 struct r600_shader *shader = &pipeshader->shader;
1158 struct tgsi_token *tokens = pipeshader->selector->tokens;
1159 struct pipe_stream_output_info so = pipeshader->selector->so;
1160 struct tgsi_full_immediate *immediate;
1161 struct tgsi_full_property *property;
1162 struct r600_shader_ctx ctx;
1163 struct r600_bytecode_output output[32];
1164 unsigned output_done, noutput;
1165 unsigned opcode;
1166 int i, j, k, r = 0;
1167 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1168 /* Declarations used by llvm code */
1169 bool use_llvm = false;
1170 unsigned char * inst_bytes = NULL;
1171 unsigned inst_byte_count = 0;
1172
1173 #ifdef R600_USE_LLVM
1174 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1175 #endif
1176 ctx.bc = &shader->bc;
1177 ctx.shader = shader;
1178 ctx.native_integers = true;
1179
1180 r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
1181 ctx.tokens = tokens;
1182 tgsi_scan_shader(tokens, &ctx.info);
1183 tgsi_parse_init(&ctx.parse, tokens);
1184 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1185 shader->processor_type = ctx.type;
1186 ctx.bc->type = shader->processor_type;
1187
1188 ctx.face_gpr = -1;
1189 ctx.fragcoord_input = -1;
1190 ctx.colors_used = 0;
1191 ctx.clip_vertex_write = 0;
1192
1193 shader->nr_ps_color_exports = 0;
1194 shader->nr_ps_max_color_exports = 0;
1195
1196 shader->two_side = key.color_two_side;
1197
1198 /* register allocations */
1199 /* Values [0,127] correspond to GPR[0..127].
1200 * Values [128,159] correspond to constant buffer bank 0
1201 * Values [160,191] correspond to constant buffer bank 1
1202 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1203 * Values [256,287] correspond to constant buffer bank 2 (EG)
1204 * Values [288,319] correspond to constant buffer bank 3 (EG)
1205 * Other special values are shown in the list below.
1206 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1207 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1208 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1209 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1210 * 248 SQ_ALU_SRC_0: special constant 0.0.
1211 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1212 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1213 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1214 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1215 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1216 * 254 SQ_ALU_SRC_PV: previous vector result.
1217 * 255 SQ_ALU_SRC_PS: previous scalar result.
1218 */
1219 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1220 ctx.file_offset[i] = 0;
1221 }
1222 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1223 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1224 if (ctx.bc->chip_class >= EVERGREEN) {
1225 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1226 } else {
1227 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1228 }
1229 }
1230 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1231 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1232 }
1233
1234 #ifdef R600_USE_LLVM
1235 if (use_llvm && ctx.info.indirect_files) {
1236 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1237 "indirect adressing. Falling back to TGSI "
1238 "backend.\n");
1239 use_llvm = 0;
1240 }
1241 #endif
1242
1243 if (use_llvm) {
1244 ctx.file_offset[TGSI_FILE_OUTPUT] =
1245 ctx.file_offset[TGSI_FILE_INPUT];
1246 } else {
1247 ctx.file_offset[TGSI_FILE_OUTPUT] =
1248 ctx.file_offset[TGSI_FILE_INPUT] +
1249 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1250 }
1251 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1252 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1253
1254 /* Outside the GPR range. This will be translated to one of the
1255 * kcache banks later. */
1256 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1257
1258 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1259 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1260 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1261 ctx.temp_reg = ctx.bc->ar_reg + 1;
1262
1263 ctx.nliterals = 0;
1264 ctx.literals = NULL;
1265 shader->fs_write_all = FALSE;
1266 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1267 tgsi_parse_token(&ctx.parse);
1268 switch (ctx.parse.FullToken.Token.Type) {
1269 case TGSI_TOKEN_TYPE_IMMEDIATE:
1270 immediate = &ctx.parse.FullToken.FullImmediate;
1271 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1272 if(ctx.literals == NULL) {
1273 r = -ENOMEM;
1274 goto out_err;
1275 }
1276 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1277 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1278 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1279 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1280 ctx.nliterals++;
1281 break;
1282 case TGSI_TOKEN_TYPE_DECLARATION:
1283 r = tgsi_declaration(&ctx);
1284 if (r)
1285 goto out_err;
1286 break;
1287 case TGSI_TOKEN_TYPE_INSTRUCTION:
1288 break;
1289 case TGSI_TOKEN_TYPE_PROPERTY:
1290 property = &ctx.parse.FullToken.FullProperty;
1291 switch (property->Property.PropertyName) {
1292 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1293 if (property->u[0].Data == 1)
1294 shader->fs_write_all = TRUE;
1295 break;
1296 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1297 /* we don't need this one */
1298 break;
1299 }
1300 break;
1301 default:
1302 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1303 r = -EINVAL;
1304 goto out_err;
1305 }
1306 }
1307
1308 /* Process two side if needed */
1309 if (shader->two_side && ctx.colors_used) {
1310 int i, count = ctx.shader->ninput;
1311
1312 /* additional inputs will be allocated right after the existing inputs,
1313 * we won't need them after the color selection, so we don't need to
1314 * reserve these gprs for the rest of the shader code and to adjust
1315 * output offsets etc. */
1316 int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
1317 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1318
1319 if (ctx.face_gpr == -1) {
1320 i = ctx.shader->ninput++;
1321 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
1322 ctx.shader->input[i].spi_sid = 0;
1323 ctx.shader->input[i].gpr = gpr++;
1324 ctx.face_gpr = ctx.shader->input[i].gpr;
1325 }
1326
1327 for (i = 0; i < count; i++) {
1328 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1329 int ni = ctx.shader->ninput++;
1330 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io));
1331 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1332 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]);
1333 ctx.shader->input[ni].gpr = gpr++;
1334 ctx.shader->input[i].potential_back_facing_reg = ni;
1335 }
1336 }
1337 }
1338
1339 /* LLVM backend setup */
1340 #ifdef R600_USE_LLVM
1341 if (use_llvm) {
1342 struct radeon_llvm_context radeon_llvm_ctx;
1343 LLVMModuleRef mod;
1344 unsigned dump = 0;
1345 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1346 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1347 radeon_llvm_ctx.type = ctx.type;
1348 radeon_llvm_ctx.two_side = shader->two_side;
1349 radeon_llvm_ctx.face_input = ctx.face_gpr;
1350 radeon_llvm_ctx.r600_inputs = ctx.shader->input;
1351 radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
1352 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1353 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1354 dump = 1;
1355 }
1356 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1357 rscreen->family, dump)) {
1358 FREE(inst_bytes);
1359 radeon_llvm_dispose(&radeon_llvm_ctx);
1360 use_llvm = 0;
1361 fprintf(stderr, "R600 LLVM backend failed to compile "
1362 "shader. Falling back to TGSI\n");
1363 } else {
1364 ctx.file_offset[TGSI_FILE_OUTPUT] =
1365 ctx.file_offset[TGSI_FILE_INPUT];
1366 }
1367 radeon_llvm_dispose(&radeon_llvm_ctx);
1368 }
1369 #endif
1370 /* End of LLVM backend setup */
1371
1372 if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
1373 shader->nr_ps_max_color_exports = 8;
1374
1375 if (ctx.fragcoord_input >= 0) {
1376 if (ctx.bc->chip_class == CAYMAN) {
1377 for (j = 0 ; j < 4; j++) {
1378 struct r600_bytecode_alu alu;
1379 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1380 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1381 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1382 alu.src[0].chan = 3;
1383
1384 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1385 alu.dst.chan = j;
1386 alu.dst.write = (j == 3);
1387 alu.last = 1;
1388 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1389 return r;
1390 }
1391 } else {
1392 struct r600_bytecode_alu alu;
1393 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1394 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1395 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1396 alu.src[0].chan = 3;
1397
1398 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1399 alu.dst.chan = 3;
1400 alu.dst.write = 1;
1401 alu.last = 1;
1402 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1403 return r;
1404 }
1405 }
1406
1407 if (shader->two_side && ctx.colors_used) {
1408 if ((r = process_twoside_color_inputs(&ctx, use_llvm)))
1409 return r;
1410 }
1411
1412 tgsi_parse_init(&ctx.parse, tokens);
1413 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1414 tgsi_parse_token(&ctx.parse);
1415 switch (ctx.parse.FullToken.Token.Type) {
1416 case TGSI_TOKEN_TYPE_INSTRUCTION:
1417 if (use_llvm) {
1418 continue;
1419 }
1420 r = tgsi_is_supported(&ctx);
1421 if (r)
1422 goto out_err;
1423 ctx.max_driver_temp_used = 0;
1424 /* reserve first tmp for everyone */
1425 r600_get_temp(&ctx);
1426
1427 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1428 if ((r = tgsi_split_constant(&ctx)))
1429 goto out_err;
1430 if ((r = tgsi_split_literal_constant(&ctx)))
1431 goto out_err;
1432 if (ctx.bc->chip_class == CAYMAN)
1433 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1434 else if (ctx.bc->chip_class >= EVERGREEN)
1435 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1436 else
1437 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1438 r = ctx.inst_info->process(&ctx);
1439 if (r)
1440 goto out_err;
1441 break;
1442 default:
1443 break;
1444 }
1445 }
1446
1447 /* Get instructions if we are using the LLVM backend. */
1448 if (use_llvm) {
1449 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1450 FREE(inst_bytes);
1451 }
1452
1453 noutput = shader->noutput;
1454
1455 if (ctx.clip_vertex_write) {
1456 /* need to convert a clipvertex write into clipdistance writes and not export
1457 the clip vertex anymore */
1458
1459 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1460 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1461 shader->output[noutput].gpr = ctx.temp_reg;
1462 noutput++;
1463 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1464 shader->output[noutput].gpr = ctx.temp_reg+1;
1465 noutput++;
1466
1467 /* reset spi_sid for clipvertex output to avoid confusing spi */
1468 shader->output[ctx.cv_output].spi_sid = 0;
1469
1470 shader->clip_dist_write = 0xFF;
1471
1472 for (i = 0; i < 8; i++) {
1473 int oreg = i >> 2;
1474 int ochan = i & 3;
1475
1476 for (j = 0; j < 4; j++) {
1477 struct r600_bytecode_alu alu;
1478 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1479 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1480 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1481 alu.src[0].chan = j;
1482
1483 alu.src[1].sel = 512 + i;
1484 alu.src[1].kc_bank = 1;
1485 alu.src[1].chan = j;
1486
1487 alu.dst.sel = ctx.temp_reg + oreg;
1488 alu.dst.chan = j;
1489 alu.dst.write = (j == ochan);
1490 if (j == 3)
1491 alu.last = 1;
1492 r = r600_bytecode_add_alu(ctx.bc, &alu);
1493 if (r)
1494 return r;
1495 }
1496 }
1497 }
1498
1499 /* Add stream outputs. */
1500 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1501 for (i = 0; i < so.num_outputs; i++) {
1502 struct r600_bytecode_output output;
1503
1504 if (so.output[i].output_buffer >= 4) {
1505 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1506 so.output[i].output_buffer);
1507 r = -EINVAL;
1508 goto out_err;
1509 }
1510 if (so.output[i].dst_offset < so.output[i].start_component) {
1511 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1512 r = -EINVAL;
1513 goto out_err;
1514 }
1515
1516 memset(&output, 0, sizeof(struct r600_bytecode_output));
1517 output.gpr = shader->output[so.output[i].register_index].gpr;
1518 output.elem_size = 0;
1519 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1520 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1521 output.burst_count = 1;
1522 output.barrier = 1;
1523 /* array_size is an upper limit for the burst_count
1524 * with MEM_STREAM instructions */
1525 output.array_size = 0xFFF;
1526 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1527 if (ctx.bc->chip_class >= EVERGREEN) {
1528 switch (so.output[i].output_buffer) {
1529 case 0:
1530 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1531 break;
1532 case 1:
1533 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1534 break;
1535 case 2:
1536 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1537 break;
1538 case 3:
1539 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1540 break;
1541 }
1542 } else {
1543 switch (so.output[i].output_buffer) {
1544 case 0:
1545 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1546 break;
1547 case 1:
1548 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1549 break;
1550 case 2:
1551 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1552 break;
1553 case 3:
1554 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1555 break;
1556 }
1557 }
1558 r = r600_bytecode_add_output(ctx.bc, &output);
1559 if (r)
1560 goto out_err;
1561 }
1562 }
1563
1564 /* export output */
1565 for (i = 0, j = 0; i < noutput; i++, j++) {
1566 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1567 output[j].gpr = shader->output[i].gpr;
1568 output[j].elem_size = 3;
1569 output[j].swizzle_x = 0;
1570 output[j].swizzle_y = 1;
1571 output[j].swizzle_z = 2;
1572 output[j].swizzle_w = 3;
1573 output[j].burst_count = 1;
1574 output[j].barrier = 1;
1575 output[j].type = -1;
1576 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1577 switch (ctx.type) {
1578 case TGSI_PROCESSOR_VERTEX:
1579 switch (shader->output[i].name) {
1580 case TGSI_SEMANTIC_POSITION:
1581 output[j].array_base = next_pos_base++;
1582 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1583 break;
1584
1585 case TGSI_SEMANTIC_PSIZE:
1586 output[j].array_base = next_pos_base++;
1587 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1588 break;
1589 case TGSI_SEMANTIC_CLIPVERTEX:
1590 j--;
1591 break;
1592 case TGSI_SEMANTIC_CLIPDIST:
1593 output[j].array_base = next_pos_base++;
1594 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1595 /* spi_sid is 0 for clipdistance outputs that were generated
1596 * for clipvertex - we don't need to pass them to PS */
1597 if (shader->output[i].spi_sid) {
1598 j++;
1599 /* duplicate it as PARAM to pass to the pixel shader */
1600 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1601 output[j].array_base = next_param_base++;
1602 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1603 }
1604 break;
1605 case TGSI_SEMANTIC_FOG:
1606 output[j].swizzle_y = 4; /* 0 */
1607 output[j].swizzle_z = 4; /* 0 */
1608 output[j].swizzle_w = 5; /* 1 */
1609 break;
1610 }
1611 break;
1612 case TGSI_PROCESSOR_FRAGMENT:
1613 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1614 /* never export more colors than the number of CBs */
1615 if (next_pixel_base && next_pixel_base >= key.nr_cbufs) {
1616 /* skip export */
1617 j--;
1618 continue;
1619 }
1620 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1621 output[j].array_base = next_pixel_base++;
1622 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1623 shader->nr_ps_color_exports++;
1624 if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
1625 for (k = 1; k < key.nr_cbufs; k++) {
1626 j++;
1627 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1628 output[j].gpr = shader->output[i].gpr;
1629 output[j].elem_size = 3;
1630 output[j].swizzle_x = 0;
1631 output[j].swizzle_y = 1;
1632 output[j].swizzle_z = 2;
1633 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1634 output[j].burst_count = 1;
1635 output[j].barrier = 1;
1636 output[j].array_base = next_pixel_base++;
1637 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1638 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1639 shader->nr_ps_color_exports++;
1640 }
1641 }
1642 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1643 output[j].array_base = 61;
1644 output[j].swizzle_x = 2;
1645 output[j].swizzle_y = 7;
1646 output[j].swizzle_z = output[j].swizzle_w = 7;
1647 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1648 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1649 output[j].array_base = 61;
1650 output[j].swizzle_x = 7;
1651 output[j].swizzle_y = 1;
1652 output[j].swizzle_z = output[j].swizzle_w = 7;
1653 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1654 } else {
1655 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1656 r = -EINVAL;
1657 goto out_err;
1658 }
1659 break;
1660 default:
1661 R600_ERR("unsupported processor type %d\n", ctx.type);
1662 r = -EINVAL;
1663 goto out_err;
1664 }
1665
1666 if (output[j].type==-1) {
1667 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1668 output[j].array_base = next_param_base++;
1669 }
1670 }
1671
1672 /* add fake param output for vertex shader if no param is exported */
1673 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1674 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1675 output[j].gpr = 0;
1676 output[j].elem_size = 3;
1677 output[j].swizzle_x = 7;
1678 output[j].swizzle_y = 7;
1679 output[j].swizzle_z = 7;
1680 output[j].swizzle_w = 7;
1681 output[j].burst_count = 1;
1682 output[j].barrier = 1;
1683 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1684 output[j].array_base = 0;
1685 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1686 j++;
1687 }
1688
1689 /* add fake pixel export */
1690 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1691 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1692 output[j].gpr = 0;
1693 output[j].elem_size = 3;
1694 output[j].swizzle_x = 7;
1695 output[j].swizzle_y = 7;
1696 output[j].swizzle_z = 7;
1697 output[j].swizzle_w = 7;
1698 output[j].burst_count = 1;
1699 output[j].barrier = 1;
1700 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1701 output[j].array_base = 0;
1702 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1703 j++;
1704 }
1705
1706 noutput = j;
1707
1708 /* set export done on last export of each type */
1709 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1710 if (ctx.bc->chip_class < CAYMAN) {
1711 if (i == (noutput - 1)) {
1712 output[i].end_of_program = 1;
1713 }
1714 }
1715 if (!(output_done & (1 << output[i].type))) {
1716 output_done |= (1 << output[i].type);
1717 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1718 }
1719 }
1720 /* add output to bytecode */
1721 for (i = 0; i < noutput; i++) {
1722 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1723 if (r)
1724 goto out_err;
1725 }
1726 /* add program end */
1727 if (ctx.bc->chip_class == CAYMAN)
1728 cm_bytecode_add_cf_end(ctx.bc);
1729
1730 /* check GPR limit - we have 124 = 128 - 4
1731 * (4 are reserved as alu clause temporary registers) */
1732 if (ctx.bc->ngpr > 124) {
1733 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1734 r = -ENOMEM;
1735 goto out_err;
1736 }
1737
1738 free(ctx.literals);
1739 tgsi_parse_free(&ctx.parse);
1740 return 0;
1741 out_err:
1742 free(ctx.literals);
1743 tgsi_parse_free(&ctx.parse);
1744 return r;
1745 }
1746
1747 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1748 {
1749 R600_ERR("%s tgsi opcode unsupported\n",
1750 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1751 return -EINVAL;
1752 }
1753
/* Instruction handler that emits nothing and always reports success. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* the context is not consulted */
	return 0;
}
1758
1759 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1760 const struct r600_shader_src *shader_src,
1761 unsigned chan)
1762 {
1763 bc_src->sel = shader_src->sel;
1764 bc_src->chan = shader_src->swizzle[chan];
1765 bc_src->neg = shader_src->neg;
1766 bc_src->abs = shader_src->abs;
1767 bc_src->rel = shader_src->rel;
1768 bc_src->value = shader_src->value[bc_src->chan];
1769 }
1770
1771 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1772 {
1773 bc_src->abs = 1;
1774 bc_src->neg = 0;
1775 }
1776
1777 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1778 {
1779 bc_src->neg = !bc_src->neg;
1780 }
1781
1782 static void tgsi_dst(struct r600_shader_ctx *ctx,
1783 const struct tgsi_full_dst_register *tgsi_dst,
1784 unsigned swizzle,
1785 struct r600_bytecode_alu_dst *r600_dst)
1786 {
1787 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1788
1789 r600_dst->sel = tgsi_dst->Register.Index;
1790 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1791 r600_dst->chan = swizzle;
1792 r600_dst->write = 1;
1793 if (tgsi_dst->Register.Indirect)
1794 r600_dst->rel = V_SQ_REL_RELATIVE;
1795 if (inst->Instruction.Saturate) {
1796 r600_dst->clamp = 1;
1797 }
1798 }
1799
/*
 * Return the index (0..3) of the highest channel set in the low four bits of
 * writemask.  An empty mask yields 0; bits above bit 3 are ignored.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X (or nothing) falls through to 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
1811
1812 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1813 {
1814 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1815 struct r600_bytecode_alu alu;
1816 int i, j, r;
1817 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1818
1819 for (i = 0; i < lasti + 1; i++) {
1820 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1821 continue;
1822
1823 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1824 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1825
1826 alu.inst = ctx->inst_info->r600_opcode;
1827 if (!swap) {
1828 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1829 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1830 }
1831 } else {
1832 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1833 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1834 }
1835 /* handle some special cases */
1836 switch (ctx->inst_info->tgsi_opcode) {
1837 case TGSI_OPCODE_SUB:
1838 r600_bytecode_src_toggle_neg(&alu.src[1]);
1839 break;
1840 case TGSI_OPCODE_ABS:
1841 r600_bytecode_src_set_abs(&alu.src[0]);
1842 break;
1843 default:
1844 break;
1845 }
1846 if (i == lasti || trans_only) {
1847 alu.last = 1;
1848 }
1849 r = r600_bytecode_add_alu(ctx->bc, &alu);
1850 if (r)
1851 return r;
1852 }
1853 return 0;
1854 }
1855
/* Per-channel op with sources in TGSI order; only the final channel is flagged last. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0, trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1860
/* Per-channel op with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1, trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1865
/* Per-channel op where every emitted instruction is flagged as last. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0, trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1870
1871 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1872 {
1873 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1874 struct r600_bytecode_alu alu;
1875 int i, r;
1876 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1877
1878 for (i = 0; i < lasti + 1; i++) {
1879
1880 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1881 continue;
1882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1883 alu.inst = ctx->inst_info->r600_opcode;
1884
1885 alu.src[0].sel = V_SQ_ALU_SRC_0;
1886
1887 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1888
1889 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1890
1891 if (i == lasti) {
1892 alu.last = 1;
1893 }
1894 r = r600_bytecode_add_alu(ctx->bc, &alu);
1895 if (r)
1896 return r;
1897 }
1898 return 0;
1899
1900 }
1901
1902 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1903 {
1904 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1905 int i, j, r;
1906 struct r600_bytecode_alu alu;
1907 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1908
1909 for (i = 0 ; i < last_slot; i++) {
1910 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1911 alu.inst = ctx->inst_info->r600_opcode;
1912 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1913 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1914
1915 /* RSQ should take the absolute value of src */
1916 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
1917 r600_bytecode_src_set_abs(&alu.src[j]);
1918 }
1919 }
1920 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1921 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1922
1923 if (i == last_slot - 1)
1924 alu.last = 1;
1925 r = r600_bytecode_add_alu(ctx->bc, &alu);
1926 if (r)
1927 return r;
1928 }
1929 return 0;
1930 }
1931
1932 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1933 {
1934 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1935 int i, j, k, r;
1936 struct r600_bytecode_alu alu;
1937 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1938 for (k = 0; k < last_slot; k++) {
1939 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1940 continue;
1941
1942 for (i = 0 ; i < 4; i++) {
1943 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1944 alu.inst = ctx->inst_info->r600_opcode;
1945 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1946 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1947 }
1948 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1949 alu.dst.write = (i == k);
1950 if (i == 3)
1951 alu.last = 1;
1952 r = r600_bytecode_add_alu(ctx->bc, &alu);
1953 if (r)
1954 return r;
1955 }
1956 }
1957 return 0;
1958 }
1959
1960 /*
1961 * r600 - trunc to -PI..PI range
1962 * r700 - normalize by dividing by 2PI
1963 * see fdo bug 27901
1964 */
1965 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1966 {
1967 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1968 static float double_pi = 3.1415926535 * 2;
1969 static float neg_pi = -3.1415926535;
1970
1971 int r;
1972 struct r600_bytecode_alu alu;
1973
1974 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1975 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1976 alu.is_op3 = 1;
1977
1978 alu.dst.chan = 0;
1979 alu.dst.sel = ctx->temp_reg;
1980 alu.dst.write = 1;
1981
1982 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1983
1984 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1985 alu.src[1].chan = 0;
1986 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1987 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1988 alu.src[2].chan = 0;
1989 alu.last = 1;
1990 r = r600_bytecode_add_alu(ctx->bc, &alu);
1991 if (r)
1992 return r;
1993
1994 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1995 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1996
1997 alu.dst.chan = 0;
1998 alu.dst.sel = ctx->temp_reg;
1999 alu.dst.write = 1;
2000
2001 alu.src[0].sel = ctx->temp_reg;
2002 alu.src[0].chan = 0;
2003 alu.last = 1;
2004 r = r600_bytecode_add_alu(ctx->bc, &alu);
2005 if (r)
2006 return r;
2007
2008 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2009 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2010 alu.is_op3 = 1;
2011
2012 alu.dst.chan = 0;
2013 alu.dst.sel = ctx->temp_reg;
2014 alu.dst.write = 1;
2015
2016 alu.src[0].sel = ctx->temp_reg;
2017 alu.src[0].chan = 0;
2018
2019 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2020 alu.src[1].chan = 0;
2021 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2022 alu.src[2].chan = 0;
2023
2024 if (ctx->bc->chip_class == R600) {
2025 alu.src[1].value = *(uint32_t *)&double_pi;
2026 alu.src[2].value = *(uint32_t *)&neg_pi;
2027 } else {
2028 alu.src[1].sel = V_SQ_ALU_SRC_1;
2029 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2030 alu.src[2].neg = 1;
2031 }
2032
2033 alu.last = 1;
2034 r = r600_bytecode_add_alu(ctx->bc, &alu);
2035 if (r)
2036 return r;
2037 return 0;
2038 }
2039
2040 static int cayman_trig(struct r600_shader_ctx *ctx)
2041 {
2042 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2043 struct r600_bytecode_alu alu;
2044 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2045 int i, r;
2046
2047 r = tgsi_setup_trig(ctx);
2048 if (r)
2049 return r;
2050
2051
2052 for (i = 0; i < last_slot; i++) {
2053 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2054 alu.inst = ctx->inst_info->r600_opcode;
2055 alu.dst.chan = i;
2056
2057 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2058 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2059
2060 alu.src[0].sel = ctx->temp_reg;
2061 alu.src[0].chan = 0;
2062 if (i == last_slot - 1)
2063 alu.last = 1;
2064 r = r600_bytecode_add_alu(ctx->bc, &alu);
2065 if (r)
2066 return r;
2067 }
2068 return 0;
2069 }
2070
2071 static int tgsi_trig(struct r600_shader_ctx *ctx)
2072 {
2073 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2074 struct r600_bytecode_alu alu;
2075 int i, r;
2076 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2077
2078 r = tgsi_setup_trig(ctx);
2079 if (r)
2080 return r;
2081
2082 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2083 alu.inst = ctx->inst_info->r600_opcode;
2084 alu.dst.chan = 0;
2085 alu.dst.sel = ctx->temp_reg;
2086 alu.dst.write = 1;
2087
2088 alu.src[0].sel = ctx->temp_reg;
2089 alu.src[0].chan = 0;
2090 alu.last = 1;
2091 r = r600_bytecode_add_alu(ctx->bc, &alu);
2092 if (r)
2093 return r;
2094
2095 /* replicate result */
2096 for (i = 0; i < lasti + 1; i++) {
2097 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2098 continue;
2099
2100 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2102
2103 alu.src[0].sel = ctx->temp_reg;
2104 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2105 if (i == lasti)
2106 alu.last = 1;
2107 r = r600_bytecode_add_alu(ctx->bc, &alu);
2108 if (r)
2109 return r;
2110 }
2111 return 0;
2112 }
2113
2114 static int tgsi_scs(struct r600_shader_ctx *ctx)
2115 {
2116 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2117 struct r600_bytecode_alu alu;
2118 int i, r;
2119
2120 /* We'll only need the trig stuff if we are going to write to the
2121 * X or Y components of the destination vector.
2122 */
2123 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2124 r = tgsi_setup_trig(ctx);
2125 if (r)
2126 return r;
2127 }
2128
2129 /* dst.x = COS */
2130 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2131 if (ctx->bc->chip_class == CAYMAN) {
2132 for (i = 0 ; i < 3; i++) {
2133 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2134 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2135 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2136
2137 if (i == 0)
2138 alu.dst.write = 1;
2139 else
2140 alu.dst.write = 0;
2141 alu.src[0].sel = ctx->temp_reg;
2142 alu.src[0].chan = 0;
2143 if (i == 2)
2144 alu.last = 1;
2145 r = r600_bytecode_add_alu(ctx->bc, &alu);
2146 if (r)
2147 return r;
2148 }
2149 } else {
2150 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2151 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2152 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2153
2154 alu.src[0].sel = ctx->temp_reg;
2155 alu.src[0].chan = 0;
2156 alu.last = 1;
2157 r = r600_bytecode_add_alu(ctx->bc, &alu);
2158 if (r)
2159 return r;
2160 }
2161 }
2162
2163 /* dst.y = SIN */
2164 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2165 if (ctx->bc->chip_class == CAYMAN) {
2166 for (i = 0 ; i < 3; i++) {
2167 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2168 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2169 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2170 if (i == 1)
2171 alu.dst.write = 1;
2172 else
2173 alu.dst.write = 0;
2174 alu.src[0].sel = ctx->temp_reg;
2175 alu.src[0].chan = 0;
2176 if (i == 2)
2177 alu.last = 1;
2178 r = r600_bytecode_add_alu(ctx->bc, &alu);
2179 if (r)
2180 return r;
2181 }
2182 } else {
2183 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2185 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2186
2187 alu.src[0].sel = ctx->temp_reg;
2188 alu.src[0].chan = 0;
2189 alu.last = 1;
2190 r = r600_bytecode_add_alu(ctx->bc, &alu);
2191 if (r)
2192 return r;
2193 }
2194 }
2195
2196 /* dst.z = 0.0; */
2197 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2198 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2199
2200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2201
2202 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2203
2204 alu.src[0].sel = V_SQ_ALU_SRC_0;
2205 alu.src[0].chan = 0;
2206
2207 alu.last = 1;
2208
2209 r = r600_bytecode_add_alu(ctx->bc, &alu);
2210 if (r)
2211 return r;
2212 }
2213
2214 /* dst.w = 1.0; */
2215 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2216 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2217
2218 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2219
2220 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2221
2222 alu.src[0].sel = V_SQ_ALU_SRC_1;
2223 alu.src[0].chan = 0;
2224
2225 alu.last = 1;
2226
2227 r = r600_bytecode_add_alu(ctx->bc, &alu);
2228 if (r)
2229 return r;
2230 }
2231
2232 return 0;
2233 }
2234
2235 static int tgsi_kill(struct r600_shader_ctx *ctx)
2236 {
2237 struct r600_bytecode_alu alu;
2238 int i, r;
2239
2240 for (i = 0; i < 4; i++) {
2241 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2242 alu.inst = ctx->inst_info->r600_opcode;
2243
2244 alu.dst.chan = i;
2245
2246 alu.src[0].sel = V_SQ_ALU_SRC_0;
2247
2248 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2249 alu.src[1].sel = V_SQ_ALU_SRC_1;
2250 alu.src[1].neg = 1;
2251 } else {
2252 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2253 }
2254 if (i == 3) {
2255 alu.last = 1;
2256 }
2257 r = r600_bytecode_add_alu(ctx->bc, &alu);
2258 if (r)
2259 return r;
2260 }
2261
2262 /* kill must be last in ALU */
2263 ctx->bc->force_add_cf = 1;
2264 ctx->shader->uses_kill = TRUE;
2265 return 0;
2266 }
2267
2268 static int tgsi_lit(struct r600_shader_ctx *ctx)
2269 {
2270 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2271 struct r600_bytecode_alu alu;
2272 int r;
2273
2274 /* tmp.x = max(src.y, 0.0) */
2275 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2276 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2277 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2278 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2279 alu.src[1].chan = 1;
2280
2281 alu.dst.sel = ctx->temp_reg;
2282 alu.dst.chan = 0;
2283 alu.dst.write = 1;
2284
2285 alu.last = 1;
2286 r = r600_bytecode_add_alu(ctx->bc, &alu);
2287 if (r)
2288 return r;
2289
2290 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2291 {
2292 int chan;
2293 int sel;
2294 int i;
2295
2296 if (ctx->bc->chip_class == CAYMAN) {
2297 for (i = 0; i < 3; i++) {
2298 /* tmp.z = log(tmp.x) */
2299 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2301 alu.src[0].sel = ctx->temp_reg;
2302 alu.src[0].chan = 0;
2303 alu.dst.sel = ctx->temp_reg;
2304 alu.dst.chan = i;
2305 if (i == 2) {
2306 alu.dst.write = 1;
2307 alu.last = 1;
2308 } else
2309 alu.dst.write = 0;
2310
2311 r = r600_bytecode_add_alu(ctx->bc, &alu);
2312 if (r)
2313 return r;
2314 }
2315 } else {
2316 /* tmp.z = log(tmp.x) */
2317 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2318 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2319 alu.src[0].sel = ctx->temp_reg;
2320 alu.src[0].chan = 0;
2321 alu.dst.sel = ctx->temp_reg;
2322 alu.dst.chan = 2;
2323 alu.dst.write = 1;
2324 alu.last = 1;
2325 r = r600_bytecode_add_alu(ctx->bc, &alu);
2326 if (r)
2327 return r;
2328 }
2329
2330 chan = alu.dst.chan;
2331 sel = alu.dst.sel;
2332
2333 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2334 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2335 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2336 alu.src[0].sel = sel;
2337 alu.src[0].chan = chan;
2338 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2339 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2340 alu.dst.sel = ctx->temp_reg;
2341 alu.dst.chan = 0;
2342 alu.dst.write = 1;
2343 alu.is_op3 = 1;
2344 alu.last = 1;
2345 r = r600_bytecode_add_alu(ctx->bc, &alu);
2346 if (r)
2347 return r;
2348
2349 if (ctx->bc->chip_class == CAYMAN) {
2350 for (i = 0; i < 3; i++) {
2351 /* dst.z = exp(tmp.x) */
2352 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2353 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2354 alu.src[0].sel = ctx->temp_reg;
2355 alu.src[0].chan = 0;
2356 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2357 if (i == 2) {
2358 alu.dst.write = 1;
2359 alu.last = 1;
2360 } else
2361 alu.dst.write = 0;
2362 r = r600_bytecode_add_alu(ctx->bc, &alu);
2363 if (r)
2364 return r;
2365 }
2366 } else {
2367 /* dst.z = exp(tmp.x) */
2368 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2369 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2370 alu.src[0].sel = ctx->temp_reg;
2371 alu.src[0].chan = 0;
2372 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2373 alu.last = 1;
2374 r = r600_bytecode_add_alu(ctx->bc, &alu);
2375 if (r)
2376 return r;
2377 }
2378 }
2379
2380 /* dst.x, <- 1.0 */
2381 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2382 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2383 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2384 alu.src[0].chan = 0;
2385 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2386 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2387 r = r600_bytecode_add_alu(ctx->bc, &alu);
2388 if (r)
2389 return r;
2390
2391 /* dst.y = max(src.x, 0.0) */
2392 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2393 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2394 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2395 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2396 alu.src[1].chan = 0;
2397 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2398 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2399 r = r600_bytecode_add_alu(ctx->bc, &alu);
2400 if (r)
2401 return r;
2402
2403 /* dst.w, <- 1.0 */
2404 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2405 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2406 alu.src[0].sel = V_SQ_ALU_SRC_1;
2407 alu.src[0].chan = 0;
2408 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2409 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2410 alu.last = 1;
2411 r = r600_bytecode_add_alu(ctx->bc, &alu);
2412 if (r)
2413 return r;
2414
2415 return 0;
2416 }
2417
2418 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2419 {
2420 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2421 struct r600_bytecode_alu alu;
2422 int i, r;
2423
2424 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2425
2426 /* XXX:
2427 * For state trackers other than OpenGL, we'll want to use
2428 * _RECIPSQRT_IEEE instead.
2429 */
2430 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2431
2432 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2433 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2434 r600_bytecode_src_set_abs(&alu.src[i]);
2435 }
2436 alu.dst.sel = ctx->temp_reg;
2437 alu.dst.write = 1;
2438 alu.last = 1;
2439 r = r600_bytecode_add_alu(ctx->bc, &alu);
2440 if (r)
2441 return r;
2442 /* replicate result */
2443 return tgsi_helper_tempx_replicate(ctx);
2444 }
2445
2446 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2447 {
2448 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2449 struct r600_bytecode_alu alu;
2450 int i, r;
2451
2452 for (i = 0; i < 4; i++) {
2453 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2454 alu.src[0].sel = ctx->temp_reg;
2455 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2456 alu.dst.chan = i;
2457 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2458 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2459 if (i == 3)
2460 alu.last = 1;
2461 r = r600_bytecode_add_alu(ctx->bc, &alu);
2462 if (r)
2463 return r;
2464 }
2465 return 0;
2466 }
2467
2468 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2469 {
2470 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2471 struct r600_bytecode_alu alu;
2472 int i, r;
2473
2474 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2475 alu.inst = ctx->inst_info->r600_opcode;
2476 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2477 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2478 }
2479 alu.dst.sel = ctx->temp_reg;
2480 alu.dst.write = 1;
2481 alu.last = 1;
2482 r = r600_bytecode_add_alu(ctx->bc, &alu);
2483 if (r)
2484 return r;
2485 /* replicate result */
2486 return tgsi_helper_tempx_replicate(ctx);
2487 }
2488
2489 static int cayman_pow(struct r600_shader_ctx *ctx)
2490 {
2491 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2492 int i, r;
2493 struct r600_bytecode_alu alu;
2494 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2495
2496 for (i = 0; i < 3; i++) {
2497 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2498 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2499 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2500 alu.dst.sel = ctx->temp_reg;
2501 alu.dst.chan = i;
2502 alu.dst.write = 1;
2503 if (i == 2)
2504 alu.last = 1;
2505 r = r600_bytecode_add_alu(ctx->bc, &alu);
2506 if (r)
2507 return r;
2508 }
2509
2510 /* b * LOG2(a) */
2511 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2512 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2513 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2514 alu.src[1].sel = ctx->temp_reg;
2515 alu.dst.sel = ctx->temp_reg;
2516 alu.dst.write = 1;
2517 alu.last = 1;
2518 r = r600_bytecode_add_alu(ctx->bc, &alu);
2519 if (r)
2520 return r;
2521
2522 for (i = 0; i < last_slot; i++) {
2523 /* POW(a,b) = EXP2(b * LOG2(a))*/
2524 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2525 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2526 alu.src[0].sel = ctx->temp_reg;
2527
2528 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2529 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2530 if (i == last_slot - 1)
2531 alu.last = 1;
2532 r = r600_bytecode_add_alu(ctx->bc, &alu);
2533 if (r)
2534 return r;
2535 }
2536 return 0;
2537 }
2538
2539 static int tgsi_pow(struct r600_shader_ctx *ctx)
2540 {
2541 struct r600_bytecode_alu alu;
2542 int r;
2543
2544 /* LOG2(a) */
2545 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2546 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2547 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2548 alu.dst.sel = ctx->temp_reg;
2549 alu.dst.write = 1;
2550 alu.last = 1;
2551 r = r600_bytecode_add_alu(ctx->bc, &alu);
2552 if (r)
2553 return r;
2554 /* b * LOG2(a) */
2555 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2556 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2557 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2558 alu.src[1].sel = ctx->temp_reg;
2559 alu.dst.sel = ctx->temp_reg;
2560 alu.dst.write = 1;
2561 alu.last = 1;
2562 r = r600_bytecode_add_alu(ctx->bc, &alu);
2563 if (r)
2564 return r;
2565 /* POW(a,b) = EXP2(b * LOG2(a))*/
2566 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2567 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2568 alu.src[0].sel = ctx->temp_reg;
2569 alu.dst.sel = ctx->temp_reg;
2570 alu.dst.write = 1;
2571 alu.last = 1;
2572 r = r600_bytecode_add_alu(ctx->bc, &alu);
2573 if (r)
2574 return r;
2575 return tgsi_helper_tempx_replicate(ctx);
2576 }
2577
2578 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2579 {
2580 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2581 struct r600_bytecode_alu alu;
2582 int i, r, j;
2583 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2584 int tmp0 = ctx->temp_reg;
2585 int tmp1 = r600_get_temp(ctx);
2586 int tmp2 = r600_get_temp(ctx);
2587 int tmp3 = r600_get_temp(ctx);
2588 /* Unsigned path:
2589 *
2590 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2591 *
2592 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2593 * 2. tmp0.z = lo (tmp0.x * src2)
2594 * 3. tmp0.w = -tmp0.z
2595 * 4. tmp0.y = hi (tmp0.x * src2)
2596 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2597 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2598 * 7. tmp1.x = tmp0.x - tmp0.w
2599 * 8. tmp1.y = tmp0.x + tmp0.w
2600 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2601 * 10. tmp0.z = hi(tmp0.x * src1) = q
2602 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2603 *
2604 * 12. tmp0.w = src1 - tmp0.y = r
2605 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2606 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2607 *
2608 * if DIV
2609 *
2610 * 15. tmp1.z = tmp0.z + 1 = q + 1
2611 * 16. tmp1.w = tmp0.z - 1 = q - 1
2612 *
2613 * else MOD
2614 *
2615 * 15. tmp1.z = tmp0.w - src2 = r - src2
2616 * 16. tmp1.w = tmp0.w + src2 = r + src2
2617 *
2618 * endif
2619 *
2620 * 17. tmp1.x = tmp1.x & tmp1.y
2621 *
2622 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2623 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2624 *
2625 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2626 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2627 *
2628 * Signed path:
2629 *
2630 * Same as unsigned, using abs values of the operands,
2631 * and fixing the sign of the result in the end.
2632 */
2633
2634 for (i = 0; i < 4; i++) {
2635 if (!(write_mask & (1<<i)))
2636 continue;
2637
2638 if (signed_op) {
2639
2640 /* tmp2.x = -src0 */
2641 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2642 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2643
2644 alu.dst.sel = tmp2;
2645 alu.dst.chan = 0;
2646 alu.dst.write = 1;
2647
2648 alu.src[0].sel = V_SQ_ALU_SRC_0;
2649
2650 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2651
2652 alu.last = 1;
2653 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2654 return r;
2655
2656 /* tmp2.y = -src1 */
2657 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2659
2660 alu.dst.sel = tmp2;
2661 alu.dst.chan = 1;
2662 alu.dst.write = 1;
2663
2664 alu.src[0].sel = V_SQ_ALU_SRC_0;
2665
2666 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2667
2668 alu.last = 1;
2669 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2670 return r;
2671
2672 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2673 /* it will be a sign of the quotient */
2674 if (!mod) {
2675
2676 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2677 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2678
2679 alu.dst.sel = tmp2;
2680 alu.dst.chan = 2;
2681 alu.dst.write = 1;
2682
2683 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2684 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2685
2686 alu.last = 1;
2687 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2688 return r;
2689 }
2690
2691 /* tmp2.x = |src0| */
2692 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2693 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2694 alu.is_op3 = 1;
2695
2696 alu.dst.sel = tmp2;
2697 alu.dst.chan = 0;
2698 alu.dst.write = 1;
2699
2700 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2701 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2702 alu.src[2].sel = tmp2;
2703 alu.src[2].chan = 0;
2704
2705 alu.last = 1;
2706 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2707 return r;
2708
2709 /* tmp2.y = |src1| */
2710 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2711 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2712 alu.is_op3 = 1;
2713
2714 alu.dst.sel = tmp2;
2715 alu.dst.chan = 1;
2716 alu.dst.write = 1;
2717
2718 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2719 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2720 alu.src[2].sel = tmp2;
2721 alu.src[2].chan = 1;
2722
2723 alu.last = 1;
2724 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2725 return r;
2726
2727 }
2728
2729 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2730 if (ctx->bc->chip_class == CAYMAN) {
2731 /* tmp3.x = u2f(src2) */
2732 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2733 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2734
2735 alu.dst.sel = tmp3;
2736 alu.dst.chan = 0;
2737 alu.dst.write = 1;
2738
2739 if (signed_op) {
2740 alu.src[0].sel = tmp2;
2741 alu.src[0].chan = 1;
2742 } else {
2743 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2744 }
2745
2746 alu.last = 1;
2747 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2748 return r;
2749
2750 /* tmp0.x = recip(tmp3.x) */
2751 for (j = 0 ; j < 3; j++) {
2752 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2753 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2754
2755 alu.dst.sel = tmp0;
2756 alu.dst.chan = j;
2757 alu.dst.write = (j == 0);
2758
2759 alu.src[0].sel = tmp3;
2760 alu.src[0].chan = 0;
2761
2762 if (j == 2)
2763 alu.last = 1;
2764 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2765 return r;
2766 }
2767
2768 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2769 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2770
2771 alu.src[0].sel = tmp0;
2772 alu.src[0].chan = 0;
2773
2774 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2775 alu.src[1].value = 0x4f800000;
2776
2777 alu.dst.sel = tmp3;
2778 alu.dst.write = 1;
2779 alu.last = 1;
2780 r = r600_bytecode_add_alu(ctx->bc, &alu);
2781 if (r)
2782 return r;
2783
2784 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2785 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2786
2787 alu.dst.sel = tmp0;
2788 alu.dst.chan = 0;
2789 alu.dst.write = 1;
2790
2791 alu.src[0].sel = tmp3;
2792 alu.src[0].chan = 0;
2793
2794 alu.last = 1;
2795 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2796 return r;
2797
2798 } else {
2799 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2800 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2801
2802 alu.dst.sel = tmp0;
2803 alu.dst.chan = 0;
2804 alu.dst.write = 1;
2805
2806 if (signed_op) {
2807 alu.src[0].sel = tmp2;
2808 alu.src[0].chan = 1;
2809 } else {
2810 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2811 }
2812
2813 alu.last = 1;
2814 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2815 return r;
2816 }
2817
2818 /* 2. tmp0.z = lo (tmp0.x * src2) */
2819 if (ctx->bc->chip_class == CAYMAN) {
2820 for (j = 0 ; j < 4; j++) {
2821 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2822 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2823
2824 alu.dst.sel = tmp0;
2825 alu.dst.chan = j;
2826 alu.dst.write = (j == 2);
2827
2828 alu.src[0].sel = tmp0;
2829 alu.src[0].chan = 0;
2830 if (signed_op) {
2831 alu.src[1].sel = tmp2;
2832 alu.src[1].chan = 1;
2833 } else {
2834 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2835 }
2836
2837 alu.last = (j == 3);
2838 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2839 return r;
2840 }
2841 } else {
2842 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2843 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2844
2845 alu.dst.sel = tmp0;
2846 alu.dst.chan = 2;
2847 alu.dst.write = 1;
2848
2849 alu.src[0].sel = tmp0;
2850 alu.src[0].chan = 0;
2851 if (signed_op) {
2852 alu.src[1].sel = tmp2;
2853 alu.src[1].chan = 1;
2854 } else {
2855 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2856 }
2857
2858 alu.last = 1;
2859 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2860 return r;
2861 }
2862
2863 /* 3. tmp0.w = -tmp0.z */
2864 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2865 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2866
2867 alu.dst.sel = tmp0;
2868 alu.dst.chan = 3;
2869 alu.dst.write = 1;
2870
2871 alu.src[0].sel = V_SQ_ALU_SRC_0;
2872 alu.src[1].sel = tmp0;
2873 alu.src[1].chan = 2;
2874
2875 alu.last = 1;
2876 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2877 return r;
2878
2879 /* 4. tmp0.y = hi (tmp0.x * src2) */
2880 if (ctx->bc->chip_class == CAYMAN) {
2881 for (j = 0 ; j < 4; j++) {
2882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2883 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2884
2885 alu.dst.sel = tmp0;
2886 alu.dst.chan = j;
2887 alu.dst.write = (j == 1);
2888
2889 alu.src[0].sel = tmp0;
2890 alu.src[0].chan = 0;
2891
2892 if (signed_op) {
2893 alu.src[1].sel = tmp2;
2894 alu.src[1].chan = 1;
2895 } else {
2896 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2897 }
2898 alu.last = (j == 3);
2899 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2900 return r;
2901 }
2902 } else {
2903 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2904 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2905
2906 alu.dst.sel = tmp0;
2907 alu.dst.chan = 1;
2908 alu.dst.write = 1;
2909
2910 alu.src[0].sel = tmp0;
2911 alu.src[0].chan = 0;
2912
2913 if (signed_op) {
2914 alu.src[1].sel = tmp2;
2915 alu.src[1].chan = 1;
2916 } else {
2917 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2918 }
2919
2920 alu.last = 1;
2921 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2922 return r;
2923 }
2924
2925 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2926 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2927 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2928 alu.is_op3 = 1;
2929
2930 alu.dst.sel = tmp0;
2931 alu.dst.chan = 2;
2932 alu.dst.write = 1;
2933
2934 alu.src[0].sel = tmp0;
2935 alu.src[0].chan = 1;
2936 alu.src[1].sel = tmp0;
2937 alu.src[1].chan = 3;
2938 alu.src[2].sel = tmp0;
2939 alu.src[2].chan = 2;
2940
2941 alu.last = 1;
2942 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2943 return r;
2944
2945 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2946 if (ctx->bc->chip_class == CAYMAN) {
2947 for (j = 0 ; j < 4; j++) {
2948 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2949 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2950
2951 alu.dst.sel = tmp0;
2952 alu.dst.chan = j;
2953 alu.dst.write = (j == 3);
2954
2955 alu.src[0].sel = tmp0;
2956 alu.src[0].chan = 2;
2957
2958 alu.src[1].sel = tmp0;
2959 alu.src[1].chan = 0;
2960
2961 alu.last = (j == 3);
2962 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2963 return r;
2964 }
2965 } else {
2966 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2967 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2968
2969 alu.dst.sel = tmp0;
2970 alu.dst.chan = 3;
2971 alu.dst.write = 1;
2972
2973 alu.src[0].sel = tmp0;
2974 alu.src[0].chan = 2;
2975
2976 alu.src[1].sel = tmp0;
2977 alu.src[1].chan = 0;
2978
2979 alu.last = 1;
2980 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2981 return r;
2982 }
2983
2984 /* 7. tmp1.x = tmp0.x - tmp0.w */
2985 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2986 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2987
2988 alu.dst.sel = tmp1;
2989 alu.dst.chan = 0;
2990 alu.dst.write = 1;
2991
2992 alu.src[0].sel = tmp0;
2993 alu.src[0].chan = 0;
2994 alu.src[1].sel = tmp0;
2995 alu.src[1].chan = 3;
2996
2997 alu.last = 1;
2998 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2999 return r;
3000
3001 /* 8. tmp1.y = tmp0.x + tmp0.w */
3002 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3003 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3004
3005 alu.dst.sel = tmp1;
3006 alu.dst.chan = 1;
3007 alu.dst.write = 1;
3008
3009 alu.src[0].sel = tmp0;
3010 alu.src[0].chan = 0;
3011 alu.src[1].sel = tmp0;
3012 alu.src[1].chan = 3;
3013
3014 alu.last = 1;
3015 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3016 return r;
3017
3018 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
3019 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3020 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3021 alu.is_op3 = 1;
3022
3023 alu.dst.sel = tmp0;
3024 alu.dst.chan = 0;
3025 alu.dst.write = 1;
3026
3027 alu.src[0].sel = tmp0;
3028 alu.src[0].chan = 1;
3029 alu.src[1].sel = tmp1;
3030 alu.src[1].chan = 1;
3031 alu.src[2].sel = tmp1;
3032 alu.src[2].chan = 0;
3033
3034 alu.last = 1;
3035 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3036 return r;
3037
3038 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
3039 if (ctx->bc->chip_class == CAYMAN) {
3040 for (j = 0 ; j < 4; j++) {
3041 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3042 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3043
3044 alu.dst.sel = tmp0;
3045 alu.dst.chan = j;
3046 alu.dst.write = (j == 2);
3047
3048 alu.src[0].sel = tmp0;
3049 alu.src[0].chan = 0;
3050
3051 if (signed_op) {
3052 alu.src[1].sel = tmp2;
3053 alu.src[1].chan = 0;
3054 } else {
3055 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3056 }
3057
3058 alu.last = (j == 3);
3059 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3060 return r;
3061 }
3062 } else {
3063 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3064 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3065
3066 alu.dst.sel = tmp0;
3067 alu.dst.chan = 2;
3068 alu.dst.write = 1;
3069
3070 alu.src[0].sel = tmp0;
3071 alu.src[0].chan = 0;
3072
3073 if (signed_op) {
3074 alu.src[1].sel = tmp2;
3075 alu.src[1].chan = 0;
3076 } else {
3077 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3078 }
3079
3080 alu.last = 1;
3081 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3082 return r;
3083 }
3084
3085 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
3086 if (ctx->bc->chip_class == CAYMAN) {
3087 for (j = 0 ; j < 4; j++) {
3088 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3089 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3090
3091 alu.dst.sel = tmp0;
3092 alu.dst.chan = j;
3093 alu.dst.write = (j == 1);
3094
3095 if (signed_op) {
3096 alu.src[0].sel = tmp2;
3097 alu.src[0].chan = 1;
3098 } else {
3099 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3100 }
3101
3102 alu.src[1].sel = tmp0;
3103 alu.src[1].chan = 2;
3104
3105 alu.last = (j == 3);
3106 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3107 return r;
3108 }
3109 } else {
3110 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3111 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3112
3113 alu.dst.sel = tmp0;
3114 alu.dst.chan = 1;
3115 alu.dst.write = 1;
3116
3117 if (signed_op) {
3118 alu.src[0].sel = tmp2;
3119 alu.src[0].chan = 1;
3120 } else {
3121 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3122 }
3123
3124 alu.src[1].sel = tmp0;
3125 alu.src[1].chan = 2;
3126
3127 alu.last = 1;
3128 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3129 return r;
3130 }
3131
3132 /* 12. tmp0.w = src1 - tmp0.y = r */
3133 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3134 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3135
3136 alu.dst.sel = tmp0;
3137 alu.dst.chan = 3;
3138 alu.dst.write = 1;
3139
3140 if (signed_op) {
3141 alu.src[0].sel = tmp2;
3142 alu.src[0].chan = 0;
3143 } else {
3144 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3145 }
3146
3147 alu.src[1].sel = tmp0;
3148 alu.src[1].chan = 1;
3149
3150 alu.last = 1;
3151 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3152 return r;
3153
3154 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3155 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3156 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3157
3158 alu.dst.sel = tmp1;
3159 alu.dst.chan = 0;
3160 alu.dst.write = 1;
3161
3162 alu.src[0].sel = tmp0;
3163 alu.src[0].chan = 3;
3164 if (signed_op) {
3165 alu.src[1].sel = tmp2;
3166 alu.src[1].chan = 1;
3167 } else {
3168 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3169 }
3170
3171 alu.last = 1;
3172 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3173 return r;
3174
3175 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3176 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3177 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3178
3179 alu.dst.sel = tmp1;
3180 alu.dst.chan = 1;
3181 alu.dst.write = 1;
3182
3183 if (signed_op) {
3184 alu.src[0].sel = tmp2;
3185 alu.src[0].chan = 0;
3186 } else {
3187 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3188 }
3189
3190 alu.src[1].sel = tmp0;
3191 alu.src[1].chan = 1;
3192
3193 alu.last = 1;
3194 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3195 return r;
3196
3197 if (mod) { /* UMOD */
3198
3199 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3200 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3202
3203 alu.dst.sel = tmp1;
3204 alu.dst.chan = 2;
3205 alu.dst.write = 1;
3206
3207 alu.src[0].sel = tmp0;
3208 alu.src[0].chan = 3;
3209
3210 if (signed_op) {
3211 alu.src[1].sel = tmp2;
3212 alu.src[1].chan = 1;
3213 } else {
3214 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3215 }
3216
3217 alu.last = 1;
3218 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3219 return r;
3220
3221 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3222 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3224
3225 alu.dst.sel = tmp1;
3226 alu.dst.chan = 3;
3227 alu.dst.write = 1;
3228
3229 alu.src[0].sel = tmp0;
3230 alu.src[0].chan = 3;
3231 if (signed_op) {
3232 alu.src[1].sel = tmp2;
3233 alu.src[1].chan = 1;
3234 } else {
3235 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3236 }
3237
3238 alu.last = 1;
3239 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3240 return r;
3241
3242 } else { /* UDIV */
3243
3244 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3245 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3246 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3247
3248 alu.dst.sel = tmp1;
3249 alu.dst.chan = 2;
3250 alu.dst.write = 1;
3251
3252 alu.src[0].sel = tmp0;
3253 alu.src[0].chan = 2;
3254 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3255
3256 alu.last = 1;
3257 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3258 return r;
3259
3260 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3261 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3262 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3263
3264 alu.dst.sel = tmp1;
3265 alu.dst.chan = 3;
3266 alu.dst.write = 1;
3267
3268 alu.src[0].sel = tmp0;
3269 alu.src[0].chan = 2;
3270 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3271
3272 alu.last = 1;
3273 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3274 return r;
3275
3276 }
3277
3278 /* 17. tmp1.x = tmp1.x & tmp1.y */
3279 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3280 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3281
3282 alu.dst.sel = tmp1;
3283 alu.dst.chan = 0;
3284 alu.dst.write = 1;
3285
3286 alu.src[0].sel = tmp1;
3287 alu.src[0].chan = 0;
3288 alu.src[1].sel = tmp1;
3289 alu.src[1].chan = 1;
3290
3291 alu.last = 1;
3292 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3293 return r;
3294
3295 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3296 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3297 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3298 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3299 alu.is_op3 = 1;
3300
3301 alu.dst.sel = tmp0;
3302 alu.dst.chan = 2;
3303 alu.dst.write = 1;
3304
3305 alu.src[0].sel = tmp1;
3306 alu.src[0].chan = 0;
3307 alu.src[1].sel = tmp0;
3308 alu.src[1].chan = mod ? 3 : 2;
3309 alu.src[2].sel = tmp1;
3310 alu.src[2].chan = 2;
3311
3312 alu.last = 1;
3313 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3314 return r;
3315
3316 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3317 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3318 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3319 alu.is_op3 = 1;
3320
3321 if (signed_op) {
3322 alu.dst.sel = tmp0;
3323 alu.dst.chan = 2;
3324 alu.dst.write = 1;
3325 } else {
3326 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3327 }
3328
3329 alu.src[0].sel = tmp1;
3330 alu.src[0].chan = 1;
3331 alu.src[1].sel = tmp1;
3332 alu.src[1].chan = 3;
3333 alu.src[2].sel = tmp0;
3334 alu.src[2].chan = 2;
3335
3336 alu.last = 1;
3337 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3338 return r;
3339
3340 if (signed_op) {
3341
3342 /* fix the sign of the result */
3343
3344 if (mod) {
3345
3346 /* tmp0.x = -tmp0.z */
3347 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3348 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3349
3350 alu.dst.sel = tmp0;
3351 alu.dst.chan = 0;
3352 alu.dst.write = 1;
3353
3354 alu.src[0].sel = V_SQ_ALU_SRC_0;
3355 alu.src[1].sel = tmp0;
3356 alu.src[1].chan = 2;
3357
3358 alu.last = 1;
3359 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3360 return r;
3361
3362 /* sign of the remainder is the same as the sign of src0 */
3363 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3364 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3365 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3366 alu.is_op3 = 1;
3367
3368 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3369
3370 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3371 alu.src[1].sel = tmp0;
3372 alu.src[1].chan = 2;
3373 alu.src[2].sel = tmp0;
3374 alu.src[2].chan = 0;
3375
3376 alu.last = 1;
3377 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3378 return r;
3379
3380 } else {
3381
3382 /* tmp0.x = -tmp0.z */
3383 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3384 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3385
3386 alu.dst.sel = tmp0;
3387 alu.dst.chan = 0;
3388 alu.dst.write = 1;
3389
3390 alu.src[0].sel = V_SQ_ALU_SRC_0;
3391 alu.src[1].sel = tmp0;
3392 alu.src[1].chan = 2;
3393
3394 alu.last = 1;
3395 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3396 return r;
3397
3398 /* fix the quotient sign (same as the sign of src0*src1) */
3399 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3400 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3402 alu.is_op3 = 1;
3403
3404 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3405
3406 alu.src[0].sel = tmp2;
3407 alu.src[0].chan = 2;
3408 alu.src[1].sel = tmp0;
3409 alu.src[1].chan = 2;
3410 alu.src[2].sel = tmp0;
3411 alu.src[2].chan = 0;
3412
3413 alu.last = 1;
3414 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3415 return r;
3416 }
3417 }
3418 }
3419 return 0;
3420 }
3421
/*
 * TGSI UDIV entry point: forwards to the shared divide/modulo emitter with
 * both flags cleared.  The flag names mirror the `mod` / `signed_op`
 * variables used inside tgsi_divmod (argument order presumed from the
 * sibling wrappers; the callee's signature is declared further up).
 */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;
	const int signed_op = 0;

	return tgsi_divmod(ctx, mod, signed_op);
}
3426
/*
 * TGSI UMOD entry point: forwards to the shared divide/modulo emitter with
 * the modulo flag set and the signed flag cleared.  The flag names mirror
 * the `mod` / `signed_op` variables used inside tgsi_divmod (argument order
 * presumed from the sibling wrappers; the callee's signature is declared
 * further up).
 */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;
	const int signed_op = 0;

	return tgsi_divmod(ctx, mod, signed_op);
}
3431
/*
 * TGSI IDIV entry point: forwards to the shared divide/modulo emitter with
 * the modulo flag cleared and the signed flag set.  The flag names mirror
 * the `mod` / `signed_op` variables used inside tgsi_divmod (argument order
 * presumed from the sibling wrappers; the callee's signature is declared
 * further up).
 */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;
	const int signed_op = 1;

	return tgsi_divmod(ctx, mod, signed_op);
}
3436
/*
 * TGSI IMOD entry point: forwards to the shared divide/modulo emitter with
 * both the modulo and the signed flag set.  The flag names mirror the
 * `mod` / `signed_op` variables used inside tgsi_divmod (argument order
 * presumed from the sibling wrappers; the callee's signature is declared
 * further up).
 */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;
	const int signed_op = 1;

	return tgsi_divmod(ctx, mod, signed_op);
}
3441
3442
3443 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3444 {
3445 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3446 struct r600_bytecode_alu alu;
3447 int i, r;
3448 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3449 int last_inst = tgsi_last_instruction(write_mask);
3450
3451 for (i = 0; i < 4; i++) {
3452 if (!(write_mask & (1<<i)))
3453 continue;
3454
3455 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3456 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3457
3458 alu.dst.sel = ctx->temp_reg;
3459 alu.dst.chan = i;
3460 alu.dst.write = 1;
3461
3462 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3463 if (i == last_inst)
3464 alu.last = 1;
3465 r = r600_bytecode_add_alu(ctx->bc, &alu);
3466 if (r)
3467 return r;
3468 }
3469
3470 for (i = 0; i < 4; i++) {
3471 if (!(write_mask & (1<<i)))
3472 continue;
3473
3474 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3475 alu.inst = ctx->inst_info->r600_opcode;
3476
3477 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3478
3479 alu.src[0].sel = ctx->temp_reg;
3480 alu.src[0].chan = i;
3481
3482 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3483 alu.last = 1;
3484 r = r600_bytecode_add_alu(ctx->bc, &alu);
3485 if (r)
3486 return r;
3487 }
3488
3489 return 0;
3490 }
3491
3492 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3493 {
3494 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3495 struct r600_bytecode_alu alu;
3496 int i, r;
3497 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3498 int last_inst = tgsi_last_instruction(write_mask);
3499
3500 /* tmp = -src */
3501 for (i = 0; i < 4; i++) {
3502 if (!(write_mask & (1<<i)))
3503 continue;
3504
3505 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3506 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3507
3508 alu.dst.sel = ctx->temp_reg;
3509 alu.dst.chan = i;
3510 alu.dst.write = 1;
3511
3512 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3513 alu.src[0].sel = V_SQ_ALU_SRC_0;
3514
3515 if (i == last_inst)
3516 alu.last = 1;
3517 r = r600_bytecode_add_alu(ctx->bc, &alu);
3518 if (r)
3519 return r;
3520 }
3521
3522 /* dst = (src >= 0 ? src : tmp) */
3523 for (i = 0; i < 4; i++) {
3524 if (!(write_mask & (1<<i)))
3525 continue;
3526
3527 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3528 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3529 alu.is_op3 = 1;
3530 alu.dst.write = 1;
3531
3532 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3533
3534 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3535 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3536 alu.src[2].sel = ctx->temp_reg;
3537 alu.src[2].chan = i;
3538
3539 if (i == last_inst)
3540 alu.last = 1;
3541 r = r600_bytecode_add_alu(ctx->bc, &alu);
3542 if (r)
3543 return r;
3544 }
3545 return 0;
3546 }
3547
3548 static int tgsi_issg(struct r600_shader_ctx *ctx)
3549 {
3550 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3551 struct r600_bytecode_alu alu;
3552 int i, r;
3553 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3554 int last_inst = tgsi_last_instruction(write_mask);
3555
3556 /* tmp = (src >= 0 ? src : -1) */
3557 for (i = 0; i < 4; i++) {
3558 if (!(write_mask & (1<<i)))
3559 continue;
3560
3561 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3562 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3563 alu.is_op3 = 1;
3564
3565 alu.dst.sel = ctx->temp_reg;
3566 alu.dst.chan = i;
3567 alu.dst.write = 1;
3568
3569 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3570 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3571 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3572
3573 if (i == last_inst)
3574 alu.last = 1;
3575 r = r600_bytecode_add_alu(ctx->bc, &alu);
3576 if (r)
3577 return r;
3578 }
3579
3580 /* dst = (tmp > 0 ? 1 : tmp) */
3581 for (i = 0; i < 4; i++) {
3582 if (!(write_mask & (1<<i)))
3583 continue;
3584
3585 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3586 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3587 alu.is_op3 = 1;
3588 alu.dst.write = 1;
3589
3590 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3591
3592 alu.src[0].sel = ctx->temp_reg;
3593 alu.src[0].chan = i;
3594
3595 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3596
3597 alu.src[2].sel = ctx->temp_reg;
3598 alu.src[2].chan = i;
3599
3600 if (i == last_inst)
3601 alu.last = 1;
3602 r = r600_bytecode_add_alu(ctx->bc, &alu);
3603 if (r)
3604 return r;
3605 }
3606 return 0;
3607 }
3608
3609
3610
3611 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3612 {
3613 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3614 struct r600_bytecode_alu alu;
3615 int i, r;
3616
3617 /* tmp = (src > 0 ? 1 : src) */
3618 for (i = 0; i < 4; i++) {
3619 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3620 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3621 alu.is_op3 = 1;
3622
3623 alu.dst.sel = ctx->temp_reg;
3624 alu.dst.chan = i;
3625
3626 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3627 alu.src[1].sel = V_SQ_ALU_SRC_1;
3628 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3629
3630 if (i == 3)
3631 alu.last = 1;
3632 r = r600_bytecode_add_alu(ctx->bc, &alu);
3633 if (r)
3634 return r;
3635 }
3636
3637 /* dst = (-tmp > 0 ? -1 : tmp) */
3638 for (i = 0; i < 4; i++) {
3639 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3640 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3641 alu.is_op3 = 1;
3642 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3643
3644 alu.src[0].sel = ctx->temp_reg;
3645 alu.src[0].chan = i;
3646 alu.src[0].neg = 1;
3647
3648 alu.src[1].sel = V_SQ_ALU_SRC_1;
3649 alu.src[1].neg = 1;
3650
3651 alu.src[2].sel = ctx->temp_reg;
3652 alu.src[2].chan = i;
3653
3654 if (i == 3)
3655 alu.last = 1;
3656 r = r600_bytecode_add_alu(ctx->bc, &alu);
3657 if (r)
3658 return r;
3659 }
3660 return 0;
3661 }
3662
3663 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3664 {
3665 struct r600_bytecode_alu alu;
3666 int i, r;
3667
3668 for (i = 0; i < 4; i++) {
3669 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3670 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3671 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3672 alu.dst.chan = i;
3673 } else {
3674 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3675 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3676 alu.src[0].sel = ctx->temp_reg;
3677 alu.src[0].chan = i;
3678 }
3679 if (i == 3) {
3680 alu.last = 1;
3681 }
3682 r = r600_bytecode_add_alu(ctx->bc, &alu);
3683 if (r)
3684 return r;
3685 }
3686 return 0;
3687 }
3688
3689 static int tgsi_op3(struct r600_shader_ctx *ctx)
3690 {
3691 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3692 struct r600_bytecode_alu alu;
3693 int i, j, r;
3694 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3695
3696 for (i = 0; i < lasti + 1; i++) {
3697 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3698 continue;
3699
3700 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3701 alu.inst = ctx->inst_info->r600_opcode;
3702 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3703 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3704 }
3705
3706 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3707 alu.dst.chan = i;
3708 alu.dst.write = 1;
3709 alu.is_op3 = 1;
3710 if (i == lasti) {
3711 alu.last = 1;
3712 }
3713 r = r600_bytecode_add_alu(ctx->bc, &alu);
3714 if (r)
3715 return r;
3716 }
3717 return 0;
3718 }
3719
3720 static int tgsi_dp(struct r600_shader_ctx *ctx)
3721 {
3722 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3723 struct r600_bytecode_alu alu;
3724 int i, j, r;
3725
3726 for (i = 0; i < 4; i++) {
3727 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3728 alu.inst = ctx->inst_info->r600_opcode;
3729 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3730 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3731 }
3732
3733 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3734 alu.dst.chan = i;
3735 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3736 /* handle some special cases */
3737 switch (ctx->inst_info->tgsi_opcode) {
3738 case TGSI_OPCODE_DP2:
3739 if (i > 1) {
3740 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3741 alu.src[0].chan = alu.src[1].chan = 0;
3742 }
3743 break;
3744 case TGSI_OPCODE_DP3:
3745 if (i > 2) {
3746 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3747 alu.src[0].chan = alu.src[1].chan = 0;
3748 }
3749 break;
3750 case TGSI_OPCODE_DPH:
3751 if (i == 3) {
3752 alu.src[0].sel = V_SQ_ALU_SRC_1;
3753 alu.src[0].chan = 0;
3754 alu.src[0].neg = 0;
3755 }
3756 break;
3757 default:
3758 break;
3759 }
3760 if (i == 3) {
3761 alu.last = 1;
3762 }
3763 r = r600_bytecode_add_alu(ctx->bc, &alu);
3764 if (r)
3765 return r;
3766 }
3767 return 0;
3768 }
3769
3770 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3771 unsigned index)
3772 {
3773 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3774 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3775 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3776 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3777 ctx->src[index].neg || ctx->src[index].abs;
3778 }
3779
3780 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3781 unsigned index)
3782 {
3783 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3784 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3785 }
3786
3787 static int tgsi_tex(struct r600_shader_ctx *ctx)
3788 {
3789 static float one_point_five = 1.5f;
3790 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3791 struct r600_bytecode_tex tex;
3792 struct r600_bytecode_alu alu;
3793 unsigned src_gpr;
3794 int r, i, j;
3795 int opcode;
3796 /* Texture fetch instructions can only use gprs as source.
3797 * Also they cannot negate the source or take the absolute value */
3798 const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3799 tgsi_tex_src_requires_loading(ctx, 0);
3800 boolean src_loaded = FALSE;
3801 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3802 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3803
3804 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3805
3806 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3807 /* get offset values */
3808 if (inst->Texture.NumOffsets) {
3809 assert(inst->Texture.NumOffsets == 1);
3810
3811 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3812 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3813 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3814 }
3815 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3816 /* TGSI moves the sampler to src reg 3 for TXD */
3817 sampler_src_reg = 3;
3818
3819 for (i = 1; i < 3; i++) {
3820 /* set gradients h/v */
3821 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3822 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3823 SQ_TEX_INST_SET_GRADIENTS_V;
3824 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3825 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3826
3827 if (tgsi_tex_src_requires_loading(ctx, i)) {
3828 tex.src_gpr = r600_get_temp(ctx);
3829 tex.src_sel_x = 0;
3830 tex.src_sel_y = 1;
3831 tex.src_sel_z = 2;
3832 tex.src_sel_w = 3;
3833
3834 for (j = 0; j < 4; j++) {
3835 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3836 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3837 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3838 alu.dst.sel = tex.src_gpr;
3839 alu.dst.chan = j;
3840 if (j == 3)
3841 alu.last = 1;
3842 alu.dst.write = 1;
3843 r = r600_bytecode_add_alu(ctx->bc, &alu);
3844 if (r)
3845 return r;
3846 }
3847
3848 } else {
3849 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3850 tex.src_sel_x = ctx->src[i].swizzle[0];
3851 tex.src_sel_y = ctx->src[i].swizzle[1];
3852 tex.src_sel_z = ctx->src[i].swizzle[2];
3853 tex.src_sel_w = ctx->src[i].swizzle[3];
3854 tex.src_rel = ctx->src[i].rel;
3855 }
3856 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3857 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3858 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3859 tex.coord_type_x = 1;
3860 tex.coord_type_y = 1;
3861 tex.coord_type_z = 1;
3862 tex.coord_type_w = 1;
3863 }
3864 r = r600_bytecode_add_tex(ctx->bc, &tex);
3865 if (r)
3866 return r;
3867 }
3868 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3869 int out_chan;
3870 /* Add perspective divide */
3871 if (ctx->bc->chip_class == CAYMAN) {
3872 out_chan = 2;
3873 for (i = 0; i < 3; i++) {
3874 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3875 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3876 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3877
3878 alu.dst.sel = ctx->temp_reg;
3879 alu.dst.chan = i;
3880 if (i == 2)
3881 alu.last = 1;
3882 if (out_chan == i)
3883 alu.dst.write = 1;
3884 r = r600_bytecode_add_alu(ctx->bc, &alu);
3885 if (r)
3886 return r;
3887 }
3888
3889 } else {
3890 out_chan = 3;
3891 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3892 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3893 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3894
3895 alu.dst.sel = ctx->temp_reg;
3896 alu.dst.chan = out_chan;
3897 alu.last = 1;
3898 alu.dst.write = 1;
3899 r = r600_bytecode_add_alu(ctx->bc, &alu);
3900 if (r)
3901 return r;
3902 }
3903
3904 for (i = 0; i < 3; i++) {
3905 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3906 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3907 alu.src[0].sel = ctx->temp_reg;
3908 alu.src[0].chan = out_chan;
3909 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3910 alu.dst.sel = ctx->temp_reg;
3911 alu.dst.chan = i;
3912 alu.dst.write = 1;
3913 r = r600_bytecode_add_alu(ctx->bc, &alu);
3914 if (r)
3915 return r;
3916 }
3917 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3918 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3919 alu.src[0].sel = V_SQ_ALU_SRC_1;
3920 alu.src[0].chan = 0;
3921 alu.dst.sel = ctx->temp_reg;
3922 alu.dst.chan = 3;
3923 alu.last = 1;
3924 alu.dst.write = 1;
3925 r = r600_bytecode_add_alu(ctx->bc, &alu);
3926 if (r)
3927 return r;
3928 src_loaded = TRUE;
3929 src_gpr = ctx->temp_reg;
3930 }
3931
3932 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3933 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3934 inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
3935 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
3936
3937 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3938 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3939
3940 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3941 for (i = 0; i < 4; i++) {
3942 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3943 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3944 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3945 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3946 alu.dst.sel = ctx->temp_reg;
3947 alu.dst.chan = i;
3948 if (i == 3)
3949 alu.last = 1;
3950 alu.dst.write = 1;
3951 r = r600_bytecode_add_alu(ctx->bc, &alu);
3952 if (r)
3953 return r;
3954 }
3955
3956 /* tmp1.z = RCP_e(|tmp1.z|) */
3957 if (ctx->bc->chip_class == CAYMAN) {
3958 for (i = 0; i < 3; i++) {
3959 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3960 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3961 alu.src[0].sel = ctx->temp_reg;
3962 alu.src[0].chan = 2;
3963 alu.src[0].abs = 1;
3964 alu.dst.sel = ctx->temp_reg;
3965 alu.dst.chan = i;
3966 if (i == 2)
3967 alu.dst.write = 1;
3968 if (i == 2)
3969 alu.last = 1;
3970 r = r600_bytecode_add_alu(ctx->bc, &alu);
3971 if (r)
3972 return r;
3973 }
3974 } else {
3975 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3976 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3977 alu.src[0].sel = ctx->temp_reg;
3978 alu.src[0].chan = 2;
3979 alu.src[0].abs = 1;
3980 alu.dst.sel = ctx->temp_reg;
3981 alu.dst.chan = 2;
3982 alu.dst.write = 1;
3983 alu.last = 1;
3984 r = r600_bytecode_add_alu(ctx->bc, &alu);
3985 if (r)
3986 return r;
3987 }
3988
3989 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3990 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3991 * muladd has no writemask, have to use another temp
3992 */
3993 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3995 alu.is_op3 = 1;
3996
3997 alu.src[0].sel = ctx->temp_reg;
3998 alu.src[0].chan = 0;
3999 alu.src[1].sel = ctx->temp_reg;
4000 alu.src[1].chan = 2;
4001
4002 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
4003 alu.src[2].chan = 0;
4004 alu.src[2].value = *(uint32_t *)&one_point_five;
4005
4006 alu.dst.sel = ctx->temp_reg;
4007 alu.dst.chan = 0;
4008 alu.dst.write = 1;
4009
4010 r = r600_bytecode_add_alu(ctx->bc, &alu);
4011 if (r)
4012 return r;
4013
4014 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4015 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4016 alu.is_op3 = 1;
4017
4018 alu.src[0].sel = ctx->temp_reg;
4019 alu.src[0].chan = 1;
4020 alu.src[1].sel = ctx->temp_reg;
4021 alu.src[1].chan = 2;
4022
4023 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
4024 alu.src[2].chan = 0;
4025 alu.src[2].value = *(uint32_t *)&one_point_five;
4026
4027 alu.dst.sel = ctx->temp_reg;
4028 alu.dst.chan = 1;
4029 alu.dst.write = 1;
4030
4031 alu.last = 1;
4032 r = r600_bytecode_add_alu(ctx->bc, &alu);
4033 if (r)
4034 return r;
4035 /* write initial W value into Z component */
4036 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4037 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4038 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4039 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4040 alu.dst.sel = ctx->temp_reg;
4041 alu.dst.chan = 2;
4042 alu.dst.write = 1;
4043 alu.last = 1;
4044 r = r600_bytecode_add_alu(ctx->bc, &alu);
4045 if (r)
4046 return r;
4047 }
4048 src_loaded = TRUE;
4049 src_gpr = ctx->temp_reg;
4050 }
4051
4052 if (src_requires_loading && !src_loaded) {
4053 for (i = 0; i < 4; i++) {
4054 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4055 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4056 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4057 alu.dst.sel = ctx->temp_reg;
4058 alu.dst.chan = i;
4059 if (i == 3)
4060 alu.last = 1;
4061 alu.dst.write = 1;
4062 r = r600_bytecode_add_alu(ctx->bc, &alu);
4063 if (r)
4064 return r;
4065 }
4066 src_loaded = TRUE;
4067 src_gpr = ctx->temp_reg;
4068 }
4069
4070 opcode = ctx->inst_info->r600_opcode;
4071 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4072 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4073 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4074 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4075 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4076 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
4077 switch (opcode) {
4078 case SQ_TEX_INST_SAMPLE:
4079 opcode = SQ_TEX_INST_SAMPLE_C;
4080 break;
4081 case SQ_TEX_INST_SAMPLE_L:
4082 opcode = SQ_TEX_INST_SAMPLE_C_L;
4083 break;
4084 case SQ_TEX_INST_SAMPLE_LB:
4085 opcode = SQ_TEX_INST_SAMPLE_C_LB;
4086 break;
4087 case SQ_TEX_INST_SAMPLE_G:
4088 opcode = SQ_TEX_INST_SAMPLE_C_G;
4089 break;
4090 }
4091 }
4092
4093 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4094 tex.inst = opcode;
4095
4096 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4097 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4098 tex.src_gpr = src_gpr;
4099 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4100 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4101 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4102 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4103 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4104
4105 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
4106 tex.src_sel_x = 4;
4107 tex.src_sel_y = 4;
4108 tex.src_sel_z = 4;
4109 tex.src_sel_w = 4;
4110 } else if (src_loaded) {
4111 tex.src_sel_x = 0;
4112 tex.src_sel_y = 1;
4113 tex.src_sel_z = 2;
4114 tex.src_sel_w = 3;
4115 } else {
4116 tex.src_sel_x = ctx->src[0].swizzle[0];
4117 tex.src_sel_y = ctx->src[0].swizzle[1];
4118 tex.src_sel_z = ctx->src[0].swizzle[2];
4119 tex.src_sel_w = ctx->src[0].swizzle[3];
4120 tex.src_rel = ctx->src[0].rel;
4121 }
4122
4123 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
4124 tex.src_sel_x = 1;
4125 tex.src_sel_y = 0;
4126 tex.src_sel_z = 3;
4127 tex.src_sel_w = 1;
4128 }
4129 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4130 tex.src_sel_x = 1;
4131 tex.src_sel_y = 0;
4132 tex.src_sel_z = 3;
4133 tex.src_sel_w = 2; /* route Z compare value into W */
4134 }
4135
4136 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4137 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4138 tex.coord_type_x = 1;
4139 tex.coord_type_y = 1;
4140 }
4141 tex.coord_type_z = 1;
4142 tex.coord_type_w = 1;
4143
4144 tex.offset_x = offset_x;
4145 tex.offset_y = offset_y;
4146 tex.offset_z = offset_z;
4147
4148 /* Put the depth for comparison in W.
4149 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4150 * Some instructions expect the depth in Z. */
4151 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4152 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4153 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4154 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4155 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4156 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4157 tex.src_sel_w = tex.src_sel_z;
4158 }
4159
4160 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4161 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4162 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4163 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4164 /* the array index is read from Y */
4165 tex.coord_type_y = 0;
4166 } else {
4167 /* the array index is read from Z */
4168 tex.coord_type_z = 0;
4169 tex.src_sel_z = tex.src_sel_y;
4170 }
4171 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4172 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4173 /* the array index is read from Z */
4174 tex.coord_type_z = 0;
4175
4176 r = r600_bytecode_add_tex(ctx->bc, &tex);
4177 if (r)
4178 return r;
4179
4180 /* add shadow ambient support - gallium doesn't do it yet */
4181 return 0;
4182 }
4183
4184 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4185 {
4186 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4187 struct r600_bytecode_alu alu;
4188 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4189 unsigned i;
4190 int r;
4191
4192 /* optimize if it's just an equal balance */
4193 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4194 for (i = 0; i < lasti + 1; i++) {
4195 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4196 continue;
4197
4198 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4199 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4200 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4201 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4202 alu.omod = 3;
4203 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4204 alu.dst.chan = i;
4205 if (i == lasti) {
4206 alu.last = 1;
4207 }
4208 r = r600_bytecode_add_alu(ctx->bc, &alu);
4209 if (r)
4210 return r;
4211 }
4212 return 0;
4213 }
4214
4215 /* 1 - src0 */
4216 for (i = 0; i < lasti + 1; i++) {
4217 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4218 continue;
4219
4220 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4222 alu.src[0].sel = V_SQ_ALU_SRC_1;
4223 alu.src[0].chan = 0;
4224 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4225 r600_bytecode_src_toggle_neg(&alu.src[1]);
4226 alu.dst.sel = ctx->temp_reg;
4227 alu.dst.chan = i;
4228 if (i == lasti) {
4229 alu.last = 1;
4230 }
4231 alu.dst.write = 1;
4232 r = r600_bytecode_add_alu(ctx->bc, &alu);
4233 if (r)
4234 return r;
4235 }
4236
4237 /* (1 - src0) * src2 */
4238 for (i = 0; i < lasti + 1; i++) {
4239 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4240 continue;
4241
4242 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4243 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4244 alu.src[0].sel = ctx->temp_reg;
4245 alu.src[0].chan = i;
4246 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4247 alu.dst.sel = ctx->temp_reg;
4248 alu.dst.chan = i;
4249 if (i == lasti) {
4250 alu.last = 1;
4251 }
4252 alu.dst.write = 1;
4253 r = r600_bytecode_add_alu(ctx->bc, &alu);
4254 if (r)
4255 return r;
4256 }
4257
4258 /* src0 * src1 + (1 - src0) * src2 */
4259 for (i = 0; i < lasti + 1; i++) {
4260 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4261 continue;
4262
4263 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4264 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4265 alu.is_op3 = 1;
4266 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4267 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4268 alu.src[2].sel = ctx->temp_reg;
4269 alu.src[2].chan = i;
4270
4271 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4272 alu.dst.chan = i;
4273 if (i == lasti) {
4274 alu.last = 1;
4275 }
4276 r = r600_bytecode_add_alu(ctx->bc, &alu);
4277 if (r)
4278 return r;
4279 }
4280 return 0;
4281 }
4282
4283 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4284 {
4285 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4286 struct r600_bytecode_alu alu;
4287 int i, r;
4288 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4289
4290 for (i = 0; i < lasti + 1; i++) {
4291 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4292 continue;
4293
4294 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4295 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4296 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4297 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4298 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4299 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4300 alu.dst.chan = i;
4301 alu.dst.write = 1;
4302 alu.is_op3 = 1;
4303 if (i == lasti)
4304 alu.last = 1;
4305 r = r600_bytecode_add_alu(ctx->bc, &alu);
4306 if (r)
4307 return r;
4308 }
4309 return 0;
4310 }
4311
4312 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4313 {
4314 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4315 static const unsigned int src0_swizzle[] = {2, 0, 1};
4316 static const unsigned int src1_swizzle[] = {1, 2, 0};
4317 struct r600_bytecode_alu alu;
4318 uint32_t use_temp = 0;
4319 int i, r;
4320
4321 if (inst->Dst[0].Register.WriteMask != 0xf)
4322 use_temp = 1;
4323
4324 for (i = 0; i < 4; i++) {
4325 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4326 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4327 if (i < 3) {
4328 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4329 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4330 } else {
4331 alu.src[0].sel = V_SQ_ALU_SRC_0;
4332 alu.src[0].chan = i;
4333 alu.src[1].sel = V_SQ_ALU_SRC_0;
4334 alu.src[1].chan = i;
4335 }
4336
4337 alu.dst.sel = ctx->temp_reg;
4338 alu.dst.chan = i;
4339 alu.dst.write = 1;
4340
4341 if (i == 3)
4342 alu.last = 1;
4343 r = r600_bytecode_add_alu(ctx->bc, &alu);
4344 if (r)
4345 return r;
4346 }
4347
4348 for (i = 0; i < 4; i++) {
4349 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4351
4352 if (i < 3) {
4353 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4354 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4355 } else {
4356 alu.src[0].sel = V_SQ_ALU_SRC_0;
4357 alu.src[0].chan = i;
4358 alu.src[1].sel = V_SQ_ALU_SRC_0;
4359 alu.src[1].chan = i;
4360 }
4361
4362 alu.src[2].sel = ctx->temp_reg;
4363 alu.src[2].neg = 1;
4364 alu.src[2].chan = i;
4365
4366 if (use_temp)
4367 alu.dst.sel = ctx->temp_reg;
4368 else
4369 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4370 alu.dst.chan = i;
4371 alu.dst.write = 1;
4372 alu.is_op3 = 1;
4373 if (i == 3)
4374 alu.last = 1;
4375 r = r600_bytecode_add_alu(ctx->bc, &alu);
4376 if (r)
4377 return r;
4378 }
4379 if (use_temp)
4380 return tgsi_helper_copy(ctx, inst);
4381 return 0;
4382 }
4383
4384 static int tgsi_exp(struct r600_shader_ctx *ctx)
4385 {
4386 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4387 struct r600_bytecode_alu alu;
4388 int r;
4389 int i;
4390
4391 /* result.x = 2^floor(src); */
4392 if (inst->Dst[0].Register.WriteMask & 1) {
4393 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4394
4395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4396 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4397
4398 alu.dst.sel = ctx->temp_reg;
4399 alu.dst.chan = 0;
4400 alu.dst.write = 1;
4401 alu.last = 1;
4402 r = r600_bytecode_add_alu(ctx->bc, &alu);
4403 if (r)
4404 return r;
4405
4406 if (ctx->bc->chip_class == CAYMAN) {
4407 for (i = 0; i < 3; i++) {
4408 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4409 alu.src[0].sel = ctx->temp_reg;
4410 alu.src[0].chan = 0;
4411
4412 alu.dst.sel = ctx->temp_reg;
4413 alu.dst.chan = i;
4414 alu.dst.write = i == 0;
4415 alu.last = i == 2;
4416 r = r600_bytecode_add_alu(ctx->bc, &alu);
4417 if (r)
4418 return r;
4419 }
4420 } else {
4421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4422 alu.src[0].sel = ctx->temp_reg;
4423 alu.src[0].chan = 0;
4424
4425 alu.dst.sel = ctx->temp_reg;
4426 alu.dst.chan = 0;
4427 alu.dst.write = 1;
4428 alu.last = 1;
4429 r = r600_bytecode_add_alu(ctx->bc, &alu);
4430 if (r)
4431 return r;
4432 }
4433 }
4434
4435 /* result.y = tmp - floor(tmp); */
4436 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4437 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4438
4439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4440 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4441
4442 alu.dst.sel = ctx->temp_reg;
4443 #if 0
4444 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4445 if (r)
4446 return r;
4447 #endif
4448 alu.dst.write = 1;
4449 alu.dst.chan = 1;
4450
4451 alu.last = 1;
4452
4453 r = r600_bytecode_add_alu(ctx->bc, &alu);
4454 if (r)
4455 return r;
4456 }
4457
4458 /* result.z = RoughApprox2ToX(tmp);*/
4459 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4460 if (ctx->bc->chip_class == CAYMAN) {
4461 for (i = 0; i < 3; i++) {
4462 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4464 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4465
4466 alu.dst.sel = ctx->temp_reg;
4467 alu.dst.chan = i;
4468 if (i == 2) {
4469 alu.dst.write = 1;
4470 alu.last = 1;
4471 }
4472
4473 r = r600_bytecode_add_alu(ctx->bc, &alu);
4474 if (r)
4475 return r;
4476 }
4477 } else {
4478 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4479 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4480 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4481
4482 alu.dst.sel = ctx->temp_reg;
4483 alu.dst.write = 1;
4484 alu.dst.chan = 2;
4485
4486 alu.last = 1;
4487
4488 r = r600_bytecode_add_alu(ctx->bc, &alu);
4489 if (r)
4490 return r;
4491 }
4492 }
4493
4494 /* result.w = 1.0;*/
4495 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4496 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4497
4498 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4499 alu.src[0].sel = V_SQ_ALU_SRC_1;
4500 alu.src[0].chan = 0;
4501
4502 alu.dst.sel = ctx->temp_reg;
4503 alu.dst.chan = 3;
4504 alu.dst.write = 1;
4505 alu.last = 1;
4506 r = r600_bytecode_add_alu(ctx->bc, &alu);
4507 if (r)
4508 return r;
4509 }
4510 return tgsi_helper_copy(ctx, inst);
4511 }
4512
4513 static int tgsi_log(struct r600_shader_ctx *ctx)
4514 {
4515 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4516 struct r600_bytecode_alu alu;
4517 int r;
4518 int i;
4519
4520 /* result.x = floor(log2(|src|)); */
4521 if (inst->Dst[0].Register.WriteMask & 1) {
4522 if (ctx->bc->chip_class == CAYMAN) {
4523 for (i = 0; i < 3; i++) {
4524 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4525
4526 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4527 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4528 r600_bytecode_src_set_abs(&alu.src[0]);
4529
4530 alu.dst.sel = ctx->temp_reg;
4531 alu.dst.chan = i;
4532 if (i == 0)
4533 alu.dst.write = 1;
4534 if (i == 2)
4535 alu.last = 1;
4536 r = r600_bytecode_add_alu(ctx->bc, &alu);
4537 if (r)
4538 return r;
4539 }
4540
4541 } else {
4542 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4543
4544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4545 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4546 r600_bytecode_src_set_abs(&alu.src[0]);
4547
4548 alu.dst.sel = ctx->temp_reg;
4549 alu.dst.chan = 0;
4550 alu.dst.write = 1;
4551 alu.last = 1;
4552 r = r600_bytecode_add_alu(ctx->bc, &alu);
4553 if (r)
4554 return r;
4555 }
4556
4557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4558 alu.src[0].sel = ctx->temp_reg;
4559 alu.src[0].chan = 0;
4560
4561 alu.dst.sel = ctx->temp_reg;
4562 alu.dst.chan = 0;
4563 alu.dst.write = 1;
4564 alu.last = 1;
4565
4566 r = r600_bytecode_add_alu(ctx->bc, &alu);
4567 if (r)
4568 return r;
4569 }
4570
4571 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4572 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4573
4574 if (ctx->bc->chip_class == CAYMAN) {
4575 for (i = 0; i < 3; i++) {
4576 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4577
4578 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4579 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4580 r600_bytecode_src_set_abs(&alu.src[0]);
4581
4582 alu.dst.sel = ctx->temp_reg;
4583 alu.dst.chan = i;
4584 if (i == 1)
4585 alu.dst.write = 1;
4586 if (i == 2)
4587 alu.last = 1;
4588
4589 r = r600_bytecode_add_alu(ctx->bc, &alu);
4590 if (r)
4591 return r;
4592 }
4593 } else {
4594 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4595
4596 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4597 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4598 r600_bytecode_src_set_abs(&alu.src[0]);
4599
4600 alu.dst.sel = ctx->temp_reg;
4601 alu.dst.chan = 1;
4602 alu.dst.write = 1;
4603 alu.last = 1;
4604
4605 r = r600_bytecode_add_alu(ctx->bc, &alu);
4606 if (r)
4607 return r;
4608 }
4609
4610 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4611
4612 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4613 alu.src[0].sel = ctx->temp_reg;
4614 alu.src[0].chan = 1;
4615
4616 alu.dst.sel = ctx->temp_reg;
4617 alu.dst.chan = 1;
4618 alu.dst.write = 1;
4619 alu.last = 1;
4620
4621 r = r600_bytecode_add_alu(ctx->bc, &alu);
4622 if (r)
4623 return r;
4624
4625 if (ctx->bc->chip_class == CAYMAN) {
4626 for (i = 0; i < 3; i++) {
4627 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4628 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4629 alu.src[0].sel = ctx->temp_reg;
4630 alu.src[0].chan = 1;
4631
4632 alu.dst.sel = ctx->temp_reg;
4633 alu.dst.chan = i;
4634 if (i == 1)
4635 alu.dst.write = 1;
4636 if (i == 2)
4637 alu.last = 1;
4638
4639 r = r600_bytecode_add_alu(ctx->bc, &alu);
4640 if (r)
4641 return r;
4642 }
4643 } else {
4644 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4645 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4646 alu.src[0].sel = ctx->temp_reg;
4647 alu.src[0].chan = 1;
4648
4649 alu.dst.sel = ctx->temp_reg;
4650 alu.dst.chan = 1;
4651 alu.dst.write = 1;
4652 alu.last = 1;
4653
4654 r = r600_bytecode_add_alu(ctx->bc, &alu);
4655 if (r)
4656 return r;
4657 }
4658
4659 if (ctx->bc->chip_class == CAYMAN) {
4660 for (i = 0; i < 3; i++) {
4661 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4662 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4663 alu.src[0].sel = ctx->temp_reg;
4664 alu.src[0].chan = 1;
4665
4666 alu.dst.sel = ctx->temp_reg;
4667 alu.dst.chan = i;
4668 if (i == 1)
4669 alu.dst.write = 1;
4670 if (i == 2)
4671 alu.last = 1;
4672
4673 r = r600_bytecode_add_alu(ctx->bc, &alu);
4674 if (r)
4675 return r;
4676 }
4677 } else {
4678 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4679 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4680 alu.src[0].sel = ctx->temp_reg;
4681 alu.src[0].chan = 1;
4682
4683 alu.dst.sel = ctx->temp_reg;
4684 alu.dst.chan = 1;
4685 alu.dst.write = 1;
4686 alu.last = 1;
4687
4688 r = r600_bytecode_add_alu(ctx->bc, &alu);
4689 if (r)
4690 return r;
4691 }
4692
4693 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4694
4695 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4696
4697 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4698 r600_bytecode_src_set_abs(&alu.src[0]);
4699
4700 alu.src[1].sel = ctx->temp_reg;
4701 alu.src[1].chan = 1;
4702
4703 alu.dst.sel = ctx->temp_reg;
4704 alu.dst.chan = 1;
4705 alu.dst.write = 1;
4706 alu.last = 1;
4707
4708 r = r600_bytecode_add_alu(ctx->bc, &alu);
4709 if (r)
4710 return r;
4711 }
4712
4713 /* result.z = log2(|src|);*/
4714 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4715 if (ctx->bc->chip_class == CAYMAN) {
4716 for (i = 0; i < 3; i++) {
4717 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4718
4719 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4720 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4721 r600_bytecode_src_set_abs(&alu.src[0]);
4722
4723 alu.dst.sel = ctx->temp_reg;
4724 if (i == 2)
4725 alu.dst.write = 1;
4726 alu.dst.chan = i;
4727 if (i == 2)
4728 alu.last = 1;
4729
4730 r = r600_bytecode_add_alu(ctx->bc, &alu);
4731 if (r)
4732 return r;
4733 }
4734 } else {
4735 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4736
4737 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4738 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4739 r600_bytecode_src_set_abs(&alu.src[0]);
4740
4741 alu.dst.sel = ctx->temp_reg;
4742 alu.dst.write = 1;
4743 alu.dst.chan = 2;
4744 alu.last = 1;
4745
4746 r = r600_bytecode_add_alu(ctx->bc, &alu);
4747 if (r)
4748 return r;
4749 }
4750 }
4751
4752 /* result.w = 1.0; */
4753 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4754 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4755
4756 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4757 alu.src[0].sel = V_SQ_ALU_SRC_1;
4758 alu.src[0].chan = 0;
4759
4760 alu.dst.sel = ctx->temp_reg;
4761 alu.dst.chan = 3;
4762 alu.dst.write = 1;
4763 alu.last = 1;
4764
4765 r = r600_bytecode_add_alu(ctx->bc, &alu);
4766 if (r)
4767 return r;
4768 }
4769
4770 return tgsi_helper_copy(ctx, inst);
4771 }
4772
4773 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4774 {
4775 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4776 struct r600_bytecode_alu alu;
4777 int r;
4778
4779 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4780
4781 switch (inst->Instruction.Opcode) {
4782 case TGSI_OPCODE_ARL:
4783 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4784 break;
4785 case TGSI_OPCODE_ARR:
4786 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4787 break;
4788 case TGSI_OPCODE_UARL:
4789 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4790 break;
4791 default:
4792 assert(0);
4793 return -1;
4794 }
4795
4796 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4797 alu.last = 1;
4798 alu.dst.sel = ctx->bc->ar_reg;
4799 alu.dst.write = 1;
4800 r = r600_bytecode_add_alu(ctx->bc, &alu);
4801 if (r)
4802 return r;
4803
4804 ctx->bc->ar_loaded = 0;
4805 return 0;
4806 }
4807 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4808 {
4809 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4810 struct r600_bytecode_alu alu;
4811 int r;
4812
4813 switch (inst->Instruction.Opcode) {
4814 case TGSI_OPCODE_ARL:
4815 memset(&alu, 0, sizeof(alu));
4816 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4817 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4818 alu.dst.sel = ctx->bc->ar_reg;
4819 alu.dst.write = 1;
4820 alu.last = 1;
4821
4822 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4823 return r;
4824
4825 memset(&alu, 0, sizeof(alu));
4826 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4827 alu.src[0].sel = ctx->bc->ar_reg;
4828 alu.dst.sel = ctx->bc->ar_reg;
4829 alu.dst.write = 1;
4830 alu.last = 1;
4831
4832 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4833 return r;
4834 break;
4835 case TGSI_OPCODE_ARR:
4836 memset(&alu, 0, sizeof(alu));
4837 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4838 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4839 alu.dst.sel = ctx->bc->ar_reg;
4840 alu.dst.write = 1;
4841 alu.last = 1;
4842
4843 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4844 return r;
4845 break;
4846 case TGSI_OPCODE_UARL:
4847 memset(&alu, 0, sizeof(alu));
4848 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4849 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4850 alu.dst.sel = ctx->bc->ar_reg;
4851 alu.dst.write = 1;
4852 alu.last = 1;
4853
4854 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4855 return r;
4856 break;
4857 default:
4858 assert(0);
4859 return -1;
4860 }
4861
4862 ctx->bc->ar_loaded = 0;
4863 return 0;
4864 }
4865
4866 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4867 {
4868 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4869 struct r600_bytecode_alu alu;
4870 int i, r = 0;
4871
4872 for (i = 0; i < 4; i++) {
4873 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4874
4875 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4876 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4877
4878 if (i == 0 || i == 3) {
4879 alu.src[0].sel = V_SQ_ALU_SRC_1;
4880 } else {
4881 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4882 }
4883
4884 if (i == 0 || i == 2) {
4885 alu.src[1].sel = V_SQ_ALU_SRC_1;
4886 } else {
4887 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4888 }
4889 if (i == 3)
4890 alu.last = 1;
4891 r = r600_bytecode_add_alu(ctx->bc, &alu);
4892 if (r)
4893 return r;
4894 }
4895 return 0;
4896 }
4897
4898 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4899 {
4900 struct r600_bytecode_alu alu;
4901 int r;
4902
4903 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4904 alu.inst = opcode;
4905 alu.execute_mask = 1;
4906 alu.update_pred = 1;
4907
4908 alu.dst.sel = ctx->temp_reg;
4909 alu.dst.write = 1;
4910 alu.dst.chan = 0;
4911
4912 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4913 alu.src[1].sel = V_SQ_ALU_SRC_0;
4914 alu.src[1].chan = 0;
4915
4916 alu.last = 1;
4917
4918 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4919 if (r)
4920 return r;
4921 return 0;
4922 }
4923
4924 static int pops(struct r600_shader_ctx *ctx, int pops)
4925 {
4926 unsigned force_pop = ctx->bc->force_add_cf;
4927
4928 if (!force_pop) {
4929 int alu_pop = 3;
4930 if (ctx->bc->cf_last) {
4931 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4932 alu_pop = 0;
4933 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4934 alu_pop = 1;
4935 }
4936 alu_pop += pops;
4937 if (alu_pop == 1) {
4938 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4939 ctx->bc->force_add_cf = 1;
4940 } else if (alu_pop == 2) {
4941 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4942 ctx->bc->force_add_cf = 1;
4943 } else {
4944 force_pop = 1;
4945 }
4946 }
4947
4948 if (force_pop) {
4949 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4950 ctx->bc->cf_last->pop_count = pops;
4951 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4952 }
4953
4954 return 0;
4955 }
4956
4957 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4958 {
4959 switch(reason) {
4960 case FC_PUSH_VPM:
4961 ctx->bc->callstack[ctx->bc->call_sp].current--;
4962 break;
4963 case FC_PUSH_WQM:
4964 case FC_LOOP:
4965 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4966 break;
4967 case FC_REP:
4968 /* TOODO : for 16 vp asic should -= 2; */
4969 ctx->bc->callstack[ctx->bc->call_sp].current --;
4970 break;
4971 }
4972 }
4973
4974 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4975 {
4976 if (check_max_only) {
4977 int diff;
4978 switch (reason) {
4979 case FC_PUSH_VPM:
4980 diff = 1;
4981 break;
4982 case FC_PUSH_WQM:
4983 diff = 4;
4984 break;
4985 default:
4986 assert(0);
4987 diff = 0;
4988 }
4989 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4990 ctx->bc->callstack[ctx->bc->call_sp].max) {
4991 ctx->bc->callstack[ctx->bc->call_sp].max =
4992 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4993 }
4994 return;
4995 }
4996 switch (reason) {
4997 case FC_PUSH_VPM:
4998 ctx->bc->callstack[ctx->bc->call_sp].current++;
4999 break;
5000 case FC_PUSH_WQM:
5001 case FC_LOOP:
5002 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
5003 break;
5004 case FC_REP:
5005 ctx->bc->callstack[ctx->bc->call_sp].current++;
5006 break;
5007 }
5008
5009 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
5010 ctx->bc->callstack[ctx->bc->call_sp].max) {
5011 ctx->bc->callstack[ctx->bc->call_sp].max =
5012 ctx->bc->callstack[ctx->bc->call_sp].current;
5013 }
5014 }
5015
5016 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
5017 {
5018 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
5019
5020 sp->mid = realloc((void *)sp->mid,
5021 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
5022 sp->mid[sp->num_mid] = ctx->bc->cf_last;
5023 sp->num_mid++;
5024 }
5025
5026 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
5027 {
5028 ctx->bc->fc_sp++;
5029 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
5030 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
5031 }
5032
5033 static void fc_poplevel(struct r600_shader_ctx *ctx)
5034 {
5035 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
5036 free(sp->mid);
5037 sp->mid = NULL;
5038 sp->num_mid = 0;
5039 sp->start = NULL;
5040 sp->type = 0;
5041 ctx->bc->fc_sp--;
5042 }
5043
#if 0
/*
 * Everything up to the matching #endif is compiled out.
 * NOTE(review): this looks like unfinished scaffolding for RETURN / flag
 * based early exit -- several helpers are empty stubs; confirm before
 * enabling any of it.
 */

/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a JUMP CF instruction with the given pop count; the offset is unused. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, reset the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with pop_count 1,
 * record it as a mid instruction of level fc_sp and pop one entry.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
5091
5092 static int tgsi_if(struct r600_shader_ctx *ctx)
5093 {
5094 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5095
5096 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5097
5098 fc_pushlevel(ctx, FC_IF);
5099
5100 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5101 return 0;
5102 }
5103
5104 static int tgsi_else(struct r600_shader_ctx *ctx)
5105 {
5106 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5107 ctx->bc->cf_last->pop_count = 1;
5108
5109 fc_set_mid(ctx, ctx->bc->fc_sp);
5110 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5111 return 0;
5112 }
5113
5114 static int tgsi_endif(struct r600_shader_ctx *ctx)
5115 {
5116 pops(ctx, 1);
5117 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5118 R600_ERR("if/endif unbalanced in shader\n");
5119 return -1;
5120 }
5121
5122 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5123 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5124 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5125 } else {
5126 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5127 }
5128 fc_poplevel(ctx);
5129
5130 callstack_decrease_current(ctx, FC_PUSH_VPM);
5131 return 0;
5132 }
5133
5134 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5135 {
5136 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
5137 * limited to 4096 iterations, like the other LOOP_* instructions. */
5138 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
5139
5140 fc_pushlevel(ctx, FC_LOOP);
5141
5142 /* check stack depth */
5143 callstack_check_depth(ctx, FC_LOOP, 0);
5144 return 0;
5145 }
5146
5147 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5148 {
5149 int i;
5150
5151 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5152
5153 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5154 R600_ERR("loop/endloop in shader code are not paired.\n");
5155 return -EINVAL;
5156 }
5157
5158 /* fixup loop pointers - from r600isa
5159 LOOP END points to CF after LOOP START,
5160 LOOP START point to CF after LOOP END
5161 BRK/CONT point to LOOP END CF
5162 */
5163 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5164
5165 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5166
5167 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5168 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5169 }
5170 /* XXX add LOOPRET support */
5171 fc_poplevel(ctx);
5172 callstack_decrease_current(ctx, FC_LOOP);
5173 return 0;
5174 }
5175
5176 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5177 {
5178 unsigned int fscp;
5179
5180 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5181 {
5182 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5183 break;
5184 }
5185
5186 if (fscp == 0) {
5187 R600_ERR("Break not inside loop/endloop pair\n");
5188 return -EINVAL;
5189 }
5190
5191 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5192
5193 fc_set_mid(ctx, fscp);
5194
5195 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5196 return 0;
5197 }
5198
5199 static int tgsi_umad(struct r600_shader_ctx *ctx)
5200 {
5201 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5202 struct r600_bytecode_alu alu;
5203 int i, j, r;
5204 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5205
5206 /* src0 * src1 */
5207 for (i = 0; i < lasti + 1; i++) {
5208 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5209 continue;
5210
5211 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5212
5213 alu.dst.chan = i;
5214 alu.dst.sel = ctx->temp_reg;
5215 alu.dst.write = 1;
5216
5217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5218 for (j = 0; j < 2; j++) {
5219 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5220 }
5221
5222 alu.last = 1;
5223 r = r600_bytecode_add_alu(ctx->bc, &alu);
5224 if (r)
5225 return r;
5226 }
5227
5228
5229 for (i = 0; i < lasti + 1; i++) {
5230 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5231 continue;
5232
5233 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5234 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5235
5236 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5237
5238 alu.src[0].sel = ctx->temp_reg;
5239 alu.src[0].chan = i;
5240
5241 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5242 if (i == lasti) {
5243 alu.last = 1;
5244 }
5245 r = r600_bytecode_add_alu(ctx->bc, &alu);
5246 if (r)
5247 return r;
5248 }
5249 return 0;
5250 }
5251
5252 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5253 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5254 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5255 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5256
5257 /* XXX:
5258 * For state trackers other than OpenGL, we'll want to use
5259 * _RECIP_IEEE instead.
5260 */
5261 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5262
5263 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5264 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5265 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5266 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5267 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5268 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5269 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5270 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5271 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5272 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5273 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5274 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5275 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5276 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5277 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5278 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5279 /* gap */
5280 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5281 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5282 /* gap */
5283 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5284 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5285 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5286 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5287 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5288 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5289 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5290 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5291 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5292 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5293 /* gap */
5294 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5295 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5296 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5297 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5298 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5299 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5300 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5301 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5302 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5303 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5304 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5305 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5306 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5307 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5308 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5309 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5310 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5311 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5312 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5313 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5314 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5315 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5316 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5317 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5318 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5319 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5320 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5321 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5322 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5323 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5324 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5325 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5326 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5327 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5328 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5329 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5330 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5331 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5332 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5333 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5334 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5335 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5336 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5337 /* gap */
5338 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5339 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5340 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5341 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5342 /* gap */
5343 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5344 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5345 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5346 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5347 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5348 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5349 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5350 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5351 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5352 /* gap */
5353 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5354 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5355 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5356 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5357 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5358 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5359 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5360 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5361 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5362 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5363 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5364 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5365 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5366 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5367 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5368 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5369 /* gap */
5370 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5371 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5372 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5373 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5374 /* gap */
5375 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5376 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5377 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5378 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5379 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5380 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5381 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5382 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5383 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5384 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5385 /* gap */
5386 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5387 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5388 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5389 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5390 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5391 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5392 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5393 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5394 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5395 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5396 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5397 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5398 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5399 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5400 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5401 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5402 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5403 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5404 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5405 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5406 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5407 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5408 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5409 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5410 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5411 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5412 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5413 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5414 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5415 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5416 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5417 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5418 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5419 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5420 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5421 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5422 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5423 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5424 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5425 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5426 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5427 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5428 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5429 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5430 };
5431
/*
 * TGSI -> hardware instruction table built from the EG_-prefixed ALU/CF
 * opcode constants ("eg" presumably stands for Evergreen -- confirm against
 * the code that selects this table).
 *
 * Each row is { TGSI opcode, flag, hardware opcode, handler }:
 *  - Field 1 is a TGSI_OPCODE_* constant, or a bare number (20, 22, 23, 32,
 *    75, 76, ...) on the rows introduced by a "gap" comment, where no named
 *    opcode is used.  Rows look to be laid out in opcode order, one row per
 *    value, presumably so the table can be indexed directly by opcode:
 *    do not insert, drop or reorder rows without checking the lookup site.
 *  - Field 2 is 0 on every row except MAD, which is also the only row whose
 *    hardware opcode is taken from the OP3 namespace (handler tgsi_op3).
 *  - Field 3 is the hardware opcode: an EG_V_SQ_ALU_* value on ALU rows, an
 *    SQ_TEX_INST_* value on the rows handled by tgsi_tex, and an
 *    EG_V_SQ_CF_* value on BRK/CONT.  Rows routed to tgsi_unsupported, and
 *    several rows with a dedicated handler (tgsi_lit, tgsi_pow, tgsi_iabs,
 *    ...), carry the NOP opcode or a literal 0 here.
 *  - Field 4 is the handler function for the opcode; tgsi_unsupported is
 *    used for every opcode this table does not translate.
 *
 * The table is closed by a TGSI_OPCODE_LAST row.
 */
5432 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5433 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5434 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5435 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5436 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5437 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5438 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5439 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5440 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5441 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5442 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5443 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5444 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5445 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5446 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5447 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5448 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5449 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5450 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5451 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5452 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5453 /* gap */
5454 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5455 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5456 /* gap */
5457 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5458 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5459 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5460 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5461 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5462 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5463 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5464 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5465 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5466 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5467 /* gap */
5468 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5469 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5470 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5471 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5472 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5473 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5474 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5475 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5476 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5477 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5478 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5479 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5481 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5482 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5483 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5484 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5485 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5486 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5487 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5488 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5489 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5490 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5491 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5492 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5493 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5494 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5495 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5496 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5497 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5498 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5499 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5500 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5501 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5502 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5503 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5504 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5505 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5506 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5507 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5508 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5509 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5510 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5511 /* gap */
5512 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5513 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5514 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5515 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5516 /* gap */
5517 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5518 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5519 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5520 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5521 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5522 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5523 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5524 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5525 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5526 /* gap */
5527 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5528 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5529 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5530 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5531 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5532 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5533 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5534 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5535 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5536 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5537 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5538 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5539 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5540 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5541 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5542 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5543 /* gap */
5544 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5545 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5546 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5547 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5548 /* gap */
5549 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5550 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5551 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5552 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5553 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5554 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5555 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5556 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5557 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5558 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5559 /* gap */
5560 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5561 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5562 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5563 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5564 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5565 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5566 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5567 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5568 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5569 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5570 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5571 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5572 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5573 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5574 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5575 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5576 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5577 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5578 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5579 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5580 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5581 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5582 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5583 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5584 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5585 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5586 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5587 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5588 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5589 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5590 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5591 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5592 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5593 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5594 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5595 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5596 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5597 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5598 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5599 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5600 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5601 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5602 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5603 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5604 };
5605
/*
 * TGSI -> hardware instruction table that pairs the EG_-prefixed opcode
 * constants with cayman_* handlers where a dedicated one exists ("cm"
 * presumably stands for Cayman -- confirm against the code that selects this
 * table).
 *
 * Each row is { TGSI opcode, flag, hardware opcode, handler }, with the same
 * row order as eg_shader_tgsi_instruction: named TGSI_OPCODE_* rows
 * interleaved with bare-number rows after each "gap" comment, closed by a
 * TGSI_OPCODE_LAST row.  Rows look to be laid out in opcode order,
 * presumably so the table can be indexed directly by opcode: do not insert,
 * drop or reorder rows without checking the lookup site.  The flag is 1 only
 * on the MAD row (OP3 opcode, handler tgsi_op3).
 *
 * Rows that differ from eg_shader_tgsi_instruction:
 *  - RCP, RSQ, EX2, LG2     -> cayman_emit_float_instr
 *  - POW                    -> cayman_pow
 *  - COS, SIN               -> cayman_trig
 *  - UMUL                   -> MULLO_INT opcode with cayman_mul_int_instr
 *  - I2F, U2F, F2I, F2U     -> plain tgsi_op2
 * See the "CAYMAN notes" comment at the top of this file for why these
 * opcodes need different handling on this hardware.
 */
5606 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5607 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5608 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5609 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5610 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5611 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5612 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5613 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5614 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5615 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5616 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5617 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5618 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5619 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5620 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5621 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5622 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5623 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5624 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5625 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5626 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5627 /* gap */
5628 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5629 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5630 /* gap */
5631 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5632 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5633 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5634 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5635 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5636 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5637 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5638 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5639 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5640 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5641 /* gap */
5642 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5643 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5644 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5645 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5646 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
5647 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5648 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5649 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5650 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5651 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5652 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5653 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5655 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5656 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5657 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5658 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5659 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5660 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5661 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5662 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5663 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5664 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5665 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5666 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5667 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5668 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5669 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5670 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5671 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5672 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5673 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5674 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5675 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5676 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5677 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5678 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5679 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5680 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5681 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5682 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5683 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5684 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5685 /* gap */
5686 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5687 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5688 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5689 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5690 /* gap */
5691 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5692 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5693 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5694 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5695 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5696 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5697 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5698 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5699 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5700 /* gap */
5701 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5702 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5703 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5704 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5705 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5706 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5707 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5708 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5709 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5710 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5711 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5712 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5713 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5714 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5715 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5716 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5717 /* gap */
5718 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5719 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5720 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5721 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5722 /* gap */
5723 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5724 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5725 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5726 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5727 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5728 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5729 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5730 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5731 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5732 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5733 /* gap */
5734 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5735 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5736 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5737 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5738 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5739 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5740 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5741 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5742 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5743 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5744 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5745 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5746 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5747 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5748 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5749 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5750 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5751 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
5752 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5753 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5754 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5755 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5756 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5757 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5758 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5759 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5760 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5761 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5762 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5763 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5764 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5765 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5766 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5767 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5768 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5769 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5770 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5771 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5772 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5773 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5774 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5775 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5776 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5777 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5778 };