r600g: fix RSQ of negative value on Cayman
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 This explains why many instructions are emitted in loops on CAYMAN.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_context *rctx = (struct r600_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
107 struct r600_pipe_shader *pipeshader,
108 struct r600_shader_key key);
109
110 int r600_pipe_shader_create(struct pipe_context *ctx,
111 struct r600_pipe_shader *shader,
112 struct r600_shader_key key)
113 {
114 static int dump_shaders = -1;
115 struct r600_context *rctx = (struct r600_context *)ctx;
116 struct r600_pipe_shader_selector *sel = shader->selector;
117 int r;
118
119 /* Would like some magic "get_bool_option_once" routine.
120 */
121 if (dump_shaders == -1)
122 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
123
124 if (dump_shaders) {
125 fprintf(stderr, "--------------------------------------------------------------\n");
126 tgsi_dump(sel->tokens, 0);
127
128 if (sel->so.num_outputs) {
129 unsigned i;
130 fprintf(stderr, "STREAMOUT\n");
131 for (i = 0; i < sel->so.num_outputs; i++) {
132 unsigned mask = ((1 << sel->so.output[i].num_components) - 1) <<
133 sel->so.output[i].start_component;
134 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
135 sel->so.output[i].output_buffer, sel->so.output[i].register_index,
136 mask & 1 ? "x" : "_",
137 (mask >> 1) & 1 ? "y" : "_",
138 (mask >> 2) & 1 ? "z" : "_",
139 (mask >> 3) & 1 ? "w" : "_");
140 }
141 }
142 }
143 r = r600_shader_from_tgsi(rctx->screen, shader, key);
144 if (r) {
145 R600_ERR("translation from TGSI failed !\n");
146 return r;
147 }
148 r = r600_bytecode_build(&shader->shader.bc);
149 if (r) {
150 R600_ERR("building bytecode failed !\n");
151 return r;
152 }
153 if (dump_shaders) {
154 r600_bytecode_dump(&shader->shader.bc);
155 fprintf(stderr, "______________________________________________________________\n");
156 }
157 return r600_pipe_shader(ctx, shader);
158 }
159
160 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
161 {
162 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
163 r600_bytecode_clear(&shader->shader.bc);
164 }
165
166 /*
167 * tgsi -> r600 shader
168 */
struct r600_shader_tgsi_instruction;

/* One source operand of a TGSI instruction, as filled in by tgsi_src(). */
struct r600_shader_src {
	unsigned sel;        /* source selector (register index or special source) */
	unsigned swizzle[4]; /* swizzle for the x, y, z and w channels */
	unsigned neg;        /* negate flag */
	unsigned abs;        /* absolute-value flag */
	unsigned rel;        /* relative (indirect) addressing mode */
	uint32_t value[4];   /* literal values, used when sel is a literal */
};
179
180 struct r600_shader_ctx {
181 struct tgsi_shader_info info;
182 struct tgsi_parse_context parse;
183 const struct tgsi_token *tokens;
184 unsigned type;
185 unsigned file_offset[TGSI_FILE_COUNT];
186 unsigned temp_reg;
187 struct r600_shader_tgsi_instruction *inst_info;
188 struct r600_bytecode *bc;
189 struct r600_shader *shader;
190 struct r600_shader_src src[4];
191 uint32_t *literals;
192 uint32_t nliterals;
193 uint32_t max_driver_temp_used;
194 /* needed for evergreen interpolation */
195 boolean input_centroid;
196 boolean input_linear;
197 boolean input_perspective;
198 int num_interp_gpr;
199 int face_gpr;
200 int colors_used;
201 boolean clip_vertex_write;
202 unsigned cv_output;
203 int fragcoord_input;
204 int native_integers;
205 };
206
/* One entry of the per-chip tables that are indexed by TGSI opcode. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;                        /* TGSI_OPCODE_* of this entry */
	unsigned is_op3;                             /* presumably non-zero for three-source ALU ops */
	unsigned r600_opcode;                        /* hardware opcode for the instruction */
	int (*process)(struct r600_shader_ctx *ctx); /* handler for the instruction */
};
213
214 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
215 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
216 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
217 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
218 static int tgsi_else(struct r600_shader_ctx *ctx);
219 static int tgsi_endif(struct r600_shader_ctx *ctx);
220 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
221 static int tgsi_endloop(struct r600_shader_ctx *ctx);
222 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
223
224 /*
225 * bytestream -> r600 shader
226 *
227 * These functions are used to transform the output of the LLVM backend into
228 * struct r600_bytecode.
229 */
230
231 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
232 unsigned char * bytes, unsigned num_bytes);
233
#ifdef HAVE_OPENCL
/*
 * Compile an LLVM module and turn the resulting byte stream into r600
 * bytecode stored in *bytecode.
 *
 * The bytecode is dumped when the R600_DUMP_SHADERS debug option is set.
 * Always returns 1; the result of r600_bytecode_build() is not reported.
 */
int r600_compute_shader_create(struct pipe_context * ctx,
	LLVMModuleRef mod, struct r600_bytecode * bytecode)
{
	struct r600_context *r600_ctx = (struct r600_context *)ctx;
	unsigned char * bytes;
	unsigned byte_count;
	struct r600_shader_ctx shader_ctx;
	unsigned dump = 0;

	if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
		dump = 1;
	}

	/* Only bc is filled in below, but the byte-stream decoders read other
	 * members of the context (e.g. ctx->shader on KILLGT). Zero the struct
	 * so those members are never indeterminate stack garbage. */
	memset(&shader_ctx, 0, sizeof(shader_ctx));

	r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
	shader_ctx.bc = bytecode;
	r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
	shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
	r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
	if (shader_ctx.bc->chip_class == CAYMAN) {
		cm_bytecode_add_cf_end(shader_ctx.bc);
	}
	r600_bytecode_build(shader_ctx.bc);
	if (dump) {
		r600_bytecode_dump(shader_ctx.bc);
	}
	free(bytes);
	return 1;
}

#endif /* HAVE_OPENCL */
265
/*
 * Read a little-endian 32-bit value from bytes, starting at *bytes_read.
 * *bytes_read is advanced by four.
 */
static uint32_t i32_from_byte_stream(unsigned char * bytes,
		unsigned * bytes_read)
{
	unsigned i;
	uint32_t out = 0;
	for (i = 0; i < 4; i++) {
		/* Widen before shifting: an unsigned char is promoted to int,
		 * and shifting a byte >= 0x80 left by 24 would overflow it. */
		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
	}
	return out;
}
276
277 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
278 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
279 {
280 unsigned i;
281 unsigned sel0, sel1;
282 sel0 = bytes[bytes_read++];
283 sel1 = bytes[bytes_read++];
284 alu->src[src_idx].sel = sel0 | (sel1 << 8);
285 alu->src[src_idx].chan = bytes[bytes_read++];
286 alu->src[src_idx].neg = bytes[bytes_read++];
287 alu->src[src_idx].abs = bytes[bytes_read++];
288 alu->src[src_idx].rel = bytes[bytes_read++];
289 alu->src[src_idx].kc_bank = bytes[bytes_read++];
290 for (i = 0; i < 4; i++) {
291 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
292 }
293 return bytes_read;
294 }
295
296 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
297 unsigned char * bytes, unsigned bytes_read)
298 {
299 unsigned src_idx;
300 struct r600_bytecode_alu alu;
301 unsigned src_const_reg[3];
302 uint32_t word0, word1;
303
304 memset(&alu, 0, sizeof(alu));
305 for(src_idx = 0; src_idx < 3; src_idx++) {
306 unsigned i;
307 src_const_reg[src_idx] = bytes[bytes_read++];
308 for (i = 0; i < 4; i++) {
309 alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8);
310 }
311 }
312
313 word0 = i32_from_byte_stream(bytes, &bytes_read);
314 word1 = i32_from_byte_stream(bytes, &bytes_read);
315
316 switch(ctx->bc->chip_class) {
317 case R600:
318 r600_bytecode_alu_read(&alu, word0, word1);
319 break;
320 case R700:
321 case EVERGREEN:
322 case CAYMAN:
323 r700_bytecode_alu_read(&alu, word0, word1);
324 break;
325 }
326
327 for(src_idx = 0; src_idx < 3; src_idx++) {
328 if (src_const_reg[src_idx])
329 alu.src[src_idx].sel += 512;
330 }
331
332 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
333 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
334 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) ||
335 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) {
336 alu.update_pred = 1;
337 alu.dst.write = 0;
338 alu.src[1].sel = V_SQ_ALU_SRC_0;
339 alu.src[1].chan = 0;
340 alu.last = 1;
341 }
342
343 if (alu.execute_mask) {
344 alu.pred_sel = 0;
345 r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
346 } else {
347 r600_bytecode_add_alu(ctx->bc, &alu);
348 }
349
350 /* XXX: Handle other KILL instructions */
351 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
352 ctx->shader->uses_kill = 1;
353 /* XXX: This should be enforced in the LLVM backend. */
354 ctx->bc->force_add_cf = 1;
355 }
356 return bytes_read;
357 }
358
359 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
360 unsigned pred_inst)
361 {
362 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
363 fc_pushlevel(ctx, FC_IF);
364 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
365 }
366
367 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
368 struct r600_bytecode_alu *alu, unsigned compare_opcode)
369 {
370 unsigned opcode = TGSI_OPCODE_BRK;
371 if (ctx->bc->chip_class == CAYMAN)
372 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
373 else if (ctx->bc->chip_class >= EVERGREEN)
374 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
375 else
376 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
377 llvm_if(ctx, alu, compare_opcode);
378 tgsi_loop_brk_cont(ctx);
379 tgsi_endif(ctx);
380 }
381
382 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
383 unsigned char * bytes, unsigned bytes_read)
384 {
385 struct r600_bytecode_alu alu;
386 unsigned inst;
387 memset(&alu, 0, sizeof(alu));
388 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
389 inst = bytes[bytes_read++];
390 switch (inst) {
391 case 0: /* FC_IF */
392 llvm_if(ctx, &alu,
393 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
394 break;
395 case 1: /* FC_IF_INT */
396 llvm_if(ctx, &alu,
397 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
398 break;
399 case 2: /* FC_ELSE */
400 tgsi_else(ctx);
401 break;
402 case 3: /* FC_ENDIF */
403 tgsi_endif(ctx);
404 break;
405 case 4: /* FC_BGNLOOP */
406 tgsi_bgnloop(ctx);
407 break;
408 case 5: /* FC_ENDLOOP */
409 tgsi_endloop(ctx);
410 break;
411 case 6: /* FC_BREAK */
412 r600_break_from_byte_stream(ctx, &alu,
413 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
414 break;
415 case 7: /* FC_BREAK_NZ_INT */
416 r600_break_from_byte_stream(ctx, &alu,
417 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
418 break;
419 case 8: /* FC_CONTINUE */
420 {
421 unsigned opcode = TGSI_OPCODE_CONT;
422 if (ctx->bc->chip_class == CAYMAN) {
423 ctx->inst_info =
424 &cm_shader_tgsi_instruction[opcode];
425 } else if (ctx->bc->chip_class >= EVERGREEN) {
426 ctx->inst_info =
427 &eg_shader_tgsi_instruction[opcode];
428 } else {
429 ctx->inst_info =
430 &r600_shader_tgsi_instruction[opcode];
431 }
432 tgsi_loop_brk_cont(ctx);
433 }
434 break;
435 case 9: /* FC_BREAK_Z_INT */
436 r600_break_from_byte_stream(ctx, &alu,
437 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
438 break;
439 case 10: /* FC_BREAK_NZ */
440 r600_break_from_byte_stream(ctx, &alu,
441 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
442 break;
443 }
444
445 return bytes_read;
446 }
447
448 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
449 unsigned char * bytes, unsigned bytes_read)
450 {
451 struct r600_bytecode_tex tex;
452
453 tex.inst = bytes[bytes_read++];
454 tex.resource_id = bytes[bytes_read++];
455 tex.src_gpr = bytes[bytes_read++];
456 tex.src_rel = bytes[bytes_read++];
457 tex.dst_gpr = bytes[bytes_read++];
458 tex.dst_rel = bytes[bytes_read++];
459 tex.dst_sel_x = bytes[bytes_read++];
460 tex.dst_sel_y = bytes[bytes_read++];
461 tex.dst_sel_z = bytes[bytes_read++];
462 tex.dst_sel_w = bytes[bytes_read++];
463 tex.lod_bias = bytes[bytes_read++];
464 tex.coord_type_x = bytes[bytes_read++];
465 tex.coord_type_y = bytes[bytes_read++];
466 tex.coord_type_z = bytes[bytes_read++];
467 tex.coord_type_w = bytes[bytes_read++];
468 tex.offset_x = bytes[bytes_read++];
469 tex.offset_y = bytes[bytes_read++];
470 tex.offset_z = bytes[bytes_read++];
471 tex.sampler_id = bytes[bytes_read++];
472 tex.src_sel_x = bytes[bytes_read++];
473 tex.src_sel_y = bytes[bytes_read++];
474 tex.src_sel_z = bytes[bytes_read++];
475 tex.src_sel_w = bytes[bytes_read++];
476
477 r600_bytecode_add_tex(ctx->bc, &tex);
478
479 return bytes_read;
480 }
481
482 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
483 unsigned char * bytes, unsigned bytes_read)
484 {
485 struct r600_bytecode_vtx vtx;
486
487 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
488 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
489 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
490
491 memset(&vtx, 0, sizeof(vtx));
492
493 /* WORD0 */
494 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
495 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
496 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
497 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
498 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
499 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
500
501 /* WORD1 */
502 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
503 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
504 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
505 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
506 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
507 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
508 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
509 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
510 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
511 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
512
513 /* WORD 2*/
514 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
515 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
516
517 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
518 fprintf(stderr, "Error adding vtx\n");
519 }
520 /* Use the Texture Cache */
521 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
522 return bytes_read;
523 }
524
525 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
526 unsigned char * bytes, unsigned num_bytes)
527 {
528 unsigned bytes_read = 0;
529 unsigned i, byte;
530 while (bytes_read < num_bytes) {
531 char inst_type = bytes[bytes_read++];
532 switch (inst_type) {
533 case 0:
534 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
535 bytes_read);
536 break;
537 case 1:
538 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
539 bytes_read);
540 break;
541 case 2:
542 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
543 bytes_read);
544 break;
545 case 3:
546 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
547 for (i = 0; i < 2; i++) {
548 for (byte = 0 ; byte < 4; byte++) {
549 ctx->bc->cf_last->isa[i] |=
550 (bytes[bytes_read++] << (byte * 8));
551 }
552 }
553 break;
554
555 case 4:
556 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
557 bytes_read);
558 break;
559 default:
560 /* XXX: Error here */
561 break;
562 }
563 }
564 }
565
566 /* End bytestream -> r600 shader functions*/
567
568 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
569 {
570 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
571 int j;
572
573 if (i->Instruction.NumDstRegs > 1) {
574 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
575 return -EINVAL;
576 }
577 if (i->Instruction.Predicate) {
578 R600_ERR("predicate unsupported\n");
579 return -EINVAL;
580 }
581 #if 0
582 if (i->Instruction.Label) {
583 R600_ERR("label unsupported\n");
584 return -EINVAL;
585 }
586 #endif
587 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
588 if (i->Src[j].Register.Dimension) {
589 R600_ERR("unsupported src %d (dimension %d)\n", j,
590 i->Src[j].Register.Dimension);
591 return -EINVAL;
592 }
593 }
594 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
595 if (i->Dst[j].Register.Dimension) {
596 R600_ERR("unsupported dst (dimension)\n");
597 return -EINVAL;
598 }
599 }
600 return 0;
601 }
602
603 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
604 {
605 int i, r;
606 struct r600_bytecode_alu alu;
607 int gpr = 0, base_chan = 0;
608 int ij_index = 0;
609
610 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
611 ij_index = 0;
612 if (ctx->shader->input[input].centroid)
613 ij_index++;
614 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
615 ij_index = 0;
616 /* if we have perspective add one */
617 if (ctx->input_perspective) {
618 ij_index++;
619 /* if we have perspective centroid */
620 if (ctx->input_centroid)
621 ij_index++;
622 }
623 if (ctx->shader->input[input].centroid)
624 ij_index++;
625 }
626
627 /* work out gpr and base_chan from index */
628 gpr = ij_index / 2;
629 base_chan = (2 * (ij_index % 2)) + 1;
630
631 for (i = 0; i < 8; i++) {
632 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
633
634 if (i < 4)
635 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
636 else
637 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
638
639 if ((i > 1) && (i < 6)) {
640 alu.dst.sel = ctx->shader->input[input].gpr;
641 alu.dst.write = 1;
642 }
643
644 alu.dst.chan = i % 4;
645
646 alu.src[0].sel = gpr;
647 alu.src[0].chan = (base_chan - (i % 2));
648
649 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
650
651 alu.bank_swizzle_force = SQ_ALU_VEC_210;
652 if ((i % 4) == 3)
653 alu.last = 1;
654 r = r600_bytecode_add_alu(ctx->bc, &alu);
655 if (r)
656 return r;
657 }
658 return 0;
659 }
660
661 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
662 {
663 int i, r;
664 struct r600_bytecode_alu alu;
665
666 for (i = 0; i < 4; i++) {
667 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
668
669 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
670
671 alu.dst.sel = ctx->shader->input[input].gpr;
672 alu.dst.write = 1;
673
674 alu.dst.chan = i;
675
676 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
677 alu.src[0].chan = i;
678
679 if (i == 3)
680 alu.last = 1;
681 r = r600_bytecode_add_alu(ctx->bc, &alu);
682 if (r)
683 return r;
684 }
685 return 0;
686 }
687
688 /*
689 * Special export handling in shaders
690 *
691 * shader export ARRAY_BASE for EXPORT_POS:
692 * 60 is position
693 * 61 is misc vector
694 * 62, 63 are clip distance vectors
695 *
696 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
697 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
698 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
699 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
700 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
701 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
702 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
703 * exclusive with render target index)
704 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
705 *
706 *
707 * shader export ARRAY_BASE for EXPORT_PIXEL:
708 * 0-7 CB targets
709 * 61 computed Z vector
710 *
711 * The use of the values exported in the computed Z vector are controlled
712 * by DB_SHADER_CONTROL:
713 * Z_EXPORT_ENABLE - Z as a float in RED
714 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
715 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
716 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
717 * DB_SOURCE_FORMAT - export control restrictions
718 *
719 */
720
721
722 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
723 static int r600_spi_sid(struct r600_shader_io * io)
724 {
725 int index, name = io->name;
726
727 /* These params are handled differently, they don't need
728 * semantic indices, so we'll use 0 for them.
729 */
730 if (name == TGSI_SEMANTIC_POSITION ||
731 name == TGSI_SEMANTIC_PSIZE ||
732 name == TGSI_SEMANTIC_FACE)
733 index = 0;
734 else {
735 if (name == TGSI_SEMANTIC_GENERIC) {
736 /* For generic params simply use sid from tgsi */
737 index = io->sid;
738 } else {
739 /* For non-generic params - pack name and sid into 8 bits */
740 index = 0x80 | (name<<3) | (io->sid);
741 }
742
743 /* Make sure that all really used indices have nonzero value, so
744 * we can just compare it to 0 later instead of comparing the name
745 * with different values to detect special cases. */
746 index++;
747 }
748
749 return index;
750 };
751
752 /* turn input into interpolate on EG */
753 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
754 {
755 int r = 0;
756
757 if (ctx->shader->input[index].spi_sid) {
758 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
759 if (ctx->shader->input[index].interpolate > 0) {
760 r = evergreen_interp_alu(ctx, index);
761 } else {
762 r = evergreen_interp_flat(ctx, index);
763 }
764 }
765 return r;
766 }
767
768 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
769 {
770 struct r600_bytecode_alu alu;
771 int i, r;
772 int gpr_front = ctx->shader->input[front].gpr;
773 int gpr_back = ctx->shader->input[back].gpr;
774
775 for (i = 0; i < 4; i++) {
776 memset(&alu, 0, sizeof(alu));
777 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
778 alu.is_op3 = 1;
779 alu.dst.write = 1;
780 alu.dst.sel = gpr_front;
781 alu.src[0].sel = ctx->face_gpr;
782 alu.src[1].sel = gpr_front;
783 alu.src[2].sel = gpr_back;
784
785 alu.dst.chan = i;
786 alu.src[1].chan = i;
787 alu.src[2].chan = i;
788 alu.last = (i==3);
789
790 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
791 return r;
792 }
793
794 return 0;
795 }
796
797 static int tgsi_declaration(struct r600_shader_ctx *ctx)
798 {
799 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
800 unsigned i;
801 int r;
802
803 switch (d->Declaration.File) {
804 case TGSI_FILE_INPUT:
805 i = ctx->shader->ninput++;
806 ctx->shader->input[i].name = d->Semantic.Name;
807 ctx->shader->input[i].sid = d->Semantic.Index;
808 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
809 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
810 ctx->shader->input[i].centroid = d->Interp.Centroid;
811 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
812 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
813 switch (ctx->shader->input[i].name) {
814 case TGSI_SEMANTIC_FACE:
815 ctx->face_gpr = ctx->shader->input[i].gpr;
816 break;
817 case TGSI_SEMANTIC_COLOR:
818 ctx->colors_used++;
819 break;
820 case TGSI_SEMANTIC_POSITION:
821 ctx->fragcoord_input = i;
822 break;
823 }
824 if (ctx->bc->chip_class >= EVERGREEN) {
825 if ((r = evergreen_interp_input(ctx, i)))
826 return r;
827 }
828 }
829 break;
830 case TGSI_FILE_OUTPUT:
831 i = ctx->shader->noutput++;
832 ctx->shader->output[i].name = d->Semantic.Name;
833 ctx->shader->output[i].sid = d->Semantic.Index;
834 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
835 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
836 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
837 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
838 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
839 switch (d->Semantic.Name) {
840 case TGSI_SEMANTIC_CLIPDIST:
841 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
842 break;
843 case TGSI_SEMANTIC_PSIZE:
844 ctx->shader->vs_out_misc_write = 1;
845 ctx->shader->vs_out_point_size = 1;
846 break;
847 case TGSI_SEMANTIC_CLIPVERTEX:
848 ctx->clip_vertex_write = TRUE;
849 ctx->cv_output = i;
850 break;
851 }
852 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
853 switch (d->Semantic.Name) {
854 case TGSI_SEMANTIC_COLOR:
855 ctx->shader->nr_ps_max_color_exports++;
856 break;
857 }
858 }
859 break;
860 case TGSI_FILE_CONSTANT:
861 case TGSI_FILE_TEMPORARY:
862 case TGSI_FILE_SAMPLER:
863 case TGSI_FILE_ADDRESS:
864 break;
865
866 case TGSI_FILE_SYSTEM_VALUE:
867 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
868 if (!ctx->native_integers) {
869 struct r600_bytecode_alu alu;
870 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
871
872 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
873 alu.src[0].sel = 0;
874 alu.src[0].chan = 3;
875
876 alu.dst.sel = 0;
877 alu.dst.chan = 3;
878 alu.dst.write = 1;
879 alu.last = 1;
880
881 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
882 return r;
883 }
884 break;
885 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
886 break;
887 default:
888 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
889 return -EINVAL;
890 }
891 return 0;
892 }
893
894 static int r600_get_temp(struct r600_shader_ctx *ctx)
895 {
896 return ctx->temp_reg + ctx->max_driver_temp_used++;
897 }
898
899 /*
900 * for evergreen we need to scan the shader to find the number of GPRs we need to
901 * reserve for interpolation.
902 *
903 * we need to know if we are going to emit
904 * any centroid inputs
905 * if perspective and linear are required
906 */
907 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
908 {
909 int i;
910 int num_baryc;
911
912 ctx->input_linear = FALSE;
913 ctx->input_perspective = FALSE;
914 ctx->input_centroid = FALSE;
915 ctx->num_interp_gpr = 1;
916
917 /* any centroid inputs */
918 for (i = 0; i < ctx->info.num_inputs; i++) {
919 /* skip position/face */
920 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
921 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
922 continue;
923 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
924 ctx->input_linear = TRUE;
925 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
926 ctx->input_perspective = TRUE;
927 if (ctx->info.input_centroid[i])
928 ctx->input_centroid = TRUE;
929 }
930
931 num_baryc = 0;
932 /* ignoring sample for now */
933 if (ctx->input_perspective)
934 num_baryc++;
935 if (ctx->input_linear)
936 num_baryc++;
937 if (ctx->input_centroid)
938 num_baryc *= 2;
939
940 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
941
942 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
943 return ctx->num_interp_gpr;
944 }
945
946 static void tgsi_src(struct r600_shader_ctx *ctx,
947 const struct tgsi_full_src_register *tgsi_src,
948 struct r600_shader_src *r600_src)
949 {
950 memset(r600_src, 0, sizeof(*r600_src));
951 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
952 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
953 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
954 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
955 r600_src->neg = tgsi_src->Register.Negate;
956 r600_src->abs = tgsi_src->Register.Absolute;
957
958 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
959 int index;
960 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
961 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
962 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
963
964 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
965 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
966 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
967 return;
968 }
969 index = tgsi_src->Register.Index;
970 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
971 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
972 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
973 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
974 r600_src->swizzle[0] = 3;
975 r600_src->swizzle[1] = 3;
976 r600_src->swizzle[2] = 3;
977 r600_src->swizzle[3] = 3;
978 r600_src->sel = 0;
979 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
980 r600_src->swizzle[0] = 0;
981 r600_src->swizzle[1] = 0;
982 r600_src->swizzle[2] = 0;
983 r600_src->swizzle[3] = 0;
984 r600_src->sel = 0;
985 }
986 } else {
987 if (tgsi_src->Register.Indirect)
988 r600_src->rel = V_SQ_REL_RELATIVE;
989 r600_src->sel = tgsi_src->Register.Index;
990 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
991 }
992 }
993
994 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
995 {
996 struct r600_bytecode_vtx vtx;
997 unsigned int ar_reg;
998 int r;
999
1000 if (offset) {
1001 struct r600_bytecode_alu alu;
1002
1003 memset(&alu, 0, sizeof(alu));
1004
1005 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
1006 alu.src[0].sel = ctx->bc->ar_reg;
1007
1008 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1009 alu.src[1].value = offset;
1010
1011 alu.dst.sel = dst_reg;
1012 alu.dst.write = 1;
1013 alu.last = 1;
1014
1015 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1016 return r;
1017
1018 ar_reg = dst_reg;
1019 } else {
1020 ar_reg = ctx->bc->ar_reg;
1021 }
1022
1023 memset(&vtx, 0, sizeof(vtx));
1024 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1025 vtx.src_gpr = ar_reg;
1026 vtx.mega_fetch_count = 16;
1027 vtx.dst_gpr = dst_reg;
1028 vtx.dst_sel_x = 0; /* SEL_X */
1029 vtx.dst_sel_y = 1; /* SEL_Y */
1030 vtx.dst_sel_z = 2; /* SEL_Z */
1031 vtx.dst_sel_w = 3; /* SEL_W */
1032 vtx.data_format = FMT_32_32_32_32_FLOAT;
1033 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1034 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1035 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1036 vtx.endian = r600_endian_swap(32);
1037
1038 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1039 return r;
1040
1041 return 0;
1042 }
1043
1044 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1045 {
1046 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1047 struct r600_bytecode_alu alu;
1048 int i, j, k, nconst, r;
1049
1050 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1051 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1052 nconst++;
1053 }
1054 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1055 }
1056 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1057 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1058 continue;
1059 }
1060
1061 if (ctx->src[i].rel) {
1062 int treg = r600_get_temp(ctx);
1063 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1064 return r;
1065
1066 ctx->src[i].sel = treg;
1067 ctx->src[i].rel = 0;
1068 j--;
1069 } else if (j > 0) {
1070 int treg = r600_get_temp(ctx);
1071 for (k = 0; k < 4; k++) {
1072 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1073 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1074 alu.src[0].sel = ctx->src[i].sel;
1075 alu.src[0].chan = k;
1076 alu.src[0].rel = ctx->src[i].rel;
1077 alu.dst.sel = treg;
1078 alu.dst.chan = k;
1079 alu.dst.write = 1;
1080 if (k == 3)
1081 alu.last = 1;
1082 r = r600_bytecode_add_alu(ctx->bc, &alu);
1083 if (r)
1084 return r;
1085 }
1086 ctx->src[i].sel = treg;
1087 ctx->src[i].rel =0;
1088 j--;
1089 }
1090 }
1091 return 0;
1092 }
1093
1094 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1095 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1096 {
1097 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1098 struct r600_bytecode_alu alu;
1099 int i, j, k, nliteral, r;
1100
1101 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1102 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1103 nliteral++;
1104 }
1105 }
1106 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1107 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1108 int treg = r600_get_temp(ctx);
1109 for (k = 0; k < 4; k++) {
1110 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1111 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1112 alu.src[0].sel = ctx->src[i].sel;
1113 alu.src[0].chan = k;
1114 alu.src[0].value = ctx->src[i].value[k];
1115 alu.dst.sel = treg;
1116 alu.dst.chan = k;
1117 alu.dst.write = 1;
1118 if (k == 3)
1119 alu.last = 1;
1120 r = r600_bytecode_add_alu(ctx->bc, &alu);
1121 if (r)
1122 return r;
1123 }
1124 ctx->src[i].sel = treg;
1125 j--;
1126 }
1127 }
1128 return 0;
1129 }
1130
1131 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1132 {
1133 int i, r, count = ctx->shader->ninput;
1134
1135 /* additional inputs will be allocated right after the existing inputs,
1136 * we won't need them after the color selection, so we don't need to
1137 * reserve these gprs for the rest of the shader code and to adjust
1138 * output offsets etc. */
1139 int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
1140 ctx->info.file_max[TGSI_FILE_INPUT] + 1;
1141
1142 if (ctx->face_gpr == -1) {
1143 i = ctx->shader->ninput++;
1144 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
1145 ctx->shader->input[i].spi_sid = 0;
1146 ctx->shader->input[i].gpr = gpr++;
1147 ctx->face_gpr = ctx->shader->input[i].gpr;
1148 }
1149
1150 for (i = 0; i < count; i++) {
1151 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1152 int ni = ctx->shader->ninput++;
1153 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
1154 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1155 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
1156 ctx->shader->input[ni].gpr = gpr++;
1157
1158 if (ctx->bc->chip_class >= EVERGREEN) {
1159 r = evergreen_interp_input(ctx, ni);
1160 if (r)
1161 return r;
1162 }
1163
1164 r = select_twoside_color(ctx, i, ni);
1165 if (r)
1166 return r;
1167 }
1168 }
1169 return 0;
1170 }
1171
1172 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
1173 struct r600_pipe_shader *pipeshader,
1174 struct r600_shader_key key)
1175 {
1176 struct r600_shader *shader = &pipeshader->shader;
1177 struct tgsi_token *tokens = pipeshader->selector->tokens;
1178 struct pipe_stream_output_info so = pipeshader->selector->so;
1179 struct tgsi_full_immediate *immediate;
1180 struct tgsi_full_property *property;
1181 struct r600_shader_ctx ctx;
1182 struct r600_bytecode_output output[32];
1183 unsigned output_done, noutput;
1184 unsigned opcode;
1185 int i, j, k, r = 0;
1186 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1187 /* Declarations used by llvm code */
1188 bool use_llvm = false;
1189 unsigned char * inst_bytes = NULL;
1190 unsigned inst_byte_count = 0;
1191
1192 #ifdef R600_USE_LLVM
1193 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1194 #endif
1195 ctx.bc = &shader->bc;
1196 ctx.shader = shader;
1197 ctx.native_integers = true;
1198
1199 r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
1200 ctx.tokens = tokens;
1201 tgsi_scan_shader(tokens, &ctx.info);
1202 tgsi_parse_init(&ctx.parse, tokens);
1203 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1204 shader->processor_type = ctx.type;
1205 ctx.bc->type = shader->processor_type;
1206
1207 ctx.face_gpr = -1;
1208 ctx.fragcoord_input = -1;
1209 ctx.colors_used = 0;
1210 ctx.clip_vertex_write = 0;
1211
1212 shader->nr_ps_color_exports = 0;
1213 shader->nr_ps_max_color_exports = 0;
1214
1215 shader->two_side = key.color_two_side;
1216
1217 /* register allocations */
1218 /* Values [0,127] correspond to GPR[0..127].
1219 * Values [128,159] correspond to constant buffer bank 0
1220 * Values [160,191] correspond to constant buffer bank 1
1221 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1222 * Values [256,287] correspond to constant buffer bank 2 (EG)
1223 * Values [288,319] correspond to constant buffer bank 3 (EG)
1224 * Other special values are shown in the list below.
1225 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1226 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1227 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1228 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1229 * 248 SQ_ALU_SRC_0: special constant 0.0.
1230 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1231 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1232 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1233 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1234 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1235 * 254 SQ_ALU_SRC_PV: previous vector result.
1236 * 255 SQ_ALU_SRC_PS: previous scalar result.
1237 */
1238 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1239 ctx.file_offset[i] = 0;
1240 }
1241 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1242 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1243 if (ctx.bc->chip_class >= EVERGREEN) {
1244 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1245 } else {
1246 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1247 }
1248 }
1249 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1250 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1251 }
1252
1253 /* LLVM backend setup */
1254 #ifdef R600_USE_LLVM
1255 if (use_llvm && ctx.info.indirect_files) {
1256 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1257 "indirect adressing. Falling back to TGSI "
1258 "backend.\n");
1259 use_llvm = 0;
1260 }
1261 if (use_llvm) {
1262 struct radeon_llvm_context radeon_llvm_ctx;
1263 LLVMModuleRef mod;
1264 unsigned dump = 0;
1265 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1266 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1267 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1268 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1269 dump = 1;
1270 }
1271 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1272 rscreen->family, dump)) {
1273 FREE(inst_bytes);
1274 radeon_llvm_dispose(&radeon_llvm_ctx);
1275 use_llvm = 0;
1276 fprintf(stderr, "R600 LLVM backend failed to compile "
1277 "shader. Falling back to TGSI\n");
1278 } else {
1279 ctx.file_offset[TGSI_FILE_OUTPUT] =
1280 ctx.file_offset[TGSI_FILE_INPUT];
1281 }
1282 radeon_llvm_dispose(&radeon_llvm_ctx);
1283 }
1284 #endif
1285 /* End of LLVM backend setup */
1286
1287 if (!use_llvm) {
1288 ctx.file_offset[TGSI_FILE_OUTPUT] =
1289 ctx.file_offset[TGSI_FILE_INPUT] +
1290 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1291 }
1292 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1293 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1294
1295 /* Outside the GPR range. This will be translated to one of the
1296 * kcache banks later. */
1297 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1298
1299 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1300 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1301 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1302 ctx.temp_reg = ctx.bc->ar_reg + 1;
1303
1304 ctx.nliterals = 0;
1305 ctx.literals = NULL;
1306 shader->fs_write_all = FALSE;
1307 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1308 tgsi_parse_token(&ctx.parse);
1309 switch (ctx.parse.FullToken.Token.Type) {
1310 case TGSI_TOKEN_TYPE_IMMEDIATE:
1311 immediate = &ctx.parse.FullToken.FullImmediate;
1312 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1313 if(ctx.literals == NULL) {
1314 r = -ENOMEM;
1315 goto out_err;
1316 }
1317 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1318 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1319 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1320 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1321 ctx.nliterals++;
1322 break;
1323 case TGSI_TOKEN_TYPE_DECLARATION:
1324 r = tgsi_declaration(&ctx);
1325 if (r)
1326 goto out_err;
1327 break;
1328 case TGSI_TOKEN_TYPE_INSTRUCTION:
1329 break;
1330 case TGSI_TOKEN_TYPE_PROPERTY:
1331 property = &ctx.parse.FullToken.FullProperty;
1332 switch (property->Property.PropertyName) {
1333 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1334 if (property->u[0].Data == 1)
1335 shader->fs_write_all = TRUE;
1336 break;
1337 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1338 /* we don't need this one */
1339 break;
1340 }
1341 break;
1342 default:
1343 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1344 r = -EINVAL;
1345 goto out_err;
1346 }
1347 }
1348
1349 if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
1350 shader->nr_ps_max_color_exports = 8;
1351
1352 if (ctx.fragcoord_input >= 0) {
1353 if (ctx.bc->chip_class == CAYMAN) {
1354 for (j = 0 ; j < 4; j++) {
1355 struct r600_bytecode_alu alu;
1356 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1357 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1358 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1359 alu.src[0].chan = 3;
1360
1361 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1362 alu.dst.chan = j;
1363 alu.dst.write = (j == 3);
1364 alu.last = 1;
1365 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1366 return r;
1367 }
1368 } else {
1369 struct r600_bytecode_alu alu;
1370 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1371 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1372 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1373 alu.src[0].chan = 3;
1374
1375 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1376 alu.dst.chan = 3;
1377 alu.dst.write = 1;
1378 alu.last = 1;
1379 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1380 return r;
1381 }
1382 }
1383
1384 if (shader->two_side && ctx.colors_used) {
1385 if ((r = process_twoside_color_inputs(&ctx)))
1386 return r;
1387 }
1388
1389 tgsi_parse_init(&ctx.parse, tokens);
1390 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1391 tgsi_parse_token(&ctx.parse);
1392 switch (ctx.parse.FullToken.Token.Type) {
1393 case TGSI_TOKEN_TYPE_INSTRUCTION:
1394 if (use_llvm) {
1395 continue;
1396 }
1397 r = tgsi_is_supported(&ctx);
1398 if (r)
1399 goto out_err;
1400 ctx.max_driver_temp_used = 0;
1401 /* reserve first tmp for everyone */
1402 r600_get_temp(&ctx);
1403
1404 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1405 if ((r = tgsi_split_constant(&ctx)))
1406 goto out_err;
1407 if ((r = tgsi_split_literal_constant(&ctx)))
1408 goto out_err;
1409 if (ctx.bc->chip_class == CAYMAN)
1410 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1411 else if (ctx.bc->chip_class >= EVERGREEN)
1412 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1413 else
1414 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1415 r = ctx.inst_info->process(&ctx);
1416 if (r)
1417 goto out_err;
1418 break;
1419 default:
1420 break;
1421 }
1422 }
1423
1424 /* Get instructions if we are using the LLVM backend. */
1425 if (use_llvm) {
1426 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1427 FREE(inst_bytes);
1428 }
1429
1430 noutput = shader->noutput;
1431
1432 if (ctx.clip_vertex_write) {
1433 /* need to convert a clipvertex write into clipdistance writes and not export
1434 the clip vertex anymore */
1435
1436 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1437 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1438 shader->output[noutput].gpr = ctx.temp_reg;
1439 noutput++;
1440 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1441 shader->output[noutput].gpr = ctx.temp_reg+1;
1442 noutput++;
1443
1444 /* reset spi_sid for clipvertex output to avoid confusing spi */
1445 shader->output[ctx.cv_output].spi_sid = 0;
1446
1447 shader->clip_dist_write = 0xFF;
1448
1449 for (i = 0; i < 8; i++) {
1450 int oreg = i >> 2;
1451 int ochan = i & 3;
1452
1453 for (j = 0; j < 4; j++) {
1454 struct r600_bytecode_alu alu;
1455 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1456 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1457 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1458 alu.src[0].chan = j;
1459
1460 alu.src[1].sel = 512 + i;
1461 alu.src[1].kc_bank = 1;
1462 alu.src[1].chan = j;
1463
1464 alu.dst.sel = ctx.temp_reg + oreg;
1465 alu.dst.chan = j;
1466 alu.dst.write = (j == ochan);
1467 if (j == 3)
1468 alu.last = 1;
1469 r = r600_bytecode_add_alu(ctx.bc, &alu);
1470 if (r)
1471 return r;
1472 }
1473 }
1474 }
1475
1476 /* Add stream outputs. */
1477 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1478 for (i = 0; i < so.num_outputs; i++) {
1479 struct r600_bytecode_output output;
1480
1481 if (so.output[i].output_buffer >= 4) {
1482 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1483 so.output[i].output_buffer);
1484 r = -EINVAL;
1485 goto out_err;
1486 }
1487 if (so.output[i].dst_offset < so.output[i].start_component) {
1488 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1489 r = -EINVAL;
1490 goto out_err;
1491 }
1492
1493 memset(&output, 0, sizeof(struct r600_bytecode_output));
1494 output.gpr = shader->output[so.output[i].register_index].gpr;
1495 output.elem_size = 0;
1496 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1497 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1498 output.burst_count = 1;
1499 output.barrier = 1;
1500 /* array_size is an upper limit for the burst_count
1501 * with MEM_STREAM instructions */
1502 output.array_size = 0xFFF;
1503 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1504 if (ctx.bc->chip_class >= EVERGREEN) {
1505 switch (so.output[i].output_buffer) {
1506 case 0:
1507 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1508 break;
1509 case 1:
1510 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1511 break;
1512 case 2:
1513 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1514 break;
1515 case 3:
1516 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1517 break;
1518 }
1519 } else {
1520 switch (so.output[i].output_buffer) {
1521 case 0:
1522 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1523 break;
1524 case 1:
1525 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1526 break;
1527 case 2:
1528 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1529 break;
1530 case 3:
1531 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1532 break;
1533 }
1534 }
1535 r = r600_bytecode_add_output(ctx.bc, &output);
1536 if (r)
1537 goto out_err;
1538 }
1539 }
1540
1541 /* export output */
1542 for (i = 0, j = 0; i < noutput; i++, j++) {
1543 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1544 output[j].gpr = shader->output[i].gpr;
1545 output[j].elem_size = 3;
1546 output[j].swizzle_x = 0;
1547 output[j].swizzle_y = 1;
1548 output[j].swizzle_z = 2;
1549 output[j].swizzle_w = 3;
1550 output[j].burst_count = 1;
1551 output[j].barrier = 1;
1552 output[j].type = -1;
1553 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1554 switch (ctx.type) {
1555 case TGSI_PROCESSOR_VERTEX:
1556 switch (shader->output[i].name) {
1557 case TGSI_SEMANTIC_POSITION:
1558 output[j].array_base = next_pos_base++;
1559 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1560 break;
1561
1562 case TGSI_SEMANTIC_PSIZE:
1563 output[j].array_base = next_pos_base++;
1564 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1565 break;
1566 case TGSI_SEMANTIC_CLIPVERTEX:
1567 j--;
1568 break;
1569 case TGSI_SEMANTIC_CLIPDIST:
1570 output[j].array_base = next_pos_base++;
1571 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1572 /* spi_sid is 0 for clipdistance outputs that were generated
1573 * for clipvertex - we don't need to pass them to PS */
1574 if (shader->output[i].spi_sid) {
1575 j++;
1576 /* duplicate it as PARAM to pass to the pixel shader */
1577 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1578 output[j].array_base = next_param_base++;
1579 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1580 }
1581 break;
1582 case TGSI_SEMANTIC_FOG:
1583 output[j].swizzle_y = 4; /* 0 */
1584 output[j].swizzle_z = 4; /* 0 */
1585 output[j].swizzle_w = 5; /* 1 */
1586 break;
1587 }
1588 break;
1589 case TGSI_PROCESSOR_FRAGMENT:
1590 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1591 /* never export more colors than the number of CBs */
1592 if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) {
1593 /* skip export */
1594 j--;
1595 continue;
1596 }
1597 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1598 output[j].array_base = next_pixel_base++;
1599 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1600 shader->nr_ps_color_exports++;
1601 if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
1602 for (k = 1; k < key.nr_cbufs; k++) {
1603 j++;
1604 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1605 output[j].gpr = shader->output[i].gpr;
1606 output[j].elem_size = 3;
1607 output[j].swizzle_x = 0;
1608 output[j].swizzle_y = 1;
1609 output[j].swizzle_z = 2;
1610 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1611 output[j].burst_count = 1;
1612 output[j].barrier = 1;
1613 output[j].array_base = next_pixel_base++;
1614 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1615 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1616 shader->nr_ps_color_exports++;
1617 }
1618 }
1619 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1620 output[j].array_base = 61;
1621 output[j].swizzle_x = 2;
1622 output[j].swizzle_y = 7;
1623 output[j].swizzle_z = output[j].swizzle_w = 7;
1624 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1625 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1626 output[j].array_base = 61;
1627 output[j].swizzle_x = 7;
1628 output[j].swizzle_y = 1;
1629 output[j].swizzle_z = output[j].swizzle_w = 7;
1630 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1631 } else {
1632 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1633 r = -EINVAL;
1634 goto out_err;
1635 }
1636 break;
1637 default:
1638 R600_ERR("unsupported processor type %d\n", ctx.type);
1639 r = -EINVAL;
1640 goto out_err;
1641 }
1642
1643 if (output[j].type==-1) {
1644 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1645 output[j].array_base = next_param_base++;
1646 }
1647 }
1648
1649 /* add fake param output for vertex shader if no param is exported */
1650 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1651 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1652 output[j].gpr = 0;
1653 output[j].elem_size = 3;
1654 output[j].swizzle_x = 7;
1655 output[j].swizzle_y = 7;
1656 output[j].swizzle_z = 7;
1657 output[j].swizzle_w = 7;
1658 output[j].burst_count = 1;
1659 output[j].barrier = 1;
1660 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1661 output[j].array_base = 0;
1662 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1663 j++;
1664 }
1665
1666 /* add fake pixel export */
1667 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1668 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1669 output[j].gpr = 0;
1670 output[j].elem_size = 3;
1671 output[j].swizzle_x = 7;
1672 output[j].swizzle_y = 7;
1673 output[j].swizzle_z = 7;
1674 output[j].swizzle_w = 7;
1675 output[j].burst_count = 1;
1676 output[j].barrier = 1;
1677 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1678 output[j].array_base = 0;
1679 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1680 j++;
1681 }
1682
1683 noutput = j;
1684
1685 /* set export done on last export of each type */
1686 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1687 if (ctx.bc->chip_class < CAYMAN) {
1688 if (i == (noutput - 1)) {
1689 output[i].end_of_program = 1;
1690 }
1691 }
1692 if (!(output_done & (1 << output[i].type))) {
1693 output_done |= (1 << output[i].type);
1694 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1695 }
1696 }
1697 /* add output to bytecode */
1698 for (i = 0; i < noutput; i++) {
1699 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1700 if (r)
1701 goto out_err;
1702 }
1703 /* add program end */
1704 if (ctx.bc->chip_class == CAYMAN)
1705 cm_bytecode_add_cf_end(ctx.bc);
1706
1707 /* check GPR limit - we have 124 = 128 - 4
1708 * (4 are reserved as alu clause temporary registers) */
1709 if (ctx.bc->ngpr > 124) {
1710 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1711 r = -ENOMEM;
1712 goto out_err;
1713 }
1714
1715 free(ctx.literals);
1716 tgsi_parse_free(&ctx.parse);
1717 return 0;
1718 out_err:
1719 free(ctx.literals);
1720 tgsi_parse_free(&ctx.parse);
1721 return r;
1722 }
1723
1724 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1725 {
1726 R600_ERR("%s tgsi opcode unsupported\n",
1727 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1728 return -EINVAL;
1729 }
1730
/* Instruction handler that emits nothing and always succeeds (returns 0). */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* unused */

	return 0;
}
1735
1736 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1737 const struct r600_shader_src *shader_src,
1738 unsigned chan)
1739 {
1740 bc_src->sel = shader_src->sel;
1741 bc_src->chan = shader_src->swizzle[chan];
1742 bc_src->neg = shader_src->neg;
1743 bc_src->abs = shader_src->abs;
1744 bc_src->rel = shader_src->rel;
1745 bc_src->value = shader_src->value[bc_src->chan];
1746 }
1747
1748 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1749 {
1750 bc_src->abs = 1;
1751 bc_src->neg = 0;
1752 }
1753
1754 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1755 {
1756 bc_src->neg = !bc_src->neg;
1757 }
1758
1759 static void tgsi_dst(struct r600_shader_ctx *ctx,
1760 const struct tgsi_full_dst_register *tgsi_dst,
1761 unsigned swizzle,
1762 struct r600_bytecode_alu_dst *r600_dst)
1763 {
1764 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1765
1766 r600_dst->sel = tgsi_dst->Register.Index;
1767 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1768 r600_dst->chan = swizzle;
1769 r600_dst->write = 1;
1770 if (tgsi_dst->Register.Indirect)
1771 r600_dst->rel = V_SQ_REL_RELATIVE;
1772 if (inst->Instruction.Saturate) {
1773 r600_dst->clamp = 1;
1774 }
1775 }
1776
/*
 * Return the index (0..3) of the highest channel enabled in writemask.
 * Bits above bit 3 are ignored; an empty mask yields 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan downwards so that the first hit is the highest channel */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
1788
1789 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1790 {
1791 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1792 struct r600_bytecode_alu alu;
1793 int i, j, r;
1794 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1795
1796 for (i = 0; i < lasti + 1; i++) {
1797 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1798 continue;
1799
1800 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1801 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1802
1803 alu.inst = ctx->inst_info->r600_opcode;
1804 if (!swap) {
1805 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1806 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1807 }
1808 } else {
1809 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1810 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1811 }
1812 /* handle some special cases */
1813 switch (ctx->inst_info->tgsi_opcode) {
1814 case TGSI_OPCODE_SUB:
1815 r600_bytecode_src_toggle_neg(&alu.src[1]);
1816 break;
1817 case TGSI_OPCODE_ABS:
1818 r600_bytecode_src_set_abs(&alu.src[0]);
1819 break;
1820 default:
1821 break;
1822 }
1823 if (i == lasti || trans_only) {
1824 alu.last = 1;
1825 }
1826 r = r600_bytecode_add_alu(ctx->bc, &alu);
1827 if (r)
1828 return r;
1829 }
1830 return 0;
1831 }
1832
/* Per-channel emission with sources in TGSI order and normal grouping. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1837
/* Per-channel emission with the first two source operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1842
/*
 * Per-channel emission where every ALU instruction closes its own group
 * (each one is emitted with the "last" flag set).
 */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1847
1848 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1849 {
1850 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1851 struct r600_bytecode_alu alu;
1852 int i, r;
1853 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1854
1855 for (i = 0; i < lasti + 1; i++) {
1856
1857 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1858 continue;
1859 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1860 alu.inst = ctx->inst_info->r600_opcode;
1861
1862 alu.src[0].sel = V_SQ_ALU_SRC_0;
1863
1864 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1865
1866 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1867
1868 if (i == lasti) {
1869 alu.last = 1;
1870 }
1871 r = r600_bytecode_add_alu(ctx->bc, &alu);
1872 if (r)
1873 return r;
1874 }
1875 return 0;
1876
1877 }
1878
1879 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1880 {
1881 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1882 int i, j, r;
1883 struct r600_bytecode_alu alu;
1884 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1885
1886 for (i = 0 ; i < last_slot; i++) {
1887 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1888 alu.inst = ctx->inst_info->r600_opcode;
1889 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1890 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1891
1892 /* RSQ should take the absolute value of src */
1893 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
1894 r600_bytecode_src_set_abs(&alu.src[j]);
1895 }
1896 }
1897 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1898 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1899
1900 if (i == last_slot - 1)
1901 alu.last = 1;
1902 r = r600_bytecode_add_alu(ctx->bc, &alu);
1903 if (r)
1904 return r;
1905 }
1906 return 0;
1907 }
1908
1909 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1910 {
1911 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1912 int i, j, k, r;
1913 struct r600_bytecode_alu alu;
1914 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1915 for (k = 0; k < last_slot; k++) {
1916 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1917 continue;
1918
1919 for (i = 0 ; i < 4; i++) {
1920 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1921 alu.inst = ctx->inst_info->r600_opcode;
1922 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1923 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1924 }
1925 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1926 alu.dst.write = (i == k);
1927 if (i == 3)
1928 alu.last = 1;
1929 r = r600_bytecode_add_alu(ctx->bc, &alu);
1930 if (r)
1931 return r;
1932 }
1933 }
1934 return 0;
1935 }
1936
1937 /*
1938 * r600 - trunc to -PI..PI range
1939 * r700 - normalize by dividing by 2PI
1940 * see fdo bug 27901
1941 */
1942 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1943 {
1944 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1945 static float double_pi = 3.1415926535 * 2;
1946 static float neg_pi = -3.1415926535;
1947
1948 int r;
1949 struct r600_bytecode_alu alu;
1950
1951 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1953 alu.is_op3 = 1;
1954
1955 alu.dst.chan = 0;
1956 alu.dst.sel = ctx->temp_reg;
1957 alu.dst.write = 1;
1958
1959 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1960
1961 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1962 alu.src[1].chan = 0;
1963 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1964 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1965 alu.src[2].chan = 0;
1966 alu.last = 1;
1967 r = r600_bytecode_add_alu(ctx->bc, &alu);
1968 if (r)
1969 return r;
1970
1971 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1972 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1973
1974 alu.dst.chan = 0;
1975 alu.dst.sel = ctx->temp_reg;
1976 alu.dst.write = 1;
1977
1978 alu.src[0].sel = ctx->temp_reg;
1979 alu.src[0].chan = 0;
1980 alu.last = 1;
1981 r = r600_bytecode_add_alu(ctx->bc, &alu);
1982 if (r)
1983 return r;
1984
1985 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1986 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1987 alu.is_op3 = 1;
1988
1989 alu.dst.chan = 0;
1990 alu.dst.sel = ctx->temp_reg;
1991 alu.dst.write = 1;
1992
1993 alu.src[0].sel = ctx->temp_reg;
1994 alu.src[0].chan = 0;
1995
1996 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1997 alu.src[1].chan = 0;
1998 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1999 alu.src[2].chan = 0;
2000
2001 if (ctx->bc->chip_class == R600) {
2002 alu.src[1].value = *(uint32_t *)&double_pi;
2003 alu.src[2].value = *(uint32_t *)&neg_pi;
2004 } else {
2005 alu.src[1].sel = V_SQ_ALU_SRC_1;
2006 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2007 alu.src[2].neg = 1;
2008 }
2009
2010 alu.last = 1;
2011 r = r600_bytecode_add_alu(ctx->bc, &alu);
2012 if (r)
2013 return r;
2014 return 0;
2015 }
2016
2017 static int cayman_trig(struct r600_shader_ctx *ctx)
2018 {
2019 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2020 struct r600_bytecode_alu alu;
2021 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2022 int i, r;
2023
2024 r = tgsi_setup_trig(ctx);
2025 if (r)
2026 return r;
2027
2028
2029 for (i = 0; i < last_slot; i++) {
2030 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2031 alu.inst = ctx->inst_info->r600_opcode;
2032 alu.dst.chan = i;
2033
2034 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2035 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2036
2037 alu.src[0].sel = ctx->temp_reg;
2038 alu.src[0].chan = 0;
2039 if (i == last_slot - 1)
2040 alu.last = 1;
2041 r = r600_bytecode_add_alu(ctx->bc, &alu);
2042 if (r)
2043 return r;
2044 }
2045 return 0;
2046 }
2047
2048 static int tgsi_trig(struct r600_shader_ctx *ctx)
2049 {
2050 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2051 struct r600_bytecode_alu alu;
2052 int i, r;
2053 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2054
2055 r = tgsi_setup_trig(ctx);
2056 if (r)
2057 return r;
2058
2059 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2060 alu.inst = ctx->inst_info->r600_opcode;
2061 alu.dst.chan = 0;
2062 alu.dst.sel = ctx->temp_reg;
2063 alu.dst.write = 1;
2064
2065 alu.src[0].sel = ctx->temp_reg;
2066 alu.src[0].chan = 0;
2067 alu.last = 1;
2068 r = r600_bytecode_add_alu(ctx->bc, &alu);
2069 if (r)
2070 return r;
2071
2072 /* replicate result */
2073 for (i = 0; i < lasti + 1; i++) {
2074 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2075 continue;
2076
2077 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2079
2080 alu.src[0].sel = ctx->temp_reg;
2081 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2082 if (i == lasti)
2083 alu.last = 1;
2084 r = r600_bytecode_add_alu(ctx->bc, &alu);
2085 if (r)
2086 return r;
2087 }
2088 return 0;
2089 }
2090
2091 static int tgsi_scs(struct r600_shader_ctx *ctx)
2092 {
2093 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2094 struct r600_bytecode_alu alu;
2095 int i, r;
2096
2097 /* We'll only need the trig stuff if we are going to write to the
2098 * X or Y components of the destination vector.
2099 */
2100 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2101 r = tgsi_setup_trig(ctx);
2102 if (r)
2103 return r;
2104 }
2105
2106 /* dst.x = COS */
2107 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2108 if (ctx->bc->chip_class == CAYMAN) {
2109 for (i = 0 ; i < 3; i++) {
2110 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2111 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2112 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2113
2114 if (i == 0)
2115 alu.dst.write = 1;
2116 else
2117 alu.dst.write = 0;
2118 alu.src[0].sel = ctx->temp_reg;
2119 alu.src[0].chan = 0;
2120 if (i == 2)
2121 alu.last = 1;
2122 r = r600_bytecode_add_alu(ctx->bc, &alu);
2123 if (r)
2124 return r;
2125 }
2126 } else {
2127 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2128 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2129 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2130
2131 alu.src[0].sel = ctx->temp_reg;
2132 alu.src[0].chan = 0;
2133 alu.last = 1;
2134 r = r600_bytecode_add_alu(ctx->bc, &alu);
2135 if (r)
2136 return r;
2137 }
2138 }
2139
2140 /* dst.y = SIN */
2141 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2142 if (ctx->bc->chip_class == CAYMAN) {
2143 for (i = 0 ; i < 3; i++) {
2144 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2145 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2146 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2147 if (i == 1)
2148 alu.dst.write = 1;
2149 else
2150 alu.dst.write = 0;
2151 alu.src[0].sel = ctx->temp_reg;
2152 alu.src[0].chan = 0;
2153 if (i == 2)
2154 alu.last = 1;
2155 r = r600_bytecode_add_alu(ctx->bc, &alu);
2156 if (r)
2157 return r;
2158 }
2159 } else {
2160 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2161 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2162 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2163
2164 alu.src[0].sel = ctx->temp_reg;
2165 alu.src[0].chan = 0;
2166 alu.last = 1;
2167 r = r600_bytecode_add_alu(ctx->bc, &alu);
2168 if (r)
2169 return r;
2170 }
2171 }
2172
2173 /* dst.z = 0.0; */
2174 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2175 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2176
2177 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2178
2179 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2180
2181 alu.src[0].sel = V_SQ_ALU_SRC_0;
2182 alu.src[0].chan = 0;
2183
2184 alu.last = 1;
2185
2186 r = r600_bytecode_add_alu(ctx->bc, &alu);
2187 if (r)
2188 return r;
2189 }
2190
2191 /* dst.w = 1.0; */
2192 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2193 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2194
2195 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2196
2197 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2198
2199 alu.src[0].sel = V_SQ_ALU_SRC_1;
2200 alu.src[0].chan = 0;
2201
2202 alu.last = 1;
2203
2204 r = r600_bytecode_add_alu(ctx->bc, &alu);
2205 if (r)
2206 return r;
2207 }
2208
2209 return 0;
2210 }
2211
2212 static int tgsi_kill(struct r600_shader_ctx *ctx)
2213 {
2214 struct r600_bytecode_alu alu;
2215 int i, r;
2216
2217 for (i = 0; i < 4; i++) {
2218 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2219 alu.inst = ctx->inst_info->r600_opcode;
2220
2221 alu.dst.chan = i;
2222
2223 alu.src[0].sel = V_SQ_ALU_SRC_0;
2224
2225 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2226 alu.src[1].sel = V_SQ_ALU_SRC_1;
2227 alu.src[1].neg = 1;
2228 } else {
2229 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2230 }
2231 if (i == 3) {
2232 alu.last = 1;
2233 }
2234 r = r600_bytecode_add_alu(ctx->bc, &alu);
2235 if (r)
2236 return r;
2237 }
2238
2239 /* kill must be last in ALU */
2240 ctx->bc->force_add_cf = 1;
2241 ctx->shader->uses_kill = TRUE;
2242 return 0;
2243 }
2244
2245 static int tgsi_lit(struct r600_shader_ctx *ctx)
2246 {
2247 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2248 struct r600_bytecode_alu alu;
2249 int r;
2250
2251 /* tmp.x = max(src.y, 0.0) */
2252 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2253 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2254 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2255 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2256 alu.src[1].chan = 1;
2257
2258 alu.dst.sel = ctx->temp_reg;
2259 alu.dst.chan = 0;
2260 alu.dst.write = 1;
2261
2262 alu.last = 1;
2263 r = r600_bytecode_add_alu(ctx->bc, &alu);
2264 if (r)
2265 return r;
2266
2267 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2268 {
2269 int chan;
2270 int sel;
2271 int i;
2272
2273 if (ctx->bc->chip_class == CAYMAN) {
2274 for (i = 0; i < 3; i++) {
2275 /* tmp.z = log(tmp.x) */
2276 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2277 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2278 alu.src[0].sel = ctx->temp_reg;
2279 alu.src[0].chan = 0;
2280 alu.dst.sel = ctx->temp_reg;
2281 alu.dst.chan = i;
2282 if (i == 2) {
2283 alu.dst.write = 1;
2284 alu.last = 1;
2285 } else
2286 alu.dst.write = 0;
2287
2288 r = r600_bytecode_add_alu(ctx->bc, &alu);
2289 if (r)
2290 return r;
2291 }
2292 } else {
2293 /* tmp.z = log(tmp.x) */
2294 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2295 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2296 alu.src[0].sel = ctx->temp_reg;
2297 alu.src[0].chan = 0;
2298 alu.dst.sel = ctx->temp_reg;
2299 alu.dst.chan = 2;
2300 alu.dst.write = 1;
2301 alu.last = 1;
2302 r = r600_bytecode_add_alu(ctx->bc, &alu);
2303 if (r)
2304 return r;
2305 }
2306
2307 chan = alu.dst.chan;
2308 sel = alu.dst.sel;
2309
2310 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2311 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2313 alu.src[0].sel = sel;
2314 alu.src[0].chan = chan;
2315 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2316 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2317 alu.dst.sel = ctx->temp_reg;
2318 alu.dst.chan = 0;
2319 alu.dst.write = 1;
2320 alu.is_op3 = 1;
2321 alu.last = 1;
2322 r = r600_bytecode_add_alu(ctx->bc, &alu);
2323 if (r)
2324 return r;
2325
2326 if (ctx->bc->chip_class == CAYMAN) {
2327 for (i = 0; i < 3; i++) {
2328 /* dst.z = exp(tmp.x) */
2329 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2330 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2331 alu.src[0].sel = ctx->temp_reg;
2332 alu.src[0].chan = 0;
2333 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2334 if (i == 2) {
2335 alu.dst.write = 1;
2336 alu.last = 1;
2337 } else
2338 alu.dst.write = 0;
2339 r = r600_bytecode_add_alu(ctx->bc, &alu);
2340 if (r)
2341 return r;
2342 }
2343 } else {
2344 /* dst.z = exp(tmp.x) */
2345 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2346 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2347 alu.src[0].sel = ctx->temp_reg;
2348 alu.src[0].chan = 0;
2349 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2350 alu.last = 1;
2351 r = r600_bytecode_add_alu(ctx->bc, &alu);
2352 if (r)
2353 return r;
2354 }
2355 }
2356
2357 /* dst.x, <- 1.0 */
2358 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2359 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2360 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2361 alu.src[0].chan = 0;
2362 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2363 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2364 r = r600_bytecode_add_alu(ctx->bc, &alu);
2365 if (r)
2366 return r;
2367
2368 /* dst.y = max(src.x, 0.0) */
2369 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2370 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2371 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2372 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2373 alu.src[1].chan = 0;
2374 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2375 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2376 r = r600_bytecode_add_alu(ctx->bc, &alu);
2377 if (r)
2378 return r;
2379
2380 /* dst.w, <- 1.0 */
2381 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2382 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2383 alu.src[0].sel = V_SQ_ALU_SRC_1;
2384 alu.src[0].chan = 0;
2385 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2386 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2387 alu.last = 1;
2388 r = r600_bytecode_add_alu(ctx->bc, &alu);
2389 if (r)
2390 return r;
2391
2392 return 0;
2393 }
2394
2395 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2396 {
2397 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2398 struct r600_bytecode_alu alu;
2399 int i, r;
2400
2401 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2402
2403 /* XXX:
2404 * For state trackers other than OpenGL, we'll want to use
2405 * _RECIPSQRT_IEEE instead.
2406 */
2407 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2408
2409 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2410 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2411 r600_bytecode_src_set_abs(&alu.src[i]);
2412 }
2413 alu.dst.sel = ctx->temp_reg;
2414 alu.dst.write = 1;
2415 alu.last = 1;
2416 r = r600_bytecode_add_alu(ctx->bc, &alu);
2417 if (r)
2418 return r;
2419 /* replicate result */
2420 return tgsi_helper_tempx_replicate(ctx);
2421 }
2422
2423 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2424 {
2425 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2426 struct r600_bytecode_alu alu;
2427 int i, r;
2428
2429 for (i = 0; i < 4; i++) {
2430 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2431 alu.src[0].sel = ctx->temp_reg;
2432 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2433 alu.dst.chan = i;
2434 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2435 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2436 if (i == 3)
2437 alu.last = 1;
2438 r = r600_bytecode_add_alu(ctx->bc, &alu);
2439 if (r)
2440 return r;
2441 }
2442 return 0;
2443 }
2444
2445 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2446 {
2447 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2448 struct r600_bytecode_alu alu;
2449 int i, r;
2450
2451 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2452 alu.inst = ctx->inst_info->r600_opcode;
2453 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2454 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2455 }
2456 alu.dst.sel = ctx->temp_reg;
2457 alu.dst.write = 1;
2458 alu.last = 1;
2459 r = r600_bytecode_add_alu(ctx->bc, &alu);
2460 if (r)
2461 return r;
2462 /* replicate result */
2463 return tgsi_helper_tempx_replicate(ctx);
2464 }
2465
2466 static int cayman_pow(struct r600_shader_ctx *ctx)
2467 {
2468 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2469 int i, r;
2470 struct r600_bytecode_alu alu;
2471 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2472
2473 for (i = 0; i < 3; i++) {
2474 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2476 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2477 alu.dst.sel = ctx->temp_reg;
2478 alu.dst.chan = i;
2479 alu.dst.write = 1;
2480 if (i == 2)
2481 alu.last = 1;
2482 r = r600_bytecode_add_alu(ctx->bc, &alu);
2483 if (r)
2484 return r;
2485 }
2486
2487 /* b * LOG2(a) */
2488 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2489 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2490 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2491 alu.src[1].sel = ctx->temp_reg;
2492 alu.dst.sel = ctx->temp_reg;
2493 alu.dst.write = 1;
2494 alu.last = 1;
2495 r = r600_bytecode_add_alu(ctx->bc, &alu);
2496 if (r)
2497 return r;
2498
2499 for (i = 0; i < last_slot; i++) {
2500 /* POW(a,b) = EXP2(b * LOG2(a))*/
2501 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2502 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2503 alu.src[0].sel = ctx->temp_reg;
2504
2505 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2506 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2507 if (i == last_slot - 1)
2508 alu.last = 1;
2509 r = r600_bytecode_add_alu(ctx->bc, &alu);
2510 if (r)
2511 return r;
2512 }
2513 return 0;
2514 }
2515
2516 static int tgsi_pow(struct r600_shader_ctx *ctx)
2517 {
2518 struct r600_bytecode_alu alu;
2519 int r;
2520
2521 /* LOG2(a) */
2522 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2524 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2525 alu.dst.sel = ctx->temp_reg;
2526 alu.dst.write = 1;
2527 alu.last = 1;
2528 r = r600_bytecode_add_alu(ctx->bc, &alu);
2529 if (r)
2530 return r;
2531 /* b * LOG2(a) */
2532 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2533 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2534 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2535 alu.src[1].sel = ctx->temp_reg;
2536 alu.dst.sel = ctx->temp_reg;
2537 alu.dst.write = 1;
2538 alu.last = 1;
2539 r = r600_bytecode_add_alu(ctx->bc, &alu);
2540 if (r)
2541 return r;
2542 /* POW(a,b) = EXP2(b * LOG2(a))*/
2543 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2545 alu.src[0].sel = ctx->temp_reg;
2546 alu.dst.sel = ctx->temp_reg;
2547 alu.dst.write = 1;
2548 alu.last = 1;
2549 r = r600_bytecode_add_alu(ctx->bc, &alu);
2550 if (r)
2551 return r;
2552 return tgsi_helper_tempx_replicate(ctx);
2553 }
2554
2555 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2556 {
2557 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2558 struct r600_bytecode_alu alu;
2559 int i, r, j;
2560 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2561 int tmp0 = ctx->temp_reg;
2562 int tmp1 = r600_get_temp(ctx);
2563 int tmp2 = r600_get_temp(ctx);
2564 int tmp3 = r600_get_temp(ctx);
2565 /* Unsigned path:
2566 *
2567 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2568 *
2569 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2570 * 2. tmp0.z = lo (tmp0.x * src2)
2571 * 3. tmp0.w = -tmp0.z
2572 * 4. tmp0.y = hi (tmp0.x * src2)
2573 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2574 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2575 * 7. tmp1.x = tmp0.x - tmp0.w
2576 * 8. tmp1.y = tmp0.x + tmp0.w
2577 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2578 * 10. tmp0.z = hi(tmp0.x * src1) = q
2579 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2580 *
2581 * 12. tmp0.w = src1 - tmp0.y = r
2582 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2583 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2584 *
2585 * if DIV
2586 *
2587 * 15. tmp1.z = tmp0.z + 1 = q + 1
2588 * 16. tmp1.w = tmp0.z - 1 = q - 1
2589 *
2590 * else MOD
2591 *
2592 * 15. tmp1.z = tmp0.w - src2 = r - src2
2593 * 16. tmp1.w = tmp0.w + src2 = r + src2
2594 *
2595 * endif
2596 *
2597 * 17. tmp1.x = tmp1.x & tmp1.y
2598 *
2599 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2600 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2601 *
2602 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2603 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2604 *
2605 * Signed path:
2606 *
2607 * Same as unsigned, using abs values of the operands,
2608 * and fixing the sign of the result in the end.
2609 */
2610
2611 for (i = 0; i < 4; i++) {
2612 if (!(write_mask & (1<<i)))
2613 continue;
2614
2615 if (signed_op) {
2616
2617 /* tmp2.x = -src0 */
2618 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2619 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2620
2621 alu.dst.sel = tmp2;
2622 alu.dst.chan = 0;
2623 alu.dst.write = 1;
2624
2625 alu.src[0].sel = V_SQ_ALU_SRC_0;
2626
2627 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2628
2629 alu.last = 1;
2630 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2631 return r;
2632
2633 /* tmp2.y = -src1 */
2634 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2635 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2636
2637 alu.dst.sel = tmp2;
2638 alu.dst.chan = 1;
2639 alu.dst.write = 1;
2640
2641 alu.src[0].sel = V_SQ_ALU_SRC_0;
2642
2643 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2644
2645 alu.last = 1;
2646 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2647 return r;
2648
2649 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2650 /* it will be a sign of the quotient */
2651 if (!mod) {
2652
2653 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2654 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2655
2656 alu.dst.sel = tmp2;
2657 alu.dst.chan = 2;
2658 alu.dst.write = 1;
2659
2660 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2661 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2662
2663 alu.last = 1;
2664 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2665 return r;
2666 }
2667
2668 /* tmp2.x = |src0| */
2669 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2670 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2671 alu.is_op3 = 1;
2672
2673 alu.dst.sel = tmp2;
2674 alu.dst.chan = 0;
2675 alu.dst.write = 1;
2676
2677 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2678 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2679 alu.src[2].sel = tmp2;
2680 alu.src[2].chan = 0;
2681
2682 alu.last = 1;
2683 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2684 return r;
2685
2686 /* tmp2.y = |src1| */
2687 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2689 alu.is_op3 = 1;
2690
2691 alu.dst.sel = tmp2;
2692 alu.dst.chan = 1;
2693 alu.dst.write = 1;
2694
2695 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2696 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2697 alu.src[2].sel = tmp2;
2698 alu.src[2].chan = 1;
2699
2700 alu.last = 1;
2701 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2702 return r;
2703
2704 }
2705
2706 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2707 if (ctx->bc->chip_class == CAYMAN) {
2708 /* tmp3.x = u2f(src2) */
2709 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2710 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2711
2712 alu.dst.sel = tmp3;
2713 alu.dst.chan = 0;
2714 alu.dst.write = 1;
2715
2716 if (signed_op) {
2717 alu.src[0].sel = tmp2;
2718 alu.src[0].chan = 1;
2719 } else {
2720 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2721 }
2722
2723 alu.last = 1;
2724 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2725 return r;
2726
2727 /* tmp0.x = recip(tmp3.x) */
2728 for (j = 0 ; j < 3; j++) {
2729 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2730 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2731
2732 alu.dst.sel = tmp0;
2733 alu.dst.chan = j;
2734 alu.dst.write = (j == 0);
2735
2736 alu.src[0].sel = tmp3;
2737 alu.src[0].chan = 0;
2738
2739 if (j == 2)
2740 alu.last = 1;
2741 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2742 return r;
2743 }
2744
2745 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2746 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2747
2748 alu.src[0].sel = tmp0;
2749 alu.src[0].chan = 0;
2750
2751 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2752 alu.src[1].value = 0x4f800000;
2753
2754 alu.dst.sel = tmp3;
2755 alu.dst.write = 1;
2756 alu.last = 1;
2757 r = r600_bytecode_add_alu(ctx->bc, &alu);
2758 if (r)
2759 return r;
2760
2761 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2762 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2763
2764 alu.dst.sel = tmp0;
2765 alu.dst.chan = 0;
2766 alu.dst.write = 1;
2767
2768 alu.src[0].sel = tmp3;
2769 alu.src[0].chan = 0;
2770
2771 alu.last = 1;
2772 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2773 return r;
2774
2775 } else {
2776 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2777 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2778
2779 alu.dst.sel = tmp0;
2780 alu.dst.chan = 0;
2781 alu.dst.write = 1;
2782
2783 if (signed_op) {
2784 alu.src[0].sel = tmp2;
2785 alu.src[0].chan = 1;
2786 } else {
2787 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2788 }
2789
2790 alu.last = 1;
2791 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2792 return r;
2793 }
2794
2795 /* 2. tmp0.z = lo (tmp0.x * src2) */
2796 if (ctx->bc->chip_class == CAYMAN) {
2797 for (j = 0 ; j < 4; j++) {
2798 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2799 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2800
2801 alu.dst.sel = tmp0;
2802 alu.dst.chan = j;
2803 alu.dst.write = (j == 2);
2804
2805 alu.src[0].sel = tmp0;
2806 alu.src[0].chan = 0;
2807 if (signed_op) {
2808 alu.src[1].sel = tmp2;
2809 alu.src[1].chan = 1;
2810 } else {
2811 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2812 }
2813
2814 alu.last = (j == 3);
2815 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2816 return r;
2817 }
2818 } else {
2819 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2820 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2821
2822 alu.dst.sel = tmp0;
2823 alu.dst.chan = 2;
2824 alu.dst.write = 1;
2825
2826 alu.src[0].sel = tmp0;
2827 alu.src[0].chan = 0;
2828 if (signed_op) {
2829 alu.src[1].sel = tmp2;
2830 alu.src[1].chan = 1;
2831 } else {
2832 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2833 }
2834
2835 alu.last = 1;
2836 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2837 return r;
2838 }
2839
2840 /* 3. tmp0.w = -tmp0.z */
2841 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2842 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2843
2844 alu.dst.sel = tmp0;
2845 alu.dst.chan = 3;
2846 alu.dst.write = 1;
2847
2848 alu.src[0].sel = V_SQ_ALU_SRC_0;
2849 alu.src[1].sel = tmp0;
2850 alu.src[1].chan = 2;
2851
2852 alu.last = 1;
2853 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2854 return r;
2855
2856 /* 4. tmp0.y = hi (tmp0.x * src2) */
2857 if (ctx->bc->chip_class == CAYMAN) {
2858 for (j = 0 ; j < 4; j++) {
2859 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2860 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2861
2862 alu.dst.sel = tmp0;
2863 alu.dst.chan = j;
2864 alu.dst.write = (j == 1);
2865
2866 alu.src[0].sel = tmp0;
2867 alu.src[0].chan = 0;
2868
2869 if (signed_op) {
2870 alu.src[1].sel = tmp2;
2871 alu.src[1].chan = 1;
2872 } else {
2873 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2874 }
2875 alu.last = (j == 3);
2876 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2877 return r;
2878 }
2879 } else {
2880 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2881 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2882
2883 alu.dst.sel = tmp0;
2884 alu.dst.chan = 1;
2885 alu.dst.write = 1;
2886
2887 alu.src[0].sel = tmp0;
2888 alu.src[0].chan = 0;
2889
2890 if (signed_op) {
2891 alu.src[1].sel = tmp2;
2892 alu.src[1].chan = 1;
2893 } else {
2894 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2895 }
2896
2897 alu.last = 1;
2898 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2899 return r;
2900 }
2901
2902 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2903 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2904 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2905 alu.is_op3 = 1;
2906
2907 alu.dst.sel = tmp0;
2908 alu.dst.chan = 2;
2909 alu.dst.write = 1;
2910
2911 alu.src[0].sel = tmp0;
2912 alu.src[0].chan = 1;
2913 alu.src[1].sel = tmp0;
2914 alu.src[1].chan = 3;
2915 alu.src[2].sel = tmp0;
2916 alu.src[2].chan = 2;
2917
2918 alu.last = 1;
2919 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2920 return r;
2921
2922 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2923 if (ctx->bc->chip_class == CAYMAN) {
2924 for (j = 0 ; j < 4; j++) {
2925 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2926 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2927
2928 alu.dst.sel = tmp0;
2929 alu.dst.chan = j;
2930 alu.dst.write = (j == 3);
2931
2932 alu.src[0].sel = tmp0;
2933 alu.src[0].chan = 2;
2934
2935 alu.src[1].sel = tmp0;
2936 alu.src[1].chan = 0;
2937
2938 alu.last = (j == 3);
2939 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2940 return r;
2941 }
2942 } else {
2943 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2944 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2945
2946 alu.dst.sel = tmp0;
2947 alu.dst.chan = 3;
2948 alu.dst.write = 1;
2949
2950 alu.src[0].sel = tmp0;
2951 alu.src[0].chan = 2;
2952
2953 alu.src[1].sel = tmp0;
2954 alu.src[1].chan = 0;
2955
2956 alu.last = 1;
2957 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2958 return r;
2959 }
2960
2961 /* 7. tmp1.x = tmp0.x - tmp0.w */
2962 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2963 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2964
2965 alu.dst.sel = tmp1;
2966 alu.dst.chan = 0;
2967 alu.dst.write = 1;
2968
2969 alu.src[0].sel = tmp0;
2970 alu.src[0].chan = 0;
2971 alu.src[1].sel = tmp0;
2972 alu.src[1].chan = 3;
2973
2974 alu.last = 1;
2975 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2976 return r;
2977
2978 /* 8. tmp1.y = tmp0.x + tmp0.w */
2979 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2980 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2981
2982 alu.dst.sel = tmp1;
2983 alu.dst.chan = 1;
2984 alu.dst.write = 1;
2985
2986 alu.src[0].sel = tmp0;
2987 alu.src[0].chan = 0;
2988 alu.src[1].sel = tmp0;
2989 alu.src[1].chan = 3;
2990
2991 alu.last = 1;
2992 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2993 return r;
2994
2995 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2996 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2997 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2998 alu.is_op3 = 1;
2999
3000 alu.dst.sel = tmp0;
3001 alu.dst.chan = 0;
3002 alu.dst.write = 1;
3003
3004 alu.src[0].sel = tmp0;
3005 alu.src[0].chan = 1;
3006 alu.src[1].sel = tmp1;
3007 alu.src[1].chan = 1;
3008 alu.src[2].sel = tmp1;
3009 alu.src[2].chan = 0;
3010
3011 alu.last = 1;
3012 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3013 return r;
3014
3015 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
3016 if (ctx->bc->chip_class == CAYMAN) {
3017 for (j = 0 ; j < 4; j++) {
3018 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3019 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3020
3021 alu.dst.sel = tmp0;
3022 alu.dst.chan = j;
3023 alu.dst.write = (j == 2);
3024
3025 alu.src[0].sel = tmp0;
3026 alu.src[0].chan = 0;
3027
3028 if (signed_op) {
3029 alu.src[1].sel = tmp2;
3030 alu.src[1].chan = 0;
3031 } else {
3032 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3033 }
3034
3035 alu.last = (j == 3);
3036 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3037 return r;
3038 }
3039 } else {
3040 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3041 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3042
3043 alu.dst.sel = tmp0;
3044 alu.dst.chan = 2;
3045 alu.dst.write = 1;
3046
3047 alu.src[0].sel = tmp0;
3048 alu.src[0].chan = 0;
3049
3050 if (signed_op) {
3051 alu.src[1].sel = tmp2;
3052 alu.src[1].chan = 0;
3053 } else {
3054 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3055 }
3056
3057 alu.last = 1;
3058 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3059 return r;
3060 }
3061
3062 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
3063 if (ctx->bc->chip_class == CAYMAN) {
3064 for (j = 0 ; j < 4; j++) {
3065 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3066 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3067
3068 alu.dst.sel = tmp0;
3069 alu.dst.chan = j;
3070 alu.dst.write = (j == 1);
3071
3072 if (signed_op) {
3073 alu.src[0].sel = tmp2;
3074 alu.src[0].chan = 1;
3075 } else {
3076 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3077 }
3078
3079 alu.src[1].sel = tmp0;
3080 alu.src[1].chan = 2;
3081
3082 alu.last = (j == 3);
3083 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3084 return r;
3085 }
3086 } else {
3087 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3088 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3089
3090 alu.dst.sel = tmp0;
3091 alu.dst.chan = 1;
3092 alu.dst.write = 1;
3093
3094 if (signed_op) {
3095 alu.src[0].sel = tmp2;
3096 alu.src[0].chan = 1;
3097 } else {
3098 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3099 }
3100
3101 alu.src[1].sel = tmp0;
3102 alu.src[1].chan = 2;
3103
3104 alu.last = 1;
3105 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3106 return r;
3107 }
3108
3109 /* 12. tmp0.w = src1 - tmp0.y = r */
3110 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3111 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3112
3113 alu.dst.sel = tmp0;
3114 alu.dst.chan = 3;
3115 alu.dst.write = 1;
3116
3117 if (signed_op) {
3118 alu.src[0].sel = tmp2;
3119 alu.src[0].chan = 0;
3120 } else {
3121 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3122 }
3123
3124 alu.src[1].sel = tmp0;
3125 alu.src[1].chan = 1;
3126
3127 alu.last = 1;
3128 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3129 return r;
3130
3131 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3132 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3133 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3134
3135 alu.dst.sel = tmp1;
3136 alu.dst.chan = 0;
3137 alu.dst.write = 1;
3138
3139 alu.src[0].sel = tmp0;
3140 alu.src[0].chan = 3;
3141 if (signed_op) {
3142 alu.src[1].sel = tmp2;
3143 alu.src[1].chan = 1;
3144 } else {
3145 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3146 }
3147
3148 alu.last = 1;
3149 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3150 return r;
3151
3152 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3153 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3154 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3155
3156 alu.dst.sel = tmp1;
3157 alu.dst.chan = 1;
3158 alu.dst.write = 1;
3159
3160 if (signed_op) {
3161 alu.src[0].sel = tmp2;
3162 alu.src[0].chan = 0;
3163 } else {
3164 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3165 }
3166
3167 alu.src[1].sel = tmp0;
3168 alu.src[1].chan = 1;
3169
3170 alu.last = 1;
3171 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3172 return r;
3173
3174 if (mod) { /* UMOD */
3175
3176 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3177 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3178 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3179
3180 alu.dst.sel = tmp1;
3181 alu.dst.chan = 2;
3182 alu.dst.write = 1;
3183
3184 alu.src[0].sel = tmp0;
3185 alu.src[0].chan = 3;
3186
3187 if (signed_op) {
3188 alu.src[1].sel = tmp2;
3189 alu.src[1].chan = 1;
3190 } else {
3191 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3192 }
3193
3194 alu.last = 1;
3195 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3196 return r;
3197
3198 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3199 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3201
3202 alu.dst.sel = tmp1;
3203 alu.dst.chan = 3;
3204 alu.dst.write = 1;
3205
3206 alu.src[0].sel = tmp0;
3207 alu.src[0].chan = 3;
3208 if (signed_op) {
3209 alu.src[1].sel = tmp2;
3210 alu.src[1].chan = 1;
3211 } else {
3212 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3213 }
3214
3215 alu.last = 1;
3216 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3217 return r;
3218
3219 } else { /* UDIV */
3220
3221 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3222 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3224
3225 alu.dst.sel = tmp1;
3226 alu.dst.chan = 2;
3227 alu.dst.write = 1;
3228
3229 alu.src[0].sel = tmp0;
3230 alu.src[0].chan = 2;
3231 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3232
3233 alu.last = 1;
3234 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3235 return r;
3236
3237 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3238 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3240
3241 alu.dst.sel = tmp1;
3242 alu.dst.chan = 3;
3243 alu.dst.write = 1;
3244
3245 alu.src[0].sel = tmp0;
3246 alu.src[0].chan = 2;
3247 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3248
3249 alu.last = 1;
3250 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3251 return r;
3252
3253 }
3254
3255 /* 17. tmp1.x = tmp1.x & tmp1.y */
3256 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3257 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3258
3259 alu.dst.sel = tmp1;
3260 alu.dst.chan = 0;
3261 alu.dst.write = 1;
3262
3263 alu.src[0].sel = tmp1;
3264 alu.src[0].chan = 0;
3265 alu.src[1].sel = tmp1;
3266 alu.src[1].chan = 1;
3267
3268 alu.last = 1;
3269 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3270 return r;
3271
3272 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3273 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3274 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3275 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3276 alu.is_op3 = 1;
3277
3278 alu.dst.sel = tmp0;
3279 alu.dst.chan = 2;
3280 alu.dst.write = 1;
3281
3282 alu.src[0].sel = tmp1;
3283 alu.src[0].chan = 0;
3284 alu.src[1].sel = tmp0;
3285 alu.src[1].chan = mod ? 3 : 2;
3286 alu.src[2].sel = tmp1;
3287 alu.src[2].chan = 2;
3288
3289 alu.last = 1;
3290 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3291 return r;
3292
3293 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3294 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3295 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3296 alu.is_op3 = 1;
3297
3298 if (signed_op) {
3299 alu.dst.sel = tmp0;
3300 alu.dst.chan = 2;
3301 alu.dst.write = 1;
3302 } else {
3303 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3304 }
3305
3306 alu.src[0].sel = tmp1;
3307 alu.src[0].chan = 1;
3308 alu.src[1].sel = tmp1;
3309 alu.src[1].chan = 3;
3310 alu.src[2].sel = tmp0;
3311 alu.src[2].chan = 2;
3312
3313 alu.last = 1;
3314 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3315 return r;
3316
3317 if (signed_op) {
3318
3319 /* fix the sign of the result */
3320
3321 if (mod) {
3322
3323 /* tmp0.x = -tmp0.z */
3324 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3325 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3326
3327 alu.dst.sel = tmp0;
3328 alu.dst.chan = 0;
3329 alu.dst.write = 1;
3330
3331 alu.src[0].sel = V_SQ_ALU_SRC_0;
3332 alu.src[1].sel = tmp0;
3333 alu.src[1].chan = 2;
3334
3335 alu.last = 1;
3336 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3337 return r;
3338
3339 /* sign of the remainder is the same as the sign of src0 */
3340 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3341 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3342 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3343 alu.is_op3 = 1;
3344
3345 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3346
3347 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3348 alu.src[1].sel = tmp0;
3349 alu.src[1].chan = 2;
3350 alu.src[2].sel = tmp0;
3351 alu.src[2].chan = 0;
3352
3353 alu.last = 1;
3354 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3355 return r;
3356
3357 } else {
3358
3359 /* tmp0.x = -tmp0.z */
3360 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3361 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3362
3363 alu.dst.sel = tmp0;
3364 alu.dst.chan = 0;
3365 alu.dst.write = 1;
3366
3367 alu.src[0].sel = V_SQ_ALU_SRC_0;
3368 alu.src[1].sel = tmp0;
3369 alu.src[1].chan = 2;
3370
3371 alu.last = 1;
3372 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3373 return r;
3374
3375 /* fix the quotient sign (same as the sign of src0*src1) */
3376 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3377 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3378 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3379 alu.is_op3 = 1;
3380
3381 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3382
3383 alu.src[0].sel = tmp2;
3384 alu.src[0].chan = 2;
3385 alu.src[1].sel = tmp0;
3386 alu.src[1].chan = 2;
3387 alu.src[2].sel = tmp0;
3388 alu.src[2].chan = 0;
3389
3390 alu.last = 1;
3391 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3392 return r;
3393 }
3394 }
3395 }
3396 return 0;
3397 }
3398
/* TGSI UDIV: unsigned integer division, lowered by the shared
 * tgsi_divmod() helper with both of its flags cleared. */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3403
/* TGSI UMOD: unsigned integer remainder, lowered by the shared
 * tgsi_divmod() helper with only the "mod" flag set. */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3408
/* TGSI IDIV: signed integer division, lowered by the shared
 * tgsi_divmod() helper with only the "signed" flag set. */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3413
/* TGSI IMOD: signed integer remainder, lowered by the shared
 * tgsi_divmod() helper with both the "mod" and "signed" flags set. */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3418
3419
3420 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3421 {
3422 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3423 struct r600_bytecode_alu alu;
3424 int i, r;
3425 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3426 int last_inst = tgsi_last_instruction(write_mask);
3427
3428 for (i = 0; i < 4; i++) {
3429 if (!(write_mask & (1<<i)))
3430 continue;
3431
3432 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3433 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3434
3435 alu.dst.sel = ctx->temp_reg;
3436 alu.dst.chan = i;
3437 alu.dst.write = 1;
3438
3439 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3440 if (i == last_inst)
3441 alu.last = 1;
3442 r = r600_bytecode_add_alu(ctx->bc, &alu);
3443 if (r)
3444 return r;
3445 }
3446
3447 for (i = 0; i < 4; i++) {
3448 if (!(write_mask & (1<<i)))
3449 continue;
3450
3451 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3452 alu.inst = ctx->inst_info->r600_opcode;
3453
3454 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3455
3456 alu.src[0].sel = ctx->temp_reg;
3457 alu.src[0].chan = i;
3458
3459 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3460 alu.last = 1;
3461 r = r600_bytecode_add_alu(ctx->bc, &alu);
3462 if (r)
3463 return r;
3464 }
3465
3466 return 0;
3467 }
3468
3469 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3470 {
3471 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3472 struct r600_bytecode_alu alu;
3473 int i, r;
3474 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3475 int last_inst = tgsi_last_instruction(write_mask);
3476
3477 /* tmp = -src */
3478 for (i = 0; i < 4; i++) {
3479 if (!(write_mask & (1<<i)))
3480 continue;
3481
3482 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3483 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3484
3485 alu.dst.sel = ctx->temp_reg;
3486 alu.dst.chan = i;
3487 alu.dst.write = 1;
3488
3489 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3490 alu.src[0].sel = V_SQ_ALU_SRC_0;
3491
3492 if (i == last_inst)
3493 alu.last = 1;
3494 r = r600_bytecode_add_alu(ctx->bc, &alu);
3495 if (r)
3496 return r;
3497 }
3498
3499 /* dst = (src >= 0 ? src : tmp) */
3500 for (i = 0; i < 4; i++) {
3501 if (!(write_mask & (1<<i)))
3502 continue;
3503
3504 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3505 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3506 alu.is_op3 = 1;
3507 alu.dst.write = 1;
3508
3509 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3510
3511 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3512 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3513 alu.src[2].sel = ctx->temp_reg;
3514 alu.src[2].chan = i;
3515
3516 if (i == last_inst)
3517 alu.last = 1;
3518 r = r600_bytecode_add_alu(ctx->bc, &alu);
3519 if (r)
3520 return r;
3521 }
3522 return 0;
3523 }
3524
3525 static int tgsi_issg(struct r600_shader_ctx *ctx)
3526 {
3527 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3528 struct r600_bytecode_alu alu;
3529 int i, r;
3530 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3531 int last_inst = tgsi_last_instruction(write_mask);
3532
3533 /* tmp = (src >= 0 ? src : -1) */
3534 for (i = 0; i < 4; i++) {
3535 if (!(write_mask & (1<<i)))
3536 continue;
3537
3538 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3539 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3540 alu.is_op3 = 1;
3541
3542 alu.dst.sel = ctx->temp_reg;
3543 alu.dst.chan = i;
3544 alu.dst.write = 1;
3545
3546 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3547 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3548 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3549
3550 if (i == last_inst)
3551 alu.last = 1;
3552 r = r600_bytecode_add_alu(ctx->bc, &alu);
3553 if (r)
3554 return r;
3555 }
3556
3557 /* dst = (tmp > 0 ? 1 : tmp) */
3558 for (i = 0; i < 4; i++) {
3559 if (!(write_mask & (1<<i)))
3560 continue;
3561
3562 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3563 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3564 alu.is_op3 = 1;
3565 alu.dst.write = 1;
3566
3567 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3568
3569 alu.src[0].sel = ctx->temp_reg;
3570 alu.src[0].chan = i;
3571
3572 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3573
3574 alu.src[2].sel = ctx->temp_reg;
3575 alu.src[2].chan = i;
3576
3577 if (i == last_inst)
3578 alu.last = 1;
3579 r = r600_bytecode_add_alu(ctx->bc, &alu);
3580 if (r)
3581 return r;
3582 }
3583 return 0;
3584 }
3585
3586
3587
3588 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3589 {
3590 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3591 struct r600_bytecode_alu alu;
3592 int i, r;
3593
3594 /* tmp = (src > 0 ? 1 : src) */
3595 for (i = 0; i < 4; i++) {
3596 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3597 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3598 alu.is_op3 = 1;
3599
3600 alu.dst.sel = ctx->temp_reg;
3601 alu.dst.chan = i;
3602
3603 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3604 alu.src[1].sel = V_SQ_ALU_SRC_1;
3605 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3606
3607 if (i == 3)
3608 alu.last = 1;
3609 r = r600_bytecode_add_alu(ctx->bc, &alu);
3610 if (r)
3611 return r;
3612 }
3613
3614 /* dst = (-tmp > 0 ? -1 : tmp) */
3615 for (i = 0; i < 4; i++) {
3616 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3617 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3618 alu.is_op3 = 1;
3619 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3620
3621 alu.src[0].sel = ctx->temp_reg;
3622 alu.src[0].chan = i;
3623 alu.src[0].neg = 1;
3624
3625 alu.src[1].sel = V_SQ_ALU_SRC_1;
3626 alu.src[1].neg = 1;
3627
3628 alu.src[2].sel = ctx->temp_reg;
3629 alu.src[2].chan = i;
3630
3631 if (i == 3)
3632 alu.last = 1;
3633 r = r600_bytecode_add_alu(ctx->bc, &alu);
3634 if (r)
3635 return r;
3636 }
3637 return 0;
3638 }
3639
3640 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3641 {
3642 struct r600_bytecode_alu alu;
3643 int i, r;
3644
3645 for (i = 0; i < 4; i++) {
3646 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3647 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3648 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3649 alu.dst.chan = i;
3650 } else {
3651 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3652 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3653 alu.src[0].sel = ctx->temp_reg;
3654 alu.src[0].chan = i;
3655 }
3656 if (i == 3) {
3657 alu.last = 1;
3658 }
3659 r = r600_bytecode_add_alu(ctx->bc, &alu);
3660 if (r)
3661 return r;
3662 }
3663 return 0;
3664 }
3665
3666 static int tgsi_op3(struct r600_shader_ctx *ctx)
3667 {
3668 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3669 struct r600_bytecode_alu alu;
3670 int i, j, r;
3671 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3672
3673 for (i = 0; i < lasti + 1; i++) {
3674 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3675 continue;
3676
3677 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3678 alu.inst = ctx->inst_info->r600_opcode;
3679 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3680 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3681 }
3682
3683 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3684 alu.dst.chan = i;
3685 alu.dst.write = 1;
3686 alu.is_op3 = 1;
3687 if (i == lasti) {
3688 alu.last = 1;
3689 }
3690 r = r600_bytecode_add_alu(ctx->bc, &alu);
3691 if (r)
3692 return r;
3693 }
3694 return 0;
3695 }
3696
3697 static int tgsi_dp(struct r600_shader_ctx *ctx)
3698 {
3699 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3700 struct r600_bytecode_alu alu;
3701 int i, j, r;
3702
3703 for (i = 0; i < 4; i++) {
3704 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3705 alu.inst = ctx->inst_info->r600_opcode;
3706 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3707 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3708 }
3709
3710 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3711 alu.dst.chan = i;
3712 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3713 /* handle some special cases */
3714 switch (ctx->inst_info->tgsi_opcode) {
3715 case TGSI_OPCODE_DP2:
3716 if (i > 1) {
3717 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3718 alu.src[0].chan = alu.src[1].chan = 0;
3719 }
3720 break;
3721 case TGSI_OPCODE_DP3:
3722 if (i > 2) {
3723 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3724 alu.src[0].chan = alu.src[1].chan = 0;
3725 }
3726 break;
3727 case TGSI_OPCODE_DPH:
3728 if (i == 3) {
3729 alu.src[0].sel = V_SQ_ALU_SRC_1;
3730 alu.src[0].chan = 0;
3731 alu.src[0].neg = 0;
3732 }
3733 break;
3734 default:
3735 break;
3736 }
3737 if (i == 3) {
3738 alu.last = 1;
3739 }
3740 r = r600_bytecode_add_alu(ctx->bc, &alu);
3741 if (r)
3742 return r;
3743 }
3744 return 0;
3745 }
3746
3747 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3748 unsigned index)
3749 {
3750 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3751 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3752 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3753 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3754 ctx->src[index].neg || ctx->src[index].abs;
3755 }
3756
3757 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3758 unsigned index)
3759 {
3760 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3761 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3762 }
3763
3764 static int tgsi_tex(struct r600_shader_ctx *ctx)
3765 {
3766 static float one_point_five = 1.5f;
3767 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3768 struct r600_bytecode_tex tex;
3769 struct r600_bytecode_alu alu;
3770 unsigned src_gpr;
3771 int r, i, j;
3772 int opcode;
3773 /* Texture fetch instructions can only use gprs as source.
3774 * Also they cannot negate the source or take the absolute value */
3775 const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3776 tgsi_tex_src_requires_loading(ctx, 0);
3777 boolean src_loaded = FALSE;
3778 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3779 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3780
3781 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3782
3783 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3784 /* get offset values */
3785 if (inst->Texture.NumOffsets) {
3786 assert(inst->Texture.NumOffsets == 1);
3787
3788 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3789 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3790 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3791 }
3792 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3793 /* TGSI moves the sampler to src reg 3 for TXD */
3794 sampler_src_reg = 3;
3795
3796 for (i = 1; i < 3; i++) {
3797 /* set gradients h/v */
3798 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3799 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3800 SQ_TEX_INST_SET_GRADIENTS_V;
3801 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3802 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3803
3804 if (tgsi_tex_src_requires_loading(ctx, i)) {
3805 tex.src_gpr = r600_get_temp(ctx);
3806 tex.src_sel_x = 0;
3807 tex.src_sel_y = 1;
3808 tex.src_sel_z = 2;
3809 tex.src_sel_w = 3;
3810
3811 for (j = 0; j < 4; j++) {
3812 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3813 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3814 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3815 alu.dst.sel = tex.src_gpr;
3816 alu.dst.chan = j;
3817 if (j == 3)
3818 alu.last = 1;
3819 alu.dst.write = 1;
3820 r = r600_bytecode_add_alu(ctx->bc, &alu);
3821 if (r)
3822 return r;
3823 }
3824
3825 } else {
3826 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3827 tex.src_sel_x = ctx->src[i].swizzle[0];
3828 tex.src_sel_y = ctx->src[i].swizzle[1];
3829 tex.src_sel_z = ctx->src[i].swizzle[2];
3830 tex.src_sel_w = ctx->src[i].swizzle[3];
3831 tex.src_rel = ctx->src[i].rel;
3832 }
3833 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3834 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3835 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3836 tex.coord_type_x = 1;
3837 tex.coord_type_y = 1;
3838 tex.coord_type_z = 1;
3839 tex.coord_type_w = 1;
3840 }
3841 r = r600_bytecode_add_tex(ctx->bc, &tex);
3842 if (r)
3843 return r;
3844 }
3845 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3846 int out_chan;
3847 /* Add perspective divide */
3848 if (ctx->bc->chip_class == CAYMAN) {
3849 out_chan = 2;
3850 for (i = 0; i < 3; i++) {
3851 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3852 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3853 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3854
3855 alu.dst.sel = ctx->temp_reg;
3856 alu.dst.chan = i;
3857 if (i == 2)
3858 alu.last = 1;
3859 if (out_chan == i)
3860 alu.dst.write = 1;
3861 r = r600_bytecode_add_alu(ctx->bc, &alu);
3862 if (r)
3863 return r;
3864 }
3865
3866 } else {
3867 out_chan = 3;
3868 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3869 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3870 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3871
3872 alu.dst.sel = ctx->temp_reg;
3873 alu.dst.chan = out_chan;
3874 alu.last = 1;
3875 alu.dst.write = 1;
3876 r = r600_bytecode_add_alu(ctx->bc, &alu);
3877 if (r)
3878 return r;
3879 }
3880
3881 for (i = 0; i < 3; i++) {
3882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3883 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3884 alu.src[0].sel = ctx->temp_reg;
3885 alu.src[0].chan = out_chan;
3886 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3887 alu.dst.sel = ctx->temp_reg;
3888 alu.dst.chan = i;
3889 alu.dst.write = 1;
3890 r = r600_bytecode_add_alu(ctx->bc, &alu);
3891 if (r)
3892 return r;
3893 }
3894 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3895 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3896 alu.src[0].sel = V_SQ_ALU_SRC_1;
3897 alu.src[0].chan = 0;
3898 alu.dst.sel = ctx->temp_reg;
3899 alu.dst.chan = 3;
3900 alu.last = 1;
3901 alu.dst.write = 1;
3902 r = r600_bytecode_add_alu(ctx->bc, &alu);
3903 if (r)
3904 return r;
3905 src_loaded = TRUE;
3906 src_gpr = ctx->temp_reg;
3907 }
3908
3909 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3910 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3911 inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
3912 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
3913
3914 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3915 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3916
3917 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3918 for (i = 0; i < 4; i++) {
3919 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3920 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3921 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3922 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3923 alu.dst.sel = ctx->temp_reg;
3924 alu.dst.chan = i;
3925 if (i == 3)
3926 alu.last = 1;
3927 alu.dst.write = 1;
3928 r = r600_bytecode_add_alu(ctx->bc, &alu);
3929 if (r)
3930 return r;
3931 }
3932
3933 /* tmp1.z = RCP_e(|tmp1.z|) */
3934 if (ctx->bc->chip_class == CAYMAN) {
3935 for (i = 0; i < 3; i++) {
3936 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3937 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3938 alu.src[0].sel = ctx->temp_reg;
3939 alu.src[0].chan = 2;
3940 alu.src[0].abs = 1;
3941 alu.dst.sel = ctx->temp_reg;
3942 alu.dst.chan = i;
3943 if (i == 2)
3944 alu.dst.write = 1;
3945 if (i == 2)
3946 alu.last = 1;
3947 r = r600_bytecode_add_alu(ctx->bc, &alu);
3948 if (r)
3949 return r;
3950 }
3951 } else {
3952 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3954 alu.src[0].sel = ctx->temp_reg;
3955 alu.src[0].chan = 2;
3956 alu.src[0].abs = 1;
3957 alu.dst.sel = ctx->temp_reg;
3958 alu.dst.chan = 2;
3959 alu.dst.write = 1;
3960 alu.last = 1;
3961 r = r600_bytecode_add_alu(ctx->bc, &alu);
3962 if (r)
3963 return r;
3964 }
3965
3966 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3967 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3968 * muladd has no writemask, have to use another temp
3969 */
3970 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3971 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3972 alu.is_op3 = 1;
3973
3974 alu.src[0].sel = ctx->temp_reg;
3975 alu.src[0].chan = 0;
3976 alu.src[1].sel = ctx->temp_reg;
3977 alu.src[1].chan = 2;
3978
3979 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3980 alu.src[2].chan = 0;
3981 alu.src[2].value = *(uint32_t *)&one_point_five;
3982
3983 alu.dst.sel = ctx->temp_reg;
3984 alu.dst.chan = 0;
3985 alu.dst.write = 1;
3986
3987 r = r600_bytecode_add_alu(ctx->bc, &alu);
3988 if (r)
3989 return r;
3990
3991 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3992 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3993 alu.is_op3 = 1;
3994
3995 alu.src[0].sel = ctx->temp_reg;
3996 alu.src[0].chan = 1;
3997 alu.src[1].sel = ctx->temp_reg;
3998 alu.src[1].chan = 2;
3999
4000 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
4001 alu.src[2].chan = 0;
4002 alu.src[2].value = *(uint32_t *)&one_point_five;
4003
4004 alu.dst.sel = ctx->temp_reg;
4005 alu.dst.chan = 1;
4006 alu.dst.write = 1;
4007
4008 alu.last = 1;
4009 r = r600_bytecode_add_alu(ctx->bc, &alu);
4010 if (r)
4011 return r;
4012 /* write initial W value into Z component */
4013 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4014 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4015 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4016 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4017 alu.dst.sel = ctx->temp_reg;
4018 alu.dst.chan = 2;
4019 alu.dst.write = 1;
4020 alu.last = 1;
4021 r = r600_bytecode_add_alu(ctx->bc, &alu);
4022 if (r)
4023 return r;
4024 }
4025 src_loaded = TRUE;
4026 src_gpr = ctx->temp_reg;
4027 }
4028
4029 if (src_requires_loading && !src_loaded) {
4030 for (i = 0; i < 4; i++) {
4031 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4032 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4033 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4034 alu.dst.sel = ctx->temp_reg;
4035 alu.dst.chan = i;
4036 if (i == 3)
4037 alu.last = 1;
4038 alu.dst.write = 1;
4039 r = r600_bytecode_add_alu(ctx->bc, &alu);
4040 if (r)
4041 return r;
4042 }
4043 src_loaded = TRUE;
4044 src_gpr = ctx->temp_reg;
4045 }
4046
4047 opcode = ctx->inst_info->r600_opcode;
4048 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4049 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4050 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4051 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4052 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4053 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
4054 switch (opcode) {
4055 case SQ_TEX_INST_SAMPLE:
4056 opcode = SQ_TEX_INST_SAMPLE_C;
4057 break;
4058 case SQ_TEX_INST_SAMPLE_L:
4059 opcode = SQ_TEX_INST_SAMPLE_C_L;
4060 break;
4061 case SQ_TEX_INST_SAMPLE_LB:
4062 opcode = SQ_TEX_INST_SAMPLE_C_LB;
4063 break;
4064 case SQ_TEX_INST_SAMPLE_G:
4065 opcode = SQ_TEX_INST_SAMPLE_C_G;
4066 break;
4067 }
4068 }
4069
4070 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4071 tex.inst = opcode;
4072
4073 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4074 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4075 tex.src_gpr = src_gpr;
4076 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4077 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4078 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4079 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4080 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4081
4082 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
4083 tex.src_sel_x = 4;
4084 tex.src_sel_y = 4;
4085 tex.src_sel_z = 4;
4086 tex.src_sel_w = 4;
4087 } else if (src_loaded) {
4088 tex.src_sel_x = 0;
4089 tex.src_sel_y = 1;
4090 tex.src_sel_z = 2;
4091 tex.src_sel_w = 3;
4092 } else {
4093 tex.src_sel_x = ctx->src[0].swizzle[0];
4094 tex.src_sel_y = ctx->src[0].swizzle[1];
4095 tex.src_sel_z = ctx->src[0].swizzle[2];
4096 tex.src_sel_w = ctx->src[0].swizzle[3];
4097 tex.src_rel = ctx->src[0].rel;
4098 }
4099
4100 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
4101 tex.src_sel_x = 1;
4102 tex.src_sel_y = 0;
4103 tex.src_sel_z = 3;
4104 tex.src_sel_w = 1;
4105 }
4106 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4107 tex.src_sel_x = 1;
4108 tex.src_sel_y = 0;
4109 tex.src_sel_z = 3;
4110 tex.src_sel_w = 2; /* route Z compare value into W */
4111 }
4112
4113 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4114 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4115 tex.coord_type_x = 1;
4116 tex.coord_type_y = 1;
4117 }
4118 tex.coord_type_z = 1;
4119 tex.coord_type_w = 1;
4120
4121 tex.offset_x = offset_x;
4122 tex.offset_y = offset_y;
4123 tex.offset_z = offset_z;
4124
4125 /* Put the depth for comparison in W.
4126 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4127 * Some instructions expect the depth in Z. */
4128 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4129 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4130 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4131 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4132 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4133 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4134 tex.src_sel_w = tex.src_sel_z;
4135 }
4136
4137 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4138 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4139 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4140 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4141 /* the array index is read from Y */
4142 tex.coord_type_y = 0;
4143 } else {
4144 /* the array index is read from Z */
4145 tex.coord_type_z = 0;
4146 tex.src_sel_z = tex.src_sel_y;
4147 }
4148 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4149 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4150 /* the array index is read from Z */
4151 tex.coord_type_z = 0;
4152
4153 r = r600_bytecode_add_tex(ctx->bc, &tex);
4154 if (r)
4155 return r;
4156
4157 /* add shadow ambient support - gallium doesn't do it yet */
4158 return 0;
4159 }
4160
4161 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4162 {
4163 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4164 struct r600_bytecode_alu alu;
4165 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4166 unsigned i;
4167 int r;
4168
4169 /* optimize if it's just an equal balance */
4170 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4171 for (i = 0; i < lasti + 1; i++) {
4172 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4173 continue;
4174
4175 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4176 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4177 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4178 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4179 alu.omod = 3;
4180 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4181 alu.dst.chan = i;
4182 if (i == lasti) {
4183 alu.last = 1;
4184 }
4185 r = r600_bytecode_add_alu(ctx->bc, &alu);
4186 if (r)
4187 return r;
4188 }
4189 return 0;
4190 }
4191
4192 /* 1 - src0 */
4193 for (i = 0; i < lasti + 1; i++) {
4194 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4195 continue;
4196
4197 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4198 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4199 alu.src[0].sel = V_SQ_ALU_SRC_1;
4200 alu.src[0].chan = 0;
4201 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4202 r600_bytecode_src_toggle_neg(&alu.src[1]);
4203 alu.dst.sel = ctx->temp_reg;
4204 alu.dst.chan = i;
4205 if (i == lasti) {
4206 alu.last = 1;
4207 }
4208 alu.dst.write = 1;
4209 r = r600_bytecode_add_alu(ctx->bc, &alu);
4210 if (r)
4211 return r;
4212 }
4213
4214 /* (1 - src0) * src2 */
4215 for (i = 0; i < lasti + 1; i++) {
4216 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4217 continue;
4218
4219 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4220 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4221 alu.src[0].sel = ctx->temp_reg;
4222 alu.src[0].chan = i;
4223 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4224 alu.dst.sel = ctx->temp_reg;
4225 alu.dst.chan = i;
4226 if (i == lasti) {
4227 alu.last = 1;
4228 }
4229 alu.dst.write = 1;
4230 r = r600_bytecode_add_alu(ctx->bc, &alu);
4231 if (r)
4232 return r;
4233 }
4234
4235 /* src0 * src1 + (1 - src0) * src2 */
4236 for (i = 0; i < lasti + 1; i++) {
4237 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4238 continue;
4239
4240 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4241 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4242 alu.is_op3 = 1;
4243 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4244 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4245 alu.src[2].sel = ctx->temp_reg;
4246 alu.src[2].chan = i;
4247
4248 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4249 alu.dst.chan = i;
4250 if (i == lasti) {
4251 alu.last = 1;
4252 }
4253 r = r600_bytecode_add_alu(ctx->bc, &alu);
4254 if (r)
4255 return r;
4256 }
4257 return 0;
4258 }
4259
4260 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4261 {
4262 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4263 struct r600_bytecode_alu alu;
4264 int i, r;
4265 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4266
4267 for (i = 0; i < lasti + 1; i++) {
4268 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4269 continue;
4270
4271 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4273 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4274 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4275 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4276 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4277 alu.dst.chan = i;
4278 alu.dst.write = 1;
4279 alu.is_op3 = 1;
4280 if (i == lasti)
4281 alu.last = 1;
4282 r = r600_bytecode_add_alu(ctx->bc, &alu);
4283 if (r)
4284 return r;
4285 }
4286 return 0;
4287 }
4288
4289 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4290 {
4291 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4292 static const unsigned int src0_swizzle[] = {2, 0, 1};
4293 static const unsigned int src1_swizzle[] = {1, 2, 0};
4294 struct r600_bytecode_alu alu;
4295 uint32_t use_temp = 0;
4296 int i, r;
4297
4298 if (inst->Dst[0].Register.WriteMask != 0xf)
4299 use_temp = 1;
4300
4301 for (i = 0; i < 4; i++) {
4302 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4303 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4304 if (i < 3) {
4305 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4306 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4307 } else {
4308 alu.src[0].sel = V_SQ_ALU_SRC_0;
4309 alu.src[0].chan = i;
4310 alu.src[1].sel = V_SQ_ALU_SRC_0;
4311 alu.src[1].chan = i;
4312 }
4313
4314 alu.dst.sel = ctx->temp_reg;
4315 alu.dst.chan = i;
4316 alu.dst.write = 1;
4317
4318 if (i == 3)
4319 alu.last = 1;
4320 r = r600_bytecode_add_alu(ctx->bc, &alu);
4321 if (r)
4322 return r;
4323 }
4324
4325 for (i = 0; i < 4; i++) {
4326 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4327 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4328
4329 if (i < 3) {
4330 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4331 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4332 } else {
4333 alu.src[0].sel = V_SQ_ALU_SRC_0;
4334 alu.src[0].chan = i;
4335 alu.src[1].sel = V_SQ_ALU_SRC_0;
4336 alu.src[1].chan = i;
4337 }
4338
4339 alu.src[2].sel = ctx->temp_reg;
4340 alu.src[2].neg = 1;
4341 alu.src[2].chan = i;
4342
4343 if (use_temp)
4344 alu.dst.sel = ctx->temp_reg;
4345 else
4346 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4347 alu.dst.chan = i;
4348 alu.dst.write = 1;
4349 alu.is_op3 = 1;
4350 if (i == 3)
4351 alu.last = 1;
4352 r = r600_bytecode_add_alu(ctx->bc, &alu);
4353 if (r)
4354 return r;
4355 }
4356 if (use_temp)
4357 return tgsi_helper_copy(ctx, inst);
4358 return 0;
4359 }
4360
4361 static int tgsi_exp(struct r600_shader_ctx *ctx)
4362 {
4363 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4364 struct r600_bytecode_alu alu;
4365 int r;
4366 int i;
4367
4368 /* result.x = 2^floor(src); */
4369 if (inst->Dst[0].Register.WriteMask & 1) {
4370 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4371
4372 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4373 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4374
4375 alu.dst.sel = ctx->temp_reg;
4376 alu.dst.chan = 0;
4377 alu.dst.write = 1;
4378 alu.last = 1;
4379 r = r600_bytecode_add_alu(ctx->bc, &alu);
4380 if (r)
4381 return r;
4382
4383 if (ctx->bc->chip_class == CAYMAN) {
4384 for (i = 0; i < 3; i++) {
4385 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4386 alu.src[0].sel = ctx->temp_reg;
4387 alu.src[0].chan = 0;
4388
4389 alu.dst.sel = ctx->temp_reg;
4390 alu.dst.chan = i;
4391 if (i == 0)
4392 alu.dst.write = 1;
4393 if (i == 2)
4394 alu.last = 1;
4395 r = r600_bytecode_add_alu(ctx->bc, &alu);
4396 if (r)
4397 return r;
4398 }
4399 } else {
4400 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4401 alu.src[0].sel = ctx->temp_reg;
4402 alu.src[0].chan = 0;
4403
4404 alu.dst.sel = ctx->temp_reg;
4405 alu.dst.chan = 0;
4406 alu.dst.write = 1;
4407 alu.last = 1;
4408 r = r600_bytecode_add_alu(ctx->bc, &alu);
4409 if (r)
4410 return r;
4411 }
4412 }
4413
4414 /* result.y = tmp - floor(tmp); */
4415 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4416 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4417
4418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4419 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4420
4421 alu.dst.sel = ctx->temp_reg;
4422 #if 0
4423 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4424 if (r)
4425 return r;
4426 #endif
4427 alu.dst.write = 1;
4428 alu.dst.chan = 1;
4429
4430 alu.last = 1;
4431
4432 r = r600_bytecode_add_alu(ctx->bc, &alu);
4433 if (r)
4434 return r;
4435 }
4436
4437 /* result.z = RoughApprox2ToX(tmp);*/
4438 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4439 if (ctx->bc->chip_class == CAYMAN) {
4440 for (i = 0; i < 3; i++) {
4441 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4442 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4443 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4444
4445 alu.dst.sel = ctx->temp_reg;
4446 alu.dst.chan = i;
4447 if (i == 2) {
4448 alu.dst.write = 1;
4449 alu.last = 1;
4450 }
4451
4452 r = r600_bytecode_add_alu(ctx->bc, &alu);
4453 if (r)
4454 return r;
4455 }
4456 } else {
4457 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4458 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4459 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4460
4461 alu.dst.sel = ctx->temp_reg;
4462 alu.dst.write = 1;
4463 alu.dst.chan = 2;
4464
4465 alu.last = 1;
4466
4467 r = r600_bytecode_add_alu(ctx->bc, &alu);
4468 if (r)
4469 return r;
4470 }
4471 }
4472
4473 /* result.w = 1.0;*/
4474 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4475 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4476
4477 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4478 alu.src[0].sel = V_SQ_ALU_SRC_1;
4479 alu.src[0].chan = 0;
4480
4481 alu.dst.sel = ctx->temp_reg;
4482 alu.dst.chan = 3;
4483 alu.dst.write = 1;
4484 alu.last = 1;
4485 r = r600_bytecode_add_alu(ctx->bc, &alu);
4486 if (r)
4487 return r;
4488 }
4489 return tgsi_helper_copy(ctx, inst);
4490 }
4491
4492 static int tgsi_log(struct r600_shader_ctx *ctx)
4493 {
4494 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4495 struct r600_bytecode_alu alu;
4496 int r;
4497 int i;
4498
4499 /* result.x = floor(log2(|src|)); */
4500 if (inst->Dst[0].Register.WriteMask & 1) {
4501 if (ctx->bc->chip_class == CAYMAN) {
4502 for (i = 0; i < 3; i++) {
4503 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4504
4505 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4506 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4507 r600_bytecode_src_set_abs(&alu.src[0]);
4508
4509 alu.dst.sel = ctx->temp_reg;
4510 alu.dst.chan = i;
4511 if (i == 0)
4512 alu.dst.write = 1;
4513 if (i == 2)
4514 alu.last = 1;
4515 r = r600_bytecode_add_alu(ctx->bc, &alu);
4516 if (r)
4517 return r;
4518 }
4519
4520 } else {
4521 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4522
4523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4524 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4525 r600_bytecode_src_set_abs(&alu.src[0]);
4526
4527 alu.dst.sel = ctx->temp_reg;
4528 alu.dst.chan = 0;
4529 alu.dst.write = 1;
4530 alu.last = 1;
4531 r = r600_bytecode_add_alu(ctx->bc, &alu);
4532 if (r)
4533 return r;
4534 }
4535
4536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4537 alu.src[0].sel = ctx->temp_reg;
4538 alu.src[0].chan = 0;
4539
4540 alu.dst.sel = ctx->temp_reg;
4541 alu.dst.chan = 0;
4542 alu.dst.write = 1;
4543 alu.last = 1;
4544
4545 r = r600_bytecode_add_alu(ctx->bc, &alu);
4546 if (r)
4547 return r;
4548 }
4549
4550 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4551 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4552
4553 if (ctx->bc->chip_class == CAYMAN) {
4554 for (i = 0; i < 3; i++) {
4555 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4556
4557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4558 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4559 r600_bytecode_src_set_abs(&alu.src[0]);
4560
4561 alu.dst.sel = ctx->temp_reg;
4562 alu.dst.chan = i;
4563 if (i == 1)
4564 alu.dst.write = 1;
4565 if (i == 2)
4566 alu.last = 1;
4567
4568 r = r600_bytecode_add_alu(ctx->bc, &alu);
4569 if (r)
4570 return r;
4571 }
4572 } else {
4573 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4574
4575 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4576 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4577 r600_bytecode_src_set_abs(&alu.src[0]);
4578
4579 alu.dst.sel = ctx->temp_reg;
4580 alu.dst.chan = 1;
4581 alu.dst.write = 1;
4582 alu.last = 1;
4583
4584 r = r600_bytecode_add_alu(ctx->bc, &alu);
4585 if (r)
4586 return r;
4587 }
4588
4589 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4590
4591 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4592 alu.src[0].sel = ctx->temp_reg;
4593 alu.src[0].chan = 1;
4594
4595 alu.dst.sel = ctx->temp_reg;
4596 alu.dst.chan = 1;
4597 alu.dst.write = 1;
4598 alu.last = 1;
4599
4600 r = r600_bytecode_add_alu(ctx->bc, &alu);
4601 if (r)
4602 return r;
4603
4604 if (ctx->bc->chip_class == CAYMAN) {
4605 for (i = 0; i < 3; i++) {
4606 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4607 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4608 alu.src[0].sel = ctx->temp_reg;
4609 alu.src[0].chan = 1;
4610
4611 alu.dst.sel = ctx->temp_reg;
4612 alu.dst.chan = i;
4613 if (i == 1)
4614 alu.dst.write = 1;
4615 if (i == 2)
4616 alu.last = 1;
4617
4618 r = r600_bytecode_add_alu(ctx->bc, &alu);
4619 if (r)
4620 return r;
4621 }
4622 } else {
4623 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4624 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4625 alu.src[0].sel = ctx->temp_reg;
4626 alu.src[0].chan = 1;
4627
4628 alu.dst.sel = ctx->temp_reg;
4629 alu.dst.chan = 1;
4630 alu.dst.write = 1;
4631 alu.last = 1;
4632
4633 r = r600_bytecode_add_alu(ctx->bc, &alu);
4634 if (r)
4635 return r;
4636 }
4637
4638 if (ctx->bc->chip_class == CAYMAN) {
4639 for (i = 0; i < 3; i++) {
4640 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4642 alu.src[0].sel = ctx->temp_reg;
4643 alu.src[0].chan = 1;
4644
4645 alu.dst.sel = ctx->temp_reg;
4646 alu.dst.chan = i;
4647 if (i == 1)
4648 alu.dst.write = 1;
4649 if (i == 2)
4650 alu.last = 1;
4651
4652 r = r600_bytecode_add_alu(ctx->bc, &alu);
4653 if (r)
4654 return r;
4655 }
4656 } else {
4657 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4659 alu.src[0].sel = ctx->temp_reg;
4660 alu.src[0].chan = 1;
4661
4662 alu.dst.sel = ctx->temp_reg;
4663 alu.dst.chan = 1;
4664 alu.dst.write = 1;
4665 alu.last = 1;
4666
4667 r = r600_bytecode_add_alu(ctx->bc, &alu);
4668 if (r)
4669 return r;
4670 }
4671
4672 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4673
4674 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4675
4676 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4677 r600_bytecode_src_set_abs(&alu.src[0]);
4678
4679 alu.src[1].sel = ctx->temp_reg;
4680 alu.src[1].chan = 1;
4681
4682 alu.dst.sel = ctx->temp_reg;
4683 alu.dst.chan = 1;
4684 alu.dst.write = 1;
4685 alu.last = 1;
4686
4687 r = r600_bytecode_add_alu(ctx->bc, &alu);
4688 if (r)
4689 return r;
4690 }
4691
4692 /* result.z = log2(|src|);*/
4693 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4694 if (ctx->bc->chip_class == CAYMAN) {
4695 for (i = 0; i < 3; i++) {
4696 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4697
4698 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4699 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4700 r600_bytecode_src_set_abs(&alu.src[0]);
4701
4702 alu.dst.sel = ctx->temp_reg;
4703 if (i == 2)
4704 alu.dst.write = 1;
4705 alu.dst.chan = i;
4706 if (i == 2)
4707 alu.last = 1;
4708
4709 r = r600_bytecode_add_alu(ctx->bc, &alu);
4710 if (r)
4711 return r;
4712 }
4713 } else {
4714 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4715
4716 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4717 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4718 r600_bytecode_src_set_abs(&alu.src[0]);
4719
4720 alu.dst.sel = ctx->temp_reg;
4721 alu.dst.write = 1;
4722 alu.dst.chan = 2;
4723 alu.last = 1;
4724
4725 r = r600_bytecode_add_alu(ctx->bc, &alu);
4726 if (r)
4727 return r;
4728 }
4729 }
4730
4731 /* result.w = 1.0; */
4732 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4733 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4734
4735 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4736 alu.src[0].sel = V_SQ_ALU_SRC_1;
4737 alu.src[0].chan = 0;
4738
4739 alu.dst.sel = ctx->temp_reg;
4740 alu.dst.chan = 3;
4741 alu.dst.write = 1;
4742 alu.last = 1;
4743
4744 r = r600_bytecode_add_alu(ctx->bc, &alu);
4745 if (r)
4746 return r;
4747 }
4748
4749 return tgsi_helper_copy(ctx, inst);
4750 }
4751
4752 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4753 {
4754 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4755 struct r600_bytecode_alu alu;
4756 int r;
4757
4758 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4759
4760 switch (inst->Instruction.Opcode) {
4761 case TGSI_OPCODE_ARL:
4762 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4763 break;
4764 case TGSI_OPCODE_ARR:
4765 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4766 break;
4767 case TGSI_OPCODE_UARL:
4768 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4769 break;
4770 default:
4771 assert(0);
4772 return -1;
4773 }
4774
4775 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4776 alu.last = 1;
4777 alu.dst.sel = ctx->bc->ar_reg;
4778 alu.dst.write = 1;
4779 r = r600_bytecode_add_alu(ctx->bc, &alu);
4780 if (r)
4781 return r;
4782
4783 ctx->bc->ar_loaded = 0;
4784 return 0;
4785 }
4786 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4787 {
4788 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4789 struct r600_bytecode_alu alu;
4790 int r;
4791
4792 switch (inst->Instruction.Opcode) {
4793 case TGSI_OPCODE_ARL:
4794 memset(&alu, 0, sizeof(alu));
4795 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4796 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4797 alu.dst.sel = ctx->bc->ar_reg;
4798 alu.dst.write = 1;
4799 alu.last = 1;
4800
4801 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4802 return r;
4803
4804 memset(&alu, 0, sizeof(alu));
4805 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4806 alu.src[0].sel = ctx->bc->ar_reg;
4807 alu.dst.sel = ctx->bc->ar_reg;
4808 alu.dst.write = 1;
4809 alu.last = 1;
4810
4811 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4812 return r;
4813 break;
4814 case TGSI_OPCODE_ARR:
4815 memset(&alu, 0, sizeof(alu));
4816 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4817 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4818 alu.dst.sel = ctx->bc->ar_reg;
4819 alu.dst.write = 1;
4820 alu.last = 1;
4821
4822 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4823 return r;
4824 break;
4825 case TGSI_OPCODE_UARL:
4826 memset(&alu, 0, sizeof(alu));
4827 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4828 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4829 alu.dst.sel = ctx->bc->ar_reg;
4830 alu.dst.write = 1;
4831 alu.last = 1;
4832
4833 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4834 return r;
4835 break;
4836 default:
4837 assert(0);
4838 return -1;
4839 }
4840
4841 ctx->bc->ar_loaded = 0;
4842 return 0;
4843 }
4844
4845 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4846 {
4847 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4848 struct r600_bytecode_alu alu;
4849 int i, r = 0;
4850
4851 for (i = 0; i < 4; i++) {
4852 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4853
4854 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4855 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4856
4857 if (i == 0 || i == 3) {
4858 alu.src[0].sel = V_SQ_ALU_SRC_1;
4859 } else {
4860 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4861 }
4862
4863 if (i == 0 || i == 2) {
4864 alu.src[1].sel = V_SQ_ALU_SRC_1;
4865 } else {
4866 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4867 }
4868 if (i == 3)
4869 alu.last = 1;
4870 r = r600_bytecode_add_alu(ctx->bc, &alu);
4871 if (r)
4872 return r;
4873 }
4874 return 0;
4875 }
4876
4877 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4878 {
4879 struct r600_bytecode_alu alu;
4880 int r;
4881
4882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4883 alu.inst = opcode;
4884 alu.execute_mask = 1;
4885 alu.update_pred = 1;
4886
4887 alu.dst.sel = ctx->temp_reg;
4888 alu.dst.write = 1;
4889 alu.dst.chan = 0;
4890
4891 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4892 alu.src[1].sel = V_SQ_ALU_SRC_0;
4893 alu.src[1].chan = 0;
4894
4895 alu.last = 1;
4896
4897 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4898 if (r)
4899 return r;
4900 return 0;
4901 }
4902
4903 static int pops(struct r600_shader_ctx *ctx, int pops)
4904 {
4905 unsigned force_pop = ctx->bc->force_add_cf;
4906
4907 if (!force_pop) {
4908 int alu_pop = 3;
4909 if (ctx->bc->cf_last) {
4910 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4911 alu_pop = 0;
4912 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4913 alu_pop = 1;
4914 }
4915 alu_pop += pops;
4916 if (alu_pop == 1) {
4917 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4918 ctx->bc->force_add_cf = 1;
4919 } else if (alu_pop == 2) {
4920 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4921 ctx->bc->force_add_cf = 1;
4922 } else {
4923 force_pop = 1;
4924 }
4925 }
4926
4927 if (force_pop) {
4928 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4929 ctx->bc->cf_last->pop_count = pops;
4930 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4931 }
4932
4933 return 0;
4934 }
4935
4936 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4937 {
4938 switch(reason) {
4939 case FC_PUSH_VPM:
4940 ctx->bc->callstack[ctx->bc->call_sp].current--;
4941 break;
4942 case FC_PUSH_WQM:
4943 case FC_LOOP:
4944 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4945 break;
4946 case FC_REP:
4947 /* TOODO : for 16 vp asic should -= 2; */
4948 ctx->bc->callstack[ctx->bc->call_sp].current --;
4949 break;
4950 }
4951 }
4952
4953 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4954 {
4955 if (check_max_only) {
4956 int diff;
4957 switch (reason) {
4958 case FC_PUSH_VPM:
4959 diff = 1;
4960 break;
4961 case FC_PUSH_WQM:
4962 diff = 4;
4963 break;
4964 default:
4965 assert(0);
4966 diff = 0;
4967 }
4968 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4969 ctx->bc->callstack[ctx->bc->call_sp].max) {
4970 ctx->bc->callstack[ctx->bc->call_sp].max =
4971 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4972 }
4973 return;
4974 }
4975 switch (reason) {
4976 case FC_PUSH_VPM:
4977 ctx->bc->callstack[ctx->bc->call_sp].current++;
4978 break;
4979 case FC_PUSH_WQM:
4980 case FC_LOOP:
4981 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4982 break;
4983 case FC_REP:
4984 ctx->bc->callstack[ctx->bc->call_sp].current++;
4985 break;
4986 }
4987
4988 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4989 ctx->bc->callstack[ctx->bc->call_sp].max) {
4990 ctx->bc->callstack[ctx->bc->call_sp].max =
4991 ctx->bc->callstack[ctx->bc->call_sp].current;
4992 }
4993 }
4994
4995 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4996 {
4997 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4998
4999 sp->mid = realloc((void *)sp->mid,
5000 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
5001 sp->mid[sp->num_mid] = ctx->bc->cf_last;
5002 sp->num_mid++;
5003 }
5004
5005 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
5006 {
5007 ctx->bc->fc_sp++;
5008 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
5009 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
5010 }
5011
5012 static void fc_poplevel(struct r600_shader_ctx *ctx)
5013 {
5014 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
5015 free(sp->mid);
5016 sp->mid = NULL;
5017 sp->num_mid = 0;
5018 sp->start = NULL;
5019 sp->type = 0;
5020 ctx->bc->fc_sp--;
5021 }
5022
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It holds unfinished helpers for RETURN / jump / flag based flow control;
 * several of them are empty stubs.
 */
#if 0
/* Appends a RETURN CF instruction. Always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/*
 * Appends a JUMP CF instruction with the given pop count.
 * The `offset` argument is not used yet (see the XXX below).
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Calls, in order: emit_testflag, emit_jump_to_offset(1, 4),
 * emit_setret_in_loop_flag(V_SQ_ALU_SRC_0), pops(ifidx + 1), emit_return.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Appends the CF instruction named by ctx->inst_info->r600_opcode with a pop
 * count of 1, records it as a mid point of level fc_sp and then pops once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
5070
5071 static int tgsi_if(struct r600_shader_ctx *ctx)
5072 {
5073 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5074
5075 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5076
5077 fc_pushlevel(ctx, FC_IF);
5078
5079 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5080 return 0;
5081 }
5082
5083 static int tgsi_else(struct r600_shader_ctx *ctx)
5084 {
5085 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5086 ctx->bc->cf_last->pop_count = 1;
5087
5088 fc_set_mid(ctx, ctx->bc->fc_sp);
5089 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5090 return 0;
5091 }
5092
5093 static int tgsi_endif(struct r600_shader_ctx *ctx)
5094 {
5095 pops(ctx, 1);
5096 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5097 R600_ERR("if/endif unbalanced in shader\n");
5098 return -1;
5099 }
5100
5101 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5102 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5103 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5104 } else {
5105 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5106 }
5107 fc_poplevel(ctx);
5108
5109 callstack_decrease_current(ctx, FC_PUSH_VPM);
5110 return 0;
5111 }
5112
5113 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5114 {
5115 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
5116 * limited to 4096 iterations, like the other LOOP_* instructions. */
5117 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
5118
5119 fc_pushlevel(ctx, FC_LOOP);
5120
5121 /* check stack depth */
5122 callstack_check_depth(ctx, FC_LOOP, 0);
5123 return 0;
5124 }
5125
5126 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5127 {
5128 int i;
5129
5130 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5131
5132 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5133 R600_ERR("loop/endloop in shader code are not paired.\n");
5134 return -EINVAL;
5135 }
5136
5137 /* fixup loop pointers - from r600isa
5138 LOOP END points to CF after LOOP START,
5139 LOOP START point to CF after LOOP END
5140 BRK/CONT point to LOOP END CF
5141 */
5142 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5143
5144 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5145
5146 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5147 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5148 }
5149 /* XXX add LOOPRET support */
5150 fc_poplevel(ctx);
5151 callstack_decrease_current(ctx, FC_LOOP);
5152 return 0;
5153 }
5154
5155 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5156 {
5157 unsigned int fscp;
5158
5159 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5160 {
5161 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5162 break;
5163 }
5164
5165 if (fscp == 0) {
5166 R600_ERR("Break not inside loop/endloop pair\n");
5167 return -EINVAL;
5168 }
5169
5170 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5171
5172 fc_set_mid(ctx, fscp);
5173
5174 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5175 return 0;
5176 }
5177
5178 static int tgsi_umad(struct r600_shader_ctx *ctx)
5179 {
5180 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5181 struct r600_bytecode_alu alu;
5182 int i, j, r;
5183 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5184
5185 /* src0 * src1 */
5186 for (i = 0; i < lasti + 1; i++) {
5187 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5188 continue;
5189
5190 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5191
5192 alu.dst.chan = i;
5193 alu.dst.sel = ctx->temp_reg;
5194 alu.dst.write = 1;
5195
5196 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5197 for (j = 0; j < 2; j++) {
5198 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5199 }
5200
5201 alu.last = 1;
5202 r = r600_bytecode_add_alu(ctx->bc, &alu);
5203 if (r)
5204 return r;
5205 }
5206
5207
5208 for (i = 0; i < lasti + 1; i++) {
5209 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5210 continue;
5211
5212 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5213 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5214
5215 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5216
5217 alu.src[0].sel = ctx->temp_reg;
5218 alu.src[0].chan = i;
5219
5220 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5221 if (i == lasti) {
5222 alu.last = 1;
5223 }
5224 r = r600_bytecode_add_alu(ctx->bc, &alu);
5225 if (r)
5226 return r;
5227 }
5228 return 0;
5229 }
5230
5231 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5232 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5233 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5234 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5235
5236 /* XXX:
5237 * For state trackers other than OpenGL, we'll want to use
5238 * _RECIP_IEEE instead.
5239 */
5240 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5241
5242 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5243 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5244 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5245 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5246 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5247 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5248 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5249 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5250 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5251 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5252 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5253 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5254 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5255 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5256 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5257 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5258 /* gap */
5259 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5260 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5261 /* gap */
5262 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5263 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5264 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5265 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5266 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5267 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5268 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5269 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5270 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5271 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5272 /* gap */
5273 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5274 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5275 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5276 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5277 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5278 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5279 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5280 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5281 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5282 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5283 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5284 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5285 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5286 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5287 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5288 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5289 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5290 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5291 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5292 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5293 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5294 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5295 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5296 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5297 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5298 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5299 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5300 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5301 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5302 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5303 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5304 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5305 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5306 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5307 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5308 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5309 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5310 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5311 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5312 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5313 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5314 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5315 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5316 /* gap */
5317 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5318 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5319 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5320 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5321 /* gap */
5322 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5323 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5324 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5325 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5326 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5327 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5328 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5329 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5330 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5331 /* gap */
5332 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5333 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5334 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5335 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5336 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5337 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5338 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5339 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5340 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5341 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5342 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5343 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5344 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5345 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5346 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5347 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5348 /* gap */
5349 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5350 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5351 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5352 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5353 /* gap */
5354 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5355 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5356 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5357 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5358 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5359 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5360 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5361 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5362 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5363 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5364 /* gap */
5365 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5366 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5367 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5368 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5369 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5370 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5371 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5372 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5373 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5374 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5375 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5376 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5377 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5378 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5379 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5380 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5381 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5382 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5383 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5384 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5385 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5386 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5387 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5388 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5389 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5390 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5391 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5392 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5393 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5394 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5395 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5396 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5397 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5398 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5399 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5400 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5401 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5402 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5403 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5404 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5405 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5406 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5407 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5408 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5409 };
5410
/*
 * Instruction table for the "eg" code path: every ALU/CF hardware opcode
 * in it carries the EG_ prefix (presumably Evergreen -- confirm against
 * the place where this table is selected).
 *
 * Each row holds four initializers, in order:
 *   1. the TGSI opcode, or a bare number for an unassigned opcode slot;
 *   2. a flag that is 1 only on the MAD row, which is also the only row
 *      with an OP3-encoded hardware opcode (presumably an "is
 *      three-operand" flag -- confirm against the struct definition);
 *   3. the hardware ALU / texture / control-flow opcode associated with
 *      the row; it is NOP or 0 on rows such as LIT, IABS and every
 *      tgsi_unsupported row;
 *   4. the handler function for the opcode.
 *
 * The numeric "gap" rows (20, 22, 23, 32, 75, ...) appear in ascending
 * order between the named opcodes, so the layout looks like it is meant
 * to be indexed directly by TGSI opcode number.
 * NOTE(review): confirm at the lookup site before inserting, removing or
 * reordering rows.
 *
 * Opcodes that have no implementation here are routed to tgsi_unsupported.
 */
5411 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5412 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5413 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5414 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5415 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5416 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5417 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5418 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5419 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5420 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5421 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5422 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5423 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5424 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5425 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5426 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5427 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5428 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only row with the second field set and an OP3 opcode */
5429 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5430 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5431 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5432 /* gap */
5433 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5434 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5435 /* gap */
5436 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5437 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5438 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5439 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5440 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5441 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5442 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5443 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5444 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5445 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5446 /* gap */
5447 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5448 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5449 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5450 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5451 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5452 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5453 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5454 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5455 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5456 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5457 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5458 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5459 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5460 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5461 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5462 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5463 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5464 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5465 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5466 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5467 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5468 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5469 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5470 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5471 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5472 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5473 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5474 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5475 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5476 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5477 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5478 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5479 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5481 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5482 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5483 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5484 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5485 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5486 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5487 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5488 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5489 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5490 /* gap */
5491 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5492 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5493 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5494 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5495 /* gap */
5496 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5497 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5498 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5499 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5500 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5501 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5502 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5503 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5504 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5505 /* gap */
5506 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5507 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5508 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5509 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5510 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5511 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5512 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5513 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5514 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5515 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5516 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5517 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5518 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5519 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5520 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5521 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5522 /* gap */
5523 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5524 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5525 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5526 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5527 /* gap */
5528 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5529 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5530 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5531 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5532 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5533 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5534 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5535 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5536 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5537 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5538 /* gap */
5539 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5540 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5541 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5542 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5543 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5544 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5545 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5546 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5547 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5548 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5549 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5550 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5551 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5552 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5553 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5554 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5555 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5556 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5557 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5558 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5559 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5560 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5561 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5562 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5563 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5564 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5565 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5566 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5567 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5568 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5569 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5570 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5571 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5572 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5573 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5574 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5575 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5576 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5577 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5578 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5579 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5580 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5581 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5582 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5583 };
5584
/*
 * Instruction table for the "cm" code path (presumably Cayman, going by
 * the cayman_* handlers it references -- confirm against the place where
 * this table is selected). It reuses the EG_-prefixed hardware opcodes.
 *
 * Each row holds four initializers, in order:
 *   1. the TGSI opcode, or a bare number for an unassigned opcode slot;
 *   2. a flag that is 1 only on the MAD row, which is also the only row
 *      with an OP3-encoded hardware opcode (presumably an "is
 *      three-operand" flag -- confirm against the struct definition);
 *   3. the hardware ALU / texture / control-flow opcode associated with
 *      the row; it is NOP or 0 on rows such as LIT, IABS and every
 *      tgsi_unsupported row;
 *   4. the handler function for the opcode.
 *
 * Rows that differ from eg_shader_tgsi_instruction:
 *   - RCP, RSQ, EX2, LG2 use cayman_emit_float_instr;
 *   - POW uses cayman_pow;
 *   - COS and SIN use cayman_trig;
 *   - I2F, U2F, F2I and F2U use plain tgsi_op2 (the CAYMAN notes at the
 *     top of the file list the int/float conversions among the former
 *     t-slot-only ops that are now implemented in all vector slots);
 *   - UMUL uses MULLO_INT through cayman_mul_int_instr instead of
 *     MULLO_UINT.
 *
 * The numeric "gap" rows (20, 22, 23, 32, 75, ...) appear in ascending
 * order between the named opcodes, so the layout looks like it is meant
 * to be indexed directly by TGSI opcode number.
 * NOTE(review): confirm at the lookup site before inserting, removing or
 * reordering rows.
 */
5585 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5586 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5587 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5588 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5589 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5590 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5591 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5592 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5593 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5594 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5595 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5596 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5597 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5598 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5599 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5600 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5601 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5602 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only row with the second field set and an OP3 opcode */
5603 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5604 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5605 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5606 /* gap */
5607 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5608 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5609 /* gap */
5610 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5611 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5612 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5613 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5614 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5615 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5616 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5617 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5618 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5619 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5620 /* gap */
5621 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5622 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5623 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5624 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5625 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
5626 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5627 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5628 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5629 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5630 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5631 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5632 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5633 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5634 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5635 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5636 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5637 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5638 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5639 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5640 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5641 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5642 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5643 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5644 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5645 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5646 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5647 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5648 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5649 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5650 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5651 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5652 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5653 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5655 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5656 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5657 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5658 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5659 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5660 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5661 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5662 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5663 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5664 /* gap */
5665 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5666 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5667 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5668 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5669 /* gap */
5670 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5671 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5672 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5673 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5674 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5675 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5676 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5677 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5678 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5679 /* gap */
5680 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5681 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5682 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5683 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5684 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5685 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5686 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5687 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5688 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5689 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5690 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5691 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5692 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5693 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5694 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5695 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5696 /* gap */
5697 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5698 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5699 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5700 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5701 /* gap */
5702 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5703 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5704 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5705 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5706 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5707 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5708 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5709 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5710 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5711 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5712 /* gap */
5713 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5714 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5715 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5716 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5717 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5718 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5719 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5720 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5721 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5722 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5723 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5724 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5725 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5726 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5727 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5728 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5729 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5730 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
5731 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5732 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5733 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5734 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5735 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5736 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5737 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5738 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5739 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5740 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5741 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5742 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5743 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5744 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5745 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5746 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5747 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5748 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5749 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5750 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5751 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5752 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5753 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5754 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5755 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5756 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5757 };