r600g/radeon/winsys: indentation cleanup
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600_shader.h"
28 #include "r600d.h"
29
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_info.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_scan.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "util/u_memory.h"
36 #include <stdio.h>
37 #include <errno.h>
38 #include <byteswap.h>
39
40 /* CAYMAN notes
41 These notes explain why the CAYMAN paths emit loops for many instructions.
42
43 -These 8xx t-slot only ops are implemented in all vector slots.
44 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
45 These 8xx t-slot only opcodes become vector ops, with all four
46 slots expecting the arguments on sources a and b. Result is
47 broadcast to all channels.
48 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
49 These 8xx t-slot only opcodes become vector ops in the z, y, and
50 x slots.
51 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
52 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
53 SQRT_IEEE/_64
54 SIN/COS
55 The w slot may have an independent co-issued operation, or if the
56 result is required to be in the w slot, the opcode above may be
57 issued in the w slot as well.
58 The compiler must issue the source argument to slots z, y, and x
59 */
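/* Editor's illustrative sketch (not original driver code): the CAYMAN
 * emulation of a t-slot-only op follows the pattern used for RECIP_IEEE in
 * the fragcoord handling further down in this file - the same source is fed
 * to every vector slot and only the slot holding the wanted channel writes
 * its result:
 *
 *     for (j = 0; j < 4; j++) {
 *             alu.inst = ..._RECIP_IEEE;
 *             alu.src[0].chan = 3;        (same argument in every slot)
 *             alu.dst.chan = j;
 *             alu.dst.write = (j == 3);   (keep only the channel we need)
 *             alu.last = 1;
 *     }
 */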
60
61 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
62 {
63 struct r600_context *rctx = (struct r600_context *)ctx;
64 struct r600_shader *rshader = &shader->shader;
65 uint32_t *ptr;
66 int i;
67
68 /* copy new shader */
69 if (shader->bo == NULL) {
70 shader->bo = (struct r600_resource*)
71 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
72 if (shader->bo == NULL) {
73 return -ENOMEM;
74 }
75 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
76 if (R600_BIG_ENDIAN) {
77 for (i = 0; i < rshader->bc.ndw; ++i) {
78 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
79 }
80 } else {
81 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
82 }
83 rctx->ws->buffer_unmap(shader->bo->cs_buf);
84 }
85 /* build state */
86 switch (rshader->processor_type) {
87 case TGSI_PROCESSOR_VERTEX:
88 if (rctx->chip_class >= EVERGREEN) {
89 evergreen_pipe_shader_vs(ctx, shader);
90 } else {
91 r600_pipe_shader_vs(ctx, shader);
92 }
93 break;
94 case TGSI_PROCESSOR_FRAGMENT:
95 if (rctx->chip_class >= EVERGREEN) {
96 evergreen_pipe_shader_ps(ctx, shader);
97 } else {
98 r600_pipe_shader_ps(ctx, shader);
99 }
100 break;
101 default:
102 return -EINVAL;
103 }
104 return 0;
105 }
106
107 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
108 struct r600_pipe_shader *pipeshader,
109 struct r600_shader_key key);
110
111 static void r600_dump_streamout(struct pipe_stream_output_info *so)
112 {
113 unsigned i;
114
115 fprintf(stderr, "STREAMOUT\n");
116 for (i = 0; i < so->num_outputs; i++) {
117 unsigned mask = ((1 << so->output[i].num_components) - 1) <<
118 so->output[i].start_component;
119 fprintf(stderr, " %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
120 i, so->output[i].output_buffer,
121 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
122 so->output[i].register_index,
123 mask & 1 ? "x" : "",
124 mask & 2 ? "y" : "",
125 mask & 4 ? "z" : "",
126 mask & 8 ? "w" : "",
127 so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : "");
128 }
129 }
130
131 int r600_pipe_shader_create(struct pipe_context *ctx,
132 struct r600_pipe_shader *shader,
133 struct r600_shader_key key)
134 {
135 static int dump_shaders = -1;
136 struct r600_context *rctx = (struct r600_context *)ctx;
137 struct r600_pipe_shader_selector *sel = shader->selector;
138 int r;
139
140 /* Would like some magic "get_bool_option_once" routine.
141 */
142 if (dump_shaders == -1)
143 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
144
145 if (dump_shaders) {
146 fprintf(stderr, "--------------------------------------------------------------\n");
147 tgsi_dump(sel->tokens, 0);
148
149 if (sel->so.num_outputs) {
150 r600_dump_streamout(&sel->so);
151 }
152 }
153 r = r600_shader_from_tgsi(rctx->screen, shader, key);
154 if (r) {
155 R600_ERR("translation from TGSI failed !\n");
156 return r;
157 }
158 r = r600_bytecode_build(&shader->shader.bc);
159 if (r) {
160 R600_ERR("building bytecode failed !\n");
161 return r;
162 }
163 if (dump_shaders) {
164 r600_bytecode_dump(&shader->shader.bc);
165 fprintf(stderr, "______________________________________________________________\n");
166 }
167 return r600_pipe_shader(ctx, shader);
168 }
169
170 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
171 {
172 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
173 r600_bytecode_clear(&shader->shader.bc);
174 }
175
176 /*
177 * tgsi -> r600 shader
178 */
179 struct r600_shader_tgsi_instruction;
180
181 struct r600_shader_src {
182 unsigned sel;
183 unsigned swizzle[4];
184 unsigned neg;
185 unsigned abs;
186 unsigned rel;
187 uint32_t value[4];
188 };
189
190 struct r600_shader_ctx {
191 struct tgsi_shader_info info;
192 struct tgsi_parse_context parse;
193 const struct tgsi_token *tokens;
194 unsigned type;
195 unsigned file_offset[TGSI_FILE_COUNT];
196 unsigned temp_reg;
197 struct r600_shader_tgsi_instruction *inst_info;
198 struct r600_bytecode *bc;
199 struct r600_shader *shader;
200 struct r600_shader_src src[4];
201 uint32_t *literals;
202 uint32_t nliterals;
203 uint32_t max_driver_temp_used;
204 boolean use_llvm;
205 /* needed for evergreen interpolation */
206 boolean input_centroid;
207 boolean input_linear;
208 boolean input_perspective;
209 int num_interp_gpr;
210 int face_gpr;
211 int colors_used;
212 boolean clip_vertex_write;
213 unsigned cv_output;
214 int fragcoord_input;
215 int native_integers;
216 };
217
218 struct r600_shader_tgsi_instruction {
219 unsigned tgsi_opcode;
220 unsigned is_op3;
221 unsigned r600_opcode;
222 int (*process)(struct r600_shader_ctx *ctx);
223 };
224
225 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
226 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
227 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
228 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
229 static int tgsi_else(struct r600_shader_ctx *ctx);
230 static int tgsi_endif(struct r600_shader_ctx *ctx);
231 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
232 static int tgsi_endloop(struct r600_shader_ctx *ctx);
233 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
234
235 /*
236 * bytestream -> r600 shader
237 *
238 * These functions are used to transform the output of the LLVM backend into
239 * struct r600_bytecode.
240 */
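/* Layout of the byte stream, as decoded by r600_bytecode_from_byte_stream()
 * below: every record starts with a one-byte type tag followed by a
 * type-specific payload -
 *   0 = ALU, 1 = TEX, 2 = flow control, 3 = native CF (two raw 32-bit words),
 *   4 = VTX, 5 = EXPORT.
 * Multi-byte words inside a record are little-endian and are reassembled
 * with i32_from_byte_stream(). */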
241
242 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
243 unsigned char * bytes, unsigned num_bytes);
244
245 #ifdef HAVE_OPENCL
246 int r600_compute_shader_create(struct pipe_context * ctx,
247 LLVMModuleRef mod, struct r600_bytecode * bytecode)
248 {
249 struct r600_context *r600_ctx = (struct r600_context *)ctx;
250 unsigned char * bytes;
251 unsigned byte_count;
252 struct r600_shader_ctx shader_ctx;
253 unsigned dump = 0;
254
255 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
256 dump = 1;
257 }
258
259 r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
260 shader_ctx.bc = bytecode;
261 r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family,
262 r600_ctx->screen->msaa_texture_support);
263 shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
264 r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
265 if (shader_ctx.bc->chip_class == CAYMAN) {
266 cm_bytecode_add_cf_end(shader_ctx.bc);
267 }
268 r600_bytecode_build(shader_ctx.bc);
269 if (dump) {
270 r600_bytecode_dump(shader_ctx.bc);
271 }
272 free(bytes);
273 return 1;
274 }
275
276 #endif /* HAVE_OPENCL */
277
278 static uint32_t i32_from_byte_stream(unsigned char * bytes,
279 unsigned * bytes_read)
280 {
281 unsigned i;
282 uint32_t out = 0;
283 for (i = 0; i < 4; i++) {
284 out |= bytes[(*bytes_read)++] << (8 * i);
285 }
286 return out;
287 }
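/* Example: with bytes[*bytes_read..] = { 0x78, 0x56, 0x34, 0x12 } this
 * returns 0x12345678 (little-endian) and advances *bytes_read by 4. */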
288
289 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
290 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
291 {
292 unsigned i;
293 unsigned sel0, sel1;
294 sel0 = bytes[bytes_read++];
295 sel1 = bytes[bytes_read++];
296 alu->src[src_idx].sel = sel0 | (sel1 << 8);
297 alu->src[src_idx].chan = bytes[bytes_read++];
298 alu->src[src_idx].neg = bytes[bytes_read++];
299 alu->src[src_idx].abs = bytes[bytes_read++];
300 alu->src[src_idx].rel = bytes[bytes_read++];
301 alu->src[src_idx].kc_bank = bytes[bytes_read++];
302 for (i = 0; i < 4; i++) {
303 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
304 }
305 return bytes_read;
306 }
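/* Each encoded source is thus 11 bytes: two selector bytes (low, high), one
 * byte each for chan/neg/abs/rel/kc_bank, and a 32-bit little-endian literal
 * value. */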
307
308 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
309 unsigned char * bytes, unsigned bytes_read)
310 {
311 unsigned src_idx;
312 struct r600_bytecode_alu alu;
313 unsigned src_const_reg[3];
314 uint32_t word0, word1;
315
316 memset(&alu, 0, sizeof(alu));
317 for(src_idx = 0; src_idx < 3; src_idx++) {
318 unsigned i;
319 src_const_reg[src_idx] = bytes[bytes_read++];
320 for (i = 0; i < 4; i++) {
321 alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8);
322 }
323 }
324
325 word0 = i32_from_byte_stream(bytes, &bytes_read);
326 word1 = i32_from_byte_stream(bytes, &bytes_read);
327
328 switch(ctx->bc->chip_class) {
329 case R600:
330 r600_bytecode_alu_read(&alu, word0, word1);
331 break;
332 case R700:
333 case EVERGREEN:
334 case CAYMAN:
335 r700_bytecode_alu_read(&alu, word0, word1);
336 break;
337 }
338
339 for(src_idx = 0; src_idx < 3; src_idx++) {
340 if (src_const_reg[src_idx])
341 alu.src[src_idx].sel += 512;
342 }
343
344 #if HAVE_LLVM < 0x0302
345 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
346 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
347 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) ||
348 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) {
349 alu.update_pred = 1;
350 alu.dst.write = 0;
351 alu.src[1].sel = V_SQ_ALU_SRC_0;
352 alu.src[1].chan = 0;
353 alu.last = 1;
354 }
355 #endif
356
357 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT)) {
358 ctx->bc->ar_reg = alu.src[0].sel;
359 ctx->bc->ar_loaded = 0;
360 return bytes_read;
361 }
362
363 if (alu.execute_mask) {
364 alu.pred_sel = 0;
365 r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
366 } else {
367 r600_bytecode_add_alu(ctx->bc, &alu);
368 }
369
370 /* XXX: Handle other KILL instructions */
371 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
372 ctx->shader->uses_kill = 1;
373 /* XXX: This should be enforced in the LLVM backend. */
374 ctx->bc->force_add_cf = 1;
375 }
376 return bytes_read;
377 }
378
379 static void llvm_if(struct r600_shader_ctx *ctx)
380 {
381 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
382 fc_pushlevel(ctx, FC_IF);
383 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
384 }
385
386 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx)
387 {
388 unsigned opcode = TGSI_OPCODE_BRK;
389 if (ctx->bc->chip_class == CAYMAN)
390 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
391 else if (ctx->bc->chip_class >= EVERGREEN)
392 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
393 else
394 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
395 llvm_if(ctx);
396 tgsi_loop_brk_cont(ctx);
397 tgsi_endif(ctx);
398 }
399
400 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
401 unsigned char * bytes, unsigned bytes_read)
402 {
403 struct r600_bytecode_alu alu;
404 unsigned inst;
405 memset(&alu, 0, sizeof(alu));
406 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
407 inst = bytes[bytes_read++];
408 switch (inst) {
409 case 0: /* IF_PREDICATED */
410 llvm_if(ctx);
411 break;
412 case 1: /* ELSE */
413 tgsi_else(ctx);
414 break;
415 case 2: /* ENDIF */
416 tgsi_endif(ctx);
417 break;
418 case 3: /* BGNLOOP */
419 tgsi_bgnloop(ctx);
420 break;
421 case 4: /* ENDLOOP */
422 tgsi_endloop(ctx);
423 break;
424 case 5: /* PREDICATED_BREAK */
425 r600_break_from_byte_stream(ctx);
426 break;
427 case 6: /* CONTINUE */
428 {
429 unsigned opcode = TGSI_OPCODE_CONT;
430 if (ctx->bc->chip_class == CAYMAN) {
431 ctx->inst_info =
432 &cm_shader_tgsi_instruction[opcode];
433 } else if (ctx->bc->chip_class >= EVERGREEN) {
434 ctx->inst_info =
435 &eg_shader_tgsi_instruction[opcode];
436 } else {
437 ctx->inst_info =
438 &r600_shader_tgsi_instruction[opcode];
439 }
440 tgsi_loop_brk_cont(ctx);
441 }
442 break;
443 }
444
445 return bytes_read;
446 }
447
448 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
449 unsigned char * bytes, unsigned bytes_read)
450 {
451 struct r600_bytecode_tex tex;
452
453 tex.inst = bytes[bytes_read++];
454 tex.resource_id = bytes[bytes_read++];
455 tex.src_gpr = bytes[bytes_read++];
456 tex.src_rel = bytes[bytes_read++];
457 tex.dst_gpr = bytes[bytes_read++];
458 tex.dst_rel = bytes[bytes_read++];
459 tex.dst_sel_x = bytes[bytes_read++];
460 tex.dst_sel_y = bytes[bytes_read++];
461 tex.dst_sel_z = bytes[bytes_read++];
462 tex.dst_sel_w = bytes[bytes_read++];
463 tex.lod_bias = bytes[bytes_read++];
464 tex.coord_type_x = bytes[bytes_read++];
465 tex.coord_type_y = bytes[bytes_read++];
466 tex.coord_type_z = bytes[bytes_read++];
467 tex.coord_type_w = bytes[bytes_read++];
468 tex.offset_x = bytes[bytes_read++];
469 tex.offset_y = bytes[bytes_read++];
470 tex.offset_z = bytes[bytes_read++];
471 tex.sampler_id = bytes[bytes_read++];
472 tex.src_sel_x = bytes[bytes_read++];
473 tex.src_sel_y = bytes[bytes_read++];
474 tex.src_sel_z = bytes[bytes_read++];
475 tex.src_sel_w = bytes[bytes_read++];
476
477 tex.inst_mod = 0;
478
479 r600_bytecode_add_tex(ctx->bc, &tex);
480
481 return bytes_read;
482 }
483
484 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
485 unsigned char * bytes, unsigned bytes_read)
486 {
487 struct r600_bytecode_vtx vtx;
488
489 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
490 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
491 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
492
493 memset(&vtx, 0, sizeof(vtx));
494
495 /* WORD0 */
496 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
497 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
498 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
499 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
500 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
501 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
502
503 /* WORD1 */
504 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
505 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
506 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
507 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
508 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
509 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
510 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
511 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
512 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
513 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
514
515 /* WORD2 */
516 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
517 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
518
519 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
520 fprintf(stderr, "Error adding vtx\n");
521 }
522 /* Use the Texture Cache */
523 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
524 return bytes_read;
525 }
526
527 static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
528 unsigned char * bytes, unsigned bytes_read)
529 {
530 uint32_t word0 = 0, word1 = 0;
531 struct r600_bytecode_output output;
532 memset(&output, 0, sizeof(struct r600_bytecode_output));
533 word0 = i32_from_byte_stream(bytes, &bytes_read);
534 word1 = i32_from_byte_stream(bytes, &bytes_read);
535 if (ctx->bc->chip_class >= EVERGREEN)
536 eg_bytecode_export_read(&output, word0, word1);
537 else
538 r600_bytecode_export_read(&output, word0, word1);
539 r600_bytecode_add_output(ctx->bc, &output);
540 return bytes_read;
541 }
542
543 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
544 unsigned char * bytes, unsigned num_bytes)
545 {
546 unsigned bytes_read = 0;
547 unsigned i, byte;
548 while (bytes_read < num_bytes) {
549 char inst_type = bytes[bytes_read++];
550 switch (inst_type) {
551 case 0:
552 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
553 bytes_read);
554 break;
555 case 1:
556 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
557 bytes_read);
558 break;
559 case 2:
560 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
561 bytes_read);
562 break;
563 case 3:
564 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
565 for (i = 0; i < 2; i++) {
566 for (byte = 0 ; byte < 4; byte++) {
567 ctx->bc->cf_last->isa[i] |=
568 (bytes[bytes_read++] << (byte * 8));
569 }
570 }
571 break;
572
573 case 4:
574 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
575 bytes_read);
576 break;
577 case 5:
578 bytes_read = r600_export_from_byte_stream(ctx, bytes,
579 bytes_read);
580 break;
581 default:
582 /* XXX: Error here */
583 break;
584 }
585 }
586 }
587
588 /* End of the bytestream -> r600 shader functions */
589
590 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
591 {
592 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
593 int j;
594
595 if (i->Instruction.NumDstRegs > 1) {
596 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
597 return -EINVAL;
598 }
599 if (i->Instruction.Predicate) {
600 R600_ERR("predicate unsupported\n");
601 return -EINVAL;
602 }
603 #if 0
604 if (i->Instruction.Label) {
605 R600_ERR("label unsupported\n");
606 return -EINVAL;
607 }
608 #endif
609 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
610 if (i->Src[j].Register.Dimension) {
611 R600_ERR("unsupported src %d (dimension %d)\n", j,
612 i->Src[j].Register.Dimension);
613 return -EINVAL;
614 }
615 }
616 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
617 if (i->Dst[j].Register.Dimension) {
618 R600_ERR("unsupported dst (dimension)\n");
619 return -EINVAL;
620 }
621 }
622 return 0;
623 }
624
625 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
626 {
627 int i, r;
628 struct r600_bytecode_alu alu;
629 int gpr = 0, base_chan = 0;
630 int ij_index = 0;
631
632 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
633 ij_index = 0;
634 if (ctx->shader->input[input].centroid)
635 ij_index++;
636 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
637 ij_index = 0;
638 /* if we have perspective add one */
639 if (ctx->input_perspective) {
640 ij_index++;
641 /* if we have perspective centroid */
642 if (ctx->input_centroid)
643 ij_index++;
644 }
645 if (ctx->shader->input[input].centroid)
646 ij_index++;
647 }
648
649 /* work out gpr and base_chan from index */
650 gpr = ij_index / 2;
651 base_chan = (2 * (ij_index % 2)) + 1;
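/* Worked example of the mapping above:
 *     ij_index 0 -> gpr 0, base_chan 1
 *     ij_index 1 -> gpr 0, base_chan 3
 *     ij_index 2 -> gpr 1, base_chan 1
 *     ij_index 3 -> gpr 1, base_chan 3
 * i.e. two i/j pairs are packed per interpolation GPR. */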
652
653 for (i = 0; i < 8; i++) {
654 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
655
656 if (i < 4)
657 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
658 else
659 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
660
661 if ((i > 1) && (i < 6)) {
662 alu.dst.sel = ctx->shader->input[input].gpr;
663 alu.dst.write = 1;
664 }
665
666 alu.dst.chan = i % 4;
667
668 alu.src[0].sel = gpr;
669 alu.src[0].chan = (base_chan - (i % 2));
670
671 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
672
673 alu.bank_swizzle_force = SQ_ALU_VEC_210;
674 if ((i % 4) == 3)
675 alu.last = 1;
676 r = r600_bytecode_add_alu(ctx->bc, &alu);
677 if (r)
678 return r;
679 }
680 return 0;
681 }
682
683 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
684 {
685 int i, r;
686 struct r600_bytecode_alu alu;
687
688 for (i = 0; i < 4; i++) {
689 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
690
691 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
692
693 alu.dst.sel = ctx->shader->input[input].gpr;
694 alu.dst.write = 1;
695
696 alu.dst.chan = i;
697
698 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
699 alu.src[0].chan = i;
700
701 if (i == 3)
702 alu.last = 1;
703 r = r600_bytecode_add_alu(ctx->bc, &alu);
704 if (r)
705 return r;
706 }
707 return 0;
708 }
709
710 /*
711 * Special export handling in shaders
712 *
713 * shader export ARRAY_BASE for EXPORT_POS:
714 * 60 is position
715 * 61 is misc vector
716 * 62, 63 are clip distance vectors
717 *
718 * The use of the values exported in 61-63 is controlled by PA_CL_VS_OUT_CNTL:
719 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
720 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
721 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
722 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
723 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
724 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
725 * exclusive from render target index)
726 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
727 *
728 *
729 * shader export ARRAY_BASE for EXPORT_PIXEL:
730 * 0-7 CB targets
731 * 61 computed Z vector
732 *
733 * The use of the values exported in the computed Z vector is controlled
734 * by DB_SHADER_CONTROL:
735 * Z_EXPORT_ENABLE - Z as a float in RED
736 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
737 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
738 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
739 * DB_SOURCE_FORMAT - export control restrictions
740 *
741 */
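/* Worked example (matching the export code at the end of
 * r600_shader_from_tgsi): a vertex shader writing POSITION plus two generic
 * outputs gets its position export at ARRAY_BASE 60 (next_pos_base starts
 * at 60) and the two PARAM exports at ARRAY_BASE 0 and 1 (next_param_base
 * starts at 0). */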
742
743
744 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
745 static int r600_spi_sid(struct r600_shader_io * io)
746 {
747 int index, name = io->name;
748
749 /* These params are handled differently; they don't need
750 * semantic indices, so we'll use 0 for them.
751 */
752 if (name == TGSI_SEMANTIC_POSITION ||
753 name == TGSI_SEMANTIC_PSIZE ||
754 name == TGSI_SEMANTIC_FACE)
755 index = 0;
756 else {
757 if (name == TGSI_SEMANTIC_GENERIC) {
758 /* For generic params simply use sid from tgsi */
759 index = io->sid;
760 } else {
761 /* For non-generic params - pack name and sid into 8 bits */
762 index = 0x80 | (name<<3) | (io->sid);
763 }
764
765 /* Make sure that every index that is actually used has a nonzero value,
766 * so later we can simply compare it against 0 instead of checking the
767 * name against a list of special cases. */
768 index++;
769 }
770
771 return index;
772 }
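/* Worked example: a GENERIC output with sid 3 simply gets index 3 + 1 = 4,
 * while a non-generic param with name 1 and sid 0 is packed as
 * (0x80 | (1 << 3) | 0) + 1 = 0x89.  The final increment keeps every index
 * that is really used nonzero. */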
773
774 /* turn input into interpolate on EG */
775 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
776 {
777 int r = 0;
778
779 if (ctx->shader->input[index].spi_sid) {
780 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
781 if (!ctx->use_llvm) {
782 if (ctx->shader->input[index].interpolate > 0) {
783 r = evergreen_interp_alu(ctx, index);
784 } else {
785 r = evergreen_interp_flat(ctx, index);
786 }
787 }
788 }
789 return r;
790 }
791
792 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
793 {
794 struct r600_bytecode_alu alu;
795 int i, r;
796 int gpr_front = ctx->shader->input[front].gpr;
797 int gpr_back = ctx->shader->input[back].gpr;
798
799 for (i = 0; i < 4; i++) {
800 memset(&alu, 0, sizeof(alu));
801 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
802 alu.is_op3 = 1;
803 alu.dst.write = 1;
804 alu.dst.sel = gpr_front;
805 alu.src[0].sel = ctx->face_gpr;
806 alu.src[1].sel = gpr_front;
807 alu.src[2].sel = gpr_back;
808
809 alu.dst.chan = i;
810 alu.src[1].chan = i;
811 alu.src[2].chan = i;
812 alu.last = (i==3);
813
814 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
815 return r;
816 }
817
818 return 0;
819 }
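/* CNDGT computes dst = (src0 > 0.0f) ? src1 : src2, so with the face value
 * in src0 the front color is selected for front-facing pixels and the back
 * color otherwise. */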
820
821 static int tgsi_declaration(struct r600_shader_ctx *ctx)
822 {
823 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
824 unsigned i;
825 int r;
826
827 switch (d->Declaration.File) {
828 case TGSI_FILE_INPUT:
829 i = ctx->shader->ninput++;
830 ctx->shader->input[i].name = d->Semantic.Name;
831 ctx->shader->input[i].sid = d->Semantic.Index;
832 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
833 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
834 ctx->shader->input[i].centroid = d->Interp.Centroid;
835 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
836 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
837 switch (ctx->shader->input[i].name) {
838 case TGSI_SEMANTIC_FACE:
839 ctx->face_gpr = ctx->shader->input[i].gpr;
840 break;
841 case TGSI_SEMANTIC_COLOR:
842 ctx->colors_used++;
843 break;
844 case TGSI_SEMANTIC_POSITION:
845 ctx->fragcoord_input = i;
846 break;
847 }
848 if (ctx->bc->chip_class >= EVERGREEN) {
849 if ((r = evergreen_interp_input(ctx, i)))
850 return r;
851 }
852 }
853 break;
854 case TGSI_FILE_OUTPUT:
855 i = ctx->shader->noutput++;
856 ctx->shader->output[i].name = d->Semantic.Name;
857 ctx->shader->output[i].sid = d->Semantic.Index;
858 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
859 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
860 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
861 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
862 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
863 switch (d->Semantic.Name) {
864 case TGSI_SEMANTIC_CLIPDIST:
865 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
866 break;
867 case TGSI_SEMANTIC_PSIZE:
868 ctx->shader->vs_out_misc_write = 1;
869 ctx->shader->vs_out_point_size = 1;
870 break;
871 case TGSI_SEMANTIC_CLIPVERTEX:
872 ctx->clip_vertex_write = TRUE;
873 ctx->cv_output = i;
874 break;
875 }
876 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
877 switch (d->Semantic.Name) {
878 case TGSI_SEMANTIC_COLOR:
879 ctx->shader->nr_ps_max_color_exports++;
880 break;
881 }
882 }
883 break;
884 case TGSI_FILE_CONSTANT:
885 case TGSI_FILE_TEMPORARY:
886 case TGSI_FILE_SAMPLER:
887 case TGSI_FILE_ADDRESS:
888 break;
889
890 case TGSI_FILE_SYSTEM_VALUE:
891 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
892 if (!ctx->native_integers) {
893 struct r600_bytecode_alu alu;
894 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
895
896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
897 alu.src[0].sel = 0;
898 alu.src[0].chan = 3;
899
900 alu.dst.sel = 0;
901 alu.dst.chan = 3;
902 alu.dst.write = 1;
903 alu.last = 1;
904
905 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
906 return r;
907 }
908 break;
909 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
910 break;
911 default:
912 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
913 return -EINVAL;
914 }
915 return 0;
916 }
917
918 static int r600_get_temp(struct r600_shader_ctx *ctx)
919 {
920 return ctx->temp_reg + ctx->max_driver_temp_used++;
921 }
922
923 /*
924 * For evergreen we need to scan the shader to find the number of GPRs we
925 * need to reserve for interpolation.
926 *
927 * We need to know:
928 * - whether any centroid inputs are going to be emitted
929 * - whether perspective and linear interpolation are required
930 */
931 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
932 {
933 int i;
934 int num_baryc;
935
936 ctx->input_linear = FALSE;
937 ctx->input_perspective = FALSE;
938 ctx->input_centroid = FALSE;
939 ctx->num_interp_gpr = 1;
940
941 /* any centroid inputs */
942 for (i = 0; i < ctx->info.num_inputs; i++) {
943 /* skip position/face */
944 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
945 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
946 continue;
947 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
948 ctx->input_linear = TRUE;
949 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
950 ctx->input_perspective = TRUE;
951 if (ctx->info.input_centroid[i])
952 ctx->input_centroid = TRUE;
953 }
954
955 num_baryc = 0;
956 /* ignoring sample for now */
957 if (ctx->input_perspective)
958 num_baryc++;
959 if (ctx->input_linear)
960 num_baryc++;
961 if (ctx->input_centroid)
962 num_baryc *= 2;
963
964 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
965
966 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
967 return ctx->num_interp_gpr;
968 }
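/* Worked example: a shader with both perspective and linear inputs, at least
 * one of them centroid, gets num_baryc = 2 * 2 = 4 ij pairs, so
 * num_interp_gpr = 1 + ((4 + 1) >> 1) = 3. */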
969
970 static void tgsi_src(struct r600_shader_ctx *ctx,
971 const struct tgsi_full_src_register *tgsi_src,
972 struct r600_shader_src *r600_src)
973 {
974 memset(r600_src, 0, sizeof(*r600_src));
975 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
976 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
977 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
978 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
979 r600_src->neg = tgsi_src->Register.Negate;
980 r600_src->abs = tgsi_src->Register.Absolute;
981
982 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
983 int index;
984 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
985 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
986 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
987
988 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
989 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
990 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
991 return;
992 }
993 index = tgsi_src->Register.Index;
994 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
995 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
996 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
997 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
998 r600_src->swizzle[0] = 3;
999 r600_src->swizzle[1] = 3;
1000 r600_src->swizzle[2] = 3;
1001 r600_src->swizzle[3] = 3;
1002 r600_src->sel = 0;
1003 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
1004 r600_src->swizzle[0] = 0;
1005 r600_src->swizzle[1] = 0;
1006 r600_src->swizzle[2] = 0;
1007 r600_src->swizzle[3] = 0;
1008 r600_src->sel = 0;
1009 }
1010 } else {
1011 if (tgsi_src->Register.Indirect)
1012 r600_src->rel = V_SQ_REL_RELATIVE;
1013 r600_src->sel = tgsi_src->Register.Index;
1014 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
1015 }
1016 }
1017
1018 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
1019 {
1020 struct r600_bytecode_vtx vtx;
1021 unsigned int ar_reg;
1022 int r;
1023
1024 if (offset) {
1025 struct r600_bytecode_alu alu;
1026
1027 memset(&alu, 0, sizeof(alu));
1028
1029 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
1030 alu.src[0].sel = ctx->bc->ar_reg;
1031
1032 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1033 alu.src[1].value = offset;
1034
1035 alu.dst.sel = dst_reg;
1036 alu.dst.write = 1;
1037 alu.last = 1;
1038
1039 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1040 return r;
1041
1042 ar_reg = dst_reg;
1043 } else {
1044 ar_reg = ctx->bc->ar_reg;
1045 }
1046
1047 memset(&vtx, 0, sizeof(vtx));
1048 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1049 vtx.src_gpr = ar_reg;
1050 vtx.mega_fetch_count = 16;
1051 vtx.dst_gpr = dst_reg;
1052 vtx.dst_sel_x = 0; /* SEL_X */
1053 vtx.dst_sel_y = 1; /* SEL_Y */
1054 vtx.dst_sel_z = 2; /* SEL_Z */
1055 vtx.dst_sel_w = 3; /* SEL_W */
1056 vtx.data_format = FMT_32_32_32_32_FLOAT;
1057 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1058 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1059 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1060 vtx.endian = r600_endian_swap(32);
1061
1062 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1063 return r;
1064
1065 return 0;
1066 }
1067
1068 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1069 {
1070 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1071 struct r600_bytecode_alu alu;
1072 int i, j, k, nconst, r;
1073
1074 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1075 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1076 nconst++;
1077 }
1078 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1079 }
1080 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1081 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1082 continue;
1083 }
1084
1085 if (ctx->src[i].rel) {
1086 int treg = r600_get_temp(ctx);
1087 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1088 return r;
1089
1090 ctx->src[i].sel = treg;
1091 ctx->src[i].rel = 0;
1092 j--;
1093 } else if (j > 0) {
1094 int treg = r600_get_temp(ctx);
1095 for (k = 0; k < 4; k++) {
1096 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1097 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1098 alu.src[0].sel = ctx->src[i].sel;
1099 alu.src[0].chan = k;
1100 alu.src[0].rel = ctx->src[i].rel;
1101 alu.dst.sel = treg;
1102 alu.dst.chan = k;
1103 alu.dst.write = 1;
1104 if (k == 3)
1105 alu.last = 1;
1106 r = r600_bytecode_add_alu(ctx->bc, &alu);
1107 if (r)
1108 return r;
1109 }
1110 ctx->src[i].sel = treg;
1111 ctx->src[i].rel = 0;
1112 j--;
1113 }
1114 }
1115 return 0;
1116 }
1117
1118 /* We need to move any immediate into a temp, e.g. for the trig functions, which use literals for their PI-related constants */
1119 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1120 {
1121 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1122 struct r600_bytecode_alu alu;
1123 int i, j, k, nliteral, r;
1124
1125 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1126 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1127 nliteral++;
1128 }
1129 }
1130 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1131 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1132 int treg = r600_get_temp(ctx);
1133 for (k = 0; k < 4; k++) {
1134 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1135 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1136 alu.src[0].sel = ctx->src[i].sel;
1137 alu.src[0].chan = k;
1138 alu.src[0].value = ctx->src[i].value[k];
1139 alu.dst.sel = treg;
1140 alu.dst.chan = k;
1141 alu.dst.write = 1;
1142 if (k == 3)
1143 alu.last = 1;
1144 r = r600_bytecode_add_alu(ctx->bc, &alu);
1145 if (r)
1146 return r;
1147 }
1148 ctx->src[i].sel = treg;
1149 j--;
1150 }
1151 }
1152 return 0;
1153 }
1154
1155 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1156 {
1157 int i, r, count = ctx->shader->ninput;
1158
1159 for (i = 0; i < count; i++) {
1160 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1161 unsigned back_facing_reg = ctx->shader->input[i].potential_back_facing_reg;
1162 if (ctx->bc->chip_class >= EVERGREEN) {
1163 if ((r = evergreen_interp_input(ctx, back_facing_reg)))
1164 return r;
1165 }
1166
1167 if (!ctx->use_llvm) {
1168 r = select_twoside_color(ctx, i, back_facing_reg);
1169 if (r)
1170 return r;
1171 }
1172 }
1173 }
1174 return 0;
1175 }
1176
1177 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
1178 struct r600_pipe_shader *pipeshader,
1179 struct r600_shader_key key)
1180 {
1181 struct r600_shader *shader = &pipeshader->shader;
1182 struct tgsi_token *tokens = pipeshader->selector->tokens;
1183 struct pipe_stream_output_info so = pipeshader->selector->so;
1184 struct tgsi_full_immediate *immediate;
1185 struct tgsi_full_property *property;
1186 struct r600_shader_ctx ctx;
1187 struct r600_bytecode_output output[32];
1188 unsigned output_done, noutput;
1189 unsigned opcode;
1190 int i, j, k, r = 0;
1191 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1192 /* Declarations used by llvm code */
1193 bool use_llvm = false;
1194 unsigned char * inst_bytes = NULL;
1195 unsigned inst_byte_count = 0;
1196
1197 #ifdef R600_USE_LLVM
1198 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1199 #endif
1200 ctx.bc = &shader->bc;
1201 ctx.shader = shader;
1202 ctx.native_integers = true;
1203
1204 r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
1205 rscreen->msaa_texture_support);
1206 ctx.tokens = tokens;
1207 tgsi_scan_shader(tokens, &ctx.info);
1208 tgsi_parse_init(&ctx.parse, tokens);
1209 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1210 shader->processor_type = ctx.type;
1211 ctx.bc->type = shader->processor_type;
1212
1213 ctx.face_gpr = -1;
1214 ctx.fragcoord_input = -1;
1215 ctx.colors_used = 0;
1216 ctx.clip_vertex_write = 0;
1217
1218 shader->nr_ps_color_exports = 0;
1219 shader->nr_ps_max_color_exports = 0;
1220
1221 shader->two_side = key.color_two_side;
1222
1223 /* register allocations */
1224 /* Values [0,127] correspond to GPR[0..127].
1225 * Values [128,159] correspond to constant buffer bank 0
1226 * Values [160,191] correspond to constant buffer bank 1
1227 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1228 * Values [256,287] correspond to constant buffer bank 2 (EG)
1229 * Values [288,319] correspond to constant buffer bank 3 (EG)
1230 * Other special values are shown in the list below.
1231 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1232 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1233 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1234 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1235 * 248 SQ_ALU_SRC_0: special constant 0.0.
1236 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1237 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1238 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1239 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1240 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1241 * 254 SQ_ALU_SRC_PV: previous vector result.
1242 * 255 SQ_ALU_SRC_PS: previous scalar result.
1243 */
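/* Worked example of the mapping set up below (editor's sketch): a TGSI
 * CONST[5] source becomes sel 512 + 5 = 517 and is translated to a kcache
 * access when the ALU clauses are built, while TEMP[0] lands on the first
 * GPR after the last output register. */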
1244 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1245 ctx.file_offset[i] = 0;
1246 }
1247 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1248 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1249 if (ctx.bc->chip_class >= EVERGREEN) {
1250 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1251 } else {
1252 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1253 }
1254 }
1255 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1256 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1257 }
1258
1259 #ifdef R600_USE_LLVM
1260 if (use_llvm && ctx.info.indirect_files) {
1261 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1262 "indirect adressing. Falling back to TGSI "
1263 "backend.\n");
1264 use_llvm = 0;
1265 }
1266 #endif
1267 ctx.use_llvm = use_llvm;
1268
1269 if (use_llvm) {
1270 ctx.file_offset[TGSI_FILE_OUTPUT] =
1271 ctx.file_offset[TGSI_FILE_INPUT];
1272 } else {
1273 ctx.file_offset[TGSI_FILE_OUTPUT] =
1274 ctx.file_offset[TGSI_FILE_INPUT] +
1275 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1276 }
1277 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1278 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1279
1280 /* Outside the GPR range. This will be translated to one of the
1281 * kcache banks later. */
1282 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1283
1284 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1285 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1286 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1287 ctx.temp_reg = ctx.bc->ar_reg + 1;
1288
1289 ctx.nliterals = 0;
1290 ctx.literals = NULL;
1291 shader->fs_write_all = FALSE;
1292 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1293 tgsi_parse_token(&ctx.parse);
1294 switch (ctx.parse.FullToken.Token.Type) {
1295 case TGSI_TOKEN_TYPE_IMMEDIATE:
1296 immediate = &ctx.parse.FullToken.FullImmediate;
1297 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1298 if(ctx.literals == NULL) {
1299 r = -ENOMEM;
1300 goto out_err;
1301 }
1302 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1303 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1304 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1305 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1306 ctx.nliterals++;
1307 break;
1308 case TGSI_TOKEN_TYPE_DECLARATION:
1309 r = tgsi_declaration(&ctx);
1310 if (r)
1311 goto out_err;
1312 break;
1313 case TGSI_TOKEN_TYPE_INSTRUCTION:
1314 break;
1315 case TGSI_TOKEN_TYPE_PROPERTY:
1316 property = &ctx.parse.FullToken.FullProperty;
1317 switch (property->Property.PropertyName) {
1318 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1319 if (property->u[0].Data == 1)
1320 shader->fs_write_all = TRUE;
1321 break;
1322 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1323 /* we don't need this one */
1324 break;
1325 }
1326 break;
1327 default:
1328 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1329 r = -EINVAL;
1330 goto out_err;
1331 }
1332 }
1333
1334 /* Process two side if needed */
1335 if (shader->two_side && ctx.colors_used) {
1336 int i, count = ctx.shader->ninput;
1337 unsigned next_lds_loc = ctx.shader->nlds;
1338
1339 /* additional inputs will be allocated right after the existing inputs,
1340 * we won't need them after the color selection, so we don't need to
1341 * reserve these gprs for the rest of the shader code and to adjust
1342 * output offsets etc. */
1343 int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
1344 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1345
1346 if (ctx.face_gpr == -1) {
1347 i = ctx.shader->ninput++;
1348 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
1349 ctx.shader->input[i].spi_sid = 0;
1350 ctx.shader->input[i].gpr = gpr++;
1351 ctx.face_gpr = ctx.shader->input[i].gpr;
1352 }
1353
1354 for (i = 0; i < count; i++) {
1355 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1356 int ni = ctx.shader->ninput++;
1357 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io));
1358 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1359 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]);
1360 ctx.shader->input[ni].gpr = gpr++;
1361 // TGSI to LLVM needs to know the lds position of inputs.
1362 // The non-LLVM path computes it later (in process_twoside_color_inputs).
1363 ctx.shader->input[ni].lds_pos = next_lds_loc++;
1364 ctx.shader->input[i].potential_back_facing_reg = ni;
1365 }
1366 }
1367 }
1368
1369 /* LLVM backend setup */
1370 #ifdef R600_USE_LLVM
1371 if (use_llvm) {
1372 struct radeon_llvm_context radeon_llvm_ctx;
1373 LLVMModuleRef mod;
1374 unsigned dump = 0;
1375 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1376 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1377 radeon_llvm_ctx.type = ctx.type;
1378 radeon_llvm_ctx.two_side = shader->two_side;
1379 radeon_llvm_ctx.face_input = ctx.face_gpr;
1380 radeon_llvm_ctx.r600_inputs = ctx.shader->input;
1381 radeon_llvm_ctx.r600_outputs = ctx.shader->output;
1382 radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
1383 radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
1384 radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
1385 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1386 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1387 dump = 1;
1388 }
1389 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1390 rscreen->family, dump)) {
1391 FREE(inst_bytes);
1392 radeon_llvm_dispose(&radeon_llvm_ctx);
1393 use_llvm = 0;
1394 fprintf(stderr, "R600 LLVM backend failed to compile "
1395 "shader. Falling back to TGSI\n");
1396 } else {
1397 ctx.file_offset[TGSI_FILE_OUTPUT] =
1398 ctx.file_offset[TGSI_FILE_INPUT];
1399 }
1400 radeon_llvm_dispose(&radeon_llvm_ctx);
1401 }
1402 #endif
1403 /* End of LLVM backend setup */
1404
1405 if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
1406 shader->nr_ps_max_color_exports = 8;
1407
1408 if (ctx.fragcoord_input >= 0 && !use_llvm) {
1409 if (ctx.bc->chip_class == CAYMAN) {
1410 for (j = 0 ; j < 4; j++) {
1411 struct r600_bytecode_alu alu;
1412 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1413 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1414 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1415 alu.src[0].chan = 3;
1416
1417 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1418 alu.dst.chan = j;
1419 alu.dst.write = (j == 3);
1420 alu.last = 1;
1421 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1422 return r;
1423 }
1424 } else {
1425 struct r600_bytecode_alu alu;
1426 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1427 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1428 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1429 alu.src[0].chan = 3;
1430
1431 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1432 alu.dst.chan = 3;
1433 alu.dst.write = 1;
1434 alu.last = 1;
1435 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1436 return r;
1437 }
1438 }
1439
1440 if (shader->two_side && ctx.colors_used) {
1441 if ((r = process_twoside_color_inputs(&ctx)))
1442 return r;
1443 }
1444
1445 tgsi_parse_init(&ctx.parse, tokens);
1446 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1447 tgsi_parse_token(&ctx.parse);
1448 switch (ctx.parse.FullToken.Token.Type) {
1449 case TGSI_TOKEN_TYPE_INSTRUCTION:
1450 if (use_llvm) {
1451 continue;
1452 }
1453 r = tgsi_is_supported(&ctx);
1454 if (r)
1455 goto out_err;
1456 ctx.max_driver_temp_used = 0;
1457 /* reserve first tmp for everyone */
1458 r600_get_temp(&ctx);
1459
1460 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1461 if ((r = tgsi_split_constant(&ctx)))
1462 goto out_err;
1463 if ((r = tgsi_split_literal_constant(&ctx)))
1464 goto out_err;
1465 if (ctx.bc->chip_class == CAYMAN)
1466 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1467 else if (ctx.bc->chip_class >= EVERGREEN)
1468 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1469 else
1470 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1471 r = ctx.inst_info->process(&ctx);
1472 if (r)
1473 goto out_err;
1474 break;
1475 default:
1476 break;
1477 }
1478 }
1479
1480 /* Reset the temporary register counter. */
1481 ctx.max_driver_temp_used = 0;
1482
1483 /* Get instructions if we are using the LLVM backend. */
1484 if (use_llvm) {
1485 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1486 FREE(inst_bytes);
1487 }
1488
1489 noutput = shader->noutput;
1490
1491 if (ctx.clip_vertex_write) {
1492 unsigned clipdist_temp[2];
1493
1494 clipdist_temp[0] = r600_get_temp(&ctx);
1495 clipdist_temp[1] = r600_get_temp(&ctx);
1496
1497 /* We need to convert a clipvertex write into clipdistance writes and stop
1498 exporting the clip vertex itself */
1499
1500 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1501 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1502 shader->output[noutput].gpr = clipdist_temp[0];
1503 noutput++;
1504 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1505 shader->output[noutput].gpr = clipdist_temp[1];
1506 noutput++;
1507
1508 /* reset spi_sid for clipvertex output to avoid confusing spi */
1509 shader->output[ctx.cv_output].spi_sid = 0;
1510
1511 shader->clip_dist_write = 0xFF;
1512
1513 for (i = 0; i < 8; i++) {
1514 int oreg = i >> 2;
1515 int ochan = i & 3;
1516
1517 for (j = 0; j < 4; j++) {
1518 struct r600_bytecode_alu alu;
1519 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1520 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1521 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1522 alu.src[0].chan = j;
1523
1524 alu.src[1].sel = 512 + i;
1525 alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
1526 alu.src[1].chan = j;
1527
1528 alu.dst.sel = clipdist_temp[oreg];
1529 alu.dst.chan = j;
1530 alu.dst.write = (j == ochan);
1531 if (j == 3)
1532 alu.last = 1;
1533 r = r600_bytecode_add_alu(ctx.bc, &alu);
1534 if (r)
1535 return r;
1536 }
1537 }
1538 }
1539
1540 /* Add stream outputs. */
1541 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1542 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
1543
1544 /* Sanity checking. */
1545 if (so.num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
1546 R600_ERR("Too many stream outputs: %d\n", so.num_outputs);
1547 r = -EINVAL;
1548 goto out_err;
1549 }
1550 for (i = 0; i < so.num_outputs; i++) {
1551 if (so.output[i].output_buffer >= 4) {
1552 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
1553 so.output[i].output_buffer);
1554 r = -EINVAL;
1555 goto out_err;
1556 }
1557 }
1558
1559 /* Initialize locations where the outputs are stored. */
1560 for (i = 0; i < so.num_outputs; i++) {
1561 so_gpr[i] = shader->output[so.output[i].register_index].gpr;
1562
1563 /* Lower outputs with dst_offset < start_component.
1564 *
1565 * We can only output 4D vectors with a write mask, e.g. we can
1566 * only output the W component at offset 3, etc. If we want
1567 * to store Y, Z, or W at buffer offset 0, we need to use MOV
1568 * to move it to X and output X. */
1569 if (so.output[i].dst_offset < so.output[i].start_component) {
1570 unsigned tmp = r600_get_temp(&ctx);
1571
1572 for (j = 0; j < so.output[i].num_components; j++) {
1573 struct r600_bytecode_alu alu;
1574 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1575 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1576 alu.src[0].sel = so_gpr[i];
1577 alu.src[0].chan = so.output[i].start_component + j;
1578
1579 alu.dst.sel = tmp;
1580 alu.dst.chan = j;
1581 alu.dst.write = 1;
1582 if (j == so.output[i].num_components - 1)
1583 alu.last = 1;
1584 r = r600_bytecode_add_alu(ctx.bc, &alu);
1585 if (r)
1586 return r;
1587 }
1588 so.output[i].start_component = 0;
1589 so_gpr[i] = tmp;
1590 }
1591 }
1592
1593 /* Write outputs to buffers. */
1594 for (i = 0; i < so.num_outputs; i++) {
1595 struct r600_bytecode_output output;
1596
1597 memset(&output, 0, sizeof(struct r600_bytecode_output));
1598 output.gpr = so_gpr[i];
1599 output.elem_size = so.output[i].num_components;
1600 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1601 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1602 output.burst_count = 1;
1603 output.barrier = 1;
1604 /* array_size is an upper limit for the burst_count
1605 * with MEM_STREAM instructions */
1606 output.array_size = 0xFFF;
1607 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1608 if (ctx.bc->chip_class >= EVERGREEN) {
1609 switch (so.output[i].output_buffer) {
1610 case 0:
1611 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1612 break;
1613 case 1:
1614 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1615 break;
1616 case 2:
1617 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1618 break;
1619 case 3:
1620 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1621 break;
1622 }
1623 } else {
1624 switch (so.output[i].output_buffer) {
1625 case 0:
1626 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1627 break;
1628 case 1:
1629 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1630 break;
1631 case 2:
1632 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1633 break;
1634 case 3:
1635 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1636 break;
1637 }
1638 }
1639 r = r600_bytecode_add_output(ctx.bc, &output);
1640 if (r)
1641 goto out_err;
1642 }
1643 }
1644
1645 /* export output */
1646 for (i = 0, j = 0; i < noutput; i++, j++) {
1647 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1648 output[j].gpr = shader->output[i].gpr;
1649 output[j].elem_size = 3;
1650 output[j].swizzle_x = 0;
1651 output[j].swizzle_y = 1;
1652 output[j].swizzle_z = 2;
1653 output[j].swizzle_w = 3;
1654 output[j].burst_count = 1;
1655 output[j].barrier = 1;
1656 output[j].type = -1;
1657 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1658 switch (ctx.type) {
1659 case TGSI_PROCESSOR_VERTEX:
1660 switch (shader->output[i].name) {
1661 case TGSI_SEMANTIC_POSITION:
1662 output[j].array_base = next_pos_base++;
1663 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1664 break;
1665
1666 case TGSI_SEMANTIC_PSIZE:
1667 output[j].array_base = next_pos_base++;
1668 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1669 break;
1670 case TGSI_SEMANTIC_CLIPVERTEX:
1671 j--;
1672 break;
1673 case TGSI_SEMANTIC_CLIPDIST:
1674 output[j].array_base = next_pos_base++;
1675 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1676 /* spi_sid is 0 for clipdistance outputs that were generated
1677 * for clipvertex - we don't need to pass them to PS */
1678 if (shader->output[i].spi_sid) {
1679 j++;
1680 /* duplicate it as PARAM to pass to the pixel shader */
1681 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1682 output[j].array_base = next_param_base++;
1683 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1684 }
1685 break;
1686 case TGSI_SEMANTIC_FOG:
1687 output[j].swizzle_y = 4; /* 0 */
1688 output[j].swizzle_z = 4; /* 0 */
1689 output[j].swizzle_w = 5; /* 1 */
1690 break;
1691 }
1692 break;
1693 case TGSI_PROCESSOR_FRAGMENT:
1694 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1695 /* never export more colors than the number of CBs */
1696 if (next_pixel_base && next_pixel_base >= key.nr_cbufs) {
1697 /* skip export */
1698 j--;
1699 continue;
1700 }
1701 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1702 output[j].array_base = next_pixel_base++;
1703 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1704 shader->nr_ps_color_exports++;
1705 if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
1706 for (k = 1; k < key.nr_cbufs; k++) {
1707 j++;
1708 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1709 output[j].gpr = shader->output[i].gpr;
1710 output[j].elem_size = 3;
1711 output[j].swizzle_x = 0;
1712 output[j].swizzle_y = 1;
1713 output[j].swizzle_z = 2;
1714 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1715 output[j].burst_count = 1;
1716 output[j].barrier = 1;
1717 output[j].array_base = next_pixel_base++;
1718 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1719 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1720 shader->nr_ps_color_exports++;
1721 }
1722 }
1723 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1724 output[j].array_base = 61;
1725 output[j].swizzle_x = 2;
1726 output[j].swizzle_y = 7;
1727 output[j].swizzle_z = output[j].swizzle_w = 7;
1728 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1729 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1730 output[j].array_base = 61;
1731 output[j].swizzle_x = 7;
1732 output[j].swizzle_y = 1;
1733 output[j].swizzle_z = output[j].swizzle_w = 7;
1734 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1735 } else {
1736 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1737 r = -EINVAL;
1738 goto out_err;
1739 }
1740 break;
1741 default:
1742 R600_ERR("unsupported processor type %d\n", ctx.type);
1743 r = -EINVAL;
1744 goto out_err;
1745 }
1746
1747 if (output[j].type == -1) {
1748 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1749 output[j].array_base = next_param_base++;
1750 }
1751 }
1752
1753 /* add fake param output for vertex shader if no param is exported */
1754 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1755 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1756 output[j].gpr = 0;
1757 output[j].elem_size = 3;
1758 output[j].swizzle_x = 7;
1759 output[j].swizzle_y = 7;
1760 output[j].swizzle_z = 7;
1761 output[j].swizzle_w = 7;
1762 output[j].burst_count = 1;
1763 output[j].barrier = 1;
1764 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1765 output[j].array_base = 0;
1766 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1767 j++;
1768 }
1769
1770 /* add fake pixel export if no color export was emitted */
1771 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1772 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1773 output[j].gpr = 0;
1774 output[j].elem_size = 3;
1775 output[j].swizzle_x = 7;
1776 output[j].swizzle_y = 7;
1777 output[j].swizzle_z = 7;
1778 output[j].swizzle_w = 7;
1779 output[j].burst_count = 1;
1780 output[j].barrier = 1;
1781 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1782 output[j].array_base = 0;
1783 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1784 j++;
1785 }
1786
1787 noutput = j;
1788
1789 /* set export done on last export of each type */
1790 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1791 if (ctx.bc->chip_class < CAYMAN) {
1792 if (i == (noutput - 1)) {
1793 output[i].end_of_program = 1;
1794 }
1795 }
1796 if (!(output_done & (1 << output[i].type))) {
1797 output_done |= (1 << output[i].type);
1798 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1799 }
1800 }
1801 /* add output to bytecode */
1802 if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) {
1803 for (i = 0; i < noutput; i++) {
1804 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1805 if (r)
1806 goto out_err;
1807 }
1808 }
1809 /* add program end */
1810 if (ctx.bc->chip_class == CAYMAN)
1811 cm_bytecode_add_cf_end(ctx.bc);
1812
1813 /* check GPR limit - we have 124 = 128 - 4
1814 * (4 are reserved as alu clause temporary registers) */
1815 if (ctx.bc->ngpr > 124) {
1816 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1817 r = -ENOMEM;
1818 goto out_err;
1819 }
1820
1821 free(ctx.literals);
1822 tgsi_parse_free(&ctx.parse);
1823 return 0;
1824 out_err:
1825 free(ctx.literals);
1826 tgsi_parse_free(&ctx.parse);
1827 return r;
1828 }
1829
1830 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1831 {
1832 R600_ERR("%s tgsi opcode unsupported\n",
1833 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1834 return -EINVAL;
1835 }
1836
1837 static int tgsi_end(struct r600_shader_ctx *ctx)
1838 {
1839 return 0;
1840 }
1841
1842 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1843 const struct r600_shader_src *shader_src,
1844 unsigned chan)
1845 {
1846 bc_src->sel = shader_src->sel;
1847 bc_src->chan = shader_src->swizzle[chan];
1848 bc_src->neg = shader_src->neg;
1849 bc_src->abs = shader_src->abs;
1850 bc_src->rel = shader_src->rel;
1851 bc_src->value = shader_src->value[bc_src->chan];
1852 }
1853
1854 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1855 {
1856 bc_src->abs = 1;
1857 bc_src->neg = 0;
1858 }
1859
1860 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1861 {
1862 bc_src->neg = !bc_src->neg;
1863 }
1864
1865 static void tgsi_dst(struct r600_shader_ctx *ctx,
1866 const struct tgsi_full_dst_register *tgsi_dst,
1867 unsigned swizzle,
1868 struct r600_bytecode_alu_dst *r600_dst)
1869 {
1870 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1871
1872 r600_dst->sel = tgsi_dst->Register.Index;
1873 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1874 r600_dst->chan = swizzle;
1875 r600_dst->write = 1;
1876 if (tgsi_dst->Register.Indirect)
1877 r600_dst->rel = V_SQ_REL_RELATIVE;
1878 if (inst->Instruction.Saturate) {
1879 r600_dst->clamp = 1;
1880 }
1881 }
1882
1883 static int tgsi_last_instruction(unsigned writemask)
1884 {
1885 int i, lasti = 0;
1886
1887 for (i = 0; i < 4; i++) {
1888 if (writemask & (1 << i)) {
1889 lasti = i;
1890 }
1891 }
1892 return lasti;
1893 }
1894
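/* Shared helper for simple two-source ALU instructions: one ALU op is
 * emitted per written destination channel.
 *   swap:       use the two TGSI sources in reversed order
 *   trans_only: mark every op as last, so each lands in its own ALU group
 * SUB and ABS are handled below by toggling negation / setting abs on a source.
 */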
1895 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1896 {
1897 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1898 struct r600_bytecode_alu alu;
1899 int i, j, r;
1900 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1901
1902 for (i = 0; i < lasti + 1; i++) {
1903 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1904 continue;
1905
1906 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1907 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1908
1909 alu.inst = ctx->inst_info->r600_opcode;
1910 if (!swap) {
1911 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1912 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1913 }
1914 } else {
1915 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1916 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1917 }
1918 /* handle some special cases */
1919 switch (ctx->inst_info->tgsi_opcode) {
1920 case TGSI_OPCODE_SUB:
1921 r600_bytecode_src_toggle_neg(&alu.src[1]);
1922 break;
1923 case TGSI_OPCODE_ABS:
1924 r600_bytecode_src_set_abs(&alu.src[0]);
1925 break;
1926 default:
1927 break;
1928 }
1929 if (i == lasti || trans_only) {
1930 alu.last = 1;
1931 }
1932 r = r600_bytecode_add_alu(ctx->bc, &alu);
1933 if (r)
1934 return r;
1935 }
1936 return 0;
1937 }
1938
1939 static int tgsi_op2(struct r600_shader_ctx *ctx)
1940 {
1941 return tgsi_op2_s(ctx, 0, 0);
1942 }
1943
1944 static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
1945 {
1946 return tgsi_op2_s(ctx, 1, 0);
1947 }
1948
1949 static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
1950 {
1951 return tgsi_op2_s(ctx, 0, 1);
1952 }
1953
1954 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1955 {
1956 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1957 struct r600_bytecode_alu alu;
1958 int i, r;
1959 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1960
1961 for (i = 0; i < lasti + 1; i++) {
1962
1963 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1964 continue;
1965 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1966 alu.inst = ctx->inst_info->r600_opcode;
1967
1968 alu.src[0].sel = V_SQ_ALU_SRC_0;
1969
1970 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1971
1972 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1973
1974 if (i == lasti) {
1975 alu.last = 1;
1976 }
1977 r = r600_bytecode_add_alu(ctx->bc, &alu);
1978 if (r)
1979 return r;
1980 }
1981 return 0;
1982
1983 }
1984
1985 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1986 {
1987 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1988 int i, j, r;
1989 struct r600_bytecode_alu alu;
1990 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1991
1992 for (i = 0 ; i < last_slot; i++) {
1993 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1994 alu.inst = ctx->inst_info->r600_opcode;
1995 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1996 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1997
1998 /* RSQ should take the absolute value of src */
1999 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
2000 r600_bytecode_src_set_abs(&alu.src[j]);
2001 }
2002 }
2003 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2004 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2005
2006 if (i == last_slot - 1)
2007 alu.last = 1;
2008 r = r600_bytecode_add_alu(ctx->bc, &alu);
2009 if (r)
2010 return r;
2011 }
2012 return 0;
2013 }
2014
2015 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
2016 {
2017 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2018 int i, j, k, r;
2019 struct r600_bytecode_alu alu;
2020 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2021 for (k = 0; k < last_slot; k++) {
2022 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
2023 continue;
2024
2025 for (i = 0 ; i < 4; i++) {
2026 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2027 alu.inst = ctx->inst_info->r600_opcode;
2028 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
2029 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
2030 }
2031 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2032 alu.dst.write = (i == k);
2033 if (i == 3)
2034 alu.last = 1;
2035 r = r600_bytecode_add_alu(ctx->bc, &alu);
2036 if (r)
2037 return r;
2038 }
2039 }
2040 return 0;
2041 }
2042
2043 /*
2044 * r600  - truncate the angle to the -PI..PI range
2045 * r700+ - normalize the angle by dividing by 2*PI
2046 * see fdo bug 27901
2047 */
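/* Roughly, the sequence emitted below computes (sketch, ignoring rounding):
 *   t = fract(src / (2*PI) + 0.5)
 *   r600:  tmp = t * 2*PI - PI      (angle wrapped to -PI..PI)
 *   r700+: tmp = t - 0.5            (normalized to -0.5..0.5)
 */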
2048 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
2049 {
2050 static float half_inv_pi = 1.0 / (3.1415926535 * 2);
2051 static float double_pi = 3.1415926535 * 2;
2052 static float neg_pi = -3.1415926535;
2053
2054 int r;
2055 struct r600_bytecode_alu alu;
2056
2057 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2058 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2059 alu.is_op3 = 1;
2060
2061 alu.dst.chan = 0;
2062 alu.dst.sel = ctx->temp_reg;
2063 alu.dst.write = 1;
2064
2065 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2066
2067 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2068 alu.src[1].chan = 0;
2069 alu.src[1].value = *(uint32_t *)&half_inv_pi;
2070 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2071 alu.src[2].chan = 0;
2072 alu.last = 1;
2073 r = r600_bytecode_add_alu(ctx->bc, &alu);
2074 if (r)
2075 return r;
2076
2077 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2079
2080 alu.dst.chan = 0;
2081 alu.dst.sel = ctx->temp_reg;
2082 alu.dst.write = 1;
2083
2084 alu.src[0].sel = ctx->temp_reg;
2085 alu.src[0].chan = 0;
2086 alu.last = 1;
2087 r = r600_bytecode_add_alu(ctx->bc, &alu);
2088 if (r)
2089 return r;
2090
2091 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2092 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2093 alu.is_op3 = 1;
2094
2095 alu.dst.chan = 0;
2096 alu.dst.sel = ctx->temp_reg;
2097 alu.dst.write = 1;
2098
2099 alu.src[0].sel = ctx->temp_reg;
2100 alu.src[0].chan = 0;
2101
2102 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2103 alu.src[1].chan = 0;
2104 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2105 alu.src[2].chan = 0;
2106
2107 if (ctx->bc->chip_class == R600) {
2108 alu.src[1].value = *(uint32_t *)&double_pi;
2109 alu.src[2].value = *(uint32_t *)&neg_pi;
2110 } else {
2111 alu.src[1].sel = V_SQ_ALU_SRC_1;
2112 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2113 alu.src[2].neg = 1;
2114 }
2115
2116 alu.last = 1;
2117 r = r600_bytecode_add_alu(ctx->bc, &alu);
2118 if (r)
2119 return r;
2120 return 0;
2121 }
2122
2123 static int cayman_trig(struct r600_shader_ctx *ctx)
2124 {
2125 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2126 struct r600_bytecode_alu alu;
2127 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2128 int i, r;
2129
2130 r = tgsi_setup_trig(ctx);
2131 if (r)
2132 return r;
2133
2134
2135 for (i = 0; i < last_slot; i++) {
2136 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2137 alu.inst = ctx->inst_info->r600_opcode;
2138 alu.dst.chan = i;
2139
2140 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2141 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2142
2143 alu.src[0].sel = ctx->temp_reg;
2144 alu.src[0].chan = 0;
2145 if (i == last_slot - 1)
2146 alu.last = 1;
2147 r = r600_bytecode_add_alu(ctx->bc, &alu);
2148 if (r)
2149 return r;
2150 }
2151 return 0;
2152 }
2153
2154 static int tgsi_trig(struct r600_shader_ctx *ctx)
2155 {
2156 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2157 struct r600_bytecode_alu alu;
2158 int i, r;
2159 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2160
2161 r = tgsi_setup_trig(ctx);
2162 if (r)
2163 return r;
2164
2165 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2166 alu.inst = ctx->inst_info->r600_opcode;
2167 alu.dst.chan = 0;
2168 alu.dst.sel = ctx->temp_reg;
2169 alu.dst.write = 1;
2170
2171 alu.src[0].sel = ctx->temp_reg;
2172 alu.src[0].chan = 0;
2173 alu.last = 1;
2174 r = r600_bytecode_add_alu(ctx->bc, &alu);
2175 if (r)
2176 return r;
2177
2178 /* replicate result */
2179 for (i = 0; i < lasti + 1; i++) {
2180 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2181 continue;
2182
2183 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2185
2186 alu.src[0].sel = ctx->temp_reg;
2187 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2188 if (i == lasti)
2189 alu.last = 1;
2190 r = r600_bytecode_add_alu(ctx->bc, &alu);
2191 if (r)
2192 return r;
2193 }
2194 return 0;
2195 }
2196
2197 static int tgsi_scs(struct r600_shader_ctx *ctx)
2198 {
2199 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2200 struct r600_bytecode_alu alu;
2201 int i, r;
2202
2203 /* We'll only need the trig stuff if we are going to write to the
2204 * X or Y components of the destination vector.
2205 */
2206 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2207 r = tgsi_setup_trig(ctx);
2208 if (r)
2209 return r;
2210 }
2211
2212 /* dst.x = COS */
2213 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2214 if (ctx->bc->chip_class == CAYMAN) {
2215 for (i = 0 ; i < 3; i++) {
2216 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2218 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2219
2220 if (i == 0)
2221 alu.dst.write = 1;
2222 else
2223 alu.dst.write = 0;
2224 alu.src[0].sel = ctx->temp_reg;
2225 alu.src[0].chan = 0;
2226 if (i == 2)
2227 alu.last = 1;
2228 r = r600_bytecode_add_alu(ctx->bc, &alu);
2229 if (r)
2230 return r;
2231 }
2232 } else {
2233 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2234 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2235 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2236
2237 alu.src[0].sel = ctx->temp_reg;
2238 alu.src[0].chan = 0;
2239 alu.last = 1;
2240 r = r600_bytecode_add_alu(ctx->bc, &alu);
2241 if (r)
2242 return r;
2243 }
2244 }
2245
2246 /* dst.y = SIN */
2247 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2248 if (ctx->bc->chip_class == CAYMAN) {
2249 for (i = 0 ; i < 3; i++) {
2250 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2251 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2252 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2253 if (i == 1)
2254 alu.dst.write = 1;
2255 else
2256 alu.dst.write = 0;
2257 alu.src[0].sel = ctx->temp_reg;
2258 alu.src[0].chan = 0;
2259 if (i == 2)
2260 alu.last = 1;
2261 r = r600_bytecode_add_alu(ctx->bc, &alu);
2262 if (r)
2263 return r;
2264 }
2265 } else {
2266 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2267 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2268 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2269
2270 alu.src[0].sel = ctx->temp_reg;
2271 alu.src[0].chan = 0;
2272 alu.last = 1;
2273 r = r600_bytecode_add_alu(ctx->bc, &alu);
2274 if (r)
2275 return r;
2276 }
2277 }
2278
2279 /* dst.z = 0.0; */
2280 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2281 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2282
2283 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2284
2285 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2286
2287 alu.src[0].sel = V_SQ_ALU_SRC_0;
2288 alu.src[0].chan = 0;
2289
2290 alu.last = 1;
2291
2292 r = r600_bytecode_add_alu(ctx->bc, &alu);
2293 if (r)
2294 return r;
2295 }
2296
2297 /* dst.w = 1.0; */
2298 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2299 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2300
2301 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2302
2303 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2304
2305 alu.src[0].sel = V_SQ_ALU_SRC_1;
2306 alu.src[0].chan = 0;
2307
2308 alu.last = 1;
2309
2310 r = r600_bytecode_add_alu(ctx->bc, &alu);
2311 if (r)
2312 return r;
2313 }
2314
2315 return 0;
2316 }
2317
2318 static int tgsi_kill(struct r600_shader_ctx *ctx)
2319 {
2320 struct r600_bytecode_alu alu;
2321 int i, r;
2322
2323 for (i = 0; i < 4; i++) {
2324 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2325 alu.inst = ctx->inst_info->r600_opcode;
2326
2327 alu.dst.chan = i;
2328
2329 alu.src[0].sel = V_SQ_ALU_SRC_0;
2330
2331 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2332 alu.src[1].sel = V_SQ_ALU_SRC_1;
2333 alu.src[1].neg = 1;
2334 } else {
2335 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2336 }
2337 if (i == 3) {
2338 alu.last = 1;
2339 }
2340 r = r600_bytecode_add_alu(ctx->bc, &alu);
2341 if (r)
2342 return r;
2343 }
2344
2345 /* kill must be last in ALU */
2346 ctx->bc->force_add_cf = 1;
2347 ctx->shader->uses_kill = TRUE;
2348 return 0;
2349 }
2350
2351 static int tgsi_lit(struct r600_shader_ctx *ctx)
2352 {
2353 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2354 struct r600_bytecode_alu alu;
2355 int r;
2356
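/* TGSI LIT computes, roughly: dst.x = 1, dst.y = max(src.x, 0),
 * dst.z = (src.x > 0) ? max(src.y, 0)^src.w : 0, dst.w = 1.
 * The log/MUL_LIT/exp sequence below produces dst.z; the remaining
 * components are simple moves and a max.
 */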
2357 /* tmp.x = max(src.y, 0.0) */
2358 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2359 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2360 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2361 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2362 alu.src[1].chan = 1;
2363
2364 alu.dst.sel = ctx->temp_reg;
2365 alu.dst.chan = 0;
2366 alu.dst.write = 1;
2367
2368 alu.last = 1;
2369 r = r600_bytecode_add_alu(ctx->bc, &alu);
2370 if (r)
2371 return r;
2372
2373 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2374 {
2375 int chan;
2376 int sel;
2377 int i;
2378
2379 if (ctx->bc->chip_class == CAYMAN) {
2380 for (i = 0; i < 3; i++) {
2381 /* tmp.z = log(tmp.x) */
2382 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2384 alu.src[0].sel = ctx->temp_reg;
2385 alu.src[0].chan = 0;
2386 alu.dst.sel = ctx->temp_reg;
2387 alu.dst.chan = i;
2388 if (i == 2) {
2389 alu.dst.write = 1;
2390 alu.last = 1;
2391 } else
2392 alu.dst.write = 0;
2393
2394 r = r600_bytecode_add_alu(ctx->bc, &alu);
2395 if (r)
2396 return r;
2397 }
2398 } else {
2399 /* tmp.z = log(tmp.x) */
2400 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2402 alu.src[0].sel = ctx->temp_reg;
2403 alu.src[0].chan = 0;
2404 alu.dst.sel = ctx->temp_reg;
2405 alu.dst.chan = 2;
2406 alu.dst.write = 1;
2407 alu.last = 1;
2408 r = r600_bytecode_add_alu(ctx->bc, &alu);
2409 if (r)
2410 return r;
2411 }
2412
2413 chan = alu.dst.chan;
2414 sel = alu.dst.sel;
2415
2416 /* tmp.x = MUL_LIT(tmp.z, src.w, src.x) */
2417 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2419 alu.src[0].sel = sel;
2420 alu.src[0].chan = chan;
2421 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2422 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2423 alu.dst.sel = ctx->temp_reg;
2424 alu.dst.chan = 0;
2425 alu.dst.write = 1;
2426 alu.is_op3 = 1;
2427 alu.last = 1;
2428 r = r600_bytecode_add_alu(ctx->bc, &alu);
2429 if (r)
2430 return r;
2431
2432 if (ctx->bc->chip_class == CAYMAN) {
2433 for (i = 0; i < 3; i++) {
2434 /* dst.z = exp(tmp.x) */
2435 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2436 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2437 alu.src[0].sel = ctx->temp_reg;
2438 alu.src[0].chan = 0;
2439 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2440 if (i == 2) {
2441 alu.dst.write = 1;
2442 alu.last = 1;
2443 } else
2444 alu.dst.write = 0;
2445 r = r600_bytecode_add_alu(ctx->bc, &alu);
2446 if (r)
2447 return r;
2448 }
2449 } else {
2450 /* dst.z = exp(tmp.x) */
2451 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2452 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2453 alu.src[0].sel = ctx->temp_reg;
2454 alu.src[0].chan = 0;
2455 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2456 alu.last = 1;
2457 r = r600_bytecode_add_alu(ctx->bc, &alu);
2458 if (r)
2459 return r;
2460 }
2461 }
2462
2463 /* dst.x = 1.0 */
2464 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2466 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2467 alu.src[0].chan = 0;
2468 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2469 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2470 r = r600_bytecode_add_alu(ctx->bc, &alu);
2471 if (r)
2472 return r;
2473
2474 /* dst.y = max(src.x, 0.0) */
2475 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2477 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2478 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2479 alu.src[1].chan = 0;
2480 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2481 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2482 r = r600_bytecode_add_alu(ctx->bc, &alu);
2483 if (r)
2484 return r;
2485
2486 /* dst.w = 1.0 */
2487 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2488 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2489 alu.src[0].sel = V_SQ_ALU_SRC_1;
2490 alu.src[0].chan = 0;
2491 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2492 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2493 alu.last = 1;
2494 r = r600_bytecode_add_alu(ctx->bc, &alu);
2495 if (r)
2496 return r;
2497
2498 return 0;
2499 }
2500
2501 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2502 {
2503 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2504 struct r600_bytecode_alu alu;
2505 int i, r;
2506
2507 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2508
2509 /* XXX:
2510 * For state trackers other than OpenGL, we'll want to use
2511 * _RECIPSQRT_IEEE instead.
2512 */
2513 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2514
2515 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2516 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2517 r600_bytecode_src_set_abs(&alu.src[i]);
2518 }
2519 alu.dst.sel = ctx->temp_reg;
2520 alu.dst.write = 1;
2521 alu.last = 1;
2522 r = r600_bytecode_add_alu(ctx->bc, &alu);
2523 if (r)
2524 return r;
2525 /* replicate result */
2526 return tgsi_helper_tempx_replicate(ctx);
2527 }
2528
2529 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2530 {
2531 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2532 struct r600_bytecode_alu alu;
2533 int i, r;
2534
2535 for (i = 0; i < 4; i++) {
2536 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2537 alu.src[0].sel = ctx->temp_reg;
2538 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2539 alu.dst.chan = i;
2540 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2541 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2542 if (i == 3)
2543 alu.last = 1;
2544 r = r600_bytecode_add_alu(ctx->bc, &alu);
2545 if (r)
2546 return r;
2547 }
2548 return 0;
2549 }
2550
2551 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2552 {
2553 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2554 struct r600_bytecode_alu alu;
2555 int i, r;
2556
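/* Run the instruction once on the first component of each source and
 * broadcast the temp.x result to every written destination channel. */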
2557 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2558 alu.inst = ctx->inst_info->r600_opcode;
2559 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2560 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2561 }
2562 alu.dst.sel = ctx->temp_reg;
2563 alu.dst.write = 1;
2564 alu.last = 1;
2565 r = r600_bytecode_add_alu(ctx->bc, &alu);
2566 if (r)
2567 return r;
2568 /* replicate result */
2569 return tgsi_helper_tempx_replicate(ctx);
2570 }
2571
2572 static int cayman_pow(struct r600_shader_ctx *ctx)
2573 {
2574 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2575 int i, r;
2576 struct r600_bytecode_alu alu;
2577 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2578
2579 for (i = 0; i < 3; i++) {
2580 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2581 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2582 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2583 alu.dst.sel = ctx->temp_reg;
2584 alu.dst.chan = i;
2585 alu.dst.write = 1;
2586 if (i == 2)
2587 alu.last = 1;
2588 r = r600_bytecode_add_alu(ctx->bc, &alu);
2589 if (r)
2590 return r;
2591 }
2592
2593 /* b * LOG2(a) */
2594 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2595 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2596 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2597 alu.src[1].sel = ctx->temp_reg;
2598 alu.dst.sel = ctx->temp_reg;
2599 alu.dst.write = 1;
2600 alu.last = 1;
2601 r = r600_bytecode_add_alu(ctx->bc, &alu);
2602 if (r)
2603 return r;
2604
2605 for (i = 0; i < last_slot; i++) {
2606 /* POW(a,b) = EXP2(b * LOG2(a)) */
2607 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2608 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2609 alu.src[0].sel = ctx->temp_reg;
2610
2611 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2612 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2613 if (i == last_slot - 1)
2614 alu.last = 1;
2615 r = r600_bytecode_add_alu(ctx->bc, &alu);
2616 if (r)
2617 return r;
2618 }
2619 return 0;
2620 }
2621
2622 static int tgsi_pow(struct r600_shader_ctx *ctx)
2623 {
2624 struct r600_bytecode_alu alu;
2625 int r;
2626
2627 /* LOG2(a) */
2628 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2629 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2630 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2631 alu.dst.sel = ctx->temp_reg;
2632 alu.dst.write = 1;
2633 alu.last = 1;
2634 r = r600_bytecode_add_alu(ctx->bc, &alu);
2635 if (r)
2636 return r;
2637 /* b * LOG2(a) */
2638 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2639 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2640 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2641 alu.src[1].sel = ctx->temp_reg;
2642 alu.dst.sel = ctx->temp_reg;
2643 alu.dst.write = 1;
2644 alu.last = 1;
2645 r = r600_bytecode_add_alu(ctx->bc, &alu);
2646 if (r)
2647 return r;
2648 /* POW(a,b) = EXP2(b * LOG2(a)) */
2649 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2650 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2651 alu.src[0].sel = ctx->temp_reg;
2652 alu.dst.sel = ctx->temp_reg;
2653 alu.dst.write = 1;
2654 alu.last = 1;
2655 r = r600_bytecode_add_alu(ctx->bc, &alu);
2656 if (r)
2657 return r;
2658 return tgsi_helper_tempx_replicate(ctx);
2659 }
2660
2661 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2662 {
2663 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2664 struct r600_bytecode_alu alu;
2665 int i, r, j;
2666 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2667 int tmp0 = ctx->temp_reg;
2668 int tmp1 = r600_get_temp(ctx);
2669 int tmp2 = r600_get_temp(ctx);
2670 int tmp3 = r600_get_temp(ctx);
2671 /* Unsigned path:
2672 *
2673 * we need to represent src1 as src2*q + r, where q is the quotient and r is the remainder
2674 *
2675 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2676 * 2. tmp0.z = lo (tmp0.x * src2)
2677 * 3. tmp0.w = -tmp0.z
2678 * 4. tmp0.y = hi (tmp0.x * src2)
2679 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2680 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2681 * 7. tmp1.x = tmp0.x - tmp0.w
2682 * 8. tmp1.y = tmp0.x + tmp0.w
2683 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2684 * 10. tmp0.z = hi(tmp0.x * src1) = q
2685 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2686 *
2687 * 12. tmp0.w = src1 - tmp0.y = r
2688 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2689 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2690 *
2691 * if DIV
2692 *
2693 * 15. tmp1.z = tmp0.z + 1 = q + 1
2694 * 16. tmp1.w = tmp0.z - 1 = q - 1
2695 *
2696 * else MOD
2697 *
2698 * 15. tmp1.z = tmp0.w - src2 = r - src2
2699 * 16. tmp1.w = tmp0.w + src2 = r + src2
2700 *
2701 * endif
2702 *
2703 * 17. tmp1.x = tmp1.x & tmp1.y
2704 *
2705 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2706 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2707 *
2708 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2709 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2710 *
2711 * Signed path:
2712 *
2713 * Same as unsigned, using abs values of the operands,
2714 * and fixing the sign of the result in the end.
2715 */
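/* Idealized example of the unsigned path (exact values depend on the
 * hardware reciprocal, so this ignores the error correction of steps 3-9):
 *   src1 = 7, src2 = 3
 *   step 1:      tmp0.x ~= 2^32/3 = 0x55555555
 *   step 10:     q = hi(0x55555555 * 7) = 2
 *   steps 11-12: r = 7 - 3*2 = 1
 *   steps 13-19: r < src2 and r >= 0, so q (DIV) / r (MOD) need no fixup
 *   step 20:     src2 != 0, so the MAX_UINT fallback is not taken
 */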
2716
2717 for (i = 0; i < 4; i++) {
2718 if (!(write_mask & (1<<i)))
2719 continue;
2720
2721 if (signed_op) {
2722
2723 /* tmp2.x = -src0 */
2724 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2725 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2726
2727 alu.dst.sel = tmp2;
2728 alu.dst.chan = 0;
2729 alu.dst.write = 1;
2730
2731 alu.src[0].sel = V_SQ_ALU_SRC_0;
2732
2733 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2734
2735 alu.last = 1;
2736 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2737 return r;
2738
2739 /* tmp2.y = -src1 */
2740 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2741 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2742
2743 alu.dst.sel = tmp2;
2744 alu.dst.chan = 1;
2745 alu.dst.write = 1;
2746
2747 alu.src[0].sel = V_SQ_ALU_SRC_0;
2748
2749 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2750
2751 alu.last = 1;
2752 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2753 return r;
2754
2755 /* tmp2.z sign bit is set if src0 and src1 signs are different */
2756 /* it will be the sign of the quotient */
2757 if (!mod) {
2758
2759 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2760 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2761
2762 alu.dst.sel = tmp2;
2763 alu.dst.chan = 2;
2764 alu.dst.write = 1;
2765
2766 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2767 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2768
2769 alu.last = 1;
2770 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2771 return r;
2772 }
2773
2774 /* tmp2.x = |src0| */
2775 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2776 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2777 alu.is_op3 = 1;
2778
2779 alu.dst.sel = tmp2;
2780 alu.dst.chan = 0;
2781 alu.dst.write = 1;
2782
2783 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2784 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2785 alu.src[2].sel = tmp2;
2786 alu.src[2].chan = 0;
2787
2788 alu.last = 1;
2789 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2790 return r;
2791
2792 /* tmp2.y = |src1| */
2793 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2794 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2795 alu.is_op3 = 1;
2796
2797 alu.dst.sel = tmp2;
2798 alu.dst.chan = 1;
2799 alu.dst.write = 1;
2800
2801 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2802 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2803 alu.src[2].sel = tmp2;
2804 alu.src[2].chan = 1;
2805
2806 alu.last = 1;
2807 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2808 return r;
2809
2810 }
2811
2812 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2813 if (ctx->bc->chip_class == CAYMAN) {
2814 /* tmp3.x = u2f(src2) */
2815 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2816 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2817
2818 alu.dst.sel = tmp3;
2819 alu.dst.chan = 0;
2820 alu.dst.write = 1;
2821
2822 if (signed_op) {
2823 alu.src[0].sel = tmp2;
2824 alu.src[0].chan = 1;
2825 } else {
2826 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2827 }
2828
2829 alu.last = 1;
2830 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2831 return r;
2832
2833 /* tmp0.x = recip(tmp3.x) */
2834 for (j = 0 ; j < 3; j++) {
2835 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2836 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2837
2838 alu.dst.sel = tmp0;
2839 alu.dst.chan = j;
2840 alu.dst.write = (j == 0);
2841
2842 alu.src[0].sel = tmp3;
2843 alu.src[0].chan = 0;
2844
2845 if (j == 2)
2846 alu.last = 1;
2847 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2848 return r;
2849 }
2850
2851 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2852 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2853
2854 alu.src[0].sel = tmp0;
2855 alu.src[0].chan = 0;
2856
2857 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2858 alu.src[1].value = 0x4f800000; /* 2^32 as a float */
2859
2860 alu.dst.sel = tmp3;
2861 alu.dst.write = 1;
2862 alu.last = 1;
2863 r = r600_bytecode_add_alu(ctx->bc, &alu);
2864 if (r)
2865 return r;
2866
2867 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2868 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2869
2870 alu.dst.sel = tmp0;
2871 alu.dst.chan = 0;
2872 alu.dst.write = 1;
2873
2874 alu.src[0].sel = tmp3;
2875 alu.src[0].chan = 0;
2876
2877 alu.last = 1;
2878 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2879 return r;
2880
2881 } else {
2882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2883 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2884
2885 alu.dst.sel = tmp0;
2886 alu.dst.chan = 0;
2887 alu.dst.write = 1;
2888
2889 if (signed_op) {
2890 alu.src[0].sel = tmp2;
2891 alu.src[0].chan = 1;
2892 } else {
2893 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2894 }
2895
2896 alu.last = 1;
2897 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2898 return r;
2899 }
2900
2901 /* 2. tmp0.z = lo (tmp0.x * src2) */
2902 if (ctx->bc->chip_class == CAYMAN) {
2903 for (j = 0 ; j < 4; j++) {
2904 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2905 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2906
2907 alu.dst.sel = tmp0;
2908 alu.dst.chan = j;
2909 alu.dst.write = (j == 2);
2910
2911 alu.src[0].sel = tmp0;
2912 alu.src[0].chan = 0;
2913 if (signed_op) {
2914 alu.src[1].sel = tmp2;
2915 alu.src[1].chan = 1;
2916 } else {
2917 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2918 }
2919
2920 alu.last = (j == 3);
2921 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2922 return r;
2923 }
2924 } else {
2925 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2926 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2927
2928 alu.dst.sel = tmp0;
2929 alu.dst.chan = 2;
2930 alu.dst.write = 1;
2931
2932 alu.src[0].sel = tmp0;
2933 alu.src[0].chan = 0;
2934 if (signed_op) {
2935 alu.src[1].sel = tmp2;
2936 alu.src[1].chan = 1;
2937 } else {
2938 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2939 }
2940
2941 alu.last = 1;
2942 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2943 return r;
2944 }
2945
2946 /* 3. tmp0.w = -tmp0.z */
2947 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2948 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2949
2950 alu.dst.sel = tmp0;
2951 alu.dst.chan = 3;
2952 alu.dst.write = 1;
2953
2954 alu.src[0].sel = V_SQ_ALU_SRC_0;
2955 alu.src[1].sel = tmp0;
2956 alu.src[1].chan = 2;
2957
2958 alu.last = 1;
2959 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2960 return r;
2961
2962 /* 4. tmp0.y = hi (tmp0.x * src2) */
2963 if (ctx->bc->chip_class == CAYMAN) {
2964 for (j = 0 ; j < 4; j++) {
2965 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2966 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2967
2968 alu.dst.sel = tmp0;
2969 alu.dst.chan = j;
2970 alu.dst.write = (j == 1);
2971
2972 alu.src[0].sel = tmp0;
2973 alu.src[0].chan = 0;
2974
2975 if (signed_op) {
2976 alu.src[1].sel = tmp2;
2977 alu.src[1].chan = 1;
2978 } else {
2979 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2980 }
2981 alu.last = (j == 3);
2982 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2983 return r;
2984 }
2985 } else {
2986 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2987 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2988
2989 alu.dst.sel = tmp0;
2990 alu.dst.chan = 1;
2991 alu.dst.write = 1;
2992
2993 alu.src[0].sel = tmp0;
2994 alu.src[0].chan = 0;
2995
2996 if (signed_op) {
2997 alu.src[1].sel = tmp2;
2998 alu.src[1].chan = 1;
2999 } else {
3000 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3001 }
3002
3003 alu.last = 1;
3004 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3005 return r;
3006 }
3007
3008 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) */
3009 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3010 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3011 alu.is_op3 = 1;
3012
3013 alu.dst.sel = tmp0;
3014 alu.dst.chan = 2;
3015 alu.dst.write = 1;
3016
3017 alu.src[0].sel = tmp0;
3018 alu.src[0].chan = 1;
3019 alu.src[1].sel = tmp0;
3020 alu.src[1].chan = 3;
3021 alu.src[2].sel = tmp0;
3022 alu.src[2].chan = 2;
3023
3024 alu.last = 1;
3025 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3026 return r;
3027
3028 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
3029 if (ctx->bc->chip_class == CAYMAN) {
3030 for (j = 0 ; j < 4; j++) {
3031 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3032 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3033
3034 alu.dst.sel = tmp0;
3035 alu.dst.chan = j;
3036 alu.dst.write = (j == 3);
3037
3038 alu.src[0].sel = tmp0;
3039 alu.src[0].chan = 2;
3040
3041 alu.src[1].sel = tmp0;
3042 alu.src[1].chan = 0;
3043
3044 alu.last = (j == 3);
3045 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3046 return r;
3047 }
3048 } else {
3049 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3050 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3051
3052 alu.dst.sel = tmp0;
3053 alu.dst.chan = 3;
3054 alu.dst.write = 1;
3055
3056 alu.src[0].sel = tmp0;
3057 alu.src[0].chan = 2;
3058
3059 alu.src[1].sel = tmp0;
3060 alu.src[1].chan = 0;
3061
3062 alu.last = 1;
3063 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3064 return r;
3065 }
3066
3067 /* 7. tmp1.x = tmp0.x - tmp0.w */
3068 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3069 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3070
3071 alu.dst.sel = tmp1;
3072 alu.dst.chan = 0;
3073 alu.dst.write = 1;
3074
3075 alu.src[0].sel = tmp0;
3076 alu.src[0].chan = 0;
3077 alu.src[1].sel = tmp0;
3078 alu.src[1].chan = 3;
3079
3080 alu.last = 1;
3081 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3082 return r;
3083
3084 /* 8. tmp1.y = tmp0.x + tmp0.w */
3085 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3086 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3087
3088 alu.dst.sel = tmp1;
3089 alu.dst.chan = 1;
3090 alu.dst.write = 1;
3091
3092 alu.src[0].sel = tmp0;
3093 alu.src[0].chan = 0;
3094 alu.src[1].sel = tmp0;
3095 alu.src[1].chan = 3;
3096
3097 alu.last = 1;
3098 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3099 return r;
3100
3101 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
3102 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3103 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3104 alu.is_op3 = 1;
3105
3106 alu.dst.sel = tmp0;
3107 alu.dst.chan = 0;
3108 alu.dst.write = 1;
3109
3110 alu.src[0].sel = tmp0;
3111 alu.src[0].chan = 1;
3112 alu.src[1].sel = tmp1;
3113 alu.src[1].chan = 1;
3114 alu.src[2].sel = tmp1;
3115 alu.src[2].chan = 0;
3116
3117 alu.last = 1;
3118 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3119 return r;
3120
3121 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
3122 if (ctx->bc->chip_class == CAYMAN) {
3123 for (j = 0 ; j < 4; j++) {
3124 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3125 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3126
3127 alu.dst.sel = tmp0;
3128 alu.dst.chan = j;
3129 alu.dst.write = (j == 2);
3130
3131 alu.src[0].sel = tmp0;
3132 alu.src[0].chan = 0;
3133
3134 if (signed_op) {
3135 alu.src[1].sel = tmp2;
3136 alu.src[1].chan = 0;
3137 } else {
3138 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3139 }
3140
3141 alu.last = (j == 3);
3142 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3143 return r;
3144 }
3145 } else {
3146 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3147 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3148
3149 alu.dst.sel = tmp0;
3150 alu.dst.chan = 2;
3151 alu.dst.write = 1;
3152
3153 alu.src[0].sel = tmp0;
3154 alu.src[0].chan = 0;
3155
3156 if (signed_op) {
3157 alu.src[1].sel = tmp2;
3158 alu.src[1].chan = 0;
3159 } else {
3160 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3161 }
3162
3163 alu.last = 1;
3164 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3165 return r;
3166 }
3167
3168 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
3169 if (ctx->bc->chip_class == CAYMAN) {
3170 for (j = 0 ; j < 4; j++) {
3171 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3172 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3173
3174 alu.dst.sel = tmp0;
3175 alu.dst.chan = j;
3176 alu.dst.write = (j == 1);
3177
3178 if (signed_op) {
3179 alu.src[0].sel = tmp2;
3180 alu.src[0].chan = 1;
3181 } else {
3182 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3183 }
3184
3185 alu.src[1].sel = tmp0;
3186 alu.src[1].chan = 2;
3187
3188 alu.last = (j == 3);
3189 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3190 return r;
3191 }
3192 } else {
3193 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3194 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3195
3196 alu.dst.sel = tmp0;
3197 alu.dst.chan = 1;
3198 alu.dst.write = 1;
3199
3200 if (signed_op) {
3201 alu.src[0].sel = tmp2;
3202 alu.src[0].chan = 1;
3203 } else {
3204 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3205 }
3206
3207 alu.src[1].sel = tmp0;
3208 alu.src[1].chan = 2;
3209
3210 alu.last = 1;
3211 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3212 return r;
3213 }
3214
3215 /* 12. tmp0.w = src1 - tmp0.y = r */
3216 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3218
3219 alu.dst.sel = tmp0;
3220 alu.dst.chan = 3;
3221 alu.dst.write = 1;
3222
3223 if (signed_op) {
3224 alu.src[0].sel = tmp2;
3225 alu.src[0].chan = 0;
3226 } else {
3227 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3228 }
3229
3230 alu.src[1].sel = tmp0;
3231 alu.src[1].chan = 1;
3232
3233 alu.last = 1;
3234 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3235 return r;
3236
3237 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3238 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3240
3241 alu.dst.sel = tmp1;
3242 alu.dst.chan = 0;
3243 alu.dst.write = 1;
3244
3245 alu.src[0].sel = tmp0;
3246 alu.src[0].chan = 3;
3247 if (signed_op) {
3248 alu.src[1].sel = tmp2;
3249 alu.src[1].chan = 1;
3250 } else {
3251 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3252 }
3253
3254 alu.last = 1;
3255 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3256 return r;
3257
3258 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3259 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3260 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3261
3262 alu.dst.sel = tmp1;
3263 alu.dst.chan = 1;
3264 alu.dst.write = 1;
3265
3266 if (signed_op) {
3267 alu.src[0].sel = tmp2;
3268 alu.src[0].chan = 0;
3269 } else {
3270 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3271 }
3272
3273 alu.src[1].sel = tmp0;
3274 alu.src[1].chan = 1;
3275
3276 alu.last = 1;
3277 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3278 return r;
3279
3280 if (mod) { /* UMOD */
3281
3282 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3283 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3284 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3285
3286 alu.dst.sel = tmp1;
3287 alu.dst.chan = 2;
3288 alu.dst.write = 1;
3289
3290 alu.src[0].sel = tmp0;
3291 alu.src[0].chan = 3;
3292
3293 if (signed_op) {
3294 alu.src[1].sel = tmp2;
3295 alu.src[1].chan = 1;
3296 } else {
3297 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3298 }
3299
3300 alu.last = 1;
3301 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3302 return r;
3303
3304 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3305 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3306 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3307
3308 alu.dst.sel = tmp1;
3309 alu.dst.chan = 3;
3310 alu.dst.write = 1;
3311
3312 alu.src[0].sel = tmp0;
3313 alu.src[0].chan = 3;
3314 if (signed_op) {
3315 alu.src[1].sel = tmp2;
3316 alu.src[1].chan = 1;
3317 } else {
3318 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3319 }
3320
3321 alu.last = 1;
3322 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3323 return r;
3324
3325 } else { /* UDIV */
3326
3327 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3328 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3330
3331 alu.dst.sel = tmp1;
3332 alu.dst.chan = 2;
3333 alu.dst.write = 1;
3334
3335 alu.src[0].sel = tmp0;
3336 alu.src[0].chan = 2;
3337 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3338
3339 alu.last = 1;
3340 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3341 return r;
3342
3343 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3344 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3345 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3346
3347 alu.dst.sel = tmp1;
3348 alu.dst.chan = 3;
3349 alu.dst.write = 1;
3350
3351 alu.src[0].sel = tmp0;
3352 alu.src[0].chan = 2;
3353 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3354
3355 alu.last = 1;
3356 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3357 return r;
3358
3359 }
3360
3361 /* 17. tmp1.x = tmp1.x & tmp1.y */
3362 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3363 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3364
3365 alu.dst.sel = tmp1;
3366 alu.dst.chan = 0;
3367 alu.dst.write = 1;
3368
3369 alu.src[0].sel = tmp1;
3370 alu.src[0].chan = 0;
3371 alu.src[1].sel = tmp1;
3372 alu.src[1].chan = 1;
3373
3374 alu.last = 1;
3375 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3376 return r;
3377
3378 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3379 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3380 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3381 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3382 alu.is_op3 = 1;
3383
3384 alu.dst.sel = tmp0;
3385 alu.dst.chan = 2;
3386 alu.dst.write = 1;
3387
3388 alu.src[0].sel = tmp1;
3389 alu.src[0].chan = 0;
3390 alu.src[1].sel = tmp0;
3391 alu.src[1].chan = mod ? 3 : 2;
3392 alu.src[2].sel = tmp1;
3393 alu.src[2].chan = 2;
3394
3395 alu.last = 1;
3396 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3397 return r;
3398
3399 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3400 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3402 alu.is_op3 = 1;
3403
3404 if (signed_op) {
3405 alu.dst.sel = tmp0;
3406 alu.dst.chan = 2;
3407 alu.dst.write = 1;
3408 } else {
3409 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3410 }
3411
3412 alu.src[0].sel = tmp1;
3413 alu.src[0].chan = 1;
3414 alu.src[1].sel = tmp1;
3415 alu.src[1].chan = 3;
3416 alu.src[2].sel = tmp0;
3417 alu.src[2].chan = 2;
3418
3419 alu.last = 1;
3420 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3421 return r;
3422
3423 if (signed_op) {
3424
3425 /* fix the sign of the result */
3426
3427 if (mod) {
3428
3429 /* tmp0.x = -tmp0.z */
3430 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3431 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3432
3433 alu.dst.sel = tmp0;
3434 alu.dst.chan = 0;
3435 alu.dst.write = 1;
3436
3437 alu.src[0].sel = V_SQ_ALU_SRC_0;
3438 alu.src[1].sel = tmp0;
3439 alu.src[1].chan = 2;
3440
3441 alu.last = 1;
3442 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3443 return r;
3444
3445 /* sign of the remainder is the same as the sign of src0 */
3446 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3447 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3448 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3449 alu.is_op3 = 1;
3450
3451 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3452
3453 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3454 alu.src[1].sel = tmp0;
3455 alu.src[1].chan = 2;
3456 alu.src[2].sel = tmp0;
3457 alu.src[2].chan = 0;
3458
3459 alu.last = 1;
3460 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3461 return r;
3462
3463 } else {
3464
3465 /* tmp0.x = -tmp0.z */
3466 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3467 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3468
3469 alu.dst.sel = tmp0;
3470 alu.dst.chan = 0;
3471 alu.dst.write = 1;
3472
3473 alu.src[0].sel = V_SQ_ALU_SRC_0;
3474 alu.src[1].sel = tmp0;
3475 alu.src[1].chan = 2;
3476
3477 alu.last = 1;
3478 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3479 return r;
3480
3481 /* fix the quotient sign (same as the sign of src0*src1) */
3482 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3483 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3484 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3485 alu.is_op3 = 1;
3486
3487 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3488
3489 alu.src[0].sel = tmp2;
3490 alu.src[0].chan = 2;
3491 alu.src[1].sel = tmp0;
3492 alu.src[1].chan = 2;
3493 alu.src[2].sel = tmp0;
3494 alu.src[2].chan = 0;
3495
3496 alu.last = 1;
3497 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3498 return r;
3499 }
3500 }
3501 }
3502 return 0;
3503 }
3504
3505 static int tgsi_udiv(struct r600_shader_ctx *ctx)
3506 {
3507 return tgsi_divmod(ctx, 0, 0);
3508 }
3509
3510 static int tgsi_umod(struct r600_shader_ctx *ctx)
3511 {
3512 return tgsi_divmod(ctx, 1, 0);
3513 }
3514
3515 static int tgsi_idiv(struct r600_shader_ctx *ctx)
3516 {
3517 return tgsi_divmod(ctx, 0, 1);
3518 }
3519
3520 static int tgsi_imod(struct r600_shader_ctx *ctx)
3521 {
3522 return tgsi_divmod(ctx, 1, 1);
3523 }
3524
3525
3526 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3527 {
3528 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3529 struct r600_bytecode_alu alu;
3530 int i, r;
3531 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3532 int last_inst = tgsi_last_instruction(write_mask);
3533
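/* Two passes: first TRUNC each written channel into the temp register,
 * then run the conversion opcode selected by inst_info (FLT_TO_INT /
 * FLT_TO_UINT) on the truncated value. */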
3534 for (i = 0; i < 4; i++) {
3535 if (!(write_mask & (1<<i)))
3536 continue;
3537
3538 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3539 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3540
3541 alu.dst.sel = ctx->temp_reg;
3542 alu.dst.chan = i;
3543 alu.dst.write = 1;
3544
3545 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3546 if (i == last_inst)
3547 alu.last = 1;
3548 r = r600_bytecode_add_alu(ctx->bc, &alu);
3549 if (r)
3550 return r;
3551 }
3552
3553 for (i = 0; i < 4; i++) {
3554 if (!(write_mask & (1<<i)))
3555 continue;
3556
3557 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3558 alu.inst = ctx->inst_info->r600_opcode;
3559
3560 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3561
3562 alu.src[0].sel = ctx->temp_reg;
3563 alu.src[0].chan = i;
3564
3565 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3566 alu.last = 1;
3567 r = r600_bytecode_add_alu(ctx->bc, &alu);
3568 if (r)
3569 return r;
3570 }
3571
3572 return 0;
3573 }
3574
3575 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3576 {
3577 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3578 struct r600_bytecode_alu alu;
3579 int i, r;
3580 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3581 int last_inst = tgsi_last_instruction(write_mask);
3582
3583 /* tmp = -src */
3584 for (i = 0; i < 4; i++) {
3585 if (!(write_mask & (1<<i)))
3586 continue;
3587
3588 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3589 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3590
3591 alu.dst.sel = ctx->temp_reg;
3592 alu.dst.chan = i;
3593 alu.dst.write = 1;
3594
3595 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3596 alu.src[0].sel = V_SQ_ALU_SRC_0;
3597
3598 if (i == last_inst)
3599 alu.last = 1;
3600 r = r600_bytecode_add_alu(ctx->bc, &alu);
3601 if (r)
3602 return r;
3603 }
3604
3605 /* dst = (src >= 0 ? src : tmp) */
3606 for (i = 0; i < 4; i++) {
3607 if (!(write_mask & (1<<i)))
3608 continue;
3609
3610 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3611 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3612 alu.is_op3 = 1;
3613 alu.dst.write = 1;
3614
3615 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3616
3617 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3618 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3619 alu.src[2].sel = ctx->temp_reg;
3620 alu.src[2].chan = i;
3621
3622 if (i == last_inst)
3623 alu.last = 1;
3624 r = r600_bytecode_add_alu(ctx->bc, &alu);
3625 if (r)
3626 return r;
3627 }
3628 return 0;
3629 }
3630
3631 static int tgsi_issg(struct r600_shader_ctx *ctx)
3632 {
3633 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3634 struct r600_bytecode_alu alu;
3635 int i, r;
3636 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3637 int last_inst = tgsi_last_instruction(write_mask);
3638
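/* Combined effect of the two passes below: dst = isign(src),
 * i.e. 1 for src > 0, 0 for src == 0, -1 for src < 0. */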
3639 /* tmp = (src >= 0 ? src : -1) */
3640 for (i = 0; i < 4; i++) {
3641 if (!(write_mask & (1<<i)))
3642 continue;
3643
3644 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3645 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3646 alu.is_op3 = 1;
3647
3648 alu.dst.sel = ctx->temp_reg;
3649 alu.dst.chan = i;
3650 alu.dst.write = 1;
3651
3652 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3653 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3654 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3655
3656 if (i == last_inst)
3657 alu.last = 1;
3658 r = r600_bytecode_add_alu(ctx->bc, &alu);
3659 if (r)
3660 return r;
3661 }
3662
3663 /* dst = (tmp > 0 ? 1 : tmp) */
3664 for (i = 0; i < 4; i++) {
3665 if (!(write_mask & (1<<i)))
3666 continue;
3667
3668 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3669 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3670 alu.is_op3 = 1;
3671 alu.dst.write = 1;
3672
3673 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3674
3675 alu.src[0].sel = ctx->temp_reg;
3676 alu.src[0].chan = i;
3677
3678 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3679
3680 alu.src[2].sel = ctx->temp_reg;
3681 alu.src[2].chan = i;
3682
3683 if (i == last_inst)
3684 alu.last = 1;
3685 r = r600_bytecode_add_alu(ctx->bc, &alu);
3686 if (r)
3687 return r;
3688 }
3689 return 0;
3690 }
3691
3692
3693
3694 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3695 {
3696 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3697 struct r600_bytecode_alu alu;
3698 int i, r;
3699
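/* Combined effect of the two CNDGT passes below: dst = sign(src),
 * i.e. 1.0 for src > 0, 0.0 for src == 0, -1.0 for src < 0. */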
3700 /* tmp = (src > 0 ? 1 : src) */
3701 for (i = 0; i < 4; i++) {
3702 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3703 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3704 alu.is_op3 = 1;
3705
3706 alu.dst.sel = ctx->temp_reg;
3707 alu.dst.chan = i;
3708
3709 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3710 alu.src[1].sel = V_SQ_ALU_SRC_1;
3711 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3712
3713 if (i == 3)
3714 alu.last = 1;
3715 r = r600_bytecode_add_alu(ctx->bc, &alu);
3716 if (r)
3717 return r;
3718 }
3719
3720 /* dst = (-tmp > 0 ? -1 : tmp) */
3721 for (i = 0; i < 4; i++) {
3722 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3723 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3724 alu.is_op3 = 1;
3725 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3726
3727 alu.src[0].sel = ctx->temp_reg;
3728 alu.src[0].chan = i;
3729 alu.src[0].neg = 1;
3730
3731 alu.src[1].sel = V_SQ_ALU_SRC_1;
3732 alu.src[1].neg = 1;
3733
3734 alu.src[2].sel = ctx->temp_reg;
3735 alu.src[2].chan = i;
3736
3737 if (i == 3)
3738 alu.last = 1;
3739 r = r600_bytecode_add_alu(ctx->bc, &alu);
3740 if (r)
3741 return r;
3742 }
3743 return 0;
3744 }
3745
3746 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3747 {
3748 struct r600_bytecode_alu alu;
3749 int i, r;
3750
3751 for (i = 0; i < 4; i++) {
3752 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3753 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3754 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3755 alu.dst.chan = i;
3756 } else {
3757 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3758 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3759 alu.src[0].sel = ctx->temp_reg;
3760 alu.src[0].chan = i;
3761 }
3762 if (i == 3) {
3763 alu.last = 1;
3764 }
3765 r = r600_bytecode_add_alu(ctx->bc, &alu);
3766 if (r)
3767 return r;
3768 }
3769 return 0;
3770 }
3771
3772 static int tgsi_op3(struct r600_shader_ctx *ctx)
3773 {
3774 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3775 struct r600_bytecode_alu alu;
3776 int i, j, r;
3777 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3778
3779 for (i = 0; i < lasti + 1; i++) {
3780 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3781 continue;
3782
3783 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3784 alu.inst = ctx->inst_info->r600_opcode;
3785 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3786 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3787 }
3788
3789 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3790 alu.dst.chan = i;
3791 alu.dst.write = 1;
3792 alu.is_op3 = 1;
3793 if (i == lasti) {
3794 alu.last = 1;
3795 }
3796 r = r600_bytecode_add_alu(ctx->bc, &alu);
3797 if (r)
3798 return r;
3799 }
3800 return 0;
3801 }
3802
3803 static int tgsi_dp(struct r600_shader_ctx *ctx)
3804 {
3805 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3806 struct r600_bytecode_alu alu;
3807 int i, j, r;
3808
3809 for (i = 0; i < 4; i++) {
3810 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3811 alu.inst = ctx->inst_info->r600_opcode;
3812 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3813 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3814 }
3815
3816 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3817 alu.dst.chan = i;
3818 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3819 /* handle some special cases */
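/* DOT4 always sums four products, so DP2/DP3 zero out the unused
 * upper channels, and DPH forces src0.w to 1 so the w term reduces
 * to src1.w. */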
3820 switch (ctx->inst_info->tgsi_opcode) {
3821 case TGSI_OPCODE_DP2:
3822 if (i > 1) {
3823 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3824 alu.src[0].chan = alu.src[1].chan = 0;
3825 }
3826 break;
3827 case TGSI_OPCODE_DP3:
3828 if (i > 2) {
3829 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3830 alu.src[0].chan = alu.src[1].chan = 0;
3831 }
3832 break;
3833 case TGSI_OPCODE_DPH:
3834 if (i == 3) {
3835 alu.src[0].sel = V_SQ_ALU_SRC_1;
3836 alu.src[0].chan = 0;
3837 alu.src[0].neg = 0;
3838 }
3839 break;
3840 default:
3841 break;
3842 }
3843 if (i == 3) {
3844 alu.last = 1;
3845 }
3846 r = r600_bytecode_add_alu(ctx->bc, &alu);
3847 if (r)
3848 return r;
3849 }
3850 return 0;
3851 }
3852
3853 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3854 unsigned index)
3855 {
3856 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3857 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3858 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3859 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3860 ctx->src[index].neg || ctx->src[index].abs;
3861 }
3862
3863 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3864 unsigned index)
3865 {
3866 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3867 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3868 }
3869
3870 static int tgsi_tex(struct r600_shader_ctx *ctx)
3871 {
3872 static float one_point_five = 1.5f;
3873 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3874 struct r600_bytecode_tex tex;
3875 struct r600_bytecode_alu alu;
3876 unsigned src_gpr;
3877 int r, i, j;
3878 int opcode;
3879 bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED &&
3880 inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
3881 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
3882 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
3883 /* Texture fetch instructions can only use GPRs as sources.
3884 * They also cannot negate the source or take its absolute value. */
3885 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3886 tgsi_tex_src_requires_loading(ctx, 0)) ||
3887 read_compressed_msaa;
3888 boolean src_loaded = FALSE;
3889 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3890 int8_t offset_x = 0, offset_y = 0, offset_z = 0;
3891 boolean has_txq_cube_array_z = false;
3892
3893 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
3894 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
3895 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
3896 if (inst->Dst[0].Register.WriteMask & 4) {
3897 ctx->shader->has_txq_cube_array_z_comp = true;
3898 has_txq_cube_array_z = true;
3899 }
3900
3901 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
3902 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
3903 inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
3904 sampler_src_reg = 2;
3905
3906 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3907
3908 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3909 /* get offset values */
3910 if (inst->Texture.NumOffsets) {
3911 assert(inst->Texture.NumOffsets == 1);
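/* note: the literal offsets are shifted left by one below; the TEX
 * offset fields appear to be encoded in half-texel steps. */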
3912
3913 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3914 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3915 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3916 }
3917 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3918 /* TGSI moves the sampler to src reg 3 for TXD */
3919 sampler_src_reg = 3;
3920
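/* TXD carries explicit derivatives in src1 (ddx) and src2 (ddy);
 * feed them to the texture unit with SET_GRADIENTS_H/V before the
 * actual sample instruction is issued. */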
3921 for (i = 1; i < 3; i++) {
3922 /* set gradients h/v */
3923 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3924 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3925 SQ_TEX_INST_SET_GRADIENTS_V;
3926 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3927 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3928
3929 if (tgsi_tex_src_requires_loading(ctx, i)) {
3930 tex.src_gpr = r600_get_temp(ctx);
3931 tex.src_sel_x = 0;
3932 tex.src_sel_y = 1;
3933 tex.src_sel_z = 2;
3934 tex.src_sel_w = 3;
3935
3936 for (j = 0; j < 4; j++) {
3937 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3939 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3940 alu.dst.sel = tex.src_gpr;
3941 alu.dst.chan = j;
3942 if (j == 3)
3943 alu.last = 1;
3944 alu.dst.write = 1;
3945 r = r600_bytecode_add_alu(ctx->bc, &alu);
3946 if (r)
3947 return r;
3948 }
3949
3950 } else {
3951 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3952 tex.src_sel_x = ctx->src[i].swizzle[0];
3953 tex.src_sel_y = ctx->src[i].swizzle[1];
3954 tex.src_sel_z = ctx->src[i].swizzle[2];
3955 tex.src_sel_w = ctx->src[i].swizzle[3];
3956 tex.src_rel = ctx->src[i].rel;
3957 }
3958 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3959 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3960 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3961 tex.coord_type_x = 1;
3962 tex.coord_type_y = 1;
3963 tex.coord_type_z = 1;
3964 tex.coord_type_w = 1;
3965 }
3966 r = r600_bytecode_add_tex(ctx->bc, &tex);
3967 if (r)
3968 return r;
3969 }
3970 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3971 int out_chan;
3972 /* Add perspective divide */
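/* i.e. tmp.xyz = src.xyz * (1 / src.w) via RECIP_IEEE + MUL,
 * tmp.w = 1.0, then the texture is sampled with tmp. */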
3973 if (ctx->bc->chip_class == CAYMAN) {
3974 out_chan = 2;
3975 for (i = 0; i < 3; i++) {
3976 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3977 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3978 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3979
3980 alu.dst.sel = ctx->temp_reg;
3981 alu.dst.chan = i;
3982 if (i == 2)
3983 alu.last = 1;
3984 if (out_chan == i)
3985 alu.dst.write = 1;
3986 r = r600_bytecode_add_alu(ctx->bc, &alu);
3987 if (r)
3988 return r;
3989 }
3990
3991 } else {
3992 out_chan = 3;
3993 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3995 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3996
3997 alu.dst.sel = ctx->temp_reg;
3998 alu.dst.chan = out_chan;
3999 alu.last = 1;
4000 alu.dst.write = 1;
4001 r = r600_bytecode_add_alu(ctx->bc, &alu);
4002 if (r)
4003 return r;
4004 }
4005
4006 for (i = 0; i < 3; i++) {
4007 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4008 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4009 alu.src[0].sel = ctx->temp_reg;
4010 alu.src[0].chan = out_chan;
4011 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4012 alu.dst.sel = ctx->temp_reg;
4013 alu.dst.chan = i;
4014 alu.dst.write = 1;
4015 r = r600_bytecode_add_alu(ctx->bc, &alu);
4016 if (r)
4017 return r;
4018 }
4019 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4020 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4021 alu.src[0].sel = V_SQ_ALU_SRC_1;
4022 alu.src[0].chan = 0;
4023 alu.dst.sel = ctx->temp_reg;
4024 alu.dst.chan = 3;
4025 alu.last = 1;
4026 alu.dst.write = 1;
4027 r = r600_bytecode_add_alu(ctx->bc, &alu);
4028 if (r)
4029 return r;
4030 src_loaded = TRUE;
4031 src_gpr = ctx->temp_reg;
4032 }
4033
4034 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
4035 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
4036 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4037 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
4038 inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
4039 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
4040
4041 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
4042 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
4043
4044 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4045 for (i = 0; i < 4; i++) {
4046 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4047 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
4048 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4049 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
4050 alu.dst.sel = ctx->temp_reg;
4051 alu.dst.chan = i;
4052 if (i == 3)
4053 alu.last = 1;
4054 alu.dst.write = 1;
4055 r = r600_bytecode_add_alu(ctx->bc, &alu);
4056 if (r)
4057 return r;
4058 }
4059
4060 /* tmp1.z = RCP_e(|tmp1.z|) */
4061 if (ctx->bc->chip_class == CAYMAN) {
4062 for (i = 0; i < 3; i++) {
4063 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4064 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4065 alu.src[0].sel = ctx->temp_reg;
4066 alu.src[0].chan = 2;
4067 alu.src[0].abs = 1;
4068 alu.dst.sel = ctx->temp_reg;
4069 alu.dst.chan = i;
4070 if (i == 2)
4071 alu.dst.write = 1;
4072 if (i == 2)
4073 alu.last = 1;
4074 r = r600_bytecode_add_alu(ctx->bc, &alu);
4075 if (r)
4076 return r;
4077 }
4078 } else {
4079 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4080 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4081 alu.src[0].sel = ctx->temp_reg;
4082 alu.src[0].chan = 2;
4083 alu.src[0].abs = 1;
4084 alu.dst.sel = ctx->temp_reg;
4085 alu.dst.chan = 2;
4086 alu.dst.write = 1;
4087 alu.last = 1;
4088 r = r600_bytecode_add_alu(ctx->bc, &alu);
4089 if (r)
4090 return r;
4091 }
4092
4093 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4094 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4095 * muladd has no writemask, have to use another temp
4096 */
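/* i.e. tmp.x = tmp.x * tmp.z + 1.5 and tmp.y = tmp.y * tmp.z + 1.5,
 * where tmp.z now holds 1 / |major axis|; the 1.5 bias presumably
 * moves the per-face coordinates into the range the sampler expects. */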
4097 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4098 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4099 alu.is_op3 = 1;
4100
4101 alu.src[0].sel = ctx->temp_reg;
4102 alu.src[0].chan = 0;
4103 alu.src[1].sel = ctx->temp_reg;
4104 alu.src[1].chan = 2;
4105
4106 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
4107 alu.src[2].chan = 0;
4108 alu.src[2].value = *(uint32_t *)&one_point_five;
4109
4110 alu.dst.sel = ctx->temp_reg;
4111 alu.dst.chan = 0;
4112 alu.dst.write = 1;
4113
4114 r = r600_bytecode_add_alu(ctx->bc, &alu);
4115 if (r)
4116 return r;
4117
4118 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4119 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4120 alu.is_op3 = 1;
4121
4122 alu.src[0].sel = ctx->temp_reg;
4123 alu.src[0].chan = 1;
4124 alu.src[1].sel = ctx->temp_reg;
4125 alu.src[1].chan = 2;
4126
4127 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
4128 alu.src[2].chan = 0;
4129 alu.src[2].value = *(uint32_t *)&one_point_five;
4130
4131 alu.dst.sel = ctx->temp_reg;
4132 alu.dst.chan = 1;
4133 alu.dst.write = 1;
4134
4135 alu.last = 1;
4136 r = r600_bytecode_add_alu(ctx->bc, &alu);
4137 if (r)
4138 return r;
4139 /* write the initial compare value into the Z component:
4140 - src0.w for shadow cube
4141 - src1.x for shadow cube array */
4142 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4143 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
4144 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4145 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4146 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
4147 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
4148 else
4149 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4150 alu.dst.sel = ctx->temp_reg;
4151 alu.dst.chan = 2;
4152 alu.dst.write = 1;
4153 alu.last = 1;
4154 r = r600_bytecode_add_alu(ctx->bc, &alu);
4155 if (r)
4156 return r;
4157 }
4158
4159 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
4160 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
4161 if (ctx->bc->chip_class >= EVERGREEN) {
4162 int mytmp = r600_get_temp(ctx);
4163 static const float eight = 8.0f;
4164 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4165 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4166 alu.src[0].sel = ctx->temp_reg;
4167 alu.src[0].chan = 3;
4168 alu.dst.sel = mytmp;
4169 alu.dst.chan = 0;
4170 alu.dst.write = 1;
4171 alu.last = 1;
4172 r = r600_bytecode_add_alu(ctx->bc, &alu);
4173 if (r)
4174 return r;
4175
4176 /* multiply the original layer by 8 and add the face id (temp.w); the result lands in temp.w, which is routed into Z by the cube swizzle below */
4177 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4178 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4179 alu.is_op3 = 1;
4180 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4181 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4182 alu.src[1].chan = 0;
4183 alu.src[1].value = *(uint32_t *)&eight;
4184 alu.src[2].sel = mytmp;
4185 alu.src[2].chan = 0;
4186 alu.dst.sel = ctx->temp_reg;
4187 alu.dst.chan = 3;
4188 alu.dst.write = 1;
4189 alu.last = 1;
4190 r = r600_bytecode_add_alu(ctx->bc, &alu);
4191 if (r)
4192 return r;
4193 } else if (ctx->bc->chip_class < EVERGREEN) {
4194 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4195 tex.inst = SQ_TEX_INST_SET_CUBEMAP_INDEX;
4196 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4197 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4198 tex.src_gpr = r600_get_temp(ctx);
4199 tex.src_sel_x = 0;
4200 tex.src_sel_y = 0;
4201 tex.src_sel_z = 0;
4202 tex.src_sel_w = 0;
4203 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
4204 tex.coord_type_x = 1;
4205 tex.coord_type_y = 1;
4206 tex.coord_type_z = 1;
4207 tex.coord_type_w = 1;
4208 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4209 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4210 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4211 alu.dst.sel = tex.src_gpr;
4212 alu.dst.chan = 0;
4213 alu.last = 1;
4214 alu.dst.write = 1;
4215 r = r600_bytecode_add_alu(ctx->bc, &alu);
4216 if (r)
4217 return r;
4218
4219 r = r600_bytecode_add_tex(ctx->bc, &tex);
4220 if (r)
4221 return r;
4222 }
4223
4224 }
4225
4226 /* for cube forms of lod and bias we need to route the lod/bias value into temp.z */
4227 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
4228 inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
4229 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
4230 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
4231 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4232 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4233 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
4234 inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
4235 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
4236 else
4237 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4238 alu.dst.sel = ctx->temp_reg;
4239 alu.dst.chan = 2;
4240 alu.last = 1;
4241 alu.dst.write = 1;
4242 r = r600_bytecode_add_alu(ctx->bc, &alu);
4243 if (r)
4244 return r;
4245 }
4246
4247 src_loaded = TRUE;
4248 src_gpr = ctx->temp_reg;
4249 }
4250
4251 if (src_requires_loading && !src_loaded) {
4252 for (i = 0; i < 4; i++) {
4253 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4254 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4255 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4256 alu.dst.sel = ctx->temp_reg;
4257 alu.dst.chan = i;
4258 if (i == 3)
4259 alu.last = 1;
4260 alu.dst.write = 1;
4261 r = r600_bytecode_add_alu(ctx->bc, &alu);
4262 if (r)
4263 return r;
4264 }
4265 src_loaded = TRUE;
4266 src_gpr = ctx->temp_reg;
4267 }
4268
4269 /* Obtain the sample index for reading a compressed MSAA color texture.
4270 * To read the FMASK, we use the ldfptr instruction, which tells us
4271 * where the samples are stored.
4272 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
4273 * which is the identity mapping. Each nibble says which physical sample
4274 * should be fetched to get that sample.
4275 *
4276 * Assume src.z contains the sample index. It should be modified like this:
4277 * src.z = (ldfptr() >> (src.z * 4)) & 0xF;
4278 * Then fetch the texel with src.
4279 */
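/* e.g. with the identity FMASK 0x76543210 and src.z = 2:
 * (0x76543210 >> 8) & 0xF = 2, i.e. physical sample 2 is fetched. */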
4280 if (read_compressed_msaa) {
4281 unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4;
4282 unsigned temp = r600_get_temp(ctx);
4283 assert(src_loaded);
4284
4285 /* temp.w = ldfptr() */
4286 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4287 tex.inst = SQ_TEX_INST_LD;
4288 tex.inst_mod = 1; /* to indicate this is ldfptr */
4289 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4290 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4291 tex.src_gpr = src_gpr;
4292 tex.dst_gpr = temp;
4293 tex.dst_sel_x = 7; /* mask out these components */
4294 tex.dst_sel_y = 7;
4295 tex.dst_sel_z = 7;
4296 tex.dst_sel_w = 0; /* store X */
4297 tex.src_sel_x = 0;
4298 tex.src_sel_y = 1;
4299 tex.src_sel_z = 2;
4300 tex.src_sel_w = 3;
4301 tex.offset_x = offset_x;
4302 tex.offset_y = offset_y;
4303 tex.offset_z = offset_z;
4304 r = r600_bytecode_add_tex(ctx->bc, &tex);
4305 if (r)
4306 return r;
4307
4308 /* temp.x = sample_index*4 */
4309 if (ctx->bc->chip_class == CAYMAN) {
4310 for (i = 0 ; i < 4; i++) {
4311 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
4313 alu.src[0].sel = src_gpr;
4314 alu.src[0].chan = sample_chan;
4315 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4316 alu.src[1].value = 4;
4317 alu.dst.sel = temp;
4318 alu.dst.chan = i;
4319 alu.dst.write = i == 0;
4320 if (i == 3)
4321 alu.last = 1;
4322 r = r600_bytecode_add_alu(ctx->bc, &alu);
4323 if (r)
4324 return r;
4325 }
4326 } else {
4327 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4328 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
4329 alu.src[0].sel = src_gpr;
4330 alu.src[0].chan = sample_chan;
4331 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4332 alu.src[1].value = 4;
4333 alu.dst.sel = temp;
4334 alu.dst.chan = 0;
4335 alu.dst.write = 1;
4336 alu.last = 1;
4337 r = r600_bytecode_add_alu(ctx->bc, &alu);
4338 if (r)
4339 return r;
4340 }
4341
4342 /* sample_index = temp.w >> temp.x */
4343 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4344 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
4345 alu.src[0].sel = temp;
4346 alu.src[0].chan = 3;
4347 alu.src[1].sel = temp;
4348 alu.src[1].chan = 0;
4349 alu.dst.sel = src_gpr;
4350 alu.dst.chan = sample_chan;
4351 alu.dst.write = 1;
4352 alu.last = 1;
4353 r = r600_bytecode_add_alu(ctx->bc, &alu);
4354 if (r)
4355 return r;
4356
4357 /* sample_index & 0xF */
4358 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4359 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
4360 alu.src[0].sel = src_gpr;
4361 alu.src[0].chan = sample_chan;
4362 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4363 alu.src[1].value = 0xF;
4364 alu.dst.sel = src_gpr;
4365 alu.dst.chan = sample_chan;
4366 alu.dst.write = 1;
4367 alu.last = 1;
4368 r = r600_bytecode_add_alu(ctx->bc, &alu);
4369 if (r)
4370 return r;
4371 #if 0
4372 /* visualize the FMASK */
4373 for (i = 0; i < 4; i++) {
4374 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4375 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
4376 alu.src[0].sel = src_gpr;
4377 alu.src[0].chan = sample_chan;
4378 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4379 alu.dst.chan = i;
4380 alu.dst.write = 1;
4381 alu.last = 1;
4382 r = r600_bytecode_add_alu(ctx->bc, &alu);
4383 if (r)
4384 return r;
4385 }
4386 return 0;
4387 #endif
4388 }
4389
4390 /* does this shader want the number of layers from TXQ for a cube array? */
4391 if (has_txq_cube_array_z) {
4392 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4393
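/* note: the cube array layer count is apparently not available from the
 * hardware query itself, so it is read from the R600_TXQ_CONST_BUFFER
 * constant buffer and written straight into dst.z; the Z bit is then
 * dropped from the texture writemask below. */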
4394 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4396
4397 alu.src[0].sel = 512 + (id / 4);
4398 alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER;
4399 alu.src[0].chan = id % 4;
4400 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
4401 alu.last = 1;
4402 r = r600_bytecode_add_alu(ctx->bc, &alu);
4403 if (r)
4404 return r;
4405 /* disable writemask from texture instruction */
4406 inst->Dst[0].Register.WriteMask &= ~4;
4407 }
4408
4409 opcode = ctx->inst_info->r600_opcode;
4410 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4411 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4412 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4413 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4414 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4415 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
4416 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
4417 switch (opcode) {
4418 case SQ_TEX_INST_SAMPLE:
4419 opcode = SQ_TEX_INST_SAMPLE_C;
4420 break;
4421 case SQ_TEX_INST_SAMPLE_L:
4422 opcode = SQ_TEX_INST_SAMPLE_C_L;
4423 break;
4424 case SQ_TEX_INST_SAMPLE_LB:
4425 opcode = SQ_TEX_INST_SAMPLE_C_LB;
4426 break;
4427 case SQ_TEX_INST_SAMPLE_G:
4428 opcode = SQ_TEX_INST_SAMPLE_C_G;
4429 break;
4430 }
4431 }
4432
4433 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4434 tex.inst = opcode;
4435
4436 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4437 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4438 tex.src_gpr = src_gpr;
4439 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4440 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4441 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4442 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4443 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
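/* dst_sel 7 masks a component out; written channels get the fetch
 * result routed straight through (x->x, y->y, z->z, w->w). */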
4444
4445 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
4446 tex.src_sel_x = 4;
4447 tex.src_sel_y = 4;
4448 tex.src_sel_z = 4;
4449 tex.src_sel_w = 4;
4450 } else if (src_loaded) {
4451 tex.src_sel_x = 0;
4452 tex.src_sel_y = 1;
4453 tex.src_sel_z = 2;
4454 tex.src_sel_w = 3;
4455 } else {
4456 tex.src_sel_x = ctx->src[0].swizzle[0];
4457 tex.src_sel_y = ctx->src[0].swizzle[1];
4458 tex.src_sel_z = ctx->src[0].swizzle[2];
4459 tex.src_sel_w = ctx->src[0].swizzle[3];
4460 tex.src_rel = ctx->src[0].rel;
4461 }
4462
4463 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
4464 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4465 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
4466 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
4467 tex.src_sel_x = 1;
4468 tex.src_sel_y = 0;
4469 tex.src_sel_z = 3;
4470 tex.src_sel_w = 2; /* route Z compare or Lod value into W */
4471 }
4472
4473 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4474 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4475 tex.coord_type_x = 1;
4476 tex.coord_type_y = 1;
4477 }
4478 tex.coord_type_z = 1;
4479 tex.coord_type_w = 1;
4480
4481 tex.offset_x = offset_x;
4482 tex.offset_y = offset_y;
4483 tex.offset_z = offset_z;
4484
4485 /* Put the depth for comparison in W.
4486 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4487 * Some instructions expect the depth in Z. */
4488 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4489 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4490 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4491 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4492 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4493 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4494 tex.src_sel_w = tex.src_sel_z;
4495 }
4496
4497 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4498 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4499 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4500 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4501 /* the array index is read from Y */
4502 tex.coord_type_y = 0;
4503 } else {
4504 /* the array index is read from Z */
4505 tex.coord_type_z = 0;
4506 tex.src_sel_z = tex.src_sel_y;
4507 }
4508 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4509 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
4510 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
4511 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
4512 (ctx->bc->chip_class >= EVERGREEN)))
4513 /* the array index is read from Z */
4514 tex.coord_type_z = 0;
4515
4516 r = r600_bytecode_add_tex(ctx->bc, &tex);
4517 if (r)
4518 return r;
4519
4520 /* add shadow ambient support - gallium doesn't do it yet */
4521 return 0;
4522 }
4523
4524 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4525 {
4526 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4527 struct r600_bytecode_alu alu;
4528 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4529 unsigned i;
4530 int r;
4531
4532 /* optimize if it's just an equal balance */
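/* with src0 == 0.5, lrp(src0, src1, src2) = (src1 + src2) / 2;
 * omod = 3 below selects the divide-by-two output modifier. */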
4533 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4534 for (i = 0; i < lasti + 1; i++) {
4535 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4536 continue;
4537
4538 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4539 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4540 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4541 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4542 alu.omod = 3;
4543 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4544 alu.dst.chan = i;
4545 if (i == lasti) {
4546 alu.last = 1;
4547 }
4548 r = r600_bytecode_add_alu(ctx->bc, &alu);
4549 if (r)
4550 return r;
4551 }
4552 return 0;
4553 }
4554
4555 /* 1 - src0 */
4556 for (i = 0; i < lasti + 1; i++) {
4557 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4558 continue;
4559
4560 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4561 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4562 alu.src[0].sel = V_SQ_ALU_SRC_1;
4563 alu.src[0].chan = 0;
4564 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4565 r600_bytecode_src_toggle_neg(&alu.src[1]);
4566 alu.dst.sel = ctx->temp_reg;
4567 alu.dst.chan = i;
4568 if (i == lasti) {
4569 alu.last = 1;
4570 }
4571 alu.dst.write = 1;
4572 r = r600_bytecode_add_alu(ctx->bc, &alu);
4573 if (r)
4574 return r;
4575 }
4576
4577 /* (1 - src0) * src2 */
4578 for (i = 0; i < lasti + 1; i++) {
4579 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4580 continue;
4581
4582 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4584 alu.src[0].sel = ctx->temp_reg;
4585 alu.src[0].chan = i;
4586 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4587 alu.dst.sel = ctx->temp_reg;
4588 alu.dst.chan = i;
4589 if (i == lasti) {
4590 alu.last = 1;
4591 }
4592 alu.dst.write = 1;
4593 r = r600_bytecode_add_alu(ctx->bc, &alu);
4594 if (r)
4595 return r;
4596 }
4597
4598 /* src0 * src1 + (1 - src0) * src2 */
4599 for (i = 0; i < lasti + 1; i++) {
4600 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4601 continue;
4602
4603 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4604 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4605 alu.is_op3 = 1;
4606 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4607 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4608 alu.src[2].sel = ctx->temp_reg;
4609 alu.src[2].chan = i;
4610
4611 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4612 alu.dst.chan = i;
4613 if (i == lasti) {
4614 alu.last = 1;
4615 }
4616 r = r600_bytecode_add_alu(ctx->bc, &alu);
4617 if (r)
4618 return r;
4619 }
4620 return 0;
4621 }
4622
4623 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4624 {
4625 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4626 struct r600_bytecode_alu alu;
4627 int i, r;
4628 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4629
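/* TGSI CMP: dst = (src0 < 0) ? src1 : src2.  CNDGE(a, b, c) selects b
 * when a >= 0 and c otherwise, hence the swapped src1/src2 below. */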
4630 for (i = 0; i < lasti + 1; i++) {
4631 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4632 continue;
4633
4634 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4635 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4636 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4637 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4638 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4639 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4640 alu.dst.chan = i;
4641 alu.dst.write = 1;
4642 alu.is_op3 = 1;
4643 if (i == lasti)
4644 alu.last = 1;
4645 r = r600_bytecode_add_alu(ctx->bc, &alu);
4646 if (r)
4647 return r;
4648 }
4649 return 0;
4650 }
4651
4652 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4653 {
4654 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4655 static const unsigned int src0_swizzle[] = {2, 0, 1};
4656 static const unsigned int src1_swizzle[] = {1, 2, 0};
4657 struct r600_bytecode_alu alu;
4658 uint32_t use_temp = 0;
4659 int i, r;
4660
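/* XPD (cross product): dst.xyz = src0.yzx * src1.zxy - src0.zxy * src1.yzx.
 * The MUL loop builds the second term in temp_reg; the MULADD loop then
 * computes the first term and subtracts it (src2 negated). */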
4661 if (inst->Dst[0].Register.WriteMask != 0xf)
4662 use_temp = 1;
4663
4664 for (i = 0; i < 4; i++) {
4665 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4666 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4667 if (i < 3) {
4668 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4669 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4670 } else {
4671 alu.src[0].sel = V_SQ_ALU_SRC_0;
4672 alu.src[0].chan = i;
4673 alu.src[1].sel = V_SQ_ALU_SRC_0;
4674 alu.src[1].chan = i;
4675 }
4676
4677 alu.dst.sel = ctx->temp_reg;
4678 alu.dst.chan = i;
4679 alu.dst.write = 1;
4680
4681 if (i == 3)
4682 alu.last = 1;
4683 r = r600_bytecode_add_alu(ctx->bc, &alu);
4684 if (r)
4685 return r;
4686 }
4687
4688 for (i = 0; i < 4; i++) {
4689 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4690 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4691
4692 if (i < 3) {
4693 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4694 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4695 } else {
4696 alu.src[0].sel = V_SQ_ALU_SRC_0;
4697 alu.src[0].chan = i;
4698 alu.src[1].sel = V_SQ_ALU_SRC_0;
4699 alu.src[1].chan = i;
4700 }
4701
4702 alu.src[2].sel = ctx->temp_reg;
4703 alu.src[2].neg = 1;
4704 alu.src[2].chan = i;
4705
4706 if (use_temp)
4707 alu.dst.sel = ctx->temp_reg;
4708 else
4709 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4710 alu.dst.chan = i;
4711 alu.dst.write = 1;
4712 alu.is_op3 = 1;
4713 if (i == 3)
4714 alu.last = 1;
4715 r = r600_bytecode_add_alu(ctx->bc, &alu);
4716 if (r)
4717 return r;
4718 }
4719 if (use_temp)
4720 return tgsi_helper_copy(ctx, inst);
4721 return 0;
4722 }
4723
4724 static int tgsi_exp(struct r600_shader_ctx *ctx)
4725 {
4726 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4727 struct r600_bytecode_alu alu;
4728 int r;
4729 int i;
4730
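/* TGSI EXP: result = (2^floor(src.x), src.x - floor(src.x), 2^src.x, 1.0),
 * built per component in temp_reg and copied out by tgsi_helper_copy(). */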
4731 /* result.x = 2^floor(src); */
4732 if (inst->Dst[0].Register.WriteMask & 1) {
4733 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4734
4735 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4736 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4737
4738 alu.dst.sel = ctx->temp_reg;
4739 alu.dst.chan = 0;
4740 alu.dst.write = 1;
4741 alu.last = 1;
4742 r = r600_bytecode_add_alu(ctx->bc, &alu);
4743 if (r)
4744 return r;
4745
4746 if (ctx->bc->chip_class == CAYMAN) {
4747 for (i = 0; i < 3; i++) {
4748 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4749 alu.src[0].sel = ctx->temp_reg;
4750 alu.src[0].chan = 0;
4751
4752 alu.dst.sel = ctx->temp_reg;
4753 alu.dst.chan = i;
4754 alu.dst.write = i == 0;
4755 alu.last = i == 2;
4756 r = r600_bytecode_add_alu(ctx->bc, &alu);
4757 if (r)
4758 return r;
4759 }
4760 } else {
4761 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4762 alu.src[0].sel = ctx->temp_reg;
4763 alu.src[0].chan = 0;
4764
4765 alu.dst.sel = ctx->temp_reg;
4766 alu.dst.chan = 0;
4767 alu.dst.write = 1;
4768 alu.last = 1;
4769 r = r600_bytecode_add_alu(ctx->bc, &alu);
4770 if (r)
4771 return r;
4772 }
4773 }
4774
4775 /* result.y = src.x - floor(src.x), i.e. FRACT(src.x) */
4776 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4777 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4778
4779 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4780 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4781
4782 alu.dst.sel = ctx->temp_reg;
4783 #if 0
4784 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4785 if (r)
4786 return r;
4787 #endif
4788 alu.dst.write = 1;
4789 alu.dst.chan = 1;
4790
4791 alu.last = 1;
4792
4793 r = r600_bytecode_add_alu(ctx->bc, &alu);
4794 if (r)
4795 return r;
4796 }
4797
4798 /* result.z = RoughApprox2ToX(src.x); */
4799 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4800 if (ctx->bc->chip_class == CAYMAN) {
4801 for (i = 0; i < 3; i++) {
4802 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4803 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4804 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4805
4806 alu.dst.sel = ctx->temp_reg;
4807 alu.dst.chan = i;
4808 if (i == 2) {
4809 alu.dst.write = 1;
4810 alu.last = 1;
4811 }
4812
4813 r = r600_bytecode_add_alu(ctx->bc, &alu);
4814 if (r)
4815 return r;
4816 }
4817 } else {
4818 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4819 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4820 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4821
4822 alu.dst.sel = ctx->temp_reg;
4823 alu.dst.write = 1;
4824 alu.dst.chan = 2;
4825
4826 alu.last = 1;
4827
4828 r = r600_bytecode_add_alu(ctx->bc, &alu);
4829 if (r)
4830 return r;
4831 }
4832 }
4833
4834 /* result.w = 1.0; */
4835 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4836 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4837
4838 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4839 alu.src[0].sel = V_SQ_ALU_SRC_1;
4840 alu.src[0].chan = 0;
4841
4842 alu.dst.sel = ctx->temp_reg;
4843 alu.dst.chan = 3;
4844 alu.dst.write = 1;
4845 alu.last = 1;
4846 r = r600_bytecode_add_alu(ctx->bc, &alu);
4847 if (r)
4848 return r;
4849 }
4850 return tgsi_helper_copy(ctx, inst);
4851 }
4852
4853 static int tgsi_log(struct r600_shader_ctx *ctx)
4854 {
4855 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4856 struct r600_bytecode_alu alu;
4857 int r;
4858 int i;
4859
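/* TGSI LOG: result = (floor(log2|src.x|), |src.x| / 2^floor(log2|src.x|),
 * log2|src.x|, 1.0), built in temp_reg and copied out by tgsi_helper_copy(). */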
4860 /* result.x = floor(log2(|src|)); */
4861 if (inst->Dst[0].Register.WriteMask & 1) {
4862 if (ctx->bc->chip_class == CAYMAN) {
4863 for (i = 0; i < 3; i++) {
4864 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4865
4866 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4867 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4868 r600_bytecode_src_set_abs(&alu.src[0]);
4869
4870 alu.dst.sel = ctx->temp_reg;
4871 alu.dst.chan = i;
4872 if (i == 0)
4873 alu.dst.write = 1;
4874 if (i == 2)
4875 alu.last = 1;
4876 r = r600_bytecode_add_alu(ctx->bc, &alu);
4877 if (r)
4878 return r;
4879 }
4880
4881 } else {
4882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4883
4884 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4885 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4886 r600_bytecode_src_set_abs(&alu.src[0]);
4887
4888 alu.dst.sel = ctx->temp_reg;
4889 alu.dst.chan = 0;
4890 alu.dst.write = 1;
4891 alu.last = 1;
4892 r = r600_bytecode_add_alu(ctx->bc, &alu);
4893 if (r)
4894 return r;
4895 }
4896
4897 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4898 alu.src[0].sel = ctx->temp_reg;
4899 alu.src[0].chan = 0;
4900
4901 alu.dst.sel = ctx->temp_reg;
4902 alu.dst.chan = 0;
4903 alu.dst.write = 1;
4904 alu.last = 1;
4905
4906 r = r600_bytecode_add_alu(ctx->bc, &alu);
4907 if (r)
4908 return r;
4909 }
4910
4911 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4912 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4913
4914 if (ctx->bc->chip_class == CAYMAN) {
4915 for (i = 0; i < 3; i++) {
4916 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4917
4918 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4919 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4920 r600_bytecode_src_set_abs(&alu.src[0]);
4921
4922 alu.dst.sel = ctx->temp_reg;
4923 alu.dst.chan = i;
4924 if (i == 1)
4925 alu.dst.write = 1;
4926 if (i == 2)
4927 alu.last = 1;
4928
4929 r = r600_bytecode_add_alu(ctx->bc, &alu);
4930 if (r)
4931 return r;
4932 }
4933 } else {
4934 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4935
4936 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4937 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4938 r600_bytecode_src_set_abs(&alu.src[0]);
4939
4940 alu.dst.sel = ctx->temp_reg;
4941 alu.dst.chan = 1;
4942 alu.dst.write = 1;
4943 alu.last = 1;
4944
4945 r = r600_bytecode_add_alu(ctx->bc, &alu);
4946 if (r)
4947 return r;
4948 }
4949
4950 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4951
4952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4953 alu.src[0].sel = ctx->temp_reg;
4954 alu.src[0].chan = 1;
4955
4956 alu.dst.sel = ctx->temp_reg;
4957 alu.dst.chan = 1;
4958 alu.dst.write = 1;
4959 alu.last = 1;
4960
4961 r = r600_bytecode_add_alu(ctx->bc, &alu);
4962 if (r)
4963 return r;
4964
4965 if (ctx->bc->chip_class == CAYMAN) {
4966 for (i = 0; i < 3; i++) {
4967 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4968 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4969 alu.src[0].sel = ctx->temp_reg;
4970 alu.src[0].chan = 1;
4971
4972 alu.dst.sel = ctx->temp_reg;
4973 alu.dst.chan = i;
4974 if (i == 1)
4975 alu.dst.write = 1;
4976 if (i == 2)
4977 alu.last = 1;
4978
4979 r = r600_bytecode_add_alu(ctx->bc, &alu);
4980 if (r)
4981 return r;
4982 }
4983 } else {
4984 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4985 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4986 alu.src[0].sel = ctx->temp_reg;
4987 alu.src[0].chan = 1;
4988
4989 alu.dst.sel = ctx->temp_reg;
4990 alu.dst.chan = 1;
4991 alu.dst.write = 1;
4992 alu.last = 1;
4993
4994 r = r600_bytecode_add_alu(ctx->bc, &alu);
4995 if (r)
4996 return r;
4997 }
4998
4999 if (ctx->bc->chip_class == CAYMAN) {
5000 for (i = 0; i < 3; i++) {
5001 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5002 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
5003 alu.src[0].sel = ctx->temp_reg;
5004 alu.src[0].chan = 1;
5005
5006 alu.dst.sel = ctx->temp_reg;
5007 alu.dst.chan = i;
5008 if (i == 1)
5009 alu.dst.write = 1;
5010 if (i == 2)
5011 alu.last = 1;
5012
5013 r = r600_bytecode_add_alu(ctx->bc, &alu);
5014 if (r)
5015 return r;
5016 }
5017 } else {
5018 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5019 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
5020 alu.src[0].sel = ctx->temp_reg;
5021 alu.src[0].chan = 1;
5022
5023 alu.dst.sel = ctx->temp_reg;
5024 alu.dst.chan = 1;
5025 alu.dst.write = 1;
5026 alu.last = 1;
5027
5028 r = r600_bytecode_add_alu(ctx->bc, &alu);
5029 if (r)
5030 return r;
5031 }
5032
5033 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5034
5035 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
5036
5037 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5038 r600_bytecode_src_set_abs(&alu.src[0]);
5039
5040 alu.src[1].sel = ctx->temp_reg;
5041 alu.src[1].chan = 1;
5042
5043 alu.dst.sel = ctx->temp_reg;
5044 alu.dst.chan = 1;
5045 alu.dst.write = 1;
5046 alu.last = 1;
5047
5048 r = r600_bytecode_add_alu(ctx->bc, &alu);
5049 if (r)
5050 return r;
5051 }
5052
5053 /* result.z = log2(|src.x|); */
5054 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
5055 if (ctx->bc->chip_class == CAYMAN) {
5056 for (i = 0; i < 3; i++) {
5057 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5058
5059 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
5060 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5061 r600_bytecode_src_set_abs(&alu.src[0]);
5062
5063 alu.dst.sel = ctx->temp_reg;
5064 if (i == 2)
5065 alu.dst.write = 1;
5066 alu.dst.chan = i;
5067 if (i == 2)
5068 alu.last = 1;
5069
5070 r = r600_bytecode_add_alu(ctx->bc, &alu);
5071 if (r)
5072 return r;
5073 }
5074 } else {
5075 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5076
5077 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
5078 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5079 r600_bytecode_src_set_abs(&alu.src[0]);
5080
5081 alu.dst.sel = ctx->temp_reg;
5082 alu.dst.write = 1;
5083 alu.dst.chan = 2;
5084 alu.last = 1;
5085
5086 r = r600_bytecode_add_alu(ctx->bc, &alu);
5087 if (r)
5088 return r;
5089 }
5090 }
5091
5092 /* result.w = 1.0; */
5093 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
5094 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5095
5096 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
5097 alu.src[0].sel = V_SQ_ALU_SRC_1;
5098 alu.src[0].chan = 0;
5099
5100 alu.dst.sel = ctx->temp_reg;
5101 alu.dst.chan = 3;
5102 alu.dst.write = 1;
5103 alu.last = 1;
5104
5105 r = r600_bytecode_add_alu(ctx->bc, &alu);
5106 if (r)
5107 return r;
5108 }
5109
5110 return tgsi_helper_copy(ctx, inst);
5111 }
5112
5113 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
5114 {
5115 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5116 struct r600_bytecode_alu alu;
5117 int r;
5118
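/* ARL/ARR/UARL only stage the converted index in bc->ar_reg here
 * (FLT_TO_INT_FLOOR / FLT_TO_INT / MOV); ar_loaded = 0 so the hardware
 * AR is presumably reloaded when the next indexed operand is emitted. */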
5119 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5120
5121 switch (inst->Instruction.Opcode) {
5122 case TGSI_OPCODE_ARL:
5123 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
5124 break;
5125 case TGSI_OPCODE_ARR:
5126 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
5127 break;
5128 case TGSI_OPCODE_UARL:
5129 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
5130 break;
5131 default:
5132 assert(0);
5133 return -1;
5134 }
5135
5136 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5137 alu.last = 1;
5138 alu.dst.sel = ctx->bc->ar_reg;
5139 alu.dst.write = 1;
5140 r = r600_bytecode_add_alu(ctx->bc, &alu);
5141 if (r)
5142 return r;
5143
5144 ctx->bc->ar_loaded = 0;
5145 return 0;
5146 }
5147 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
5148 {
5149 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5150 struct r600_bytecode_alu alu;
5151 int r;
5152
5153 switch (inst->Instruction.Opcode) {
5154 case TGSI_OPCODE_ARL:
5155 memset(&alu, 0, sizeof(alu));
5156 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
5157 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5158 alu.dst.sel = ctx->bc->ar_reg;
5159 alu.dst.write = 1;
5160 alu.last = 1;
5161
5162 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5163 return r;
5164
5165 memset(&alu, 0, sizeof(alu));
5166 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
5167 alu.src[0].sel = ctx->bc->ar_reg;
5168 alu.dst.sel = ctx->bc->ar_reg;
5169 alu.dst.write = 1;
5170 alu.last = 1;
5171
5172 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5173 return r;
5174 break;
5175 case TGSI_OPCODE_ARR:
5176 memset(&alu, 0, sizeof(alu));
5177 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
5178 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5179 alu.dst.sel = ctx->bc->ar_reg;
5180 alu.dst.write = 1;
5181 alu.last = 1;
5182
5183 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5184 return r;
5185 break;
5186 case TGSI_OPCODE_UARL:
5187 memset(&alu, 0, sizeof(alu));
5188 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
5189 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5190 alu.dst.sel = ctx->bc->ar_reg;
5191 alu.dst.write = 1;
5192 alu.last = 1;
5193
5194 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5195 return r;
5196 break;
5197 default:
5198 assert(0);
5199 return -1;
5200 }
5201
5202 ctx->bc->ar_loaded = 0;
5203 return 0;
5204 }
5205
5206 static int tgsi_opdst(struct r600_shader_ctx *ctx)
5207 {
5208 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5209 struct r600_bytecode_alu alu;
5210 int i, r = 0;
5211
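/* DST: dst = (1, src0.y * src1.y, src0.z, src1.w); each channel is a
 * single MUL with the unused factor replaced by the constant 1. */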
5212 for (i = 0; i < 4; i++) {
5213 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5214
5215 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
5216 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5217
5218 if (i == 0 || i == 3) {
5219 alu.src[0].sel = V_SQ_ALU_SRC_1;
5220 } else {
5221 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
5222 }
5223
5224 if (i == 0 || i == 2) {
5225 alu.src[1].sel = V_SQ_ALU_SRC_1;
5226 } else {
5227 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
5228 }
5229 if (i == 3)
5230 alu.last = 1;
5231 r = r600_bytecode_add_alu(ctx->bc, &alu);
5232 if (r)
5233 return r;
5234 }
5235 return 0;
5236 }
5237
5238 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
5239 {
5240 struct r600_bytecode_alu alu;
5241 int r;
5242
5243 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5244 alu.inst = opcode;
5245 alu.execute_mask = 1;
5246 alu.update_pred = 1;
5247
5248 alu.dst.sel = ctx->temp_reg;
5249 alu.dst.write = 1;
5250 alu.dst.chan = 0;
5251
5252 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5253 alu.src[1].sel = V_SQ_ALU_SRC_0;
5254 alu.src[1].chan = 0;
5255
5256 alu.last = 1;
5257
5258 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
5259 if (r)
5260 return r;
5261 return 0;
5262 }
5263
5264 static int pops(struct r600_shader_ctx *ctx, int pops)
5265 {
5266 unsigned force_pop = ctx->bc->force_add_cf;
5267
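/* try to fold up to two stack pops into the previous ALU clause
 * (ALU_POP_AFTER / ALU_POP2_AFTER); anything more, or no suitable
 * clause, falls back to an explicit POP CF instruction. */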
5268 if (!force_pop) {
5269 int alu_pop = 3;
5270 if (ctx->bc->cf_last) {
5271 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
5272 alu_pop = 0;
5273 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
5274 alu_pop = 1;
5275 }
5276 alu_pop += pops;
5277 if (alu_pop == 1) {
5278 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
5279 ctx->bc->force_add_cf = 1;
5280 } else if (alu_pop == 2) {
5281 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
5282 ctx->bc->force_add_cf = 1;
5283 } else {
5284 force_pop = 1;
5285 }
5286 }
5287
5288 if (force_pop) {
5289 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
5290 ctx->bc->cf_last->pop_count = pops;
5291 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
5292 }
5293
5294 return 0;
5295 }
5296
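/* CF stack bookkeeping: a VPM push costs one entry while WQM pushes and
 * loops cost four; the current depth and the maximum ever needed are
 * tracked, presumably so the required stack size can be programmed later. */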
5297 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
5298 {
5299 switch(reason) {
5300 case FC_PUSH_VPM:
5301 ctx->bc->callstack[ctx->bc->call_sp].current--;
5302 break;
5303 case FC_PUSH_WQM:
5304 case FC_LOOP:
5305 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
5306 break;
5307 case FC_REP:
5308 /* TODO: for 16-VP ASICs this should be -= 2 */
5309 ctx->bc->callstack[ctx->bc->call_sp].current--;
5310 break;
5311 }
5312 }
5313
5314 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
5315 {
5316 if (check_max_only) {
5317 int diff;
5318 switch (reason) {
5319 case FC_PUSH_VPM:
5320 diff = 1;
5321 break;
5322 case FC_PUSH_WQM:
5323 diff = 4;
5324 break;
5325 default:
5326 assert(0);
5327 diff = 0;
5328 }
5329 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
5330 ctx->bc->callstack[ctx->bc->call_sp].max) {
5331 ctx->bc->callstack[ctx->bc->call_sp].max =
5332 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
5333 }
5334 return;
5335 }
5336 switch (reason) {
5337 case FC_PUSH_VPM:
5338 ctx->bc->callstack[ctx->bc->call_sp].current++;
5339 break;
5340 case FC_PUSH_WQM:
5341 case FC_LOOP:
5342 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
5343 break;
5344 case FC_REP:
5345 ctx->bc->callstack[ctx->bc->call_sp].current++;
5346 break;
5347 }
5348
5349 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
5350 ctx->bc->callstack[ctx->bc->call_sp].max) {
5351 ctx->bc->callstack[ctx->bc->call_sp].max =
5352 ctx->bc->callstack[ctx->bc->call_sp].current;
5353 }
5354 }
5355
5356 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
5357 {
5358 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
5359
5360 sp->mid = realloc((void *)sp->mid,
5361 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
5362 sp->mid[sp->num_mid] = ctx->bc->cf_last;
5363 sp->num_mid++;
5364 }
5365
5366 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
5367 {
5368 ctx->bc->fc_sp++;
5369 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
5370 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
5371 }
5372
5373 static void fc_poplevel(struct r600_shader_ctx *ctx)
5374 {
5375 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
5376 free(sp->mid);
5377 sp->mid = NULL;
5378 sp->num_mid = 0;
5379 sp->start = NULL;
5380 sp->type = 0;
5381 ctx->bc->fc_sp--;
5382 }
5383
5384 #if 0
5385 static int emit_return(struct r600_shader_ctx *ctx)
5386 {
5387 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
5388 return 0;
5389 }
5390
5391 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
5392 {
5393
5394 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5395 ctx->bc->cf_last->pop_count = pops;
5396 /* XXX work out offset */
5397 return 0;
5398 }
5399
5400 static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
5401 {
5402 return 0;
5403 }
5404
5405 static void emit_testflag(struct r600_shader_ctx *ctx)
5406 {
5407
5408 }
5409
5410 static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
5411 {
5412 emit_testflag(ctx);
5413 emit_jump_to_offset(ctx, 1, 4);
5414 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
5415 pops(ctx, ifidx + 1);
5416 emit_return(ctx);
5417 }
5418
5419 static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
5420 {
5421 emit_testflag(ctx);
5422
5423 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5424 ctx->bc->cf_last->pop_count = 1;
5425
5426 fc_set_mid(ctx, fc_sp);
5427
5428 pops(ctx, 1);
5429 }
5430 #endif
5431
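/* IF: emit PRED_SETNE_INT against 0 as an ALU_PUSH_BEFORE clause, then a
 * JUMP CF instruction whose target address is patched by ELSE/ENDIF. */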
5432 static int tgsi_if(struct r600_shader_ctx *ctx)
5433 {
5434 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5435
5436 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5437
5438 fc_pushlevel(ctx, FC_IF);
5439
5440 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5441 return 0;
5442 }
5443
5444 static int tgsi_else(struct r600_shader_ctx *ctx)
5445 {
5446 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5447 ctx->bc->cf_last->pop_count = 1;
5448
5449 fc_set_mid(ctx, ctx->bc->fc_sp);
5450 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5451 return 0;
5452 }
5453
5454 static int tgsi_endif(struct r600_shader_ctx *ctx)
5455 {
5456 pops(ctx, 1);
5457 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5458 R600_ERR("if/endif unbalanced in shader\n");
5459 return -1;
5460 }
5461
5462 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5463 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5464 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5465 } else {
5466 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5467 }
5468 fc_poplevel(ctx);
5469
5470 callstack_decrease_current(ctx, FC_PUSH_VPM);
5471 return 0;
5472 }
5473
5474 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5475 {
5476 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
5477 * limited to 4096 iterations, like the other LOOP_* instructions. */
5478 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
5479
5480 fc_pushlevel(ctx, FC_LOOP);
5481
5482 /* check stack depth */
5483 callstack_check_depth(ctx, FC_LOOP, 0);
5484 return 0;
5485 }
5486
5487 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5488 {
5489 int i;
5490
5491 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5492
5493 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5494 R600_ERR("loop/endloop in shader code are not paired.\n");
5495 return -EINVAL;
5496 }
5497
5498 /* fixup loop pointers - from r600isa
5499 LOOP END points to CF after LOOP START,
5500 LOOP START points to CF after LOOP END
5501 BRK/CONT point to LOOP END CF
5502 */
5503 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5504
5505 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5506
5507 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5508 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5509 }
5510 /* XXX add LOOPRET support */
5511 fc_poplevel(ctx);
5512 callstack_decrease_current(ctx, FC_LOOP);
5513 return 0;
5514 }
5515
5516 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5517 {
5518 unsigned int fscp;
5519
5520 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5521 {
5522 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5523 break;
5524 }
5525
5526 if (fscp == 0) {
5527 R600_ERR("Break not inside loop/endloop pair\n");
5528 return -EINVAL;
5529 }
5530
5531 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5532
5533 fc_set_mid(ctx, fscp);
5534
5535 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5536 return 0;
5537 }
5538
5539 static int tgsi_umad(struct r600_shader_ctx *ctx)
5540 {
5541 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5542 struct r600_bytecode_alu alu;
5543 int i, j, r;
5544 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5545
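/* UMAD: temp = src0 * src1 (MULLO_UINT), then dst = temp + src2 (ADD_INT). */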
5546 /* src0 * src1 */
5547 for (i = 0; i < lasti + 1; i++) {
5548 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5549 continue;
5550
5551 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5552
5553 alu.dst.chan = i;
5554 alu.dst.sel = ctx->temp_reg;
5555 alu.dst.write = 1;
5556
5557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5558 for (j = 0; j < 2; j++) {
5559 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5560 }
5561
5562 alu.last = 1;
5563 r = r600_bytecode_add_alu(ctx->bc, &alu);
5564 if (r)
5565 return r;
5566 }
5567
5568
5569 for (i = 0; i < lasti + 1; i++) {
5570 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5571 continue;
5572
5573 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5574 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5575
5576 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5577
5578 alu.src[0].sel = ctx->temp_reg;
5579 alu.src[0].chan = i;
5580
5581 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5582 if (i == lasti) {
5583 alu.last = 1;
5584 }
5585 r = r600_bytecode_add_alu(ctx->bc, &alu);
5586 if (r)
5587 return r;
5588 }
5589 return 0;
5590 }
5591
5592 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5593 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5594 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5595 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5596
5597 /* XXX:
5598 * For state trackers other than OpenGL, we'll want to use
5599 * _RECIP_IEEE instead.
5600 */
5601 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5602
5603 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5604 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5605 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5606 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5607 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5608 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5609 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5610 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5611 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5612 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5613 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5614 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5615 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5616 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5617 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5618 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5619 /* gap */
5620 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5621 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5622 /* gap */
5623 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5624 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5625 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5626 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5627 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5628 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5629 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5630 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5631 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5632 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5633 /* gap */
5634 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5635 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5636 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5637 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5638 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5639 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5640 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5641 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5642 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5643 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5644 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5645 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5646 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5647 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5648 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5649 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5650 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5651 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5652 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5653 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5655 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5656 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5657 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5658 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5659 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5660 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5661 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5662 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5663 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5664 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5665 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5666 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5667 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5668 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5669 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5670 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5671 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5672 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5673 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5674 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5675 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5676 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5677 /* gap */
5678 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5679 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5680 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5681 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5682 /* gap */
5683 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5684 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5685 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5686 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5687 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5688 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5689 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5690 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5691 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5692 /* gap */
5693 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5694 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5695 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5696 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5697 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5698 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5699 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5700 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5701 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5702 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5703 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5704 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5705 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5706 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5707 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5708 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5709 /* gap */
5710 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5711 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5712 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5713 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5714 /* gap */
5715 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5716 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5717 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5718 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5719 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5720 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5721 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5722 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5723 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5724 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5725 /* gap */
5726 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5727 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5728 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5729 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5730 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5731 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5732 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5733 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5734 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5735 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5736 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5737 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5738 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5739 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5740 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5741 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5742 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5743 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5744 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5745 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5746 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5747 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5748 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5749 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5750 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5751 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5752 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5753 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5754 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5755 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5756 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5757 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5758 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5759 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5760 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5761 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5762 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5763 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5764 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5765 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5766 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5767 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5768 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5769 {TGSI_OPCODE_LOAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5770 {TGSI_OPCODE_STORE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5771 {TGSI_OPCODE_MFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5772 {TGSI_OPCODE_LFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5773 {TGSI_OPCODE_SFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5774 {TGSI_OPCODE_BARRIER, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5775 {TGSI_OPCODE_ATOMUADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5776 {TGSI_OPCODE_ATOMXCHG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5777 {TGSI_OPCODE_ATOMCAS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5778 {TGSI_OPCODE_ATOMAND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5779 {TGSI_OPCODE_ATOMOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5780 {TGSI_OPCODE_ATOMXOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5781 {TGSI_OPCODE_ATOMUMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5782 {TGSI_OPCODE_ATOMUMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5783 {TGSI_OPCODE_ATOMIMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5784 {TGSI_OPCODE_ATOMIMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5785 {TGSI_OPCODE_TEX2, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5786 {TGSI_OPCODE_TXB2, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5787 {TGSI_OPCODE_TXL2, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5788 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5789 };
5790
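/* Evergreen variant: same layout as the r600 table, but with the EG_
 * opcode encodings, and the ops that r600 routes through the
 * tgsi_op2_trans helper (SHL, ISHR, USHR, F2I/F2U) use the plain
 * tgsi_op2 / tgsi_f2i paths here.
 */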
5791 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5792 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5793 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5794 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5795 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5796 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5797 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5798 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5799 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5800 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5801 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5802 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5803 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5804 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5805 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5806 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5807 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5808 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5809 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5810 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5811 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5812 /* gap */
5813 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5814 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5815 /* gap */
5816 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5817 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5818 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5819 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5820 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5821 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5822 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5823 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5824 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5825 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5826 /* gap */
5827 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5828 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5829 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5830 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5831 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5832 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5833 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5834 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5835 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5836 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5837 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5838 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5839 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5840 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5841 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5842 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5843 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5844 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5845 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5846 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5847 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5848 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5849 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5850 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5851 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5852 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5853 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5854 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5855 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5856 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5857 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5858 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5859 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5860 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5861 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5862 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5863 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5864 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5865 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5866 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5867 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5868 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5869 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5870 /* gap */
5871 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5872 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5873 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5874 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5875 /* gap */
5876 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5877 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5878 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5879 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5880 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5881 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5882 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5883 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5884 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5885 /* gap */
5886 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5887 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5888 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5889 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5890 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5891 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5892 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5893 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5894 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5895 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5896 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5897 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5898 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5899 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5900 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5901 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5902 /* gap */
5903 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5904 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5905 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5906 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5907 /* gap */
5908 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5909 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5910 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5911 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5912 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5913 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5914 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5915 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5916 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5917 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5918 /* gap */
5919 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5920 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5921 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5922 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5923 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5924 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5925 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5926 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5927 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5928 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5929 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5930 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5931 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5932 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5933 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5934 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5935 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5936 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5937 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5938 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5939 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5940 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5941 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5942 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5943 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5944 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5945 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5946 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5947 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5948 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5949 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5950 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5951 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5952 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5953 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5954 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5955 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5956 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5957 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5958 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5959 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5960 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5961 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5962 {TGSI_OPCODE_LOAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5963 {TGSI_OPCODE_STORE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5964 {TGSI_OPCODE_MFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5965 {TGSI_OPCODE_LFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5966 {TGSI_OPCODE_SFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5967 {TGSI_OPCODE_BARRIER, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5968 {TGSI_OPCODE_ATOMUADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5969 {TGSI_OPCODE_ATOMXCHG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5970 {TGSI_OPCODE_ATOMCAS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5971 {TGSI_OPCODE_ATOMAND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5972 {TGSI_OPCODE_ATOMOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5973 {TGSI_OPCODE_ATOMXOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5974 {TGSI_OPCODE_ATOMUMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5975 {TGSI_OPCODE_ATOMUMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5976 {TGSI_OPCODE_ATOMIMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5977 {TGSI_OPCODE_ATOMIMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5978 {TGSI_OPCODE_TEX2, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5979 {TGSI_OPCODE_TXB2, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5980 {TGSI_OPCODE_TXL2, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5981 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5982 };
5983
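/* Cayman variant: the transcendental-style ops (RCP, RSQ, EX2, LG2,
 * SIN, COS, POW) and UMUL are emitted through the cayman_* helpers
 * (cayman_emit_float_instr, cayman_trig, cayman_pow,
 * cayman_mul_int_instr), and the int<->float conversions use plain
 * tgsi_op2; otherwise it mirrors the Evergreen table.
 */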
5984 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5985 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5986 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5987 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5988 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5989 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5990 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5991 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5992 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5993 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5994 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5995 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5996 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5997 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5998 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5999 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
6000 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
6001 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
6002 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
6003 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
6004 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6005 /* gap */
6006 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6007 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6008 /* gap */
6009 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6010 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6011 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
6012 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6013 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
6014 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
6015 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
6016 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
6017 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
6018 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
6019 /* gap */
6020 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6021 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
6022 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6023 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
6024 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
6025 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
6026 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
6027 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
6028 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6029 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6030 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6031 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6032 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6033 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
6034 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6035 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
6036 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
6037 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
6038 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
6039 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6040 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
6041 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
6042 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
6043 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6044 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6045 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6046 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6047 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6048 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6049 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
6050 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6051 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6052 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6053 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
6054 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
6055 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
6056 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
6057 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6058 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6059 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
6060 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
6061 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
6062 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
6063 /* gap */
6064 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6065 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6066 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
6067 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
6068 /* gap */
6069 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6070 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6071 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6072 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6073 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
6074 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
6075 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
6076 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
6077 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
6078 /* gap */
6079 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6080 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
6081 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
6082 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
6083 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
6084 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6085 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
6086 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
6087 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
6088 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6089 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6090 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
6091 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6092 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
6093 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6094 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
6095 /* gap */
6096 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6097 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6098 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6099 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6100 /* gap */
6101 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6102 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6103 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6104 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6105 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6106 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6107 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6108 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6109 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
6110 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
6111 /* gap */
6112 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6113 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
6114 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
6115 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
6116 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
6117 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
6118 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
6119 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
6120 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
6121 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
6122 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
6123 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
6124 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
6125 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
6126 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
6127 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
6128 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
6129 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
6130 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
6131 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
6132 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
6133 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
6134 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
6135 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6136 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6137 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6138 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6139 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
6140 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
6141 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
6142 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
6143 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
6144 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
6145 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
6146 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
6147 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
6148 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
6149 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
6150 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
6151 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
6152 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
6153 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
6154 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
6155 {TGSI_OPCODE_LOAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6156 {TGSI_OPCODE_STORE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6157 {TGSI_OPCODE_MFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6158 {TGSI_OPCODE_LFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6159 {TGSI_OPCODE_SFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6160 {TGSI_OPCODE_BARRIER, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6161 {TGSI_OPCODE_ATOMUADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6162 {TGSI_OPCODE_ATOMXCHG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6163 {TGSI_OPCODE_ATOMCAS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6164 {TGSI_OPCODE_ATOMAND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6165 {TGSI_OPCODE_ATOMOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6166 {TGSI_OPCODE_ATOMXOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6167 {TGSI_OPCODE_ATOMUMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6168 {TGSI_OPCODE_ATOMUMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6169 {TGSI_OPCODE_ATOMIMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6170 {TGSI_OPCODE_ATOMIMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6171 {TGSI_OPCODE_TEX2, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
6172 {TGSI_OPCODE_TXB2, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
6173 {TGSI_OPCODE_TXL2, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
6174 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
6175 };