r600g: make tgsi-to-llvm generates store.pixel* intrinsic for fs
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600_shader.h"
28 #include "r600d.h"
29
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_info.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_scan.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "util/u_memory.h"
36 #include <stdio.h>
37 #include <errno.h>
38 #include <byteswap.h>
39
/* CAYMAN notes
 *
 * Why CAYMAN needs loops for so many instructions:
 *
 * - These 8xx t-slot only ops are implemented in all vector slots:
 *     MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 *
 * - These 8xx t-slot only opcodes become vector ops, with all four
 *   slots expecting the arguments on sources a and b. The result is
 *   broadcast to all channels:
 *     MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
 *
 * - These 8xx t-slot only opcodes become vector ops in the z, y, and
 *   x slots:
 *     EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
 *     RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
 *     SQRT_IEEE/_64
 *     SIN/COS
 *   The w slot may have an independent co-issued operation, or if the
 *   result is required to be in the w slot, the opcode above may be
 *   issued in the w slot as well.
 *   The compiler must issue the source argument to slots z, y, and x.
 */
60
61 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
62 {
63 struct r600_context *rctx = (struct r600_context *)ctx;
64 struct r600_shader *rshader = &shader->shader;
65 uint32_t *ptr;
66 int i;
67
68 /* copy new shader */
69 if (shader->bo == NULL) {
70 shader->bo = (struct r600_resource*)
71 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
72 if (shader->bo == NULL) {
73 return -ENOMEM;
74 }
75 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
76 if (R600_BIG_ENDIAN) {
77 for (i = 0; i < rshader->bc.ndw; ++i) {
78 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
79 }
80 } else {
81 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
82 }
83 rctx->ws->buffer_unmap(shader->bo->cs_buf);
84 }
85 /* build state */
86 switch (rshader->processor_type) {
87 case TGSI_PROCESSOR_VERTEX:
88 if (rctx->chip_class >= EVERGREEN) {
89 evergreen_pipe_shader_vs(ctx, shader);
90 } else {
91 r600_pipe_shader_vs(ctx, shader);
92 }
93 break;
94 case TGSI_PROCESSOR_FRAGMENT:
95 if (rctx->chip_class >= EVERGREEN) {
96 evergreen_pipe_shader_ps(ctx, shader);
97 } else {
98 r600_pipe_shader_ps(ctx, shader);
99 }
100 break;
101 default:
102 return -EINVAL;
103 }
104 return 0;
105 }
106
107 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
108 struct r600_pipe_shader *pipeshader,
109 struct r600_shader_key key);
110
111 int r600_pipe_shader_create(struct pipe_context *ctx,
112 struct r600_pipe_shader *shader,
113 struct r600_shader_key key)
114 {
115 static int dump_shaders = -1;
116 struct r600_context *rctx = (struct r600_context *)ctx;
117 struct r600_pipe_shader_selector *sel = shader->selector;
118 int r;
119
120 /* Would like some magic "get_bool_option_once" routine.
121 */
122 if (dump_shaders == -1)
123 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
124
125 if (dump_shaders) {
126 fprintf(stderr, "--------------------------------------------------------------\n");
127 tgsi_dump(sel->tokens, 0);
128
129 if (sel->so.num_outputs) {
130 unsigned i;
131 fprintf(stderr, "STREAMOUT\n");
132 for (i = 0; i < sel->so.num_outputs; i++) {
133 unsigned mask = ((1 << sel->so.output[i].num_components) - 1) <<
134 sel->so.output[i].start_component;
135 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
136 sel->so.output[i].output_buffer, sel->so.output[i].register_index,
137 mask & 1 ? "x" : "_",
138 (mask >> 1) & 1 ? "y" : "_",
139 (mask >> 2) & 1 ? "z" : "_",
140 (mask >> 3) & 1 ? "w" : "_");
141 }
142 }
143 }
144 r = r600_shader_from_tgsi(rctx->screen, shader, key);
145 if (r) {
146 R600_ERR("translation from TGSI failed !\n");
147 return r;
148 }
149 r = r600_bytecode_build(&shader->shader.bc);
150 if (r) {
151 R600_ERR("building bytecode failed !\n");
152 return r;
153 }
154 if (dump_shaders) {
155 r600_bytecode_dump(&shader->shader.bc);
156 fprintf(stderr, "______________________________________________________________\n");
157 }
158 return r600_pipe_shader(ctx, shader);
159 }
160
161 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
162 {
163 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
164 r600_bytecode_clear(&shader->shader.bc);
165 }
166
167 /*
168 * tgsi -> r600 shader
169 */
170 struct r600_shader_tgsi_instruction;
171
/* A source operand translated from a TGSI source register (see tgsi_src()). */
struct r600_shader_src {
	unsigned sel;		/* source selector: register index plus file offset, or a V_SQ_ALU_SRC_* value */
	unsigned swizzle[4];	/* per-channel swizzle, taken from SwizzleX..SwizzleW */
	unsigned neg;		/* negate flag */
	unsigned abs;		/* absolute-value flag */
	unsigned rel;		/* set to V_SQ_REL_RELATIVE for indirect registers */
	uint32_t value[4];	/* literal values when sel is V_SQ_ALU_SRC_LITERAL */
};
180
181 struct r600_shader_ctx {
182 struct tgsi_shader_info info;
183 struct tgsi_parse_context parse;
184 const struct tgsi_token *tokens;
185 unsigned type;
186 unsigned file_offset[TGSI_FILE_COUNT];
187 unsigned temp_reg;
188 struct r600_shader_tgsi_instruction *inst_info;
189 struct r600_bytecode *bc;
190 struct r600_shader *shader;
191 struct r600_shader_src src[4];
192 uint32_t *literals;
193 uint32_t nliterals;
194 uint32_t max_driver_temp_used;
195 boolean use_llvm;
196 /* needed for evergreen interpolation */
197 boolean input_centroid;
198 boolean input_linear;
199 boolean input_perspective;
200 int num_interp_gpr;
201 int face_gpr;
202 int colors_used;
203 boolean clip_vertex_write;
204 unsigned cv_output;
205 int fragcoord_input;
206 int native_integers;
207 };
208
/* One entry of the per-chip TGSI opcode tables, indexed by TGSI opcode. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;				/* TGSI opcode this entry describes */
	unsigned is_op3;
	unsigned r600_opcode;
	int (*process)(struct r600_shader_ctx *ctx);	/* handler for the opcode */
};
215
216 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
217 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
218 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
219 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
220 static int tgsi_else(struct r600_shader_ctx *ctx);
221 static int tgsi_endif(struct r600_shader_ctx *ctx);
222 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
223 static int tgsi_endloop(struct r600_shader_ctx *ctx);
224 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
225
226 /*
227 * bytestream -> r600 shader
228 *
229 * These functions are used to transform the output of the LLVM backend into
230 * struct r600_bytecode.
231 */
232
233 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
234 unsigned char * bytes, unsigned num_bytes);
235
236 #ifdef HAVE_OPENCL
237 int r600_compute_shader_create(struct pipe_context * ctx,
238 LLVMModuleRef mod, struct r600_bytecode * bytecode)
239 {
240 struct r600_context *r600_ctx = (struct r600_context *)ctx;
241 unsigned char * bytes;
242 unsigned byte_count;
243 struct r600_shader_ctx shader_ctx;
244 unsigned dump = 0;
245
246 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
247 dump = 1;
248 }
249
250 r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
251 shader_ctx.bc = bytecode;
252 r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
253 shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
254 r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
255 if (shader_ctx.bc->chip_class == CAYMAN) {
256 cm_bytecode_add_cf_end(shader_ctx.bc);
257 }
258 r600_bytecode_build(shader_ctx.bc);
259 if (dump) {
260 r600_bytecode_dump(shader_ctx.bc);
261 }
262 free(bytes);
263 return 1;
264 }
265
266 #endif /* HAVE_OPENCL */
267
/**
 * Read a little-endian 32-bit value from \p bytes.
 *
 * \param bytes       byte stream to read from.
 * \param bytes_read  in/out read position; advanced by four.
 * \return the decoded value.
 */
static uint32_t i32_from_byte_stream(unsigned char * bytes,
		unsigned * bytes_read)
{
	unsigned i;
	uint32_t out = 0;

	for (i = 0; i < 4; i++) {
		/* Widen before shifting: an unsigned char promotes to int, and
		 * shifting a byte >= 0x80 left by 24 would overflow it. */
		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
	}
	return out;
}
278
279 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
280 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
281 {
282 unsigned i;
283 unsigned sel0, sel1;
284 sel0 = bytes[bytes_read++];
285 sel1 = bytes[bytes_read++];
286 alu->src[src_idx].sel = sel0 | (sel1 << 8);
287 alu->src[src_idx].chan = bytes[bytes_read++];
288 alu->src[src_idx].neg = bytes[bytes_read++];
289 alu->src[src_idx].abs = bytes[bytes_read++];
290 alu->src[src_idx].rel = bytes[bytes_read++];
291 alu->src[src_idx].kc_bank = bytes[bytes_read++];
292 for (i = 0; i < 4; i++) {
293 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
294 }
295 return bytes_read;
296 }
297
298 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
299 unsigned char * bytes, unsigned bytes_read)
300 {
301 unsigned src_idx;
302 struct r600_bytecode_alu alu;
303 unsigned src_const_reg[3];
304 uint32_t word0, word1;
305
306 memset(&alu, 0, sizeof(alu));
307 for(src_idx = 0; src_idx < 3; src_idx++) {
308 unsigned i;
309 src_const_reg[src_idx] = bytes[bytes_read++];
310 for (i = 0; i < 4; i++) {
311 alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8);
312 }
313 }
314
315 word0 = i32_from_byte_stream(bytes, &bytes_read);
316 word1 = i32_from_byte_stream(bytes, &bytes_read);
317
318 switch(ctx->bc->chip_class) {
319 case R600:
320 r600_bytecode_alu_read(&alu, word0, word1);
321 break;
322 case R700:
323 case EVERGREEN:
324 case CAYMAN:
325 r700_bytecode_alu_read(&alu, word0, word1);
326 break;
327 }
328
329 for(src_idx = 0; src_idx < 3; src_idx++) {
330 if (src_const_reg[src_idx])
331 alu.src[src_idx].sel += 512;
332 }
333
334 #if HAVE_LLVM < 0x0302
335 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
336 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
337 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) ||
338 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) {
339 alu.update_pred = 1;
340 alu.dst.write = 0;
341 alu.src[1].sel = V_SQ_ALU_SRC_0;
342 alu.src[1].chan = 0;
343 alu.last = 1;
344 }
345 #endif
346
347 if (alu.execute_mask) {
348 alu.pred_sel = 0;
349 r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
350 } else {
351 r600_bytecode_add_alu(ctx->bc, &alu);
352 }
353
354 /* XXX: Handle other KILL instructions */
355 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
356 ctx->shader->uses_kill = 1;
357 /* XXX: This should be enforced in the LLVM backend. */
358 ctx->bc->force_add_cf = 1;
359 }
360 return bytes_read;
361 }
362
363 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
364 unsigned pred_inst)
365 {
366 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
367 fc_pushlevel(ctx, FC_IF);
368 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
369 }
370
371 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
372 struct r600_bytecode_alu *alu, unsigned compare_opcode)
373 {
374 unsigned opcode = TGSI_OPCODE_BRK;
375 if (ctx->bc->chip_class == CAYMAN)
376 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
377 else if (ctx->bc->chip_class >= EVERGREEN)
378 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
379 else
380 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
381 llvm_if(ctx, alu, compare_opcode);
382 tgsi_loop_brk_cont(ctx);
383 tgsi_endif(ctx);
384 }
385
386 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
387 unsigned char * bytes, unsigned bytes_read)
388 {
389 struct r600_bytecode_alu alu;
390 unsigned inst;
391 memset(&alu, 0, sizeof(alu));
392 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
393 inst = bytes[bytes_read++];
394 switch (inst) {
395 case 0: /* FC_IF */
396 llvm_if(ctx, &alu,
397 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
398 break;
399 case 1: /* FC_IF_INT */
400 llvm_if(ctx, &alu,
401 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
402 break;
403 case 2: /* FC_ELSE */
404 tgsi_else(ctx);
405 break;
406 case 3: /* FC_ENDIF */
407 tgsi_endif(ctx);
408 break;
409 case 4: /* FC_BGNLOOP */
410 tgsi_bgnloop(ctx);
411 break;
412 case 5: /* FC_ENDLOOP */
413 tgsi_endloop(ctx);
414 break;
415 case 6: /* FC_BREAK */
416 r600_break_from_byte_stream(ctx, &alu,
417 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
418 break;
419 case 7: /* FC_BREAK_NZ_INT */
420 r600_break_from_byte_stream(ctx, &alu,
421 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
422 break;
423 case 8: /* FC_CONTINUE */
424 {
425 unsigned opcode = TGSI_OPCODE_CONT;
426 if (ctx->bc->chip_class == CAYMAN) {
427 ctx->inst_info =
428 &cm_shader_tgsi_instruction[opcode];
429 } else if (ctx->bc->chip_class >= EVERGREEN) {
430 ctx->inst_info =
431 &eg_shader_tgsi_instruction[opcode];
432 } else {
433 ctx->inst_info =
434 &r600_shader_tgsi_instruction[opcode];
435 }
436 tgsi_loop_brk_cont(ctx);
437 }
438 break;
439 case 9: /* FC_BREAK_Z_INT */
440 r600_break_from_byte_stream(ctx, &alu,
441 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
442 break;
443 case 10: /* FC_BREAK_NZ */
444 r600_break_from_byte_stream(ctx, &alu,
445 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
446 break;
447 }
448
449 return bytes_read;
450 }
451
452 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
453 unsigned char * bytes, unsigned bytes_read)
454 {
455 struct r600_bytecode_tex tex;
456
457 tex.inst = bytes[bytes_read++];
458 tex.resource_id = bytes[bytes_read++];
459 tex.src_gpr = bytes[bytes_read++];
460 tex.src_rel = bytes[bytes_read++];
461 tex.dst_gpr = bytes[bytes_read++];
462 tex.dst_rel = bytes[bytes_read++];
463 tex.dst_sel_x = bytes[bytes_read++];
464 tex.dst_sel_y = bytes[bytes_read++];
465 tex.dst_sel_z = bytes[bytes_read++];
466 tex.dst_sel_w = bytes[bytes_read++];
467 tex.lod_bias = bytes[bytes_read++];
468 tex.coord_type_x = bytes[bytes_read++];
469 tex.coord_type_y = bytes[bytes_read++];
470 tex.coord_type_z = bytes[bytes_read++];
471 tex.coord_type_w = bytes[bytes_read++];
472 tex.offset_x = bytes[bytes_read++];
473 tex.offset_y = bytes[bytes_read++];
474 tex.offset_z = bytes[bytes_read++];
475 tex.sampler_id = bytes[bytes_read++];
476 tex.src_sel_x = bytes[bytes_read++];
477 tex.src_sel_y = bytes[bytes_read++];
478 tex.src_sel_z = bytes[bytes_read++];
479 tex.src_sel_w = bytes[bytes_read++];
480
481 r600_bytecode_add_tex(ctx->bc, &tex);
482
483 return bytes_read;
484 }
485
486 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
487 unsigned char * bytes, unsigned bytes_read)
488 {
489 struct r600_bytecode_vtx vtx;
490
491 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
492 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
493 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
494
495 memset(&vtx, 0, sizeof(vtx));
496
497 /* WORD0 */
498 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
499 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
500 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
501 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
502 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
503 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
504
505 /* WORD1 */
506 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
507 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
508 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
509 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
510 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
511 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
512 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
513 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
514 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
515 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
516
517 /* WORD 2*/
518 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
519 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
520
521 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
522 fprintf(stderr, "Error adding vtx\n");
523 }
524 /* Use the Texture Cache */
525 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
526 return bytes_read;
527 }
528
529 static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
530 unsigned char * bytes, unsigned bytes_read)
531 {
532 struct r600_bytecode_output output;
533 memset(&output, 0, sizeof(struct r600_bytecode_output));
534 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
535 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
536 if (ctx->bc->chip_class >= EVERGREEN)
537 eg_bytecode_export_read(&output, word0,word1);
538 else
539 r600_bytecode_export_read(&output, word0,word1);
540 r600_bytecode_add_output(ctx->bc, &output);
541 return bytes_read;
542 }
543
544 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
545 unsigned char * bytes, unsigned num_bytes)
546 {
547 unsigned bytes_read = 0;
548 unsigned i, byte;
549 while (bytes_read < num_bytes) {
550 char inst_type = bytes[bytes_read++];
551 switch (inst_type) {
552 case 0:
553 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
554 bytes_read);
555 break;
556 case 1:
557 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
558 bytes_read);
559 break;
560 case 2:
561 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
562 bytes_read);
563 break;
564 case 3:
565 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
566 for (i = 0; i < 2; i++) {
567 for (byte = 0 ; byte < 4; byte++) {
568 ctx->bc->cf_last->isa[i] |=
569 (bytes[bytes_read++] << (byte * 8));
570 }
571 }
572 break;
573
574 case 4:
575 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
576 bytes_read);
577 break;
578 case 5:
579 bytes_read = r600_export_from_byte_stream(ctx, bytes,
580 bytes_read);
581 break;
582 default:
583 /* XXX: Error here */
584 break;
585 }
586 }
587 }
588
589 /* End bytestream -> r600 shader functions*/
590
591 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
592 {
593 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
594 int j;
595
596 if (i->Instruction.NumDstRegs > 1) {
597 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
598 return -EINVAL;
599 }
600 if (i->Instruction.Predicate) {
601 R600_ERR("predicate unsupported\n");
602 return -EINVAL;
603 }
604 #if 0
605 if (i->Instruction.Label) {
606 R600_ERR("label unsupported\n");
607 return -EINVAL;
608 }
609 #endif
610 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
611 if (i->Src[j].Register.Dimension) {
612 R600_ERR("unsupported src %d (dimension %d)\n", j,
613 i->Src[j].Register.Dimension);
614 return -EINVAL;
615 }
616 }
617 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
618 if (i->Dst[j].Register.Dimension) {
619 R600_ERR("unsupported dst (dimension)\n");
620 return -EINVAL;
621 }
622 }
623 return 0;
624 }
625
626 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
627 {
628 int i, r;
629 struct r600_bytecode_alu alu;
630 int gpr = 0, base_chan = 0;
631 int ij_index = 0;
632
633 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
634 ij_index = 0;
635 if (ctx->shader->input[input].centroid)
636 ij_index++;
637 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
638 ij_index = 0;
639 /* if we have perspective add one */
640 if (ctx->input_perspective) {
641 ij_index++;
642 /* if we have perspective centroid */
643 if (ctx->input_centroid)
644 ij_index++;
645 }
646 if (ctx->shader->input[input].centroid)
647 ij_index++;
648 }
649
650 /* work out gpr and base_chan from index */
651 gpr = ij_index / 2;
652 base_chan = (2 * (ij_index % 2)) + 1;
653
654 for (i = 0; i < 8; i++) {
655 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
656
657 if (i < 4)
658 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
659 else
660 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
661
662 if ((i > 1) && (i < 6)) {
663 alu.dst.sel = ctx->shader->input[input].gpr;
664 alu.dst.write = 1;
665 }
666
667 alu.dst.chan = i % 4;
668
669 alu.src[0].sel = gpr;
670 alu.src[0].chan = (base_chan - (i % 2));
671
672 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
673
674 alu.bank_swizzle_force = SQ_ALU_VEC_210;
675 if ((i % 4) == 3)
676 alu.last = 1;
677 r = r600_bytecode_add_alu(ctx->bc, &alu);
678 if (r)
679 return r;
680 }
681 return 0;
682 }
683
684 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
685 {
686 int i, r;
687 struct r600_bytecode_alu alu;
688
689 for (i = 0; i < 4; i++) {
690 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
691
692 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
693
694 alu.dst.sel = ctx->shader->input[input].gpr;
695 alu.dst.write = 1;
696
697 alu.dst.chan = i;
698
699 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
700 alu.src[0].chan = i;
701
702 if (i == 3)
703 alu.last = 1;
704 r = r600_bytecode_add_alu(ctx->bc, &alu);
705 if (r)
706 return r;
707 }
708 return 0;
709 }
710
/*
 * Special export handling in shaders
 *
 * shader export ARRAY_BASE for EXPORT_POS:
 * 60 is position
 * 61 is misc vector
 * 62, 63 are clip distance vectors
 *
 * The use of the values exported in 61-63 is controlled by PA_CL_VS_OUT_CNTL:
 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive with render target index)
 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
 *
 *
 * shader export ARRAY_BASE for EXPORT_PIXEL:
 * 0-7 CB targets
 * 61 computed Z vector
 *
 * The use of the values exported in the computed Z vector is controlled
 * by DB_SHADER_CONTROL:
 * Z_EXPORT_ENABLE - Z as a float in RED
 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
 * DB_SOURCE_FORMAT - export control restrictions
 *
 */
743
744
745 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
746 static int r600_spi_sid(struct r600_shader_io * io)
747 {
748 int index, name = io->name;
749
750 /* These params are handled differently, they don't need
751 * semantic indices, so we'll use 0 for them.
752 */
753 if (name == TGSI_SEMANTIC_POSITION ||
754 name == TGSI_SEMANTIC_PSIZE ||
755 name == TGSI_SEMANTIC_FACE)
756 index = 0;
757 else {
758 if (name == TGSI_SEMANTIC_GENERIC) {
759 /* For generic params simply use sid from tgsi */
760 index = io->sid;
761 } else {
762 /* For non-generic params - pack name and sid into 8 bits */
763 index = 0x80 | (name<<3) | (io->sid);
764 }
765
766 /* Make sure that all really used indices have nonzero value, so
767 * we can just compare it to 0 later instead of comparing the name
768 * with different values to detect special cases. */
769 index++;
770 }
771
772 return index;
773 };
774
775 /* turn input into interpolate on EG */
776 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
777 {
778 int r = 0;
779
780 if (ctx->shader->input[index].spi_sid) {
781 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
782 if (!ctx->use_llvm) {
783 if (ctx->shader->input[index].interpolate > 0) {
784 r = evergreen_interp_alu(ctx, index);
785 } else {
786 r = evergreen_interp_flat(ctx, index);
787 }
788 }
789 }
790 return r;
791 }
792
793 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
794 {
795 struct r600_bytecode_alu alu;
796 int i, r;
797 int gpr_front = ctx->shader->input[front].gpr;
798 int gpr_back = ctx->shader->input[back].gpr;
799
800 for (i = 0; i < 4; i++) {
801 memset(&alu, 0, sizeof(alu));
802 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
803 alu.is_op3 = 1;
804 alu.dst.write = 1;
805 alu.dst.sel = gpr_front;
806 alu.src[0].sel = ctx->face_gpr;
807 alu.src[1].sel = gpr_front;
808 alu.src[2].sel = gpr_back;
809
810 alu.dst.chan = i;
811 alu.src[1].chan = i;
812 alu.src[2].chan = i;
813 alu.last = (i==3);
814
815 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
816 return r;
817 }
818
819 return 0;
820 }
821
822 static int tgsi_declaration(struct r600_shader_ctx *ctx)
823 {
824 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
825 unsigned i;
826 int r;
827
828 switch (d->Declaration.File) {
829 case TGSI_FILE_INPUT:
830 i = ctx->shader->ninput++;
831 ctx->shader->input[i].name = d->Semantic.Name;
832 ctx->shader->input[i].sid = d->Semantic.Index;
833 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
834 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
835 ctx->shader->input[i].centroid = d->Interp.Centroid;
836 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
837 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
838 switch (ctx->shader->input[i].name) {
839 case TGSI_SEMANTIC_FACE:
840 ctx->face_gpr = ctx->shader->input[i].gpr;
841 break;
842 case TGSI_SEMANTIC_COLOR:
843 ctx->colors_used++;
844 break;
845 case TGSI_SEMANTIC_POSITION:
846 ctx->fragcoord_input = i;
847 break;
848 }
849 if (ctx->bc->chip_class >= EVERGREEN) {
850 if ((r = evergreen_interp_input(ctx, i)))
851 return r;
852 }
853 }
854 break;
855 case TGSI_FILE_OUTPUT:
856 i = ctx->shader->noutput++;
857 ctx->shader->output[i].name = d->Semantic.Name;
858 ctx->shader->output[i].sid = d->Semantic.Index;
859 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
860 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
861 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
862 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
863 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
864 switch (d->Semantic.Name) {
865 case TGSI_SEMANTIC_CLIPDIST:
866 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
867 break;
868 case TGSI_SEMANTIC_PSIZE:
869 ctx->shader->vs_out_misc_write = 1;
870 ctx->shader->vs_out_point_size = 1;
871 break;
872 case TGSI_SEMANTIC_CLIPVERTEX:
873 ctx->clip_vertex_write = TRUE;
874 ctx->cv_output = i;
875 break;
876 }
877 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
878 switch (d->Semantic.Name) {
879 case TGSI_SEMANTIC_COLOR:
880 ctx->shader->nr_ps_max_color_exports++;
881 break;
882 }
883 }
884 break;
885 case TGSI_FILE_CONSTANT:
886 case TGSI_FILE_TEMPORARY:
887 case TGSI_FILE_SAMPLER:
888 case TGSI_FILE_ADDRESS:
889 break;
890
891 case TGSI_FILE_SYSTEM_VALUE:
892 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
893 if (!ctx->native_integers) {
894 struct r600_bytecode_alu alu;
895 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
896
897 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
898 alu.src[0].sel = 0;
899 alu.src[0].chan = 3;
900
901 alu.dst.sel = 0;
902 alu.dst.chan = 3;
903 alu.dst.write = 1;
904 alu.last = 1;
905
906 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
907 return r;
908 }
909 break;
910 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
911 break;
912 default:
913 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
914 return -EINVAL;
915 }
916 return 0;
917 }
918
919 static int r600_get_temp(struct r600_shader_ctx *ctx)
920 {
921 return ctx->temp_reg + ctx->max_driver_temp_used++;
922 }
923
924 /*
925 * for evergreen we need to scan the shader to find the number of GPRs we need to
926 * reserve for interpolation.
927 *
928 * we need to know if we are going to emit
929 * any centroid inputs
930 * if perspective and linear are required
931 */
932 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
933 {
934 int i;
935 int num_baryc;
936
937 ctx->input_linear = FALSE;
938 ctx->input_perspective = FALSE;
939 ctx->input_centroid = FALSE;
940 ctx->num_interp_gpr = 1;
941
942 /* any centroid inputs */
943 for (i = 0; i < ctx->info.num_inputs; i++) {
944 /* skip position/face */
945 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
946 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
947 continue;
948 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
949 ctx->input_linear = TRUE;
950 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
951 ctx->input_perspective = TRUE;
952 if (ctx->info.input_centroid[i])
953 ctx->input_centroid = TRUE;
954 }
955
956 num_baryc = 0;
957 /* ignoring sample for now */
958 if (ctx->input_perspective)
959 num_baryc++;
960 if (ctx->input_linear)
961 num_baryc++;
962 if (ctx->input_centroid)
963 num_baryc *= 2;
964
965 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
966
967 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
968 return ctx->num_interp_gpr;
969 }
970
971 static void tgsi_src(struct r600_shader_ctx *ctx,
972 const struct tgsi_full_src_register *tgsi_src,
973 struct r600_shader_src *r600_src)
974 {
975 memset(r600_src, 0, sizeof(*r600_src));
976 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
977 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
978 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
979 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
980 r600_src->neg = tgsi_src->Register.Negate;
981 r600_src->abs = tgsi_src->Register.Absolute;
982
983 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
984 int index;
985 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
986 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
987 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
988
989 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
990 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
991 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
992 return;
993 }
994 index = tgsi_src->Register.Index;
995 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
996 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
997 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
998 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
999 r600_src->swizzle[0] = 3;
1000 r600_src->swizzle[1] = 3;
1001 r600_src->swizzle[2] = 3;
1002 r600_src->swizzle[3] = 3;
1003 r600_src->sel = 0;
1004 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
1005 r600_src->swizzle[0] = 0;
1006 r600_src->swizzle[1] = 0;
1007 r600_src->swizzle[2] = 0;
1008 r600_src->swizzle[3] = 0;
1009 r600_src->sel = 0;
1010 }
1011 } else {
1012 if (tgsi_src->Register.Indirect)
1013 r600_src->rel = V_SQ_REL_RELATIVE;
1014 r600_src->sel = tgsi_src->Register.Index;
1015 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
1016 }
1017 }
1018
1019 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
1020 {
1021 struct r600_bytecode_vtx vtx;
1022 unsigned int ar_reg;
1023 int r;
1024
1025 if (offset) {
1026 struct r600_bytecode_alu alu;
1027
1028 memset(&alu, 0, sizeof(alu));
1029
1030 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
1031 alu.src[0].sel = ctx->bc->ar_reg;
1032
1033 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1034 alu.src[1].value = offset;
1035
1036 alu.dst.sel = dst_reg;
1037 alu.dst.write = 1;
1038 alu.last = 1;
1039
1040 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1041 return r;
1042
1043 ar_reg = dst_reg;
1044 } else {
1045 ar_reg = ctx->bc->ar_reg;
1046 }
1047
1048 memset(&vtx, 0, sizeof(vtx));
1049 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1050 vtx.src_gpr = ar_reg;
1051 vtx.mega_fetch_count = 16;
1052 vtx.dst_gpr = dst_reg;
1053 vtx.dst_sel_x = 0; /* SEL_X */
1054 vtx.dst_sel_y = 1; /* SEL_Y */
1055 vtx.dst_sel_z = 2; /* SEL_Z */
1056 vtx.dst_sel_w = 3; /* SEL_W */
1057 vtx.data_format = FMT_32_32_32_32_FLOAT;
1058 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1059 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1060 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1061 vtx.endian = r600_endian_swap(32);
1062
1063 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1064 return r;
1065
1066 return 0;
1067 }
1068
1069 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1070 {
1071 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1072 struct r600_bytecode_alu alu;
1073 int i, j, k, nconst, r;
1074
1075 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1076 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1077 nconst++;
1078 }
1079 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1080 }
1081 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1082 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1083 continue;
1084 }
1085
1086 if (ctx->src[i].rel) {
1087 int treg = r600_get_temp(ctx);
1088 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1089 return r;
1090
1091 ctx->src[i].sel = treg;
1092 ctx->src[i].rel = 0;
1093 j--;
1094 } else if (j > 0) {
1095 int treg = r600_get_temp(ctx);
1096 for (k = 0; k < 4; k++) {
1097 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1098 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1099 alu.src[0].sel = ctx->src[i].sel;
1100 alu.src[0].chan = k;
1101 alu.src[0].rel = ctx->src[i].rel;
1102 alu.dst.sel = treg;
1103 alu.dst.chan = k;
1104 alu.dst.write = 1;
1105 if (k == 3)
1106 alu.last = 1;
1107 r = r600_bytecode_add_alu(ctx->bc, &alu);
1108 if (r)
1109 return r;
1110 }
1111 ctx->src[i].sel = treg;
1112 ctx->src[i].rel =0;
1113 j--;
1114 }
1115 }
1116 return 0;
1117 }
1118
1119 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1120 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1121 {
1122 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1123 struct r600_bytecode_alu alu;
1124 int i, j, k, nliteral, r;
1125
1126 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1127 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1128 nliteral++;
1129 }
1130 }
1131 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1132 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1133 int treg = r600_get_temp(ctx);
1134 for (k = 0; k < 4; k++) {
1135 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1136 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1137 alu.src[0].sel = ctx->src[i].sel;
1138 alu.src[0].chan = k;
1139 alu.src[0].value = ctx->src[i].value[k];
1140 alu.dst.sel = treg;
1141 alu.dst.chan = k;
1142 alu.dst.write = 1;
1143 if (k == 3)
1144 alu.last = 1;
1145 r = r600_bytecode_add_alu(ctx->bc, &alu);
1146 if (r)
1147 return r;
1148 }
1149 ctx->src[i].sel = treg;
1150 j--;
1151 }
1152 }
1153 return 0;
1154 }
1155
1156 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1157 {
1158 int i, r, count = ctx->shader->ninput;
1159
1160 for (i = 0; i < count; i++) {
1161 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1162 unsigned back_facing_reg = ctx->shader->input[i].potential_back_facing_reg;
1163 if (ctx->bc->chip_class >= EVERGREEN) {
1164 if ((r = evergreen_interp_input(ctx, back_facing_reg)))
1165 return r;
1166 }
1167
1168 if (!ctx->use_llvm) {
1169 r = select_twoside_color(ctx, i, back_facing_reg);
1170 if (r)
1171 return r;
1172 }
1173 }
1174 }
1175 return 0;
1176 }
1177
1178 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
1179 struct r600_pipe_shader *pipeshader,
1180 struct r600_shader_key key)
1181 {
1182 struct r600_shader *shader = &pipeshader->shader;
1183 struct tgsi_token *tokens = pipeshader->selector->tokens;
1184 struct pipe_stream_output_info so = pipeshader->selector->so;
1185 struct tgsi_full_immediate *immediate;
1186 struct tgsi_full_property *property;
1187 struct r600_shader_ctx ctx;
1188 struct r600_bytecode_output output[32];
1189 unsigned output_done, noutput;
1190 unsigned opcode;
1191 int i, j, k, r = 0;
1192 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1193 /* Declarations used by llvm code */
1194 bool use_llvm = false;
1195 unsigned char * inst_bytes = NULL;
1196 unsigned inst_byte_count = 0;
1197
1198 #ifdef R600_USE_LLVM
1199 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1200 #endif
1201 ctx.bc = &shader->bc;
1202 ctx.shader = shader;
1203 ctx.native_integers = true;
1204
1205 r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
1206 rscreen->msaa_texture_support);
1207 ctx.tokens = tokens;
1208 tgsi_scan_shader(tokens, &ctx.info);
1209 tgsi_parse_init(&ctx.parse, tokens);
1210 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1211 shader->processor_type = ctx.type;
1212 ctx.bc->type = shader->processor_type;
1213
1214 ctx.face_gpr = -1;
1215 ctx.fragcoord_input = -1;
1216 ctx.colors_used = 0;
1217 ctx.clip_vertex_write = 0;
1218
1219 shader->nr_ps_color_exports = 0;
1220 shader->nr_ps_max_color_exports = 0;
1221
1222 shader->two_side = key.color_two_side;
1223
1224 /* register allocations */
1225 /* Values [0,127] correspond to GPR[0..127].
1226 * Values [128,159] correspond to constant buffer bank 0
1227 * Values [160,191] correspond to constant buffer bank 1
1228 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1229 * Values [256,287] correspond to constant buffer bank 2 (EG)
1230 * Values [288,319] correspond to constant buffer bank 3 (EG)
1231 * Other special values are shown in the list below.
1232 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1233 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1234 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1235 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1236 * 248 SQ_ALU_SRC_0: special constant 0.0.
1237 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1238 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1239 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1240 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1241 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1242 * 254 SQ_ALU_SRC_PV: previous vector result.
1243 * 255 SQ_ALU_SRC_PS: previous scalar result.
1244 */
1245 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1246 ctx.file_offset[i] = 0;
1247 }
1248 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1249 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1250 if (ctx.bc->chip_class >= EVERGREEN) {
1251 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1252 } else {
1253 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1254 }
1255 }
1256 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1257 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1258 }
1259
1260 #ifdef R600_USE_LLVM
1261 if (use_llvm && ctx.info.indirect_files) {
1262 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1263 "indirect adressing. Falling back to TGSI "
1264 "backend.\n");
1265 use_llvm = 0;
1266 }
1267 #endif
1268 ctx.use_llvm = use_llvm;
1269
1270 if (use_llvm) {
1271 ctx.file_offset[TGSI_FILE_OUTPUT] =
1272 ctx.file_offset[TGSI_FILE_INPUT];
1273 } else {
1274 ctx.file_offset[TGSI_FILE_OUTPUT] =
1275 ctx.file_offset[TGSI_FILE_INPUT] +
1276 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1277 }
1278 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1279 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1280
1281 /* Outside the GPR range. This will be translated to one of the
1282 * kcache banks later. */
1283 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1284
1285 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1286 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1287 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1288 ctx.temp_reg = ctx.bc->ar_reg + 1;
1289
1290 ctx.nliterals = 0;
1291 ctx.literals = NULL;
1292 shader->fs_write_all = FALSE;
1293 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1294 tgsi_parse_token(&ctx.parse);
1295 switch (ctx.parse.FullToken.Token.Type) {
1296 case TGSI_TOKEN_TYPE_IMMEDIATE:
1297 immediate = &ctx.parse.FullToken.FullImmediate;
1298 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1299 if(ctx.literals == NULL) {
1300 r = -ENOMEM;
1301 goto out_err;
1302 }
1303 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1304 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1305 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1306 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1307 ctx.nliterals++;
1308 break;
1309 case TGSI_TOKEN_TYPE_DECLARATION:
1310 r = tgsi_declaration(&ctx);
1311 if (r)
1312 goto out_err;
1313 break;
1314 case TGSI_TOKEN_TYPE_INSTRUCTION:
1315 break;
1316 case TGSI_TOKEN_TYPE_PROPERTY:
1317 property = &ctx.parse.FullToken.FullProperty;
1318 switch (property->Property.PropertyName) {
1319 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1320 if (property->u[0].Data == 1)
1321 shader->fs_write_all = TRUE;
1322 break;
1323 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1324 /* we don't need this one */
1325 break;
1326 }
1327 break;
1328 default:
1329 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1330 r = -EINVAL;
1331 goto out_err;
1332 }
1333 }
1334
1335 /* Process two side if needed */
1336 if (shader->two_side && ctx.colors_used) {
1337 int i, count = ctx.shader->ninput;
1338 unsigned next_lds_loc = ctx.shader->nlds;
1339
1340 /* additional inputs will be allocated right after the existing inputs,
1341 * we won't need them after the color selection, so we don't need to
1342 * reserve these gprs for the rest of the shader code and to adjust
1343 * output offsets etc. */
1344 int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
1345 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1346
1347 if (ctx.face_gpr == -1) {
1348 i = ctx.shader->ninput++;
1349 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
1350 ctx.shader->input[i].spi_sid = 0;
1351 ctx.shader->input[i].gpr = gpr++;
1352 ctx.face_gpr = ctx.shader->input[i].gpr;
1353 }
1354
1355 for (i = 0; i < count; i++) {
1356 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1357 int ni = ctx.shader->ninput++;
1358 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io));
1359 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1360 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]);
1361 ctx.shader->input[ni].gpr = gpr++;
1362 // TGSI to LLVM needs to know the lds position of inputs.
1363 // Non LLVM path computes it later (in process_twoside_color)
1364 ctx.shader->input[ni].lds_pos = next_lds_loc++;
1365 ctx.shader->input[i].potential_back_facing_reg = ni;
1366 }
1367 }
1368 }
1369
1370 /* LLVM backend setup */
1371 #ifdef R600_USE_LLVM
1372 if (use_llvm) {
1373 struct radeon_llvm_context radeon_llvm_ctx;
1374 LLVMModuleRef mod;
1375 unsigned dump = 0;
1376 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1377 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1378 radeon_llvm_ctx.type = ctx.type;
1379 radeon_llvm_ctx.two_side = shader->two_side;
1380 radeon_llvm_ctx.face_input = ctx.face_gpr;
1381 radeon_llvm_ctx.r600_inputs = ctx.shader->input;
1382 radeon_llvm_ctx.r600_outputs = ctx.shader->output;
1383 radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
1384 radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
1385 radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
1386 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1387 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1388 dump = 1;
1389 }
1390 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1391 rscreen->family, dump)) {
1392 FREE(inst_bytes);
1393 radeon_llvm_dispose(&radeon_llvm_ctx);
1394 use_llvm = 0;
1395 fprintf(stderr, "R600 LLVM backend failed to compile "
1396 "shader. Falling back to TGSI\n");
1397 } else {
1398 ctx.file_offset[TGSI_FILE_OUTPUT] =
1399 ctx.file_offset[TGSI_FILE_INPUT];
1400 }
1401 radeon_llvm_dispose(&radeon_llvm_ctx);
1402 }
1403 #endif
1404 /* End of LLVM backend setup */
1405
1406 if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
1407 shader->nr_ps_max_color_exports = 8;
1408
1409 if (ctx.fragcoord_input >= 0 && !use_llvm) {
1410 if (ctx.bc->chip_class == CAYMAN) {
1411 for (j = 0 ; j < 4; j++) {
1412 struct r600_bytecode_alu alu;
1413 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1414 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1415 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1416 alu.src[0].chan = 3;
1417
1418 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1419 alu.dst.chan = j;
1420 alu.dst.write = (j == 3);
1421 alu.last = 1;
1422 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1423 return r;
1424 }
1425 } else {
1426 struct r600_bytecode_alu alu;
1427 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1428 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1429 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1430 alu.src[0].chan = 3;
1431
1432 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1433 alu.dst.chan = 3;
1434 alu.dst.write = 1;
1435 alu.last = 1;
1436 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1437 return r;
1438 }
1439 }
1440
1441 if (shader->two_side && ctx.colors_used) {
1442 if ((r = process_twoside_color_inputs(&ctx)))
1443 return r;
1444 }
1445
1446 tgsi_parse_init(&ctx.parse, tokens);
1447 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1448 tgsi_parse_token(&ctx.parse);
1449 switch (ctx.parse.FullToken.Token.Type) {
1450 case TGSI_TOKEN_TYPE_INSTRUCTION:
1451 if (use_llvm) {
1452 continue;
1453 }
1454 r = tgsi_is_supported(&ctx);
1455 if (r)
1456 goto out_err;
1457 ctx.max_driver_temp_used = 0;
1458 /* reserve first tmp for everyone */
1459 r600_get_temp(&ctx);
1460
1461 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1462 if ((r = tgsi_split_constant(&ctx)))
1463 goto out_err;
1464 if ((r = tgsi_split_literal_constant(&ctx)))
1465 goto out_err;
1466 if (ctx.bc->chip_class == CAYMAN)
1467 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1468 else if (ctx.bc->chip_class >= EVERGREEN)
1469 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1470 else
1471 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1472 r = ctx.inst_info->process(&ctx);
1473 if (r)
1474 goto out_err;
1475 break;
1476 default:
1477 break;
1478 }
1479 }
1480
1481 /* Get instructions if we are using the LLVM backend. */
1482 if (use_llvm) {
1483 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1484 FREE(inst_bytes);
1485 }
1486
1487 noutput = shader->noutput;
1488
1489 if (ctx.clip_vertex_write) {
1490 /* need to convert a clipvertex write into clipdistance writes and not export
1491 the clip vertex anymore */
1492
1493 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1494 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1495 shader->output[noutput].gpr = ctx.temp_reg;
1496 noutput++;
1497 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1498 shader->output[noutput].gpr = ctx.temp_reg+1;
1499 noutput++;
1500
1501 /* reset spi_sid for clipvertex output to avoid confusing spi */
1502 shader->output[ctx.cv_output].spi_sid = 0;
1503
1504 shader->clip_dist_write = 0xFF;
1505
1506 for (i = 0; i < 8; i++) {
1507 int oreg = i >> 2;
1508 int ochan = i & 3;
1509
1510 for (j = 0; j < 4; j++) {
1511 struct r600_bytecode_alu alu;
1512 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1513 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1514 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1515 alu.src[0].chan = j;
1516
1517 alu.src[1].sel = 512 + i;
1518 alu.src[1].kc_bank = 1;
1519 alu.src[1].chan = j;
1520
1521 alu.dst.sel = ctx.temp_reg + oreg;
1522 alu.dst.chan = j;
1523 alu.dst.write = (j == ochan);
1524 if (j == 3)
1525 alu.last = 1;
1526 r = r600_bytecode_add_alu(ctx.bc, &alu);
1527 if (r)
1528 return r;
1529 }
1530 }
1531 }
1532
1533 /* Add stream outputs. */
1534 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1535 for (i = 0; i < so.num_outputs; i++) {
1536 struct r600_bytecode_output output;
1537
1538 if (so.output[i].output_buffer >= 4) {
1539 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1540 so.output[i].output_buffer);
1541 r = -EINVAL;
1542 goto out_err;
1543 }
1544 if (so.output[i].dst_offset < so.output[i].start_component) {
1545 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1546 r = -EINVAL;
1547 goto out_err;
1548 }
1549
1550 memset(&output, 0, sizeof(struct r600_bytecode_output));
1551 output.gpr = shader->output[so.output[i].register_index].gpr;
1552 output.elem_size = 0;
1553 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1554 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1555 output.burst_count = 1;
1556 output.barrier = 1;
1557 /* array_size is an upper limit for the burst_count
1558 * with MEM_STREAM instructions */
1559 output.array_size = 0xFFF;
1560 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1561 if (ctx.bc->chip_class >= EVERGREEN) {
1562 switch (so.output[i].output_buffer) {
1563 case 0:
1564 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1565 break;
1566 case 1:
1567 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1568 break;
1569 case 2:
1570 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1571 break;
1572 case 3:
1573 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1574 break;
1575 }
1576 } else {
1577 switch (so.output[i].output_buffer) {
1578 case 0:
1579 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1580 break;
1581 case 1:
1582 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1583 break;
1584 case 2:
1585 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1586 break;
1587 case 3:
1588 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1589 break;
1590 }
1591 }
1592 r = r600_bytecode_add_output(ctx.bc, &output);
1593 if (r)
1594 goto out_err;
1595 }
1596 }
1597
1598 /* export output */
1599 for (i = 0, j = 0; i < noutput; i++, j++) {
1600 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1601 output[j].gpr = shader->output[i].gpr;
1602 output[j].elem_size = 3;
1603 output[j].swizzle_x = 0;
1604 output[j].swizzle_y = 1;
1605 output[j].swizzle_z = 2;
1606 output[j].swizzle_w = 3;
1607 output[j].burst_count = 1;
1608 output[j].barrier = 1;
1609 output[j].type = -1;
1610 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1611 switch (ctx.type) {
1612 case TGSI_PROCESSOR_VERTEX:
1613 switch (shader->output[i].name) {
1614 case TGSI_SEMANTIC_POSITION:
1615 output[j].array_base = next_pos_base++;
1616 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1617 break;
1618
1619 case TGSI_SEMANTIC_PSIZE:
1620 output[j].array_base = next_pos_base++;
1621 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1622 break;
1623 case TGSI_SEMANTIC_CLIPVERTEX:
1624 j--;
1625 break;
1626 case TGSI_SEMANTIC_CLIPDIST:
1627 output[j].array_base = next_pos_base++;
1628 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1629 /* spi_sid is 0 for clipdistance outputs that were generated
1630 * for clipvertex - we don't need to pass them to PS */
1631 if (shader->output[i].spi_sid) {
1632 j++;
1633 /* duplicate it as PARAM to pass to the pixel shader */
1634 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1635 output[j].array_base = next_param_base++;
1636 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1637 }
1638 break;
1639 case TGSI_SEMANTIC_FOG:
1640 output[j].swizzle_y = 4; /* 0 */
1641 output[j].swizzle_z = 4; /* 0 */
1642 output[j].swizzle_w = 5; /* 1 */
1643 break;
1644 }
1645 break;
1646 case TGSI_PROCESSOR_FRAGMENT:
1647 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1648 /* never export more colors than the number of CBs */
1649 if (next_pixel_base && next_pixel_base >= key.nr_cbufs) {
1650 /* skip export */
1651 j--;
1652 continue;
1653 }
1654 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1655 output[j].array_base = next_pixel_base++;
1656 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1657 shader->nr_ps_color_exports++;
1658 if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
1659 for (k = 1; k < key.nr_cbufs; k++) {
1660 j++;
1661 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1662 output[j].gpr = shader->output[i].gpr;
1663 output[j].elem_size = 3;
1664 output[j].swizzle_x = 0;
1665 output[j].swizzle_y = 1;
1666 output[j].swizzle_z = 2;
1667 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
1668 output[j].burst_count = 1;
1669 output[j].barrier = 1;
1670 output[j].array_base = next_pixel_base++;
1671 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1672 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1673 shader->nr_ps_color_exports++;
1674 }
1675 }
1676 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1677 output[j].array_base = 61;
1678 output[j].swizzle_x = 2;
1679 output[j].swizzle_y = 7;
1680 output[j].swizzle_z = output[j].swizzle_w = 7;
1681 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1682 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1683 output[j].array_base = 61;
1684 output[j].swizzle_x = 7;
1685 output[j].swizzle_y = 1;
1686 output[j].swizzle_z = output[j].swizzle_w = 7;
1687 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1688 } else {
1689 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1690 r = -EINVAL;
1691 goto out_err;
1692 }
1693 break;
1694 default:
1695 R600_ERR("unsupported processor type %d\n", ctx.type);
1696 r = -EINVAL;
1697 goto out_err;
1698 }
1699
1700 if (output[j].type==-1) {
1701 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1702 output[j].array_base = next_param_base++;
1703 }
1704 }
1705
1706 /* add fake param output for vertex shader if no param is exported */
1707 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1708 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1709 output[j].gpr = 0;
1710 output[j].elem_size = 3;
1711 output[j].swizzle_x = 7;
1712 output[j].swizzle_y = 7;
1713 output[j].swizzle_z = 7;
1714 output[j].swizzle_w = 7;
1715 output[j].burst_count = 1;
1716 output[j].barrier = 1;
1717 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1718 output[j].array_base = 0;
1719 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1720 j++;
1721 }
1722
1723 /* add fake pixel export */
1724 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1725 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1726 output[j].gpr = 0;
1727 output[j].elem_size = 3;
1728 output[j].swizzle_x = 7;
1729 output[j].swizzle_y = 7;
1730 output[j].swizzle_z = 7;
1731 output[j].swizzle_w = 7;
1732 output[j].burst_count = 1;
1733 output[j].barrier = 1;
1734 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1735 output[j].array_base = 0;
1736 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1737 j++;
1738 }
1739
1740 noutput = j;
1741
1742 /* set export done on last export of each type */
1743 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1744 if (ctx.bc->chip_class < CAYMAN) {
1745 if (i == (noutput - 1)) {
1746 output[i].end_of_program = 1;
1747 }
1748 }
1749 if (!(output_done & (1 << output[i].type))) {
1750 output_done |= (1 << output[i].type);
1751 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1752 }
1753 }
1754 /* add output to bytecode */
1755 if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) {
1756 for (i = 0; i < noutput; i++) {
1757 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1758 if (r)
1759 goto out_err;
1760 }
1761 }
1762 /* add program end */
1763 if (ctx.bc->chip_class == CAYMAN)
1764 cm_bytecode_add_cf_end(ctx.bc);
1765
1766 /* check GPR limit - we have 124 = 128 - 4
1767 * (4 are reserved as alu clause temporary registers) */
1768 if (ctx.bc->ngpr > 124) {
1769 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1770 r = -ENOMEM;
1771 goto out_err;
1772 }
1773
1774 free(ctx.literals);
1775 tgsi_parse_free(&ctx.parse);
1776 return 0;
1777 out_err:
1778 free(ctx.literals);
1779 tgsi_parse_free(&ctx.parse);
1780 return r;
1781 }
1782
1783 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1784 {
1785 R600_ERR("%s tgsi opcode unsupported\n",
1786 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1787 return -EINVAL;
1788 }
1789
/*
 * Instruction handler that emits nothing and always reports success.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx; /* nothing to emit */
	return 0;
}
1794
1795 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1796 const struct r600_shader_src *shader_src,
1797 unsigned chan)
1798 {
1799 bc_src->sel = shader_src->sel;
1800 bc_src->chan = shader_src->swizzle[chan];
1801 bc_src->neg = shader_src->neg;
1802 bc_src->abs = shader_src->abs;
1803 bc_src->rel = shader_src->rel;
1804 bc_src->value = shader_src->value[bc_src->chan];
1805 }
1806
1807 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1808 {
1809 bc_src->abs = 1;
1810 bc_src->neg = 0;
1811 }
1812
1813 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1814 {
1815 bc_src->neg = !bc_src->neg;
1816 }
1817
1818 static void tgsi_dst(struct r600_shader_ctx *ctx,
1819 const struct tgsi_full_dst_register *tgsi_dst,
1820 unsigned swizzle,
1821 struct r600_bytecode_alu_dst *r600_dst)
1822 {
1823 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1824
1825 r600_dst->sel = tgsi_dst->Register.Index;
1826 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1827 r600_dst->chan = swizzle;
1828 r600_dst->write = 1;
1829 if (tgsi_dst->Register.Indirect)
1830 r600_dst->rel = V_SQ_REL_RELATIVE;
1831 if (inst->Instruction.Saturate) {
1832 r600_dst->clamp = 1;
1833 }
1834 }
1835
/*
 * Return the index (0..3) of the highest channel enabled in a 4-bit
 * writemask, or 0 when none of the four low bits is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to x; the first hit is the last enabled channel */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1u)
			return chan;
	}
	return 0;
}
1847
1848 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1849 {
1850 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1851 struct r600_bytecode_alu alu;
1852 int i, j, r;
1853 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1854
1855 for (i = 0; i < lasti + 1; i++) {
1856 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1857 continue;
1858
1859 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1860 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1861
1862 alu.inst = ctx->inst_info->r600_opcode;
1863 if (!swap) {
1864 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1865 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1866 }
1867 } else {
1868 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1869 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1870 }
1871 /* handle some special cases */
1872 switch (ctx->inst_info->tgsi_opcode) {
1873 case TGSI_OPCODE_SUB:
1874 r600_bytecode_src_toggle_neg(&alu.src[1]);
1875 break;
1876 case TGSI_OPCODE_ABS:
1877 r600_bytecode_src_set_abs(&alu.src[0]);
1878 break;
1879 default:
1880 break;
1881 }
1882 if (i == lasti || trans_only) {
1883 alu.last = 1;
1884 }
1885 r = r600_bytecode_add_alu(ctx->bc, &alu);
1886 if (r)
1887 return r;
1888 }
1889 return 0;
1890 }
1891
/*
 * Per-channel translation with the sources in TGSI order; only the final
 * channel is flagged as last.
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1896
/*
 * Per-channel translation with the first two source operands exchanged.
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1901
/*
 * Per-channel translation where every emitted ALU op is flagged as last,
 * so each channel ends up in an instruction group of its own.
 */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1906
1907 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1908 {
1909 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1910 struct r600_bytecode_alu alu;
1911 int i, r;
1912 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1913
1914 for (i = 0; i < lasti + 1; i++) {
1915
1916 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1917 continue;
1918 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1919 alu.inst = ctx->inst_info->r600_opcode;
1920
1921 alu.src[0].sel = V_SQ_ALU_SRC_0;
1922
1923 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1924
1925 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1926
1927 if (i == lasti) {
1928 alu.last = 1;
1929 }
1930 r = r600_bytecode_add_alu(ctx->bc, &alu);
1931 if (r)
1932 return r;
1933 }
1934 return 0;
1935
1936 }
1937
1938 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1939 {
1940 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1941 int i, j, r;
1942 struct r600_bytecode_alu alu;
1943 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1944
1945 for (i = 0 ; i < last_slot; i++) {
1946 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1947 alu.inst = ctx->inst_info->r600_opcode;
1948 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1949 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1950
1951 /* RSQ should take the absolute value of src */
1952 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
1953 r600_bytecode_src_set_abs(&alu.src[j]);
1954 }
1955 }
1956 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1957 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1958
1959 if (i == last_slot - 1)
1960 alu.last = 1;
1961 r = r600_bytecode_add_alu(ctx->bc, &alu);
1962 if (r)
1963 return r;
1964 }
1965 return 0;
1966 }
1967
1968 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1969 {
1970 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1971 int i, j, k, r;
1972 struct r600_bytecode_alu alu;
1973 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1974 for (k = 0; k < last_slot; k++) {
1975 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1976 continue;
1977
1978 for (i = 0 ; i < 4; i++) {
1979 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1980 alu.inst = ctx->inst_info->r600_opcode;
1981 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1982 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1983 }
1984 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1985 alu.dst.write = (i == k);
1986 if (i == 3)
1987 alu.last = 1;
1988 r = r600_bytecode_add_alu(ctx->bc, &alu);
1989 if (r)
1990 return r;
1991 }
1992 }
1993 return 0;
1994 }
1995
1996 /*
1997 * r600 - trunc to -PI..PI range
1998 * r700 - normalize by dividing by 2PI
1999 * see fdo bug 27901
2000 */
2001 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
2002 {
2003 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
2004 static float double_pi = 3.1415926535 * 2;
2005 static float neg_pi = -3.1415926535;
2006
2007 int r;
2008 struct r600_bytecode_alu alu;
2009
2010 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2011 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2012 alu.is_op3 = 1;
2013
2014 alu.dst.chan = 0;
2015 alu.dst.sel = ctx->temp_reg;
2016 alu.dst.write = 1;
2017
2018 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2019
2020 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2021 alu.src[1].chan = 0;
2022 alu.src[1].value = *(uint32_t *)&half_inv_pi;
2023 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2024 alu.src[2].chan = 0;
2025 alu.last = 1;
2026 r = r600_bytecode_add_alu(ctx->bc, &alu);
2027 if (r)
2028 return r;
2029
2030 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2031 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2032
2033 alu.dst.chan = 0;
2034 alu.dst.sel = ctx->temp_reg;
2035 alu.dst.write = 1;
2036
2037 alu.src[0].sel = ctx->temp_reg;
2038 alu.src[0].chan = 0;
2039 alu.last = 1;
2040 r = r600_bytecode_add_alu(ctx->bc, &alu);
2041 if (r)
2042 return r;
2043
2044 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2045 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2046 alu.is_op3 = 1;
2047
2048 alu.dst.chan = 0;
2049 alu.dst.sel = ctx->temp_reg;
2050 alu.dst.write = 1;
2051
2052 alu.src[0].sel = ctx->temp_reg;
2053 alu.src[0].chan = 0;
2054
2055 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2056 alu.src[1].chan = 0;
2057 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2058 alu.src[2].chan = 0;
2059
2060 if (ctx->bc->chip_class == R600) {
2061 alu.src[1].value = *(uint32_t *)&double_pi;
2062 alu.src[2].value = *(uint32_t *)&neg_pi;
2063 } else {
2064 alu.src[1].sel = V_SQ_ALU_SRC_1;
2065 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2066 alu.src[2].neg = 1;
2067 }
2068
2069 alu.last = 1;
2070 r = r600_bytecode_add_alu(ctx->bc, &alu);
2071 if (r)
2072 return r;
2073 return 0;
2074 }
2075
2076 static int cayman_trig(struct r600_shader_ctx *ctx)
2077 {
2078 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2079 struct r600_bytecode_alu alu;
2080 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2081 int i, r;
2082
2083 r = tgsi_setup_trig(ctx);
2084 if (r)
2085 return r;
2086
2087
2088 for (i = 0; i < last_slot; i++) {
2089 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2090 alu.inst = ctx->inst_info->r600_opcode;
2091 alu.dst.chan = i;
2092
2093 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2094 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2095
2096 alu.src[0].sel = ctx->temp_reg;
2097 alu.src[0].chan = 0;
2098 if (i == last_slot - 1)
2099 alu.last = 1;
2100 r = r600_bytecode_add_alu(ctx->bc, &alu);
2101 if (r)
2102 return r;
2103 }
2104 return 0;
2105 }
2106
2107 static int tgsi_trig(struct r600_shader_ctx *ctx)
2108 {
2109 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2110 struct r600_bytecode_alu alu;
2111 int i, r;
2112 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2113
2114 r = tgsi_setup_trig(ctx);
2115 if (r)
2116 return r;
2117
2118 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2119 alu.inst = ctx->inst_info->r600_opcode;
2120 alu.dst.chan = 0;
2121 alu.dst.sel = ctx->temp_reg;
2122 alu.dst.write = 1;
2123
2124 alu.src[0].sel = ctx->temp_reg;
2125 alu.src[0].chan = 0;
2126 alu.last = 1;
2127 r = r600_bytecode_add_alu(ctx->bc, &alu);
2128 if (r)
2129 return r;
2130
2131 /* replicate result */
2132 for (i = 0; i < lasti + 1; i++) {
2133 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2134 continue;
2135
2136 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2137 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2138
2139 alu.src[0].sel = ctx->temp_reg;
2140 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2141 if (i == lasti)
2142 alu.last = 1;
2143 r = r600_bytecode_add_alu(ctx->bc, &alu);
2144 if (r)
2145 return r;
2146 }
2147 return 0;
2148 }
2149
2150 static int tgsi_scs(struct r600_shader_ctx *ctx)
2151 {
2152 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2153 struct r600_bytecode_alu alu;
2154 int i, r;
2155
2156 /* We'll only need the trig stuff if we are going to write to the
2157 * X or Y components of the destination vector.
2158 */
2159 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2160 r = tgsi_setup_trig(ctx);
2161 if (r)
2162 return r;
2163 }
2164
2165 /* dst.x = COS */
2166 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2167 if (ctx->bc->chip_class == CAYMAN) {
2168 for (i = 0 ; i < 3; i++) {
2169 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2170 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2171 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2172
2173 if (i == 0)
2174 alu.dst.write = 1;
2175 else
2176 alu.dst.write = 0;
2177 alu.src[0].sel = ctx->temp_reg;
2178 alu.src[0].chan = 0;
2179 if (i == 2)
2180 alu.last = 1;
2181 r = r600_bytecode_add_alu(ctx->bc, &alu);
2182 if (r)
2183 return r;
2184 }
2185 } else {
2186 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2187 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2188 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2189
2190 alu.src[0].sel = ctx->temp_reg;
2191 alu.src[0].chan = 0;
2192 alu.last = 1;
2193 r = r600_bytecode_add_alu(ctx->bc, &alu);
2194 if (r)
2195 return r;
2196 }
2197 }
2198
2199 /* dst.y = SIN */
2200 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2201 if (ctx->bc->chip_class == CAYMAN) {
2202 for (i = 0 ; i < 3; i++) {
2203 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2204 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2205 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2206 if (i == 1)
2207 alu.dst.write = 1;
2208 else
2209 alu.dst.write = 0;
2210 alu.src[0].sel = ctx->temp_reg;
2211 alu.src[0].chan = 0;
2212 if (i == 2)
2213 alu.last = 1;
2214 r = r600_bytecode_add_alu(ctx->bc, &alu);
2215 if (r)
2216 return r;
2217 }
2218 } else {
2219 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2220 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2221 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2222
2223 alu.src[0].sel = ctx->temp_reg;
2224 alu.src[0].chan = 0;
2225 alu.last = 1;
2226 r = r600_bytecode_add_alu(ctx->bc, &alu);
2227 if (r)
2228 return r;
2229 }
2230 }
2231
2232 /* dst.z = 0.0; */
2233 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2234 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2235
2236 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2237
2238 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2239
2240 alu.src[0].sel = V_SQ_ALU_SRC_0;
2241 alu.src[0].chan = 0;
2242
2243 alu.last = 1;
2244
2245 r = r600_bytecode_add_alu(ctx->bc, &alu);
2246 if (r)
2247 return r;
2248 }
2249
2250 /* dst.w = 1.0; */
2251 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2252 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2253
2254 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2255
2256 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2257
2258 alu.src[0].sel = V_SQ_ALU_SRC_1;
2259 alu.src[0].chan = 0;
2260
2261 alu.last = 1;
2262
2263 r = r600_bytecode_add_alu(ctx->bc, &alu);
2264 if (r)
2265 return r;
2266 }
2267
2268 return 0;
2269 }
2270
2271 static int tgsi_kill(struct r600_shader_ctx *ctx)
2272 {
2273 struct r600_bytecode_alu alu;
2274 int i, r;
2275
2276 for (i = 0; i < 4; i++) {
2277 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2278 alu.inst = ctx->inst_info->r600_opcode;
2279
2280 alu.dst.chan = i;
2281
2282 alu.src[0].sel = V_SQ_ALU_SRC_0;
2283
2284 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2285 alu.src[1].sel = V_SQ_ALU_SRC_1;
2286 alu.src[1].neg = 1;
2287 } else {
2288 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2289 }
2290 if (i == 3) {
2291 alu.last = 1;
2292 }
2293 r = r600_bytecode_add_alu(ctx->bc, &alu);
2294 if (r)
2295 return r;
2296 }
2297
2298 /* kill must be last in ALU */
2299 ctx->bc->force_add_cf = 1;
2300 ctx->shader->uses_kill = TRUE;
2301 return 0;
2302 }
2303
2304 static int tgsi_lit(struct r600_shader_ctx *ctx)
2305 {
2306 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2307 struct r600_bytecode_alu alu;
2308 int r;
2309
2310 /* tmp.x = max(src.y, 0.0) */
2311 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2313 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2314 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2315 alu.src[1].chan = 1;
2316
2317 alu.dst.sel = ctx->temp_reg;
2318 alu.dst.chan = 0;
2319 alu.dst.write = 1;
2320
2321 alu.last = 1;
2322 r = r600_bytecode_add_alu(ctx->bc, &alu);
2323 if (r)
2324 return r;
2325
2326 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2327 {
2328 int chan;
2329 int sel;
2330 int i;
2331
2332 if (ctx->bc->chip_class == CAYMAN) {
2333 for (i = 0; i < 3; i++) {
2334 /* tmp.z = log(tmp.x) */
2335 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2336 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2337 alu.src[0].sel = ctx->temp_reg;
2338 alu.src[0].chan = 0;
2339 alu.dst.sel = ctx->temp_reg;
2340 alu.dst.chan = i;
2341 if (i == 2) {
2342 alu.dst.write = 1;
2343 alu.last = 1;
2344 } else
2345 alu.dst.write = 0;
2346
2347 r = r600_bytecode_add_alu(ctx->bc, &alu);
2348 if (r)
2349 return r;
2350 }
2351 } else {
2352 /* tmp.z = log(tmp.x) */
2353 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2355 alu.src[0].sel = ctx->temp_reg;
2356 alu.src[0].chan = 0;
2357 alu.dst.sel = ctx->temp_reg;
2358 alu.dst.chan = 2;
2359 alu.dst.write = 1;
2360 alu.last = 1;
2361 r = r600_bytecode_add_alu(ctx->bc, &alu);
2362 if (r)
2363 return r;
2364 }
2365
2366 chan = alu.dst.chan;
2367 sel = alu.dst.sel;
2368
2369 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2370 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2371 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2372 alu.src[0].sel = sel;
2373 alu.src[0].chan = chan;
2374 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2375 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2376 alu.dst.sel = ctx->temp_reg;
2377 alu.dst.chan = 0;
2378 alu.dst.write = 1;
2379 alu.is_op3 = 1;
2380 alu.last = 1;
2381 r = r600_bytecode_add_alu(ctx->bc, &alu);
2382 if (r)
2383 return r;
2384
2385 if (ctx->bc->chip_class == CAYMAN) {
2386 for (i = 0; i < 3; i++) {
2387 /* dst.z = exp(tmp.x) */
2388 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2389 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2390 alu.src[0].sel = ctx->temp_reg;
2391 alu.src[0].chan = 0;
2392 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2393 if (i == 2) {
2394 alu.dst.write = 1;
2395 alu.last = 1;
2396 } else
2397 alu.dst.write = 0;
2398 r = r600_bytecode_add_alu(ctx->bc, &alu);
2399 if (r)
2400 return r;
2401 }
2402 } else {
2403 /* dst.z = exp(tmp.x) */
2404 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2405 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2406 alu.src[0].sel = ctx->temp_reg;
2407 alu.src[0].chan = 0;
2408 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2409 alu.last = 1;
2410 r = r600_bytecode_add_alu(ctx->bc, &alu);
2411 if (r)
2412 return r;
2413 }
2414 }
2415
2416 /* dst.x, <- 1.0 */
2417 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2419 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2420 alu.src[0].chan = 0;
2421 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2422 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2423 r = r600_bytecode_add_alu(ctx->bc, &alu);
2424 if (r)
2425 return r;
2426
2427 /* dst.y = max(src.x, 0.0) */
2428 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2429 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2430 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2431 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2432 alu.src[1].chan = 0;
2433 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2434 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2435 r = r600_bytecode_add_alu(ctx->bc, &alu);
2436 if (r)
2437 return r;
2438
2439 /* dst.w, <- 1.0 */
2440 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2441 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2442 alu.src[0].sel = V_SQ_ALU_SRC_1;
2443 alu.src[0].chan = 0;
2444 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2445 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2446 alu.last = 1;
2447 r = r600_bytecode_add_alu(ctx->bc, &alu);
2448 if (r)
2449 return r;
2450
2451 return 0;
2452 }
2453
2454 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2455 {
2456 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2457 struct r600_bytecode_alu alu;
2458 int i, r;
2459
2460 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2461
2462 /* XXX:
2463 * For state trackers other than OpenGL, we'll want to use
2464 * _RECIPSQRT_IEEE instead.
2465 */
2466 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2467
2468 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2469 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2470 r600_bytecode_src_set_abs(&alu.src[i]);
2471 }
2472 alu.dst.sel = ctx->temp_reg;
2473 alu.dst.write = 1;
2474 alu.last = 1;
2475 r = r600_bytecode_add_alu(ctx->bc, &alu);
2476 if (r)
2477 return r;
2478 /* replicate result */
2479 return tgsi_helper_tempx_replicate(ctx);
2480 }
2481
2482 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2483 {
2484 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2485 struct r600_bytecode_alu alu;
2486 int i, r;
2487
2488 for (i = 0; i < 4; i++) {
2489 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2490 alu.src[0].sel = ctx->temp_reg;
2491 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2492 alu.dst.chan = i;
2493 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2494 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2495 if (i == 3)
2496 alu.last = 1;
2497 r = r600_bytecode_add_alu(ctx->bc, &alu);
2498 if (r)
2499 return r;
2500 }
2501 return 0;
2502 }
2503
2504 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2505 {
2506 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2507 struct r600_bytecode_alu alu;
2508 int i, r;
2509
2510 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2511 alu.inst = ctx->inst_info->r600_opcode;
2512 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2513 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2514 }
2515 alu.dst.sel = ctx->temp_reg;
2516 alu.dst.write = 1;
2517 alu.last = 1;
2518 r = r600_bytecode_add_alu(ctx->bc, &alu);
2519 if (r)
2520 return r;
2521 /* replicate result */
2522 return tgsi_helper_tempx_replicate(ctx);
2523 }
2524
2525 static int cayman_pow(struct r600_shader_ctx *ctx)
2526 {
2527 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2528 int i, r;
2529 struct r600_bytecode_alu alu;
2530 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2531
2532 for (i = 0; i < 3; i++) {
2533 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2534 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2535 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2536 alu.dst.sel = ctx->temp_reg;
2537 alu.dst.chan = i;
2538 alu.dst.write = 1;
2539 if (i == 2)
2540 alu.last = 1;
2541 r = r600_bytecode_add_alu(ctx->bc, &alu);
2542 if (r)
2543 return r;
2544 }
2545
2546 /* b * LOG2(a) */
2547 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2548 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2549 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2550 alu.src[1].sel = ctx->temp_reg;
2551 alu.dst.sel = ctx->temp_reg;
2552 alu.dst.write = 1;
2553 alu.last = 1;
2554 r = r600_bytecode_add_alu(ctx->bc, &alu);
2555 if (r)
2556 return r;
2557
2558 for (i = 0; i < last_slot; i++) {
2559 /* POW(a,b) = EXP2(b * LOG2(a))*/
2560 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2561 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2562 alu.src[0].sel = ctx->temp_reg;
2563
2564 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2565 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2566 if (i == last_slot - 1)
2567 alu.last = 1;
2568 r = r600_bytecode_add_alu(ctx->bc, &alu);
2569 if (r)
2570 return r;
2571 }
2572 return 0;
2573 }
2574
2575 static int tgsi_pow(struct r600_shader_ctx *ctx)
2576 {
2577 struct r600_bytecode_alu alu;
2578 int r;
2579
2580 /* LOG2(a) */
2581 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2583 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2584 alu.dst.sel = ctx->temp_reg;
2585 alu.dst.write = 1;
2586 alu.last = 1;
2587 r = r600_bytecode_add_alu(ctx->bc, &alu);
2588 if (r)
2589 return r;
2590 /* b * LOG2(a) */
2591 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2592 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2593 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2594 alu.src[1].sel = ctx->temp_reg;
2595 alu.dst.sel = ctx->temp_reg;
2596 alu.dst.write = 1;
2597 alu.last = 1;
2598 r = r600_bytecode_add_alu(ctx->bc, &alu);
2599 if (r)
2600 return r;
2601 /* POW(a,b) = EXP2(b * LOG2(a))*/
2602 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2604 alu.src[0].sel = ctx->temp_reg;
2605 alu.dst.sel = ctx->temp_reg;
2606 alu.dst.write = 1;
2607 alu.last = 1;
2608 r = r600_bytecode_add_alu(ctx->bc, &alu);
2609 if (r)
2610 return r;
2611 return tgsi_helper_tempx_replicate(ctx);
2612 }
2613
2614 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2615 {
2616 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2617 struct r600_bytecode_alu alu;
2618 int i, r, j;
2619 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2620 int tmp0 = ctx->temp_reg;
2621 int tmp1 = r600_get_temp(ctx);
2622 int tmp2 = r600_get_temp(ctx);
2623 int tmp3 = r600_get_temp(ctx);
2624 /* Unsigned path:
2625 *
2626 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2627 *
2628 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2629 * 2. tmp0.z = lo (tmp0.x * src2)
2630 * 3. tmp0.w = -tmp0.z
2631 * 4. tmp0.y = hi (tmp0.x * src2)
2632 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2633 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2634 * 7. tmp1.x = tmp0.x - tmp0.w
2635 * 8. tmp1.y = tmp0.x + tmp0.w
2636 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2637 * 10. tmp0.z = hi(tmp0.x * src1) = q
2638 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2639 *
2640 * 12. tmp0.w = src1 - tmp0.y = r
2641 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2642 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2643 *
2644 * if DIV
2645 *
2646 * 15. tmp1.z = tmp0.z + 1 = q + 1
2647 * 16. tmp1.w = tmp0.z - 1 = q - 1
2648 *
2649 * else MOD
2650 *
2651 * 15. tmp1.z = tmp0.w - src2 = r - src2
2652 * 16. tmp1.w = tmp0.w + src2 = r + src2
2653 *
2654 * endif
2655 *
2656 * 17. tmp1.x = tmp1.x & tmp1.y
2657 *
2658 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2659 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2660 *
2661 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2662 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2663 *
2664 * Signed path:
2665 *
2666 * Same as unsigned, using abs values of the operands,
2667 * and fixing the sign of the result in the end.
2668 */
2669
2670 for (i = 0; i < 4; i++) {
2671 if (!(write_mask & (1<<i)))
2672 continue;
2673
2674 if (signed_op) {
2675
2676 /* tmp2.x = -src0 */
2677 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2678 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2679
2680 alu.dst.sel = tmp2;
2681 alu.dst.chan = 0;
2682 alu.dst.write = 1;
2683
2684 alu.src[0].sel = V_SQ_ALU_SRC_0;
2685
2686 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2687
2688 alu.last = 1;
2689 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2690 return r;
2691
2692 /* tmp2.y = -src1 */
2693 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2694 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2695
2696 alu.dst.sel = tmp2;
2697 alu.dst.chan = 1;
2698 alu.dst.write = 1;
2699
2700 alu.src[0].sel = V_SQ_ALU_SRC_0;
2701
2702 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2703
2704 alu.last = 1;
2705 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2706 return r;
2707
2708 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2709 /* it will be a sign of the quotient */
2710 if (!mod) {
2711
2712 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2713 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2714
2715 alu.dst.sel = tmp2;
2716 alu.dst.chan = 2;
2717 alu.dst.write = 1;
2718
2719 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2720 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2721
2722 alu.last = 1;
2723 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2724 return r;
2725 }
2726
2727 /* tmp2.x = |src0| */
2728 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2729 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2730 alu.is_op3 = 1;
2731
2732 alu.dst.sel = tmp2;
2733 alu.dst.chan = 0;
2734 alu.dst.write = 1;
2735
2736 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2737 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2738 alu.src[2].sel = tmp2;
2739 alu.src[2].chan = 0;
2740
2741 alu.last = 1;
2742 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2743 return r;
2744
2745 /* tmp2.y = |src1| */
2746 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2747 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2748 alu.is_op3 = 1;
2749
2750 alu.dst.sel = tmp2;
2751 alu.dst.chan = 1;
2752 alu.dst.write = 1;
2753
2754 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2755 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2756 alu.src[2].sel = tmp2;
2757 alu.src[2].chan = 1;
2758
2759 alu.last = 1;
2760 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2761 return r;
2762
2763 }
2764
2765 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2766 if (ctx->bc->chip_class == CAYMAN) {
2767 /* tmp3.x = u2f(src2) */
2768 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2769 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2770
2771 alu.dst.sel = tmp3;
2772 alu.dst.chan = 0;
2773 alu.dst.write = 1;
2774
2775 if (signed_op) {
2776 alu.src[0].sel = tmp2;
2777 alu.src[0].chan = 1;
2778 } else {
2779 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2780 }
2781
2782 alu.last = 1;
2783 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2784 return r;
2785
2786 /* tmp0.x = recip(tmp3.x) */
2787 for (j = 0 ; j < 3; j++) {
2788 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2789 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2790
2791 alu.dst.sel = tmp0;
2792 alu.dst.chan = j;
2793 alu.dst.write = (j == 0);
2794
2795 alu.src[0].sel = tmp3;
2796 alu.src[0].chan = 0;
2797
2798 if (j == 2)
2799 alu.last = 1;
2800 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2801 return r;
2802 }
2803
2804 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2805 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2806
2807 alu.src[0].sel = tmp0;
2808 alu.src[0].chan = 0;
2809
2810 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2811 alu.src[1].value = 0x4f800000;
2812
2813 alu.dst.sel = tmp3;
2814 alu.dst.write = 1;
2815 alu.last = 1;
2816 r = r600_bytecode_add_alu(ctx->bc, &alu);
2817 if (r)
2818 return r;
2819
2820 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2821 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2822
2823 alu.dst.sel = tmp0;
2824 alu.dst.chan = 0;
2825 alu.dst.write = 1;
2826
2827 alu.src[0].sel = tmp3;
2828 alu.src[0].chan = 0;
2829
2830 alu.last = 1;
2831 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2832 return r;
2833
2834 } else {
2835 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2836 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2837
2838 alu.dst.sel = tmp0;
2839 alu.dst.chan = 0;
2840 alu.dst.write = 1;
2841
2842 if (signed_op) {
2843 alu.src[0].sel = tmp2;
2844 alu.src[0].chan = 1;
2845 } else {
2846 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2847 }
2848
2849 alu.last = 1;
2850 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2851 return r;
2852 }
2853
2854 /* 2. tmp0.z = lo (tmp0.x * src2) */
2855 if (ctx->bc->chip_class == CAYMAN) {
2856 for (j = 0 ; j < 4; j++) {
2857 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2858 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2859
2860 alu.dst.sel = tmp0;
2861 alu.dst.chan = j;
2862 alu.dst.write = (j == 2);
2863
2864 alu.src[0].sel = tmp0;
2865 alu.src[0].chan = 0;
2866 if (signed_op) {
2867 alu.src[1].sel = tmp2;
2868 alu.src[1].chan = 1;
2869 } else {
2870 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2871 }
2872
2873 alu.last = (j == 3);
2874 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2875 return r;
2876 }
2877 } else {
2878 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2879 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2880
2881 alu.dst.sel = tmp0;
2882 alu.dst.chan = 2;
2883 alu.dst.write = 1;
2884
2885 alu.src[0].sel = tmp0;
2886 alu.src[0].chan = 0;
2887 if (signed_op) {
2888 alu.src[1].sel = tmp2;
2889 alu.src[1].chan = 1;
2890 } else {
2891 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2892 }
2893
2894 alu.last = 1;
2895 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2896 return r;
2897 }
2898
2899 /* 3. tmp0.w = -tmp0.z */
2900 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2901 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2902
2903 alu.dst.sel = tmp0;
2904 alu.dst.chan = 3;
2905 alu.dst.write = 1;
2906
2907 alu.src[0].sel = V_SQ_ALU_SRC_0;
2908 alu.src[1].sel = tmp0;
2909 alu.src[1].chan = 2;
2910
2911 alu.last = 1;
2912 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2913 return r;
2914
2915 /* 4. tmp0.y = hi (tmp0.x * src2) */
2916 if (ctx->bc->chip_class == CAYMAN) {
2917 for (j = 0 ; j < 4; j++) {
2918 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2919 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2920
2921 alu.dst.sel = tmp0;
2922 alu.dst.chan = j;
2923 alu.dst.write = (j == 1);
2924
2925 alu.src[0].sel = tmp0;
2926 alu.src[0].chan = 0;
2927
2928 if (signed_op) {
2929 alu.src[1].sel = tmp2;
2930 alu.src[1].chan = 1;
2931 } else {
2932 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2933 }
2934 alu.last = (j == 3);
2935 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2936 return r;
2937 }
2938 } else {
2939 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2940 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2941
2942 alu.dst.sel = tmp0;
2943 alu.dst.chan = 1;
2944 alu.dst.write = 1;
2945
2946 alu.src[0].sel = tmp0;
2947 alu.src[0].chan = 0;
2948
2949 if (signed_op) {
2950 alu.src[1].sel = tmp2;
2951 alu.src[1].chan = 1;
2952 } else {
2953 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2954 }
2955
2956 alu.last = 1;
2957 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2958 return r;
2959 }
2960
2961 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2962 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2963 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2964 alu.is_op3 = 1;
2965
2966 alu.dst.sel = tmp0;
2967 alu.dst.chan = 2;
2968 alu.dst.write = 1;
2969
2970 alu.src[0].sel = tmp0;
2971 alu.src[0].chan = 1;
2972 alu.src[1].sel = tmp0;
2973 alu.src[1].chan = 3;
2974 alu.src[2].sel = tmp0;
2975 alu.src[2].chan = 2;
2976
2977 alu.last = 1;
2978 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2979 return r;
2980
2981 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2982 if (ctx->bc->chip_class == CAYMAN) {
2983 for (j = 0 ; j < 4; j++) {
2984 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2985 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2986
2987 alu.dst.sel = tmp0;
2988 alu.dst.chan = j;
2989 alu.dst.write = (j == 3);
2990
2991 alu.src[0].sel = tmp0;
2992 alu.src[0].chan = 2;
2993
2994 alu.src[1].sel = tmp0;
2995 alu.src[1].chan = 0;
2996
2997 alu.last = (j == 3);
2998 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2999 return r;
3000 }
3001 } else {
3002 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3003 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3004
3005 alu.dst.sel = tmp0;
3006 alu.dst.chan = 3;
3007 alu.dst.write = 1;
3008
3009 alu.src[0].sel = tmp0;
3010 alu.src[0].chan = 2;
3011
3012 alu.src[1].sel = tmp0;
3013 alu.src[1].chan = 0;
3014
3015 alu.last = 1;
3016 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3017 return r;
3018 }
3019
3020 /* 7. tmp1.x = tmp0.x - tmp0.w */
3021 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3022 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3023
3024 alu.dst.sel = tmp1;
3025 alu.dst.chan = 0;
3026 alu.dst.write = 1;
3027
3028 alu.src[0].sel = tmp0;
3029 alu.src[0].chan = 0;
3030 alu.src[1].sel = tmp0;
3031 alu.src[1].chan = 3;
3032
3033 alu.last = 1;
3034 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3035 return r;
3036
3037 /* 8. tmp1.y = tmp0.x + tmp0.w */
3038 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3039 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3040
3041 alu.dst.sel = tmp1;
3042 alu.dst.chan = 1;
3043 alu.dst.write = 1;
3044
3045 alu.src[0].sel = tmp0;
3046 alu.src[0].chan = 0;
3047 alu.src[1].sel = tmp0;
3048 alu.src[1].chan = 3;
3049
3050 alu.last = 1;
3051 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3052 return r;
3053
3054 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
3055 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3056 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3057 alu.is_op3 = 1;
3058
3059 alu.dst.sel = tmp0;
3060 alu.dst.chan = 0;
3061 alu.dst.write = 1;
3062
3063 alu.src[0].sel = tmp0;
3064 alu.src[0].chan = 1;
3065 alu.src[1].sel = tmp1;
3066 alu.src[1].chan = 1;
3067 alu.src[2].sel = tmp1;
3068 alu.src[2].chan = 0;
3069
3070 alu.last = 1;
3071 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3072 return r;
3073
3074 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
3075 if (ctx->bc->chip_class == CAYMAN) {
3076 for (j = 0 ; j < 4; j++) {
3077 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3079
3080 alu.dst.sel = tmp0;
3081 alu.dst.chan = j;
3082 alu.dst.write = (j == 2);
3083
3084 alu.src[0].sel = tmp0;
3085 alu.src[0].chan = 0;
3086
3087 if (signed_op) {
3088 alu.src[1].sel = tmp2;
3089 alu.src[1].chan = 0;
3090 } else {
3091 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3092 }
3093
3094 alu.last = (j == 3);
3095 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3096 return r;
3097 }
3098 } else {
3099 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3100 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3101
3102 alu.dst.sel = tmp0;
3103 alu.dst.chan = 2;
3104 alu.dst.write = 1;
3105
3106 alu.src[0].sel = tmp0;
3107 alu.src[0].chan = 0;
3108
3109 if (signed_op) {
3110 alu.src[1].sel = tmp2;
3111 alu.src[1].chan = 0;
3112 } else {
3113 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3114 }
3115
3116 alu.last = 1;
3117 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3118 return r;
3119 }
3120
3121 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
3122 if (ctx->bc->chip_class == CAYMAN) {
3123 for (j = 0 ; j < 4; j++) {
3124 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3125 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3126
3127 alu.dst.sel = tmp0;
3128 alu.dst.chan = j;
3129 alu.dst.write = (j == 1);
3130
3131 if (signed_op) {
3132 alu.src[0].sel = tmp2;
3133 alu.src[0].chan = 1;
3134 } else {
3135 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3136 }
3137
3138 alu.src[1].sel = tmp0;
3139 alu.src[1].chan = 2;
3140
3141 alu.last = (j == 3);
3142 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3143 return r;
3144 }
3145 } else {
3146 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3147 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3148
3149 alu.dst.sel = tmp0;
3150 alu.dst.chan = 1;
3151 alu.dst.write = 1;
3152
3153 if (signed_op) {
3154 alu.src[0].sel = tmp2;
3155 alu.src[0].chan = 1;
3156 } else {
3157 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3158 }
3159
3160 alu.src[1].sel = tmp0;
3161 alu.src[1].chan = 2;
3162
3163 alu.last = 1;
3164 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3165 return r;
3166 }
3167
3168 /* 12. tmp0.w = src1 - tmp0.y = r */
3169 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3170 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3171
3172 alu.dst.sel = tmp0;
3173 alu.dst.chan = 3;
3174 alu.dst.write = 1;
3175
3176 if (signed_op) {
3177 alu.src[0].sel = tmp2;
3178 alu.src[0].chan = 0;
3179 } else {
3180 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3181 }
3182
3183 alu.src[1].sel = tmp0;
3184 alu.src[1].chan = 1;
3185
3186 alu.last = 1;
3187 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3188 return r;
3189
3190 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3191 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3192 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3193
3194 alu.dst.sel = tmp1;
3195 alu.dst.chan = 0;
3196 alu.dst.write = 1;
3197
3198 alu.src[0].sel = tmp0;
3199 alu.src[0].chan = 3;
3200 if (signed_op) {
3201 alu.src[1].sel = tmp2;
3202 alu.src[1].chan = 1;
3203 } else {
3204 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3205 }
3206
3207 alu.last = 1;
3208 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3209 return r;
3210
3211 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3212 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3213 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3214
3215 alu.dst.sel = tmp1;
3216 alu.dst.chan = 1;
3217 alu.dst.write = 1;
3218
3219 if (signed_op) {
3220 alu.src[0].sel = tmp2;
3221 alu.src[0].chan = 0;
3222 } else {
3223 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3224 }
3225
3226 alu.src[1].sel = tmp0;
3227 alu.src[1].chan = 1;
3228
3229 alu.last = 1;
3230 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3231 return r;
3232
3233 if (mod) { /* UMOD */
3234
3235 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3236 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3237 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3238
3239 alu.dst.sel = tmp1;
3240 alu.dst.chan = 2;
3241 alu.dst.write = 1;
3242
3243 alu.src[0].sel = tmp0;
3244 alu.src[0].chan = 3;
3245
3246 if (signed_op) {
3247 alu.src[1].sel = tmp2;
3248 alu.src[1].chan = 1;
3249 } else {
3250 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3251 }
3252
3253 alu.last = 1;
3254 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3255 return r;
3256
3257 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3258 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3259 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3260
3261 alu.dst.sel = tmp1;
3262 alu.dst.chan = 3;
3263 alu.dst.write = 1;
3264
3265 alu.src[0].sel = tmp0;
3266 alu.src[0].chan = 3;
3267 if (signed_op) {
3268 alu.src[1].sel = tmp2;
3269 alu.src[1].chan = 1;
3270 } else {
3271 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3272 }
3273
3274 alu.last = 1;
3275 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3276 return r;
3277
3278 } else { /* UDIV */
3279
3280 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3281 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3282 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3283
3284 alu.dst.sel = tmp1;
3285 alu.dst.chan = 2;
3286 alu.dst.write = 1;
3287
3288 alu.src[0].sel = tmp0;
3289 alu.src[0].chan = 2;
3290 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3291
3292 alu.last = 1;
3293 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3294 return r;
3295
3296 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3297 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3298 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3299
3300 alu.dst.sel = tmp1;
3301 alu.dst.chan = 3;
3302 alu.dst.write = 1;
3303
3304 alu.src[0].sel = tmp0;
3305 alu.src[0].chan = 2;
3306 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3307
3308 alu.last = 1;
3309 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3310 return r;
3311
3312 }
3313
3314 /* 17. tmp1.x = tmp1.x & tmp1.y */
3315 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3316 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3317
3318 alu.dst.sel = tmp1;
3319 alu.dst.chan = 0;
3320 alu.dst.write = 1;
3321
3322 alu.src[0].sel = tmp1;
3323 alu.src[0].chan = 0;
3324 alu.src[1].sel = tmp1;
3325 alu.src[1].chan = 1;
3326
3327 alu.last = 1;
3328 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3329 return r;
3330
3331 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3332 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3333 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3334 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3335 alu.is_op3 = 1;
3336
3337 alu.dst.sel = tmp0;
3338 alu.dst.chan = 2;
3339 alu.dst.write = 1;
3340
3341 alu.src[0].sel = tmp1;
3342 alu.src[0].chan = 0;
3343 alu.src[1].sel = tmp0;
3344 alu.src[1].chan = mod ? 3 : 2;
3345 alu.src[2].sel = tmp1;
3346 alu.src[2].chan = 2;
3347
3348 alu.last = 1;
3349 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3350 return r;
3351
3352 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3353 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3355 alu.is_op3 = 1;
3356
3357 if (signed_op) {
3358 alu.dst.sel = tmp0;
3359 alu.dst.chan = 2;
3360 alu.dst.write = 1;
3361 } else {
3362 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3363 }
3364
3365 alu.src[0].sel = tmp1;
3366 alu.src[0].chan = 1;
3367 alu.src[1].sel = tmp1;
3368 alu.src[1].chan = 3;
3369 alu.src[2].sel = tmp0;
3370 alu.src[2].chan = 2;
3371
3372 alu.last = 1;
3373 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3374 return r;
3375
3376 if (signed_op) {
3377
3378 /* fix the sign of the result */
3379
3380 if (mod) {
3381
3382 /* tmp0.x = -tmp0.z */
3383 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3384 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3385
3386 alu.dst.sel = tmp0;
3387 alu.dst.chan = 0;
3388 alu.dst.write = 1;
3389
3390 alu.src[0].sel = V_SQ_ALU_SRC_0;
3391 alu.src[1].sel = tmp0;
3392 alu.src[1].chan = 2;
3393
3394 alu.last = 1;
3395 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3396 return r;
3397
3398 /* sign of the remainder is the same as the sign of src0 */
3399 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3400 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3402 alu.is_op3 = 1;
3403
3404 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3405
3406 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3407 alu.src[1].sel = tmp0;
3408 alu.src[1].chan = 2;
3409 alu.src[2].sel = tmp0;
3410 alu.src[2].chan = 0;
3411
3412 alu.last = 1;
3413 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3414 return r;
3415
3416 } else {
3417
3418 /* tmp0.x = -tmp0.z */
3419 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3420 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3421
3422 alu.dst.sel = tmp0;
3423 alu.dst.chan = 0;
3424 alu.dst.write = 1;
3425
3426 alu.src[0].sel = V_SQ_ALU_SRC_0;
3427 alu.src[1].sel = tmp0;
3428 alu.src[1].chan = 2;
3429
3430 alu.last = 1;
3431 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3432 return r;
3433
3434 /* fix the quotient sign (same as the sign of src0*src1) */
3435 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3436 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3437 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3438 alu.is_op3 = 1;
3439
3440 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3441
3442 alu.src[0].sel = tmp2;
3443 alu.src[0].chan = 2;
3444 alu.src[1].sel = tmp0;
3445 alu.src[1].chan = 2;
3446 alu.src[2].sel = tmp0;
3447 alu.src[2].chan = 0;
3448
3449 alu.last = 1;
3450 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3451 return r;
3452 }
3453 }
3454 }
3455 return 0;
3456 }
3457
/*
 * Unsigned integer divide: delegates to tgsi_divmod() asking for the
 * quotient (mod = 0) of an unsigned operation (signed_op = 0).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3462
/*
 * Unsigned integer modulo: delegates to tgsi_divmod() asking for the
 * remainder (mod = 1) of an unsigned operation (signed_op = 0).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3467
/*
 * Signed integer divide: delegates to tgsi_divmod() asking for the
 * quotient (mod = 0) of a signed operation (signed_op = 1).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3472
/*
 * Signed integer modulo: delegates to tgsi_divmod() asking for the
 * remainder (mod = 1) of a signed operation (signed_op = 1).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3477
3478
3479 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3480 {
3481 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3482 struct r600_bytecode_alu alu;
3483 int i, r;
3484 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3485 int last_inst = tgsi_last_instruction(write_mask);
3486
3487 for (i = 0; i < 4; i++) {
3488 if (!(write_mask & (1<<i)))
3489 continue;
3490
3491 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3492 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3493
3494 alu.dst.sel = ctx->temp_reg;
3495 alu.dst.chan = i;
3496 alu.dst.write = 1;
3497
3498 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3499 if (i == last_inst)
3500 alu.last = 1;
3501 r = r600_bytecode_add_alu(ctx->bc, &alu);
3502 if (r)
3503 return r;
3504 }
3505
3506 for (i = 0; i < 4; i++) {
3507 if (!(write_mask & (1<<i)))
3508 continue;
3509
3510 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3511 alu.inst = ctx->inst_info->r600_opcode;
3512
3513 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3514
3515 alu.src[0].sel = ctx->temp_reg;
3516 alu.src[0].chan = i;
3517
3518 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3519 alu.last = 1;
3520 r = r600_bytecode_add_alu(ctx->bc, &alu);
3521 if (r)
3522 return r;
3523 }
3524
3525 return 0;
3526 }
3527
3528 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3529 {
3530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3531 struct r600_bytecode_alu alu;
3532 int i, r;
3533 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3534 int last_inst = tgsi_last_instruction(write_mask);
3535
3536 /* tmp = -src */
3537 for (i = 0; i < 4; i++) {
3538 if (!(write_mask & (1<<i)))
3539 continue;
3540
3541 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3542 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3543
3544 alu.dst.sel = ctx->temp_reg;
3545 alu.dst.chan = i;
3546 alu.dst.write = 1;
3547
3548 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3549 alu.src[0].sel = V_SQ_ALU_SRC_0;
3550
3551 if (i == last_inst)
3552 alu.last = 1;
3553 r = r600_bytecode_add_alu(ctx->bc, &alu);
3554 if (r)
3555 return r;
3556 }
3557
3558 /* dst = (src >= 0 ? src : tmp) */
3559 for (i = 0; i < 4; i++) {
3560 if (!(write_mask & (1<<i)))
3561 continue;
3562
3563 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3564 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3565 alu.is_op3 = 1;
3566 alu.dst.write = 1;
3567
3568 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3569
3570 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3571 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3572 alu.src[2].sel = ctx->temp_reg;
3573 alu.src[2].chan = i;
3574
3575 if (i == last_inst)
3576 alu.last = 1;
3577 r = r600_bytecode_add_alu(ctx->bc, &alu);
3578 if (r)
3579 return r;
3580 }
3581 return 0;
3582 }
3583
3584 static int tgsi_issg(struct r600_shader_ctx *ctx)
3585 {
3586 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3587 struct r600_bytecode_alu alu;
3588 int i, r;
3589 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3590 int last_inst = tgsi_last_instruction(write_mask);
3591
3592 /* tmp = (src >= 0 ? src : -1) */
3593 for (i = 0; i < 4; i++) {
3594 if (!(write_mask & (1<<i)))
3595 continue;
3596
3597 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3598 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3599 alu.is_op3 = 1;
3600
3601 alu.dst.sel = ctx->temp_reg;
3602 alu.dst.chan = i;
3603 alu.dst.write = 1;
3604
3605 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3606 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3607 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3608
3609 if (i == last_inst)
3610 alu.last = 1;
3611 r = r600_bytecode_add_alu(ctx->bc, &alu);
3612 if (r)
3613 return r;
3614 }
3615
3616 /* dst = (tmp > 0 ? 1 : tmp) */
3617 for (i = 0; i < 4; i++) {
3618 if (!(write_mask & (1<<i)))
3619 continue;
3620
3621 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3622 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3623 alu.is_op3 = 1;
3624 alu.dst.write = 1;
3625
3626 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3627
3628 alu.src[0].sel = ctx->temp_reg;
3629 alu.src[0].chan = i;
3630
3631 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3632
3633 alu.src[2].sel = ctx->temp_reg;
3634 alu.src[2].chan = i;
3635
3636 if (i == last_inst)
3637 alu.last = 1;
3638 r = r600_bytecode_add_alu(ctx->bc, &alu);
3639 if (r)
3640 return r;
3641 }
3642 return 0;
3643 }
3644
3645
3646
3647 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3648 {
3649 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3650 struct r600_bytecode_alu alu;
3651 int i, r;
3652
3653 /* tmp = (src > 0 ? 1 : src) */
3654 for (i = 0; i < 4; i++) {
3655 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3657 alu.is_op3 = 1;
3658
3659 alu.dst.sel = ctx->temp_reg;
3660 alu.dst.chan = i;
3661
3662 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3663 alu.src[1].sel = V_SQ_ALU_SRC_1;
3664 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3665
3666 if (i == 3)
3667 alu.last = 1;
3668 r = r600_bytecode_add_alu(ctx->bc, &alu);
3669 if (r)
3670 return r;
3671 }
3672
3673 /* dst = (-tmp > 0 ? -1 : tmp) */
3674 for (i = 0; i < 4; i++) {
3675 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3676 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3677 alu.is_op3 = 1;
3678 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3679
3680 alu.src[0].sel = ctx->temp_reg;
3681 alu.src[0].chan = i;
3682 alu.src[0].neg = 1;
3683
3684 alu.src[1].sel = V_SQ_ALU_SRC_1;
3685 alu.src[1].neg = 1;
3686
3687 alu.src[2].sel = ctx->temp_reg;
3688 alu.src[2].chan = i;
3689
3690 if (i == 3)
3691 alu.last = 1;
3692 r = r600_bytecode_add_alu(ctx->bc, &alu);
3693 if (r)
3694 return r;
3695 }
3696 return 0;
3697 }
3698
3699 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3700 {
3701 struct r600_bytecode_alu alu;
3702 int i, r;
3703
3704 for (i = 0; i < 4; i++) {
3705 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3706 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3707 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3708 alu.dst.chan = i;
3709 } else {
3710 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3711 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3712 alu.src[0].sel = ctx->temp_reg;
3713 alu.src[0].chan = i;
3714 }
3715 if (i == 3) {
3716 alu.last = 1;
3717 }
3718 r = r600_bytecode_add_alu(ctx->bc, &alu);
3719 if (r)
3720 return r;
3721 }
3722 return 0;
3723 }
3724
3725 static int tgsi_op3(struct r600_shader_ctx *ctx)
3726 {
3727 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3728 struct r600_bytecode_alu alu;
3729 int i, j, r;
3730 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3731
3732 for (i = 0; i < lasti + 1; i++) {
3733 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3734 continue;
3735
3736 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3737 alu.inst = ctx->inst_info->r600_opcode;
3738 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3739 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3740 }
3741
3742 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3743 alu.dst.chan = i;
3744 alu.dst.write = 1;
3745 alu.is_op3 = 1;
3746 if (i == lasti) {
3747 alu.last = 1;
3748 }
3749 r = r600_bytecode_add_alu(ctx->bc, &alu);
3750 if (r)
3751 return r;
3752 }
3753 return 0;
3754 }
3755
3756 static int tgsi_dp(struct r600_shader_ctx *ctx)
3757 {
3758 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3759 struct r600_bytecode_alu alu;
3760 int i, j, r;
3761
3762 for (i = 0; i < 4; i++) {
3763 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3764 alu.inst = ctx->inst_info->r600_opcode;
3765 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3766 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3767 }
3768
3769 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3770 alu.dst.chan = i;
3771 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3772 /* handle some special cases */
3773 switch (ctx->inst_info->tgsi_opcode) {
3774 case TGSI_OPCODE_DP2:
3775 if (i > 1) {
3776 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3777 alu.src[0].chan = alu.src[1].chan = 0;
3778 }
3779 break;
3780 case TGSI_OPCODE_DP3:
3781 if (i > 2) {
3782 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3783 alu.src[0].chan = alu.src[1].chan = 0;
3784 }
3785 break;
3786 case TGSI_OPCODE_DPH:
3787 if (i == 3) {
3788 alu.src[0].sel = V_SQ_ALU_SRC_1;
3789 alu.src[0].chan = 0;
3790 alu.src[0].neg = 0;
3791 }
3792 break;
3793 default:
3794 break;
3795 }
3796 if (i == 3) {
3797 alu.last = 1;
3798 }
3799 r = r600_bytecode_add_alu(ctx->bc, &alu);
3800 if (r)
3801 return r;
3802 }
3803 return 0;
3804 }
3805
3806 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3807 unsigned index)
3808 {
3809 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3810 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3811 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3812 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3813 ctx->src[index].neg || ctx->src[index].abs;
3814 }
3815
3816 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3817 unsigned index)
3818 {
3819 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3820 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3821 }
3822
3823 static int tgsi_tex(struct r600_shader_ctx *ctx)
3824 {
3825 static float one_point_five = 1.5f;
3826 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3827 struct r600_bytecode_tex tex;
3828 struct r600_bytecode_alu alu;
3829 unsigned src_gpr;
3830 int r, i, j;
3831 int opcode;
3832 bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED &&
3833 inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
3834 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
3835 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
3836 /* Texture fetch instructions can only use gprs as source.
3837 * Also they cannot negate the source or take the absolute value */
3838 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3839 tgsi_tex_src_requires_loading(ctx, 0)) ||
3840 read_compressed_msaa;
3841 boolean src_loaded = FALSE;
3842 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3843 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3844
3845 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3846
3847 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3848 /* get offset values */
3849 if (inst->Texture.NumOffsets) {
3850 assert(inst->Texture.NumOffsets == 1);
3851
3852 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3853 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3854 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3855 }
3856 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3857 /* TGSI moves the sampler to src reg 3 for TXD */
3858 sampler_src_reg = 3;
3859
3860 for (i = 1; i < 3; i++) {
3861 /* set gradients h/v */
3862 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3863 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3864 SQ_TEX_INST_SET_GRADIENTS_V;
3865 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3866 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3867
3868 if (tgsi_tex_src_requires_loading(ctx, i)) {
3869 tex.src_gpr = r600_get_temp(ctx);
3870 tex.src_sel_x = 0;
3871 tex.src_sel_y = 1;
3872 tex.src_sel_z = 2;
3873 tex.src_sel_w = 3;
3874
3875 for (j = 0; j < 4; j++) {
3876 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3877 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3878 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3879 alu.dst.sel = tex.src_gpr;
3880 alu.dst.chan = j;
3881 if (j == 3)
3882 alu.last = 1;
3883 alu.dst.write = 1;
3884 r = r600_bytecode_add_alu(ctx->bc, &alu);
3885 if (r)
3886 return r;
3887 }
3888
3889 } else {
3890 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3891 tex.src_sel_x = ctx->src[i].swizzle[0];
3892 tex.src_sel_y = ctx->src[i].swizzle[1];
3893 tex.src_sel_z = ctx->src[i].swizzle[2];
3894 tex.src_sel_w = ctx->src[i].swizzle[3];
3895 tex.src_rel = ctx->src[i].rel;
3896 }
3897 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3898 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3899 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3900 tex.coord_type_x = 1;
3901 tex.coord_type_y = 1;
3902 tex.coord_type_z = 1;
3903 tex.coord_type_w = 1;
3904 }
3905 r = r600_bytecode_add_tex(ctx->bc, &tex);
3906 if (r)
3907 return r;
3908 }
3909 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3910 int out_chan;
3911 /* Add perspective divide */
3912 if (ctx->bc->chip_class == CAYMAN) {
3913 out_chan = 2;
3914 for (i = 0; i < 3; i++) {
3915 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3916 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3917 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3918
3919 alu.dst.sel = ctx->temp_reg;
3920 alu.dst.chan = i;
3921 if (i == 2)
3922 alu.last = 1;
3923 if (out_chan == i)
3924 alu.dst.write = 1;
3925 r = r600_bytecode_add_alu(ctx->bc, &alu);
3926 if (r)
3927 return r;
3928 }
3929
3930 } else {
3931 out_chan = 3;
3932 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3933 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3934 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3935
3936 alu.dst.sel = ctx->temp_reg;
3937 alu.dst.chan = out_chan;
3938 alu.last = 1;
3939 alu.dst.write = 1;
3940 r = r600_bytecode_add_alu(ctx->bc, &alu);
3941 if (r)
3942 return r;
3943 }
3944
3945 for (i = 0; i < 3; i++) {
3946 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3947 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3948 alu.src[0].sel = ctx->temp_reg;
3949 alu.src[0].chan = out_chan;
3950 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3951 alu.dst.sel = ctx->temp_reg;
3952 alu.dst.chan = i;
3953 alu.dst.write = 1;
3954 r = r600_bytecode_add_alu(ctx->bc, &alu);
3955 if (r)
3956 return r;
3957 }
3958 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3959 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3960 alu.src[0].sel = V_SQ_ALU_SRC_1;
3961 alu.src[0].chan = 0;
3962 alu.dst.sel = ctx->temp_reg;
3963 alu.dst.chan = 3;
3964 alu.last = 1;
3965 alu.dst.write = 1;
3966 r = r600_bytecode_add_alu(ctx->bc, &alu);
3967 if (r)
3968 return r;
3969 src_loaded = TRUE;
3970 src_gpr = ctx->temp_reg;
3971 }
3972
3973 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3974 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3975 inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
3976 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
3977
3978 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3979 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3980
3981 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3982 for (i = 0; i < 4; i++) {
3983 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3984 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3985 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3986 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3987 alu.dst.sel = ctx->temp_reg;
3988 alu.dst.chan = i;
3989 if (i == 3)
3990 alu.last = 1;
3991 alu.dst.write = 1;
3992 r = r600_bytecode_add_alu(ctx->bc, &alu);
3993 if (r)
3994 return r;
3995 }
3996
3997 /* tmp1.z = RCP_e(|tmp1.z|) */
3998 if (ctx->bc->chip_class == CAYMAN) {
3999 for (i = 0; i < 3; i++) {
4000 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4001 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4002 alu.src[0].sel = ctx->temp_reg;
4003 alu.src[0].chan = 2;
4004 alu.src[0].abs = 1;
4005 alu.dst.sel = ctx->temp_reg;
4006 alu.dst.chan = i;
4007 if (i == 2)
4008 alu.dst.write = 1;
4009 if (i == 2)
4010 alu.last = 1;
4011 r = r600_bytecode_add_alu(ctx->bc, &alu);
4012 if (r)
4013 return r;
4014 }
4015 } else {
4016 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4017 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4018 alu.src[0].sel = ctx->temp_reg;
4019 alu.src[0].chan = 2;
4020 alu.src[0].abs = 1;
4021 alu.dst.sel = ctx->temp_reg;
4022 alu.dst.chan = 2;
4023 alu.dst.write = 1;
4024 alu.last = 1;
4025 r = r600_bytecode_add_alu(ctx->bc, &alu);
4026 if (r)
4027 return r;
4028 }
4029
4030 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4031 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4032 * muladd has no writemask, have to use another temp
4033 */
4034 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4035 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4036 alu.is_op3 = 1;
4037
4038 alu.src[0].sel = ctx->temp_reg;
4039 alu.src[0].chan = 0;
4040 alu.src[1].sel = ctx->temp_reg;
4041 alu.src[1].chan = 2;
4042
4043 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
4044 alu.src[2].chan = 0;
4045 alu.src[2].value = *(uint32_t *)&one_point_five;
4046
4047 alu.dst.sel = ctx->temp_reg;
4048 alu.dst.chan = 0;
4049 alu.dst.write = 1;
4050
4051 r = r600_bytecode_add_alu(ctx->bc, &alu);
4052 if (r)
4053 return r;
4054
4055 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4056 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4057 alu.is_op3 = 1;
4058
4059 alu.src[0].sel = ctx->temp_reg;
4060 alu.src[0].chan = 1;
4061 alu.src[1].sel = ctx->temp_reg;
4062 alu.src[1].chan = 2;
4063
4064 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
4065 alu.src[2].chan = 0;
4066 alu.src[2].value = *(uint32_t *)&one_point_five;
4067
4068 alu.dst.sel = ctx->temp_reg;
4069 alu.dst.chan = 1;
4070 alu.dst.write = 1;
4071
4072 alu.last = 1;
4073 r = r600_bytecode_add_alu(ctx->bc, &alu);
4074 if (r)
4075 return r;
4076 /* write initial W value into Z component */
4077 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4078 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4079 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4080 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4081 alu.dst.sel = ctx->temp_reg;
4082 alu.dst.chan = 2;
4083 alu.dst.write = 1;
4084 alu.last = 1;
4085 r = r600_bytecode_add_alu(ctx->bc, &alu);
4086 if (r)
4087 return r;
4088 }
4089 src_loaded = TRUE;
4090 src_gpr = ctx->temp_reg;
4091 }
4092
4093 if (src_requires_loading && !src_loaded) {
4094 for (i = 0; i < 4; i++) {
4095 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4096 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4097 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4098 alu.dst.sel = ctx->temp_reg;
4099 alu.dst.chan = i;
4100 if (i == 3)
4101 alu.last = 1;
4102 alu.dst.write = 1;
4103 r = r600_bytecode_add_alu(ctx->bc, &alu);
4104 if (r)
4105 return r;
4106 }
4107 src_loaded = TRUE;
4108 src_gpr = ctx->temp_reg;
4109 }
4110
4111 /* Obtain the sample index for reading a compressed MSAA color texture.
4112 * To read the FMASK, we use the ldfptr instruction, which tells us
4113 * where the samples are stored.
4114 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
4115 * which is the identity mapping. Each nibble says which physical sample
4116 * should be fetched to get that sample.
4117 *
4118 * Assume src.z contains the sample index. It should be modified like this:
4119 * src.z = (ldfptr() >> (src.z * 4)) & 0xF;
4120 * Then fetch the texel with src.
4121 */
4122 if (read_compressed_msaa) {
4123 unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4;
4124 unsigned temp = r600_get_temp(ctx);
4125 assert(src_loaded);
4126
4127 /* temp.w = ldfptr() */
4128 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4129 tex.inst = SQ_TEX_INST_LD;
4130 tex.inst_mod = 1; /* to indicate this is ldfptr */
4131 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4132 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4133 tex.src_gpr = src_gpr;
4134 tex.dst_gpr = temp;
4135 tex.dst_sel_x = 7; /* mask out these components */
4136 tex.dst_sel_y = 7;
4137 tex.dst_sel_z = 7;
4138 tex.dst_sel_w = 0; /* store X */
4139 tex.src_sel_x = 0;
4140 tex.src_sel_y = 1;
4141 tex.src_sel_z = 2;
4142 tex.src_sel_w = 3;
4143 tex.offset_x = offset_x;
4144 tex.offset_y = offset_y;
4145 tex.offset_z = offset_z;
4146 r = r600_bytecode_add_tex(ctx->bc, &tex);
4147 if (r)
4148 return r;
4149
4150 /* temp.x = sample_index*4 */
4151 if (ctx->bc->chip_class == CAYMAN) {
4152 for (i = 0 ; i < 4; i++) {
4153 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4154 alu.inst = ctx->inst_info->r600_opcode;
4155 alu.src[0].sel = src_gpr;
4156 alu.src[0].chan = sample_chan;
4157 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4158 alu.src[1].value = 4;
4159 alu.dst.sel = temp;
4160 alu.dst.chan = i;
4161 alu.dst.write = i == 0;
4162 if (i == 3)
4163 alu.last = 1;
4164 r = r600_bytecode_add_alu(ctx->bc, &alu);
4165 if (r)
4166 return r;
4167 }
4168 } else {
4169 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4170 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
4171 alu.src[0].sel = src_gpr;
4172 alu.src[0].chan = sample_chan;
4173 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4174 alu.src[1].value = 4;
4175 alu.dst.sel = temp;
4176 alu.dst.chan = 0;
4177 alu.dst.write = 1;
4178 alu.last = 1;
4179 r = r600_bytecode_add_alu(ctx->bc, &alu);
4180 if (r)
4181 return r;
4182 }
4183
4184 /* sample_index = temp.w >> temp.x */
4185 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4186 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
4187 alu.src[0].sel = temp;
4188 alu.src[0].chan = 3;
4189 alu.src[1].sel = temp;
4190 alu.src[1].chan = 0;
4191 alu.dst.sel = src_gpr;
4192 alu.dst.chan = sample_chan;
4193 alu.dst.write = 1;
4194 alu.last = 1;
4195 r = r600_bytecode_add_alu(ctx->bc, &alu);
4196 if (r)
4197 return r;
4198
4199 /* sample_index & 0xF */
4200 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
4202 alu.src[0].sel = src_gpr;
4203 alu.src[0].chan = sample_chan;
4204 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4205 alu.src[1].value = 0xF;
4206 alu.dst.sel = src_gpr;
4207 alu.dst.chan = sample_chan;
4208 alu.dst.write = 1;
4209 alu.last = 1;
4210 r = r600_bytecode_add_alu(ctx->bc, &alu);
4211 if (r)
4212 return r;
4213 #if 0
4214 /* visualize the FMASK */
4215 for (i = 0; i < 4; i++) {
4216 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
4218 alu.src[0].sel = src_gpr;
4219 alu.src[0].chan = sample_chan;
4220 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4221 alu.dst.chan = i;
4222 alu.dst.write = 1;
4223 alu.last = 1;
4224 r = r600_bytecode_add_alu(ctx->bc, &alu);
4225 if (r)
4226 return r;
4227 }
4228 return 0;
4229 #endif
4230 }
4231
4232 opcode = ctx->inst_info->r600_opcode;
4233 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4234 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4235 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4236 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4237 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4238 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
4239 switch (opcode) {
4240 case SQ_TEX_INST_SAMPLE:
4241 opcode = SQ_TEX_INST_SAMPLE_C;
4242 break;
4243 case SQ_TEX_INST_SAMPLE_L:
4244 opcode = SQ_TEX_INST_SAMPLE_C_L;
4245 break;
4246 case SQ_TEX_INST_SAMPLE_LB:
4247 opcode = SQ_TEX_INST_SAMPLE_C_LB;
4248 break;
4249 case SQ_TEX_INST_SAMPLE_G:
4250 opcode = SQ_TEX_INST_SAMPLE_C_G;
4251 break;
4252 }
4253 }
4254
4255 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4256 tex.inst = opcode;
4257
4258 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4259 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4260 tex.src_gpr = src_gpr;
4261 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4262 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4263 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4264 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4265 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4266
4267 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
4268 tex.src_sel_x = 4;
4269 tex.src_sel_y = 4;
4270 tex.src_sel_z = 4;
4271 tex.src_sel_w = 4;
4272 } else if (src_loaded) {
4273 tex.src_sel_x = 0;
4274 tex.src_sel_y = 1;
4275 tex.src_sel_z = 2;
4276 tex.src_sel_w = 3;
4277 } else {
4278 tex.src_sel_x = ctx->src[0].swizzle[0];
4279 tex.src_sel_y = ctx->src[0].swizzle[1];
4280 tex.src_sel_z = ctx->src[0].swizzle[2];
4281 tex.src_sel_w = ctx->src[0].swizzle[3];
4282 tex.src_rel = ctx->src[0].rel;
4283 }
4284
4285 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
4286 tex.src_sel_x = 1;
4287 tex.src_sel_y = 0;
4288 tex.src_sel_z = 3;
4289 tex.src_sel_w = 1;
4290 }
4291 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4292 tex.src_sel_x = 1;
4293 tex.src_sel_y = 0;
4294 tex.src_sel_z = 3;
4295 tex.src_sel_w = 2; /* route Z compare value into W */
4296 }
4297
4298 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4299 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4300 tex.coord_type_x = 1;
4301 tex.coord_type_y = 1;
4302 }
4303 tex.coord_type_z = 1;
4304 tex.coord_type_w = 1;
4305
4306 tex.offset_x = offset_x;
4307 tex.offset_y = offset_y;
4308 tex.offset_z = offset_z;
4309
4310 /* Put the depth for comparison in W.
4311 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4312 * Some instructions expect the depth in Z. */
4313 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4314 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4315 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4316 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4317 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4318 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4319 tex.src_sel_w = tex.src_sel_z;
4320 }
4321
4322 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4323 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4324 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4325 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4326 /* the array index is read from Y */
4327 tex.coord_type_y = 0;
4328 } else {
4329 /* the array index is read from Z */
4330 tex.coord_type_z = 0;
4331 tex.src_sel_z = tex.src_sel_y;
4332 }
4333 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4334 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4335 /* the array index is read from Z */
4336 tex.coord_type_z = 0;
4337
4338 r = r600_bytecode_add_tex(ctx->bc, &tex);
4339 if (r)
4340 return r;
4341
4342 /* add shadow ambient support - gallium doesn't do it yet */
4343 return 0;
4344 }
4345
4346 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4347 {
4348 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4349 struct r600_bytecode_alu alu;
4350 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4351 unsigned i;
4352 int r;
4353
4354 /* optimize if it's just an equal balance */
4355 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4356 for (i = 0; i < lasti + 1; i++) {
4357 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4358 continue;
4359
4360 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4361 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4362 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4363 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4364 alu.omod = 3;
4365 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4366 alu.dst.chan = i;
4367 if (i == lasti) {
4368 alu.last = 1;
4369 }
4370 r = r600_bytecode_add_alu(ctx->bc, &alu);
4371 if (r)
4372 return r;
4373 }
4374 return 0;
4375 }
4376
4377 /* 1 - src0 */
4378 for (i = 0; i < lasti + 1; i++) {
4379 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4380 continue;
4381
4382 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4384 alu.src[0].sel = V_SQ_ALU_SRC_1;
4385 alu.src[0].chan = 0;
4386 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4387 r600_bytecode_src_toggle_neg(&alu.src[1]);
4388 alu.dst.sel = ctx->temp_reg;
4389 alu.dst.chan = i;
4390 if (i == lasti) {
4391 alu.last = 1;
4392 }
4393 alu.dst.write = 1;
4394 r = r600_bytecode_add_alu(ctx->bc, &alu);
4395 if (r)
4396 return r;
4397 }
4398
4399 /* (1 - src0) * src2 */
4400 for (i = 0; i < lasti + 1; i++) {
4401 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4402 continue;
4403
4404 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4405 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4406 alu.src[0].sel = ctx->temp_reg;
4407 alu.src[0].chan = i;
4408 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4409 alu.dst.sel = ctx->temp_reg;
4410 alu.dst.chan = i;
4411 if (i == lasti) {
4412 alu.last = 1;
4413 }
4414 alu.dst.write = 1;
4415 r = r600_bytecode_add_alu(ctx->bc, &alu);
4416 if (r)
4417 return r;
4418 }
4419
4420 /* src0 * src1 + (1 - src0) * src2 */
4421 for (i = 0; i < lasti + 1; i++) {
4422 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4423 continue;
4424
4425 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4426 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4427 alu.is_op3 = 1;
4428 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4429 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4430 alu.src[2].sel = ctx->temp_reg;
4431 alu.src[2].chan = i;
4432
4433 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4434 alu.dst.chan = i;
4435 if (i == lasti) {
4436 alu.last = 1;
4437 }
4438 r = r600_bytecode_add_alu(ctx->bc, &alu);
4439 if (r)
4440 return r;
4441 }
4442 return 0;
4443 }
4444
4445 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4446 {
4447 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4448 struct r600_bytecode_alu alu;
4449 int i, r;
4450 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4451
4452 for (i = 0; i < lasti + 1; i++) {
4453 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4454 continue;
4455
4456 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4457 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4458 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4459 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4460 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4461 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4462 alu.dst.chan = i;
4463 alu.dst.write = 1;
4464 alu.is_op3 = 1;
4465 if (i == lasti)
4466 alu.last = 1;
4467 r = r600_bytecode_add_alu(ctx->bc, &alu);
4468 if (r)
4469 return r;
4470 }
4471 return 0;
4472 }
4473
4474 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4475 {
4476 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4477 static const unsigned int src0_swizzle[] = {2, 0, 1};
4478 static const unsigned int src1_swizzle[] = {1, 2, 0};
4479 struct r600_bytecode_alu alu;
4480 uint32_t use_temp = 0;
4481 int i, r;
4482
4483 if (inst->Dst[0].Register.WriteMask != 0xf)
4484 use_temp = 1;
4485
4486 for (i = 0; i < 4; i++) {
4487 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4488 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4489 if (i < 3) {
4490 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4491 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4492 } else {
4493 alu.src[0].sel = V_SQ_ALU_SRC_0;
4494 alu.src[0].chan = i;
4495 alu.src[1].sel = V_SQ_ALU_SRC_0;
4496 alu.src[1].chan = i;
4497 }
4498
4499 alu.dst.sel = ctx->temp_reg;
4500 alu.dst.chan = i;
4501 alu.dst.write = 1;
4502
4503 if (i == 3)
4504 alu.last = 1;
4505 r = r600_bytecode_add_alu(ctx->bc, &alu);
4506 if (r)
4507 return r;
4508 }
4509
4510 for (i = 0; i < 4; i++) {
4511 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4512 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4513
4514 if (i < 3) {
4515 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4516 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4517 } else {
4518 alu.src[0].sel = V_SQ_ALU_SRC_0;
4519 alu.src[0].chan = i;
4520 alu.src[1].sel = V_SQ_ALU_SRC_0;
4521 alu.src[1].chan = i;
4522 }
4523
4524 alu.src[2].sel = ctx->temp_reg;
4525 alu.src[2].neg = 1;
4526 alu.src[2].chan = i;
4527
4528 if (use_temp)
4529 alu.dst.sel = ctx->temp_reg;
4530 else
4531 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4532 alu.dst.chan = i;
4533 alu.dst.write = 1;
4534 alu.is_op3 = 1;
4535 if (i == 3)
4536 alu.last = 1;
4537 r = r600_bytecode_add_alu(ctx->bc, &alu);
4538 if (r)
4539 return r;
4540 }
4541 if (use_temp)
4542 return tgsi_helper_copy(ctx, inst);
4543 return 0;
4544 }
4545
4546 static int tgsi_exp(struct r600_shader_ctx *ctx)
4547 {
4548 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4549 struct r600_bytecode_alu alu;
4550 int r;
4551 int i;
4552
4553 /* result.x = 2^floor(src); */
4554 if (inst->Dst[0].Register.WriteMask & 1) {
4555 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4556
4557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4558 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4559
4560 alu.dst.sel = ctx->temp_reg;
4561 alu.dst.chan = 0;
4562 alu.dst.write = 1;
4563 alu.last = 1;
4564 r = r600_bytecode_add_alu(ctx->bc, &alu);
4565 if (r)
4566 return r;
4567
4568 if (ctx->bc->chip_class == CAYMAN) {
4569 for (i = 0; i < 3; i++) {
4570 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4571 alu.src[0].sel = ctx->temp_reg;
4572 alu.src[0].chan = 0;
4573
4574 alu.dst.sel = ctx->temp_reg;
4575 alu.dst.chan = i;
4576 alu.dst.write = i == 0;
4577 alu.last = i == 2;
4578 r = r600_bytecode_add_alu(ctx->bc, &alu);
4579 if (r)
4580 return r;
4581 }
4582 } else {
4583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4584 alu.src[0].sel = ctx->temp_reg;
4585 alu.src[0].chan = 0;
4586
4587 alu.dst.sel = ctx->temp_reg;
4588 alu.dst.chan = 0;
4589 alu.dst.write = 1;
4590 alu.last = 1;
4591 r = r600_bytecode_add_alu(ctx->bc, &alu);
4592 if (r)
4593 return r;
4594 }
4595 }
4596
4597 /* result.y = tmp - floor(tmp); */
4598 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4599 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4600
4601 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4602 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4603
4604 alu.dst.sel = ctx->temp_reg;
4605 #if 0
4606 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4607 if (r)
4608 return r;
4609 #endif
4610 alu.dst.write = 1;
4611 alu.dst.chan = 1;
4612
4613 alu.last = 1;
4614
4615 r = r600_bytecode_add_alu(ctx->bc, &alu);
4616 if (r)
4617 return r;
4618 }
4619
4620 /* result.z = RoughApprox2ToX(tmp);*/
4621 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4622 if (ctx->bc->chip_class == CAYMAN) {
4623 for (i = 0; i < 3; i++) {
4624 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4625 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4626 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4627
4628 alu.dst.sel = ctx->temp_reg;
4629 alu.dst.chan = i;
4630 if (i == 2) {
4631 alu.dst.write = 1;
4632 alu.last = 1;
4633 }
4634
4635 r = r600_bytecode_add_alu(ctx->bc, &alu);
4636 if (r)
4637 return r;
4638 }
4639 } else {
4640 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4642 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4643
4644 alu.dst.sel = ctx->temp_reg;
4645 alu.dst.write = 1;
4646 alu.dst.chan = 2;
4647
4648 alu.last = 1;
4649
4650 r = r600_bytecode_add_alu(ctx->bc, &alu);
4651 if (r)
4652 return r;
4653 }
4654 }
4655
4656 /* result.w = 1.0;*/
4657 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4658 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4659
4660 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4661 alu.src[0].sel = V_SQ_ALU_SRC_1;
4662 alu.src[0].chan = 0;
4663
4664 alu.dst.sel = ctx->temp_reg;
4665 alu.dst.chan = 3;
4666 alu.dst.write = 1;
4667 alu.last = 1;
4668 r = r600_bytecode_add_alu(ctx->bc, &alu);
4669 if (r)
4670 return r;
4671 }
4672 return tgsi_helper_copy(ctx, inst);
4673 }
4674
4675 static int tgsi_log(struct r600_shader_ctx *ctx)
4676 {
4677 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4678 struct r600_bytecode_alu alu;
4679 int r;
4680 int i;
4681
4682 /* result.x = floor(log2(|src|)); */
4683 if (inst->Dst[0].Register.WriteMask & 1) {
4684 if (ctx->bc->chip_class == CAYMAN) {
4685 for (i = 0; i < 3; i++) {
4686 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4687
4688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4689 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4690 r600_bytecode_src_set_abs(&alu.src[0]);
4691
4692 alu.dst.sel = ctx->temp_reg;
4693 alu.dst.chan = i;
4694 if (i == 0)
4695 alu.dst.write = 1;
4696 if (i == 2)
4697 alu.last = 1;
4698 r = r600_bytecode_add_alu(ctx->bc, &alu);
4699 if (r)
4700 return r;
4701 }
4702
4703 } else {
4704 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4705
4706 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4707 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4708 r600_bytecode_src_set_abs(&alu.src[0]);
4709
4710 alu.dst.sel = ctx->temp_reg;
4711 alu.dst.chan = 0;
4712 alu.dst.write = 1;
4713 alu.last = 1;
4714 r = r600_bytecode_add_alu(ctx->bc, &alu);
4715 if (r)
4716 return r;
4717 }
4718
4719 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4720 alu.src[0].sel = ctx->temp_reg;
4721 alu.src[0].chan = 0;
4722
4723 alu.dst.sel = ctx->temp_reg;
4724 alu.dst.chan = 0;
4725 alu.dst.write = 1;
4726 alu.last = 1;
4727
4728 r = r600_bytecode_add_alu(ctx->bc, &alu);
4729 if (r)
4730 return r;
4731 }
4732
4733 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4734 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4735
4736 if (ctx->bc->chip_class == CAYMAN) {
4737 for (i = 0; i < 3; i++) {
4738 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4739
4740 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4741 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4742 r600_bytecode_src_set_abs(&alu.src[0]);
4743
4744 alu.dst.sel = ctx->temp_reg;
4745 alu.dst.chan = i;
4746 if (i == 1)
4747 alu.dst.write = 1;
4748 if (i == 2)
4749 alu.last = 1;
4750
4751 r = r600_bytecode_add_alu(ctx->bc, &alu);
4752 if (r)
4753 return r;
4754 }
4755 } else {
4756 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4757
4758 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4759 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4760 r600_bytecode_src_set_abs(&alu.src[0]);
4761
4762 alu.dst.sel = ctx->temp_reg;
4763 alu.dst.chan = 1;
4764 alu.dst.write = 1;
4765 alu.last = 1;
4766
4767 r = r600_bytecode_add_alu(ctx->bc, &alu);
4768 if (r)
4769 return r;
4770 }
4771
4772 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4773
4774 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4775 alu.src[0].sel = ctx->temp_reg;
4776 alu.src[0].chan = 1;
4777
4778 alu.dst.sel = ctx->temp_reg;
4779 alu.dst.chan = 1;
4780 alu.dst.write = 1;
4781 alu.last = 1;
4782
4783 r = r600_bytecode_add_alu(ctx->bc, &alu);
4784 if (r)
4785 return r;
4786
4787 if (ctx->bc->chip_class == CAYMAN) {
4788 for (i = 0; i < 3; i++) {
4789 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4790 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4791 alu.src[0].sel = ctx->temp_reg;
4792 alu.src[0].chan = 1;
4793
4794 alu.dst.sel = ctx->temp_reg;
4795 alu.dst.chan = i;
4796 if (i == 1)
4797 alu.dst.write = 1;
4798 if (i == 2)
4799 alu.last = 1;
4800
4801 r = r600_bytecode_add_alu(ctx->bc, &alu);
4802 if (r)
4803 return r;
4804 }
4805 } else {
4806 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4807 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4808 alu.src[0].sel = ctx->temp_reg;
4809 alu.src[0].chan = 1;
4810
4811 alu.dst.sel = ctx->temp_reg;
4812 alu.dst.chan = 1;
4813 alu.dst.write = 1;
4814 alu.last = 1;
4815
4816 r = r600_bytecode_add_alu(ctx->bc, &alu);
4817 if (r)
4818 return r;
4819 }
4820
4821 if (ctx->bc->chip_class == CAYMAN) {
4822 for (i = 0; i < 3; i++) {
4823 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4824 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4825 alu.src[0].sel = ctx->temp_reg;
4826 alu.src[0].chan = 1;
4827
4828 alu.dst.sel = ctx->temp_reg;
4829 alu.dst.chan = i;
4830 if (i == 1)
4831 alu.dst.write = 1;
4832 if (i == 2)
4833 alu.last = 1;
4834
4835 r = r600_bytecode_add_alu(ctx->bc, &alu);
4836 if (r)
4837 return r;
4838 }
4839 } else {
4840 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4842 alu.src[0].sel = ctx->temp_reg;
4843 alu.src[0].chan = 1;
4844
4845 alu.dst.sel = ctx->temp_reg;
4846 alu.dst.chan = 1;
4847 alu.dst.write = 1;
4848 alu.last = 1;
4849
4850 r = r600_bytecode_add_alu(ctx->bc, &alu);
4851 if (r)
4852 return r;
4853 }
4854
4855 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4856
4857 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4858
4859 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4860 r600_bytecode_src_set_abs(&alu.src[0]);
4861
4862 alu.src[1].sel = ctx->temp_reg;
4863 alu.src[1].chan = 1;
4864
4865 alu.dst.sel = ctx->temp_reg;
4866 alu.dst.chan = 1;
4867 alu.dst.write = 1;
4868 alu.last = 1;
4869
4870 r = r600_bytecode_add_alu(ctx->bc, &alu);
4871 if (r)
4872 return r;
4873 }
4874
4875 /* result.z = log2(|src|);*/
4876 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4877 if (ctx->bc->chip_class == CAYMAN) {
4878 for (i = 0; i < 3; i++) {
4879 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4880
4881 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4882 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4883 r600_bytecode_src_set_abs(&alu.src[0]);
4884
4885 alu.dst.sel = ctx->temp_reg;
4886 if (i == 2)
4887 alu.dst.write = 1;
4888 alu.dst.chan = i;
4889 if (i == 2)
4890 alu.last = 1;
4891
4892 r = r600_bytecode_add_alu(ctx->bc, &alu);
4893 if (r)
4894 return r;
4895 }
4896 } else {
4897 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4898
4899 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4900 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4901 r600_bytecode_src_set_abs(&alu.src[0]);
4902
4903 alu.dst.sel = ctx->temp_reg;
4904 alu.dst.write = 1;
4905 alu.dst.chan = 2;
4906 alu.last = 1;
4907
4908 r = r600_bytecode_add_alu(ctx->bc, &alu);
4909 if (r)
4910 return r;
4911 }
4912 }
4913
4914 /* result.w = 1.0; */
4915 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4916 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4917
4918 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4919 alu.src[0].sel = V_SQ_ALU_SRC_1;
4920 alu.src[0].chan = 0;
4921
4922 alu.dst.sel = ctx->temp_reg;
4923 alu.dst.chan = 3;
4924 alu.dst.write = 1;
4925 alu.last = 1;
4926
4927 r = r600_bytecode_add_alu(ctx->bc, &alu);
4928 if (r)
4929 return r;
4930 }
4931
4932 return tgsi_helper_copy(ctx, inst);
4933 }
4934
4935 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4936 {
4937 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4938 struct r600_bytecode_alu alu;
4939 int r;
4940
4941 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4942
4943 switch (inst->Instruction.Opcode) {
4944 case TGSI_OPCODE_ARL:
4945 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4946 break;
4947 case TGSI_OPCODE_ARR:
4948 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4949 break;
4950 case TGSI_OPCODE_UARL:
4951 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4952 break;
4953 default:
4954 assert(0);
4955 return -1;
4956 }
4957
4958 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4959 alu.last = 1;
4960 alu.dst.sel = ctx->bc->ar_reg;
4961 alu.dst.write = 1;
4962 r = r600_bytecode_add_alu(ctx->bc, &alu);
4963 if (r)
4964 return r;
4965
4966 ctx->bc->ar_loaded = 0;
4967 return 0;
4968 }
4969 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4970 {
4971 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4972 struct r600_bytecode_alu alu;
4973 int r;
4974
4975 switch (inst->Instruction.Opcode) {
4976 case TGSI_OPCODE_ARL:
4977 memset(&alu, 0, sizeof(alu));
4978 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4979 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4980 alu.dst.sel = ctx->bc->ar_reg;
4981 alu.dst.write = 1;
4982 alu.last = 1;
4983
4984 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4985 return r;
4986
4987 memset(&alu, 0, sizeof(alu));
4988 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4989 alu.src[0].sel = ctx->bc->ar_reg;
4990 alu.dst.sel = ctx->bc->ar_reg;
4991 alu.dst.write = 1;
4992 alu.last = 1;
4993
4994 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4995 return r;
4996 break;
4997 case TGSI_OPCODE_ARR:
4998 memset(&alu, 0, sizeof(alu));
4999 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
5000 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5001 alu.dst.sel = ctx->bc->ar_reg;
5002 alu.dst.write = 1;
5003 alu.last = 1;
5004
5005 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5006 return r;
5007 break;
5008 case TGSI_OPCODE_UARL:
5009 memset(&alu, 0, sizeof(alu));
5010 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
5011 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5012 alu.dst.sel = ctx->bc->ar_reg;
5013 alu.dst.write = 1;
5014 alu.last = 1;
5015
5016 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5017 return r;
5018 break;
5019 default:
5020 assert(0);
5021 return -1;
5022 }
5023
5024 ctx->bc->ar_loaded = 0;
5025 return 0;
5026 }
5027
5028 static int tgsi_opdst(struct r600_shader_ctx *ctx)
5029 {
5030 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5031 struct r600_bytecode_alu alu;
5032 int i, r = 0;
5033
5034 for (i = 0; i < 4; i++) {
5035 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5036
5037 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
5038 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5039
5040 if (i == 0 || i == 3) {
5041 alu.src[0].sel = V_SQ_ALU_SRC_1;
5042 } else {
5043 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
5044 }
5045
5046 if (i == 0 || i == 2) {
5047 alu.src[1].sel = V_SQ_ALU_SRC_1;
5048 } else {
5049 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
5050 }
5051 if (i == 3)
5052 alu.last = 1;
5053 r = r600_bytecode_add_alu(ctx->bc, &alu);
5054 if (r)
5055 return r;
5056 }
5057 return 0;
5058 }
5059
5060 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
5061 {
5062 struct r600_bytecode_alu alu;
5063 int r;
5064
5065 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5066 alu.inst = opcode;
5067 alu.execute_mask = 1;
5068 alu.update_pred = 1;
5069
5070 alu.dst.sel = ctx->temp_reg;
5071 alu.dst.write = 1;
5072 alu.dst.chan = 0;
5073
5074 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5075 alu.src[1].sel = V_SQ_ALU_SRC_0;
5076 alu.src[1].chan = 0;
5077
5078 alu.last = 1;
5079
5080 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
5081 if (r)
5082 return r;
5083 return 0;
5084 }
5085
5086 static int pops(struct r600_shader_ctx *ctx, int pops)
5087 {
5088 unsigned force_pop = ctx->bc->force_add_cf;
5089
5090 if (!force_pop) {
5091 int alu_pop = 3;
5092 if (ctx->bc->cf_last) {
5093 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
5094 alu_pop = 0;
5095 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
5096 alu_pop = 1;
5097 }
5098 alu_pop += pops;
5099 if (alu_pop == 1) {
5100 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
5101 ctx->bc->force_add_cf = 1;
5102 } else if (alu_pop == 2) {
5103 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
5104 ctx->bc->force_add_cf = 1;
5105 } else {
5106 force_pop = 1;
5107 }
5108 }
5109
5110 if (force_pop) {
5111 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
5112 ctx->bc->cf_last->pop_count = pops;
5113 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
5114 }
5115
5116 return 0;
5117 }
5118
5119 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
5120 {
5121 switch(reason) {
5122 case FC_PUSH_VPM:
5123 ctx->bc->callstack[ctx->bc->call_sp].current--;
5124 break;
5125 case FC_PUSH_WQM:
5126 case FC_LOOP:
5127 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
5128 break;
5129 case FC_REP:
5130 /* TOODO : for 16 vp asic should -= 2; */
5131 ctx->bc->callstack[ctx->bc->call_sp].current --;
5132 break;
5133 }
5134 }
5135
5136 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
5137 {
5138 if (check_max_only) {
5139 int diff;
5140 switch (reason) {
5141 case FC_PUSH_VPM:
5142 diff = 1;
5143 break;
5144 case FC_PUSH_WQM:
5145 diff = 4;
5146 break;
5147 default:
5148 assert(0);
5149 diff = 0;
5150 }
5151 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
5152 ctx->bc->callstack[ctx->bc->call_sp].max) {
5153 ctx->bc->callstack[ctx->bc->call_sp].max =
5154 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
5155 }
5156 return;
5157 }
5158 switch (reason) {
5159 case FC_PUSH_VPM:
5160 ctx->bc->callstack[ctx->bc->call_sp].current++;
5161 break;
5162 case FC_PUSH_WQM:
5163 case FC_LOOP:
5164 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
5165 break;
5166 case FC_REP:
5167 ctx->bc->callstack[ctx->bc->call_sp].current++;
5168 break;
5169 }
5170
5171 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
5172 ctx->bc->callstack[ctx->bc->call_sp].max) {
5173 ctx->bc->callstack[ctx->bc->call_sp].max =
5174 ctx->bc->callstack[ctx->bc->call_sp].current;
5175 }
5176 }
5177
5178 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
5179 {
5180 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
5181
5182 sp->mid = realloc((void *)sp->mid,
5183 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
5184 sp->mid[sp->num_mid] = ctx->bc->cf_last;
5185 sp->num_mid++;
5186 }
5187
5188 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
5189 {
5190 ctx->bc->fc_sp++;
5191 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
5192 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
5193 }
5194
5195 static void fc_poplevel(struct r600_shader_ctx *ctx)
5196 {
5197 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
5198 free(sp->mid);
5199 sp->mid = NULL;
5200 sp->num_mid = 0;
5201 sp->start = NULL;
5202 sp->type = 0;
5203 ctx->bc->fc_sp--;
5204 }
5205
#if 0
/*
 * Disabled scaffolding for RETURN handling and flag-based loop exits.
 * Nothing between this #if 0 and the matching #endif is compiled; several of
 * the helpers are still empty placeholders.
 */

/* Emit a RETURN control-flow instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a JUMP that pops `pops` stack entries; the target is not computed yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Placeholder: would set the "return inside loop" flag to flag_value. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Placeholder: would test the "return inside loop" flag. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag and, when set, clear it, unwind ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag and leave the loop at flow-control level fc_sp when it is set. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
5253
5254 static int tgsi_if(struct r600_shader_ctx *ctx)
5255 {
5256 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5257
5258 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5259
5260 fc_pushlevel(ctx, FC_IF);
5261
5262 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5263 return 0;
5264 }
5265
5266 static int tgsi_else(struct r600_shader_ctx *ctx)
5267 {
5268 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5269 ctx->bc->cf_last->pop_count = 1;
5270
5271 fc_set_mid(ctx, ctx->bc->fc_sp);
5272 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5273 return 0;
5274 }
5275
5276 static int tgsi_endif(struct r600_shader_ctx *ctx)
5277 {
5278 pops(ctx, 1);
5279 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5280 R600_ERR("if/endif unbalanced in shader\n");
5281 return -1;
5282 }
5283
5284 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5285 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5286 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5287 } else {
5288 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5289 }
5290 fc_poplevel(ctx);
5291
5292 callstack_decrease_current(ctx, FC_PUSH_VPM);
5293 return 0;
5294 }
5295
5296 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5297 {
5298 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
5299 * limited to 4096 iterations, like the other LOOP_* instructions. */
5300 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
5301
5302 fc_pushlevel(ctx, FC_LOOP);
5303
5304 /* check stack depth */
5305 callstack_check_depth(ctx, FC_LOOP, 0);
5306 return 0;
5307 }
5308
5309 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5310 {
5311 int i;
5312
5313 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5314
5315 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5316 R600_ERR("loop/endloop in shader code are not paired.\n");
5317 return -EINVAL;
5318 }
5319
5320 /* fixup loop pointers - from r600isa
5321 LOOP END points to CF after LOOP START,
5322 LOOP START point to CF after LOOP END
5323 BRK/CONT point to LOOP END CF
5324 */
5325 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5326
5327 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5328
5329 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5330 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5331 }
5332 /* XXX add LOOPRET support */
5333 fc_poplevel(ctx);
5334 callstack_decrease_current(ctx, FC_LOOP);
5335 return 0;
5336 }
5337
5338 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5339 {
5340 unsigned int fscp;
5341
5342 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5343 {
5344 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5345 break;
5346 }
5347
5348 if (fscp == 0) {
5349 R600_ERR("Break not inside loop/endloop pair\n");
5350 return -EINVAL;
5351 }
5352
5353 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5354
5355 fc_set_mid(ctx, fscp);
5356
5357 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5358 return 0;
5359 }
5360
5361 static int tgsi_umad(struct r600_shader_ctx *ctx)
5362 {
5363 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5364 struct r600_bytecode_alu alu;
5365 int i, j, r;
5366 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5367
5368 /* src0 * src1 */
5369 for (i = 0; i < lasti + 1; i++) {
5370 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5371 continue;
5372
5373 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5374
5375 alu.dst.chan = i;
5376 alu.dst.sel = ctx->temp_reg;
5377 alu.dst.write = 1;
5378
5379 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5380 for (j = 0; j < 2; j++) {
5381 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5382 }
5383
5384 alu.last = 1;
5385 r = r600_bytecode_add_alu(ctx->bc, &alu);
5386 if (r)
5387 return r;
5388 }
5389
5390
5391 for (i = 0; i < lasti + 1; i++) {
5392 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5393 continue;
5394
5395 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5396 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5397
5398 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5399
5400 alu.src[0].sel = ctx->temp_reg;
5401 alu.src[0].chan = i;
5402
5403 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5404 if (i == lasti) {
5405 alu.last = 1;
5406 }
5407 r = r600_bytecode_add_alu(ctx->bc, &alu);
5408 if (r)
5409 return r;
5410 }
5411 return 0;
5412 }
5413
/*
 * TGSI opcode translation table using the un-prefixed V_SQ_* hardware
 * instruction codes (the "eg" and "cm" tables use EG_V_SQ_* codes instead).
 *
 * Each entry lists, in order: the TGSI opcode, a flag that is 1 only for MAD
 * (the single entry using an OP3 instruction code; presumably "is_op3" -
 * confirm against the struct definition), the hardware instruction code
 * (read elsewhere as inst_info->r600_opcode), and the handler function.
 *
 * Entries are written in ascending TGSI opcode order and unused opcode
 * numbers are filled with explicit numeric placeholders ("gap"), which
 * suggests the table is indexed directly by opcode value - NOTE(review):
 * verify at the lookup site before inserting or reordering entries.
 *
 * Opcodes routed to tgsi_unsupported are not translated by this table.
 * USNE is routed through tgsi_op2_swap here while the "eg" table uses
 * tgsi_op2 for the same opcode.
 */
5414 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5415 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5416 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5417 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5418
5419 /* XXX:
5420 * For state trackers other than OpenGL, we'll want to use
5421 * _RECIP_IEEE instead.
5422 */
5423 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5424
5425 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5426 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5427 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5428 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5429 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5430 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5431 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5432 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5433 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5434 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5435 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5436 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5437 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5438 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5439 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5440 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5441 /* gap */
5442 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5443 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5444 /* gap */
5445 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5446 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5447 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5448 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5449 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5450 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5451 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5452 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5453 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5454 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5455 /* gap */
5456 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5457 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5458 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5459 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5460 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5461 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5462 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5463 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5464 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5465 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5466 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5467 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5468 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5469 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5470 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5471 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5472 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5473 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5474 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5475 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5476 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5477 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5478 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5479 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5481 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5482 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5483 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5484 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5485 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5486 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5487 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5488 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5489 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5490 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5491 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5492 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5493 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5494 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5495 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5496 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5497 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5498 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5499 /* gap */
5500 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5501 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5502 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5503 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5504 /* gap */
5505 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5506 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5507 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5508 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5509 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5510 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5511 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5512 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5513 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5514 /* gap */
5515 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5516 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5517 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5518 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5519 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5520 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5521 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5522 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5523 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5524 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5525 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5526 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5527 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5528 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5529 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5530 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5531 /* gap */
5532 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5533 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5534 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5535 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5536 /* gap */
5537 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5538 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5539 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5540 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5541 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5542 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5543 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5544 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5545 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5546 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5547 /* gap */
5548 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5549 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5550 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5551 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5552 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5553 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5554 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5555 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5556 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5557 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5558 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5559 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5560 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5561 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5562 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5563 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5564 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5565 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5566 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5567 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5568 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5569 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5570 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5571 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5572 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5573 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5574 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5575 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5576 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5577 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5578 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5579 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5580 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5581 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5582 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5583 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5584 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5585 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5586 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5587 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5588 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5589 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5590 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5591 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5592 };
5593
/*
 * TGSI opcode translation table using the EG_V_SQ_* hardware instruction
 * codes (presumably for Evergreen-class chips, going by the "eg" prefix -
 * confirm at the site that selects between the tables).
 *
 * The entry layout and ordering match r600_shader_tgsi_instruction[]:
 * TGSI opcode, a flag that is 1 only for MAD, the hardware instruction code,
 * and the handler function, in ascending opcode order with numeric "gap"
 * placeholders.
 *
 * Handler/opcode choices that differ from the r600 table:
 *  - ARL, ARR and UARL use tgsi_eg_arl instead of tgsi_r600_arl
 *  - RCP uses RECIP_IEEE instead of RECIP_CLAMPED
 *  - RSQ carries RECIPSQRT_IEEE instead of NOP
 *  - SHL, ISHR and USHR use tgsi_op2 instead of tgsi_op2_trans
 *  - F2I and F2U use tgsi_f2i instead of tgsi_op2_trans
 *  - USNE uses tgsi_op2 instead of tgsi_op2_swap
 */
5594 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5595 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5596 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5597 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5598 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5599 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5600 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5601 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5602 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5603 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5604 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5605 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5606 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5607 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5608 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5609 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5610 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5611 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5612 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5613 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5614 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5615 /* gap */
5616 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5617 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5618 /* gap */
5619 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5620 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5621 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5622 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5623 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5624 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5625 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5626 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5627 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5628 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5629 /* gap */
5630 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5631 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5632 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5633 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5634 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5635 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5636 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5637 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5638 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5639 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5640 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5641 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5642 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5643 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5644 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5645 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5646 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5647 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5648 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5649 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5650 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5651 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5652 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5653 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5655 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5656 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5657 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5658 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5659 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5660 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5661 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5662 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5663 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5664 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5665 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5666 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5667 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5668 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5669 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5670 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5671 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5672 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5673 /* gap */
5674 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5675 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5676 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5677 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5678 /* gap */
5679 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5680 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5681 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5682 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5683 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5684 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5685 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5686 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5687 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5688 /* gap */
5689 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5690 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5691 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5692 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5693 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5694 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5695 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5696 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5697 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5698 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5699 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5700 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5701 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5702 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5703 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5704 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5705 /* gap */
5706 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5707 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5708 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5709 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5710 /* gap */
5711 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5712 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5713 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5714 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5715 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5716 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5717 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5718 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5719 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5720 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5721 /* gap */
5722 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5723 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5724 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5725 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5726 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5727 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5728 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5729 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5730 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5731 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5732 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5733 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5734 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5735 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5736 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5737 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5738 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5739 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5740 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5741 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5742 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5743 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5744 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5745 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5746 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5747 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5748 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5749 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5750 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5751 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5752 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5753 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5754 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5755 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5756 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5757 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5758 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5759 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5760 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5761 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5762 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5763 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5764 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5765 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5766 };
5767
5768 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5769 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5770 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5771 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5772 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5773 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5774 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5775 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5776 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5777 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5778 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5779 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5780 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5781 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5782 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5783 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5784 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5785 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5786 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5787 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5788 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5789 /* gap */
5790 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5791 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5792 /* gap */
5793 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5794 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5795 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5796 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5797 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5798 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5799 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5800 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5801 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5802 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5803 /* gap */
5804 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5805 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5806 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5807 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5808 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
5809 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5810 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5811 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5812 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5813 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5814 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5815 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5816 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5817 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5818 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5819 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5820 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5821 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5822 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5823 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5824 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5825 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5826 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5827 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5828 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5829 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5830 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5831 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5832 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5833 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5834 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5835 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5836 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5837 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5838 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5839 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5840 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5841 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5842 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5843 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5844 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5845 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5846 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5847 /* gap */
5848 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5849 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5850 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5851 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5852 /* gap */
5853 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5854 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5855 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5856 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5857 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5858 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5859 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5860 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5861 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5862 /* gap */
5863 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5864 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5865 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5866 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5867 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5868 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5869 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5870 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5871 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5872 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5873 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5874 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5875 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5876 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5877 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5878 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5879 /* gap */
5880 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5881 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5882 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5883 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5884 /* gap */
5885 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5886 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5887 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5888 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5889 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5890 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5891 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5892 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5893 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5894 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5895 /* gap */
5896 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5897 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5898 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5899 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5900 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5901 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5902 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5903 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5904 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5905 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5906 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5907 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5908 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5909 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5910 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5911 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5912 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5913 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
5914 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5915 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5916 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5917 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5918 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5919 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5920 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5921 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5922 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5923 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5924 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5925 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5926 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5927 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5928 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5929 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5930 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5931 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5932 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5933 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5934 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5935 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5936 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5937 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5938 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5939 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5940 };