Don't cast the return value of malloc/realloc
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_context *rctx = (struct r600_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader);
107
108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_context *rctx = (struct r600_context *)ctx;
112 struct r600_pipe_shader_selector *sel = shader->selector;
113 int r;
114
115 /* Would like some magic "get_bool_option_once" routine.
116 */
117 if (dump_shaders == -1)
118 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
119
120 if (dump_shaders) {
121 fprintf(stderr, "--------------------------------------------------------------\n");
122 tgsi_dump(sel->tokens, 0);
123
124 if (sel->so.num_outputs) {
125 unsigned i;
126 fprintf(stderr, "STREAMOUT\n");
127 for (i = 0; i < sel->so.num_outputs; i++) {
128 unsigned mask = ((1 << sel->so.output[i].num_components) - 1) <<
129 sel->so.output[i].start_component;
130 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
131 sel->so.output[i].output_buffer, sel->so.output[i].register_index,
132 mask & 1 ? "x" : "_",
133 (mask >> 1) & 1 ? "y" : "_",
134 (mask >> 2) & 1 ? "z" : "_",
135 (mask >> 3) & 1 ? "w" : "_");
136 }
137 }
138 }
139 r = r600_shader_from_tgsi(rctx, shader);
140 if (r) {
141 R600_ERR("translation from TGSI failed !\n");
142 return r;
143 }
144 r = r600_bytecode_build(&shader->shader.bc);
145 if (r) {
146 R600_ERR("building bytecode failed !\n");
147 return r;
148 }
149 if (dump_shaders) {
150 r600_bytecode_dump(&shader->shader.bc);
151 fprintf(stderr, "______________________________________________________________\n");
152 }
153 return r600_pipe_shader(ctx, shader);
154 }
155
156 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
157 {
158 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
159 r600_bytecode_clear(&shader->shader.bc);
160 }
161
162 /*
163 * tgsi -> r600 shader
164 */
165 struct r600_shader_tgsi_instruction;
166
/* One source operand of a TGSI instruction, as filled in by tgsi_src(). */
struct r600_shader_src {
	/* source selector: register index plus file offset, or a special
	 * value such as V_SQ_ALU_SRC_LITERAL */
	unsigned sel;
	/* per-channel swizzle, copied from the TGSI SwizzleX..W fields */
	unsigned swizzle[4];
	/* copied from the TGSI Negate flag */
	unsigned neg;
	/* copied from the TGSI Absolute flag */
	unsigned abs;
	/* V_SQ_REL_RELATIVE when the TGSI register is indirect, else 0 */
	unsigned rel;
	/* the four literal dwords when the operand is an immediate */
	uint32_t value[4];
};
175
/* State carried through the translation of one shader into r600 bytecode. */
struct r600_shader_ctx {
	struct tgsi_shader_info info;
	struct tgsi_parse_context parse;
	const struct tgsi_token *tokens;
	/* compared against TGSI_PROCESSOR_* values */
	unsigned type;
	/* per-file offset added to a TGSI register index to form a selector */
	unsigned file_offset[TGSI_FILE_COUNT];
	/* base register for driver temporaries, see r600_get_temp() */
	unsigned temp_reg;
	/* table entry of the instruction currently being processed */
	struct r600_shader_tgsi_instruction *inst_info;
	struct r600_bytecode *bc;
	struct r600_shader *shader;
	struct r600_shader_src src[4];
	/* immediate values, four dwords per TGSI immediate */
	uint32_t *literals;
	uint32_t nliterals;
	/* number of driver temporaries handed out by r600_get_temp() */
	uint32_t max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean input_centroid;
	boolean input_linear;
	boolean input_perspective;
	int num_interp_gpr;
	/* GPR of the TGSI_SEMANTIC_FACE fragment input */
	int face_gpr;
	/* number of TGSI_SEMANTIC_COLOR fragment inputs declared */
	int colors_used;
	/* set when a vertex shader declares a CLIPVERTEX output ... */
	boolean clip_vertex_write;
	/* ... and the index of that output */
	unsigned cv_output;
	/* index of the TGSI_SEMANTIC_POSITION fragment input */
	int fragcoord_input;
	/* when zero, the instance id system value is converted to float */
	int native_integers;
};
202
/*
 * One entry of the per-chip TGSI instruction tables; the tables are
 * indexed by TGSI opcode.
 */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;
	unsigned is_op3;
	unsigned r600_opcode;
	/* handler for the instruction; NOTE(review): presumably returns 0 on
	 * success and a negative errno otherwise - confirm against callers */
	int (*process)(struct r600_shader_ctx *ctx);
};
209
/* Per-chip instruction tables (r600/r700, evergreen, cayman), defined later. */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
/* Helpers and flow-control handlers used before their definitions. */
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
219
220 /*
221 * bytestream -> r600 shader
222 *
223 * These functions are used to transform the output of the LLVM backend into
224 * struct r600_bytecode.
225 */
226
227 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
228 unsigned char * bytes, unsigned num_bytes);
229
#ifdef HAVE_OPENCL
/*
 * Compile an LLVM module with the r600 LLVM backend and convert the
 * resulting byte stream into `bytecode`.
 *
 * The bytecode is dumped when the R600_DUMP_SHADERS debug option is set.
 * Always returns 1; the results of the compile and build steps are not
 * checked.
 */
int r600_compute_shader_create(struct pipe_context * ctx,
			LLVMModuleRef mod, struct r600_bytecode * bytecode)
{
	struct r600_context *r600_ctx = (struct r600_context *)ctx;
	unsigned char * bytes;
	unsigned byte_count;
	struct r600_shader_ctx shader_ctx;
	unsigned dump = 0;

	if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
		dump = 1;
	}

	/* Start from a zeroed context so the byte stream readers never see
	 * indeterminate members.  Only `bc` is set up here: `shader` stays
	 * NULL, so a KILLGT instruction in the stream (which writes
	 * ctx->shader->uses_kill) is still not supported on this path. */
	memset(&shader_ctx, 0, sizeof(shader_ctx));

	r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
	shader_ctx.bc = bytecode;
	r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
	shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
	r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
	if (shader_ctx.bc->chip_class == CAYMAN) {
		cm_bytecode_add_cf_end(shader_ctx.bc);
	}
	r600_bytecode_build(shader_ctx.bc);
	if (dump) {
		r600_bytecode_dump(shader_ctx.bc);
	}
	return 1;
}

#endif /* HAVE_OPENCL */
260
/*
 * Read a little-endian 32-bit value from `bytes` starting at *bytes_read
 * and advance *bytes_read by four.
 *
 * No bounds checking is done; the caller must guarantee that four bytes
 * are available.
 */
static uint32_t i32_from_byte_stream(unsigned char * bytes,
		unsigned * bytes_read)
{
	unsigned i;
	uint32_t out = 0;
	for (i = 0; i < 4; i++) {
		/* Widen before shifting: an unsigned char promotes to int, and
		 * shifting a value >= 0x80 left by 24 overflows a signed int. */
		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
	}
	return out;
}
271
272 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
273 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
274 {
275 unsigned i;
276 unsigned sel0, sel1;
277 sel0 = bytes[bytes_read++];
278 sel1 = bytes[bytes_read++];
279 alu->src[src_idx].sel = sel0 | (sel1 << 8);
280 alu->src[src_idx].chan = bytes[bytes_read++];
281 alu->src[src_idx].neg = bytes[bytes_read++];
282 alu->src[src_idx].abs = bytes[bytes_read++];
283 alu->src[src_idx].rel = bytes[bytes_read++];
284 alu->src[src_idx].kc_bank = bytes[bytes_read++];
285 for (i = 0; i < 4; i++) {
286 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
287 }
288 return bytes_read;
289 }
290
291 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
292 unsigned char * bytes, unsigned bytes_read)
293 {
294 unsigned src_idx;
295 unsigned inst0, inst1;
296 unsigned push_modifier;
297 struct r600_bytecode_alu alu;
298 memset(&alu, 0, sizeof(alu));
299 for(src_idx = 0; src_idx < 3; src_idx++) {
300 bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
301 &alu, src_idx);
302 }
303
304 alu.dst.sel = bytes[bytes_read++];
305 alu.dst.chan = bytes[bytes_read++];
306 alu.dst.clamp = bytes[bytes_read++];
307 alu.dst.write = bytes[bytes_read++];
308 alu.dst.rel = bytes[bytes_read++];
309 inst0 = bytes[bytes_read++];
310 inst1 = bytes[bytes_read++];
311 alu.inst = inst0 | (inst1 << 8);
312 alu.last = bytes[bytes_read++];
313 alu.is_op3 = bytes[bytes_read++];
314 push_modifier = bytes[bytes_read++];
315 alu.pred_sel = bytes[bytes_read++];
316 alu.bank_swizzle = bytes[bytes_read++];
317 alu.bank_swizzle_force = bytes[bytes_read++];
318 alu.omod = bytes[bytes_read++];
319 alu.index_mode = bytes[bytes_read++];
320
321
322 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
323 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
324 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) ||
325 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) {
326 alu.update_pred = 1;
327 alu.dst.write = 0;
328 alu.src[1].sel = V_SQ_ALU_SRC_0;
329 alu.src[1].chan = 0;
330 alu.last = 1;
331 }
332
333 if (push_modifier) {
334 alu.pred_sel = 0;
335 alu.execute_mask = 1;
336 r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
337 } else
338 r600_bytecode_add_alu(ctx->bc, &alu);
339
340
341 /* XXX: Handle other KILL instructions */
342 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
343 ctx->shader->uses_kill = 1;
344 /* XXX: This should be enforced in the LLVM backend. */
345 ctx->bc->force_add_cf = 1;
346 }
347 return bytes_read;
348 }
349
350 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
351 unsigned pred_inst)
352 {
353 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
354 fc_pushlevel(ctx, FC_IF);
355 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
356 }
357
358 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
359 struct r600_bytecode_alu *alu, unsigned compare_opcode)
360 {
361 unsigned opcode = TGSI_OPCODE_BRK;
362 if (ctx->bc->chip_class == CAYMAN)
363 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
364 else if (ctx->bc->chip_class >= EVERGREEN)
365 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
366 else
367 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
368 llvm_if(ctx, alu, compare_opcode);
369 tgsi_loop_brk_cont(ctx);
370 tgsi_endif(ctx);
371 }
372
373 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
374 unsigned char * bytes, unsigned bytes_read)
375 {
376 struct r600_bytecode_alu alu;
377 unsigned inst;
378 memset(&alu, 0, sizeof(alu));
379 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
380 inst = bytes[bytes_read++];
381 switch (inst) {
382 case 0: /* FC_IF */
383 llvm_if(ctx, &alu,
384 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
385 break;
386 case 1: /* FC_IF_INT */
387 llvm_if(ctx, &alu,
388 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
389 break;
390 case 2: /* FC_ELSE */
391 tgsi_else(ctx);
392 break;
393 case 3: /* FC_ENDIF */
394 tgsi_endif(ctx);
395 break;
396 case 4: /* FC_BGNLOOP */
397 tgsi_bgnloop(ctx);
398 break;
399 case 5: /* FC_ENDLOOP */
400 tgsi_endloop(ctx);
401 break;
402 case 6: /* FC_BREAK */
403 r600_break_from_byte_stream(ctx, &alu,
404 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
405 break;
406 case 7: /* FC_BREAK_NZ_INT */
407 r600_break_from_byte_stream(ctx, &alu,
408 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
409 break;
410 case 8: /* FC_CONTINUE */
411 {
412 unsigned opcode = TGSI_OPCODE_CONT;
413 if (ctx->bc->chip_class == CAYMAN) {
414 ctx->inst_info =
415 &cm_shader_tgsi_instruction[opcode];
416 } else if (ctx->bc->chip_class >= EVERGREEN) {
417 ctx->inst_info =
418 &eg_shader_tgsi_instruction[opcode];
419 } else {
420 ctx->inst_info =
421 &r600_shader_tgsi_instruction[opcode];
422 }
423 tgsi_loop_brk_cont(ctx);
424 }
425 break;
426 case 9: /* FC_BREAK_Z_INT */
427 r600_break_from_byte_stream(ctx, &alu,
428 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
429 break;
430 case 10: /* FC_BREAK_NZ */
431 r600_break_from_byte_stream(ctx, &alu,
432 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
433 break;
434 }
435
436 return bytes_read;
437 }
438
439 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
440 unsigned char * bytes, unsigned bytes_read)
441 {
442 struct r600_bytecode_tex tex;
443
444 tex.inst = bytes[bytes_read++];
445 tex.resource_id = bytes[bytes_read++];
446 tex.src_gpr = bytes[bytes_read++];
447 tex.src_rel = bytes[bytes_read++];
448 tex.dst_gpr = bytes[bytes_read++];
449 tex.dst_rel = bytes[bytes_read++];
450 tex.dst_sel_x = bytes[bytes_read++];
451 tex.dst_sel_y = bytes[bytes_read++];
452 tex.dst_sel_z = bytes[bytes_read++];
453 tex.dst_sel_w = bytes[bytes_read++];
454 tex.lod_bias = bytes[bytes_read++];
455 tex.coord_type_x = bytes[bytes_read++];
456 tex.coord_type_y = bytes[bytes_read++];
457 tex.coord_type_z = bytes[bytes_read++];
458 tex.coord_type_w = bytes[bytes_read++];
459 tex.offset_x = bytes[bytes_read++];
460 tex.offset_y = bytes[bytes_read++];
461 tex.offset_z = bytes[bytes_read++];
462 tex.sampler_id = bytes[bytes_read++];
463 tex.src_sel_x = bytes[bytes_read++];
464 tex.src_sel_y = bytes[bytes_read++];
465 tex.src_sel_z = bytes[bytes_read++];
466 tex.src_sel_w = bytes[bytes_read++];
467
468 r600_bytecode_add_tex(ctx->bc, &tex);
469
470 return bytes_read;
471 }
472
473 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
474 unsigned char * bytes, unsigned bytes_read)
475 {
476 struct r600_bytecode_vtx vtx;
477
478 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
479 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
480 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
481
482 memset(&vtx, 0, sizeof(vtx));
483
484 /* WORD0 */
485 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
486 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
487 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
488 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
489 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
490 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
491
492 /* WORD1 */
493 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
494 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
495 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
496 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
497 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
498 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
499 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
500 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
501 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
502 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
503
504 /* WORD 2*/
505 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
506 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
507
508 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
509 fprintf(stderr, "Error adding vtx\n");
510 }
511 /* Use the Texture Cache */
512 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
513 return bytes_read;
514 }
515
516 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
517 unsigned char * bytes, unsigned num_bytes)
518 {
519 unsigned bytes_read = 0;
520 unsigned i, byte;
521 while (bytes_read < num_bytes) {
522 char inst_type = bytes[bytes_read++];
523 switch (inst_type) {
524 case 0:
525 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
526 bytes_read);
527 break;
528 case 1:
529 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
530 bytes_read);
531 break;
532 case 2:
533 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
534 bytes_read);
535 break;
536 case 3:
537 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
538 for (i = 0; i < 2; i++) {
539 for (byte = 0 ; byte < 4; byte++) {
540 ctx->bc->cf_last->isa[i] |=
541 (bytes[bytes_read++] << (byte * 8));
542 }
543 }
544 break;
545
546 case 4:
547 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
548 bytes_read);
549 break;
550 default:
551 /* XXX: Error here */
552 break;
553 }
554 }
555 }
556
557 /* End bytestream -> r600 shader functions*/
558
559 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
560 {
561 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
562 int j;
563
564 if (i->Instruction.NumDstRegs > 1) {
565 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
566 return -EINVAL;
567 }
568 if (i->Instruction.Predicate) {
569 R600_ERR("predicate unsupported\n");
570 return -EINVAL;
571 }
572 #if 0
573 if (i->Instruction.Label) {
574 R600_ERR("label unsupported\n");
575 return -EINVAL;
576 }
577 #endif
578 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
579 if (i->Src[j].Register.Dimension) {
580 R600_ERR("unsupported src %d (dimension %d)\n", j,
581 i->Src[j].Register.Dimension);
582 return -EINVAL;
583 }
584 }
585 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
586 if (i->Dst[j].Register.Dimension) {
587 R600_ERR("unsupported dst (dimension)\n");
588 return -EINVAL;
589 }
590 }
591 return 0;
592 }
593
594 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
595 {
596 int i, r;
597 struct r600_bytecode_alu alu;
598 int gpr = 0, base_chan = 0;
599 int ij_index = 0;
600
601 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
602 ij_index = 0;
603 if (ctx->shader->input[input].centroid)
604 ij_index++;
605 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
606 ij_index = 0;
607 /* if we have perspective add one */
608 if (ctx->input_perspective) {
609 ij_index++;
610 /* if we have perspective centroid */
611 if (ctx->input_centroid)
612 ij_index++;
613 }
614 if (ctx->shader->input[input].centroid)
615 ij_index++;
616 }
617
618 /* work out gpr and base_chan from index */
619 gpr = ij_index / 2;
620 base_chan = (2 * (ij_index % 2)) + 1;
621
622 for (i = 0; i < 8; i++) {
623 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
624
625 if (i < 4)
626 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
627 else
628 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
629
630 if ((i > 1) && (i < 6)) {
631 alu.dst.sel = ctx->shader->input[input].gpr;
632 alu.dst.write = 1;
633 }
634
635 alu.dst.chan = i % 4;
636
637 alu.src[0].sel = gpr;
638 alu.src[0].chan = (base_chan - (i % 2));
639
640 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
641
642 alu.bank_swizzle_force = SQ_ALU_VEC_210;
643 if ((i % 4) == 3)
644 alu.last = 1;
645 r = r600_bytecode_add_alu(ctx->bc, &alu);
646 if (r)
647 return r;
648 }
649 return 0;
650 }
651
652 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
653 {
654 int i, r;
655 struct r600_bytecode_alu alu;
656
657 for (i = 0; i < 4; i++) {
658 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
659
660 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
661
662 alu.dst.sel = ctx->shader->input[input].gpr;
663 alu.dst.write = 1;
664
665 alu.dst.chan = i;
666
667 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
668 alu.src[0].chan = i;
669
670 if (i == 3)
671 alu.last = 1;
672 r = r600_bytecode_add_alu(ctx->bc, &alu);
673 if (r)
674 return r;
675 }
676 return 0;
677 }
678
679 /*
680 * Special export handling in shaders
681 *
682 * shader export ARRAY_BASE for EXPORT_POS:
683 * 60 is position
684 * 61 is misc vector
685 * 62, 63 are clip distance vectors
686 *
687 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
688 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
689 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
690 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
691 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
692 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
693 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
694 * exclusive from render target index)
695 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
696 *
697 *
698 * shader export ARRAY_BASE for EXPORT_PIXEL:
699 * 0-7 CB targets
700 * 61 computed Z vector
701 *
702 * The use of the values exported in the computed Z vector are controlled
703 * by DB_SHADER_CONTROL:
704 * Z_EXPORT_ENABLE - Z as a float in RED
705 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
706 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
707 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
708 * DB_SOURCE_FORMAT - export control restrictions
709 *
710 */
711
712
713 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
714 static int r600_spi_sid(struct r600_shader_io * io)
715 {
716 int index, name = io->name;
717
718 /* These params are handled differently, they don't need
719 * semantic indices, so we'll use 0 for them.
720 */
721 if (name == TGSI_SEMANTIC_POSITION ||
722 name == TGSI_SEMANTIC_PSIZE ||
723 name == TGSI_SEMANTIC_FACE)
724 index = 0;
725 else {
726 if (name == TGSI_SEMANTIC_GENERIC) {
727 /* For generic params simply use sid from tgsi */
728 index = io->sid;
729 } else {
730 /* For non-generic params - pack name and sid into 8 bits */
731 index = 0x80 | (name<<3) | (io->sid);
732 }
733
734 /* Make sure that all really used indices have nonzero value, so
735 * we can just compare it to 0 later instead of comparing the name
736 * with different values to detect special cases. */
737 index++;
738 }
739
740 return index;
741 };
742
743 /* turn input into interpolate on EG */
744 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
745 {
746 int r = 0;
747
748 if (ctx->shader->input[index].spi_sid) {
749 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
750 if (ctx->shader->input[index].interpolate > 0) {
751 r = evergreen_interp_alu(ctx, index);
752 } else {
753 r = evergreen_interp_flat(ctx, index);
754 }
755 }
756 return r;
757 }
758
759 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
760 {
761 struct r600_bytecode_alu alu;
762 int i, r;
763 int gpr_front = ctx->shader->input[front].gpr;
764 int gpr_back = ctx->shader->input[back].gpr;
765
766 for (i = 0; i < 4; i++) {
767 memset(&alu, 0, sizeof(alu));
768 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
769 alu.is_op3 = 1;
770 alu.dst.write = 1;
771 alu.dst.sel = gpr_front;
772 alu.src[0].sel = ctx->face_gpr;
773 alu.src[1].sel = gpr_front;
774 alu.src[2].sel = gpr_back;
775
776 alu.dst.chan = i;
777 alu.src[1].chan = i;
778 alu.src[2].chan = i;
779 alu.last = (i==3);
780
781 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
782 return r;
783 }
784
785 return 0;
786 }
787
788 static int tgsi_declaration(struct r600_shader_ctx *ctx)
789 {
790 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
791 unsigned i;
792 int r;
793
794 switch (d->Declaration.File) {
795 case TGSI_FILE_INPUT:
796 i = ctx->shader->ninput++;
797 ctx->shader->input[i].name = d->Semantic.Name;
798 ctx->shader->input[i].sid = d->Semantic.Index;
799 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
800 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
801 ctx->shader->input[i].centroid = d->Interp.Centroid;
802 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
803 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
804 switch (ctx->shader->input[i].name) {
805 case TGSI_SEMANTIC_FACE:
806 ctx->face_gpr = ctx->shader->input[i].gpr;
807 break;
808 case TGSI_SEMANTIC_COLOR:
809 ctx->colors_used++;
810 break;
811 case TGSI_SEMANTIC_POSITION:
812 ctx->fragcoord_input = i;
813 break;
814 }
815 if (ctx->bc->chip_class >= EVERGREEN) {
816 if ((r = evergreen_interp_input(ctx, i)))
817 return r;
818 }
819 }
820 break;
821 case TGSI_FILE_OUTPUT:
822 i = ctx->shader->noutput++;
823 ctx->shader->output[i].name = d->Semantic.Name;
824 ctx->shader->output[i].sid = d->Semantic.Index;
825 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
826 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
827 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
828 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
829 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
830 switch (d->Semantic.Name) {
831 case TGSI_SEMANTIC_CLIPDIST:
832 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
833 break;
834 case TGSI_SEMANTIC_PSIZE:
835 ctx->shader->vs_out_misc_write = 1;
836 ctx->shader->vs_out_point_size = 1;
837 break;
838 case TGSI_SEMANTIC_CLIPVERTEX:
839 ctx->clip_vertex_write = TRUE;
840 ctx->cv_output = i;
841 break;
842 }
843 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
844 switch (d->Semantic.Name) {
845 case TGSI_SEMANTIC_COLOR:
846 ctx->shader->nr_ps_max_color_exports++;
847 break;
848 }
849 }
850 break;
851 case TGSI_FILE_CONSTANT:
852 case TGSI_FILE_TEMPORARY:
853 case TGSI_FILE_SAMPLER:
854 case TGSI_FILE_ADDRESS:
855 break;
856
857 case TGSI_FILE_SYSTEM_VALUE:
858 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
859 if (!ctx->native_integers) {
860 struct r600_bytecode_alu alu;
861 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
862
863 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
864 alu.src[0].sel = 0;
865 alu.src[0].chan = 3;
866
867 alu.dst.sel = 0;
868 alu.dst.chan = 3;
869 alu.dst.write = 1;
870 alu.last = 1;
871
872 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
873 return r;
874 }
875 break;
876 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
877 break;
878 default:
879 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
880 return -EINVAL;
881 }
882 return 0;
883 }
884
885 static int r600_get_temp(struct r600_shader_ctx *ctx)
886 {
887 return ctx->temp_reg + ctx->max_driver_temp_used++;
888 }
889
890 /*
891 * for evergreen we need to scan the shader to find the number of GPRs we need to
892 * reserve for interpolation.
893 *
894 * we need to know if we are going to emit
895 * any centroid inputs
896 * if perspective and linear are required
897 */
898 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
899 {
900 int i;
901 int num_baryc;
902
903 ctx->input_linear = FALSE;
904 ctx->input_perspective = FALSE;
905 ctx->input_centroid = FALSE;
906 ctx->num_interp_gpr = 1;
907
908 /* any centroid inputs */
909 for (i = 0; i < ctx->info.num_inputs; i++) {
910 /* skip position/face */
911 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
912 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
913 continue;
914 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
915 ctx->input_linear = TRUE;
916 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
917 ctx->input_perspective = TRUE;
918 if (ctx->info.input_centroid[i])
919 ctx->input_centroid = TRUE;
920 }
921
922 num_baryc = 0;
923 /* ignoring sample for now */
924 if (ctx->input_perspective)
925 num_baryc++;
926 if (ctx->input_linear)
927 num_baryc++;
928 if (ctx->input_centroid)
929 num_baryc *= 2;
930
931 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
932
933 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
934 return ctx->num_interp_gpr;
935 }
936
937 static void tgsi_src(struct r600_shader_ctx *ctx,
938 const struct tgsi_full_src_register *tgsi_src,
939 struct r600_shader_src *r600_src)
940 {
941 memset(r600_src, 0, sizeof(*r600_src));
942 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
943 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
944 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
945 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
946 r600_src->neg = tgsi_src->Register.Negate;
947 r600_src->abs = tgsi_src->Register.Absolute;
948
949 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
950 int index;
951 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
952 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
953 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
954
955 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
956 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
957 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
958 return;
959 }
960 index = tgsi_src->Register.Index;
961 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
962 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
963 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
964 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
965 r600_src->swizzle[0] = 3;
966 r600_src->swizzle[1] = 3;
967 r600_src->swizzle[2] = 3;
968 r600_src->swizzle[3] = 3;
969 r600_src->sel = 0;
970 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
971 r600_src->swizzle[0] = 0;
972 r600_src->swizzle[1] = 0;
973 r600_src->swizzle[2] = 0;
974 r600_src->swizzle[3] = 0;
975 r600_src->sel = 0;
976 }
977 } else {
978 if (tgsi_src->Register.Indirect)
979 r600_src->rel = V_SQ_REL_RELATIVE;
980 r600_src->sel = tgsi_src->Register.Index;
981 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
982 }
983 }
984
985 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
986 {
987 struct r600_bytecode_vtx vtx;
988 unsigned int ar_reg;
989 int r;
990
991 if (offset) {
992 struct r600_bytecode_alu alu;
993
994 memset(&alu, 0, sizeof(alu));
995
996 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
997 alu.src[0].sel = ctx->bc->ar_reg;
998
999 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1000 alu.src[1].value = offset;
1001
1002 alu.dst.sel = dst_reg;
1003 alu.dst.write = 1;
1004 alu.last = 1;
1005
1006 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1007 return r;
1008
1009 ar_reg = dst_reg;
1010 } else {
1011 ar_reg = ctx->bc->ar_reg;
1012 }
1013
1014 memset(&vtx, 0, sizeof(vtx));
1015 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1016 vtx.src_gpr = ar_reg;
1017 vtx.mega_fetch_count = 16;
1018 vtx.dst_gpr = dst_reg;
1019 vtx.dst_sel_x = 0; /* SEL_X */
1020 vtx.dst_sel_y = 1; /* SEL_Y */
1021 vtx.dst_sel_z = 2; /* SEL_Z */
1022 vtx.dst_sel_w = 3; /* SEL_W */
1023 vtx.data_format = FMT_32_32_32_32_FLOAT;
1024 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1025 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1026 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1027 vtx.endian = r600_endian_swap(32);
1028
1029 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1030 return r;
1031
1032 return 0;
1033 }
1034
1035 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1036 {
1037 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1038 struct r600_bytecode_alu alu;
1039 int i, j, k, nconst, r;
1040
1041 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1042 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1043 nconst++;
1044 }
1045 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1046 }
1047 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1048 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1049 continue;
1050 }
1051
1052 if (ctx->src[i].rel) {
1053 int treg = r600_get_temp(ctx);
1054 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1055 return r;
1056
1057 ctx->src[i].sel = treg;
1058 ctx->src[i].rel = 0;
1059 j--;
1060 } else if (j > 0) {
1061 int treg = r600_get_temp(ctx);
1062 for (k = 0; k < 4; k++) {
1063 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1064 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1065 alu.src[0].sel = ctx->src[i].sel;
1066 alu.src[0].chan = k;
1067 alu.src[0].rel = ctx->src[i].rel;
1068 alu.dst.sel = treg;
1069 alu.dst.chan = k;
1070 alu.dst.write = 1;
1071 if (k == 3)
1072 alu.last = 1;
1073 r = r600_bytecode_add_alu(ctx->bc, &alu);
1074 if (r)
1075 return r;
1076 }
1077 ctx->src[i].sel = treg;
1078 ctx->src[i].rel =0;
1079 j--;
1080 }
1081 }
1082 return 0;
1083 }
1084
1085 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1086 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1087 {
1088 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1089 struct r600_bytecode_alu alu;
1090 int i, j, k, nliteral, r;
1091
1092 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1093 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1094 nliteral++;
1095 }
1096 }
1097 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1098 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1099 int treg = r600_get_temp(ctx);
1100 for (k = 0; k < 4; k++) {
1101 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1103 alu.src[0].sel = ctx->src[i].sel;
1104 alu.src[0].chan = k;
1105 alu.src[0].value = ctx->src[i].value[k];
1106 alu.dst.sel = treg;
1107 alu.dst.chan = k;
1108 alu.dst.write = 1;
1109 if (k == 3)
1110 alu.last = 1;
1111 r = r600_bytecode_add_alu(ctx->bc, &alu);
1112 if (r)
1113 return r;
1114 }
1115 ctx->src[i].sel = treg;
1116 j--;
1117 }
1118 }
1119 return 0;
1120 }
1121
1122 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1123 {
1124 int i, r, count = ctx->shader->ninput;
1125
1126 /* additional inputs will be allocated right after the existing inputs,
1127 * we won't need them after the color selection, so we don't need to
1128 * reserve these gprs for the rest of the shader code and to adjust
1129 * output offsets etc. */
1130 int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
1131 ctx->info.file_max[TGSI_FILE_INPUT] + 1;
1132
1133 if (ctx->face_gpr == -1) {
1134 i = ctx->shader->ninput++;
1135 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
1136 ctx->shader->input[i].spi_sid = 0;
1137 ctx->shader->input[i].gpr = gpr++;
1138 ctx->face_gpr = ctx->shader->input[i].gpr;
1139 }
1140
1141 for (i = 0; i < count; i++) {
1142 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1143 int ni = ctx->shader->ninput++;
1144 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
1145 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1146 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
1147 ctx->shader->input[ni].gpr = gpr++;
1148
1149 if (ctx->bc->chip_class >= EVERGREEN) {
1150 r = evergreen_interp_input(ctx, ni);
1151 if (r)
1152 return r;
1153 }
1154
1155 r = select_twoside_color(ctx, i, ni);
1156 if (r)
1157 return r;
1158 }
1159 }
1160 return 0;
1161 }
1162
1163 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
1164 {
1165 struct r600_shader *shader = &pipeshader->shader;
1166 struct tgsi_token *tokens = pipeshader->selector->tokens;
1167 struct pipe_stream_output_info so = pipeshader->selector->so;
1168 struct tgsi_full_immediate *immediate;
1169 struct tgsi_full_property *property;
1170 struct r600_shader_ctx ctx;
1171 struct r600_bytecode_output output[32];
1172 unsigned output_done, noutput;
1173 unsigned opcode;
1174 int i, j, k, r = 0;
1175 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1176 /* Declarations used by llvm code */
1177 bool use_llvm = false;
1178 unsigned char * inst_bytes = NULL;
1179 unsigned inst_byte_count = 0;
1180
1181 #ifdef R600_USE_LLVM
1182 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1183 #endif
1184 ctx.bc = &shader->bc;
1185 ctx.shader = shader;
1186 ctx.native_integers = true;
1187
1188 r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
1189 ctx.tokens = tokens;
1190 tgsi_scan_shader(tokens, &ctx.info);
1191 tgsi_parse_init(&ctx.parse, tokens);
1192 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1193 shader->processor_type = ctx.type;
1194 ctx.bc->type = shader->processor_type;
1195
1196 ctx.face_gpr = -1;
1197 ctx.fragcoord_input = -1;
1198 ctx.colors_used = 0;
1199 ctx.clip_vertex_write = 0;
1200
1201 shader->nr_ps_color_exports = 0;
1202 shader->nr_ps_max_color_exports = 0;
1203
1204 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
1205
1206 /* register allocations */
1207 /* Values [0,127] correspond to GPR[0..127].
1208 * Values [128,159] correspond to constant buffer bank 0
1209 * Values [160,191] correspond to constant buffer bank 1
1210 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1211 * Values [256,287] correspond to constant buffer bank 2 (EG)
1212 * Values [288,319] correspond to constant buffer bank 3 (EG)
1213 * Other special values are shown in the list below.
1214 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1215 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1216 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1217 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1218 * 248 SQ_ALU_SRC_0: special constant 0.0.
1219 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1220 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1221 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1222 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1223 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1224 * 254 SQ_ALU_SRC_PV: previous vector result.
1225 * 255 SQ_ALU_SRC_PS: previous scalar result.
1226 */
1227 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1228 ctx.file_offset[i] = 0;
1229 }
1230 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1231 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1232 if (ctx.bc->chip_class >= EVERGREEN) {
1233 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1234 } else {
1235 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1236 }
1237 }
1238 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1239 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1240 }
1241
1242 /* LLVM backend setup */
1243 #ifdef R600_USE_LLVM
1244 if (use_llvm && ctx.info.indirect_files) {
1245 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1246 "indirect adressing. Falling back to TGSI "
1247 "backend.\n");
1248 use_llvm = 0;
1249 }
1250 if (use_llvm) {
1251 struct radeon_llvm_context radeon_llvm_ctx;
1252 LLVMModuleRef mod;
1253 unsigned dump = 0;
1254 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1255 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1256 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1257 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1258 dump = 1;
1259 }
1260 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1261 rctx->family, dump)) {
1262 FREE(inst_bytes);
1263 radeon_llvm_dispose(&radeon_llvm_ctx);
1264 use_llvm = 0;
1265 fprintf(stderr, "R600 LLVM backend failed to compile "
1266 "shader. Falling back to TGSI\n");
1267 } else {
1268 ctx.file_offset[TGSI_FILE_OUTPUT] =
1269 ctx.file_offset[TGSI_FILE_INPUT];
1270 }
1271 radeon_llvm_dispose(&radeon_llvm_ctx);
1272 }
1273 #endif
1274 /* End of LLVM backend setup */
1275
1276 if (!use_llvm) {
1277 ctx.file_offset[TGSI_FILE_OUTPUT] =
1278 ctx.file_offset[TGSI_FILE_INPUT] +
1279 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1280 }
1281 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1282 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1283
1284 /* Outside the GPR range. This will be translated to one of the
1285 * kcache banks later. */
1286 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1287
1288 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1289 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1290 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1291 ctx.temp_reg = ctx.bc->ar_reg + 1;
1292
1293 ctx.nliterals = 0;
1294 ctx.literals = NULL;
1295 shader->fs_write_all = FALSE;
1296 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1297 tgsi_parse_token(&ctx.parse);
1298 switch (ctx.parse.FullToken.Token.Type) {
1299 case TGSI_TOKEN_TYPE_IMMEDIATE:
1300 immediate = &ctx.parse.FullToken.FullImmediate;
1301 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1302 if(ctx.literals == NULL) {
1303 r = -ENOMEM;
1304 goto out_err;
1305 }
1306 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1307 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1308 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1309 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1310 ctx.nliterals++;
1311 break;
1312 case TGSI_TOKEN_TYPE_DECLARATION:
1313 r = tgsi_declaration(&ctx);
1314 if (r)
1315 goto out_err;
1316 break;
1317 case TGSI_TOKEN_TYPE_INSTRUCTION:
1318 break;
1319 case TGSI_TOKEN_TYPE_PROPERTY:
1320 property = &ctx.parse.FullToken.FullProperty;
1321 switch (property->Property.PropertyName) {
1322 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1323 if (property->u[0].Data == 1)
1324 shader->fs_write_all = TRUE;
1325 break;
1326 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1327 if (property->u[0].Data == 1)
1328 shader->vs_prohibit_ucps = TRUE;
1329 break;
1330 }
1331 break;
1332 default:
1333 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1334 r = -EINVAL;
1335 goto out_err;
1336 }
1337 }
1338
1339 if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
1340 shader->nr_ps_max_color_exports = 8;
1341
1342 if (ctx.fragcoord_input >= 0) {
1343 if (ctx.bc->chip_class == CAYMAN) {
1344 for (j = 0 ; j < 4; j++) {
1345 struct r600_bytecode_alu alu;
1346 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1347 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1348 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1349 alu.src[0].chan = 3;
1350
1351 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1352 alu.dst.chan = j;
1353 alu.dst.write = (j == 3);
1354 alu.last = 1;
1355 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1356 return r;
1357 }
1358 } else {
1359 struct r600_bytecode_alu alu;
1360 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1361 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1362 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1363 alu.src[0].chan = 3;
1364
1365 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1366 alu.dst.chan = 3;
1367 alu.dst.write = 1;
1368 alu.last = 1;
1369 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1370 return r;
1371 }
1372 }
1373
1374 if (shader->two_side && ctx.colors_used) {
1375 if ((r = process_twoside_color_inputs(&ctx)))
1376 return r;
1377 }
1378
1379 tgsi_parse_init(&ctx.parse, tokens);
1380 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1381 tgsi_parse_token(&ctx.parse);
1382 switch (ctx.parse.FullToken.Token.Type) {
1383 case TGSI_TOKEN_TYPE_INSTRUCTION:
1384 if (use_llvm) {
1385 continue;
1386 }
1387 r = tgsi_is_supported(&ctx);
1388 if (r)
1389 goto out_err;
1390 ctx.max_driver_temp_used = 0;
1391 /* reserve first tmp for everyone */
1392 r600_get_temp(&ctx);
1393
1394 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1395 if ((r = tgsi_split_constant(&ctx)))
1396 goto out_err;
1397 if ((r = tgsi_split_literal_constant(&ctx)))
1398 goto out_err;
1399 if (ctx.bc->chip_class == CAYMAN)
1400 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1401 else if (ctx.bc->chip_class >= EVERGREEN)
1402 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1403 else
1404 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1405 r = ctx.inst_info->process(&ctx);
1406 if (r)
1407 goto out_err;
1408 break;
1409 default:
1410 break;
1411 }
1412 }
1413
1414 /* Get instructions if we are using the LLVM backend. */
1415 if (use_llvm) {
1416 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1417 FREE(inst_bytes);
1418 }
1419
1420 noutput = shader->noutput;
1421
1422 if (ctx.clip_vertex_write) {
1423 /* need to convert a clipvertex write into clipdistance writes and not export
1424 the clip vertex anymore */
1425
1426 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1427 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1428 shader->output[noutput].gpr = ctx.temp_reg;
1429 noutput++;
1430 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1431 shader->output[noutput].gpr = ctx.temp_reg+1;
1432 noutput++;
1433
1434 /* reset spi_sid for clipvertex output to avoid confusing spi */
1435 shader->output[ctx.cv_output].spi_sid = 0;
1436
1437 shader->clip_dist_write = 0xFF;
1438
1439 for (i = 0; i < 8; i++) {
1440 int oreg = i >> 2;
1441 int ochan = i & 3;
1442
1443 for (j = 0; j < 4; j++) {
1444 struct r600_bytecode_alu alu;
1445 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1446 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1447 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1448 alu.src[0].chan = j;
1449
1450 alu.src[1].sel = 512 + i;
1451 alu.src[1].kc_bank = 1;
1452 alu.src[1].chan = j;
1453
1454 alu.dst.sel = ctx.temp_reg + oreg;
1455 alu.dst.chan = j;
1456 alu.dst.write = (j == ochan);
1457 if (j == 3)
1458 alu.last = 1;
1459 r = r600_bytecode_add_alu(ctx.bc, &alu);
1460 if (r)
1461 return r;
1462 }
1463 }
1464 }
1465
1466 /* Add stream outputs. */
1467 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1468 for (i = 0; i < so.num_outputs; i++) {
1469 struct r600_bytecode_output output;
1470
1471 if (so.output[i].output_buffer >= 4) {
1472 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1473 so.output[i].output_buffer);
1474 r = -EINVAL;
1475 goto out_err;
1476 }
1477 if (so.output[i].dst_offset < so.output[i].start_component) {
1478 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1479 r = -EINVAL;
1480 goto out_err;
1481 }
1482
1483 memset(&output, 0, sizeof(struct r600_bytecode_output));
1484 output.gpr = shader->output[so.output[i].register_index].gpr;
1485 output.elem_size = 0;
1486 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1487 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1488 output.burst_count = 1;
1489 output.barrier = 1;
1490 /* array_size is an upper limit for the burst_count
1491 * with MEM_STREAM instructions */
1492 output.array_size = 0xFFF;
1493 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1494 if (ctx.bc->chip_class >= EVERGREEN) {
1495 switch (so.output[i].output_buffer) {
1496 case 0:
1497 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1498 break;
1499 case 1:
1500 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1501 break;
1502 case 2:
1503 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1504 break;
1505 case 3:
1506 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1507 break;
1508 }
1509 } else {
1510 switch (so.output[i].output_buffer) {
1511 case 0:
1512 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1513 break;
1514 case 1:
1515 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1516 break;
1517 case 2:
1518 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1519 break;
1520 case 3:
1521 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1522 break;
1523 }
1524 }
1525 r = r600_bytecode_add_output(ctx.bc, &output);
1526 if (r)
1527 goto out_err;
1528 }
1529 }
1530
1531 /* export output */
1532 for (i = 0, j = 0; i < noutput; i++, j++) {
1533 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1534 output[j].gpr = shader->output[i].gpr;
1535 output[j].elem_size = 3;
1536 output[j].swizzle_x = 0;
1537 output[j].swizzle_y = 1;
1538 output[j].swizzle_z = 2;
1539 output[j].swizzle_w = 3;
1540 output[j].burst_count = 1;
1541 output[j].barrier = 1;
1542 output[j].type = -1;
1543 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1544 switch (ctx.type) {
1545 case TGSI_PROCESSOR_VERTEX:
1546 switch (shader->output[i].name) {
1547 case TGSI_SEMANTIC_POSITION:
1548 output[j].array_base = next_pos_base++;
1549 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1550 break;
1551
1552 case TGSI_SEMANTIC_PSIZE:
1553 output[j].array_base = next_pos_base++;
1554 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1555 break;
1556 case TGSI_SEMANTIC_CLIPVERTEX:
1557 j--;
1558 break;
1559 case TGSI_SEMANTIC_CLIPDIST:
1560 output[j].array_base = next_pos_base++;
1561 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1562 /* spi_sid is 0 for clipdistance outputs that were generated
1563 * for clipvertex - we don't need to pass them to PS */
1564 if (shader->output[i].spi_sid) {
1565 j++;
1566 /* duplicate it as PARAM to pass to the pixel shader */
1567 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1568 output[j].array_base = next_param_base++;
1569 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1570 }
1571 break;
1572 case TGSI_SEMANTIC_FOG:
1573 output[j].swizzle_y = 4; /* 0 */
1574 output[j].swizzle_z = 4; /* 0 */
1575 output[j].swizzle_w = 5; /* 1 */
1576 break;
1577 }
1578 break;
1579 case TGSI_PROCESSOR_FRAGMENT:
1580 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1581 /* never export more colors than the number of CBs */
1582 if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
1583 /* skip export */
1584 j--;
1585 continue;
1586 }
1587 output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
1588 output[j].array_base = next_pixel_base++;
1589 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1590 shader->nr_ps_color_exports++;
1591 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
1592 for (k = 1; k < rctx->nr_cbufs; k++) {
1593 j++;
1594 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1595 output[j].gpr = shader->output[i].gpr;
1596 output[j].elem_size = 3;
1597 output[j].swizzle_x = 0;
1598 output[j].swizzle_y = 1;
1599 output[j].swizzle_z = 2;
1600 output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
1601 output[j].burst_count = 1;
1602 output[j].barrier = 1;
1603 output[j].array_base = next_pixel_base++;
1604 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1605 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1606 shader->nr_ps_color_exports++;
1607 }
1608 }
1609 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1610 output[j].array_base = 61;
1611 output[j].swizzle_x = 2;
1612 output[j].swizzle_y = 7;
1613 output[j].swizzle_z = output[j].swizzle_w = 7;
1614 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1615 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1616 output[j].array_base = 61;
1617 output[j].swizzle_x = 7;
1618 output[j].swizzle_y = 1;
1619 output[j].swizzle_z = output[j].swizzle_w = 7;
1620 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1621 } else {
1622 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1623 r = -EINVAL;
1624 goto out_err;
1625 }
1626 break;
1627 default:
1628 R600_ERR("unsupported processor type %d\n", ctx.type);
1629 r = -EINVAL;
1630 goto out_err;
1631 }
1632
1633 if (output[j].type==-1) {
1634 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1635 output[j].array_base = next_param_base++;
1636 }
1637 }
1638
1639 /* add fake param output for vertex shader if no param is exported */
1640 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1641 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1642 output[j].gpr = 0;
1643 output[j].elem_size = 3;
1644 output[j].swizzle_x = 7;
1645 output[j].swizzle_y = 7;
1646 output[j].swizzle_z = 7;
1647 output[j].swizzle_w = 7;
1648 output[j].burst_count = 1;
1649 output[j].barrier = 1;
1650 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1651 output[j].array_base = 0;
1652 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1653 j++;
1654 }
1655
1656 /* add fake pixel export */
1657 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1658 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1659 output[j].gpr = 0;
1660 output[j].elem_size = 3;
1661 output[j].swizzle_x = 7;
1662 output[j].swizzle_y = 7;
1663 output[j].swizzle_z = 7;
1664 output[j].swizzle_w = 7;
1665 output[j].burst_count = 1;
1666 output[j].barrier = 1;
1667 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1668 output[j].array_base = 0;
1669 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1670 j++;
1671 }
1672
1673 noutput = j;
1674
1675 /* set export done on last export of each type */
1676 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1677 if (ctx.bc->chip_class < CAYMAN) {
1678 if (i == (noutput - 1)) {
1679 output[i].end_of_program = 1;
1680 }
1681 }
1682 if (!(output_done & (1 << output[i].type))) {
1683 output_done |= (1 << output[i].type);
1684 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1685 }
1686 }
1687 /* add output to bytecode */
1688 for (i = 0; i < noutput; i++) {
1689 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1690 if (r)
1691 goto out_err;
1692 }
1693 /* add program end */
1694 if (ctx.bc->chip_class == CAYMAN)
1695 cm_bytecode_add_cf_end(ctx.bc);
1696
1697 /* check GPR limit - we have 124 = 128 - 4
1698 * (4 are reserved as alu clause temporary registers) */
1699 if (ctx.bc->ngpr > 124) {
1700 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1701 r = -ENOMEM;
1702 goto out_err;
1703 }
1704
1705 free(ctx.literals);
1706 tgsi_parse_free(&ctx.parse);
1707 return 0;
1708 out_err:
1709 free(ctx.literals);
1710 tgsi_parse_free(&ctx.parse);
1711 return r;
1712 }
1713
1714 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1715 {
1716 R600_ERR("%s tgsi opcode unsupported\n",
1717 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1718 return -EINVAL;
1719 }
1720
/* Handler that emits nothing and always reports success (0). */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* unused */

	return 0;
}
1725
1726 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1727 const struct r600_shader_src *shader_src,
1728 unsigned chan)
1729 {
1730 bc_src->sel = shader_src->sel;
1731 bc_src->chan = shader_src->swizzle[chan];
1732 bc_src->neg = shader_src->neg;
1733 bc_src->abs = shader_src->abs;
1734 bc_src->rel = shader_src->rel;
1735 bc_src->value = shader_src->value[bc_src->chan];
1736 }
1737
1738 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1739 {
1740 bc_src->abs = 1;
1741 bc_src->neg = 0;
1742 }
1743
1744 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1745 {
1746 bc_src->neg = !bc_src->neg;
1747 }
1748
1749 static void tgsi_dst(struct r600_shader_ctx *ctx,
1750 const struct tgsi_full_dst_register *tgsi_dst,
1751 unsigned swizzle,
1752 struct r600_bytecode_alu_dst *r600_dst)
1753 {
1754 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1755
1756 r600_dst->sel = tgsi_dst->Register.Index;
1757 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1758 r600_dst->chan = swizzle;
1759 r600_dst->write = 1;
1760 if (tgsi_dst->Register.Indirect)
1761 r600_dst->rel = V_SQ_REL_RELATIVE;
1762 if (inst->Instruction.Saturate) {
1763 r600_dst->clamp = 1;
1764 }
1765 }
1766
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask.  Returns 0 when none of those bits is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* Scan from W down to Y; X (or an empty mask) falls through to 0. */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
1778
1779 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1780 {
1781 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1782 struct r600_bytecode_alu alu;
1783 int i, j, r;
1784 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1785
1786 for (i = 0; i < lasti + 1; i++) {
1787 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1788 continue;
1789
1790 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1791 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1792
1793 alu.inst = ctx->inst_info->r600_opcode;
1794 if (!swap) {
1795 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1796 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1797 }
1798 } else {
1799 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1800 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1801 }
1802 /* handle some special cases */
1803 switch (ctx->inst_info->tgsi_opcode) {
1804 case TGSI_OPCODE_SUB:
1805 r600_bytecode_src_toggle_neg(&alu.src[1]);
1806 break;
1807 case TGSI_OPCODE_ABS:
1808 r600_bytecode_src_set_abs(&alu.src[0]);
1809 break;
1810 default:
1811 break;
1812 }
1813 if (i == lasti || trans_only) {
1814 alu.last = 1;
1815 }
1816 r = r600_bytecode_add_alu(ctx->bc, &alu);
1817 if (r)
1818 return r;
1819 }
1820 return 0;
1821 }
1822
/* Plain per-channel ALU op: sources in TGSI order, one group. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1827
/* Per-channel ALU op with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1832
/* Per-channel ALU op where every instruction is flagged 'last'. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1837
1838 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1839 {
1840 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1841 struct r600_bytecode_alu alu;
1842 int i, r;
1843 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1844
1845 for (i = 0; i < lasti + 1; i++) {
1846
1847 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1848 continue;
1849 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1850 alu.inst = ctx->inst_info->r600_opcode;
1851
1852 alu.src[0].sel = V_SQ_ALU_SRC_0;
1853
1854 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1855
1856 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1857
1858 if (i == lasti) {
1859 alu.last = 1;
1860 }
1861 r = r600_bytecode_add_alu(ctx->bc, &alu);
1862 if (r)
1863 return r;
1864 }
1865 return 0;
1866
1867 }
1868
1869 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1870 {
1871 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1872 int i, j, r;
1873 struct r600_bytecode_alu alu;
1874 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1875
1876 for (i = 0 ; i < last_slot; i++) {
1877 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1878 alu.inst = ctx->inst_info->r600_opcode;
1879 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1880 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1881 }
1882 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1883 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1884
1885 if (i == last_slot - 1)
1886 alu.last = 1;
1887 r = r600_bytecode_add_alu(ctx->bc, &alu);
1888 if (r)
1889 return r;
1890 }
1891 return 0;
1892 }
1893
1894 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1895 {
1896 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1897 int i, j, k, r;
1898 struct r600_bytecode_alu alu;
1899 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1900 for (k = 0; k < last_slot; k++) {
1901 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1902 continue;
1903
1904 for (i = 0 ; i < 4; i++) {
1905 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1906 alu.inst = ctx->inst_info->r600_opcode;
1907 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1908 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1909 }
1910 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1911 alu.dst.write = (i == k);
1912 if (i == 3)
1913 alu.last = 1;
1914 r = r600_bytecode_add_alu(ctx->bc, &alu);
1915 if (r)
1916 return r;
1917 }
1918 }
1919 return 0;
1920 }
1921
1922 /*
1923 * r600 - trunc to -PI..PI range
1924 * r700 - normalize by dividing by 2PI
1925 * see fdo bug 27901
1926 */
1927 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1928 {
1929 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1930 static float double_pi = 3.1415926535 * 2;
1931 static float neg_pi = -3.1415926535;
1932
1933 int r;
1934 struct r600_bytecode_alu alu;
1935
1936 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1937 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1938 alu.is_op3 = 1;
1939
1940 alu.dst.chan = 0;
1941 alu.dst.sel = ctx->temp_reg;
1942 alu.dst.write = 1;
1943
1944 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1945
1946 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1947 alu.src[1].chan = 0;
1948 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1949 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1950 alu.src[2].chan = 0;
1951 alu.last = 1;
1952 r = r600_bytecode_add_alu(ctx->bc, &alu);
1953 if (r)
1954 return r;
1955
1956 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1957 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1958
1959 alu.dst.chan = 0;
1960 alu.dst.sel = ctx->temp_reg;
1961 alu.dst.write = 1;
1962
1963 alu.src[0].sel = ctx->temp_reg;
1964 alu.src[0].chan = 0;
1965 alu.last = 1;
1966 r = r600_bytecode_add_alu(ctx->bc, &alu);
1967 if (r)
1968 return r;
1969
1970 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1971 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1972 alu.is_op3 = 1;
1973
1974 alu.dst.chan = 0;
1975 alu.dst.sel = ctx->temp_reg;
1976 alu.dst.write = 1;
1977
1978 alu.src[0].sel = ctx->temp_reg;
1979 alu.src[0].chan = 0;
1980
1981 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1982 alu.src[1].chan = 0;
1983 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1984 alu.src[2].chan = 0;
1985
1986 if (ctx->bc->chip_class == R600) {
1987 alu.src[1].value = *(uint32_t *)&double_pi;
1988 alu.src[2].value = *(uint32_t *)&neg_pi;
1989 } else {
1990 alu.src[1].sel = V_SQ_ALU_SRC_1;
1991 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1992 alu.src[2].neg = 1;
1993 }
1994
1995 alu.last = 1;
1996 r = r600_bytecode_add_alu(ctx->bc, &alu);
1997 if (r)
1998 return r;
1999 return 0;
2000 }
2001
2002 static int cayman_trig(struct r600_shader_ctx *ctx)
2003 {
2004 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2005 struct r600_bytecode_alu alu;
2006 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2007 int i, r;
2008
2009 r = tgsi_setup_trig(ctx);
2010 if (r)
2011 return r;
2012
2013
2014 for (i = 0; i < last_slot; i++) {
2015 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2016 alu.inst = ctx->inst_info->r600_opcode;
2017 alu.dst.chan = i;
2018
2019 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2020 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2021
2022 alu.src[0].sel = ctx->temp_reg;
2023 alu.src[0].chan = 0;
2024 if (i == last_slot - 1)
2025 alu.last = 1;
2026 r = r600_bytecode_add_alu(ctx->bc, &alu);
2027 if (r)
2028 return r;
2029 }
2030 return 0;
2031 }
2032
2033 static int tgsi_trig(struct r600_shader_ctx *ctx)
2034 {
2035 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2036 struct r600_bytecode_alu alu;
2037 int i, r;
2038 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2039
2040 r = tgsi_setup_trig(ctx);
2041 if (r)
2042 return r;
2043
2044 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2045 alu.inst = ctx->inst_info->r600_opcode;
2046 alu.dst.chan = 0;
2047 alu.dst.sel = ctx->temp_reg;
2048 alu.dst.write = 1;
2049
2050 alu.src[0].sel = ctx->temp_reg;
2051 alu.src[0].chan = 0;
2052 alu.last = 1;
2053 r = r600_bytecode_add_alu(ctx->bc, &alu);
2054 if (r)
2055 return r;
2056
2057 /* replicate result */
2058 for (i = 0; i < lasti + 1; i++) {
2059 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2060 continue;
2061
2062 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2063 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2064
2065 alu.src[0].sel = ctx->temp_reg;
2066 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2067 if (i == lasti)
2068 alu.last = 1;
2069 r = r600_bytecode_add_alu(ctx->bc, &alu);
2070 if (r)
2071 return r;
2072 }
2073 return 0;
2074 }
2075
2076 static int tgsi_scs(struct r600_shader_ctx *ctx)
2077 {
2078 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2079 struct r600_bytecode_alu alu;
2080 int i, r;
2081
2082 /* We'll only need the trig stuff if we are going to write to the
2083 * X or Y components of the destination vector.
2084 */
2085 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2086 r = tgsi_setup_trig(ctx);
2087 if (r)
2088 return r;
2089 }
2090
2091 /* dst.x = COS */
2092 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2093 if (ctx->bc->chip_class == CAYMAN) {
2094 for (i = 0 ; i < 3; i++) {
2095 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2096 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2097 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2098
2099 if (i == 0)
2100 alu.dst.write = 1;
2101 else
2102 alu.dst.write = 0;
2103 alu.src[0].sel = ctx->temp_reg;
2104 alu.src[0].chan = 0;
2105 if (i == 2)
2106 alu.last = 1;
2107 r = r600_bytecode_add_alu(ctx->bc, &alu);
2108 if (r)
2109 return r;
2110 }
2111 } else {
2112 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2113 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2114 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2115
2116 alu.src[0].sel = ctx->temp_reg;
2117 alu.src[0].chan = 0;
2118 alu.last = 1;
2119 r = r600_bytecode_add_alu(ctx->bc, &alu);
2120 if (r)
2121 return r;
2122 }
2123 }
2124
2125 /* dst.y = SIN */
2126 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2127 if (ctx->bc->chip_class == CAYMAN) {
2128 for (i = 0 ; i < 3; i++) {
2129 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2130 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2131 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2132 if (i == 1)
2133 alu.dst.write = 1;
2134 else
2135 alu.dst.write = 0;
2136 alu.src[0].sel = ctx->temp_reg;
2137 alu.src[0].chan = 0;
2138 if (i == 2)
2139 alu.last = 1;
2140 r = r600_bytecode_add_alu(ctx->bc, &alu);
2141 if (r)
2142 return r;
2143 }
2144 } else {
2145 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2146 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2147 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2148
2149 alu.src[0].sel = ctx->temp_reg;
2150 alu.src[0].chan = 0;
2151 alu.last = 1;
2152 r = r600_bytecode_add_alu(ctx->bc, &alu);
2153 if (r)
2154 return r;
2155 }
2156 }
2157
2158 /* dst.z = 0.0; */
2159 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2160 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2161
2162 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2163
2164 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2165
2166 alu.src[0].sel = V_SQ_ALU_SRC_0;
2167 alu.src[0].chan = 0;
2168
2169 alu.last = 1;
2170
2171 r = r600_bytecode_add_alu(ctx->bc, &alu);
2172 if (r)
2173 return r;
2174 }
2175
2176 /* dst.w = 1.0; */
2177 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2178 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2179
2180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2181
2182 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2183
2184 alu.src[0].sel = V_SQ_ALU_SRC_1;
2185 alu.src[0].chan = 0;
2186
2187 alu.last = 1;
2188
2189 r = r600_bytecode_add_alu(ctx->bc, &alu);
2190 if (r)
2191 return r;
2192 }
2193
2194 return 0;
2195 }
2196
2197 static int tgsi_kill(struct r600_shader_ctx *ctx)
2198 {
2199 struct r600_bytecode_alu alu;
2200 int i, r;
2201
2202 for (i = 0; i < 4; i++) {
2203 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2204 alu.inst = ctx->inst_info->r600_opcode;
2205
2206 alu.dst.chan = i;
2207
2208 alu.src[0].sel = V_SQ_ALU_SRC_0;
2209
2210 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2211 alu.src[1].sel = V_SQ_ALU_SRC_1;
2212 alu.src[1].neg = 1;
2213 } else {
2214 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2215 }
2216 if (i == 3) {
2217 alu.last = 1;
2218 }
2219 r = r600_bytecode_add_alu(ctx->bc, &alu);
2220 if (r)
2221 return r;
2222 }
2223
2224 /* kill must be last in ALU */
2225 ctx->bc->force_add_cf = 1;
2226 ctx->shader->uses_kill = TRUE;
2227 return 0;
2228 }
2229
2230 static int tgsi_lit(struct r600_shader_ctx *ctx)
2231 {
2232 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2233 struct r600_bytecode_alu alu;
2234 int r;
2235
2236 /* tmp.x = max(src.y, 0.0) */
2237 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2238 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2239 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2240 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2241 alu.src[1].chan = 1;
2242
2243 alu.dst.sel = ctx->temp_reg;
2244 alu.dst.chan = 0;
2245 alu.dst.write = 1;
2246
2247 alu.last = 1;
2248 r = r600_bytecode_add_alu(ctx->bc, &alu);
2249 if (r)
2250 return r;
2251
2252 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2253 {
2254 int chan;
2255 int sel;
2256 int i;
2257
2258 if (ctx->bc->chip_class == CAYMAN) {
2259 for (i = 0; i < 3; i++) {
2260 /* tmp.z = log(tmp.x) */
2261 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2262 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2263 alu.src[0].sel = ctx->temp_reg;
2264 alu.src[0].chan = 0;
2265 alu.dst.sel = ctx->temp_reg;
2266 alu.dst.chan = i;
2267 if (i == 2) {
2268 alu.dst.write = 1;
2269 alu.last = 1;
2270 } else
2271 alu.dst.write = 0;
2272
2273 r = r600_bytecode_add_alu(ctx->bc, &alu);
2274 if (r)
2275 return r;
2276 }
2277 } else {
2278 /* tmp.z = log(tmp.x) */
2279 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2280 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2281 alu.src[0].sel = ctx->temp_reg;
2282 alu.src[0].chan = 0;
2283 alu.dst.sel = ctx->temp_reg;
2284 alu.dst.chan = 2;
2285 alu.dst.write = 1;
2286 alu.last = 1;
2287 r = r600_bytecode_add_alu(ctx->bc, &alu);
2288 if (r)
2289 return r;
2290 }
2291
2292 chan = alu.dst.chan;
2293 sel = alu.dst.sel;
2294
2295 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2296 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2297 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2298 alu.src[0].sel = sel;
2299 alu.src[0].chan = chan;
2300 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2301 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2302 alu.dst.sel = ctx->temp_reg;
2303 alu.dst.chan = 0;
2304 alu.dst.write = 1;
2305 alu.is_op3 = 1;
2306 alu.last = 1;
2307 r = r600_bytecode_add_alu(ctx->bc, &alu);
2308 if (r)
2309 return r;
2310
2311 if (ctx->bc->chip_class == CAYMAN) {
2312 for (i = 0; i < 3; i++) {
2313 /* dst.z = exp(tmp.x) */
2314 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2315 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2316 alu.src[0].sel = ctx->temp_reg;
2317 alu.src[0].chan = 0;
2318 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2319 if (i == 2) {
2320 alu.dst.write = 1;
2321 alu.last = 1;
2322 } else
2323 alu.dst.write = 0;
2324 r = r600_bytecode_add_alu(ctx->bc, &alu);
2325 if (r)
2326 return r;
2327 }
2328 } else {
2329 /* dst.z = exp(tmp.x) */
2330 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2331 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2332 alu.src[0].sel = ctx->temp_reg;
2333 alu.src[0].chan = 0;
2334 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2335 alu.last = 1;
2336 r = r600_bytecode_add_alu(ctx->bc, &alu);
2337 if (r)
2338 return r;
2339 }
2340 }
2341
2342 /* dst.x, <- 1.0 */
2343 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2344 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2345 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2346 alu.src[0].chan = 0;
2347 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2348 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2349 r = r600_bytecode_add_alu(ctx->bc, &alu);
2350 if (r)
2351 return r;
2352
2353 /* dst.y = max(src.x, 0.0) */
2354 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2355 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2356 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2357 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2358 alu.src[1].chan = 0;
2359 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2360 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2361 r = r600_bytecode_add_alu(ctx->bc, &alu);
2362 if (r)
2363 return r;
2364
2365 /* dst.w, <- 1.0 */
2366 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2367 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2368 alu.src[0].sel = V_SQ_ALU_SRC_1;
2369 alu.src[0].chan = 0;
2370 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2371 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2372 alu.last = 1;
2373 r = r600_bytecode_add_alu(ctx->bc, &alu);
2374 if (r)
2375 return r;
2376
2377 return 0;
2378 }
2379
2380 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2381 {
2382 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2383 struct r600_bytecode_alu alu;
2384 int i, r;
2385
2386 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2387
2388 /* XXX:
2389 * For state trackers other than OpenGL, we'll want to use
2390 * _RECIPSQRT_IEEE instead.
2391 */
2392 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2393
2394 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2395 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2396 r600_bytecode_src_set_abs(&alu.src[i]);
2397 }
2398 alu.dst.sel = ctx->temp_reg;
2399 alu.dst.write = 1;
2400 alu.last = 1;
2401 r = r600_bytecode_add_alu(ctx->bc, &alu);
2402 if (r)
2403 return r;
2404 /* replicate result */
2405 return tgsi_helper_tempx_replicate(ctx);
2406 }
2407
2408 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2409 {
2410 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2411 struct r600_bytecode_alu alu;
2412 int i, r;
2413
2414 for (i = 0; i < 4; i++) {
2415 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2416 alu.src[0].sel = ctx->temp_reg;
2417 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2418 alu.dst.chan = i;
2419 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2420 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2421 if (i == 3)
2422 alu.last = 1;
2423 r = r600_bytecode_add_alu(ctx->bc, &alu);
2424 if (r)
2425 return r;
2426 }
2427 return 0;
2428 }
2429
2430 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2431 {
2432 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2433 struct r600_bytecode_alu alu;
2434 int i, r;
2435
2436 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2437 alu.inst = ctx->inst_info->r600_opcode;
2438 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2439 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2440 }
2441 alu.dst.sel = ctx->temp_reg;
2442 alu.dst.write = 1;
2443 alu.last = 1;
2444 r = r600_bytecode_add_alu(ctx->bc, &alu);
2445 if (r)
2446 return r;
2447 /* replicate result */
2448 return tgsi_helper_tempx_replicate(ctx);
2449 }
2450
2451 static int cayman_pow(struct r600_shader_ctx *ctx)
2452 {
2453 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2454 int i, r;
2455 struct r600_bytecode_alu alu;
2456 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2457
2458 for (i = 0; i < 3; i++) {
2459 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2460 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2461 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2462 alu.dst.sel = ctx->temp_reg;
2463 alu.dst.chan = i;
2464 alu.dst.write = 1;
2465 if (i == 2)
2466 alu.last = 1;
2467 r = r600_bytecode_add_alu(ctx->bc, &alu);
2468 if (r)
2469 return r;
2470 }
2471
2472 /* b * LOG2(a) */
2473 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2474 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2475 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2476 alu.src[1].sel = ctx->temp_reg;
2477 alu.dst.sel = ctx->temp_reg;
2478 alu.dst.write = 1;
2479 alu.last = 1;
2480 r = r600_bytecode_add_alu(ctx->bc, &alu);
2481 if (r)
2482 return r;
2483
2484 for (i = 0; i < last_slot; i++) {
2485 /* POW(a,b) = EXP2(b * LOG2(a))*/
2486 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2487 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2488 alu.src[0].sel = ctx->temp_reg;
2489
2490 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2491 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2492 if (i == last_slot - 1)
2493 alu.last = 1;
2494 r = r600_bytecode_add_alu(ctx->bc, &alu);
2495 if (r)
2496 return r;
2497 }
2498 return 0;
2499 }
2500
2501 static int tgsi_pow(struct r600_shader_ctx *ctx)
2502 {
2503 struct r600_bytecode_alu alu;
2504 int r;
2505
2506 /* LOG2(a) */
2507 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2508 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2509 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2510 alu.dst.sel = ctx->temp_reg;
2511 alu.dst.write = 1;
2512 alu.last = 1;
2513 r = r600_bytecode_add_alu(ctx->bc, &alu);
2514 if (r)
2515 return r;
2516 /* b * LOG2(a) */
2517 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2518 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2519 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2520 alu.src[1].sel = ctx->temp_reg;
2521 alu.dst.sel = ctx->temp_reg;
2522 alu.dst.write = 1;
2523 alu.last = 1;
2524 r = r600_bytecode_add_alu(ctx->bc, &alu);
2525 if (r)
2526 return r;
2527 /* POW(a,b) = EXP2(b * LOG2(a))*/
2528 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2530 alu.src[0].sel = ctx->temp_reg;
2531 alu.dst.sel = ctx->temp_reg;
2532 alu.dst.write = 1;
2533 alu.last = 1;
2534 r = r600_bytecode_add_alu(ctx->bc, &alu);
2535 if (r)
2536 return r;
2537 return tgsi_helper_tempx_replicate(ctx);
2538 }
2539
2540 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2541 {
2542 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2543 struct r600_bytecode_alu alu;
2544 int i, r, j;
2545 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2546 int tmp0 = ctx->temp_reg;
2547 int tmp1 = r600_get_temp(ctx);
2548 int tmp2 = r600_get_temp(ctx);
2549 int tmp3 = r600_get_temp(ctx);
2550 /* Unsigned path:
2551 *
2552 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2553 *
2554 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2555 * 2. tmp0.z = lo (tmp0.x * src2)
2556 * 3. tmp0.w = -tmp0.z
2557 * 4. tmp0.y = hi (tmp0.x * src2)
2558 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2559 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2560 * 7. tmp1.x = tmp0.x - tmp0.w
2561 * 8. tmp1.y = tmp0.x + tmp0.w
2562 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2563 * 10. tmp0.z = hi(tmp0.x * src1) = q
2564 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2565 *
2566 * 12. tmp0.w = src1 - tmp0.y = r
2567 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2568 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2569 *
2570 * if DIV
2571 *
2572 * 15. tmp1.z = tmp0.z + 1 = q + 1
2573 * 16. tmp1.w = tmp0.z - 1 = q - 1
2574 *
2575 * else MOD
2576 *
2577 * 15. tmp1.z = tmp0.w - src2 = r - src2
2578 * 16. tmp1.w = tmp0.w + src2 = r + src2
2579 *
2580 * endif
2581 *
2582 * 17. tmp1.x = tmp1.x & tmp1.y
2583 *
2584 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2585 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2586 *
2587 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2588 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2589 *
2590 * Signed path:
2591 *
2592 * Same as unsigned, using abs values of the operands,
2593 * and fixing the sign of the result in the end.
2594 */
2595
2596 for (i = 0; i < 4; i++) {
2597 if (!(write_mask & (1<<i)))
2598 continue;
2599
2600 if (signed_op) {
2601
2602 /* tmp2.x = -src0 */
2603 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2604 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2605
2606 alu.dst.sel = tmp2;
2607 alu.dst.chan = 0;
2608 alu.dst.write = 1;
2609
2610 alu.src[0].sel = V_SQ_ALU_SRC_0;
2611
2612 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2613
2614 alu.last = 1;
2615 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2616 return r;
2617
2618 /* tmp2.y = -src1 */
2619 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2620 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2621
2622 alu.dst.sel = tmp2;
2623 alu.dst.chan = 1;
2624 alu.dst.write = 1;
2625
2626 alu.src[0].sel = V_SQ_ALU_SRC_0;
2627
2628 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2629
2630 alu.last = 1;
2631 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2632 return r;
2633
2634 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2635 /* it will be a sign of the quotient */
2636 if (!mod) {
2637
2638 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2639 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2640
2641 alu.dst.sel = tmp2;
2642 alu.dst.chan = 2;
2643 alu.dst.write = 1;
2644
2645 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2646 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2647
2648 alu.last = 1;
2649 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2650 return r;
2651 }
2652
2653 /* tmp2.x = |src0| */
2654 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2655 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2656 alu.is_op3 = 1;
2657
2658 alu.dst.sel = tmp2;
2659 alu.dst.chan = 0;
2660 alu.dst.write = 1;
2661
2662 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2663 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2664 alu.src[2].sel = tmp2;
2665 alu.src[2].chan = 0;
2666
2667 alu.last = 1;
2668 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2669 return r;
2670
2671 /* tmp2.y = |src1| */
2672 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2673 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2674 alu.is_op3 = 1;
2675
2676 alu.dst.sel = tmp2;
2677 alu.dst.chan = 1;
2678 alu.dst.write = 1;
2679
2680 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2681 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2682 alu.src[2].sel = tmp2;
2683 alu.src[2].chan = 1;
2684
2685 alu.last = 1;
2686 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2687 return r;
2688
2689 }
2690
2691 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2692 if (ctx->bc->chip_class == CAYMAN) {
2693 /* tmp3.x = u2f(src2) */
2694 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2695 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2696
2697 alu.dst.sel = tmp3;
2698 alu.dst.chan = 0;
2699 alu.dst.write = 1;
2700
2701 if (signed_op) {
2702 alu.src[0].sel = tmp2;
2703 alu.src[0].chan = 1;
2704 } else {
2705 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2706 }
2707
2708 alu.last = 1;
2709 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2710 return r;
2711
2712 /* tmp0.x = recip(tmp3.x) */
2713 for (j = 0 ; j < 3; j++) {
2714 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2715 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2716
2717 alu.dst.sel = tmp0;
2718 alu.dst.chan = j;
2719 alu.dst.write = (j == 0);
2720
2721 alu.src[0].sel = tmp3;
2722 alu.src[0].chan = 0;
2723
2724 if (j == 2)
2725 alu.last = 1;
2726 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2727 return r;
2728 }
2729
2730 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2731 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2732
2733 alu.src[0].sel = tmp0;
2734 alu.src[0].chan = 0;
2735
2736 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2737 alu.src[1].value = 0x4f800000;
2738
2739 alu.dst.sel = tmp3;
2740 alu.dst.write = 1;
2741 alu.last = 1;
2742 r = r600_bytecode_add_alu(ctx->bc, &alu);
2743 if (r)
2744 return r;
2745
2746 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2747 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2748
2749 alu.dst.sel = tmp0;
2750 alu.dst.chan = 0;
2751 alu.dst.write = 1;
2752
2753 alu.src[0].sel = tmp3;
2754 alu.src[0].chan = 0;
2755
2756 alu.last = 1;
2757 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2758 return r;
2759
2760 } else {
2761 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2762 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2763
2764 alu.dst.sel = tmp0;
2765 alu.dst.chan = 0;
2766 alu.dst.write = 1;
2767
2768 if (signed_op) {
2769 alu.src[0].sel = tmp2;
2770 alu.src[0].chan = 1;
2771 } else {
2772 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2773 }
2774
2775 alu.last = 1;
2776 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2777 return r;
2778 }
2779
2780 /* 2. tmp0.z = lo (tmp0.x * src2) */
2781 if (ctx->bc->chip_class == CAYMAN) {
2782 for (j = 0 ; j < 4; j++) {
2783 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2784 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2785
2786 alu.dst.sel = tmp0;
2787 alu.dst.chan = j;
2788 alu.dst.write = (j == 2);
2789
2790 alu.src[0].sel = tmp0;
2791 alu.src[0].chan = 0;
2792 if (signed_op) {
2793 alu.src[1].sel = tmp2;
2794 alu.src[1].chan = 1;
2795 } else {
2796 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2797 }
2798
2799 alu.last = (j == 3);
2800 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2801 return r;
2802 }
2803 } else {
2804 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2805 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2806
2807 alu.dst.sel = tmp0;
2808 alu.dst.chan = 2;
2809 alu.dst.write = 1;
2810
2811 alu.src[0].sel = tmp0;
2812 alu.src[0].chan = 0;
2813 if (signed_op) {
2814 alu.src[1].sel = tmp2;
2815 alu.src[1].chan = 1;
2816 } else {
2817 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2818 }
2819
2820 alu.last = 1;
2821 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2822 return r;
2823 }
2824
2825 /* 3. tmp0.w = -tmp0.z */
2826 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2827 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2828
2829 alu.dst.sel = tmp0;
2830 alu.dst.chan = 3;
2831 alu.dst.write = 1;
2832
2833 alu.src[0].sel = V_SQ_ALU_SRC_0;
2834 alu.src[1].sel = tmp0;
2835 alu.src[1].chan = 2;
2836
2837 alu.last = 1;
2838 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2839 return r;
2840
2841 /* 4. tmp0.y = hi (tmp0.x * src2) */
2842 if (ctx->bc->chip_class == CAYMAN) {
2843 for (j = 0 ; j < 4; j++) {
2844 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2845 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2846
2847 alu.dst.sel = tmp0;
2848 alu.dst.chan = j;
2849 alu.dst.write = (j == 1);
2850
2851 alu.src[0].sel = tmp0;
2852 alu.src[0].chan = 0;
2853
2854 if (signed_op) {
2855 alu.src[1].sel = tmp2;
2856 alu.src[1].chan = 1;
2857 } else {
2858 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2859 }
2860 alu.last = (j == 3);
2861 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2862 return r;
2863 }
2864 } else {
2865 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2866 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2867
2868 alu.dst.sel = tmp0;
2869 alu.dst.chan = 1;
2870 alu.dst.write = 1;
2871
2872 alu.src[0].sel = tmp0;
2873 alu.src[0].chan = 0;
2874
2875 if (signed_op) {
2876 alu.src[1].sel = tmp2;
2877 alu.src[1].chan = 1;
2878 } else {
2879 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2880 }
2881
2882 alu.last = 1;
2883 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2884 return r;
2885 }
2886
2887 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2888 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2889 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2890 alu.is_op3 = 1;
2891
2892 alu.dst.sel = tmp0;
2893 alu.dst.chan = 2;
2894 alu.dst.write = 1;
2895
2896 alu.src[0].sel = tmp0;
2897 alu.src[0].chan = 1;
2898 alu.src[1].sel = tmp0;
2899 alu.src[1].chan = 3;
2900 alu.src[2].sel = tmp0;
2901 alu.src[2].chan = 2;
2902
2903 alu.last = 1;
2904 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2905 return r;
2906
2907 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2908 if (ctx->bc->chip_class == CAYMAN) {
2909 for (j = 0 ; j < 4; j++) {
2910 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2911 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2912
2913 alu.dst.sel = tmp0;
2914 alu.dst.chan = j;
2915 alu.dst.write = (j == 3);
2916
2917 alu.src[0].sel = tmp0;
2918 alu.src[0].chan = 2;
2919
2920 alu.src[1].sel = tmp0;
2921 alu.src[1].chan = 0;
2922
2923 alu.last = (j == 3);
2924 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2925 return r;
2926 }
2927 } else {
2928 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2929 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2930
2931 alu.dst.sel = tmp0;
2932 alu.dst.chan = 3;
2933 alu.dst.write = 1;
2934
2935 alu.src[0].sel = tmp0;
2936 alu.src[0].chan = 2;
2937
2938 alu.src[1].sel = tmp0;
2939 alu.src[1].chan = 0;
2940
2941 alu.last = 1;
2942 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2943 return r;
2944 }
2945
2946 /* 7. tmp1.x = tmp0.x - tmp0.w */
2947 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2948 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2949
2950 alu.dst.sel = tmp1;
2951 alu.dst.chan = 0;
2952 alu.dst.write = 1;
2953
2954 alu.src[0].sel = tmp0;
2955 alu.src[0].chan = 0;
2956 alu.src[1].sel = tmp0;
2957 alu.src[1].chan = 3;
2958
2959 alu.last = 1;
2960 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2961 return r;
2962
2963 /* 8. tmp1.y = tmp0.x + tmp0.w */
2964 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2965 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2966
2967 alu.dst.sel = tmp1;
2968 alu.dst.chan = 1;
2969 alu.dst.write = 1;
2970
2971 alu.src[0].sel = tmp0;
2972 alu.src[0].chan = 0;
2973 alu.src[1].sel = tmp0;
2974 alu.src[1].chan = 3;
2975
2976 alu.last = 1;
2977 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2978 return r;
2979
2980 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2981 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2982 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2983 alu.is_op3 = 1;
2984
2985 alu.dst.sel = tmp0;
2986 alu.dst.chan = 0;
2987 alu.dst.write = 1;
2988
2989 alu.src[0].sel = tmp0;
2990 alu.src[0].chan = 1;
2991 alu.src[1].sel = tmp1;
2992 alu.src[1].chan = 1;
2993 alu.src[2].sel = tmp1;
2994 alu.src[2].chan = 0;
2995
2996 alu.last = 1;
2997 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2998 return r;
2999
3000 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
3001 if (ctx->bc->chip_class == CAYMAN) {
3002 for (j = 0 ; j < 4; j++) {
3003 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3004 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3005
3006 alu.dst.sel = tmp0;
3007 alu.dst.chan = j;
3008 alu.dst.write = (j == 2);
3009
3010 alu.src[0].sel = tmp0;
3011 alu.src[0].chan = 0;
3012
3013 if (signed_op) {
3014 alu.src[1].sel = tmp2;
3015 alu.src[1].chan = 0;
3016 } else {
3017 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3018 }
3019
3020 alu.last = (j == 3);
3021 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3022 return r;
3023 }
3024 } else {
3025 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3026 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3027
3028 alu.dst.sel = tmp0;
3029 alu.dst.chan = 2;
3030 alu.dst.write = 1;
3031
3032 alu.src[0].sel = tmp0;
3033 alu.src[0].chan = 0;
3034
3035 if (signed_op) {
3036 alu.src[1].sel = tmp2;
3037 alu.src[1].chan = 0;
3038 } else {
3039 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3040 }
3041
3042 alu.last = 1;
3043 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3044 return r;
3045 }
3046
3047 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
3048 if (ctx->bc->chip_class == CAYMAN) {
3049 for (j = 0 ; j < 4; j++) {
3050 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3051 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3052
3053 alu.dst.sel = tmp0;
3054 alu.dst.chan = j;
3055 alu.dst.write = (j == 1);
3056
3057 if (signed_op) {
3058 alu.src[0].sel = tmp2;
3059 alu.src[0].chan = 1;
3060 } else {
3061 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3062 }
3063
3064 alu.src[1].sel = tmp0;
3065 alu.src[1].chan = 2;
3066
3067 alu.last = (j == 3);
3068 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3069 return r;
3070 }
3071 } else {
3072 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3073 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3074
3075 alu.dst.sel = tmp0;
3076 alu.dst.chan = 1;
3077 alu.dst.write = 1;
3078
3079 if (signed_op) {
3080 alu.src[0].sel = tmp2;
3081 alu.src[0].chan = 1;
3082 } else {
3083 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3084 }
3085
3086 alu.src[1].sel = tmp0;
3087 alu.src[1].chan = 2;
3088
3089 alu.last = 1;
3090 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3091 return r;
3092 }
3093
3094 /* 12. tmp0.w = src1 - tmp0.y = r */
3095 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3096 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3097
3098 alu.dst.sel = tmp0;
3099 alu.dst.chan = 3;
3100 alu.dst.write = 1;
3101
3102 if (signed_op) {
3103 alu.src[0].sel = tmp2;
3104 alu.src[0].chan = 0;
3105 } else {
3106 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3107 }
3108
3109 alu.src[1].sel = tmp0;
3110 alu.src[1].chan = 1;
3111
3112 alu.last = 1;
3113 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3114 return r;
3115
3116 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3117 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3118 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3119
3120 alu.dst.sel = tmp1;
3121 alu.dst.chan = 0;
3122 alu.dst.write = 1;
3123
3124 alu.src[0].sel = tmp0;
3125 alu.src[0].chan = 3;
3126 if (signed_op) {
3127 alu.src[1].sel = tmp2;
3128 alu.src[1].chan = 1;
3129 } else {
3130 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3131 }
3132
3133 alu.last = 1;
3134 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3135 return r;
3136
3137 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3138 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3139 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3140
3141 alu.dst.sel = tmp1;
3142 alu.dst.chan = 1;
3143 alu.dst.write = 1;
3144
3145 if (signed_op) {
3146 alu.src[0].sel = tmp2;
3147 alu.src[0].chan = 0;
3148 } else {
3149 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3150 }
3151
3152 alu.src[1].sel = tmp0;
3153 alu.src[1].chan = 1;
3154
3155 alu.last = 1;
3156 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3157 return r;
3158
3159 if (mod) { /* UMOD */
3160
3161 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3162 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3163 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3164
3165 alu.dst.sel = tmp1;
3166 alu.dst.chan = 2;
3167 alu.dst.write = 1;
3168
3169 alu.src[0].sel = tmp0;
3170 alu.src[0].chan = 3;
3171
3172 if (signed_op) {
3173 alu.src[1].sel = tmp2;
3174 alu.src[1].chan = 1;
3175 } else {
3176 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3177 }
3178
3179 alu.last = 1;
3180 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3181 return r;
3182
3183 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3184 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3185 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3186
3187 alu.dst.sel = tmp1;
3188 alu.dst.chan = 3;
3189 alu.dst.write = 1;
3190
3191 alu.src[0].sel = tmp0;
3192 alu.src[0].chan = 3;
3193 if (signed_op) {
3194 alu.src[1].sel = tmp2;
3195 alu.src[1].chan = 1;
3196 } else {
3197 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3198 }
3199
3200 alu.last = 1;
3201 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3202 return r;
3203
3204 } else { /* UDIV */
3205
3206 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3207 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3208 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3209
3210 alu.dst.sel = tmp1;
3211 alu.dst.chan = 2;
3212 alu.dst.write = 1;
3213
3214 alu.src[0].sel = tmp0;
3215 alu.src[0].chan = 2;
3216 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3217
3218 alu.last = 1;
3219 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3220 return r;
3221
3222 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3223 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3224 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3225
3226 alu.dst.sel = tmp1;
3227 alu.dst.chan = 3;
3228 alu.dst.write = 1;
3229
3230 alu.src[0].sel = tmp0;
3231 alu.src[0].chan = 2;
3232 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3233
3234 alu.last = 1;
3235 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3236 return r;
3237
3238 }
3239
3240 /* 17. tmp1.x = tmp1.x & tmp1.y */
3241 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3242 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3243
3244 alu.dst.sel = tmp1;
3245 alu.dst.chan = 0;
3246 alu.dst.write = 1;
3247
3248 alu.src[0].sel = tmp1;
3249 alu.src[0].chan = 0;
3250 alu.src[1].sel = tmp1;
3251 alu.src[1].chan = 1;
3252
3253 alu.last = 1;
3254 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3255 return r;
3256
3257 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3258 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3259 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3260 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3261 alu.is_op3 = 1;
3262
3263 alu.dst.sel = tmp0;
3264 alu.dst.chan = 2;
3265 alu.dst.write = 1;
3266
3267 alu.src[0].sel = tmp1;
3268 alu.src[0].chan = 0;
3269 alu.src[1].sel = tmp0;
3270 alu.src[1].chan = mod ? 3 : 2;
3271 alu.src[2].sel = tmp1;
3272 alu.src[2].chan = 2;
3273
3274 alu.last = 1;
3275 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3276 return r;
3277
3278 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3279 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3280 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3281 alu.is_op3 = 1;
3282
3283 if (signed_op) {
3284 alu.dst.sel = tmp0;
3285 alu.dst.chan = 2;
3286 alu.dst.write = 1;
3287 } else {
3288 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3289 }
3290
3291 alu.src[0].sel = tmp1;
3292 alu.src[0].chan = 1;
3293 alu.src[1].sel = tmp1;
3294 alu.src[1].chan = 3;
3295 alu.src[2].sel = tmp0;
3296 alu.src[2].chan = 2;
3297
3298 alu.last = 1;
3299 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3300 return r;
3301
3302 if (signed_op) {
3303
3304 /* fix the sign of the result */
3305
3306 if (mod) {
3307
3308 /* tmp0.x = -tmp0.z */
3309 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3310 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3311
3312 alu.dst.sel = tmp0;
3313 alu.dst.chan = 0;
3314 alu.dst.write = 1;
3315
3316 alu.src[0].sel = V_SQ_ALU_SRC_0;
3317 alu.src[1].sel = tmp0;
3318 alu.src[1].chan = 2;
3319
3320 alu.last = 1;
3321 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3322 return r;
3323
3324 /* sign of the remainder is the same as the sign of src0 */
3325 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3326 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3327 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3328 alu.is_op3 = 1;
3329
3330 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3331
3332 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3333 alu.src[1].sel = tmp0;
3334 alu.src[1].chan = 2;
3335 alu.src[2].sel = tmp0;
3336 alu.src[2].chan = 0;
3337
3338 alu.last = 1;
3339 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3340 return r;
3341
3342 } else {
3343
3344 /* tmp0.x = -tmp0.z */
3345 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3346 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3347
3348 alu.dst.sel = tmp0;
3349 alu.dst.chan = 0;
3350 alu.dst.write = 1;
3351
3352 alu.src[0].sel = V_SQ_ALU_SRC_0;
3353 alu.src[1].sel = tmp0;
3354 alu.src[1].chan = 2;
3355
3356 alu.last = 1;
3357 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3358 return r;
3359
3360 /* fix the quotient sign (same as the sign of src0*src1) */
3361 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3362 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3363 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3364 alu.is_op3 = 1;
3365
3366 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3367
3368 alu.src[0].sel = tmp2;
3369 alu.src[0].chan = 2;
3370 alu.src[1].sel = tmp0;
3371 alu.src[1].chan = 2;
3372 alu.src[2].sel = tmp0;
3373 alu.src[2].chan = 0;
3374
3375 alu.last = 1;
3376 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3377 return r;
3378 }
3379 }
3380 }
3381 return 0;
3382 }
3383
/* TGSI UDIV: forwards to tgsi_divmod() with mod = 0 and signed_op = 0. */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;
	const int signed_op = 0;

	return tgsi_divmod(ctx, mod, signed_op);
}
3388
/* TGSI UMOD: forwards to tgsi_divmod() with mod = 1 and signed_op = 0. */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;
	const int signed_op = 0;

	return tgsi_divmod(ctx, mod, signed_op);
}
3393
/* TGSI IDIV: forwards to tgsi_divmod() with mod = 0 and signed_op = 1. */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;
	const int signed_op = 1;

	return tgsi_divmod(ctx, mod, signed_op);
}
3398
/* TGSI IMOD: forwards to tgsi_divmod() with mod = 1 and signed_op = 1. */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;
	const int signed_op = 1;

	return tgsi_divmod(ctx, mod, signed_op);
}
3403
3404
3405 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3406 {
3407 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3408 struct r600_bytecode_alu alu;
3409 int i, r;
3410 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3411 int last_inst = tgsi_last_instruction(write_mask);
3412
3413 for (i = 0; i < 4; i++) {
3414 if (!(write_mask & (1<<i)))
3415 continue;
3416
3417 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3419
3420 alu.dst.sel = ctx->temp_reg;
3421 alu.dst.chan = i;
3422 alu.dst.write = 1;
3423
3424 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3425 if (i == last_inst)
3426 alu.last = 1;
3427 r = r600_bytecode_add_alu(ctx->bc, &alu);
3428 if (r)
3429 return r;
3430 }
3431
3432 for (i = 0; i < 4; i++) {
3433 if (!(write_mask & (1<<i)))
3434 continue;
3435
3436 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3437 alu.inst = ctx->inst_info->r600_opcode;
3438
3439 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3440
3441 alu.src[0].sel = ctx->temp_reg;
3442 alu.src[0].chan = i;
3443
3444 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3445 alu.last = 1;
3446 r = r600_bytecode_add_alu(ctx->bc, &alu);
3447 if (r)
3448 return r;
3449 }
3450
3451 return 0;
3452 }
3453
3454 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3455 {
3456 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3457 struct r600_bytecode_alu alu;
3458 int i, r;
3459 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3460 int last_inst = tgsi_last_instruction(write_mask);
3461
3462 /* tmp = -src */
3463 for (i = 0; i < 4; i++) {
3464 if (!(write_mask & (1<<i)))
3465 continue;
3466
3467 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3468 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3469
3470 alu.dst.sel = ctx->temp_reg;
3471 alu.dst.chan = i;
3472 alu.dst.write = 1;
3473
3474 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3475 alu.src[0].sel = V_SQ_ALU_SRC_0;
3476
3477 if (i == last_inst)
3478 alu.last = 1;
3479 r = r600_bytecode_add_alu(ctx->bc, &alu);
3480 if (r)
3481 return r;
3482 }
3483
3484 /* dst = (src >= 0 ? src : tmp) */
3485 for (i = 0; i < 4; i++) {
3486 if (!(write_mask & (1<<i)))
3487 continue;
3488
3489 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3491 alu.is_op3 = 1;
3492 alu.dst.write = 1;
3493
3494 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3495
3496 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3497 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3498 alu.src[2].sel = ctx->temp_reg;
3499 alu.src[2].chan = i;
3500
3501 if (i == last_inst)
3502 alu.last = 1;
3503 r = r600_bytecode_add_alu(ctx->bc, &alu);
3504 if (r)
3505 return r;
3506 }
3507 return 0;
3508 }
3509
3510 static int tgsi_issg(struct r600_shader_ctx *ctx)
3511 {
3512 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3513 struct r600_bytecode_alu alu;
3514 int i, r;
3515 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3516 int last_inst = tgsi_last_instruction(write_mask);
3517
3518 /* tmp = (src >= 0 ? src : -1) */
3519 for (i = 0; i < 4; i++) {
3520 if (!(write_mask & (1<<i)))
3521 continue;
3522
3523 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3524 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3525 alu.is_op3 = 1;
3526
3527 alu.dst.sel = ctx->temp_reg;
3528 alu.dst.chan = i;
3529 alu.dst.write = 1;
3530
3531 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3532 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3533 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3534
3535 if (i == last_inst)
3536 alu.last = 1;
3537 r = r600_bytecode_add_alu(ctx->bc, &alu);
3538 if (r)
3539 return r;
3540 }
3541
3542 /* dst = (tmp > 0 ? 1 : tmp) */
3543 for (i = 0; i < 4; i++) {
3544 if (!(write_mask & (1<<i)))
3545 continue;
3546
3547 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3548 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3549 alu.is_op3 = 1;
3550 alu.dst.write = 1;
3551
3552 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3553
3554 alu.src[0].sel = ctx->temp_reg;
3555 alu.src[0].chan = i;
3556
3557 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3558
3559 alu.src[2].sel = ctx->temp_reg;
3560 alu.src[2].chan = i;
3561
3562 if (i == last_inst)
3563 alu.last = 1;
3564 r = r600_bytecode_add_alu(ctx->bc, &alu);
3565 if (r)
3566 return r;
3567 }
3568 return 0;
3569 }
3570
3571
3572
3573 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3574 {
3575 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3576 struct r600_bytecode_alu alu;
3577 int i, r;
3578
3579 /* tmp = (src > 0 ? 1 : src) */
3580 for (i = 0; i < 4; i++) {
3581 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3583 alu.is_op3 = 1;
3584
3585 alu.dst.sel = ctx->temp_reg;
3586 alu.dst.chan = i;
3587
3588 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3589 alu.src[1].sel = V_SQ_ALU_SRC_1;
3590 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3591
3592 if (i == 3)
3593 alu.last = 1;
3594 r = r600_bytecode_add_alu(ctx->bc, &alu);
3595 if (r)
3596 return r;
3597 }
3598
3599 /* dst = (-tmp > 0 ? -1 : tmp) */
3600 for (i = 0; i < 4; i++) {
3601 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3602 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3603 alu.is_op3 = 1;
3604 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3605
3606 alu.src[0].sel = ctx->temp_reg;
3607 alu.src[0].chan = i;
3608 alu.src[0].neg = 1;
3609
3610 alu.src[1].sel = V_SQ_ALU_SRC_1;
3611 alu.src[1].neg = 1;
3612
3613 alu.src[2].sel = ctx->temp_reg;
3614 alu.src[2].chan = i;
3615
3616 if (i == 3)
3617 alu.last = 1;
3618 r = r600_bytecode_add_alu(ctx->bc, &alu);
3619 if (r)
3620 return r;
3621 }
3622 return 0;
3623 }
3624
3625 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3626 {
3627 struct r600_bytecode_alu alu;
3628 int i, r;
3629
3630 for (i = 0; i < 4; i++) {
3631 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3632 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3633 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3634 alu.dst.chan = i;
3635 } else {
3636 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3637 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3638 alu.src[0].sel = ctx->temp_reg;
3639 alu.src[0].chan = i;
3640 }
3641 if (i == 3) {
3642 alu.last = 1;
3643 }
3644 r = r600_bytecode_add_alu(ctx->bc, &alu);
3645 if (r)
3646 return r;
3647 }
3648 return 0;
3649 }
3650
3651 static int tgsi_op3(struct r600_shader_ctx *ctx)
3652 {
3653 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3654 struct r600_bytecode_alu alu;
3655 int i, j, r;
3656 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3657
3658 for (i = 0; i < lasti + 1; i++) {
3659 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3660 continue;
3661
3662 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3663 alu.inst = ctx->inst_info->r600_opcode;
3664 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3665 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3666 }
3667
3668 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3669 alu.dst.chan = i;
3670 alu.dst.write = 1;
3671 alu.is_op3 = 1;
3672 if (i == lasti) {
3673 alu.last = 1;
3674 }
3675 r = r600_bytecode_add_alu(ctx->bc, &alu);
3676 if (r)
3677 return r;
3678 }
3679 return 0;
3680 }
3681
3682 static int tgsi_dp(struct r600_shader_ctx *ctx)
3683 {
3684 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3685 struct r600_bytecode_alu alu;
3686 int i, j, r;
3687
3688 for (i = 0; i < 4; i++) {
3689 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3690 alu.inst = ctx->inst_info->r600_opcode;
3691 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3692 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3693 }
3694
3695 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3696 alu.dst.chan = i;
3697 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3698 /* handle some special cases */
3699 switch (ctx->inst_info->tgsi_opcode) {
3700 case TGSI_OPCODE_DP2:
3701 if (i > 1) {
3702 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3703 alu.src[0].chan = alu.src[1].chan = 0;
3704 }
3705 break;
3706 case TGSI_OPCODE_DP3:
3707 if (i > 2) {
3708 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3709 alu.src[0].chan = alu.src[1].chan = 0;
3710 }
3711 break;
3712 case TGSI_OPCODE_DPH:
3713 if (i == 3) {
3714 alu.src[0].sel = V_SQ_ALU_SRC_1;
3715 alu.src[0].chan = 0;
3716 alu.src[0].neg = 0;
3717 }
3718 break;
3719 default:
3720 break;
3721 }
3722 if (i == 3) {
3723 alu.last = 1;
3724 }
3725 r = r600_bytecode_add_alu(ctx->bc, &alu);
3726 if (r)
3727 return r;
3728 }
3729 return 0;
3730 }
3731
3732 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3733 unsigned index)
3734 {
3735 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3736 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3737 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3738 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3739 ctx->src[index].neg || ctx->src[index].abs;
3740 }
3741
3742 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3743 unsigned index)
3744 {
3745 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3746 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3747 }
3748
3749 static int tgsi_tex(struct r600_shader_ctx *ctx)
3750 {
3751 static float one_point_five = 1.5f;
3752 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3753 struct r600_bytecode_tex tex;
3754 struct r600_bytecode_alu alu;
3755 unsigned src_gpr;
3756 int r, i, j;
3757 int opcode;
3758 /* Texture fetch instructions can only use gprs as source.
3759 * Also they cannot negate the source or take the absolute value */
3760 const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3761 tgsi_tex_src_requires_loading(ctx, 0);
3762 boolean src_loaded = FALSE;
3763 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3764 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3765
3766 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3767
3768 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3769 /* get offset values */
3770 if (inst->Texture.NumOffsets) {
3771 assert(inst->Texture.NumOffsets == 1);
3772
3773 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3774 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3775 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3776 }
3777 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3778 /* TGSI moves the sampler to src reg 3 for TXD */
3779 sampler_src_reg = 3;
3780
3781 for (i = 1; i < 3; i++) {
3782 /* set gradients h/v */
3783 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3784 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3785 SQ_TEX_INST_SET_GRADIENTS_V;
3786 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3787 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3788
3789 if (tgsi_tex_src_requires_loading(ctx, i)) {
3790 tex.src_gpr = r600_get_temp(ctx);
3791 tex.src_sel_x = 0;
3792 tex.src_sel_y = 1;
3793 tex.src_sel_z = 2;
3794 tex.src_sel_w = 3;
3795
3796 for (j = 0; j < 4; j++) {
3797 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3798 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3799 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3800 alu.dst.sel = tex.src_gpr;
3801 alu.dst.chan = j;
3802 if (j == 3)
3803 alu.last = 1;
3804 alu.dst.write = 1;
3805 r = r600_bytecode_add_alu(ctx->bc, &alu);
3806 if (r)
3807 return r;
3808 }
3809
3810 } else {
3811 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3812 tex.src_sel_x = ctx->src[i].swizzle[0];
3813 tex.src_sel_y = ctx->src[i].swizzle[1];
3814 tex.src_sel_z = ctx->src[i].swizzle[2];
3815 tex.src_sel_w = ctx->src[i].swizzle[3];
3816 tex.src_rel = ctx->src[i].rel;
3817 }
3818 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3819 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3820 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3821 tex.coord_type_x = 1;
3822 tex.coord_type_y = 1;
3823 tex.coord_type_z = 1;
3824 tex.coord_type_w = 1;
3825 }
3826 r = r600_bytecode_add_tex(ctx->bc, &tex);
3827 if (r)
3828 return r;
3829 }
3830 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3831 int out_chan;
3832 /* Add perspective divide */
3833 if (ctx->bc->chip_class == CAYMAN) {
3834 out_chan = 2;
3835 for (i = 0; i < 3; i++) {
3836 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3837 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3838 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3839
3840 alu.dst.sel = ctx->temp_reg;
3841 alu.dst.chan = i;
3842 if (i == 2)
3843 alu.last = 1;
3844 if (out_chan == i)
3845 alu.dst.write = 1;
3846 r = r600_bytecode_add_alu(ctx->bc, &alu);
3847 if (r)
3848 return r;
3849 }
3850
3851 } else {
3852 out_chan = 3;
3853 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3854 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3855 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3856
3857 alu.dst.sel = ctx->temp_reg;
3858 alu.dst.chan = out_chan;
3859 alu.last = 1;
3860 alu.dst.write = 1;
3861 r = r600_bytecode_add_alu(ctx->bc, &alu);
3862 if (r)
3863 return r;
3864 }
3865
3866 for (i = 0; i < 3; i++) {
3867 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3868 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3869 alu.src[0].sel = ctx->temp_reg;
3870 alu.src[0].chan = out_chan;
3871 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3872 alu.dst.sel = ctx->temp_reg;
3873 alu.dst.chan = i;
3874 alu.dst.write = 1;
3875 r = r600_bytecode_add_alu(ctx->bc, &alu);
3876 if (r)
3877 return r;
3878 }
3879 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3880 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3881 alu.src[0].sel = V_SQ_ALU_SRC_1;
3882 alu.src[0].chan = 0;
3883 alu.dst.sel = ctx->temp_reg;
3884 alu.dst.chan = 3;
3885 alu.last = 1;
3886 alu.dst.write = 1;
3887 r = r600_bytecode_add_alu(ctx->bc, &alu);
3888 if (r)
3889 return r;
3890 src_loaded = TRUE;
3891 src_gpr = ctx->temp_reg;
3892 }
3893
3894 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3895 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3896 inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
3897 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
3898
3899 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3900 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3901
3902 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3903 for (i = 0; i < 4; i++) {
3904 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3905 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3906 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3907 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3908 alu.dst.sel = ctx->temp_reg;
3909 alu.dst.chan = i;
3910 if (i == 3)
3911 alu.last = 1;
3912 alu.dst.write = 1;
3913 r = r600_bytecode_add_alu(ctx->bc, &alu);
3914 if (r)
3915 return r;
3916 }
3917
3918 /* tmp1.z = RCP_e(|tmp1.z|) */
3919 if (ctx->bc->chip_class == CAYMAN) {
3920 for (i = 0; i < 3; i++) {
3921 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3922 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3923 alu.src[0].sel = ctx->temp_reg;
3924 alu.src[0].chan = 2;
3925 alu.src[0].abs = 1;
3926 alu.dst.sel = ctx->temp_reg;
3927 alu.dst.chan = i;
3928 if (i == 2)
3929 alu.dst.write = 1;
3930 if (i == 2)
3931 alu.last = 1;
3932 r = r600_bytecode_add_alu(ctx->bc, &alu);
3933 if (r)
3934 return r;
3935 }
3936 } else {
3937 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3939 alu.src[0].sel = ctx->temp_reg;
3940 alu.src[0].chan = 2;
3941 alu.src[0].abs = 1;
3942 alu.dst.sel = ctx->temp_reg;
3943 alu.dst.chan = 2;
3944 alu.dst.write = 1;
3945 alu.last = 1;
3946 r = r600_bytecode_add_alu(ctx->bc, &alu);
3947 if (r)
3948 return r;
3949 }
3950
3951 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3952 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3953 * muladd has no writemask, have to use another temp
3954 */
3955 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3957 alu.is_op3 = 1;
3958
3959 alu.src[0].sel = ctx->temp_reg;
3960 alu.src[0].chan = 0;
3961 alu.src[1].sel = ctx->temp_reg;
3962 alu.src[1].chan = 2;
3963
3964 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3965 alu.src[2].chan = 0;
3966 alu.src[2].value = *(uint32_t *)&one_point_five;
3967
3968 alu.dst.sel = ctx->temp_reg;
3969 alu.dst.chan = 0;
3970 alu.dst.write = 1;
3971
3972 r = r600_bytecode_add_alu(ctx->bc, &alu);
3973 if (r)
3974 return r;
3975
3976 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3977 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3978 alu.is_op3 = 1;
3979
3980 alu.src[0].sel = ctx->temp_reg;
3981 alu.src[0].chan = 1;
3982 alu.src[1].sel = ctx->temp_reg;
3983 alu.src[1].chan = 2;
3984
3985 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3986 alu.src[2].chan = 0;
3987 alu.src[2].value = *(uint32_t *)&one_point_five;
3988
3989 alu.dst.sel = ctx->temp_reg;
3990 alu.dst.chan = 1;
3991 alu.dst.write = 1;
3992
3993 alu.last = 1;
3994 r = r600_bytecode_add_alu(ctx->bc, &alu);
3995 if (r)
3996 return r;
3997 /* write initial W value into Z component */
3998 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
3999 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4001 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4002 alu.dst.sel = ctx->temp_reg;
4003 alu.dst.chan = 2;
4004 alu.dst.write = 1;
4005 alu.last = 1;
4006 r = r600_bytecode_add_alu(ctx->bc, &alu);
4007 if (r)
4008 return r;
4009 }
4010 src_loaded = TRUE;
4011 src_gpr = ctx->temp_reg;
4012 }
4013
4014 if (src_requires_loading && !src_loaded) {
4015 for (i = 0; i < 4; i++) {
4016 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4017 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4018 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4019 alu.dst.sel = ctx->temp_reg;
4020 alu.dst.chan = i;
4021 if (i == 3)
4022 alu.last = 1;
4023 alu.dst.write = 1;
4024 r = r600_bytecode_add_alu(ctx->bc, &alu);
4025 if (r)
4026 return r;
4027 }
4028 src_loaded = TRUE;
4029 src_gpr = ctx->temp_reg;
4030 }
4031
4032 opcode = ctx->inst_info->r600_opcode;
4033 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4034 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4035 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4036 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4037 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4038 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
4039 switch (opcode) {
4040 case SQ_TEX_INST_SAMPLE:
4041 opcode = SQ_TEX_INST_SAMPLE_C;
4042 break;
4043 case SQ_TEX_INST_SAMPLE_L:
4044 opcode = SQ_TEX_INST_SAMPLE_C_L;
4045 break;
4046 case SQ_TEX_INST_SAMPLE_LB:
4047 opcode = SQ_TEX_INST_SAMPLE_C_LB;
4048 break;
4049 case SQ_TEX_INST_SAMPLE_G:
4050 opcode = SQ_TEX_INST_SAMPLE_C_G;
4051 break;
4052 }
4053 }
4054
4055 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4056 tex.inst = opcode;
4057
4058 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4059 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4060 tex.src_gpr = src_gpr;
4061 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4062 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4063 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4064 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4065 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4066
4067 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
4068 tex.src_sel_x = 4;
4069 tex.src_sel_y = 4;
4070 tex.src_sel_z = 4;
4071 tex.src_sel_w = 4;
4072 } else if (src_loaded) {
4073 tex.src_sel_x = 0;
4074 tex.src_sel_y = 1;
4075 tex.src_sel_z = 2;
4076 tex.src_sel_w = 3;
4077 } else {
4078 tex.src_sel_x = ctx->src[0].swizzle[0];
4079 tex.src_sel_y = ctx->src[0].swizzle[1];
4080 tex.src_sel_z = ctx->src[0].swizzle[2];
4081 tex.src_sel_w = ctx->src[0].swizzle[3];
4082 tex.src_rel = ctx->src[0].rel;
4083 }
4084
4085 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
4086 tex.src_sel_x = 1;
4087 tex.src_sel_y = 0;
4088 tex.src_sel_z = 3;
4089 tex.src_sel_w = 1;
4090 }
4091 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4092 tex.src_sel_x = 1;
4093 tex.src_sel_y = 0;
4094 tex.src_sel_z = 3;
4095 tex.src_sel_w = 2; /* route Z compare value into W */
4096 }
4097
4098 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4099 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4100 tex.coord_type_x = 1;
4101 tex.coord_type_y = 1;
4102 }
4103 tex.coord_type_z = 1;
4104 tex.coord_type_w = 1;
4105
4106 tex.offset_x = offset_x;
4107 tex.offset_y = offset_y;
4108 tex.offset_z = offset_z;
4109
4110 /* Put the depth for comparison in W.
4111 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4112 * Some instructions expect the depth in Z. */
4113 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4114 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4115 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4116 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4117 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4118 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4119 tex.src_sel_w = tex.src_sel_z;
4120 }
4121
4122 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4123 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4124 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4125 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4126 /* the array index is read from Y */
4127 tex.coord_type_y = 0;
4128 } else {
4129 /* the array index is read from Z */
4130 tex.coord_type_z = 0;
4131 tex.src_sel_z = tex.src_sel_y;
4132 }
4133 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4134 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4135 /* the array index is read from Z */
4136 tex.coord_type_z = 0;
4137
4138 r = r600_bytecode_add_tex(ctx->bc, &tex);
4139 if (r)
4140 return r;
4141
4142 /* add shadow ambient support - gallium doesn't do it yet */
4143 return 0;
4144 }
4145
4146 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4147 {
4148 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4149 struct r600_bytecode_alu alu;
4150 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4151 unsigned i;
4152 int r;
4153
4154 /* optimize if it's just an equal balance */
4155 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4156 for (i = 0; i < lasti + 1; i++) {
4157 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4158 continue;
4159
4160 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4161 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4162 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4163 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4164 alu.omod = 3;
4165 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4166 alu.dst.chan = i;
4167 if (i == lasti) {
4168 alu.last = 1;
4169 }
4170 r = r600_bytecode_add_alu(ctx->bc, &alu);
4171 if (r)
4172 return r;
4173 }
4174 return 0;
4175 }
4176
4177 /* 1 - src0 */
4178 for (i = 0; i < lasti + 1; i++) {
4179 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4180 continue;
4181
4182 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4183 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4184 alu.src[0].sel = V_SQ_ALU_SRC_1;
4185 alu.src[0].chan = 0;
4186 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4187 r600_bytecode_src_toggle_neg(&alu.src[1]);
4188 alu.dst.sel = ctx->temp_reg;
4189 alu.dst.chan = i;
4190 if (i == lasti) {
4191 alu.last = 1;
4192 }
4193 alu.dst.write = 1;
4194 r = r600_bytecode_add_alu(ctx->bc, &alu);
4195 if (r)
4196 return r;
4197 }
4198
4199 /* (1 - src0) * src2 */
4200 for (i = 0; i < lasti + 1; i++) {
4201 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4202 continue;
4203
4204 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4205 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4206 alu.src[0].sel = ctx->temp_reg;
4207 alu.src[0].chan = i;
4208 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4209 alu.dst.sel = ctx->temp_reg;
4210 alu.dst.chan = i;
4211 if (i == lasti) {
4212 alu.last = 1;
4213 }
4214 alu.dst.write = 1;
4215 r = r600_bytecode_add_alu(ctx->bc, &alu);
4216 if (r)
4217 return r;
4218 }
4219
4220 /* src0 * src1 + (1 - src0) * src2 */
4221 for (i = 0; i < lasti + 1; i++) {
4222 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4223 continue;
4224
4225 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4226 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4227 alu.is_op3 = 1;
4228 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4229 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4230 alu.src[2].sel = ctx->temp_reg;
4231 alu.src[2].chan = i;
4232
4233 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4234 alu.dst.chan = i;
4235 if (i == lasti) {
4236 alu.last = 1;
4237 }
4238 r = r600_bytecode_add_alu(ctx->bc, &alu);
4239 if (r)
4240 return r;
4241 }
4242 return 0;
4243 }
4244
4245 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4246 {
4247 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4248 struct r600_bytecode_alu alu;
4249 int i, r;
4250 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4251
4252 for (i = 0; i < lasti + 1; i++) {
4253 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4254 continue;
4255
4256 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4257 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4258 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4259 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4260 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4261 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4262 alu.dst.chan = i;
4263 alu.dst.write = 1;
4264 alu.is_op3 = 1;
4265 if (i == lasti)
4266 alu.last = 1;
4267 r = r600_bytecode_add_alu(ctx->bc, &alu);
4268 if (r)
4269 return r;
4270 }
4271 return 0;
4272 }
4273
4274 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4275 {
4276 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4277 static const unsigned int src0_swizzle[] = {2, 0, 1};
4278 static const unsigned int src1_swizzle[] = {1, 2, 0};
4279 struct r600_bytecode_alu alu;
4280 uint32_t use_temp = 0;
4281 int i, r;
4282
4283 if (inst->Dst[0].Register.WriteMask != 0xf)
4284 use_temp = 1;
4285
4286 for (i = 0; i < 4; i++) {
4287 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4288 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4289 if (i < 3) {
4290 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4291 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4292 } else {
4293 alu.src[0].sel = V_SQ_ALU_SRC_0;
4294 alu.src[0].chan = i;
4295 alu.src[1].sel = V_SQ_ALU_SRC_0;
4296 alu.src[1].chan = i;
4297 }
4298
4299 alu.dst.sel = ctx->temp_reg;
4300 alu.dst.chan = i;
4301 alu.dst.write = 1;
4302
4303 if (i == 3)
4304 alu.last = 1;
4305 r = r600_bytecode_add_alu(ctx->bc, &alu);
4306 if (r)
4307 return r;
4308 }
4309
4310 for (i = 0; i < 4; i++) {
4311 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4313
4314 if (i < 3) {
4315 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4316 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4317 } else {
4318 alu.src[0].sel = V_SQ_ALU_SRC_0;
4319 alu.src[0].chan = i;
4320 alu.src[1].sel = V_SQ_ALU_SRC_0;
4321 alu.src[1].chan = i;
4322 }
4323
4324 alu.src[2].sel = ctx->temp_reg;
4325 alu.src[2].neg = 1;
4326 alu.src[2].chan = i;
4327
4328 if (use_temp)
4329 alu.dst.sel = ctx->temp_reg;
4330 else
4331 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4332 alu.dst.chan = i;
4333 alu.dst.write = 1;
4334 alu.is_op3 = 1;
4335 if (i == 3)
4336 alu.last = 1;
4337 r = r600_bytecode_add_alu(ctx->bc, &alu);
4338 if (r)
4339 return r;
4340 }
4341 if (use_temp)
4342 return tgsi_helper_copy(ctx, inst);
4343 return 0;
4344 }
4345
4346 static int tgsi_exp(struct r600_shader_ctx *ctx)
4347 {
4348 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4349 struct r600_bytecode_alu alu;
4350 int r;
4351 int i;
4352
4353 /* result.x = 2^floor(src); */
4354 if (inst->Dst[0].Register.WriteMask & 1) {
4355 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4356
4357 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4358 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4359
4360 alu.dst.sel = ctx->temp_reg;
4361 alu.dst.chan = 0;
4362 alu.dst.write = 1;
4363 alu.last = 1;
4364 r = r600_bytecode_add_alu(ctx->bc, &alu);
4365 if (r)
4366 return r;
4367
4368 if (ctx->bc->chip_class == CAYMAN) {
4369 for (i = 0; i < 3; i++) {
4370 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4371 alu.src[0].sel = ctx->temp_reg;
4372 alu.src[0].chan = 0;
4373
4374 alu.dst.sel = ctx->temp_reg;
4375 alu.dst.chan = i;
4376 if (i == 0)
4377 alu.dst.write = 1;
4378 if (i == 2)
4379 alu.last = 1;
4380 r = r600_bytecode_add_alu(ctx->bc, &alu);
4381 if (r)
4382 return r;
4383 }
4384 } else {
4385 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4386 alu.src[0].sel = ctx->temp_reg;
4387 alu.src[0].chan = 0;
4388
4389 alu.dst.sel = ctx->temp_reg;
4390 alu.dst.chan = 0;
4391 alu.dst.write = 1;
4392 alu.last = 1;
4393 r = r600_bytecode_add_alu(ctx->bc, &alu);
4394 if (r)
4395 return r;
4396 }
4397 }
4398
4399 /* result.y = tmp - floor(tmp); */
4400 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4401 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4402
4403 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4404 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4405
4406 alu.dst.sel = ctx->temp_reg;
4407 #if 0
4408 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4409 if (r)
4410 return r;
4411 #endif
4412 alu.dst.write = 1;
4413 alu.dst.chan = 1;
4414
4415 alu.last = 1;
4416
4417 r = r600_bytecode_add_alu(ctx->bc, &alu);
4418 if (r)
4419 return r;
4420 }
4421
4422 /* result.z = RoughApprox2ToX(tmp);*/
4423 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4424 if (ctx->bc->chip_class == CAYMAN) {
4425 for (i = 0; i < 3; i++) {
4426 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4427 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4428 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4429
4430 alu.dst.sel = ctx->temp_reg;
4431 alu.dst.chan = i;
4432 if (i == 2) {
4433 alu.dst.write = 1;
4434 alu.last = 1;
4435 }
4436
4437 r = r600_bytecode_add_alu(ctx->bc, &alu);
4438 if (r)
4439 return r;
4440 }
4441 } else {
4442 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4444 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4445
4446 alu.dst.sel = ctx->temp_reg;
4447 alu.dst.write = 1;
4448 alu.dst.chan = 2;
4449
4450 alu.last = 1;
4451
4452 r = r600_bytecode_add_alu(ctx->bc, &alu);
4453 if (r)
4454 return r;
4455 }
4456 }
4457
4458 /* result.w = 1.0;*/
4459 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4460 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4461
4462 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4463 alu.src[0].sel = V_SQ_ALU_SRC_1;
4464 alu.src[0].chan = 0;
4465
4466 alu.dst.sel = ctx->temp_reg;
4467 alu.dst.chan = 3;
4468 alu.dst.write = 1;
4469 alu.last = 1;
4470 r = r600_bytecode_add_alu(ctx->bc, &alu);
4471 if (r)
4472 return r;
4473 }
4474 return tgsi_helper_copy(ctx, inst);
4475 }
4476
4477 static int tgsi_log(struct r600_shader_ctx *ctx)
4478 {
4479 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4480 struct r600_bytecode_alu alu;
4481 int r;
4482 int i;
4483
4484 /* result.x = floor(log2(|src|)); */
4485 if (inst->Dst[0].Register.WriteMask & 1) {
4486 if (ctx->bc->chip_class == CAYMAN) {
4487 for (i = 0; i < 3; i++) {
4488 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4489
4490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4491 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4492 r600_bytecode_src_set_abs(&alu.src[0]);
4493
4494 alu.dst.sel = ctx->temp_reg;
4495 alu.dst.chan = i;
4496 if (i == 0)
4497 alu.dst.write = 1;
4498 if (i == 2)
4499 alu.last = 1;
4500 r = r600_bytecode_add_alu(ctx->bc, &alu);
4501 if (r)
4502 return r;
4503 }
4504
4505 } else {
4506 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4507
4508 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4509 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4510 r600_bytecode_src_set_abs(&alu.src[0]);
4511
4512 alu.dst.sel = ctx->temp_reg;
4513 alu.dst.chan = 0;
4514 alu.dst.write = 1;
4515 alu.last = 1;
4516 r = r600_bytecode_add_alu(ctx->bc, &alu);
4517 if (r)
4518 return r;
4519 }
4520
4521 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4522 alu.src[0].sel = ctx->temp_reg;
4523 alu.src[0].chan = 0;
4524
4525 alu.dst.sel = ctx->temp_reg;
4526 alu.dst.chan = 0;
4527 alu.dst.write = 1;
4528 alu.last = 1;
4529
4530 r = r600_bytecode_add_alu(ctx->bc, &alu);
4531 if (r)
4532 return r;
4533 }
4534
4535 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4536 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4537
4538 if (ctx->bc->chip_class == CAYMAN) {
4539 for (i = 0; i < 3; i++) {
4540 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4541
4542 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4543 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4544 r600_bytecode_src_set_abs(&alu.src[0]);
4545
4546 alu.dst.sel = ctx->temp_reg;
4547 alu.dst.chan = i;
4548 if (i == 1)
4549 alu.dst.write = 1;
4550 if (i == 2)
4551 alu.last = 1;
4552
4553 r = r600_bytecode_add_alu(ctx->bc, &alu);
4554 if (r)
4555 return r;
4556 }
4557 } else {
4558 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4559
4560 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4561 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4562 r600_bytecode_src_set_abs(&alu.src[0]);
4563
4564 alu.dst.sel = ctx->temp_reg;
4565 alu.dst.chan = 1;
4566 alu.dst.write = 1;
4567 alu.last = 1;
4568
4569 r = r600_bytecode_add_alu(ctx->bc, &alu);
4570 if (r)
4571 return r;
4572 }
4573
4574 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4575
4576 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4577 alu.src[0].sel = ctx->temp_reg;
4578 alu.src[0].chan = 1;
4579
4580 alu.dst.sel = ctx->temp_reg;
4581 alu.dst.chan = 1;
4582 alu.dst.write = 1;
4583 alu.last = 1;
4584
4585 r = r600_bytecode_add_alu(ctx->bc, &alu);
4586 if (r)
4587 return r;
4588
4589 if (ctx->bc->chip_class == CAYMAN) {
4590 for (i = 0; i < 3; i++) {
4591 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4592 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4593 alu.src[0].sel = ctx->temp_reg;
4594 alu.src[0].chan = 1;
4595
4596 alu.dst.sel = ctx->temp_reg;
4597 alu.dst.chan = i;
4598 if (i == 1)
4599 alu.dst.write = 1;
4600 if (i == 2)
4601 alu.last = 1;
4602
4603 r = r600_bytecode_add_alu(ctx->bc, &alu);
4604 if (r)
4605 return r;
4606 }
4607 } else {
4608 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4609 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4610 alu.src[0].sel = ctx->temp_reg;
4611 alu.src[0].chan = 1;
4612
4613 alu.dst.sel = ctx->temp_reg;
4614 alu.dst.chan = 1;
4615 alu.dst.write = 1;
4616 alu.last = 1;
4617
4618 r = r600_bytecode_add_alu(ctx->bc, &alu);
4619 if (r)
4620 return r;
4621 }
4622
4623 if (ctx->bc->chip_class == CAYMAN) {
4624 for (i = 0; i < 3; i++) {
4625 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4626 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4627 alu.src[0].sel = ctx->temp_reg;
4628 alu.src[0].chan = 1;
4629
4630 alu.dst.sel = ctx->temp_reg;
4631 alu.dst.chan = i;
4632 if (i == 1)
4633 alu.dst.write = 1;
4634 if (i == 2)
4635 alu.last = 1;
4636
4637 r = r600_bytecode_add_alu(ctx->bc, &alu);
4638 if (r)
4639 return r;
4640 }
4641 } else {
4642 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4643 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4644 alu.src[0].sel = ctx->temp_reg;
4645 alu.src[0].chan = 1;
4646
4647 alu.dst.sel = ctx->temp_reg;
4648 alu.dst.chan = 1;
4649 alu.dst.write = 1;
4650 alu.last = 1;
4651
4652 r = r600_bytecode_add_alu(ctx->bc, &alu);
4653 if (r)
4654 return r;
4655 }
4656
4657 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4658
4659 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4660
4661 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4662 r600_bytecode_src_set_abs(&alu.src[0]);
4663
4664 alu.src[1].sel = ctx->temp_reg;
4665 alu.src[1].chan = 1;
4666
4667 alu.dst.sel = ctx->temp_reg;
4668 alu.dst.chan = 1;
4669 alu.dst.write = 1;
4670 alu.last = 1;
4671
4672 r = r600_bytecode_add_alu(ctx->bc, &alu);
4673 if (r)
4674 return r;
4675 }
4676
4677 /* result.z = log2(|src|);*/
4678 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4679 if (ctx->bc->chip_class == CAYMAN) {
4680 for (i = 0; i < 3; i++) {
4681 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4682
4683 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4684 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4685 r600_bytecode_src_set_abs(&alu.src[0]);
4686
4687 alu.dst.sel = ctx->temp_reg;
4688 if (i == 2)
4689 alu.dst.write = 1;
4690 alu.dst.chan = i;
4691 if (i == 2)
4692 alu.last = 1;
4693
4694 r = r600_bytecode_add_alu(ctx->bc, &alu);
4695 if (r)
4696 return r;
4697 }
4698 } else {
4699 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4700
4701 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4702 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4703 r600_bytecode_src_set_abs(&alu.src[0]);
4704
4705 alu.dst.sel = ctx->temp_reg;
4706 alu.dst.write = 1;
4707 alu.dst.chan = 2;
4708 alu.last = 1;
4709
4710 r = r600_bytecode_add_alu(ctx->bc, &alu);
4711 if (r)
4712 return r;
4713 }
4714 }
4715
4716 /* result.w = 1.0; */
4717 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4718 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4719
4720 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4721 alu.src[0].sel = V_SQ_ALU_SRC_1;
4722 alu.src[0].chan = 0;
4723
4724 alu.dst.sel = ctx->temp_reg;
4725 alu.dst.chan = 3;
4726 alu.dst.write = 1;
4727 alu.last = 1;
4728
4729 r = r600_bytecode_add_alu(ctx->bc, &alu);
4730 if (r)
4731 return r;
4732 }
4733
4734 return tgsi_helper_copy(ctx, inst);
4735 }
4736
4737 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4738 {
4739 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4740 struct r600_bytecode_alu alu;
4741 int r;
4742
4743 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4744
4745 switch (inst->Instruction.Opcode) {
4746 case TGSI_OPCODE_ARL:
4747 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4748 break;
4749 case TGSI_OPCODE_ARR:
4750 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4751 break;
4752 case TGSI_OPCODE_UARL:
4753 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4754 break;
4755 default:
4756 assert(0);
4757 return -1;
4758 }
4759
4760 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4761 alu.last = 1;
4762 alu.dst.sel = ctx->bc->ar_reg;
4763 alu.dst.write = 1;
4764 r = r600_bytecode_add_alu(ctx->bc, &alu);
4765 if (r)
4766 return r;
4767
4768 ctx->bc->ar_loaded = 0;
4769 return 0;
4770 }
4771 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4772 {
4773 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4774 struct r600_bytecode_alu alu;
4775 int r;
4776
4777 switch (inst->Instruction.Opcode) {
4778 case TGSI_OPCODE_ARL:
4779 memset(&alu, 0, sizeof(alu));
4780 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4781 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4782 alu.dst.sel = ctx->bc->ar_reg;
4783 alu.dst.write = 1;
4784 alu.last = 1;
4785
4786 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4787 return r;
4788
4789 memset(&alu, 0, sizeof(alu));
4790 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4791 alu.src[0].sel = ctx->bc->ar_reg;
4792 alu.dst.sel = ctx->bc->ar_reg;
4793 alu.dst.write = 1;
4794 alu.last = 1;
4795
4796 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4797 return r;
4798 break;
4799 case TGSI_OPCODE_ARR:
4800 memset(&alu, 0, sizeof(alu));
4801 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4802 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4803 alu.dst.sel = ctx->bc->ar_reg;
4804 alu.dst.write = 1;
4805 alu.last = 1;
4806
4807 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4808 return r;
4809 break;
4810 case TGSI_OPCODE_UARL:
4811 memset(&alu, 0, sizeof(alu));
4812 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4813 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4814 alu.dst.sel = ctx->bc->ar_reg;
4815 alu.dst.write = 1;
4816 alu.last = 1;
4817
4818 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4819 return r;
4820 break;
4821 default:
4822 assert(0);
4823 return -1;
4824 }
4825
4826 ctx->bc->ar_loaded = 0;
4827 return 0;
4828 }
4829
4830 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4831 {
4832 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4833 struct r600_bytecode_alu alu;
4834 int i, r = 0;
4835
4836 for (i = 0; i < 4; i++) {
4837 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4838
4839 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4840 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4841
4842 if (i == 0 || i == 3) {
4843 alu.src[0].sel = V_SQ_ALU_SRC_1;
4844 } else {
4845 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4846 }
4847
4848 if (i == 0 || i == 2) {
4849 alu.src[1].sel = V_SQ_ALU_SRC_1;
4850 } else {
4851 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4852 }
4853 if (i == 3)
4854 alu.last = 1;
4855 r = r600_bytecode_add_alu(ctx->bc, &alu);
4856 if (r)
4857 return r;
4858 }
4859 return 0;
4860 }
4861
4862 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4863 {
4864 struct r600_bytecode_alu alu;
4865 int r;
4866
4867 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4868 alu.inst = opcode;
4869 alu.execute_mask = 1;
4870 alu.update_pred = 1;
4871
4872 alu.dst.sel = ctx->temp_reg;
4873 alu.dst.write = 1;
4874 alu.dst.chan = 0;
4875
4876 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4877 alu.src[1].sel = V_SQ_ALU_SRC_0;
4878 alu.src[1].chan = 0;
4879
4880 alu.last = 1;
4881
4882 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4883 if (r)
4884 return r;
4885 return 0;
4886 }
4887
4888 static int pops(struct r600_shader_ctx *ctx, int pops)
4889 {
4890 unsigned force_pop = ctx->bc->force_add_cf;
4891
4892 if (!force_pop) {
4893 int alu_pop = 3;
4894 if (ctx->bc->cf_last) {
4895 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4896 alu_pop = 0;
4897 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4898 alu_pop = 1;
4899 }
4900 alu_pop += pops;
4901 if (alu_pop == 1) {
4902 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4903 ctx->bc->force_add_cf = 1;
4904 } else if (alu_pop == 2) {
4905 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4906 ctx->bc->force_add_cf = 1;
4907 } else {
4908 force_pop = 1;
4909 }
4910 }
4911
4912 if (force_pop) {
4913 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4914 ctx->bc->cf_last->pop_count = pops;
4915 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4916 }
4917
4918 return 0;
4919 }
4920
4921 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4922 {
4923 switch(reason) {
4924 case FC_PUSH_VPM:
4925 ctx->bc->callstack[ctx->bc->call_sp].current--;
4926 break;
4927 case FC_PUSH_WQM:
4928 case FC_LOOP:
4929 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4930 break;
4931 case FC_REP:
4932 /* TOODO : for 16 vp asic should -= 2; */
4933 ctx->bc->callstack[ctx->bc->call_sp].current --;
4934 break;
4935 }
4936 }
4937
4938 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4939 {
4940 if (check_max_only) {
4941 int diff;
4942 switch (reason) {
4943 case FC_PUSH_VPM:
4944 diff = 1;
4945 break;
4946 case FC_PUSH_WQM:
4947 diff = 4;
4948 break;
4949 default:
4950 assert(0);
4951 diff = 0;
4952 }
4953 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4954 ctx->bc->callstack[ctx->bc->call_sp].max) {
4955 ctx->bc->callstack[ctx->bc->call_sp].max =
4956 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4957 }
4958 return;
4959 }
4960 switch (reason) {
4961 case FC_PUSH_VPM:
4962 ctx->bc->callstack[ctx->bc->call_sp].current++;
4963 break;
4964 case FC_PUSH_WQM:
4965 case FC_LOOP:
4966 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4967 break;
4968 case FC_REP:
4969 ctx->bc->callstack[ctx->bc->call_sp].current++;
4970 break;
4971 }
4972
4973 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4974 ctx->bc->callstack[ctx->bc->call_sp].max) {
4975 ctx->bc->callstack[ctx->bc->call_sp].max =
4976 ctx->bc->callstack[ctx->bc->call_sp].current;
4977 }
4978 }
4979
4980 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4981 {
4982 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4983
4984 sp->mid = realloc((void *)sp->mid,
4985 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
4986 sp->mid[sp->num_mid] = ctx->bc->cf_last;
4987 sp->num_mid++;
4988 }
4989
4990 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
4991 {
4992 ctx->bc->fc_sp++;
4993 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
4994 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
4995 }
4996
4997 static void fc_poplevel(struct r600_shader_ctx *ctx)
4998 {
4999 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
5000 if (sp->mid) {
5001 free(sp->mid);
5002 sp->mid = NULL;
5003 }
5004 sp->num_mid = 0;
5005 sp->start = NULL;
5006 sp->type = 0;
5007 ctx->bc->fc_sp--;
5008 }
5009
/*
 * Everything up to the matching #endif is compiled out. These are
 * stubs/sketches for RETURN and flag-based loop exit handling; several of
 * them have empty bodies or an unresolved jump offset.
 */
#if 0
/* Emit a RETURN CF instruction; always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a JUMP CF instruction with the given pop count; @offset is unused. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Placeholder: does nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Placeholder: empty body. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Sequence: test flag, jump, set flag to V_SQ_ALU_SRC_0, pop ifidx + 1, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Sequence: test flag, emit the current instruction's r600 opcode as a CF
 * instruction with pop_count 1, record it in the mid list of level @fc_sp,
 * then pop one entry.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
5057
5058 static int tgsi_if(struct r600_shader_ctx *ctx)
5059 {
5060 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5061
5062 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5063
5064 fc_pushlevel(ctx, FC_IF);
5065
5066 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5067 return 0;
5068 }
5069
5070 static int tgsi_else(struct r600_shader_ctx *ctx)
5071 {
5072 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5073 ctx->bc->cf_last->pop_count = 1;
5074
5075 fc_set_mid(ctx, ctx->bc->fc_sp);
5076 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5077 return 0;
5078 }
5079
5080 static int tgsi_endif(struct r600_shader_ctx *ctx)
5081 {
5082 pops(ctx, 1);
5083 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5084 R600_ERR("if/endif unbalanced in shader\n");
5085 return -1;
5086 }
5087
5088 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5089 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5090 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5091 } else {
5092 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5093 }
5094 fc_poplevel(ctx);
5095
5096 callstack_decrease_current(ctx, FC_PUSH_VPM);
5097 return 0;
5098 }
5099
5100 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5101 {
5102 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
5103
5104 fc_pushlevel(ctx, FC_LOOP);
5105
5106 /* check stack depth */
5107 callstack_check_depth(ctx, FC_LOOP, 0);
5108 return 0;
5109 }
5110
5111 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5112 {
5113 int i;
5114
5115 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5116
5117 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5118 R600_ERR("loop/endloop in shader code are not paired.\n");
5119 return -EINVAL;
5120 }
5121
5122 /* fixup loop pointers - from r600isa
5123 LOOP END points to CF after LOOP START,
5124 LOOP START point to CF after LOOP END
5125 BRK/CONT point to LOOP END CF
5126 */
5127 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5128
5129 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5130
5131 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5132 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5133 }
5134 /* XXX add LOOPRET support */
5135 fc_poplevel(ctx);
5136 callstack_decrease_current(ctx, FC_LOOP);
5137 return 0;
5138 }
5139
5140 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5141 {
5142 unsigned int fscp;
5143
5144 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5145 {
5146 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5147 break;
5148 }
5149
5150 if (fscp == 0) {
5151 R600_ERR("Break not inside loop/endloop pair\n");
5152 return -EINVAL;
5153 }
5154
5155 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5156
5157 fc_set_mid(ctx, fscp);
5158
5159 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5160 return 0;
5161 }
5162
5163 static int tgsi_umad(struct r600_shader_ctx *ctx)
5164 {
5165 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5166 struct r600_bytecode_alu alu;
5167 int i, j, r;
5168 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5169
5170 /* src0 * src1 */
5171 for (i = 0; i < lasti + 1; i++) {
5172 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5173 continue;
5174
5175 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5176
5177 alu.dst.chan = i;
5178 alu.dst.sel = ctx->temp_reg;
5179 alu.dst.write = 1;
5180
5181 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5182 for (j = 0; j < 2; j++) {
5183 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5184 }
5185
5186 alu.last = 1;
5187 r = r600_bytecode_add_alu(ctx->bc, &alu);
5188 if (r)
5189 return r;
5190 }
5191
5192
5193 for (i = 0; i < lasti + 1; i++) {
5194 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5195 continue;
5196
5197 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5198 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5199
5200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5201
5202 alu.src[0].sel = ctx->temp_reg;
5203 alu.src[0].chan = i;
5204
5205 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5206 if (i == lasti) {
5207 alu.last = 1;
5208 }
5209 r = r600_bytecode_add_alu(ctx->bc, &alu);
5210 if (r)
5211 return r;
5212 }
5213 return 0;
5214 }
5215
5216 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5217 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5218 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5219 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5220
5221 /* XXX:
5222 * For state trackers other than OpenGL, we'll want to use
5223 * _RECIP_IEEE instead.
5224 */
5225 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5226
5227 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5228 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5229 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5230 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5231 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5232 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5233 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5234 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5235 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5236 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5237 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5238 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5239 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5240 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5241 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5242 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5243 /* gap */
5244 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5245 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5246 /* gap */
5247 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5248 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5249 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5250 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5251 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5252 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5253 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5254 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5255 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5256 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5257 /* gap */
5258 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5259 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5260 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5261 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5262 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5263 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5264 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5265 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5266 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5267 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5268 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5269 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5270 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5271 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5272 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5273 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5274 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5275 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5276 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5277 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5278 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5279 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5280 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5281 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5282 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5283 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5284 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5285 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5286 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5287 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5288 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5289 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5290 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5291 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5292 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5293 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5294 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5295 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5296 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5297 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5298 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5299 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5300 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5301 /* gap */
5302 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5303 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5304 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5305 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5306 /* gap */
5307 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5308 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5309 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5310 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5311 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5312 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5313 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5314 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5315 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5316 /* gap */
5317 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5318 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5319 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5320 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5321 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5322 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5323 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5324 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5325 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5326 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5327 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5328 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5329 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5330 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5331 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5332 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5333 /* gap */
5334 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5335 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5336 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5337 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5338 /* gap */
5339 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5340 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5341 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5342 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5343 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5344 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5345 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5346 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5347 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5348 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5349 /* gap */
5350 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5351 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5352 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5353 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5354 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5355 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5356 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5357 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5358 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5359 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5360 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5361 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5362 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5363 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5364 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5365 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5366 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5367 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5368 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5369 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5370 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5371 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5372 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5373 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5374 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5375 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5376 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5377 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5378 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5379 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5380 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5381 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5382 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5383 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5384 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5385 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5386 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5387 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5388 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5389 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5390 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5391 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5392 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5393 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5394 };
5395
/*
 * TGSI -> hardware translation table built from the EG_-prefixed ALU/CF
 * opcode constants and the shared SQ_TEX_INST_* texture opcodes.
 *
 * Each entry is { TGSI opcode, flag, hardware opcode, handler }.
 *
 * Entries are listed in ascending TGSI opcode order; the "gap" entries
 * (written with a literal number instead of a TGSI_OPCODE_* name) keep the
 * sequence contiguous.  Presumably this lets the table be indexed directly
 * by TGSI opcode -- confirm against the lookup code before inserting,
 * removing or reordering entries.
 *
 * The flag field is 1 only for MAD, the single entry whose hardware opcode
 * is an OP3 constant (presumably it marks three-source ALU instructions).
 * Entries whose handler is tgsi_unsupported are, judging by the handler's
 * name, opcodes this table does not translate.
 */
5396 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5397 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5398 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5399 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5400 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5401 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5402 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5403 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5404 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5405 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5406 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5407 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5408 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5409 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5410 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5411 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT via the swap handler -- presumably a < b evaluated as b > a */
5412 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5413 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry with flag == 1 and an OP3 opcode */
5414 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* same hardware opcode as ADD */
5415 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5416 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5417 /* gap */
5418 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5419 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5420 /* gap */
5421 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5422 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5423 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5424 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5425 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5426 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5427 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5428 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5429 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5430 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5431 /* gap */
5432 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5433 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* same hardware opcode as MOV */
5434 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5435 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5436 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5437 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5438 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5439 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5440 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5441 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5442 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5443 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5444 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5445 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5446 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5447 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5448 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5449 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5450 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5451 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5452 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5453 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5454 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, /* same texture opcode as TEX */
5455 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5456 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5457 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5458 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5459 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5460 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5461 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5462 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5463 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5464 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5465 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5466 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5467 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5468 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5469 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5470 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5471 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5472 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5473 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5474 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5475 /* gap */
5476 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5477 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5478 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5479 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5480 /* gap */
5481 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5482 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5483 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5484 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5485 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5486 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5487 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5488 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5489 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5490 /* gap */
5491 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5492 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5493 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5494 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5495 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5496 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5497 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5498 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5499 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5500 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5501 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5502 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5503 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5504 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5505 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5506 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, /* same texture opcode as TXQ */
5507 /* gap */
5508 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5509 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5510 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5511 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5512 /* gap */
5513 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5514 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5515 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5516 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5517 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5518 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5519 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5520 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5521 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5522 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5523 /* gap */
5524 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5525 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5526 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5527 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5528 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5529 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5530 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5531 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5532 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5533 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5534 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5535 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5536 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5537 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5538 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5539 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5540 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5541 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5542 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5543 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5544 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5545 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5546 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5547 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5548 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5549 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5550 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5551 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5552 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5553 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5554 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5555 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5556 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5557 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5558 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5559 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5560 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5561 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5562 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5563 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5564 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5565 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5566 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5567 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5568 };
5569
/*
 * TGSI -> hardware translation table for the "cm" variant (presumably
 * Cayman, given the cayman_* handlers it references).
 *
 * Each entry is { TGSI opcode, flag, hardware opcode, handler }.  Entries
 * are listed in ascending TGSI opcode order, with literal-numbered "gap"
 * entries filling unnamed opcode values; presumably the table is indexed
 * directly by TGSI opcode, so do not reorder without checking the lookup.
 * The flag field is 1 only for MAD, the single OP3 entry.
 *
 * The table reuses the EG_-prefixed opcode constants.  Compared with
 * eg_shader_tgsi_instruction, the entries that differ are:
 *   RCP, RSQ, EX2, LG2   -> cayman_emit_float_instr
 *   POW                  -> cayman_pow
 *   COS, SIN             -> cayman_trig
 *   I2F, F2I, F2U, U2F   -> tgsi_op2
 *   UMUL                 -> MULLO_INT opcode with cayman_mul_int_instr
 */
5570 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5571 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5572 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5573 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5574 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, /* eg table: tgsi_trans_srcx_replicate */
5575 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, /* eg table: tgsi_rsq */
5576 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5577 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5578 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5579 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5580 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5581 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5582 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5583 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5584 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5585 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5586 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5587 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry with flag == 1 and an OP3 opcode */
5588 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* same hardware opcode as ADD */
5589 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5590 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5591 /* gap */
5592 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5593 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5594 /* gap */
5595 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5596 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5597 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5598 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5599 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5600 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5601 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, /* eg table: tgsi_trans_srcx_replicate */
5602 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, /* eg table: tgsi_trans_srcx_replicate */
5603 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, /* eg table: tgsi_pow */
5604 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5605 /* gap */
5606 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5607 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* same hardware opcode as MOV */
5608 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5609 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5610 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, /* eg table: tgsi_trig */
5611 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5612 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5613 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5614 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5615 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5616 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5617 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5618 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5619 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5620 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5621 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5622 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, /* eg table: tgsi_trig */
5623 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5624 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5625 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5626 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5627 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5628 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, /* same texture opcode as TEX */
5629 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5630 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5631 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5632 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5633 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5634 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5635 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5636 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5637 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5638 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5639 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5640 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5641 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5642 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5643 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5644 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5645 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5646 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5647 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5648 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5649 /* gap */
5650 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5651 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5652 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5653 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5654 /* gap */
5655 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5656 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5657 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5658 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5659 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5660 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2}, /* eg table: tgsi_op2_trans */
5661 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5662 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5663 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5664 /* gap */
5665 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5666 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5667 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5668 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5669 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5670 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5671 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5672 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5673 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5674 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5675 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5676 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5677 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5678 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5679 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5680 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, /* same texture opcode as TXQ */
5681 /* gap */
5682 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5683 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5684 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5685 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5686 /* gap */
5687 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5688 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5689 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5690 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5691 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5692 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5693 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5694 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5695 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5696 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5697 /* gap */
5698 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5699 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2}, /* eg table: tgsi_f2i */
5700 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5701 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5702 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5703 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5704 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5705 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5706 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5707 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, /* eg table: tgsi_f2i */
5708 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, /* eg table: tgsi_op2_trans */
5709 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5710 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5711 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5712 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5713 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5714 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5715 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr}, /* eg table: MULLO_UINT with tgsi_op2_trans */
5716 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5717 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5718 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5719 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5720 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5721 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5722 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5723 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5724 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5725 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5726 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5727 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5728 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5729 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5730 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5731 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5732 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5733 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5734 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5735 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5736 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5737 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5738 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5739 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5740 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5741 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5742 };