r600g: precompute color buffer state in pipe_surface and reuse it
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_context *rctx = (struct r600_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader);
107
108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_context *rctx = (struct r600_context *)ctx;
112 struct r600_pipe_shader_selector *sel = shader->selector;
113 int r;
114
115 /* Would like some magic "get_bool_option_once" routine.
116 */
117 if (dump_shaders == -1)
118 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
119
120 if (dump_shaders) {
121 fprintf(stderr, "--------------------------------------------------------------\n");
122 tgsi_dump(sel->tokens, 0);
123
124 if (sel->so.num_outputs) {
125 unsigned i;
126 fprintf(stderr, "STREAMOUT\n");
127 for (i = 0; i < sel->so.num_outputs; i++) {
128 unsigned mask = ((1 << sel->so.output[i].num_components) - 1) <<
129 sel->so.output[i].start_component;
130 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
131 sel->so.output[i].output_buffer, sel->so.output[i].register_index,
132 mask & 1 ? "x" : "_",
133 (mask >> 1) & 1 ? "y" : "_",
134 (mask >> 2) & 1 ? "z" : "_",
135 (mask >> 3) & 1 ? "w" : "_");
136 }
137 }
138 }
139 r = r600_shader_from_tgsi(rctx, shader);
140 if (r) {
141 R600_ERR("translation from TGSI failed !\n");
142 return r;
143 }
144 r = r600_bytecode_build(&shader->shader.bc);
145 if (r) {
146 R600_ERR("building bytecode failed !\n");
147 return r;
148 }
149 if (dump_shaders) {
150 r600_bytecode_dump(&shader->shader.bc);
151 fprintf(stderr, "______________________________________________________________\n");
152 }
153 return r600_pipe_shader(ctx, shader);
154 }
155
156 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
157 {
158 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
159 r600_bytecode_clear(&shader->shader.bc);
160 }
161
162 /*
163 * tgsi -> r600 shader
164 */
165 struct r600_shader_tgsi_instruction;
166
/* One TGSI source operand after translation by tgsi_src(). */
struct r600_shader_src {
	unsigned	sel;		/* register index plus file offset, V_SQ_ALU_SRC_LITERAL,
					 * or a special-constant selector */
	unsigned	swizzle[4];	/* source channel for x, y, z, w */
	unsigned	neg;		/* negate modifier */
	unsigned	abs;		/* absolute-value modifier */
	unsigned	rel;		/* V_SQ_REL_RELATIVE for indirect addressing, else 0 */
	uint32_t	value[4];	/* the four literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
};
175
176 struct r600_shader_ctx {
177 struct tgsi_shader_info info;
178 struct tgsi_parse_context parse;
179 const struct tgsi_token *tokens;
180 unsigned type;
181 unsigned file_offset[TGSI_FILE_COUNT];
182 unsigned temp_reg;
183 struct r600_shader_tgsi_instruction *inst_info;
184 struct r600_bytecode *bc;
185 struct r600_shader *shader;
186 struct r600_shader_src src[4];
187 uint32_t *literals;
188 uint32_t nliterals;
189 uint32_t max_driver_temp_used;
190 /* needed for evergreen interpolation */
191 boolean input_centroid;
192 boolean input_linear;
193 boolean input_perspective;
194 int num_interp_gpr;
195 int face_gpr;
196 int colors_used;
197 boolean clip_vertex_write;
198 unsigned cv_output;
199 int fragcoord_input;
200 int native_integers;
201 };
202
/*
 * Entry of the per-chip translation tables. The tables are indexed by TGSI
 * opcode. Member order must not change: NOTE(review) the tables defined
 * elsewhere in this file presumably use positional initialisers.
 */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI_OPCODE_* this entry handles */
	unsigned	is_op3;		/* non-zero for three-source ALU opcodes */
	unsigned	r600_opcode;	/* hardware opcode to emit */
	int		(*process)(struct r600_shader_ctx *ctx);	/* translation callback */
};
209
210 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
211 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
212 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
213 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
214 static int tgsi_else(struct r600_shader_ctx *ctx);
215 static int tgsi_endif(struct r600_shader_ctx *ctx);
216 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
217 static int tgsi_endloop(struct r600_shader_ctx *ctx);
218 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
219
220 /*
221 * bytestream -> r600 shader
222 *
223 * These functions are used to transform the output of the LLVM backend into
224 * struct r600_bytecode.
225 */
226
227 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
228 unsigned char * bytes, unsigned num_bytes);
229
230 #ifdef HAVE_OPENCL
231 int r600_compute_shader_create(struct pipe_context * ctx,
232 LLVMModuleRef mod, struct r600_bytecode * bytecode)
233 {
234 struct r600_context *r600_ctx = (struct r600_context *)ctx;
235 unsigned char * bytes;
236 unsigned byte_count;
237 struct r600_shader_ctx shader_ctx;
238 unsigned dump = 0;
239
240 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
241 dump = 1;
242 }
243
244 r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
245 shader_ctx.bc = bytecode;
246 r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
247 shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
248 r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
249 if (shader_ctx.bc->chip_class == CAYMAN) {
250 cm_bytecode_add_cf_end(shader_ctx.bc);
251 }
252 r600_bytecode_build(shader_ctx.bc);
253 if (dump) {
254 r600_bytecode_dump(shader_ctx.bc);
255 }
256 return 1;
257 }
258
259 #endif /* HAVE_OPENCL */
260
/*
 * Read a little-endian 32-bit value from bytes[*bytes_read] and advance
 * *bytes_read by four.
 */
static uint32_t i32_from_byte_stream(unsigned char * bytes,
	unsigned * bytes_read)
{
	unsigned i;
	uint32_t out = 0;
	for (i = 0; i < 4; i++) {
		/* Widen before shifting: an unsigned char promotes to int, and
		 * shifting a byte >= 0x80 left by 24 would overflow it. */
		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
	}
	return out;
}
271
272 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
273 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
274 {
275 unsigned i;
276 unsigned sel0, sel1;
277 sel0 = bytes[bytes_read++];
278 sel1 = bytes[bytes_read++];
279 alu->src[src_idx].sel = sel0 | (sel1 << 8);
280 alu->src[src_idx].chan = bytes[bytes_read++];
281 alu->src[src_idx].neg = bytes[bytes_read++];
282 alu->src[src_idx].abs = bytes[bytes_read++];
283 alu->src[src_idx].rel = bytes[bytes_read++];
284 alu->src[src_idx].kc_bank = bytes[bytes_read++];
285 for (i = 0; i < 4; i++) {
286 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
287 }
288 return bytes_read;
289 }
290
291 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
292 unsigned char * bytes, unsigned bytes_read)
293 {
294 unsigned src_idx;
295 unsigned inst0, inst1;
296 struct r600_bytecode_alu alu;
297 memset(&alu, 0, sizeof(alu));
298 for(src_idx = 0; src_idx < 3; src_idx++) {
299 bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
300 &alu, src_idx);
301 }
302
303 alu.dst.sel = bytes[bytes_read++];
304 alu.dst.chan = bytes[bytes_read++];
305 alu.dst.clamp = bytes[bytes_read++];
306 alu.dst.write = bytes[bytes_read++];
307 alu.dst.rel = bytes[bytes_read++];
308 inst0 = bytes[bytes_read++];
309 inst1 = bytes[bytes_read++];
310 alu.inst = inst0 | (inst1 << 8);
311 alu.last = bytes[bytes_read++];
312 alu.is_op3 = bytes[bytes_read++];
313 alu.predicate = bytes[bytes_read++];
314 alu.bank_swizzle = bytes[bytes_read++];
315 alu.bank_swizzle_force = bytes[bytes_read++];
316 alu.omod = bytes[bytes_read++];
317 alu.index_mode = bytes[bytes_read++];
318 r600_bytecode_add_alu(ctx->bc, &alu);
319
320 /* XXX: Handle other KILL instructions */
321 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
322 ctx->shader->uses_kill = 1;
323 /* XXX: This should be enforced in the LLVM backend. */
324 ctx->bc->force_add_cf = 1;
325 }
326 return bytes_read;
327 }
328
329 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
330 unsigned pred_inst)
331 {
332 alu->inst = pred_inst;
333 alu->predicate = 1;
334 alu->dst.write = 0;
335 alu->src[1].sel = V_SQ_ALU_SRC_0;
336 alu->src[1].chan = 0;
337 alu->last = 1;
338 r600_bytecode_add_alu_type(ctx->bc, alu,
339 CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
340
341 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
342 fc_pushlevel(ctx, FC_IF);
343 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
344 }
345
346 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
347 struct r600_bytecode_alu *alu, unsigned compare_opcode)
348 {
349 unsigned opcode = TGSI_OPCODE_BRK;
350 if (ctx->bc->chip_class == CAYMAN)
351 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
352 else if (ctx->bc->chip_class >= EVERGREEN)
353 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
354 else
355 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
356 llvm_if(ctx, alu, compare_opcode);
357 tgsi_loop_brk_cont(ctx);
358 tgsi_endif(ctx);
359 }
360
361 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
362 unsigned char * bytes, unsigned bytes_read)
363 {
364 struct r600_bytecode_alu alu;
365 unsigned inst;
366 memset(&alu, 0, sizeof(alu));
367 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
368 inst = bytes[bytes_read++];
369 switch (inst) {
370 case 0:
371 llvm_if(ctx, &alu,
372 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
373 break;
374 case 1:
375 llvm_if(ctx, &alu,
376 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
377 break;
378 case 2:
379 tgsi_else(ctx);
380 break;
381 case 3:
382 tgsi_endif(ctx);
383 break;
384 case 4:
385 tgsi_bgnloop(ctx);
386 break;
387 case 5:
388 tgsi_endloop(ctx);
389 break;
390 case 6:
391 r600_break_from_byte_stream(ctx, &alu,
392 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
393 break;
394 case 7:
395 r600_break_from_byte_stream(ctx, &alu,
396 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
397 break;
398 case 8:
399 {
400 unsigned opcode = TGSI_OPCODE_CONT;
401 if (ctx->bc->chip_class == CAYMAN) {
402 ctx->inst_info =
403 &cm_shader_tgsi_instruction[opcode];
404 } else if (ctx->bc->chip_class >= EVERGREEN) {
405 ctx->inst_info =
406 &eg_shader_tgsi_instruction[opcode];
407 } else {
408 ctx->inst_info =
409 &r600_shader_tgsi_instruction[opcode];
410 }
411 tgsi_loop_brk_cont(ctx);
412 }
413 break;
414 case 9:
415 r600_break_from_byte_stream(ctx, &alu,
416 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
417 break;
418 }
419
420 return bytes_read;
421 }
422
423 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
424 unsigned char * bytes, unsigned bytes_read)
425 {
426 struct r600_bytecode_tex tex;
427
428 tex.inst = bytes[bytes_read++];
429 tex.resource_id = bytes[bytes_read++];
430 tex.src_gpr = bytes[bytes_read++];
431 tex.src_rel = bytes[bytes_read++];
432 tex.dst_gpr = bytes[bytes_read++];
433 tex.dst_rel = bytes[bytes_read++];
434 tex.dst_sel_x = bytes[bytes_read++];
435 tex.dst_sel_y = bytes[bytes_read++];
436 tex.dst_sel_z = bytes[bytes_read++];
437 tex.dst_sel_w = bytes[bytes_read++];
438 tex.lod_bias = bytes[bytes_read++];
439 tex.coord_type_x = bytes[bytes_read++];
440 tex.coord_type_y = bytes[bytes_read++];
441 tex.coord_type_z = bytes[bytes_read++];
442 tex.coord_type_w = bytes[bytes_read++];
443 tex.offset_x = bytes[bytes_read++];
444 tex.offset_y = bytes[bytes_read++];
445 tex.offset_z = bytes[bytes_read++];
446 tex.sampler_id = bytes[bytes_read++];
447 tex.src_sel_x = bytes[bytes_read++];
448 tex.src_sel_y = bytes[bytes_read++];
449 tex.src_sel_z = bytes[bytes_read++];
450 tex.src_sel_w = bytes[bytes_read++];
451
452 r600_bytecode_add_tex(ctx->bc, &tex);
453
454 return bytes_read;
455 }
456
457 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
458 unsigned char * bytes, unsigned bytes_read)
459 {
460 struct r600_bytecode_vtx vtx;
461
462 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
463 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
464 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
465
466 memset(&vtx, 0, sizeof(vtx));
467
468 /* WORD0 */
469 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
470 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
471 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
472 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
473 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
474 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
475
476 /* WORD1 */
477 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
478 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
479 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
480 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
481 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
482 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
483 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
484 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
485 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
486 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
487
488 /* WORD 2*/
489 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
490 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
491
492 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
493 fprintf(stderr, "Error adding vtx\n");
494 }
495 /* Use the Texture Cache */
496 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
497 return bytes_read;
498 }
499
500 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
501 unsigned char * bytes, unsigned num_bytes)
502 {
503 unsigned bytes_read = 0;
504 unsigned i, byte;
505 while (bytes_read < num_bytes) {
506 char inst_type = bytes[bytes_read++];
507 switch (inst_type) {
508 case 0:
509 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
510 bytes_read);
511 break;
512 case 1:
513 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
514 bytes_read);
515 break;
516 case 2:
517 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
518 bytes_read);
519 break;
520 case 3:
521 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
522 for (i = 0; i < 2; i++) {
523 for (byte = 0 ; byte < 4; byte++) {
524 ctx->bc->cf_last->isa[i] |=
525 (bytes[bytes_read++] << (byte * 8));
526 }
527 }
528 break;
529
530 case 4:
531 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
532 bytes_read);
533 break;
534 default:
535 /* XXX: Error here */
536 break;
537 }
538 }
539 }
540
541 /* End bytestream -> r600 shader functions*/
542
543 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
544 {
545 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
546 int j;
547
548 if (i->Instruction.NumDstRegs > 1) {
549 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
550 return -EINVAL;
551 }
552 if (i->Instruction.Predicate) {
553 R600_ERR("predicate unsupported\n");
554 return -EINVAL;
555 }
556 #if 0
557 if (i->Instruction.Label) {
558 R600_ERR("label unsupported\n");
559 return -EINVAL;
560 }
561 #endif
562 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
563 if (i->Src[j].Register.Dimension) {
564 R600_ERR("unsupported src %d (dimension %d)\n", j,
565 i->Src[j].Register.Dimension);
566 return -EINVAL;
567 }
568 }
569 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
570 if (i->Dst[j].Register.Dimension) {
571 R600_ERR("unsupported dst (dimension)\n");
572 return -EINVAL;
573 }
574 }
575 return 0;
576 }
577
578 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
579 {
580 int i, r;
581 struct r600_bytecode_alu alu;
582 int gpr = 0, base_chan = 0;
583 int ij_index = 0;
584
585 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
586 ij_index = 0;
587 if (ctx->shader->input[input].centroid)
588 ij_index++;
589 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
590 ij_index = 0;
591 /* if we have perspective add one */
592 if (ctx->input_perspective) {
593 ij_index++;
594 /* if we have perspective centroid */
595 if (ctx->input_centroid)
596 ij_index++;
597 }
598 if (ctx->shader->input[input].centroid)
599 ij_index++;
600 }
601
602 /* work out gpr and base_chan from index */
603 gpr = ij_index / 2;
604 base_chan = (2 * (ij_index % 2)) + 1;
605
606 for (i = 0; i < 8; i++) {
607 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
608
609 if (i < 4)
610 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
611 else
612 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
613
614 if ((i > 1) && (i < 6)) {
615 alu.dst.sel = ctx->shader->input[input].gpr;
616 alu.dst.write = 1;
617 }
618
619 alu.dst.chan = i % 4;
620
621 alu.src[0].sel = gpr;
622 alu.src[0].chan = (base_chan - (i % 2));
623
624 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
625
626 alu.bank_swizzle_force = SQ_ALU_VEC_210;
627 if ((i % 4) == 3)
628 alu.last = 1;
629 r = r600_bytecode_add_alu(ctx->bc, &alu);
630 if (r)
631 return r;
632 }
633 return 0;
634 }
635
636 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
637 {
638 int i, r;
639 struct r600_bytecode_alu alu;
640
641 for (i = 0; i < 4; i++) {
642 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
643
644 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
645
646 alu.dst.sel = ctx->shader->input[input].gpr;
647 alu.dst.write = 1;
648
649 alu.dst.chan = i;
650
651 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
652 alu.src[0].chan = i;
653
654 if (i == 3)
655 alu.last = 1;
656 r = r600_bytecode_add_alu(ctx->bc, &alu);
657 if (r)
658 return r;
659 }
660 return 0;
661 }
662
663 /*
664 * Special export handling in shaders
665 *
666 * shader export ARRAY_BASE for EXPORT_POS:
667 * 60 is position
668 * 61 is misc vector
669 * 62, 63 are clip distance vectors
670 *
671 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
672 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
673 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
674 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
675 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
676 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
677 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
678 * exclusive from render target index)
679 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
680 *
681 *
682 * shader export ARRAY_BASE for EXPORT_PIXEL:
683 * 0-7 CB targets
684 * 61 computed Z vector
685 *
686 * The use of the values exported in the computed Z vector are controlled
687 * by DB_SHADER_CONTROL:
688 * Z_EXPORT_ENABLE - Z as a float in RED
689 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
690 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
691 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
692 * DB_SOURCE_FORMAT - export control restrictions
693 *
694 */
695
696
697 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
698 static int r600_spi_sid(struct r600_shader_io * io)
699 {
700 int index, name = io->name;
701
702 /* These params are handled differently, they don't need
703 * semantic indices, so we'll use 0 for them.
704 */
705 if (name == TGSI_SEMANTIC_POSITION ||
706 name == TGSI_SEMANTIC_PSIZE ||
707 name == TGSI_SEMANTIC_FACE)
708 index = 0;
709 else {
710 if (name == TGSI_SEMANTIC_GENERIC) {
711 /* For generic params simply use sid from tgsi */
712 index = io->sid;
713 } else {
714 /* For non-generic params - pack name and sid into 8 bits */
715 index = 0x80 | (name<<3) | (io->sid);
716 }
717
718 /* Make sure that all really used indices have nonzero value, so
719 * we can just compare it to 0 later instead of comparing the name
720 * with different values to detect special cases. */
721 index++;
722 }
723
724 return index;
725 };
726
727 /* turn input into interpolate on EG */
728 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
729 {
730 int r = 0;
731
732 if (ctx->shader->input[index].spi_sid) {
733 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
734 if (ctx->shader->input[index].interpolate > 0) {
735 r = evergreen_interp_alu(ctx, index);
736 } else {
737 r = evergreen_interp_flat(ctx, index);
738 }
739 }
740 return r;
741 }
742
743 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
744 {
745 struct r600_bytecode_alu alu;
746 int i, r;
747 int gpr_front = ctx->shader->input[front].gpr;
748 int gpr_back = ctx->shader->input[back].gpr;
749
750 for (i = 0; i < 4; i++) {
751 memset(&alu, 0, sizeof(alu));
752 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
753 alu.is_op3 = 1;
754 alu.dst.write = 1;
755 alu.dst.sel = gpr_front;
756 alu.src[0].sel = ctx->face_gpr;
757 alu.src[1].sel = gpr_front;
758 alu.src[2].sel = gpr_back;
759
760 alu.dst.chan = i;
761 alu.src[1].chan = i;
762 alu.src[2].chan = i;
763 alu.last = (i==3);
764
765 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
766 return r;
767 }
768
769 return 0;
770 }
771
772 static int tgsi_declaration(struct r600_shader_ctx *ctx)
773 {
774 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
775 unsigned i;
776 int r;
777
778 switch (d->Declaration.File) {
779 case TGSI_FILE_INPUT:
780 i = ctx->shader->ninput++;
781 ctx->shader->input[i].name = d->Semantic.Name;
782 ctx->shader->input[i].sid = d->Semantic.Index;
783 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
784 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
785 ctx->shader->input[i].centroid = d->Interp.Centroid;
786 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
787 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
788 switch (ctx->shader->input[i].name) {
789 case TGSI_SEMANTIC_FACE:
790 ctx->face_gpr = ctx->shader->input[i].gpr;
791 break;
792 case TGSI_SEMANTIC_COLOR:
793 ctx->colors_used++;
794 break;
795 case TGSI_SEMANTIC_POSITION:
796 ctx->fragcoord_input = i;
797 break;
798 }
799 if (ctx->bc->chip_class >= EVERGREEN) {
800 if ((r = evergreen_interp_input(ctx, i)))
801 return r;
802 }
803 }
804 break;
805 case TGSI_FILE_OUTPUT:
806 i = ctx->shader->noutput++;
807 ctx->shader->output[i].name = d->Semantic.Name;
808 ctx->shader->output[i].sid = d->Semantic.Index;
809 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
810 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
811 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
812 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
813 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
814 switch (d->Semantic.Name) {
815 case TGSI_SEMANTIC_CLIPDIST:
816 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
817 break;
818 case TGSI_SEMANTIC_PSIZE:
819 ctx->shader->vs_out_misc_write = 1;
820 ctx->shader->vs_out_point_size = 1;
821 break;
822 case TGSI_SEMANTIC_CLIPVERTEX:
823 ctx->clip_vertex_write = TRUE;
824 ctx->cv_output = i;
825 break;
826 }
827 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
828 switch (d->Semantic.Name) {
829 case TGSI_SEMANTIC_COLOR:
830 ctx->shader->nr_ps_max_color_exports++;
831 break;
832 }
833 }
834 break;
835 case TGSI_FILE_CONSTANT:
836 case TGSI_FILE_TEMPORARY:
837 case TGSI_FILE_SAMPLER:
838 case TGSI_FILE_ADDRESS:
839 break;
840
841 case TGSI_FILE_SYSTEM_VALUE:
842 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
843 if (!ctx->native_integers) {
844 struct r600_bytecode_alu alu;
845 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
846
847 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
848 alu.src[0].sel = 0;
849 alu.src[0].chan = 3;
850
851 alu.dst.sel = 0;
852 alu.dst.chan = 3;
853 alu.dst.write = 1;
854 alu.last = 1;
855
856 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
857 return r;
858 }
859 break;
860 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
861 break;
862 default:
863 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
864 return -EINVAL;
865 }
866 return 0;
867 }
868
869 static int r600_get_temp(struct r600_shader_ctx *ctx)
870 {
871 return ctx->temp_reg + ctx->max_driver_temp_used++;
872 }
873
874 /*
875 * for evergreen we need to scan the shader to find the number of GPRs we need to
876 * reserve for interpolation.
877 *
878 * we need to know if we are going to emit
879 * any centroid inputs
880 * if perspective and linear are required
881 */
882 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
883 {
884 int i;
885 int num_baryc;
886
887 ctx->input_linear = FALSE;
888 ctx->input_perspective = FALSE;
889 ctx->input_centroid = FALSE;
890 ctx->num_interp_gpr = 1;
891
892 /* any centroid inputs */
893 for (i = 0; i < ctx->info.num_inputs; i++) {
894 /* skip position/face */
895 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
896 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
897 continue;
898 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
899 ctx->input_linear = TRUE;
900 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
901 ctx->input_perspective = TRUE;
902 if (ctx->info.input_centroid[i])
903 ctx->input_centroid = TRUE;
904 }
905
906 num_baryc = 0;
907 /* ignoring sample for now */
908 if (ctx->input_perspective)
909 num_baryc++;
910 if (ctx->input_linear)
911 num_baryc++;
912 if (ctx->input_centroid)
913 num_baryc *= 2;
914
915 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
916
917 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
918 return ctx->num_interp_gpr;
919 }
920
921 static void tgsi_src(struct r600_shader_ctx *ctx,
922 const struct tgsi_full_src_register *tgsi_src,
923 struct r600_shader_src *r600_src)
924 {
925 memset(r600_src, 0, sizeof(*r600_src));
926 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
927 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
928 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
929 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
930 r600_src->neg = tgsi_src->Register.Negate;
931 r600_src->abs = tgsi_src->Register.Absolute;
932
933 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
934 int index;
935 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
936 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
937 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
938
939 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
940 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
941 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
942 return;
943 }
944 index = tgsi_src->Register.Index;
945 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
946 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
947 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
948 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
949 r600_src->swizzle[0] = 3;
950 r600_src->swizzle[1] = 3;
951 r600_src->swizzle[2] = 3;
952 r600_src->swizzle[3] = 3;
953 r600_src->sel = 0;
954 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
955 r600_src->swizzle[0] = 0;
956 r600_src->swizzle[1] = 0;
957 r600_src->swizzle[2] = 0;
958 r600_src->swizzle[3] = 0;
959 r600_src->sel = 0;
960 }
961 } else {
962 if (tgsi_src->Register.Indirect)
963 r600_src->rel = V_SQ_REL_RELATIVE;
964 r600_src->sel = tgsi_src->Register.Index;
965 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
966 }
967 }
968
969 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
970 {
971 struct r600_bytecode_vtx vtx;
972 unsigned int ar_reg;
973 int r;
974
975 if (offset) {
976 struct r600_bytecode_alu alu;
977
978 memset(&alu, 0, sizeof(alu));
979
980 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
981 alu.src[0].sel = ctx->bc->ar_reg;
982
983 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
984 alu.src[1].value = offset;
985
986 alu.dst.sel = dst_reg;
987 alu.dst.write = 1;
988 alu.last = 1;
989
990 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
991 return r;
992
993 ar_reg = dst_reg;
994 } else {
995 ar_reg = ctx->bc->ar_reg;
996 }
997
998 memset(&vtx, 0, sizeof(vtx));
999 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1000 vtx.src_gpr = ar_reg;
1001 vtx.mega_fetch_count = 16;
1002 vtx.dst_gpr = dst_reg;
1003 vtx.dst_sel_x = 0; /* SEL_X */
1004 vtx.dst_sel_y = 1; /* SEL_Y */
1005 vtx.dst_sel_z = 2; /* SEL_Z */
1006 vtx.dst_sel_w = 3; /* SEL_W */
1007 vtx.data_format = FMT_32_32_32_32_FLOAT;
1008 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1009 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1010 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1011 vtx.endian = r600_endian_swap(32);
1012
1013 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1014 return r;
1015
1016 return 0;
1017 }
1018
1019 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1020 {
1021 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1022 struct r600_bytecode_alu alu;
1023 int i, j, k, nconst, r;
1024
1025 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1026 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1027 nconst++;
1028 }
1029 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1030 }
1031 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1032 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1033 continue;
1034 }
1035
1036 if (ctx->src[i].rel) {
1037 int treg = r600_get_temp(ctx);
1038 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1039 return r;
1040
1041 ctx->src[i].sel = treg;
1042 ctx->src[i].rel = 0;
1043 j--;
1044 } else if (j > 0) {
1045 int treg = r600_get_temp(ctx);
1046 for (k = 0; k < 4; k++) {
1047 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1048 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1049 alu.src[0].sel = ctx->src[i].sel;
1050 alu.src[0].chan = k;
1051 alu.src[0].rel = ctx->src[i].rel;
1052 alu.dst.sel = treg;
1053 alu.dst.chan = k;
1054 alu.dst.write = 1;
1055 if (k == 3)
1056 alu.last = 1;
1057 r = r600_bytecode_add_alu(ctx->bc, &alu);
1058 if (r)
1059 return r;
1060 }
1061 ctx->src[i].sel = treg;
1062 ctx->src[i].rel =0;
1063 j--;
1064 }
1065 }
1066 return 0;
1067 }
1068
1069 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1070 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1071 {
1072 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1073 struct r600_bytecode_alu alu;
1074 int i, j, k, nliteral, r;
1075
1076 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1077 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1078 nliteral++;
1079 }
1080 }
1081 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1082 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1083 int treg = r600_get_temp(ctx);
1084 for (k = 0; k < 4; k++) {
1085 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1086 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1087 alu.src[0].sel = ctx->src[i].sel;
1088 alu.src[0].chan = k;
1089 alu.src[0].value = ctx->src[i].value[k];
1090 alu.dst.sel = treg;
1091 alu.dst.chan = k;
1092 alu.dst.write = 1;
1093 if (k == 3)
1094 alu.last = 1;
1095 r = r600_bytecode_add_alu(ctx->bc, &alu);
1096 if (r)
1097 return r;
1098 }
1099 ctx->src[i].sel = treg;
1100 j--;
1101 }
1102 }
1103 return 0;
1104 }
1105
1106 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1107 {
1108 int i, r, count = ctx->shader->ninput;
1109
1110 /* additional inputs will be allocated right after the existing inputs,
1111 * we won't need them after the color selection, so we don't need to
1112 * reserve these gprs for the rest of the shader code and to adjust
1113 * output offsets etc. */
1114 int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
1115 ctx->info.file_max[TGSI_FILE_INPUT] + 1;
1116
1117 if (ctx->face_gpr == -1) {
1118 i = ctx->shader->ninput++;
1119 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
1120 ctx->shader->input[i].spi_sid = 0;
1121 ctx->shader->input[i].gpr = gpr++;
1122 ctx->face_gpr = ctx->shader->input[i].gpr;
1123 }
1124
1125 for (i = 0; i < count; i++) {
1126 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1127 int ni = ctx->shader->ninput++;
1128 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
1129 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1130 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
1131 ctx->shader->input[ni].gpr = gpr++;
1132
1133 if (ctx->bc->chip_class >= EVERGREEN) {
1134 r = evergreen_interp_input(ctx, ni);
1135 if (r)
1136 return r;
1137 }
1138
1139 r = select_twoside_color(ctx, i, ni);
1140 if (r)
1141 return r;
1142 }
1143 }
1144 return 0;
1145 }
1146
1147 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
1148 {
1149 struct r600_shader *shader = &pipeshader->shader;
1150 struct tgsi_token *tokens = pipeshader->selector->tokens;
1151 struct pipe_stream_output_info so = pipeshader->selector->so;
1152 struct tgsi_full_immediate *immediate;
1153 struct tgsi_full_property *property;
1154 struct r600_shader_ctx ctx;
1155 struct r600_bytecode_output output[32];
1156 unsigned output_done, noutput;
1157 unsigned opcode;
1158 int i, j, k, r = 0;
1159 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1160 /* Declarations used by llvm code */
1161 bool use_llvm = false;
1162 unsigned char * inst_bytes = NULL;
1163 unsigned inst_byte_count = 0;
1164
1165 #ifdef R600_USE_LLVM
1166 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1167 #endif
1168 ctx.bc = &shader->bc;
1169 ctx.shader = shader;
1170 ctx.native_integers = true;
1171
1172 r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
1173 ctx.tokens = tokens;
1174 tgsi_scan_shader(tokens, &ctx.info);
1175 tgsi_parse_init(&ctx.parse, tokens);
1176 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1177 shader->processor_type = ctx.type;
1178 ctx.bc->type = shader->processor_type;
1179
1180 ctx.face_gpr = -1;
1181 ctx.fragcoord_input = -1;
1182 ctx.colors_used = 0;
1183 ctx.clip_vertex_write = 0;
1184
1185 shader->nr_ps_color_exports = 0;
1186 shader->nr_ps_max_color_exports = 0;
1187
1188 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
1189
1190 /* register allocations */
1191 /* Values [0,127] correspond to GPR[0..127].
1192 * Values [128,159] correspond to constant buffer bank 0
1193 * Values [160,191] correspond to constant buffer bank 1
1194 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1195 * Values [256,287] correspond to constant buffer bank 2 (EG)
1196 * Values [288,319] correspond to constant buffer bank 3 (EG)
1197 * Other special values are shown in the list below.
1198 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1199 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1200 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1201 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1202 * 248 SQ_ALU_SRC_0: special constant 0.0.
1203 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1204 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1205 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1206 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1207 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1208 * 254 SQ_ALU_SRC_PV: previous vector result.
1209 * 255 SQ_ALU_SRC_PS: previous scalar result.
1210 */
1211 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1212 ctx.file_offset[i] = 0;
1213 }
1214 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1215 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1216 if (ctx.bc->chip_class >= EVERGREEN) {
1217 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1218 } else {
1219 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1220 }
1221 }
1222 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1223 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1224 }
1225
1226 /* LLVM backend setup */
1227 #ifdef R600_USE_LLVM
1228 if (use_llvm && ctx.info.indirect_files) {
1229 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1230 "indirect adressing. Falling back to TGSI "
1231 "backend.\n");
1232 use_llvm = 0;
1233 }
1234 if (use_llvm) {
1235 struct radeon_llvm_context radeon_llvm_ctx;
1236 LLVMModuleRef mod;
1237 unsigned dump = 0;
1238 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1239 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1240 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1241 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1242 dump = 1;
1243 }
1244 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1245 rctx->family, dump)) {
1246 FREE(inst_bytes);
1247 radeon_llvm_dispose(&radeon_llvm_ctx);
1248 use_llvm = 0;
1249 fprintf(stderr, "R600 LLVM backend failed to compile "
1250 "shader. Falling back to TGSI\n");
1251 } else {
1252 ctx.file_offset[TGSI_FILE_OUTPUT] =
1253 ctx.file_offset[TGSI_FILE_INPUT];
1254 }
1255 radeon_llvm_dispose(&radeon_llvm_ctx);
1256 }
1257 #endif
1258 /* End of LLVM backend setup */
1259
1260 if (!use_llvm) {
1261 ctx.file_offset[TGSI_FILE_OUTPUT] =
1262 ctx.file_offset[TGSI_FILE_INPUT] +
1263 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1264 }
1265 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1266 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1267
1268 /* Outside the GPR range. This will be translated to one of the
1269 * kcache banks later. */
1270 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1271
1272 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1273 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1274 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1275 ctx.temp_reg = ctx.bc->ar_reg + 1;
1276
1277 ctx.nliterals = 0;
1278 ctx.literals = NULL;
1279 shader->fs_write_all = FALSE;
1280 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1281 tgsi_parse_token(&ctx.parse);
1282 switch (ctx.parse.FullToken.Token.Type) {
1283 case TGSI_TOKEN_TYPE_IMMEDIATE:
1284 immediate = &ctx.parse.FullToken.FullImmediate;
1285 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1286 if(ctx.literals == NULL) {
1287 r = -ENOMEM;
1288 goto out_err;
1289 }
1290 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1291 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1292 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1293 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1294 ctx.nliterals++;
1295 break;
1296 case TGSI_TOKEN_TYPE_DECLARATION:
1297 r = tgsi_declaration(&ctx);
1298 if (r)
1299 goto out_err;
1300 break;
1301 case TGSI_TOKEN_TYPE_INSTRUCTION:
1302 break;
1303 case TGSI_TOKEN_TYPE_PROPERTY:
1304 property = &ctx.parse.FullToken.FullProperty;
1305 switch (property->Property.PropertyName) {
1306 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1307 if (property->u[0].Data == 1)
1308 shader->fs_write_all = TRUE;
1309 break;
1310 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1311 if (property->u[0].Data == 1)
1312 shader->vs_prohibit_ucps = TRUE;
1313 break;
1314 }
1315 break;
1316 default:
1317 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1318 r = -EINVAL;
1319 goto out_err;
1320 }
1321 }
1322
1323 if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
1324 shader->nr_ps_max_color_exports = 8;
1325
1326 if (ctx.fragcoord_input >= 0) {
1327 if (ctx.bc->chip_class == CAYMAN) {
1328 for (j = 0 ; j < 4; j++) {
1329 struct r600_bytecode_alu alu;
1330 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1331 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1332 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1333 alu.src[0].chan = 3;
1334
1335 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1336 alu.dst.chan = j;
1337 alu.dst.write = (j == 3);
1338 alu.last = 1;
1339 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1340 return r;
1341 }
1342 } else {
1343 struct r600_bytecode_alu alu;
1344 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1345 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1346 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1347 alu.src[0].chan = 3;
1348
1349 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1350 alu.dst.chan = 3;
1351 alu.dst.write = 1;
1352 alu.last = 1;
1353 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1354 return r;
1355 }
1356 }
1357
1358 if (shader->two_side && ctx.colors_used) {
1359 if ((r = process_twoside_color_inputs(&ctx)))
1360 return r;
1361 }
1362
1363 tgsi_parse_init(&ctx.parse, tokens);
1364 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1365 tgsi_parse_token(&ctx.parse);
1366 switch (ctx.parse.FullToken.Token.Type) {
1367 case TGSI_TOKEN_TYPE_INSTRUCTION:
1368 if (use_llvm) {
1369 continue;
1370 }
1371 r = tgsi_is_supported(&ctx);
1372 if (r)
1373 goto out_err;
1374 ctx.max_driver_temp_used = 0;
1375 /* reserve first tmp for everyone */
1376 r600_get_temp(&ctx);
1377
1378 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1379 if ((r = tgsi_split_constant(&ctx)))
1380 goto out_err;
1381 if ((r = tgsi_split_literal_constant(&ctx)))
1382 goto out_err;
1383 if (ctx.bc->chip_class == CAYMAN)
1384 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1385 else if (ctx.bc->chip_class >= EVERGREEN)
1386 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1387 else
1388 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1389 r = ctx.inst_info->process(&ctx);
1390 if (r)
1391 goto out_err;
1392 break;
1393 default:
1394 break;
1395 }
1396 }
1397
1398 /* Get instructions if we are using the LLVM backend. */
1399 if (use_llvm) {
1400 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1401 FREE(inst_bytes);
1402 }
1403
1404 noutput = shader->noutput;
1405
1406 if (ctx.clip_vertex_write) {
1407 /* need to convert a clipvertex write into clipdistance writes and not export
1408 the clip vertex anymore */
1409
1410 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1411 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1412 shader->output[noutput].gpr = ctx.temp_reg;
1413 noutput++;
1414 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1415 shader->output[noutput].gpr = ctx.temp_reg+1;
1416 noutput++;
1417
1418 /* reset spi_sid for clipvertex output to avoid confusing spi */
1419 shader->output[ctx.cv_output].spi_sid = 0;
1420
1421 shader->clip_dist_write = 0xFF;
1422
1423 for (i = 0; i < 8; i++) {
1424 int oreg = i >> 2;
1425 int ochan = i & 3;
1426
1427 for (j = 0; j < 4; j++) {
1428 struct r600_bytecode_alu alu;
1429 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1430 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1431 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1432 alu.src[0].chan = j;
1433
1434 alu.src[1].sel = 512 + i;
1435 alu.src[1].kc_bank = 1;
1436 alu.src[1].chan = j;
1437
1438 alu.dst.sel = ctx.temp_reg + oreg;
1439 alu.dst.chan = j;
1440 alu.dst.write = (j == ochan);
1441 if (j == 3)
1442 alu.last = 1;
1443 r = r600_bytecode_add_alu(ctx.bc, &alu);
1444 if (r)
1445 return r;
1446 }
1447 }
1448 }
1449
1450 /* Add stream outputs. */
1451 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1452 for (i = 0; i < so.num_outputs; i++) {
1453 struct r600_bytecode_output output;
1454
1455 if (so.output[i].output_buffer >= 4) {
1456 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1457 so.output[i].output_buffer);
1458 r = -EINVAL;
1459 goto out_err;
1460 }
1461 if (so.output[i].dst_offset < so.output[i].start_component) {
1462 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1463 r = -EINVAL;
1464 goto out_err;
1465 }
1466
1467 memset(&output, 0, sizeof(struct r600_bytecode_output));
1468 output.gpr = shader->output[so.output[i].register_index].gpr;
1469 output.elem_size = 0;
1470 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1471 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1472 output.burst_count = 1;
1473 output.barrier = 1;
1474 /* array_size is an upper limit for the burst_count
1475 * with MEM_STREAM instructions */
1476 output.array_size = 0xFFF;
1477 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1478 if (ctx.bc->chip_class >= EVERGREEN) {
1479 switch (so.output[i].output_buffer) {
1480 case 0:
1481 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1482 break;
1483 case 1:
1484 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1485 break;
1486 case 2:
1487 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1488 break;
1489 case 3:
1490 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1491 break;
1492 }
1493 } else {
1494 switch (so.output[i].output_buffer) {
1495 case 0:
1496 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1497 break;
1498 case 1:
1499 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1500 break;
1501 case 2:
1502 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1503 break;
1504 case 3:
1505 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1506 break;
1507 }
1508 }
1509 r = r600_bytecode_add_output(ctx.bc, &output);
1510 if (r)
1511 goto out_err;
1512 }
1513 }
1514
1515 /* export output */
1516 for (i = 0, j = 0; i < noutput; i++, j++) {
1517 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1518 output[j].gpr = shader->output[i].gpr;
1519 output[j].elem_size = 3;
1520 output[j].swizzle_x = 0;
1521 output[j].swizzle_y = 1;
1522 output[j].swizzle_z = 2;
1523 output[j].swizzle_w = 3;
1524 output[j].burst_count = 1;
1525 output[j].barrier = 1;
1526 output[j].type = -1;
1527 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1528 switch (ctx.type) {
1529 case TGSI_PROCESSOR_VERTEX:
1530 switch (shader->output[i].name) {
1531 case TGSI_SEMANTIC_POSITION:
1532 output[j].array_base = next_pos_base++;
1533 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1534 break;
1535
1536 case TGSI_SEMANTIC_PSIZE:
1537 output[j].array_base = next_pos_base++;
1538 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1539 break;
1540 case TGSI_SEMANTIC_CLIPVERTEX:
1541 j--;
1542 break;
1543 case TGSI_SEMANTIC_CLIPDIST:
1544 output[j].array_base = next_pos_base++;
1545 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1546 /* spi_sid is 0 for clipdistance outputs that were generated
1547 * for clipvertex - we don't need to pass them to PS */
1548 if (shader->output[i].spi_sid) {
1549 j++;
1550 /* duplicate it as PARAM to pass to the pixel shader */
1551 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1552 output[j].array_base = next_param_base++;
1553 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1554 }
1555 break;
1556 case TGSI_SEMANTIC_FOG:
1557 output[j].swizzle_y = 4; /* 0 */
1558 output[j].swizzle_z = 4; /* 0 */
1559 output[j].swizzle_w = 5; /* 1 */
1560 break;
1561 }
1562 break;
1563 case TGSI_PROCESSOR_FRAGMENT:
1564 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1565 /* never export more colors than the number of CBs */
1566 if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
1567 /* skip export */
1568 j--;
1569 continue;
1570 }
1571 output[j].array_base = next_pixel_base++;
1572 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1573 shader->nr_ps_color_exports++;
1574 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
1575 for (k = 1; k < rctx->nr_cbufs; k++) {
1576 j++;
1577 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1578 output[j].gpr = shader->output[i].gpr;
1579 output[j].elem_size = 3;
1580 output[j].swizzle_x = 0;
1581 output[j].swizzle_y = 1;
1582 output[j].swizzle_z = 2;
1583 output[j].swizzle_w = 3;
1584 output[j].burst_count = 1;
1585 output[j].barrier = 1;
1586 output[j].array_base = next_pixel_base++;
1587 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1588 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1589 shader->nr_ps_color_exports++;
1590 }
1591 }
1592 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1593 output[j].array_base = 61;
1594 output[j].swizzle_x = 2;
1595 output[j].swizzle_y = 7;
1596 output[j].swizzle_z = output[j].swizzle_w = 7;
1597 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1598 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1599 output[j].array_base = 61;
1600 output[j].swizzle_x = 7;
1601 output[j].swizzle_y = 1;
1602 output[j].swizzle_z = output[j].swizzle_w = 7;
1603 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1604 } else {
1605 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1606 r = -EINVAL;
1607 goto out_err;
1608 }
1609 break;
1610 default:
1611 R600_ERR("unsupported processor type %d\n", ctx.type);
1612 r = -EINVAL;
1613 goto out_err;
1614 }
1615
1616 if (output[j].type==-1) {
1617 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1618 output[j].array_base = next_param_base++;
1619 }
1620 }
1621
1622 /* add fake param output for vertex shader if no param is exported */
1623 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1624 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1625 output[j].gpr = 0;
1626 output[j].elem_size = 3;
1627 output[j].swizzle_x = 7;
1628 output[j].swizzle_y = 7;
1629 output[j].swizzle_z = 7;
1630 output[j].swizzle_w = 7;
1631 output[j].burst_count = 1;
1632 output[j].barrier = 1;
1633 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1634 output[j].array_base = 0;
1635 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1636 j++;
1637 }
1638
1639 /* add fake pixel export */
1640 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1641 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1642 output[j].gpr = 0;
1643 output[j].elem_size = 3;
1644 output[j].swizzle_x = 7;
1645 output[j].swizzle_y = 7;
1646 output[j].swizzle_z = 7;
1647 output[j].swizzle_w = 7;
1648 output[j].burst_count = 1;
1649 output[j].barrier = 1;
1650 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1651 output[j].array_base = 0;
1652 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1653 j++;
1654 }
1655
1656 noutput = j;
1657
1658 /* set export done on last export of each type */
1659 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1660 if (ctx.bc->chip_class < CAYMAN) {
1661 if (i == (noutput - 1)) {
1662 output[i].end_of_program = 1;
1663 }
1664 }
1665 if (!(output_done & (1 << output[i].type))) {
1666 output_done |= (1 << output[i].type);
1667 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1668 }
1669 }
1670 /* add output to bytecode */
1671 for (i = 0; i < noutput; i++) {
1672 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1673 if (r)
1674 goto out_err;
1675 }
1676 /* add program end */
1677 if (ctx.bc->chip_class == CAYMAN)
1678 cm_bytecode_add_cf_end(ctx.bc);
1679
1680 /* check GPR limit - we have 124 = 128 - 4
1681 * (4 are reserved as alu clause temporary registers) */
1682 if (ctx.bc->ngpr > 124) {
1683 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1684 r = -ENOMEM;
1685 goto out_err;
1686 }
1687
1688 free(ctx.literals);
1689 tgsi_parse_free(&ctx.parse);
1690 return 0;
1691 out_err:
1692 free(ctx.literals);
1693 tgsi_parse_free(&ctx.parse);
1694 return r;
1695 }
1696
1697 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1698 {
1699 R600_ERR("%s tgsi opcode unsupported\n",
1700 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1701 return -EINVAL;
1702 }
1703
/* Handler that emits nothing and always reports success. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* intentionally unused */
	return 0;
}
1708
1709 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1710 const struct r600_shader_src *shader_src,
1711 unsigned chan)
1712 {
1713 bc_src->sel = shader_src->sel;
1714 bc_src->chan = shader_src->swizzle[chan];
1715 bc_src->neg = shader_src->neg;
1716 bc_src->abs = shader_src->abs;
1717 bc_src->rel = shader_src->rel;
1718 bc_src->value = shader_src->value[bc_src->chan];
1719 }
1720
1721 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1722 {
1723 bc_src->abs = 1;
1724 bc_src->neg = 0;
1725 }
1726
1727 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1728 {
1729 bc_src->neg = !bc_src->neg;
1730 }
1731
1732 static void tgsi_dst(struct r600_shader_ctx *ctx,
1733 const struct tgsi_full_dst_register *tgsi_dst,
1734 unsigned swizzle,
1735 struct r600_bytecode_alu_dst *r600_dst)
1736 {
1737 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1738
1739 r600_dst->sel = tgsi_dst->Register.Index;
1740 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1741 r600_dst->chan = swizzle;
1742 r600_dst->write = 1;
1743 if (tgsi_dst->Register.Indirect)
1744 r600_dst->rel = V_SQ_REL_RELATIVE;
1745 if (inst->Instruction.Saturate) {
1746 r600_dst->clamp = 1;
1747 }
1748 }
1749
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X and "nothing set" both yield 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
1761
1762 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1763 {
1764 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1765 struct r600_bytecode_alu alu;
1766 int i, j, r;
1767 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1768
1769 for (i = 0; i < lasti + 1; i++) {
1770 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1771 continue;
1772
1773 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1774 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1775
1776 alu.inst = ctx->inst_info->r600_opcode;
1777 if (!swap) {
1778 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1779 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1780 }
1781 } else {
1782 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1783 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1784 }
1785 /* handle some special cases */
1786 switch (ctx->inst_info->tgsi_opcode) {
1787 case TGSI_OPCODE_SUB:
1788 r600_bytecode_src_toggle_neg(&alu.src[1]);
1789 break;
1790 case TGSI_OPCODE_ABS:
1791 r600_bytecode_src_set_abs(&alu.src[0]);
1792 break;
1793 default:
1794 break;
1795 }
1796 if (i == lasti || trans_only) {
1797 alu.last = 1;
1798 }
1799 r = r600_bytecode_add_alu(ctx->bc, &alu);
1800 if (r)
1801 return r;
1802 }
1803 return 0;
1804 }
1805
/* Per-channel emit with sources in TGSI order and normal ALU grouping. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0, trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1810
/* Per-channel emit with source operands 0 and 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1, trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1815
/* Per-channel emit where every ALU instruction is flagged as last. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0, trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1820
1821 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1822 {
1823 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1824 struct r600_bytecode_alu alu;
1825 int i, r;
1826 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1827
1828 for (i = 0; i < lasti + 1; i++) {
1829
1830 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1831 continue;
1832 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1833 alu.inst = ctx->inst_info->r600_opcode;
1834
1835 alu.src[0].sel = V_SQ_ALU_SRC_0;
1836
1837 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1838
1839 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1840
1841 if (i == lasti) {
1842 alu.last = 1;
1843 }
1844 r = r600_bytecode_add_alu(ctx->bc, &alu);
1845 if (r)
1846 return r;
1847 }
1848 return 0;
1849
1850 }
1851
1852 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1853 {
1854 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1855 int i, j, r;
1856 struct r600_bytecode_alu alu;
1857 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1858
1859 for (i = 0 ; i < last_slot; i++) {
1860 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1861 alu.inst = ctx->inst_info->r600_opcode;
1862 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1863 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1864 }
1865 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1866 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1867
1868 if (i == last_slot - 1)
1869 alu.last = 1;
1870 r = r600_bytecode_add_alu(ctx->bc, &alu);
1871 if (r)
1872 return r;
1873 }
1874 return 0;
1875 }
1876
1877 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1878 {
1879 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1880 int i, j, k, r;
1881 struct r600_bytecode_alu alu;
1882 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1883 for (k = 0; k < last_slot; k++) {
1884 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1885 continue;
1886
1887 for (i = 0 ; i < 4; i++) {
1888 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1889 alu.inst = ctx->inst_info->r600_opcode;
1890 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1891 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1892 }
1893 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1894 alu.dst.write = (i == k);
1895 if (i == 3)
1896 alu.last = 1;
1897 r = r600_bytecode_add_alu(ctx->bc, &alu);
1898 if (r)
1899 return r;
1900 }
1901 }
1902 return 0;
1903 }
1904
1905 /*
1906 * r600 - trunc to -PI..PI range
1907 * r700 - normalize by dividing by 2PI
1908 * see fdo bug 27901
1909 */
1910 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1911 {
1912 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1913 static float double_pi = 3.1415926535 * 2;
1914 static float neg_pi = -3.1415926535;
1915
1916 int r;
1917 struct r600_bytecode_alu alu;
1918
1919 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1920 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1921 alu.is_op3 = 1;
1922
1923 alu.dst.chan = 0;
1924 alu.dst.sel = ctx->temp_reg;
1925 alu.dst.write = 1;
1926
1927 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1928
1929 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1930 alu.src[1].chan = 0;
1931 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1932 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1933 alu.src[2].chan = 0;
1934 alu.last = 1;
1935 r = r600_bytecode_add_alu(ctx->bc, &alu);
1936 if (r)
1937 return r;
1938
1939 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1940 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1941
1942 alu.dst.chan = 0;
1943 alu.dst.sel = ctx->temp_reg;
1944 alu.dst.write = 1;
1945
1946 alu.src[0].sel = ctx->temp_reg;
1947 alu.src[0].chan = 0;
1948 alu.last = 1;
1949 r = r600_bytecode_add_alu(ctx->bc, &alu);
1950 if (r)
1951 return r;
1952
1953 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1954 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1955 alu.is_op3 = 1;
1956
1957 alu.dst.chan = 0;
1958 alu.dst.sel = ctx->temp_reg;
1959 alu.dst.write = 1;
1960
1961 alu.src[0].sel = ctx->temp_reg;
1962 alu.src[0].chan = 0;
1963
1964 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1965 alu.src[1].chan = 0;
1966 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1967 alu.src[2].chan = 0;
1968
1969 if (ctx->bc->chip_class == R600) {
1970 alu.src[1].value = *(uint32_t *)&double_pi;
1971 alu.src[2].value = *(uint32_t *)&neg_pi;
1972 } else {
1973 alu.src[1].sel = V_SQ_ALU_SRC_1;
1974 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1975 alu.src[2].neg = 1;
1976 }
1977
1978 alu.last = 1;
1979 r = r600_bytecode_add_alu(ctx->bc, &alu);
1980 if (r)
1981 return r;
1982 return 0;
1983 }
1984
1985 static int cayman_trig(struct r600_shader_ctx *ctx)
1986 {
1987 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1988 struct r600_bytecode_alu alu;
1989 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1990 int i, r;
1991
1992 r = tgsi_setup_trig(ctx);
1993 if (r)
1994 return r;
1995
1996
1997 for (i = 0; i < last_slot; i++) {
1998 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1999 alu.inst = ctx->inst_info->r600_opcode;
2000 alu.dst.chan = i;
2001
2002 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2003 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2004
2005 alu.src[0].sel = ctx->temp_reg;
2006 alu.src[0].chan = 0;
2007 if (i == last_slot - 1)
2008 alu.last = 1;
2009 r = r600_bytecode_add_alu(ctx->bc, &alu);
2010 if (r)
2011 return r;
2012 }
2013 return 0;
2014 }
2015
2016 static int tgsi_trig(struct r600_shader_ctx *ctx)
2017 {
2018 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2019 struct r600_bytecode_alu alu;
2020 int i, r;
2021 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2022
2023 r = tgsi_setup_trig(ctx);
2024 if (r)
2025 return r;
2026
2027 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2028 alu.inst = ctx->inst_info->r600_opcode;
2029 alu.dst.chan = 0;
2030 alu.dst.sel = ctx->temp_reg;
2031 alu.dst.write = 1;
2032
2033 alu.src[0].sel = ctx->temp_reg;
2034 alu.src[0].chan = 0;
2035 alu.last = 1;
2036 r = r600_bytecode_add_alu(ctx->bc, &alu);
2037 if (r)
2038 return r;
2039
2040 /* replicate result */
2041 for (i = 0; i < lasti + 1; i++) {
2042 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2043 continue;
2044
2045 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2046 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2047
2048 alu.src[0].sel = ctx->temp_reg;
2049 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2050 if (i == lasti)
2051 alu.last = 1;
2052 r = r600_bytecode_add_alu(ctx->bc, &alu);
2053 if (r)
2054 return r;
2055 }
2056 return 0;
2057 }
2058
2059 static int tgsi_scs(struct r600_shader_ctx *ctx)
2060 {
2061 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2062 struct r600_bytecode_alu alu;
2063 int i, r;
2064
2065 /* We'll only need the trig stuff if we are going to write to the
2066 * X or Y components of the destination vector.
2067 */
2068 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2069 r = tgsi_setup_trig(ctx);
2070 if (r)
2071 return r;
2072 }
2073
2074 /* dst.x = COS */
2075 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2076 if (ctx->bc->chip_class == CAYMAN) {
2077 for (i = 0 ; i < 3; i++) {
2078 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2079 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2080 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2081
2082 if (i == 0)
2083 alu.dst.write = 1;
2084 else
2085 alu.dst.write = 0;
2086 alu.src[0].sel = ctx->temp_reg;
2087 alu.src[0].chan = 0;
2088 if (i == 2)
2089 alu.last = 1;
2090 r = r600_bytecode_add_alu(ctx->bc, &alu);
2091 if (r)
2092 return r;
2093 }
2094 } else {
2095 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2096 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2097 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2098
2099 alu.src[0].sel = ctx->temp_reg;
2100 alu.src[0].chan = 0;
2101 alu.last = 1;
2102 r = r600_bytecode_add_alu(ctx->bc, &alu);
2103 if (r)
2104 return r;
2105 }
2106 }
2107
2108 /* dst.y = SIN */
2109 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2110 if (ctx->bc->chip_class == CAYMAN) {
2111 for (i = 0 ; i < 3; i++) {
2112 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2113 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2114 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2115 if (i == 1)
2116 alu.dst.write = 1;
2117 else
2118 alu.dst.write = 0;
2119 alu.src[0].sel = ctx->temp_reg;
2120 alu.src[0].chan = 0;
2121 if (i == 2)
2122 alu.last = 1;
2123 r = r600_bytecode_add_alu(ctx->bc, &alu);
2124 if (r)
2125 return r;
2126 }
2127 } else {
2128 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2129 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2130 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2131
2132 alu.src[0].sel = ctx->temp_reg;
2133 alu.src[0].chan = 0;
2134 alu.last = 1;
2135 r = r600_bytecode_add_alu(ctx->bc, &alu);
2136 if (r)
2137 return r;
2138 }
2139 }
2140
2141 /* dst.z = 0.0; */
2142 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2143 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2144
2145 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2146
2147 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2148
2149 alu.src[0].sel = V_SQ_ALU_SRC_0;
2150 alu.src[0].chan = 0;
2151
2152 alu.last = 1;
2153
2154 r = r600_bytecode_add_alu(ctx->bc, &alu);
2155 if (r)
2156 return r;
2157 }
2158
2159 /* dst.w = 1.0; */
2160 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2161 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2162
2163 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2164
2165 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2166
2167 alu.src[0].sel = V_SQ_ALU_SRC_1;
2168 alu.src[0].chan = 0;
2169
2170 alu.last = 1;
2171
2172 r = r600_bytecode_add_alu(ctx->bc, &alu);
2173 if (r)
2174 return r;
2175 }
2176
2177 return 0;
2178 }
2179
2180 static int tgsi_kill(struct r600_shader_ctx *ctx)
2181 {
2182 struct r600_bytecode_alu alu;
2183 int i, r;
2184
2185 for (i = 0; i < 4; i++) {
2186 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2187 alu.inst = ctx->inst_info->r600_opcode;
2188
2189 alu.dst.chan = i;
2190
2191 alu.src[0].sel = V_SQ_ALU_SRC_0;
2192
2193 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2194 alu.src[1].sel = V_SQ_ALU_SRC_1;
2195 alu.src[1].neg = 1;
2196 } else {
2197 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2198 }
2199 if (i == 3) {
2200 alu.last = 1;
2201 }
2202 r = r600_bytecode_add_alu(ctx->bc, &alu);
2203 if (r)
2204 return r;
2205 }
2206
2207 /* kill must be last in ALU */
2208 ctx->bc->force_add_cf = 1;
2209 ctx->shader->uses_kill = TRUE;
2210 return 0;
2211 }
2212
2213 static int tgsi_lit(struct r600_shader_ctx *ctx)
2214 {
2215 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2216 struct r600_bytecode_alu alu;
2217 int r;
2218
2219 /* tmp.x = max(src.y, 0.0) */
2220 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2222 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2223 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2224 alu.src[1].chan = 1;
2225
2226 alu.dst.sel = ctx->temp_reg;
2227 alu.dst.chan = 0;
2228 alu.dst.write = 1;
2229
2230 alu.last = 1;
2231 r = r600_bytecode_add_alu(ctx->bc, &alu);
2232 if (r)
2233 return r;
2234
2235 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2236 {
2237 int chan;
2238 int sel;
2239 int i;
2240
2241 if (ctx->bc->chip_class == CAYMAN) {
2242 for (i = 0; i < 3; i++) {
2243 /* tmp.z = log(tmp.x) */
2244 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2245 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2246 alu.src[0].sel = ctx->temp_reg;
2247 alu.src[0].chan = 0;
2248 alu.dst.sel = ctx->temp_reg;
2249 alu.dst.chan = i;
2250 if (i == 2) {
2251 alu.dst.write = 1;
2252 alu.last = 1;
2253 } else
2254 alu.dst.write = 0;
2255
2256 r = r600_bytecode_add_alu(ctx->bc, &alu);
2257 if (r)
2258 return r;
2259 }
2260 } else {
2261 /* tmp.z = log(tmp.x) */
2262 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2263 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2264 alu.src[0].sel = ctx->temp_reg;
2265 alu.src[0].chan = 0;
2266 alu.dst.sel = ctx->temp_reg;
2267 alu.dst.chan = 2;
2268 alu.dst.write = 1;
2269 alu.last = 1;
2270 r = r600_bytecode_add_alu(ctx->bc, &alu);
2271 if (r)
2272 return r;
2273 }
2274
2275 chan = alu.dst.chan;
2276 sel = alu.dst.sel;
2277
2278 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2279 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2280 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2281 alu.src[0].sel = sel;
2282 alu.src[0].chan = chan;
2283 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2284 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2285 alu.dst.sel = ctx->temp_reg;
2286 alu.dst.chan = 0;
2287 alu.dst.write = 1;
2288 alu.is_op3 = 1;
2289 alu.last = 1;
2290 r = r600_bytecode_add_alu(ctx->bc, &alu);
2291 if (r)
2292 return r;
2293
2294 if (ctx->bc->chip_class == CAYMAN) {
2295 for (i = 0; i < 3; i++) {
2296 /* dst.z = exp(tmp.x) */
2297 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2298 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2299 alu.src[0].sel = ctx->temp_reg;
2300 alu.src[0].chan = 0;
2301 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2302 if (i == 2) {
2303 alu.dst.write = 1;
2304 alu.last = 1;
2305 } else
2306 alu.dst.write = 0;
2307 r = r600_bytecode_add_alu(ctx->bc, &alu);
2308 if (r)
2309 return r;
2310 }
2311 } else {
2312 /* dst.z = exp(tmp.x) */
2313 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2315 alu.src[0].sel = ctx->temp_reg;
2316 alu.src[0].chan = 0;
2317 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2318 alu.last = 1;
2319 r = r600_bytecode_add_alu(ctx->bc, &alu);
2320 if (r)
2321 return r;
2322 }
2323 }
2324
2325 /* dst.x, <- 1.0 */
2326 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2327 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2328 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2329 alu.src[0].chan = 0;
2330 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2331 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2332 r = r600_bytecode_add_alu(ctx->bc, &alu);
2333 if (r)
2334 return r;
2335
2336 /* dst.y = max(src.x, 0.0) */
2337 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2338 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2339 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2340 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2341 alu.src[1].chan = 0;
2342 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2343 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2344 r = r600_bytecode_add_alu(ctx->bc, &alu);
2345 if (r)
2346 return r;
2347
2348 /* dst.w, <- 1.0 */
2349 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2351 alu.src[0].sel = V_SQ_ALU_SRC_1;
2352 alu.src[0].chan = 0;
2353 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2354 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2355 alu.last = 1;
2356 r = r600_bytecode_add_alu(ctx->bc, &alu);
2357 if (r)
2358 return r;
2359
2360 return 0;
2361 }
2362
2363 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2364 {
2365 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2366 struct r600_bytecode_alu alu;
2367 int i, r;
2368
2369 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2370
2371 /* XXX:
2372 * For state trackers other than OpenGL, we'll want to use
2373 * _RECIPSQRT_IEEE instead.
2374 */
2375 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2376
2377 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2378 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2379 r600_bytecode_src_set_abs(&alu.src[i]);
2380 }
2381 alu.dst.sel = ctx->temp_reg;
2382 alu.dst.write = 1;
2383 alu.last = 1;
2384 r = r600_bytecode_add_alu(ctx->bc, &alu);
2385 if (r)
2386 return r;
2387 /* replicate result */
2388 return tgsi_helper_tempx_replicate(ctx);
2389 }
2390
2391 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2392 {
2393 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2394 struct r600_bytecode_alu alu;
2395 int i, r;
2396
2397 for (i = 0; i < 4; i++) {
2398 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2399 alu.src[0].sel = ctx->temp_reg;
2400 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2401 alu.dst.chan = i;
2402 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2403 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2404 if (i == 3)
2405 alu.last = 1;
2406 r = r600_bytecode_add_alu(ctx->bc, &alu);
2407 if (r)
2408 return r;
2409 }
2410 return 0;
2411 }
2412
2413 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2414 {
2415 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2416 struct r600_bytecode_alu alu;
2417 int i, r;
2418
2419 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2420 alu.inst = ctx->inst_info->r600_opcode;
2421 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2422 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2423 }
2424 alu.dst.sel = ctx->temp_reg;
2425 alu.dst.write = 1;
2426 alu.last = 1;
2427 r = r600_bytecode_add_alu(ctx->bc, &alu);
2428 if (r)
2429 return r;
2430 /* replicate result */
2431 return tgsi_helper_tempx_replicate(ctx);
2432 }
2433
2434 static int cayman_pow(struct r600_shader_ctx *ctx)
2435 {
2436 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2437 int i, r;
2438 struct r600_bytecode_alu alu;
2439 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2440
2441 for (i = 0; i < 3; i++) {
2442 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2444 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2445 alu.dst.sel = ctx->temp_reg;
2446 alu.dst.chan = i;
2447 alu.dst.write = 1;
2448 if (i == 2)
2449 alu.last = 1;
2450 r = r600_bytecode_add_alu(ctx->bc, &alu);
2451 if (r)
2452 return r;
2453 }
2454
2455 /* b * LOG2(a) */
2456 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2457 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2458 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2459 alu.src[1].sel = ctx->temp_reg;
2460 alu.dst.sel = ctx->temp_reg;
2461 alu.dst.write = 1;
2462 alu.last = 1;
2463 r = r600_bytecode_add_alu(ctx->bc, &alu);
2464 if (r)
2465 return r;
2466
2467 for (i = 0; i < last_slot; i++) {
2468 /* POW(a,b) = EXP2(b * LOG2(a))*/
2469 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2471 alu.src[0].sel = ctx->temp_reg;
2472
2473 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2474 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2475 if (i == last_slot - 1)
2476 alu.last = 1;
2477 r = r600_bytecode_add_alu(ctx->bc, &alu);
2478 if (r)
2479 return r;
2480 }
2481 return 0;
2482 }
2483
2484 static int tgsi_pow(struct r600_shader_ctx *ctx)
2485 {
2486 struct r600_bytecode_alu alu;
2487 int r;
2488
2489 /* LOG2(a) */
2490 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2491 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2492 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2493 alu.dst.sel = ctx->temp_reg;
2494 alu.dst.write = 1;
2495 alu.last = 1;
2496 r = r600_bytecode_add_alu(ctx->bc, &alu);
2497 if (r)
2498 return r;
2499 /* b * LOG2(a) */
2500 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2501 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2502 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2503 alu.src[1].sel = ctx->temp_reg;
2504 alu.dst.sel = ctx->temp_reg;
2505 alu.dst.write = 1;
2506 alu.last = 1;
2507 r = r600_bytecode_add_alu(ctx->bc, &alu);
2508 if (r)
2509 return r;
2510 /* POW(a,b) = EXP2(b * LOG2(a))*/
2511 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2512 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2513 alu.src[0].sel = ctx->temp_reg;
2514 alu.dst.sel = ctx->temp_reg;
2515 alu.dst.write = 1;
2516 alu.last = 1;
2517 r = r600_bytecode_add_alu(ctx->bc, &alu);
2518 if (r)
2519 return r;
2520 return tgsi_helper_tempx_replicate(ctx);
2521 }
2522
2523 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2524 {
2525 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2526 struct r600_bytecode_alu alu;
2527 int i, r, j;
2528 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2529 int tmp0 = ctx->temp_reg;
2530 int tmp1 = r600_get_temp(ctx);
2531 int tmp2 = r600_get_temp(ctx);
2532 int tmp3 = r600_get_temp(ctx);
2533 /* Unsigned path:
2534 *
2535 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2536 *
2537 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2538 * 2. tmp0.z = lo (tmp0.x * src2)
2539 * 3. tmp0.w = -tmp0.z
2540 * 4. tmp0.y = hi (tmp0.x * src2)
2541 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2542 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2543 * 7. tmp1.x = tmp0.x - tmp0.w
2544 * 8. tmp1.y = tmp0.x + tmp0.w
2545 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2546 * 10. tmp0.z = hi(tmp0.x * src1) = q
2547 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2548 *
2549 * 12. tmp0.w = src1 - tmp0.y = r
2550 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2551 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2552 *
2553 * if DIV
2554 *
2555 * 15. tmp1.z = tmp0.z + 1 = q + 1
2556 * 16. tmp1.w = tmp0.z - 1 = q - 1
2557 *
2558 * else MOD
2559 *
2560 * 15. tmp1.z = tmp0.w - src2 = r - src2
2561 * 16. tmp1.w = tmp0.w + src2 = r + src2
2562 *
2563 * endif
2564 *
2565 * 17. tmp1.x = tmp1.x & tmp1.y
2566 *
2567 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2568 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2569 *
2570 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2571 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2572 *
2573 * Signed path:
2574 *
2575 * Same as unsigned, using abs values of the operands,
2576 * and fixing the sign of the result in the end.
2577 */
2578
2579 for (i = 0; i < 4; i++) {
2580 if (!(write_mask & (1<<i)))
2581 continue;
2582
2583 if (signed_op) {
2584
2585 /* tmp2.x = -src0 */
2586 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2587 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2588
2589 alu.dst.sel = tmp2;
2590 alu.dst.chan = 0;
2591 alu.dst.write = 1;
2592
2593 alu.src[0].sel = V_SQ_ALU_SRC_0;
2594
2595 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2596
2597 alu.last = 1;
2598 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2599 return r;
2600
2601 /* tmp2.y = -src1 */
2602 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2604
2605 alu.dst.sel = tmp2;
2606 alu.dst.chan = 1;
2607 alu.dst.write = 1;
2608
2609 alu.src[0].sel = V_SQ_ALU_SRC_0;
2610
2611 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2612
2613 alu.last = 1;
2614 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2615 return r;
2616
2617 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2618 /* it will be a sign of the quotient */
2619 if (!mod) {
2620
2621 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2622 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2623
2624 alu.dst.sel = tmp2;
2625 alu.dst.chan = 2;
2626 alu.dst.write = 1;
2627
2628 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2629 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2630
2631 alu.last = 1;
2632 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2633 return r;
2634 }
2635
2636 /* tmp2.x = |src0| */
2637 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2638 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2639 alu.is_op3 = 1;
2640
2641 alu.dst.sel = tmp2;
2642 alu.dst.chan = 0;
2643 alu.dst.write = 1;
2644
2645 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2646 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2647 alu.src[2].sel = tmp2;
2648 alu.src[2].chan = 0;
2649
2650 alu.last = 1;
2651 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2652 return r;
2653
2654 /* tmp2.y = |src1| */
2655 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2657 alu.is_op3 = 1;
2658
2659 alu.dst.sel = tmp2;
2660 alu.dst.chan = 1;
2661 alu.dst.write = 1;
2662
2663 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2664 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2665 alu.src[2].sel = tmp2;
2666 alu.src[2].chan = 1;
2667
2668 alu.last = 1;
2669 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2670 return r;
2671
2672 }
2673
2674 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2675 if (ctx->bc->chip_class == CAYMAN) {
2676 /* tmp3.x = u2f(src2) */
2677 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2678 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2679
2680 alu.dst.sel = tmp3;
2681 alu.dst.chan = 0;
2682 alu.dst.write = 1;
2683
2684 if (signed_op) {
2685 alu.src[0].sel = tmp2;
2686 alu.src[0].chan = 1;
2687 } else {
2688 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2689 }
2690
2691 alu.last = 1;
2692 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2693 return r;
2694
2695 /* tmp0.x = recip(tmp3.x) */
2696 for (j = 0 ; j < 3; j++) {
2697 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2698 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2699
2700 alu.dst.sel = tmp0;
2701 alu.dst.chan = j;
2702 alu.dst.write = (j == 0);
2703
2704 alu.src[0].sel = tmp3;
2705 alu.src[0].chan = 0;
2706
2707 if (j == 2)
2708 alu.last = 1;
2709 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2710 return r;
2711 }
2712
2713 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2714 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2715
2716 alu.src[0].sel = tmp0;
2717 alu.src[0].chan = 0;
2718
2719 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2720 alu.src[1].value = 0x4f800000;
2721
2722 alu.dst.sel = tmp3;
2723 alu.dst.write = 1;
2724 alu.last = 1;
2725 r = r600_bytecode_add_alu(ctx->bc, &alu);
2726 if (r)
2727 return r;
2728
2729 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2730 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2731
2732 alu.dst.sel = tmp0;
2733 alu.dst.chan = 0;
2734 alu.dst.write = 1;
2735
2736 alu.src[0].sel = tmp3;
2737 alu.src[0].chan = 0;
2738
2739 alu.last = 1;
2740 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2741 return r;
2742
2743 } else {
2744 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2745 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2746
2747 alu.dst.sel = tmp0;
2748 alu.dst.chan = 0;
2749 alu.dst.write = 1;
2750
2751 if (signed_op) {
2752 alu.src[0].sel = tmp2;
2753 alu.src[0].chan = 1;
2754 } else {
2755 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2756 }
2757
2758 alu.last = 1;
2759 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2760 return r;
2761 }
2762
2763 /* 2. tmp0.z = lo (tmp0.x * src2) */
2764 if (ctx->bc->chip_class == CAYMAN) {
2765 for (j = 0 ; j < 4; j++) {
2766 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2767 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2768
2769 alu.dst.sel = tmp0;
2770 alu.dst.chan = j;
2771 alu.dst.write = (j == 2);
2772
2773 alu.src[0].sel = tmp0;
2774 alu.src[0].chan = 0;
2775 if (signed_op) {
2776 alu.src[1].sel = tmp2;
2777 alu.src[1].chan = 1;
2778 } else {
2779 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2780 }
2781
2782 alu.last = (j == 3);
2783 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2784 return r;
2785 }
2786 } else {
2787 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2788 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2789
2790 alu.dst.sel = tmp0;
2791 alu.dst.chan = 2;
2792 alu.dst.write = 1;
2793
2794 alu.src[0].sel = tmp0;
2795 alu.src[0].chan = 0;
2796 if (signed_op) {
2797 alu.src[1].sel = tmp2;
2798 alu.src[1].chan = 1;
2799 } else {
2800 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2801 }
2802
2803 alu.last = 1;
2804 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2805 return r;
2806 }
2807
2808 /* 3. tmp0.w = -tmp0.z */
2809 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2810 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2811
2812 alu.dst.sel = tmp0;
2813 alu.dst.chan = 3;
2814 alu.dst.write = 1;
2815
2816 alu.src[0].sel = V_SQ_ALU_SRC_0;
2817 alu.src[1].sel = tmp0;
2818 alu.src[1].chan = 2;
2819
2820 alu.last = 1;
2821 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2822 return r;
2823
2824 /* 4. tmp0.y = hi (tmp0.x * src2) */
2825 if (ctx->bc->chip_class == CAYMAN) {
2826 for (j = 0 ; j < 4; j++) {
2827 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2828 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2829
2830 alu.dst.sel = tmp0;
2831 alu.dst.chan = j;
2832 alu.dst.write = (j == 1);
2833
2834 alu.src[0].sel = tmp0;
2835 alu.src[0].chan = 0;
2836
2837 if (signed_op) {
2838 alu.src[1].sel = tmp2;
2839 alu.src[1].chan = 1;
2840 } else {
2841 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2842 }
2843 alu.last = (j == 3);
2844 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2845 return r;
2846 }
2847 } else {
2848 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2849 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2850
2851 alu.dst.sel = tmp0;
2852 alu.dst.chan = 1;
2853 alu.dst.write = 1;
2854
2855 alu.src[0].sel = tmp0;
2856 alu.src[0].chan = 0;
2857
2858 if (signed_op) {
2859 alu.src[1].sel = tmp2;
2860 alu.src[1].chan = 1;
2861 } else {
2862 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2863 }
2864
2865 alu.last = 1;
2866 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2867 return r;
2868 }
2869
2870 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2871 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2872 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2873 alu.is_op3 = 1;
2874
2875 alu.dst.sel = tmp0;
2876 alu.dst.chan = 2;
2877 alu.dst.write = 1;
2878
2879 alu.src[0].sel = tmp0;
2880 alu.src[0].chan = 1;
2881 alu.src[1].sel = tmp0;
2882 alu.src[1].chan = 3;
2883 alu.src[2].sel = tmp0;
2884 alu.src[2].chan = 2;
2885
2886 alu.last = 1;
2887 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2888 return r;
2889
2890 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2891 if (ctx->bc->chip_class == CAYMAN) {
2892 for (j = 0 ; j < 4; j++) {
2893 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2894 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2895
2896 alu.dst.sel = tmp0;
2897 alu.dst.chan = j;
2898 alu.dst.write = (j == 3);
2899
2900 alu.src[0].sel = tmp0;
2901 alu.src[0].chan = 2;
2902
2903 alu.src[1].sel = tmp0;
2904 alu.src[1].chan = 0;
2905
2906 alu.last = (j == 3);
2907 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2908 return r;
2909 }
2910 } else {
2911 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2912 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2913
2914 alu.dst.sel = tmp0;
2915 alu.dst.chan = 3;
2916 alu.dst.write = 1;
2917
2918 alu.src[0].sel = tmp0;
2919 alu.src[0].chan = 2;
2920
2921 alu.src[1].sel = tmp0;
2922 alu.src[1].chan = 0;
2923
2924 alu.last = 1;
2925 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2926 return r;
2927 }
2928
2929 /* 7. tmp1.x = tmp0.x - tmp0.w */
2930 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2931 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2932
2933 alu.dst.sel = tmp1;
2934 alu.dst.chan = 0;
2935 alu.dst.write = 1;
2936
2937 alu.src[0].sel = tmp0;
2938 alu.src[0].chan = 0;
2939 alu.src[1].sel = tmp0;
2940 alu.src[1].chan = 3;
2941
2942 alu.last = 1;
2943 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2944 return r;
2945
2946 /* 8. tmp1.y = tmp0.x + tmp0.w */
2947 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2948 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2949
2950 alu.dst.sel = tmp1;
2951 alu.dst.chan = 1;
2952 alu.dst.write = 1;
2953
2954 alu.src[0].sel = tmp0;
2955 alu.src[0].chan = 0;
2956 alu.src[1].sel = tmp0;
2957 alu.src[1].chan = 3;
2958
2959 alu.last = 1;
2960 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2961 return r;
2962
2963 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2964 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2965 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2966 alu.is_op3 = 1;
2967
2968 alu.dst.sel = tmp0;
2969 alu.dst.chan = 0;
2970 alu.dst.write = 1;
2971
2972 alu.src[0].sel = tmp0;
2973 alu.src[0].chan = 1;
2974 alu.src[1].sel = tmp1;
2975 alu.src[1].chan = 1;
2976 alu.src[2].sel = tmp1;
2977 alu.src[2].chan = 0;
2978
2979 alu.last = 1;
2980 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2981 return r;
2982
2983 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
2984 if (ctx->bc->chip_class == CAYMAN) {
2985 for (j = 0 ; j < 4; j++) {
2986 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2987 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2988
2989 alu.dst.sel = tmp0;
2990 alu.dst.chan = j;
2991 alu.dst.write = (j == 2);
2992
2993 alu.src[0].sel = tmp0;
2994 alu.src[0].chan = 0;
2995
2996 if (signed_op) {
2997 alu.src[1].sel = tmp2;
2998 alu.src[1].chan = 0;
2999 } else {
3000 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3001 }
3002
3003 alu.last = (j == 3);
3004 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3005 return r;
3006 }
3007 } else {
3008 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3009 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3010
3011 alu.dst.sel = tmp0;
3012 alu.dst.chan = 2;
3013 alu.dst.write = 1;
3014
3015 alu.src[0].sel = tmp0;
3016 alu.src[0].chan = 0;
3017
3018 if (signed_op) {
3019 alu.src[1].sel = tmp2;
3020 alu.src[1].chan = 0;
3021 } else {
3022 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3023 }
3024
3025 alu.last = 1;
3026 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3027 return r;
3028 }
3029
3030 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
3031 if (ctx->bc->chip_class == CAYMAN) {
3032 for (j = 0 ; j < 4; j++) {
3033 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3034 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3035
3036 alu.dst.sel = tmp0;
3037 alu.dst.chan = j;
3038 alu.dst.write = (j == 1);
3039
3040 if (signed_op) {
3041 alu.src[0].sel = tmp2;
3042 alu.src[0].chan = 1;
3043 } else {
3044 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3045 }
3046
3047 alu.src[1].sel = tmp0;
3048 alu.src[1].chan = 2;
3049
3050 alu.last = (j == 3);
3051 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3052 return r;
3053 }
3054 } else {
3055 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3056 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3057
3058 alu.dst.sel = tmp0;
3059 alu.dst.chan = 1;
3060 alu.dst.write = 1;
3061
3062 if (signed_op) {
3063 alu.src[0].sel = tmp2;
3064 alu.src[0].chan = 1;
3065 } else {
3066 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3067 }
3068
3069 alu.src[1].sel = tmp0;
3070 alu.src[1].chan = 2;
3071
3072 alu.last = 1;
3073 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3074 return r;
3075 }
3076
3077 /* 12. tmp0.w = src1 - tmp0.y = r */
3078 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3079 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3080
3081 alu.dst.sel = tmp0;
3082 alu.dst.chan = 3;
3083 alu.dst.write = 1;
3084
3085 if (signed_op) {
3086 alu.src[0].sel = tmp2;
3087 alu.src[0].chan = 0;
3088 } else {
3089 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3090 }
3091
3092 alu.src[1].sel = tmp0;
3093 alu.src[1].chan = 1;
3094
3095 alu.last = 1;
3096 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3097 return r;
3098
3099 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3100 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3102
3103 alu.dst.sel = tmp1;
3104 alu.dst.chan = 0;
3105 alu.dst.write = 1;
3106
3107 alu.src[0].sel = tmp0;
3108 alu.src[0].chan = 3;
3109 if (signed_op) {
3110 alu.src[1].sel = tmp2;
3111 alu.src[1].chan = 1;
3112 } else {
3113 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3114 }
3115
3116 alu.last = 1;
3117 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3118 return r;
3119
3120 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3121 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3123
3124 alu.dst.sel = tmp1;
3125 alu.dst.chan = 1;
3126 alu.dst.write = 1;
3127
3128 if (signed_op) {
3129 alu.src[0].sel = tmp2;
3130 alu.src[0].chan = 0;
3131 } else {
3132 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3133 }
3134
3135 alu.src[1].sel = tmp0;
3136 alu.src[1].chan = 1;
3137
3138 alu.last = 1;
3139 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3140 return r;
3141
3142 if (mod) { /* UMOD */
3143
3144 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3145 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3146 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3147
3148 alu.dst.sel = tmp1;
3149 alu.dst.chan = 2;
3150 alu.dst.write = 1;
3151
3152 alu.src[0].sel = tmp0;
3153 alu.src[0].chan = 3;
3154
3155 if (signed_op) {
3156 alu.src[1].sel = tmp2;
3157 alu.src[1].chan = 1;
3158 } else {
3159 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3160 }
3161
3162 alu.last = 1;
3163 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3164 return r;
3165
3166 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3167 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3168 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3169
3170 alu.dst.sel = tmp1;
3171 alu.dst.chan = 3;
3172 alu.dst.write = 1;
3173
3174 alu.src[0].sel = tmp0;
3175 alu.src[0].chan = 3;
3176 if (signed_op) {
3177 alu.src[1].sel = tmp2;
3178 alu.src[1].chan = 1;
3179 } else {
3180 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3181 }
3182
3183 alu.last = 1;
3184 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3185 return r;
3186
3187 } else { /* UDIV */
3188
3189 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3190 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3191 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3192
3193 alu.dst.sel = tmp1;
3194 alu.dst.chan = 2;
3195 alu.dst.write = 1;
3196
3197 alu.src[0].sel = tmp0;
3198 alu.src[0].chan = 2;
3199 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3200
3201 alu.last = 1;
3202 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3203 return r;
3204
3205 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3206 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3207 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3208
3209 alu.dst.sel = tmp1;
3210 alu.dst.chan = 3;
3211 alu.dst.write = 1;
3212
3213 alu.src[0].sel = tmp0;
3214 alu.src[0].chan = 2;
3215 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3216
3217 alu.last = 1;
3218 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3219 return r;
3220
3221 }
3222
3223 /* 17. tmp1.x = tmp1.x & tmp1.y */
3224 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3225 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3226
3227 alu.dst.sel = tmp1;
3228 alu.dst.chan = 0;
3229 alu.dst.write = 1;
3230
3231 alu.src[0].sel = tmp1;
3232 alu.src[0].chan = 0;
3233 alu.src[1].sel = tmp1;
3234 alu.src[1].chan = 1;
3235
3236 alu.last = 1;
3237 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3238 return r;
3239
3240 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3241 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3242 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3243 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3244 alu.is_op3 = 1;
3245
3246 alu.dst.sel = tmp0;
3247 alu.dst.chan = 2;
3248 alu.dst.write = 1;
3249
3250 alu.src[0].sel = tmp1;
3251 alu.src[0].chan = 0;
3252 alu.src[1].sel = tmp0;
3253 alu.src[1].chan = mod ? 3 : 2;
3254 alu.src[2].sel = tmp1;
3255 alu.src[2].chan = 2;
3256
3257 alu.last = 1;
3258 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3259 return r;
3260
3261 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3262 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3263 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3264 alu.is_op3 = 1;
3265
3266 if (signed_op) {
3267 alu.dst.sel = tmp0;
3268 alu.dst.chan = 2;
3269 alu.dst.write = 1;
3270 } else {
3271 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3272 }
3273
3274 alu.src[0].sel = tmp1;
3275 alu.src[0].chan = 1;
3276 alu.src[1].sel = tmp1;
3277 alu.src[1].chan = 3;
3278 alu.src[2].sel = tmp0;
3279 alu.src[2].chan = 2;
3280
3281 alu.last = 1;
3282 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3283 return r;
3284
3285 if (signed_op) {
3286
3287 /* fix the sign of the result */
3288
3289 if (mod) {
3290
3291 /* tmp0.x = -tmp0.z */
3292 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3293 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3294
3295 alu.dst.sel = tmp0;
3296 alu.dst.chan = 0;
3297 alu.dst.write = 1;
3298
3299 alu.src[0].sel = V_SQ_ALU_SRC_0;
3300 alu.src[1].sel = tmp0;
3301 alu.src[1].chan = 2;
3302
3303 alu.last = 1;
3304 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3305 return r;
3306
3307 /* sign of the remainder is the same as the sign of src0 */
3308 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3309 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3310 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3311 alu.is_op3 = 1;
3312
3313 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3314
3315 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3316 alu.src[1].sel = tmp0;
3317 alu.src[1].chan = 2;
3318 alu.src[2].sel = tmp0;
3319 alu.src[2].chan = 0;
3320
3321 alu.last = 1;
3322 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3323 return r;
3324
3325 } else {
3326
3327 /* tmp0.x = -tmp0.z */
3328 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3330
3331 alu.dst.sel = tmp0;
3332 alu.dst.chan = 0;
3333 alu.dst.write = 1;
3334
3335 alu.src[0].sel = V_SQ_ALU_SRC_0;
3336 alu.src[1].sel = tmp0;
3337 alu.src[1].chan = 2;
3338
3339 alu.last = 1;
3340 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3341 return r;
3342
3343 /* fix the quotient sign (same as the sign of src0*src1) */
3344 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3345 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3346 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3347 alu.is_op3 = 1;
3348
3349 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3350
3351 alu.src[0].sel = tmp2;
3352 alu.src[0].chan = 2;
3353 alu.src[1].sel = tmp0;
3354 alu.src[1].chan = 2;
3355 alu.src[2].sel = tmp0;
3356 alu.src[2].chan = 0;
3357
3358 alu.last = 1;
3359 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3360 return r;
3361 }
3362 }
3363 }
3364 return 0;
3365 }
3366
/* TGSI UDIV: unsigned division, i.e. tgsi_divmod() with mod = 0 and
 * signed_op = 0. */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3371
/* TGSI UMOD: unsigned remainder, i.e. tgsi_divmod() with mod = 1 and
 * signed_op = 0. */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3376
/* TGSI IDIV: signed division, i.e. tgsi_divmod() with mod = 0 and
 * signed_op = 1. */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3381
/* TGSI IMOD: signed remainder, i.e. tgsi_divmod() with mod = 1 and
 * signed_op = 1. */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
3386
3387
3388 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3389 {
3390 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3391 struct r600_bytecode_alu alu;
3392 int i, r;
3393 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3394 int last_inst = tgsi_last_instruction(write_mask);
3395
3396 for (i = 0; i < 4; i++) {
3397 if (!(write_mask & (1<<i)))
3398 continue;
3399
3400 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3402
3403 alu.dst.sel = ctx->temp_reg;
3404 alu.dst.chan = i;
3405 alu.dst.write = 1;
3406
3407 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3408 if (i == last_inst)
3409 alu.last = 1;
3410 r = r600_bytecode_add_alu(ctx->bc, &alu);
3411 if (r)
3412 return r;
3413 }
3414
3415 for (i = 0; i < 4; i++) {
3416 if (!(write_mask & (1<<i)))
3417 continue;
3418
3419 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3420 alu.inst = ctx->inst_info->r600_opcode;
3421
3422 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3423
3424 alu.src[0].sel = ctx->temp_reg;
3425 alu.src[0].chan = i;
3426
3427 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3428 alu.last = 1;
3429 r = r600_bytecode_add_alu(ctx->bc, &alu);
3430 if (r)
3431 return r;
3432 }
3433
3434 return 0;
3435 }
3436
3437 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3438 {
3439 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3440 struct r600_bytecode_alu alu;
3441 int i, r;
3442 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3443 int last_inst = tgsi_last_instruction(write_mask);
3444
3445 /* tmp = -src */
3446 for (i = 0; i < 4; i++) {
3447 if (!(write_mask & (1<<i)))
3448 continue;
3449
3450 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3451 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3452
3453 alu.dst.sel = ctx->temp_reg;
3454 alu.dst.chan = i;
3455 alu.dst.write = 1;
3456
3457 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3458 alu.src[0].sel = V_SQ_ALU_SRC_0;
3459
3460 if (i == last_inst)
3461 alu.last = 1;
3462 r = r600_bytecode_add_alu(ctx->bc, &alu);
3463 if (r)
3464 return r;
3465 }
3466
3467 /* dst = (src >= 0 ? src : tmp) */
3468 for (i = 0; i < 4; i++) {
3469 if (!(write_mask & (1<<i)))
3470 continue;
3471
3472 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3473 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3474 alu.is_op3 = 1;
3475 alu.dst.write = 1;
3476
3477 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3478
3479 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3480 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3481 alu.src[2].sel = ctx->temp_reg;
3482 alu.src[2].chan = i;
3483
3484 if (i == last_inst)
3485 alu.last = 1;
3486 r = r600_bytecode_add_alu(ctx->bc, &alu);
3487 if (r)
3488 return r;
3489 }
3490 return 0;
3491 }
3492
3493 static int tgsi_issg(struct r600_shader_ctx *ctx)
3494 {
3495 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3496 struct r600_bytecode_alu alu;
3497 int i, r;
3498 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3499 int last_inst = tgsi_last_instruction(write_mask);
3500
3501 /* tmp = (src >= 0 ? src : -1) */
3502 for (i = 0; i < 4; i++) {
3503 if (!(write_mask & (1<<i)))
3504 continue;
3505
3506 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3507 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3508 alu.is_op3 = 1;
3509
3510 alu.dst.sel = ctx->temp_reg;
3511 alu.dst.chan = i;
3512 alu.dst.write = 1;
3513
3514 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3515 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3516 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3517
3518 if (i == last_inst)
3519 alu.last = 1;
3520 r = r600_bytecode_add_alu(ctx->bc, &alu);
3521 if (r)
3522 return r;
3523 }
3524
3525 /* dst = (tmp > 0 ? 1 : tmp) */
3526 for (i = 0; i < 4; i++) {
3527 if (!(write_mask & (1<<i)))
3528 continue;
3529
3530 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3531 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3532 alu.is_op3 = 1;
3533 alu.dst.write = 1;
3534
3535 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3536
3537 alu.src[0].sel = ctx->temp_reg;
3538 alu.src[0].chan = i;
3539
3540 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3541
3542 alu.src[2].sel = ctx->temp_reg;
3543 alu.src[2].chan = i;
3544
3545 if (i == last_inst)
3546 alu.last = 1;
3547 r = r600_bytecode_add_alu(ctx->bc, &alu);
3548 if (r)
3549 return r;
3550 }
3551 return 0;
3552 }
3553
3554
3555
3556 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3557 {
3558 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3559 struct r600_bytecode_alu alu;
3560 int i, r;
3561
3562 /* tmp = (src > 0 ? 1 : src) */
3563 for (i = 0; i < 4; i++) {
3564 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3565 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3566 alu.is_op3 = 1;
3567
3568 alu.dst.sel = ctx->temp_reg;
3569 alu.dst.chan = i;
3570
3571 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3572 alu.src[1].sel = V_SQ_ALU_SRC_1;
3573 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3574
3575 if (i == 3)
3576 alu.last = 1;
3577 r = r600_bytecode_add_alu(ctx->bc, &alu);
3578 if (r)
3579 return r;
3580 }
3581
3582 /* dst = (-tmp > 0 ? -1 : tmp) */
3583 for (i = 0; i < 4; i++) {
3584 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3585 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3586 alu.is_op3 = 1;
3587 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3588
3589 alu.src[0].sel = ctx->temp_reg;
3590 alu.src[0].chan = i;
3591 alu.src[0].neg = 1;
3592
3593 alu.src[1].sel = V_SQ_ALU_SRC_1;
3594 alu.src[1].neg = 1;
3595
3596 alu.src[2].sel = ctx->temp_reg;
3597 alu.src[2].chan = i;
3598
3599 if (i == 3)
3600 alu.last = 1;
3601 r = r600_bytecode_add_alu(ctx->bc, &alu);
3602 if (r)
3603 return r;
3604 }
3605 return 0;
3606 }
3607
3608 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3609 {
3610 struct r600_bytecode_alu alu;
3611 int i, r;
3612
3613 for (i = 0; i < 4; i++) {
3614 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3615 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3616 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3617 alu.dst.chan = i;
3618 } else {
3619 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3620 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3621 alu.src[0].sel = ctx->temp_reg;
3622 alu.src[0].chan = i;
3623 }
3624 if (i == 3) {
3625 alu.last = 1;
3626 }
3627 r = r600_bytecode_add_alu(ctx->bc, &alu);
3628 if (r)
3629 return r;
3630 }
3631 return 0;
3632 }
3633
3634 static int tgsi_op3(struct r600_shader_ctx *ctx)
3635 {
3636 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3637 struct r600_bytecode_alu alu;
3638 int i, j, r;
3639 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3640
3641 for (i = 0; i < lasti + 1; i++) {
3642 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3643 continue;
3644
3645 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3646 alu.inst = ctx->inst_info->r600_opcode;
3647 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3648 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3649 }
3650
3651 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3652 alu.dst.chan = i;
3653 alu.dst.write = 1;
3654 alu.is_op3 = 1;
3655 if (i == lasti) {
3656 alu.last = 1;
3657 }
3658 r = r600_bytecode_add_alu(ctx->bc, &alu);
3659 if (r)
3660 return r;
3661 }
3662 return 0;
3663 }
3664
3665 static int tgsi_dp(struct r600_shader_ctx *ctx)
3666 {
3667 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3668 struct r600_bytecode_alu alu;
3669 int i, j, r;
3670
3671 for (i = 0; i < 4; i++) {
3672 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3673 alu.inst = ctx->inst_info->r600_opcode;
3674 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3675 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3676 }
3677
3678 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3679 alu.dst.chan = i;
3680 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3681 /* handle some special cases */
3682 switch (ctx->inst_info->tgsi_opcode) {
3683 case TGSI_OPCODE_DP2:
3684 if (i > 1) {
3685 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3686 alu.src[0].chan = alu.src[1].chan = 0;
3687 }
3688 break;
3689 case TGSI_OPCODE_DP3:
3690 if (i > 2) {
3691 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3692 alu.src[0].chan = alu.src[1].chan = 0;
3693 }
3694 break;
3695 case TGSI_OPCODE_DPH:
3696 if (i == 3) {
3697 alu.src[0].sel = V_SQ_ALU_SRC_1;
3698 alu.src[0].chan = 0;
3699 alu.src[0].neg = 0;
3700 }
3701 break;
3702 default:
3703 break;
3704 }
3705 if (i == 3) {
3706 alu.last = 1;
3707 }
3708 r = r600_bytecode_add_alu(ctx->bc, &alu);
3709 if (r)
3710 return r;
3711 }
3712 return 0;
3713 }
3714
3715 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3716 unsigned index)
3717 {
3718 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3719 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3720 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3721 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3722 ctx->src[index].neg || ctx->src[index].abs;
3723 }
3724
3725 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3726 unsigned index)
3727 {
3728 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3729 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3730 }
3731
3732 static int tgsi_tex(struct r600_shader_ctx *ctx)
3733 {
3734 static float one_point_five = 1.5f;
3735 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3736 struct r600_bytecode_tex tex;
3737 struct r600_bytecode_alu alu;
3738 unsigned src_gpr;
3739 int r, i, j;
3740 int opcode;
3741 /* Texture fetch instructions can only use gprs as source.
3742 * Also they cannot negate the source or take the absolute value */
3743 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
3744 boolean src_loaded = FALSE;
3745 unsigned sampler_src_reg = 1;
3746 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3747
3748 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3749
3750 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3751 /* get offset values */
3752 if (inst->Texture.NumOffsets) {
3753 assert(inst->Texture.NumOffsets == 1);
3754
3755 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3756 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3757 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3758 }
3759 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3760 /* TGSI moves the sampler to src reg 3 for TXD */
3761 sampler_src_reg = 3;
3762
3763 for (i = 1; i < 3; i++) {
3764 /* set gradients h/v */
3765 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3766 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3767 SQ_TEX_INST_SET_GRADIENTS_V;
3768 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3769 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3770
3771 if (tgsi_tex_src_requires_loading(ctx, i)) {
3772 tex.src_gpr = r600_get_temp(ctx);
3773 tex.src_sel_x = 0;
3774 tex.src_sel_y = 1;
3775 tex.src_sel_z = 2;
3776 tex.src_sel_w = 3;
3777
3778 for (j = 0; j < 4; j++) {
3779 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3780 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3781 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3782 alu.dst.sel = tex.src_gpr;
3783 alu.dst.chan = j;
3784 if (j == 3)
3785 alu.last = 1;
3786 alu.dst.write = 1;
3787 r = r600_bytecode_add_alu(ctx->bc, &alu);
3788 if (r)
3789 return r;
3790 }
3791
3792 } else {
3793 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3794 tex.src_sel_x = ctx->src[i].swizzle[0];
3795 tex.src_sel_y = ctx->src[i].swizzle[1];
3796 tex.src_sel_z = ctx->src[i].swizzle[2];
3797 tex.src_sel_w = ctx->src[i].swizzle[3];
3798 tex.src_rel = ctx->src[i].rel;
3799 }
3800 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3801 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3802 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3803 tex.coord_type_x = 1;
3804 tex.coord_type_y = 1;
3805 tex.coord_type_z = 1;
3806 tex.coord_type_w = 1;
3807 }
3808 r = r600_bytecode_add_tex(ctx->bc, &tex);
3809 if (r)
3810 return r;
3811 }
3812 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3813 int out_chan;
3814 /* Add perspective divide */
3815 if (ctx->bc->chip_class == CAYMAN) {
3816 out_chan = 2;
3817 for (i = 0; i < 3; i++) {
3818 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3819 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3820 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3821
3822 alu.dst.sel = ctx->temp_reg;
3823 alu.dst.chan = i;
3824 if (i == 2)
3825 alu.last = 1;
3826 if (out_chan == i)
3827 alu.dst.write = 1;
3828 r = r600_bytecode_add_alu(ctx->bc, &alu);
3829 if (r)
3830 return r;
3831 }
3832
3833 } else {
3834 out_chan = 3;
3835 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3836 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3837 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3838
3839 alu.dst.sel = ctx->temp_reg;
3840 alu.dst.chan = out_chan;
3841 alu.last = 1;
3842 alu.dst.write = 1;
3843 r = r600_bytecode_add_alu(ctx->bc, &alu);
3844 if (r)
3845 return r;
3846 }
3847
3848 for (i = 0; i < 3; i++) {
3849 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3850 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3851 alu.src[0].sel = ctx->temp_reg;
3852 alu.src[0].chan = out_chan;
3853 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3854 alu.dst.sel = ctx->temp_reg;
3855 alu.dst.chan = i;
3856 alu.dst.write = 1;
3857 r = r600_bytecode_add_alu(ctx->bc, &alu);
3858 if (r)
3859 return r;
3860 }
3861 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3862 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3863 alu.src[0].sel = V_SQ_ALU_SRC_1;
3864 alu.src[0].chan = 0;
3865 alu.dst.sel = ctx->temp_reg;
3866 alu.dst.chan = 3;
3867 alu.last = 1;
3868 alu.dst.write = 1;
3869 r = r600_bytecode_add_alu(ctx->bc, &alu);
3870 if (r)
3871 return r;
3872 src_loaded = TRUE;
3873 src_gpr = ctx->temp_reg;
3874 }
3875
3876 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3877 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3878 inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
3879
3880 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3881 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3882
3883 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3884 for (i = 0; i < 4; i++) {
3885 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3886 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3887 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3888 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3889 alu.dst.sel = ctx->temp_reg;
3890 alu.dst.chan = i;
3891 if (i == 3)
3892 alu.last = 1;
3893 alu.dst.write = 1;
3894 r = r600_bytecode_add_alu(ctx->bc, &alu);
3895 if (r)
3896 return r;
3897 }
3898
3899 /* tmp1.z = RCP_e(|tmp1.z|) */
3900 if (ctx->bc->chip_class == CAYMAN) {
3901 for (i = 0; i < 3; i++) {
3902 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3903 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3904 alu.src[0].sel = ctx->temp_reg;
3905 alu.src[0].chan = 2;
3906 alu.src[0].abs = 1;
3907 alu.dst.sel = ctx->temp_reg;
3908 alu.dst.chan = i;
3909 if (i == 2)
3910 alu.dst.write = 1;
3911 if (i == 2)
3912 alu.last = 1;
3913 r = r600_bytecode_add_alu(ctx->bc, &alu);
3914 if (r)
3915 return r;
3916 }
3917 } else {
3918 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3919 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3920 alu.src[0].sel = ctx->temp_reg;
3921 alu.src[0].chan = 2;
3922 alu.src[0].abs = 1;
3923 alu.dst.sel = ctx->temp_reg;
3924 alu.dst.chan = 2;
3925 alu.dst.write = 1;
3926 alu.last = 1;
3927 r = r600_bytecode_add_alu(ctx->bc, &alu);
3928 if (r)
3929 return r;
3930 }
3931
3932 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3933 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3934 * muladd has no writemask, have to use another temp
3935 */
3936 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3937 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3938 alu.is_op3 = 1;
3939
3940 alu.src[0].sel = ctx->temp_reg;
3941 alu.src[0].chan = 0;
3942 alu.src[1].sel = ctx->temp_reg;
3943 alu.src[1].chan = 2;
3944
3945 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3946 alu.src[2].chan = 0;
3947 alu.src[2].value = *(uint32_t *)&one_point_five;
3948
3949 alu.dst.sel = ctx->temp_reg;
3950 alu.dst.chan = 0;
3951 alu.dst.write = 1;
3952
3953 r = r600_bytecode_add_alu(ctx->bc, &alu);
3954 if (r)
3955 return r;
3956
3957 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3958 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3959 alu.is_op3 = 1;
3960
3961 alu.src[0].sel = ctx->temp_reg;
3962 alu.src[0].chan = 1;
3963 alu.src[1].sel = ctx->temp_reg;
3964 alu.src[1].chan = 2;
3965
3966 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3967 alu.src[2].chan = 0;
3968 alu.src[2].value = *(uint32_t *)&one_point_five;
3969
3970 alu.dst.sel = ctx->temp_reg;
3971 alu.dst.chan = 1;
3972 alu.dst.write = 1;
3973
3974 alu.last = 1;
3975 r = r600_bytecode_add_alu(ctx->bc, &alu);
3976 if (r)
3977 return r;
3978 /* write initial W value into Z component */
3979 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
3980 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3981 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3982 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3983 alu.dst.sel = ctx->temp_reg;
3984 alu.dst.chan = 2;
3985 alu.dst.write = 1;
3986 alu.last = 1;
3987 r = r600_bytecode_add_alu(ctx->bc, &alu);
3988 if (r)
3989 return r;
3990 }
3991 src_loaded = TRUE;
3992 src_gpr = ctx->temp_reg;
3993 }
3994
3995 if (src_requires_loading && !src_loaded) {
3996 for (i = 0; i < 4; i++) {
3997 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3998 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3999 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4000 alu.dst.sel = ctx->temp_reg;
4001 alu.dst.chan = i;
4002 if (i == 3)
4003 alu.last = 1;
4004 alu.dst.write = 1;
4005 r = r600_bytecode_add_alu(ctx->bc, &alu);
4006 if (r)
4007 return r;
4008 }
4009 src_loaded = TRUE;
4010 src_gpr = ctx->temp_reg;
4011 }
4012
4013 opcode = ctx->inst_info->r600_opcode;
4014 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4015 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4016 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4017 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4018 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4019 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
4020 switch (opcode) {
4021 case SQ_TEX_INST_SAMPLE:
4022 opcode = SQ_TEX_INST_SAMPLE_C;
4023 break;
4024 case SQ_TEX_INST_SAMPLE_L:
4025 opcode = SQ_TEX_INST_SAMPLE_C_L;
4026 break;
4027 case SQ_TEX_INST_SAMPLE_LB:
4028 opcode = SQ_TEX_INST_SAMPLE_C_LB;
4029 break;
4030 case SQ_TEX_INST_SAMPLE_G:
4031 opcode = SQ_TEX_INST_SAMPLE_C_G;
4032 break;
4033 }
4034 }
4035
4036 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4037 tex.inst = opcode;
4038
4039 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4040 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4041 tex.src_gpr = src_gpr;
4042 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4043 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4044 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4045 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4046 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4047 if (src_loaded) {
4048 tex.src_sel_x = 0;
4049 tex.src_sel_y = 1;
4050 tex.src_sel_z = 2;
4051 tex.src_sel_w = 3;
4052 } else {
4053 tex.src_sel_x = ctx->src[0].swizzle[0];
4054 tex.src_sel_y = ctx->src[0].swizzle[1];
4055 tex.src_sel_z = ctx->src[0].swizzle[2];
4056 tex.src_sel_w = ctx->src[0].swizzle[3];
4057 tex.src_rel = ctx->src[0].rel;
4058 }
4059
4060 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
4061 tex.src_sel_x = 1;
4062 tex.src_sel_y = 0;
4063 tex.src_sel_z = 3;
4064 tex.src_sel_w = 1;
4065 }
4066 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4067 tex.src_sel_x = 1;
4068 tex.src_sel_y = 0;
4069 tex.src_sel_z = 3;
4070 tex.src_sel_w = 2; /* route Z compare value into W */
4071 }
4072
4073 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4074 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4075 tex.coord_type_x = 1;
4076 tex.coord_type_y = 1;
4077 }
4078 tex.coord_type_z = 1;
4079 tex.coord_type_w = 1;
4080
4081 tex.offset_x = offset_x;
4082 tex.offset_y = offset_y;
4083 tex.offset_z = offset_z;
4084
4085 /* Put the depth for comparison in W.
4086 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4087 * Some instructions expect the depth in Z. */
4088 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4089 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4090 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4091 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4092 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4093 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4094 tex.src_sel_w = tex.src_sel_z;
4095 }
4096
4097 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4098 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4099 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4100 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4101 /* the array index is read from Y */
4102 tex.coord_type_y = 0;
4103 } else {
4104 /* the array index is read from Z */
4105 tex.coord_type_z = 0;
4106 tex.src_sel_z = tex.src_sel_y;
4107 }
4108 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4109 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4110 /* the array index is read from Z */
4111 tex.coord_type_z = 0;
4112
4113 r = r600_bytecode_add_tex(ctx->bc, &tex);
4114 if (r)
4115 return r;
4116
4117 /* add shadow ambient support - gallium doesn't do it yet */
4118 return 0;
4119 }
4120
4121 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4122 {
4123 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4124 struct r600_bytecode_alu alu;
4125 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4126 unsigned i;
4127 int r;
4128
4129 /* optimize if it's just an equal balance */
4130 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4131 for (i = 0; i < lasti + 1; i++) {
4132 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4133 continue;
4134
4135 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4136 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4137 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4138 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4139 alu.omod = 3;
4140 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4141 alu.dst.chan = i;
4142 if (i == lasti) {
4143 alu.last = 1;
4144 }
4145 r = r600_bytecode_add_alu(ctx->bc, &alu);
4146 if (r)
4147 return r;
4148 }
4149 return 0;
4150 }
4151
4152 /* 1 - src0 */
4153 for (i = 0; i < lasti + 1; i++) {
4154 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4155 continue;
4156
4157 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4158 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4159 alu.src[0].sel = V_SQ_ALU_SRC_1;
4160 alu.src[0].chan = 0;
4161 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4162 r600_bytecode_src_toggle_neg(&alu.src[1]);
4163 alu.dst.sel = ctx->temp_reg;
4164 alu.dst.chan = i;
4165 if (i == lasti) {
4166 alu.last = 1;
4167 }
4168 alu.dst.write = 1;
4169 r = r600_bytecode_add_alu(ctx->bc, &alu);
4170 if (r)
4171 return r;
4172 }
4173
4174 /* (1 - src0) * src2 */
4175 for (i = 0; i < lasti + 1; i++) {
4176 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4177 continue;
4178
4179 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4181 alu.src[0].sel = ctx->temp_reg;
4182 alu.src[0].chan = i;
4183 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4184 alu.dst.sel = ctx->temp_reg;
4185 alu.dst.chan = i;
4186 if (i == lasti) {
4187 alu.last = 1;
4188 }
4189 alu.dst.write = 1;
4190 r = r600_bytecode_add_alu(ctx->bc, &alu);
4191 if (r)
4192 return r;
4193 }
4194
4195 /* src0 * src1 + (1 - src0) * src2 */
4196 for (i = 0; i < lasti + 1; i++) {
4197 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4198 continue;
4199
4200 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4202 alu.is_op3 = 1;
4203 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4204 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4205 alu.src[2].sel = ctx->temp_reg;
4206 alu.src[2].chan = i;
4207
4208 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4209 alu.dst.chan = i;
4210 if (i == lasti) {
4211 alu.last = 1;
4212 }
4213 r = r600_bytecode_add_alu(ctx->bc, &alu);
4214 if (r)
4215 return r;
4216 }
4217 return 0;
4218 }
4219
4220 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4221 {
4222 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4223 struct r600_bytecode_alu alu;
4224 int i, r;
4225 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4226
4227 for (i = 0; i < lasti + 1; i++) {
4228 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4229 continue;
4230
4231 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4232 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4233 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4234 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4235 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4236 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4237 alu.dst.chan = i;
4238 alu.dst.write = 1;
4239 alu.is_op3 = 1;
4240 if (i == lasti)
4241 alu.last = 1;
4242 r = r600_bytecode_add_alu(ctx->bc, &alu);
4243 if (r)
4244 return r;
4245 }
4246 return 0;
4247 }
4248
4249 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4250 {
4251 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4252 static const unsigned int src0_swizzle[] = {2, 0, 1};
4253 static const unsigned int src1_swizzle[] = {1, 2, 0};
4254 struct r600_bytecode_alu alu;
4255 uint32_t use_temp = 0;
4256 int i, r;
4257
4258 if (inst->Dst[0].Register.WriteMask != 0xf)
4259 use_temp = 1;
4260
4261 for (i = 0; i < 4; i++) {
4262 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4263 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4264 if (i < 3) {
4265 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4266 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4267 } else {
4268 alu.src[0].sel = V_SQ_ALU_SRC_0;
4269 alu.src[0].chan = i;
4270 alu.src[1].sel = V_SQ_ALU_SRC_0;
4271 alu.src[1].chan = i;
4272 }
4273
4274 alu.dst.sel = ctx->temp_reg;
4275 alu.dst.chan = i;
4276 alu.dst.write = 1;
4277
4278 if (i == 3)
4279 alu.last = 1;
4280 r = r600_bytecode_add_alu(ctx->bc, &alu);
4281 if (r)
4282 return r;
4283 }
4284
4285 for (i = 0; i < 4; i++) {
4286 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4287 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4288
4289 if (i < 3) {
4290 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4291 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4292 } else {
4293 alu.src[0].sel = V_SQ_ALU_SRC_0;
4294 alu.src[0].chan = i;
4295 alu.src[1].sel = V_SQ_ALU_SRC_0;
4296 alu.src[1].chan = i;
4297 }
4298
4299 alu.src[2].sel = ctx->temp_reg;
4300 alu.src[2].neg = 1;
4301 alu.src[2].chan = i;
4302
4303 if (use_temp)
4304 alu.dst.sel = ctx->temp_reg;
4305 else
4306 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4307 alu.dst.chan = i;
4308 alu.dst.write = 1;
4309 alu.is_op3 = 1;
4310 if (i == 3)
4311 alu.last = 1;
4312 r = r600_bytecode_add_alu(ctx->bc, &alu);
4313 if (r)
4314 return r;
4315 }
4316 if (use_temp)
4317 return tgsi_helper_copy(ctx, inst);
4318 return 0;
4319 }
4320
4321 static int tgsi_exp(struct r600_shader_ctx *ctx)
4322 {
4323 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4324 struct r600_bytecode_alu alu;
4325 int r;
4326 int i;
4327
4328 /* result.x = 2^floor(src); */
4329 if (inst->Dst[0].Register.WriteMask & 1) {
4330 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4331
4332 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4333 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4334
4335 alu.dst.sel = ctx->temp_reg;
4336 alu.dst.chan = 0;
4337 alu.dst.write = 1;
4338 alu.last = 1;
4339 r = r600_bytecode_add_alu(ctx->bc, &alu);
4340 if (r)
4341 return r;
4342
4343 if (ctx->bc->chip_class == CAYMAN) {
4344 for (i = 0; i < 3; i++) {
4345 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4346 alu.src[0].sel = ctx->temp_reg;
4347 alu.src[0].chan = 0;
4348
4349 alu.dst.sel = ctx->temp_reg;
4350 alu.dst.chan = i;
4351 if (i == 0)
4352 alu.dst.write = 1;
4353 if (i == 2)
4354 alu.last = 1;
4355 r = r600_bytecode_add_alu(ctx->bc, &alu);
4356 if (r)
4357 return r;
4358 }
4359 } else {
4360 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4361 alu.src[0].sel = ctx->temp_reg;
4362 alu.src[0].chan = 0;
4363
4364 alu.dst.sel = ctx->temp_reg;
4365 alu.dst.chan = 0;
4366 alu.dst.write = 1;
4367 alu.last = 1;
4368 r = r600_bytecode_add_alu(ctx->bc, &alu);
4369 if (r)
4370 return r;
4371 }
4372 }
4373
4374 /* result.y = tmp - floor(tmp); */
4375 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4376 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4377
4378 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4379 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4380
4381 alu.dst.sel = ctx->temp_reg;
4382 #if 0
4383 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4384 if (r)
4385 return r;
4386 #endif
4387 alu.dst.write = 1;
4388 alu.dst.chan = 1;
4389
4390 alu.last = 1;
4391
4392 r = r600_bytecode_add_alu(ctx->bc, &alu);
4393 if (r)
4394 return r;
4395 }
4396
4397 /* result.z = RoughApprox2ToX(tmp);*/
4398 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4399 if (ctx->bc->chip_class == CAYMAN) {
4400 for (i = 0; i < 3; i++) {
4401 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4402 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4403 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4404
4405 alu.dst.sel = ctx->temp_reg;
4406 alu.dst.chan = i;
4407 if (i == 2) {
4408 alu.dst.write = 1;
4409 alu.last = 1;
4410 }
4411
4412 r = r600_bytecode_add_alu(ctx->bc, &alu);
4413 if (r)
4414 return r;
4415 }
4416 } else {
4417 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4419 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4420
4421 alu.dst.sel = ctx->temp_reg;
4422 alu.dst.write = 1;
4423 alu.dst.chan = 2;
4424
4425 alu.last = 1;
4426
4427 r = r600_bytecode_add_alu(ctx->bc, &alu);
4428 if (r)
4429 return r;
4430 }
4431 }
4432
4433 /* result.w = 1.0;*/
4434 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4435 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4436
4437 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4438 alu.src[0].sel = V_SQ_ALU_SRC_1;
4439 alu.src[0].chan = 0;
4440
4441 alu.dst.sel = ctx->temp_reg;
4442 alu.dst.chan = 3;
4443 alu.dst.write = 1;
4444 alu.last = 1;
4445 r = r600_bytecode_add_alu(ctx->bc, &alu);
4446 if (r)
4447 return r;
4448 }
4449 return tgsi_helper_copy(ctx, inst);
4450 }
4451
4452 static int tgsi_log(struct r600_shader_ctx *ctx)
4453 {
4454 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4455 struct r600_bytecode_alu alu;
4456 int r;
4457 int i;
4458
4459 /* result.x = floor(log2(|src|)); */
4460 if (inst->Dst[0].Register.WriteMask & 1) {
4461 if (ctx->bc->chip_class == CAYMAN) {
4462 for (i = 0; i < 3; i++) {
4463 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4464
4465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4466 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4467 r600_bytecode_src_set_abs(&alu.src[0]);
4468
4469 alu.dst.sel = ctx->temp_reg;
4470 alu.dst.chan = i;
4471 if (i == 0)
4472 alu.dst.write = 1;
4473 if (i == 2)
4474 alu.last = 1;
4475 r = r600_bytecode_add_alu(ctx->bc, &alu);
4476 if (r)
4477 return r;
4478 }
4479
4480 } else {
4481 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4482
4483 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4484 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4485 r600_bytecode_src_set_abs(&alu.src[0]);
4486
4487 alu.dst.sel = ctx->temp_reg;
4488 alu.dst.chan = 0;
4489 alu.dst.write = 1;
4490 alu.last = 1;
4491 r = r600_bytecode_add_alu(ctx->bc, &alu);
4492 if (r)
4493 return r;
4494 }
4495
4496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4497 alu.src[0].sel = ctx->temp_reg;
4498 alu.src[0].chan = 0;
4499
4500 alu.dst.sel = ctx->temp_reg;
4501 alu.dst.chan = 0;
4502 alu.dst.write = 1;
4503 alu.last = 1;
4504
4505 r = r600_bytecode_add_alu(ctx->bc, &alu);
4506 if (r)
4507 return r;
4508 }
4509
4510 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4511 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4512
4513 if (ctx->bc->chip_class == CAYMAN) {
4514 for (i = 0; i < 3; i++) {
4515 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4516
4517 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4518 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4519 r600_bytecode_src_set_abs(&alu.src[0]);
4520
4521 alu.dst.sel = ctx->temp_reg;
4522 alu.dst.chan = i;
4523 if (i == 1)
4524 alu.dst.write = 1;
4525 if (i == 2)
4526 alu.last = 1;
4527
4528 r = r600_bytecode_add_alu(ctx->bc, &alu);
4529 if (r)
4530 return r;
4531 }
4532 } else {
4533 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4534
4535 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4536 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4537 r600_bytecode_src_set_abs(&alu.src[0]);
4538
4539 alu.dst.sel = ctx->temp_reg;
4540 alu.dst.chan = 1;
4541 alu.dst.write = 1;
4542 alu.last = 1;
4543
4544 r = r600_bytecode_add_alu(ctx->bc, &alu);
4545 if (r)
4546 return r;
4547 }
4548
4549 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4550
4551 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4552 alu.src[0].sel = ctx->temp_reg;
4553 alu.src[0].chan = 1;
4554
4555 alu.dst.sel = ctx->temp_reg;
4556 alu.dst.chan = 1;
4557 alu.dst.write = 1;
4558 alu.last = 1;
4559
4560 r = r600_bytecode_add_alu(ctx->bc, &alu);
4561 if (r)
4562 return r;
4563
4564 if (ctx->bc->chip_class == CAYMAN) {
4565 for (i = 0; i < 3; i++) {
4566 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4567 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4568 alu.src[0].sel = ctx->temp_reg;
4569 alu.src[0].chan = 1;
4570
4571 alu.dst.sel = ctx->temp_reg;
4572 alu.dst.chan = i;
4573 if (i == 1)
4574 alu.dst.write = 1;
4575 if (i == 2)
4576 alu.last = 1;
4577
4578 r = r600_bytecode_add_alu(ctx->bc, &alu);
4579 if (r)
4580 return r;
4581 }
4582 } else {
4583 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4584 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4585 alu.src[0].sel = ctx->temp_reg;
4586 alu.src[0].chan = 1;
4587
4588 alu.dst.sel = ctx->temp_reg;
4589 alu.dst.chan = 1;
4590 alu.dst.write = 1;
4591 alu.last = 1;
4592
4593 r = r600_bytecode_add_alu(ctx->bc, &alu);
4594 if (r)
4595 return r;
4596 }
4597
4598 if (ctx->bc->chip_class == CAYMAN) {
4599 for (i = 0; i < 3; i++) {
4600 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4601 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4602 alu.src[0].sel = ctx->temp_reg;
4603 alu.src[0].chan = 1;
4604
4605 alu.dst.sel = ctx->temp_reg;
4606 alu.dst.chan = i;
4607 if (i == 1)
4608 alu.dst.write = 1;
4609 if (i == 2)
4610 alu.last = 1;
4611
4612 r = r600_bytecode_add_alu(ctx->bc, &alu);
4613 if (r)
4614 return r;
4615 }
4616 } else {
4617 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4618 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4619 alu.src[0].sel = ctx->temp_reg;
4620 alu.src[0].chan = 1;
4621
4622 alu.dst.sel = ctx->temp_reg;
4623 alu.dst.chan = 1;
4624 alu.dst.write = 1;
4625 alu.last = 1;
4626
4627 r = r600_bytecode_add_alu(ctx->bc, &alu);
4628 if (r)
4629 return r;
4630 }
4631
4632 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4633
4634 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4635
4636 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4637 r600_bytecode_src_set_abs(&alu.src[0]);
4638
4639 alu.src[1].sel = ctx->temp_reg;
4640 alu.src[1].chan = 1;
4641
4642 alu.dst.sel = ctx->temp_reg;
4643 alu.dst.chan = 1;
4644 alu.dst.write = 1;
4645 alu.last = 1;
4646
4647 r = r600_bytecode_add_alu(ctx->bc, &alu);
4648 if (r)
4649 return r;
4650 }
4651
4652 /* result.z = log2(|src|);*/
4653 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4654 if (ctx->bc->chip_class == CAYMAN) {
4655 for (i = 0; i < 3; i++) {
4656 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4657
4658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4659 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4660 r600_bytecode_src_set_abs(&alu.src[0]);
4661
4662 alu.dst.sel = ctx->temp_reg;
4663 if (i == 2)
4664 alu.dst.write = 1;
4665 alu.dst.chan = i;
4666 if (i == 2)
4667 alu.last = 1;
4668
4669 r = r600_bytecode_add_alu(ctx->bc, &alu);
4670 if (r)
4671 return r;
4672 }
4673 } else {
4674 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4675
4676 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4677 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4678 r600_bytecode_src_set_abs(&alu.src[0]);
4679
4680 alu.dst.sel = ctx->temp_reg;
4681 alu.dst.write = 1;
4682 alu.dst.chan = 2;
4683 alu.last = 1;
4684
4685 r = r600_bytecode_add_alu(ctx->bc, &alu);
4686 if (r)
4687 return r;
4688 }
4689 }
4690
4691 /* result.w = 1.0; */
4692 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4693 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4694
4695 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4696 alu.src[0].sel = V_SQ_ALU_SRC_1;
4697 alu.src[0].chan = 0;
4698
4699 alu.dst.sel = ctx->temp_reg;
4700 alu.dst.chan = 3;
4701 alu.dst.write = 1;
4702 alu.last = 1;
4703
4704 r = r600_bytecode_add_alu(ctx->bc, &alu);
4705 if (r)
4706 return r;
4707 }
4708
4709 return tgsi_helper_copy(ctx, inst);
4710 }
4711
4712 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4713 {
4714 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4715 struct r600_bytecode_alu alu;
4716 int r;
4717
4718 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4719
4720 switch (inst->Instruction.Opcode) {
4721 case TGSI_OPCODE_ARL:
4722 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4723 break;
4724 case TGSI_OPCODE_ARR:
4725 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4726 break;
4727 case TGSI_OPCODE_UARL:
4728 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4729 break;
4730 default:
4731 assert(0);
4732 return -1;
4733 }
4734
4735 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4736 alu.last = 1;
4737 alu.dst.sel = ctx->bc->ar_reg;
4738 alu.dst.write = 1;
4739 r = r600_bytecode_add_alu(ctx->bc, &alu);
4740 if (r)
4741 return r;
4742
4743 ctx->bc->ar_loaded = 0;
4744 return 0;
4745 }
4746 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4747 {
4748 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4749 struct r600_bytecode_alu alu;
4750 int r;
4751
4752 switch (inst->Instruction.Opcode) {
4753 case TGSI_OPCODE_ARL:
4754 memset(&alu, 0, sizeof(alu));
4755 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4756 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4757 alu.dst.sel = ctx->bc->ar_reg;
4758 alu.dst.write = 1;
4759 alu.last = 1;
4760
4761 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4762 return r;
4763
4764 memset(&alu, 0, sizeof(alu));
4765 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4766 alu.src[0].sel = ctx->bc->ar_reg;
4767 alu.dst.sel = ctx->bc->ar_reg;
4768 alu.dst.write = 1;
4769 alu.last = 1;
4770
4771 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4772 return r;
4773 break;
4774 case TGSI_OPCODE_ARR:
4775 memset(&alu, 0, sizeof(alu));
4776 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4777 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4778 alu.dst.sel = ctx->bc->ar_reg;
4779 alu.dst.write = 1;
4780 alu.last = 1;
4781
4782 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4783 return r;
4784 break;
4785 case TGSI_OPCODE_UARL:
4786 memset(&alu, 0, sizeof(alu));
4787 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4788 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4789 alu.dst.sel = ctx->bc->ar_reg;
4790 alu.dst.write = 1;
4791 alu.last = 1;
4792
4793 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4794 return r;
4795 break;
4796 default:
4797 assert(0);
4798 return -1;
4799 }
4800
4801 ctx->bc->ar_loaded = 0;
4802 return 0;
4803 }
4804
4805 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4806 {
4807 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4808 struct r600_bytecode_alu alu;
4809 int i, r = 0;
4810
4811 for (i = 0; i < 4; i++) {
4812 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4813
4814 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4815 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4816
4817 if (i == 0 || i == 3) {
4818 alu.src[0].sel = V_SQ_ALU_SRC_1;
4819 } else {
4820 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4821 }
4822
4823 if (i == 0 || i == 2) {
4824 alu.src[1].sel = V_SQ_ALU_SRC_1;
4825 } else {
4826 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4827 }
4828 if (i == 3)
4829 alu.last = 1;
4830 r = r600_bytecode_add_alu(ctx->bc, &alu);
4831 if (r)
4832 return r;
4833 }
4834 return 0;
4835 }
4836
4837 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4838 {
4839 struct r600_bytecode_alu alu;
4840 int r;
4841
4842 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4843 alu.inst = opcode;
4844 alu.predicate = 1;
4845
4846 alu.dst.sel = ctx->temp_reg;
4847 alu.dst.write = 1;
4848 alu.dst.chan = 0;
4849
4850 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4851 alu.src[1].sel = V_SQ_ALU_SRC_0;
4852 alu.src[1].chan = 0;
4853
4854 alu.last = 1;
4855
4856 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4857 if (r)
4858 return r;
4859 return 0;
4860 }
4861
4862 static int pops(struct r600_shader_ctx *ctx, int pops)
4863 {
4864 unsigned force_pop = ctx->bc->force_add_cf;
4865
4866 if (!force_pop) {
4867 int alu_pop = 3;
4868 if (ctx->bc->cf_last) {
4869 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4870 alu_pop = 0;
4871 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4872 alu_pop = 1;
4873 }
4874 alu_pop += pops;
4875 if (alu_pop == 1) {
4876 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4877 ctx->bc->force_add_cf = 1;
4878 } else if (alu_pop == 2) {
4879 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4880 ctx->bc->force_add_cf = 1;
4881 } else {
4882 force_pop = 1;
4883 }
4884 }
4885
4886 if (force_pop) {
4887 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4888 ctx->bc->cf_last->pop_count = pops;
4889 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4890 }
4891
4892 return 0;
4893 }
4894
4895 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4896 {
4897 switch(reason) {
4898 case FC_PUSH_VPM:
4899 ctx->bc->callstack[ctx->bc->call_sp].current--;
4900 break;
4901 case FC_PUSH_WQM:
4902 case FC_LOOP:
4903 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4904 break;
4905 case FC_REP:
4906 /* TOODO : for 16 vp asic should -= 2; */
4907 ctx->bc->callstack[ctx->bc->call_sp].current --;
4908 break;
4909 }
4910 }
4911
4912 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4913 {
4914 if (check_max_only) {
4915 int diff;
4916 switch (reason) {
4917 case FC_PUSH_VPM:
4918 diff = 1;
4919 break;
4920 case FC_PUSH_WQM:
4921 diff = 4;
4922 break;
4923 default:
4924 assert(0);
4925 diff = 0;
4926 }
4927 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4928 ctx->bc->callstack[ctx->bc->call_sp].max) {
4929 ctx->bc->callstack[ctx->bc->call_sp].max =
4930 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4931 }
4932 return;
4933 }
4934 switch (reason) {
4935 case FC_PUSH_VPM:
4936 ctx->bc->callstack[ctx->bc->call_sp].current++;
4937 break;
4938 case FC_PUSH_WQM:
4939 case FC_LOOP:
4940 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4941 break;
4942 case FC_REP:
4943 ctx->bc->callstack[ctx->bc->call_sp].current++;
4944 break;
4945 }
4946
4947 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4948 ctx->bc->callstack[ctx->bc->call_sp].max) {
4949 ctx->bc->callstack[ctx->bc->call_sp].max =
4950 ctx->bc->callstack[ctx->bc->call_sp].current;
4951 }
4952 }
4953
4954 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4955 {
4956 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4957
4958 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
4959 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
4960 sp->mid[sp->num_mid] = ctx->bc->cf_last;
4961 sp->num_mid++;
4962 }
4963
4964 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
4965 {
4966 ctx->bc->fc_sp++;
4967 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
4968 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
4969 }
4970
4971 static void fc_poplevel(struct r600_shader_ctx *ctx)
4972 {
4973 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
4974 if (sp->mid) {
4975 free(sp->mid);
4976 sp->mid = NULL;
4977 }
4978 sp->num_mid = 0;
4979 sp->start = NULL;
4980 sp->type = 0;
4981 ctx->bc->fc_sp--;
4982 }
4983
#if 0
/*
 * Disabled helpers.  Nothing between this #if 0 and the matching #endif is
 * compiled.
 */

/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a JUMP CF instruction with the given pop count; 'offset' is unused. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, reset the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with pop_count 1,
 * record it on the mid list of level 'fc_sp' and pop one entry.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
5031
5032 static int tgsi_if(struct r600_shader_ctx *ctx)
5033 {
5034 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5035
5036 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5037
5038 fc_pushlevel(ctx, FC_IF);
5039
5040 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5041 return 0;
5042 }
5043
5044 static int tgsi_else(struct r600_shader_ctx *ctx)
5045 {
5046 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5047 ctx->bc->cf_last->pop_count = 1;
5048
5049 fc_set_mid(ctx, ctx->bc->fc_sp);
5050 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5051 return 0;
5052 }
5053
5054 static int tgsi_endif(struct r600_shader_ctx *ctx)
5055 {
5056 pops(ctx, 1);
5057 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5058 R600_ERR("if/endif unbalanced in shader\n");
5059 return -1;
5060 }
5061
5062 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5063 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5064 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5065 } else {
5066 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5067 }
5068 fc_poplevel(ctx);
5069
5070 callstack_decrease_current(ctx, FC_PUSH_VPM);
5071 return 0;
5072 }
5073
5074 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5075 {
5076 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
5077
5078 fc_pushlevel(ctx, FC_LOOP);
5079
5080 /* check stack depth */
5081 callstack_check_depth(ctx, FC_LOOP, 0);
5082 return 0;
5083 }
5084
5085 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5086 {
5087 int i;
5088
5089 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5090
5091 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5092 R600_ERR("loop/endloop in shader code are not paired.\n");
5093 return -EINVAL;
5094 }
5095
5096 /* fixup loop pointers - from r600isa
5097 LOOP END points to CF after LOOP START,
5098 LOOP START point to CF after LOOP END
5099 BRK/CONT point to LOOP END CF
5100 */
5101 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5102
5103 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5104
5105 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5106 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5107 }
5108 /* XXX add LOOPRET support */
5109 fc_poplevel(ctx);
5110 callstack_decrease_current(ctx, FC_LOOP);
5111 return 0;
5112 }
5113
5114 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5115 {
5116 unsigned int fscp;
5117
5118 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5119 {
5120 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5121 break;
5122 }
5123
5124 if (fscp == 0) {
5125 R600_ERR("Break not inside loop/endloop pair\n");
5126 return -EINVAL;
5127 }
5128
5129 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5130
5131 fc_set_mid(ctx, fscp);
5132
5133 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5134 return 0;
5135 }
5136
5137 static int tgsi_umad(struct r600_shader_ctx *ctx)
5138 {
5139 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5140 struct r600_bytecode_alu alu;
5141 int i, j, r;
5142 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5143
5144 /* src0 * src1 */
5145 for (i = 0; i < lasti + 1; i++) {
5146 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5147 continue;
5148
5149 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5150
5151 alu.dst.chan = i;
5152 alu.dst.sel = ctx->temp_reg;
5153 alu.dst.write = 1;
5154
5155 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5156 for (j = 0; j < 2; j++) {
5157 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5158 }
5159
5160 alu.last = 1;
5161 r = r600_bytecode_add_alu(ctx->bc, &alu);
5162 if (r)
5163 return r;
5164 }
5165
5166
5167 for (i = 0; i < lasti + 1; i++) {
5168 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5169 continue;
5170
5171 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5172 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5173
5174 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5175
5176 alu.src[0].sel = ctx->temp_reg;
5177 alu.src[0].chan = i;
5178
5179 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5180 if (i == lasti) {
5181 alu.last = 1;
5182 }
5183 r = r600_bytecode_add_alu(ctx->bc, &alu);
5184 if (r)
5185 return r;
5186 }
5187 return 0;
5188 }
5189
5190 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5191 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5192 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5193 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5194
5195 /* XXX:
5196 * For state trackers other than OpenGL, we'll want to use
5197 * _RECIP_IEEE instead.
5198 */
5199 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5200
5201 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5202 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5203 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5204 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5205 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5206 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5207 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5208 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5209 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5210 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5211 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5212 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5213 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5214 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5215 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5216 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5217 /* gap */
5218 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5219 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5220 /* gap */
5221 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5222 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5223 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5224 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5225 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5226 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5227 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5228 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5229 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5230 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5231 /* gap */
5232 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5233 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5234 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5235 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5236 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5237 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5238 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5239 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5240 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5241 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5242 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5243 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5244 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5245 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5246 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5247 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5248 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5249 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5250 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5251 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5252 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5253 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5254 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5255 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5256 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5257 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5258 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5259 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5260 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5261 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5262 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5263 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5264 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5265 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5266 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5267 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5268 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5269 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5270 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5271 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5272 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5273 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5274 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5275 /* gap */
5276 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5277 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5278 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5279 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5280 /* gap */
5281 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5282 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5283 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5284 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5285 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5286 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5287 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5288 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5289 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5290 /* gap */
5291 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5292 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5293 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5294 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5295 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5296 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5297 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5298 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5299 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5300 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5301 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5302 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5303 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5304 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5305 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5306 /* gap */
5307 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5308 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5309 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5310 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5311 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5312 /* gap */
5313 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5314 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5315 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5316 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5317 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5318 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5319 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5320 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5321 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5322 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5323 /* gap */
5324 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5325 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5326 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5327 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5328 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5329 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5330 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5331 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5332 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5333 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5334 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5335 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5336 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5337 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5338 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5339 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5340 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5341 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5342 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5343 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5344 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5345 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5346 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5347 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5348 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5349 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5350 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5351 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5352 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5353 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5354 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5355 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5356 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5357 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5358 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5359 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5360 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5361 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5362 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5363 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5364 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5365 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5366 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5367 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5368 };
5369
/*
 * Opcode translation table for the "eg" code path. Entries use the EG_*
 * hardware opcode constants; presumably this is the Evergreen table --
 * confirm at the site that selects it.
 *
 * Each entry lists, in order:
 *   1. the TGSI opcode (a bare number for the values marked "gap"),
 *   2. a flag that is 1 only for MAD, the single entry whose hardware
 *      opcode is an OP3 constant,
 *   3. the hardware opcode constant (named for ALU, CF or TEX
 *      instructions), or 0 where none is given,
 *   4. the handler function for the opcode.
 *
 * The numeric "gap" placeholders appear in ascending order between the
 * named opcodes, which suggests the table is indexed directly by TGSI
 * opcode value. NOTE(review): confirm against the lookup code before
 * reordering, inserting or removing entries.
 *
 * Opcodes without a translation point at tgsi_unsupported.
 */
5370 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5371 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5372 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5373 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5374 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5375 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5376 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5377 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5378 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5379 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5380 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5381 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5382 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5383 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5384 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5385 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT through the _swap handler (presumably swapped operands -- confirm) */
5386 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5387 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5388 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* reuses ADD; NOTE(review): negation of the second operand is presumably done by the handler -- confirm */
5389 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5390 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5391 /* gap */
5392 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5393 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5394 /* gap */
5395 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5396 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5397 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5398 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5399 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5400 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5401 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5402 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5403 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5404 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5405 /* gap */
5406 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5407 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* reuses MOV; NOTE(review): the absolute-value modifier is presumably applied by the handler -- confirm */
5408 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5409 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5410 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5411 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5412 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5413 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5414 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5415 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5416 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5417 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5418 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5419 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5420 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5421 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5422 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5423 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5424 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5425 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5426 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5427 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5428 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5429 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5430 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5431 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5432 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5433 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5434 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5435 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5436 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5437 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5438 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5439 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5440 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5441 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5442 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5443 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5444 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5445 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5446 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5447 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5448 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5449 /* gap */
5450 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5451 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5452 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5453 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5454 /* gap */
5455 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5456 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5457 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5458 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5459 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5460 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5461 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5462 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5463 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5464 /* gap */
5465 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5466 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5467 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5468 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5469 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5470 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5471 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5472 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5473 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5474 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5475 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5476 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5477 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5478 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5479 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 /* gap */
5481 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5482 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5483 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5484 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5485 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5486 /* gap */
5487 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5488 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5489 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5490 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5491 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5492 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5493 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5494 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5495 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5496 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5497 /* gap */
5498 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5499 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5500 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5501 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5502 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5503 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5504 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5505 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5506 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5507 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5508 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5509 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5510 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5511 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5512 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5513 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5514 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5515 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5516 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5517 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5518 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5519 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5520 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5521 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5522 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5523 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5524 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5525 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5526 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5527 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5528 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5529 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5530 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5531 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5532 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5533 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5534 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5535 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5536 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5537 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5538 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5539 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5540 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5541 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5542 };
5543
/*
 * Opcode translation table for the "cm" code path. Several handlers are
 * named cayman_*, so this is presumably the Cayman table -- confirm at the
 * site that selects it.
 *
 * Entry layout matches eg_shader_tgsi_instruction: TGSI opcode (or a bare
 * number for "gap" values), a flag that is 1 only for MAD, the hardware
 * opcode constant (EG_* and SQ_TEX_* names are reused here), and the
 * handler function.
 *
 * Entries that visibly differ from the eg table:
 *   - RCP, RSQ, EX2, LG2 use cayman_emit_float_instr,
 *   - POW uses cayman_pow,
 *   - COS and SIN use cayman_trig,
 *   - UMUL uses MULLO_INT with cayman_mul_int_instr,
 *   - I2F, U2F, F2I and F2U use plain tgsi_op2.
 *
 * The numeric "gap" placeholders appear in ascending order between the
 * named opcodes, which suggests the table is indexed directly by TGSI
 * opcode value. NOTE(review): confirm against the lookup code before
 * reordering, inserting or removing entries.
 */
5544 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5545 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5546 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5547 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5548 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5549 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5550 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5551 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5552 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5553 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5554 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5555 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5556 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5557 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5558 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5559 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT through the _swap handler (presumably swapped operands -- confirm) */
5560 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5561 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5562 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* reuses ADD; NOTE(review): negation of the second operand is presumably done by the handler -- confirm */
5563 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5564 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5565 /* gap */
5566 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5567 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5568 /* gap */
5569 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5570 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5571 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5572 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5573 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5574 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5575 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5576 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5577 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5578 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5579 /* gap */
5580 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5581 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* reuses MOV; NOTE(review): the absolute-value modifier is presumably applied by the handler -- confirm */
5582 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5583 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5584 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
5585 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5586 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5587 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5588 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5589 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5590 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5591 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5592 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5593 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5594 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5595 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5596 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5597 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5598 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5599 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5600 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5601 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5602 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5603 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5604 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5605 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5606 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5607 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5608 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5609 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5610 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5611 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5612 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5613 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5614 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5615 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5616 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5617 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5618 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5619 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5620 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5621 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5622 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5623 /* gap */
5624 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5625 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5626 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5627 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5628 /* gap */
5629 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5630 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5631 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5632 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5633 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5634 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5635 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5636 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5637 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5638 /* gap */
5639 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5640 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5641 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5642 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5643 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5644 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5645 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5646 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5647 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5648 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5649 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5650 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5651 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5652 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5653 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 /* gap */
5655 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5656 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5657 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5658 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5659 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5660 /* gap */
5661 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5662 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5663 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5664 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5665 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5666 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5667 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5668 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5669 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5670 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5671 /* gap */
5672 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5673 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5674 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5675 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5676 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5677 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5678 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5679 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5680 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5681 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5682 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5683 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5684 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5685 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5686 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5687 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5688 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5689 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
5690 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5691 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5692 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5693 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5694 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5695 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5696 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5697 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5698 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5699 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5700 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5701 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5702 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5703 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5704 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5705 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5706 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5707 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5708 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5709 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5710 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5711 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5712 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5713 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5714 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5715 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5716 };