d294084de9cc3c1bf7ef70ff8cc808aef53d9d52
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_context *rctx = (struct r600_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader);
107
108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_context *rctx = (struct r600_context *)ctx;
112 int r;
113
114 /* Would like some magic "get_bool_option_once" routine.
115 */
116 if (dump_shaders == -1)
117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
118
119 if (dump_shaders) {
120 fprintf(stderr, "--------------------------------------------------------------\n");
121 tgsi_dump(shader->tokens, 0);
122
123 if (shader->so.num_outputs) {
124 unsigned i;
125 fprintf(stderr, "STREAMOUT\n");
126 for (i = 0; i < shader->so.num_outputs; i++) {
127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) <<
128 shader->so.output[i].start_component;
129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
130 shader->so.output[i].output_buffer, shader->so.output[i].register_index,
131 mask & 1 ? "x" : "_",
132 (mask >> 1) & 1 ? "y" : "_",
133 (mask >> 2) & 1 ? "z" : "_",
134 (mask >> 3) & 1 ? "w" : "_");
135 }
136 }
137 }
138 r = r600_shader_from_tgsi(rctx, shader);
139 if (r) {
140 R600_ERR("translation from TGSI failed !\n");
141 return r;
142 }
143 r = r600_bytecode_build(&shader->shader.bc);
144 if (r) {
145 R600_ERR("building bytecode failed !\n");
146 return r;
147 }
148 if (dump_shaders) {
149 r600_bytecode_dump(&shader->shader.bc);
150 fprintf(stderr, "______________________________________________________________\n");
151 }
152 return r600_pipe_shader(ctx, shader);
153 }
154
155 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156 {
157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
158 r600_bytecode_clear(&shader->shader.bc);
159
160 memset(&shader->shader,0,sizeof(struct r600_shader));
161 }
162
163 /*
164 * tgsi -> r600 shader
165 */
166 struct r600_shader_tgsi_instruction;
167
/* One instruction source operand after translation by tgsi_src(). */
struct r600_shader_src {
	unsigned int	sel;		/* source selector (register index, special constant or literal) */
	unsigned int	swizzle[4];	/* per-channel swizzle taken from the TGSI X/Y/Z/W swizzles */
	unsigned int	neg;		/* negate flag */
	unsigned int	abs;		/* absolute-value flag */
	unsigned int	rel;		/* relative addressing, set for indirect registers */
	uint32_t	value[4];	/* literal values, copied when sel is a literal */
};
176
177 struct r600_shader_ctx {
178 struct tgsi_shader_info info;
179 struct tgsi_parse_context parse;
180 const struct tgsi_token *tokens;
181 unsigned type;
182 unsigned file_offset[TGSI_FILE_COUNT];
183 unsigned temp_reg;
184 struct r600_shader_tgsi_instruction *inst_info;
185 struct r600_bytecode *bc;
186 struct r600_shader *shader;
187 struct r600_shader_src src[4];
188 uint32_t *literals;
189 uint32_t nliterals;
190 uint32_t max_driver_temp_used;
191 /* needed for evergreen interpolation */
192 boolean input_centroid;
193 boolean input_linear;
194 boolean input_perspective;
195 int num_interp_gpr;
196 int face_gpr;
197 int colors_used;
198 boolean clip_vertex_write;
199 unsigned cv_output;
200 int fragcoord_input;
201 int native_integers;
202 };
203
/*
 * One entry of a per-chip TGSI opcode table; the tables are indexed by TGSI
 * opcode and an entry is installed in r600_shader_ctx::inst_info.
 */
struct r600_shader_tgsi_instruction {
	unsigned int	tgsi_opcode;
	unsigned int	is_op3;
	unsigned int	r600_opcode;
	int		(*process)(struct r600_shader_ctx *ctx);	/* handler called with the translation context */
};
210
211 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
212 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
213 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
214 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
215 static int tgsi_else(struct r600_shader_ctx *ctx);
216 static int tgsi_endif(struct r600_shader_ctx *ctx);
217 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
218 static int tgsi_endloop(struct r600_shader_ctx *ctx);
219 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
220
221 /*
222 * bytestream -> r600 shader
223 *
224 * These functions are used to transform the output of the LLVM backend into
225 * struct r600_bytecode.
226 */
227
228 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
229 unsigned char * bytes, unsigned num_bytes);
230
231 #ifdef HAVE_OPENCL
232 int r600_compute_shader_create(struct pipe_context * ctx,
233 LLVMModuleRef mod, struct r600_bytecode * bytecode)
234 {
235 struct r600_context *r600_ctx = (struct r600_context *)ctx;
236 unsigned char * bytes;
237 unsigned byte_count;
238 struct r600_shader_ctx shader_ctx;
239 unsigned dump = 0;
240
241 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
242 dump = 1;
243 }
244
245 r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
246 shader_ctx.bc = bytecode;
247 r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
248 shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
249 r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
250 if (shader_ctx.bc->chip_class == CAYMAN) {
251 cm_bytecode_add_cf_end(shader_ctx.bc);
252 }
253 r600_bytecode_build(shader_ctx.bc);
254 if (dump) {
255 r600_bytecode_dump(shader_ctx.bc);
256 }
257 return 1;
258 }
259
260 #endif /* HAVE_OPENCL */
261
262 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
263 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
264 {
265 unsigned i;
266 unsigned sel0, sel1;
267 sel0 = bytes[bytes_read++];
268 sel1 = bytes[bytes_read++];
269 alu->src[src_idx].sel = sel0 | (sel1 << 8);
270 alu->src[src_idx].chan = bytes[bytes_read++];
271 alu->src[src_idx].neg = bytes[bytes_read++];
272 alu->src[src_idx].abs = bytes[bytes_read++];
273 alu->src[src_idx].rel = bytes[bytes_read++];
274 alu->src[src_idx].kc_bank = bytes[bytes_read++];
275 for (i = 0; i < 4; i++) {
276 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
277 }
278 return bytes_read;
279 }
280
281 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
282 unsigned char * bytes, unsigned bytes_read)
283 {
284 unsigned src_idx;
285 unsigned inst0, inst1;
286 struct r600_bytecode_alu alu;
287 memset(&alu, 0, sizeof(alu));
288 for(src_idx = 0; src_idx < 3; src_idx++) {
289 bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
290 &alu, src_idx);
291 }
292
293 alu.dst.sel = bytes[bytes_read++];
294 alu.dst.chan = bytes[bytes_read++];
295 alu.dst.clamp = bytes[bytes_read++];
296 alu.dst.write = bytes[bytes_read++];
297 alu.dst.rel = bytes[bytes_read++];
298 inst0 = bytes[bytes_read++];
299 inst1 = bytes[bytes_read++];
300 alu.inst = inst0 | (inst1 << 8);
301 alu.last = bytes[bytes_read++];
302 alu.is_op3 = bytes[bytes_read++];
303 alu.predicate = bytes[bytes_read++];
304 alu.bank_swizzle = bytes[bytes_read++];
305 alu.bank_swizzle_force = bytes[bytes_read++];
306 alu.omod = bytes[bytes_read++];
307 alu.index_mode = bytes[bytes_read++];
308 r600_bytecode_add_alu(ctx->bc, &alu);
309
310 /* XXX: Handle other KILL instructions */
311 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
312 ctx->shader->uses_kill = 1;
313 /* XXX: This should be enforced in the LLVM backend. */
314 ctx->bc->force_add_cf = 1;
315 }
316 return bytes_read;
317 }
318
319 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
320 unsigned pred_inst)
321 {
322 alu->inst = pred_inst;
323 alu->predicate = 1;
324 alu->dst.write = 0;
325 alu->src[1].sel = V_SQ_ALU_SRC_0;
326 alu->src[1].chan = 0;
327 alu->last = 1;
328 r600_bytecode_add_alu_type(ctx->bc, alu,
329 CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
330
331 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
332 fc_pushlevel(ctx, FC_IF);
333 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
334 }
335
336 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
337 struct r600_bytecode_alu *alu, unsigned compare_opcode)
338 {
339 unsigned opcode = TGSI_OPCODE_BRK;
340 if (ctx->bc->chip_class == CAYMAN)
341 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
342 else if (ctx->bc->chip_class >= EVERGREEN)
343 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
344 else
345 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
346 llvm_if(ctx, alu, compare_opcode);
347 tgsi_loop_brk_cont(ctx);
348 tgsi_endif(ctx);
349 }
350
351 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
352 unsigned char * bytes, unsigned bytes_read)
353 {
354 struct r600_bytecode_alu alu;
355 unsigned inst;
356 memset(&alu, 0, sizeof(alu));
357 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
358 inst = bytes[bytes_read++];
359 switch (inst) {
360 case 0:
361 llvm_if(ctx, &alu,
362 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
363 break;
364 case 1:
365 tgsi_else(ctx);
366 break;
367 case 2:
368 tgsi_endif(ctx);
369 break;
370 case 3:
371 tgsi_bgnloop(ctx);
372 break;
373 case 4:
374 tgsi_endloop(ctx);
375 break;
376 case 5:
377 r600_break_from_byte_stream(ctx, &alu,
378 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE));
379 break;
380 case 6:
381 r600_break_from_byte_stream(ctx, &alu,
382 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
383 break;
384 case 7:
385 {
386 unsigned opcode = TGSI_OPCODE_CONT;
387 if (ctx->bc->chip_class == CAYMAN) {
388 ctx->inst_info =
389 &cm_shader_tgsi_instruction[opcode];
390 } else if (ctx->bc->chip_class >= EVERGREEN) {
391 ctx->inst_info =
392 &eg_shader_tgsi_instruction[opcode];
393 } else {
394 ctx->inst_info =
395 &r600_shader_tgsi_instruction[opcode];
396 }
397 tgsi_loop_brk_cont(ctx);
398 }
399 break;
400 case 8:
401 r600_break_from_byte_stream(ctx, &alu,
402 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
403 break;
404 }
405
406 return bytes_read;
407 }
408
409 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
410 unsigned char * bytes, unsigned bytes_read)
411 {
412 struct r600_bytecode_tex tex;
413
414 tex.inst = bytes[bytes_read++];
415 tex.resource_id = bytes[bytes_read++];
416 tex.src_gpr = bytes[bytes_read++];
417 tex.src_rel = bytes[bytes_read++];
418 tex.dst_gpr = bytes[bytes_read++];
419 tex.dst_rel = bytes[bytes_read++];
420 tex.dst_sel_x = bytes[bytes_read++];
421 tex.dst_sel_y = bytes[bytes_read++];
422 tex.dst_sel_z = bytes[bytes_read++];
423 tex.dst_sel_w = bytes[bytes_read++];
424 tex.lod_bias = bytes[bytes_read++];
425 tex.coord_type_x = bytes[bytes_read++];
426 tex.coord_type_y = bytes[bytes_read++];
427 tex.coord_type_z = bytes[bytes_read++];
428 tex.coord_type_w = bytes[bytes_read++];
429 tex.offset_x = bytes[bytes_read++];
430 tex.offset_y = bytes[bytes_read++];
431 tex.offset_z = bytes[bytes_read++];
432 tex.sampler_id = bytes[bytes_read++];
433 tex.src_sel_x = bytes[bytes_read++];
434 tex.src_sel_y = bytes[bytes_read++];
435 tex.src_sel_z = bytes[bytes_read++];
436 tex.src_sel_w = bytes[bytes_read++];
437
438 r600_bytecode_add_tex(ctx->bc, &tex);
439
440 return bytes_read;
441 }
442
443 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
444 unsigned char * bytes, unsigned bytes_read)
445 {
446 struct r600_bytecode_vtx vtx;
447 memset(&vtx, 0, sizeof(vtx));
448 vtx.inst = bytes[bytes_read++];
449 vtx.fetch_type = bytes[bytes_read++];
450 vtx.buffer_id = bytes[bytes_read++];
451 vtx.src_gpr = bytes[bytes_read++];
452 vtx.src_sel_x = bytes[bytes_read++];
453 vtx.mega_fetch_count = bytes[bytes_read++];
454 vtx.dst_gpr = bytes[bytes_read++];
455 vtx.dst_sel_x = bytes[bytes_read++];
456 vtx.dst_sel_y = bytes[bytes_read++];
457 vtx.dst_sel_z = bytes[bytes_read++];
458 vtx.dst_sel_w = bytes[bytes_read++];
459 vtx.use_const_fields = bytes[bytes_read++];
460 vtx.data_format = bytes[bytes_read++];
461 vtx.num_format_all = bytes[bytes_read++];
462 vtx.format_comp_all = bytes[bytes_read++];
463 vtx.srf_mode_all = bytes[bytes_read++];
464 /* offset is 2 bytes wide */
465 vtx.offset = bytes[bytes_read++];
466 vtx.offset |= (bytes[bytes_read++] << 8);
467 vtx.endian = bytes[bytes_read++];
468
469 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
470 fprintf(stderr, "Error adding vtx\n");
471 }
472 /* Use the Texture Cache */
473 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
474 return bytes_read;
475 }
476
477 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
478 unsigned char * bytes, unsigned num_bytes)
479 {
480 unsigned bytes_read = 0;
481 unsigned i, byte;
482 while (bytes_read < num_bytes) {
483 char inst_type = bytes[bytes_read++];
484 switch (inst_type) {
485 case 0:
486 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
487 bytes_read);
488 break;
489 case 1:
490 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
491 bytes_read);
492 break;
493 case 2:
494 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
495 bytes_read);
496 break;
497 case 3:
498 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
499 for (i = 0; i < 2; i++) {
500 for (byte = 0 ; byte < 4; byte++) {
501 ctx->bc->cf_last->isa[i] |=
502 (bytes[bytes_read++] << (byte * 8));
503 }
504 }
505 break;
506
507 case 4:
508 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
509 bytes_read);
510 break;
511 default:
512 /* XXX: Error here */
513 break;
514 }
515 }
516 }
517
518 /* End bytestream -> r600 shader functions*/
519
520 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
521 {
522 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
523 int j;
524
525 if (i->Instruction.NumDstRegs > 1) {
526 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
527 return -EINVAL;
528 }
529 if (i->Instruction.Predicate) {
530 R600_ERR("predicate unsupported\n");
531 return -EINVAL;
532 }
533 #if 0
534 if (i->Instruction.Label) {
535 R600_ERR("label unsupported\n");
536 return -EINVAL;
537 }
538 #endif
539 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
540 if (i->Src[j].Register.Dimension) {
541 R600_ERR("unsupported src %d (dimension %d)\n", j,
542 i->Src[j].Register.Dimension);
543 return -EINVAL;
544 }
545 }
546 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
547 if (i->Dst[j].Register.Dimension) {
548 R600_ERR("unsupported dst (dimension)\n");
549 return -EINVAL;
550 }
551 }
552 return 0;
553 }
554
555 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
556 {
557 int i, r;
558 struct r600_bytecode_alu alu;
559 int gpr = 0, base_chan = 0;
560 int ij_index = 0;
561
562 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
563 ij_index = 0;
564 if (ctx->shader->input[input].centroid)
565 ij_index++;
566 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
567 ij_index = 0;
568 /* if we have perspective add one */
569 if (ctx->input_perspective) {
570 ij_index++;
571 /* if we have perspective centroid */
572 if (ctx->input_centroid)
573 ij_index++;
574 }
575 if (ctx->shader->input[input].centroid)
576 ij_index++;
577 }
578
579 /* work out gpr and base_chan from index */
580 gpr = ij_index / 2;
581 base_chan = (2 * (ij_index % 2)) + 1;
582
583 for (i = 0; i < 8; i++) {
584 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
585
586 if (i < 4)
587 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
588 else
589 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
590
591 if ((i > 1) && (i < 6)) {
592 alu.dst.sel = ctx->shader->input[input].gpr;
593 alu.dst.write = 1;
594 }
595
596 alu.dst.chan = i % 4;
597
598 alu.src[0].sel = gpr;
599 alu.src[0].chan = (base_chan - (i % 2));
600
601 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
602
603 alu.bank_swizzle_force = SQ_ALU_VEC_210;
604 if ((i % 4) == 3)
605 alu.last = 1;
606 r = r600_bytecode_add_alu(ctx->bc, &alu);
607 if (r)
608 return r;
609 }
610 return 0;
611 }
612
613 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
614 {
615 int i, r;
616 struct r600_bytecode_alu alu;
617
618 for (i = 0; i < 4; i++) {
619 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
620
621 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
622
623 alu.dst.sel = ctx->shader->input[input].gpr;
624 alu.dst.write = 1;
625
626 alu.dst.chan = i;
627
628 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
629 alu.src[0].chan = i;
630
631 if (i == 3)
632 alu.last = 1;
633 r = r600_bytecode_add_alu(ctx->bc, &alu);
634 if (r)
635 return r;
636 }
637 return 0;
638 }
639
640 /*
641 * Special export handling in shaders
642 *
643 * shader export ARRAY_BASE for EXPORT_POS:
644 * 60 is position
645 * 61 is misc vector
646 * 62, 63 are clip distance vectors
647 *
648 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
649 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
650 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
651 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
652 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
653 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
654 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
655 * exclusive from render target index)
656 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
657 *
658 *
659 * shader export ARRAY_BASE for EXPORT_PIXEL:
660 * 0-7 CB targets
661 * 61 computed Z vector
662 *
663 * The use of the values exported in the computed Z vector are controlled
664 * by DB_SHADER_CONTROL:
665 * Z_EXPORT_ENABLE - Z as a float in RED
666 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
667 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
668 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
669 * DB_SOURCE_FORMAT - export control restrictions
670 *
671 */
672
673
674 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
675 static int r600_spi_sid(struct r600_shader_io * io)
676 {
677 int index, name = io->name;
678
679 /* These params are handled differently, they don't need
680 * semantic indices, so we'll use 0 for them.
681 */
682 if (name == TGSI_SEMANTIC_POSITION ||
683 name == TGSI_SEMANTIC_PSIZE ||
684 name == TGSI_SEMANTIC_FACE)
685 index = 0;
686 else {
687 if (name == TGSI_SEMANTIC_GENERIC) {
688 /* For generic params simply use sid from tgsi */
689 index = io->sid;
690 } else {
691 /* For non-generic params - pack name and sid into 8 bits */
692 index = 0x80 | (name<<3) | (io->sid);
693 }
694
695 /* Make sure that all really used indices have nonzero value, so
696 * we can just compare it to 0 later instead of comparing the name
697 * with different values to detect special cases. */
698 index++;
699 }
700
701 return index;
702 };
703
704 /* turn input into interpolate on EG */
705 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
706 {
707 int r = 0;
708
709 if (ctx->shader->input[index].spi_sid) {
710 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
711 if (ctx->shader->input[index].interpolate > 0) {
712 r = evergreen_interp_alu(ctx, index);
713 } else {
714 r = evergreen_interp_flat(ctx, index);
715 }
716 }
717 return r;
718 }
719
720 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
721 {
722 struct r600_bytecode_alu alu;
723 int i, r;
724 int gpr_front = ctx->shader->input[front].gpr;
725 int gpr_back = ctx->shader->input[back].gpr;
726
727 for (i = 0; i < 4; i++) {
728 memset(&alu, 0, sizeof(alu));
729 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
730 alu.is_op3 = 1;
731 alu.dst.write = 1;
732 alu.dst.sel = gpr_front;
733 alu.src[0].sel = ctx->face_gpr;
734 alu.src[1].sel = gpr_front;
735 alu.src[2].sel = gpr_back;
736
737 alu.dst.chan = i;
738 alu.src[1].chan = i;
739 alu.src[2].chan = i;
740 alu.last = (i==3);
741
742 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
743 return r;
744 }
745
746 return 0;
747 }
748
749 static int tgsi_declaration(struct r600_shader_ctx *ctx)
750 {
751 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
752 unsigned i;
753 int r;
754
755 switch (d->Declaration.File) {
756 case TGSI_FILE_INPUT:
757 i = ctx->shader->ninput++;
758 ctx->shader->input[i].name = d->Semantic.Name;
759 ctx->shader->input[i].sid = d->Semantic.Index;
760 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
761 ctx->shader->input[i].interpolate = d->Interp.Interpolate;
762 ctx->shader->input[i].centroid = d->Interp.Centroid;
763 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
764 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
765 switch (ctx->shader->input[i].name) {
766 case TGSI_SEMANTIC_FACE:
767 ctx->face_gpr = ctx->shader->input[i].gpr;
768 break;
769 case TGSI_SEMANTIC_COLOR:
770 ctx->colors_used++;
771 break;
772 case TGSI_SEMANTIC_POSITION:
773 ctx->fragcoord_input = i;
774 break;
775 }
776 if (ctx->bc->chip_class >= EVERGREEN) {
777 if ((r = evergreen_interp_input(ctx, i)))
778 return r;
779 }
780 }
781 break;
782 case TGSI_FILE_OUTPUT:
783 i = ctx->shader->noutput++;
784 ctx->shader->output[i].name = d->Semantic.Name;
785 ctx->shader->output[i].sid = d->Semantic.Index;
786 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
787 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
788 ctx->shader->output[i].interpolate = d->Interp.Interpolate;
789 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
790 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
791 switch (d->Semantic.Name) {
792 case TGSI_SEMANTIC_CLIPDIST:
793 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
794 break;
795 case TGSI_SEMANTIC_PSIZE:
796 ctx->shader->vs_out_misc_write = 1;
797 ctx->shader->vs_out_point_size = 1;
798 break;
799 case TGSI_SEMANTIC_CLIPVERTEX:
800 ctx->clip_vertex_write = TRUE;
801 ctx->cv_output = i;
802 break;
803 }
804 }
805 break;
806 case TGSI_FILE_CONSTANT:
807 case TGSI_FILE_TEMPORARY:
808 case TGSI_FILE_SAMPLER:
809 case TGSI_FILE_ADDRESS:
810 break;
811
812 case TGSI_FILE_SYSTEM_VALUE:
813 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
814 if (!ctx->native_integers) {
815 struct r600_bytecode_alu alu;
816 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
817
818 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
819 alu.src[0].sel = 0;
820 alu.src[0].chan = 3;
821
822 alu.dst.sel = 0;
823 alu.dst.chan = 3;
824 alu.dst.write = 1;
825 alu.last = 1;
826
827 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
828 return r;
829 }
830 break;
831 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
832 break;
833 default:
834 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
835 return -EINVAL;
836 }
837 return 0;
838 }
839
840 static int r600_get_temp(struct r600_shader_ctx *ctx)
841 {
842 return ctx->temp_reg + ctx->max_driver_temp_used++;
843 }
844
845 /*
846 * for evergreen we need to scan the shader to find the number of GPRs we need to
847 * reserve for interpolation.
848 *
849 * we need to know if we are going to emit
850 * any centroid inputs
851 * if perspective and linear are required
852 */
853 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
854 {
855 int i;
856 int num_baryc;
857
858 ctx->input_linear = FALSE;
859 ctx->input_perspective = FALSE;
860 ctx->input_centroid = FALSE;
861 ctx->num_interp_gpr = 1;
862
863 /* any centroid inputs */
864 for (i = 0; i < ctx->info.num_inputs; i++) {
865 /* skip position/face */
866 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
867 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
868 continue;
869 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
870 ctx->input_linear = TRUE;
871 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
872 ctx->input_perspective = TRUE;
873 if (ctx->info.input_centroid[i])
874 ctx->input_centroid = TRUE;
875 }
876
877 num_baryc = 0;
878 /* ignoring sample for now */
879 if (ctx->input_perspective)
880 num_baryc++;
881 if (ctx->input_linear)
882 num_baryc++;
883 if (ctx->input_centroid)
884 num_baryc *= 2;
885
886 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
887
888 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
889 return ctx->num_interp_gpr;
890 }
891
892 static void tgsi_src(struct r600_shader_ctx *ctx,
893 const struct tgsi_full_src_register *tgsi_src,
894 struct r600_shader_src *r600_src)
895 {
896 memset(r600_src, 0, sizeof(*r600_src));
897 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
898 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
899 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
900 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
901 r600_src->neg = tgsi_src->Register.Negate;
902 r600_src->abs = tgsi_src->Register.Absolute;
903
904 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
905 int index;
906 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
907 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
908 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
909
910 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
911 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
912 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
913 return;
914 }
915 index = tgsi_src->Register.Index;
916 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
917 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
918 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
919 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
920 r600_src->swizzle[0] = 3;
921 r600_src->swizzle[1] = 3;
922 r600_src->swizzle[2] = 3;
923 r600_src->swizzle[3] = 3;
924 r600_src->sel = 0;
925 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
926 r600_src->swizzle[0] = 0;
927 r600_src->swizzle[1] = 0;
928 r600_src->swizzle[2] = 0;
929 r600_src->swizzle[3] = 0;
930 r600_src->sel = 0;
931 }
932 } else {
933 if (tgsi_src->Register.Indirect)
934 r600_src->rel = V_SQ_REL_RELATIVE;
935 r600_src->sel = tgsi_src->Register.Index;
936 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
937 }
938 }
939
940 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
941 {
942 struct r600_bytecode_vtx vtx;
943 unsigned int ar_reg;
944 int r;
945
946 if (offset) {
947 struct r600_bytecode_alu alu;
948
949 memset(&alu, 0, sizeof(alu));
950
951 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
952 alu.src[0].sel = ctx->bc->ar_reg;
953
954 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
955 alu.src[1].value = offset;
956
957 alu.dst.sel = dst_reg;
958 alu.dst.write = 1;
959 alu.last = 1;
960
961 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
962 return r;
963
964 ar_reg = dst_reg;
965 } else {
966 ar_reg = ctx->bc->ar_reg;
967 }
968
969 memset(&vtx, 0, sizeof(vtx));
970 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
971 vtx.src_gpr = ar_reg;
972 vtx.mega_fetch_count = 16;
973 vtx.dst_gpr = dst_reg;
974 vtx.dst_sel_x = 0; /* SEL_X */
975 vtx.dst_sel_y = 1; /* SEL_Y */
976 vtx.dst_sel_z = 2; /* SEL_Z */
977 vtx.dst_sel_w = 3; /* SEL_W */
978 vtx.data_format = FMT_32_32_32_32_FLOAT;
979 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
980 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
981 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
982 vtx.endian = r600_endian_swap(32);
983
984 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
985 return r;
986
987 return 0;
988 }
989
990 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
991 {
992 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
993 struct r600_bytecode_alu alu;
994 int i, j, k, nconst, r;
995
996 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
997 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
998 nconst++;
999 }
1000 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1001 }
1002 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1003 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1004 continue;
1005 }
1006
1007 if (ctx->src[i].rel) {
1008 int treg = r600_get_temp(ctx);
1009 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1010 return r;
1011
1012 ctx->src[i].sel = treg;
1013 ctx->src[i].rel = 0;
1014 j--;
1015 } else if (j > 0) {
1016 int treg = r600_get_temp(ctx);
1017 for (k = 0; k < 4; k++) {
1018 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1019 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1020 alu.src[0].sel = ctx->src[i].sel;
1021 alu.src[0].chan = k;
1022 alu.src[0].rel = ctx->src[i].rel;
1023 alu.dst.sel = treg;
1024 alu.dst.chan = k;
1025 alu.dst.write = 1;
1026 if (k == 3)
1027 alu.last = 1;
1028 r = r600_bytecode_add_alu(ctx->bc, &alu);
1029 if (r)
1030 return r;
1031 }
1032 ctx->src[i].sel = treg;
1033 ctx->src[i].rel =0;
1034 j--;
1035 }
1036 }
1037 return 0;
1038 }
1039
1040 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1041 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1042 {
1043 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1044 struct r600_bytecode_alu alu;
1045 int i, j, k, nliteral, r;
1046
1047 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1048 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1049 nliteral++;
1050 }
1051 }
1052 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1053 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1054 int treg = r600_get_temp(ctx);
1055 for (k = 0; k < 4; k++) {
1056 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1057 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1058 alu.src[0].sel = ctx->src[i].sel;
1059 alu.src[0].chan = k;
1060 alu.src[0].value = ctx->src[i].value[k];
1061 alu.dst.sel = treg;
1062 alu.dst.chan = k;
1063 alu.dst.write = 1;
1064 if (k == 3)
1065 alu.last = 1;
1066 r = r600_bytecode_add_alu(ctx->bc, &alu);
1067 if (r)
1068 return r;
1069 }
1070 ctx->src[i].sel = treg;
1071 j--;
1072 }
1073 }
1074 return 0;
1075 }
1076
1077 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1078 {
1079 int i, r, count = ctx->shader->ninput;
1080
1081 /* additional inputs will be allocated right after the existing inputs,
1082 * we won't need them after the color selection, so we don't need to
1083 * reserve these gprs for the rest of the shader code and to adjust
1084 * output offsets etc. */
1085 int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
1086 ctx->info.file_max[TGSI_FILE_INPUT] + 1;
1087
1088 if (ctx->face_gpr == -1) {
1089 i = ctx->shader->ninput++;
1090 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
1091 ctx->shader->input[i].spi_sid = 0;
1092 ctx->shader->input[i].gpr = gpr++;
1093 ctx->face_gpr = ctx->shader->input[i].gpr;
1094 }
1095
1096 for (i = 0; i < count; i++) {
1097 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1098 int ni = ctx->shader->ninput++;
1099 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
1100 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1101 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
1102 ctx->shader->input[ni].gpr = gpr++;
1103
1104 if (ctx->bc->chip_class >= EVERGREEN) {
1105 r = evergreen_interp_input(ctx, ni);
1106 if (r)
1107 return r;
1108 }
1109
1110 r = select_twoside_color(ctx, i, ni);
1111 if (r)
1112 return r;
1113 }
1114 }
1115 return 0;
1116 }
1117
1118 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
1119 {
1120 struct r600_shader *shader = &pipeshader->shader;
1121 struct tgsi_token *tokens = pipeshader->tokens;
1122 struct pipe_stream_output_info so = pipeshader->so;
1123 struct tgsi_full_immediate *immediate;
1124 struct tgsi_full_property *property;
1125 struct r600_shader_ctx ctx;
1126 struct r600_bytecode_output output[32];
1127 unsigned output_done, noutput;
1128 unsigned opcode;
1129 int i, j, k, r = 0;
1130 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1131 /* Declarations used by llvm code */
1132 bool use_llvm = false;
1133 unsigned char * inst_bytes = NULL;
1134 unsigned inst_byte_count = 0;
1135
1136 #ifdef R600_USE_LLVM
1137 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1138 #endif
1139 ctx.bc = &shader->bc;
1140 ctx.shader = shader;
1141 ctx.native_integers = (rctx->screen->glsl_feature_level >= 130);
1142
1143 r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
1144 ctx.tokens = tokens;
1145 tgsi_scan_shader(tokens, &ctx.info);
1146 tgsi_parse_init(&ctx.parse, tokens);
1147 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1148 shader->processor_type = ctx.type;
1149 ctx.bc->type = shader->processor_type;
1150
1151 ctx.face_gpr = -1;
1152 ctx.fragcoord_input = -1;
1153 ctx.colors_used = 0;
1154 ctx.clip_vertex_write = 0;
1155
1156 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
1157 shader->nr_cbufs = rctx->nr_cbufs;
1158
1159 /* register allocations */
1160 /* Values [0,127] correspond to GPR[0..127].
1161 * Values [128,159] correspond to constant buffer bank 0
1162 * Values [160,191] correspond to constant buffer bank 1
1163 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1164 * Values [256,287] correspond to constant buffer bank 2 (EG)
1165 * Values [288,319] correspond to constant buffer bank 3 (EG)
1166 * Other special values are shown in the list below.
1167 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1168 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1169 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1170 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1171 * 248 SQ_ALU_SRC_0: special constant 0.0.
1172 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1173 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1174 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1175 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1176 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1177 * 254 SQ_ALU_SRC_PV: previous vector result.
1178 * 255 SQ_ALU_SRC_PS: previous scalar result.
1179 */
1180 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1181 ctx.file_offset[i] = 0;
1182 }
1183 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1184 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1185 if (ctx.bc->chip_class >= EVERGREEN) {
1186 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1187 } else {
1188 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1189 }
1190 }
1191 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1192 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1193 }
1194
1195 /* LLVM backend setup */
1196 #ifdef R600_USE_LLVM
1197 if (use_llvm && ctx.info.indirect_files) {
1198 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1199 "indirect adressing. Falling back to TGSI "
1200 "backend.\n");
1201 use_llvm = 0;
1202 }
1203 if (use_llvm) {
1204 struct radeon_llvm_context radeon_llvm_ctx;
1205 LLVMModuleRef mod;
1206 unsigned dump = 0;
1207 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1208 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1209 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1210 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1211 dump = 1;
1212 }
1213 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1214 rctx->family, dump)) {
1215 FREE(inst_bytes);
1216 radeon_llvm_dispose(&radeon_llvm_ctx);
1217 use_llvm = 0;
1218 fprintf(stderr, "R600 LLVM backend failed to compile "
1219 "shader. Falling back to TGSI\n");
1220 } else {
1221 ctx.file_offset[TGSI_FILE_OUTPUT] =
1222 ctx.file_offset[TGSI_FILE_INPUT];
1223 }
1224 radeon_llvm_dispose(&radeon_llvm_ctx);
1225 }
1226 #endif
1227 /* End of LLVM backend setup */
1228
1229 if (!use_llvm) {
1230 ctx.file_offset[TGSI_FILE_OUTPUT] =
1231 ctx.file_offset[TGSI_FILE_INPUT] +
1232 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1233 }
1234 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1235 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1236
1237 /* Outside the GPR range. This will be translated to one of the
1238 * kcache banks later. */
1239 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1240
1241 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1242 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1243 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1244 ctx.temp_reg = ctx.bc->ar_reg + 1;
1245
1246 ctx.nliterals = 0;
1247 ctx.literals = NULL;
1248 shader->fs_write_all = FALSE;
1249 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1250 tgsi_parse_token(&ctx.parse);
1251 switch (ctx.parse.FullToken.Token.Type) {
1252 case TGSI_TOKEN_TYPE_IMMEDIATE:
1253 immediate = &ctx.parse.FullToken.FullImmediate;
1254 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1255 if(ctx.literals == NULL) {
1256 r = -ENOMEM;
1257 goto out_err;
1258 }
1259 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1260 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1261 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1262 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1263 ctx.nliterals++;
1264 break;
1265 case TGSI_TOKEN_TYPE_DECLARATION:
1266 r = tgsi_declaration(&ctx);
1267 if (r)
1268 goto out_err;
1269 break;
1270 case TGSI_TOKEN_TYPE_INSTRUCTION:
1271 break;
1272 case TGSI_TOKEN_TYPE_PROPERTY:
1273 property = &ctx.parse.FullToken.FullProperty;
1274 switch (property->Property.PropertyName) {
1275 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1276 if (property->u[0].Data == 1)
1277 shader->fs_write_all = TRUE;
1278 break;
1279 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1280 if (property->u[0].Data == 1)
1281 shader->vs_prohibit_ucps = TRUE;
1282 break;
1283 }
1284 break;
1285 default:
1286 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1287 r = -EINVAL;
1288 goto out_err;
1289 }
1290 }
1291
1292 if (ctx.fragcoord_input >= 0) {
1293 if (ctx.bc->chip_class == CAYMAN) {
1294 for (j = 0 ; j < 4; j++) {
1295 struct r600_bytecode_alu alu;
1296 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1297 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1298 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1299 alu.src[0].chan = 3;
1300
1301 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1302 alu.dst.chan = j;
1303 alu.dst.write = (j == 3);
1304 alu.last = 1;
1305 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1306 return r;
1307 }
1308 } else {
1309 struct r600_bytecode_alu alu;
1310 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1311 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1312 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1313 alu.src[0].chan = 3;
1314
1315 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1316 alu.dst.chan = 3;
1317 alu.dst.write = 1;
1318 alu.last = 1;
1319 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1320 return r;
1321 }
1322 }
1323
1324 if (shader->two_side && ctx.colors_used) {
1325 if ((r = process_twoside_color_inputs(&ctx)))
1326 return r;
1327 }
1328
1329 tgsi_parse_init(&ctx.parse, tokens);
1330 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1331 tgsi_parse_token(&ctx.parse);
1332 switch (ctx.parse.FullToken.Token.Type) {
1333 case TGSI_TOKEN_TYPE_INSTRUCTION:
1334 if (use_llvm) {
1335 continue;
1336 }
1337 r = tgsi_is_supported(&ctx);
1338 if (r)
1339 goto out_err;
1340 ctx.max_driver_temp_used = 0;
1341 /* reserve first tmp for everyone */
1342 r600_get_temp(&ctx);
1343
1344 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1345 if ((r = tgsi_split_constant(&ctx)))
1346 goto out_err;
1347 if ((r = tgsi_split_literal_constant(&ctx)))
1348 goto out_err;
1349 if (ctx.bc->chip_class == CAYMAN)
1350 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1351 else if (ctx.bc->chip_class >= EVERGREEN)
1352 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1353 else
1354 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1355 r = ctx.inst_info->process(&ctx);
1356 if (r)
1357 goto out_err;
1358 break;
1359 default:
1360 break;
1361 }
1362 }
1363
1364 /* Get instructions if we are using the LLVM backend. */
1365 if (use_llvm) {
1366 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1367 FREE(inst_bytes);
1368 }
1369
1370 noutput = shader->noutput;
1371
1372 if (ctx.clip_vertex_write) {
1373 /* need to convert a clipvertex write into clipdistance writes and not export
1374 the clip vertex anymore */
1375
1376 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1377 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1378 shader->output[noutput].gpr = ctx.temp_reg;
1379 noutput++;
1380 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1381 shader->output[noutput].gpr = ctx.temp_reg+1;
1382 noutput++;
1383
1384 /* reset spi_sid for clipvertex output to avoid confusing spi */
1385 shader->output[ctx.cv_output].spi_sid = 0;
1386
1387 shader->clip_dist_write = 0xFF;
1388
1389 for (i = 0; i < 8; i++) {
1390 int oreg = i >> 2;
1391 int ochan = i & 3;
1392
1393 for (j = 0; j < 4; j++) {
1394 struct r600_bytecode_alu alu;
1395 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1396 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1397 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1398 alu.src[0].chan = j;
1399
1400 alu.src[1].sel = 512 + i;
1401 alu.src[1].kc_bank = 1;
1402 alu.src[1].chan = j;
1403
1404 alu.dst.sel = ctx.temp_reg + oreg;
1405 alu.dst.chan = j;
1406 alu.dst.write = (j == ochan);
1407 if (j == 3)
1408 alu.last = 1;
1409 r = r600_bytecode_add_alu(ctx.bc, &alu);
1410 if (r)
1411 return r;
1412 }
1413 }
1414 }
1415
1416 /* Add stream outputs. */
1417 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1418 for (i = 0; i < so.num_outputs; i++) {
1419 struct r600_bytecode_output output;
1420
1421 if (so.output[i].output_buffer >= 4) {
1422 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1423 so.output[i].output_buffer);
1424 r = -EINVAL;
1425 goto out_err;
1426 }
1427 if (so.output[i].dst_offset < so.output[i].start_component) {
1428 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1429 r = -EINVAL;
1430 goto out_err;
1431 }
1432
1433 memset(&output, 0, sizeof(struct r600_bytecode_output));
1434 output.gpr = shader->output[so.output[i].register_index].gpr;
1435 output.elem_size = 0;
1436 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1437 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1438 output.burst_count = 1;
1439 output.barrier = 1;
1440 /* array_size is an upper limit for the burst_count
1441 * with MEM_STREAM instructions */
1442 output.array_size = 0xFFF;
1443 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1444 if (ctx.bc->chip_class >= EVERGREEN) {
1445 switch (so.output[i].output_buffer) {
1446 case 0:
1447 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1448 break;
1449 case 1:
1450 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1451 break;
1452 case 2:
1453 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1454 break;
1455 case 3:
1456 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1457 break;
1458 }
1459 } else {
1460 switch (so.output[i].output_buffer) {
1461 case 0:
1462 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1463 break;
1464 case 1:
1465 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1466 break;
1467 case 2:
1468 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1469 break;
1470 case 3:
1471 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1472 break;
1473 }
1474 }
1475 r = r600_bytecode_add_output(ctx.bc, &output);
1476 if (r)
1477 goto out_err;
1478 }
1479 }
1480
1481 /* export output */
1482 for (i = 0, j = 0; i < noutput; i++, j++) {
1483 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1484 output[j].gpr = shader->output[i].gpr;
1485 output[j].elem_size = 3;
1486 output[j].swizzle_x = 0;
1487 output[j].swizzle_y = 1;
1488 output[j].swizzle_z = 2;
1489 output[j].swizzle_w = 3;
1490 output[j].burst_count = 1;
1491 output[j].barrier = 1;
1492 output[j].type = -1;
1493 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1494 switch (ctx.type) {
1495 case TGSI_PROCESSOR_VERTEX:
1496 switch (shader->output[i].name) {
1497 case TGSI_SEMANTIC_POSITION:
1498 output[j].array_base = next_pos_base++;
1499 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1500 break;
1501
1502 case TGSI_SEMANTIC_PSIZE:
1503 output[j].array_base = next_pos_base++;
1504 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1505 break;
1506 case TGSI_SEMANTIC_CLIPVERTEX:
1507 j--;
1508 break;
1509 case TGSI_SEMANTIC_CLIPDIST:
1510 output[j].array_base = next_pos_base++;
1511 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1512 /* spi_sid is 0 for clipdistance outputs that were generated
1513 * for clipvertex - we don't need to pass them to PS */
1514 if (shader->output[i].spi_sid) {
1515 j++;
1516 /* duplicate it as PARAM to pass to the pixel shader */
1517 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1518 output[j].array_base = next_param_base++;
1519 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1520 }
1521 break;
1522 case TGSI_SEMANTIC_FOG:
1523 output[j].swizzle_y = 4; /* 0 */
1524 output[j].swizzle_z = 4; /* 0 */
1525 output[j].swizzle_w = 5; /* 1 */
1526 break;
1527 }
1528 break;
1529 case TGSI_PROCESSOR_FRAGMENT:
1530 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1531 output[j].array_base = next_pixel_base++;
1532 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1533 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
1534 for (k = 1; k < shader->nr_cbufs; k++) {
1535 j++;
1536 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1537 output[j].gpr = shader->output[i].gpr;
1538 output[j].elem_size = 3;
1539 output[j].swizzle_x = 0;
1540 output[j].swizzle_y = 1;
1541 output[j].swizzle_z = 2;
1542 output[j].swizzle_w = 3;
1543 output[j].burst_count = 1;
1544 output[j].barrier = 1;
1545 output[j].array_base = next_pixel_base++;
1546 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1547 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1548 }
1549 }
1550 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1551 output[j].array_base = 61;
1552 output[j].swizzle_x = 2;
1553 output[j].swizzle_y = 7;
1554 output[j].swizzle_z = output[j].swizzle_w = 7;
1555 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1556 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1557 output[j].array_base = 61;
1558 output[j].swizzle_x = 7;
1559 output[j].swizzle_y = 1;
1560 output[j].swizzle_z = output[j].swizzle_w = 7;
1561 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1562 } else {
1563 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1564 r = -EINVAL;
1565 goto out_err;
1566 }
1567 break;
1568 default:
1569 R600_ERR("unsupported processor type %d\n", ctx.type);
1570 r = -EINVAL;
1571 goto out_err;
1572 }
1573
1574 if (output[j].type==-1) {
1575 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1576 output[j].array_base = next_param_base++;
1577 }
1578 }
1579
1580 /* add fake param output for vertex shader if no param is exported */
1581 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1582 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1583 output[j].gpr = 0;
1584 output[j].elem_size = 3;
1585 output[j].swizzle_x = 7;
1586 output[j].swizzle_y = 7;
1587 output[j].swizzle_z = 7;
1588 output[j].swizzle_w = 7;
1589 output[j].burst_count = 1;
1590 output[j].barrier = 1;
1591 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1592 output[j].array_base = 0;
1593 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1594 j++;
1595 }
1596
1597 /* add fake pixel export */
1598 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
1599 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1600 output[j].gpr = 0;
1601 output[j].elem_size = 3;
1602 output[j].swizzle_x = 7;
1603 output[j].swizzle_y = 7;
1604 output[j].swizzle_z = 7;
1605 output[j].swizzle_w = 7;
1606 output[j].burst_count = 1;
1607 output[j].barrier = 1;
1608 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1609 output[j].array_base = 0;
1610 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1611 j++;
1612 }
1613
1614 noutput = j;
1615
1616 /* set export done on last export of each type */
1617 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1618 if (ctx.bc->chip_class < CAYMAN) {
1619 if (i == (noutput - 1)) {
1620 output[i].end_of_program = 1;
1621 }
1622 }
1623 if (!(output_done & (1 << output[i].type))) {
1624 output_done |= (1 << output[i].type);
1625 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1626 }
1627 }
1628 /* add output to bytecode */
1629 for (i = 0; i < noutput; i++) {
1630 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1631 if (r)
1632 goto out_err;
1633 }
1634 /* add program end */
1635 if (ctx.bc->chip_class == CAYMAN)
1636 cm_bytecode_add_cf_end(ctx.bc);
1637
1638 /* check GPR limit - we have 124 = 128 - 4
1639 * (4 are reserved as alu clause temporary registers) */
1640 if (ctx.bc->ngpr > 124) {
1641 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1642 r = -ENOMEM;
1643 goto out_err;
1644 }
1645
1646 free(ctx.literals);
1647 tgsi_parse_free(&ctx.parse);
1648 return 0;
1649 out_err:
1650 free(ctx.literals);
1651 tgsi_parse_free(&ctx.parse);
1652 return r;
1653 }
1654
1655 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1656 {
1657 R600_ERR("%s tgsi opcode unsupported\n",
1658 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1659 return -EINVAL;
1660 }
1661
/* Handler that emits nothing for the instruction and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx; /* intentionally unused */
	return 0;
}
1666
1667 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1668 const struct r600_shader_src *shader_src,
1669 unsigned chan)
1670 {
1671 bc_src->sel = shader_src->sel;
1672 bc_src->chan = shader_src->swizzle[chan];
1673 bc_src->neg = shader_src->neg;
1674 bc_src->abs = shader_src->abs;
1675 bc_src->rel = shader_src->rel;
1676 bc_src->value = shader_src->value[bc_src->chan];
1677 }
1678
1679 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1680 {
1681 bc_src->abs = 1;
1682 bc_src->neg = 0;
1683 }
1684
1685 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1686 {
1687 bc_src->neg = !bc_src->neg;
1688 }
1689
1690 static void tgsi_dst(struct r600_shader_ctx *ctx,
1691 const struct tgsi_full_dst_register *tgsi_dst,
1692 unsigned swizzle,
1693 struct r600_bytecode_alu_dst *r600_dst)
1694 {
1695 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1696
1697 r600_dst->sel = tgsi_dst->Register.Index;
1698 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1699 r600_dst->chan = swizzle;
1700 r600_dst->write = 1;
1701 if (tgsi_dst->Register.Indirect)
1702 r600_dst->rel = V_SQ_REL_RELATIVE;
1703 if (inst->Instruction.Saturate) {
1704 r600_dst->clamp = 1;
1705 }
1706 }
1707
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* Search downwards; channel 0 is also the fallback answer. */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
1719
1720 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1721 {
1722 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1723 struct r600_bytecode_alu alu;
1724 int i, j, r;
1725 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1726
1727 for (i = 0; i < lasti + 1; i++) {
1728 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1729 continue;
1730
1731 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1732 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1733
1734 alu.inst = ctx->inst_info->r600_opcode;
1735 if (!swap) {
1736 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1737 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1738 }
1739 } else {
1740 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1741 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1742 }
1743 /* handle some special cases */
1744 switch (ctx->inst_info->tgsi_opcode) {
1745 case TGSI_OPCODE_SUB:
1746 r600_bytecode_src_toggle_neg(&alu.src[1]);
1747 break;
1748 case TGSI_OPCODE_ABS:
1749 r600_bytecode_src_set_abs(&alu.src[0]);
1750 break;
1751 default:
1752 break;
1753 }
1754 if (i == lasti || trans_only) {
1755 alu.last = 1;
1756 }
1757 r = r600_bytecode_add_alu(ctx->bc, &alu);
1758 if (r)
1759 return r;
1760 }
1761 return 0;
1762 }
1763
/* Component-wise operation: sources in TGSI order, normal instruction grouping. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1768
/* Component-wise operation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1773
/* Component-wise operation where every emitted instruction is marked "last". */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1778
1779 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1780 {
1781 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1782 struct r600_bytecode_alu alu;
1783 int i, r;
1784 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1785
1786 for (i = 0; i < lasti + 1; i++) {
1787
1788 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1789 continue;
1790 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1791 alu.inst = ctx->inst_info->r600_opcode;
1792
1793 alu.src[0].sel = V_SQ_ALU_SRC_0;
1794
1795 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1796
1797 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1798
1799 if (i == lasti) {
1800 alu.last = 1;
1801 }
1802 r = r600_bytecode_add_alu(ctx->bc, &alu);
1803 if (r)
1804 return r;
1805 }
1806 return 0;
1807
1808 }
1809
1810 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1811 {
1812 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1813 int i, j, r;
1814 struct r600_bytecode_alu alu;
1815 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1816
1817 for (i = 0 ; i < last_slot; i++) {
1818 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1819 alu.inst = ctx->inst_info->r600_opcode;
1820 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1821 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1822 }
1823 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1824 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1825
1826 if (i == last_slot - 1)
1827 alu.last = 1;
1828 r = r600_bytecode_add_alu(ctx->bc, &alu);
1829 if (r)
1830 return r;
1831 }
1832 return 0;
1833 }
1834
1835 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1836 {
1837 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1838 int i, j, k, r;
1839 struct r600_bytecode_alu alu;
1840 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1841 for (k = 0; k < last_slot; k++) {
1842 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1843 continue;
1844
1845 for (i = 0 ; i < 4; i++) {
1846 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1847 alu.inst = ctx->inst_info->r600_opcode;
1848 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1849 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1850 }
1851 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1852 alu.dst.write = (i == k);
1853 if (i == 3)
1854 alu.last = 1;
1855 r = r600_bytecode_add_alu(ctx->bc, &alu);
1856 if (r)
1857 return r;
1858 }
1859 }
1860 return 0;
1861 }
1862
1863 /*
1864 * r600 - trunc to -PI..PI range
1865 * r700 - normalize by dividing by 2PI
1866 * see fdo bug 27901
1867 */
1868 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1869 {
1870 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1871 static float double_pi = 3.1415926535 * 2;
1872 static float neg_pi = -3.1415926535;
1873
1874 int r;
1875 struct r600_bytecode_alu alu;
1876
1877 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1878 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1879 alu.is_op3 = 1;
1880
1881 alu.dst.chan = 0;
1882 alu.dst.sel = ctx->temp_reg;
1883 alu.dst.write = 1;
1884
1885 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1886
1887 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1888 alu.src[1].chan = 0;
1889 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1890 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1891 alu.src[2].chan = 0;
1892 alu.last = 1;
1893 r = r600_bytecode_add_alu(ctx->bc, &alu);
1894 if (r)
1895 return r;
1896
1897 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1898 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1899
1900 alu.dst.chan = 0;
1901 alu.dst.sel = ctx->temp_reg;
1902 alu.dst.write = 1;
1903
1904 alu.src[0].sel = ctx->temp_reg;
1905 alu.src[0].chan = 0;
1906 alu.last = 1;
1907 r = r600_bytecode_add_alu(ctx->bc, &alu);
1908 if (r)
1909 return r;
1910
1911 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1912 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1913 alu.is_op3 = 1;
1914
1915 alu.dst.chan = 0;
1916 alu.dst.sel = ctx->temp_reg;
1917 alu.dst.write = 1;
1918
1919 alu.src[0].sel = ctx->temp_reg;
1920 alu.src[0].chan = 0;
1921
1922 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1923 alu.src[1].chan = 0;
1924 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1925 alu.src[2].chan = 0;
1926
1927 if (ctx->bc->chip_class == R600) {
1928 alu.src[1].value = *(uint32_t *)&double_pi;
1929 alu.src[2].value = *(uint32_t *)&neg_pi;
1930 } else {
1931 alu.src[1].sel = V_SQ_ALU_SRC_1;
1932 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1933 alu.src[2].neg = 1;
1934 }
1935
1936 alu.last = 1;
1937 r = r600_bytecode_add_alu(ctx->bc, &alu);
1938 if (r)
1939 return r;
1940 return 0;
1941 }
1942
1943 static int cayman_trig(struct r600_shader_ctx *ctx)
1944 {
1945 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1946 struct r600_bytecode_alu alu;
1947 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1948 int i, r;
1949
1950 r = tgsi_setup_trig(ctx);
1951 if (r)
1952 return r;
1953
1954
1955 for (i = 0; i < last_slot; i++) {
1956 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1957 alu.inst = ctx->inst_info->r600_opcode;
1958 alu.dst.chan = i;
1959
1960 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1961 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1962
1963 alu.src[0].sel = ctx->temp_reg;
1964 alu.src[0].chan = 0;
1965 if (i == last_slot - 1)
1966 alu.last = 1;
1967 r = r600_bytecode_add_alu(ctx->bc, &alu);
1968 if (r)
1969 return r;
1970 }
1971 return 0;
1972 }
1973
1974 static int tgsi_trig(struct r600_shader_ctx *ctx)
1975 {
1976 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1977 struct r600_bytecode_alu alu;
1978 int i, r;
1979 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1980
1981 r = tgsi_setup_trig(ctx);
1982 if (r)
1983 return r;
1984
1985 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1986 alu.inst = ctx->inst_info->r600_opcode;
1987 alu.dst.chan = 0;
1988 alu.dst.sel = ctx->temp_reg;
1989 alu.dst.write = 1;
1990
1991 alu.src[0].sel = ctx->temp_reg;
1992 alu.src[0].chan = 0;
1993 alu.last = 1;
1994 r = r600_bytecode_add_alu(ctx->bc, &alu);
1995 if (r)
1996 return r;
1997
1998 /* replicate result */
1999 for (i = 0; i < lasti + 1; i++) {
2000 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2001 continue;
2002
2003 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2004 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2005
2006 alu.src[0].sel = ctx->temp_reg;
2007 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2008 if (i == lasti)
2009 alu.last = 1;
2010 r = r600_bytecode_add_alu(ctx->bc, &alu);
2011 if (r)
2012 return r;
2013 }
2014 return 0;
2015 }
2016
2017 static int tgsi_scs(struct r600_shader_ctx *ctx)
2018 {
2019 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2020 struct r600_bytecode_alu alu;
2021 int i, r;
2022
2023 /* We'll only need the trig stuff if we are going to write to the
2024 * X or Y components of the destination vector.
2025 */
2026 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2027 r = tgsi_setup_trig(ctx);
2028 if (r)
2029 return r;
2030 }
2031
2032 /* dst.x = COS */
2033 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2034 if (ctx->bc->chip_class == CAYMAN) {
2035 for (i = 0 ; i < 3; i++) {
2036 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2037 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2038 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2039
2040 if (i == 0)
2041 alu.dst.write = 1;
2042 else
2043 alu.dst.write = 0;
2044 alu.src[0].sel = ctx->temp_reg;
2045 alu.src[0].chan = 0;
2046 if (i == 2)
2047 alu.last = 1;
2048 r = r600_bytecode_add_alu(ctx->bc, &alu);
2049 if (r)
2050 return r;
2051 }
2052 } else {
2053 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2054 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2055 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2056
2057 alu.src[0].sel = ctx->temp_reg;
2058 alu.src[0].chan = 0;
2059 alu.last = 1;
2060 r = r600_bytecode_add_alu(ctx->bc, &alu);
2061 if (r)
2062 return r;
2063 }
2064 }
2065
2066 /* dst.y = SIN */
2067 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2068 if (ctx->bc->chip_class == CAYMAN) {
2069 for (i = 0 ; i < 3; i++) {
2070 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2071 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2072 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2073 if (i == 1)
2074 alu.dst.write = 1;
2075 else
2076 alu.dst.write = 0;
2077 alu.src[0].sel = ctx->temp_reg;
2078 alu.src[0].chan = 0;
2079 if (i == 2)
2080 alu.last = 1;
2081 r = r600_bytecode_add_alu(ctx->bc, &alu);
2082 if (r)
2083 return r;
2084 }
2085 } else {
2086 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2087 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2088 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2089
2090 alu.src[0].sel = ctx->temp_reg;
2091 alu.src[0].chan = 0;
2092 alu.last = 1;
2093 r = r600_bytecode_add_alu(ctx->bc, &alu);
2094 if (r)
2095 return r;
2096 }
2097 }
2098
2099 /* dst.z = 0.0; */
2100 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2101 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2102
2103 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2104
2105 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2106
2107 alu.src[0].sel = V_SQ_ALU_SRC_0;
2108 alu.src[0].chan = 0;
2109
2110 alu.last = 1;
2111
2112 r = r600_bytecode_add_alu(ctx->bc, &alu);
2113 if (r)
2114 return r;
2115 }
2116
2117 /* dst.w = 1.0; */
2118 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2119 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2120
2121 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2122
2123 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2124
2125 alu.src[0].sel = V_SQ_ALU_SRC_1;
2126 alu.src[0].chan = 0;
2127
2128 alu.last = 1;
2129
2130 r = r600_bytecode_add_alu(ctx->bc, &alu);
2131 if (r)
2132 return r;
2133 }
2134
2135 return 0;
2136 }
2137
2138 static int tgsi_kill(struct r600_shader_ctx *ctx)
2139 {
2140 struct r600_bytecode_alu alu;
2141 int i, r;
2142
2143 for (i = 0; i < 4; i++) {
2144 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2145 alu.inst = ctx->inst_info->r600_opcode;
2146
2147 alu.dst.chan = i;
2148
2149 alu.src[0].sel = V_SQ_ALU_SRC_0;
2150
2151 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2152 alu.src[1].sel = V_SQ_ALU_SRC_1;
2153 alu.src[1].neg = 1;
2154 } else {
2155 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2156 }
2157 if (i == 3) {
2158 alu.last = 1;
2159 }
2160 r = r600_bytecode_add_alu(ctx->bc, &alu);
2161 if (r)
2162 return r;
2163 }
2164
2165 /* kill must be last in ALU */
2166 ctx->bc->force_add_cf = 1;
2167 ctx->shader->uses_kill = TRUE;
2168 return 0;
2169 }
2170
2171 static int tgsi_lit(struct r600_shader_ctx *ctx)
2172 {
2173 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2174 struct r600_bytecode_alu alu;
2175 int r;
2176
2177 /* tmp.x = max(src.y, 0.0) */
2178 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2179 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2180 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2181 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2182 alu.src[1].chan = 1;
2183
2184 alu.dst.sel = ctx->temp_reg;
2185 alu.dst.chan = 0;
2186 alu.dst.write = 1;
2187
2188 alu.last = 1;
2189 r = r600_bytecode_add_alu(ctx->bc, &alu);
2190 if (r)
2191 return r;
2192
2193 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2194 {
2195 int chan;
2196 int sel;
2197 int i;
2198
2199 if (ctx->bc->chip_class == CAYMAN) {
2200 for (i = 0; i < 3; i++) {
2201 /* tmp.z = log(tmp.x) */
2202 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2203 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2204 alu.src[0].sel = ctx->temp_reg;
2205 alu.src[0].chan = 0;
2206 alu.dst.sel = ctx->temp_reg;
2207 alu.dst.chan = i;
2208 if (i == 2) {
2209 alu.dst.write = 1;
2210 alu.last = 1;
2211 } else
2212 alu.dst.write = 0;
2213
2214 r = r600_bytecode_add_alu(ctx->bc, &alu);
2215 if (r)
2216 return r;
2217 }
2218 } else {
2219 /* tmp.z = log(tmp.x) */
2220 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2222 alu.src[0].sel = ctx->temp_reg;
2223 alu.src[0].chan = 0;
2224 alu.dst.sel = ctx->temp_reg;
2225 alu.dst.chan = 2;
2226 alu.dst.write = 1;
2227 alu.last = 1;
2228 r = r600_bytecode_add_alu(ctx->bc, &alu);
2229 if (r)
2230 return r;
2231 }
2232
2233 chan = alu.dst.chan;
2234 sel = alu.dst.sel;
2235
2236 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2237 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2238 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2239 alu.src[0].sel = sel;
2240 alu.src[0].chan = chan;
2241 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2242 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2243 alu.dst.sel = ctx->temp_reg;
2244 alu.dst.chan = 0;
2245 alu.dst.write = 1;
2246 alu.is_op3 = 1;
2247 alu.last = 1;
2248 r = r600_bytecode_add_alu(ctx->bc, &alu);
2249 if (r)
2250 return r;
2251
2252 if (ctx->bc->chip_class == CAYMAN) {
2253 for (i = 0; i < 3; i++) {
2254 /* dst.z = exp(tmp.x) */
2255 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2256 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2257 alu.src[0].sel = ctx->temp_reg;
2258 alu.src[0].chan = 0;
2259 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2260 if (i == 2) {
2261 alu.dst.write = 1;
2262 alu.last = 1;
2263 } else
2264 alu.dst.write = 0;
2265 r = r600_bytecode_add_alu(ctx->bc, &alu);
2266 if (r)
2267 return r;
2268 }
2269 } else {
2270 /* dst.z = exp(tmp.x) */
2271 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2273 alu.src[0].sel = ctx->temp_reg;
2274 alu.src[0].chan = 0;
2275 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2276 alu.last = 1;
2277 r = r600_bytecode_add_alu(ctx->bc, &alu);
2278 if (r)
2279 return r;
2280 }
2281 }
2282
2283 /* dst.x, <- 1.0 */
2284 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2286 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2287 alu.src[0].chan = 0;
2288 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2289 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2290 r = r600_bytecode_add_alu(ctx->bc, &alu);
2291 if (r)
2292 return r;
2293
2294 /* dst.y = max(src.x, 0.0) */
2295 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2296 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2297 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2298 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2299 alu.src[1].chan = 0;
2300 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2301 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2302 r = r600_bytecode_add_alu(ctx->bc, &alu);
2303 if (r)
2304 return r;
2305
2306 /* dst.w, <- 1.0 */
2307 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2309 alu.src[0].sel = V_SQ_ALU_SRC_1;
2310 alu.src[0].chan = 0;
2311 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2312 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2313 alu.last = 1;
2314 r = r600_bytecode_add_alu(ctx->bc, &alu);
2315 if (r)
2316 return r;
2317
2318 return 0;
2319 }
2320
2321 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2322 {
2323 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2324 struct r600_bytecode_alu alu;
2325 int i, r;
2326
2327 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2328
2329 /* XXX:
2330 * For state trackers other than OpenGL, we'll want to use
2331 * _RECIPSQRT_IEEE instead.
2332 */
2333 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2334
2335 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2336 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2337 r600_bytecode_src_set_abs(&alu.src[i]);
2338 }
2339 alu.dst.sel = ctx->temp_reg;
2340 alu.dst.write = 1;
2341 alu.last = 1;
2342 r = r600_bytecode_add_alu(ctx->bc, &alu);
2343 if (r)
2344 return r;
2345 /* replicate result */
2346 return tgsi_helper_tempx_replicate(ctx);
2347 }
2348
2349 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2350 {
2351 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2352 struct r600_bytecode_alu alu;
2353 int i, r;
2354
2355 for (i = 0; i < 4; i++) {
2356 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2357 alu.src[0].sel = ctx->temp_reg;
2358 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2359 alu.dst.chan = i;
2360 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2361 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2362 if (i == 3)
2363 alu.last = 1;
2364 r = r600_bytecode_add_alu(ctx->bc, &alu);
2365 if (r)
2366 return r;
2367 }
2368 return 0;
2369 }
2370
2371 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2372 {
2373 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2374 struct r600_bytecode_alu alu;
2375 int i, r;
2376
2377 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2378 alu.inst = ctx->inst_info->r600_opcode;
2379 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2380 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2381 }
2382 alu.dst.sel = ctx->temp_reg;
2383 alu.dst.write = 1;
2384 alu.last = 1;
2385 r = r600_bytecode_add_alu(ctx->bc, &alu);
2386 if (r)
2387 return r;
2388 /* replicate result */
2389 return tgsi_helper_tempx_replicate(ctx);
2390 }
2391
2392 static int cayman_pow(struct r600_shader_ctx *ctx)
2393 {
2394 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2395 int i, r;
2396 struct r600_bytecode_alu alu;
2397 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2398
2399 for (i = 0; i < 3; i++) {
2400 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2402 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2403 alu.dst.sel = ctx->temp_reg;
2404 alu.dst.chan = i;
2405 alu.dst.write = 1;
2406 if (i == 2)
2407 alu.last = 1;
2408 r = r600_bytecode_add_alu(ctx->bc, &alu);
2409 if (r)
2410 return r;
2411 }
2412
2413 /* b * LOG2(a) */
2414 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2415 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2416 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2417 alu.src[1].sel = ctx->temp_reg;
2418 alu.dst.sel = ctx->temp_reg;
2419 alu.dst.write = 1;
2420 alu.last = 1;
2421 r = r600_bytecode_add_alu(ctx->bc, &alu);
2422 if (r)
2423 return r;
2424
2425 for (i = 0; i < last_slot; i++) {
2426 /* POW(a,b) = EXP2(b * LOG2(a))*/
2427 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2428 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2429 alu.src[0].sel = ctx->temp_reg;
2430
2431 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2432 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2433 if (i == last_slot - 1)
2434 alu.last = 1;
2435 r = r600_bytecode_add_alu(ctx->bc, &alu);
2436 if (r)
2437 return r;
2438 }
2439 return 0;
2440 }
2441
2442 static int tgsi_pow(struct r600_shader_ctx *ctx)
2443 {
2444 struct r600_bytecode_alu alu;
2445 int r;
2446
2447 /* LOG2(a) */
2448 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2449 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2450 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2451 alu.dst.sel = ctx->temp_reg;
2452 alu.dst.write = 1;
2453 alu.last = 1;
2454 r = r600_bytecode_add_alu(ctx->bc, &alu);
2455 if (r)
2456 return r;
2457 /* b * LOG2(a) */
2458 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2459 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2460 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2461 alu.src[1].sel = ctx->temp_reg;
2462 alu.dst.sel = ctx->temp_reg;
2463 alu.dst.write = 1;
2464 alu.last = 1;
2465 r = r600_bytecode_add_alu(ctx->bc, &alu);
2466 if (r)
2467 return r;
2468 /* POW(a,b) = EXP2(b * LOG2(a))*/
2469 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2471 alu.src[0].sel = ctx->temp_reg;
2472 alu.dst.sel = ctx->temp_reg;
2473 alu.dst.write = 1;
2474 alu.last = 1;
2475 r = r600_bytecode_add_alu(ctx->bc, &alu);
2476 if (r)
2477 return r;
2478 return tgsi_helper_tempx_replicate(ctx);
2479 }
2480
2481 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2482 {
2483 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2484 struct r600_bytecode_alu alu;
2485 int i, r, j;
2486 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2487 int tmp0 = ctx->temp_reg;
2488 int tmp1 = r600_get_temp(ctx);
2489 int tmp2 = r600_get_temp(ctx);
2490 int tmp3 = r600_get_temp(ctx);
2491 /* Unsigned path:
2492 *
2493 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2494 *
2495 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2496 * 2. tmp0.z = lo (tmp0.x * src2)
2497 * 3. tmp0.w = -tmp0.z
2498 * 4. tmp0.y = hi (tmp0.x * src2)
2499 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2500 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2501 * 7. tmp1.x = tmp0.x - tmp0.w
2502 * 8. tmp1.y = tmp0.x + tmp0.w
2503 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2504 * 10. tmp0.z = hi(tmp0.x * src1) = q
2505 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2506 *
2507 * 12. tmp0.w = src1 - tmp0.y = r
2508 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2509 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2510 *
2511 * if DIV
2512 *
2513 * 15. tmp1.z = tmp0.z + 1 = q + 1
2514 * 16. tmp1.w = tmp0.z - 1 = q - 1
2515 *
2516 * else MOD
2517 *
2518 * 15. tmp1.z = tmp0.w - src2 = r - src2
2519 * 16. tmp1.w = tmp0.w + src2 = r + src2
2520 *
2521 * endif
2522 *
2523 * 17. tmp1.x = tmp1.x & tmp1.y
2524 *
2525 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2526 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2527 *
2528 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2529 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2530 *
2531 * Signed path:
2532 *
2533 * Same as unsigned, using abs values of the operands,
2534 * and fixing the sign of the result in the end.
2535 */
2536
2537 for (i = 0; i < 4; i++) {
2538 if (!(write_mask & (1<<i)))
2539 continue;
2540
2541 if (signed_op) {
2542
2543 /* tmp2.x = -src0 */
2544 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2545 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2546
2547 alu.dst.sel = tmp2;
2548 alu.dst.chan = 0;
2549 alu.dst.write = 1;
2550
2551 alu.src[0].sel = V_SQ_ALU_SRC_0;
2552
2553 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2554
2555 alu.last = 1;
2556 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2557 return r;
2558
2559 /* tmp2.y = -src1 */
2560 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2561 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2562
2563 alu.dst.sel = tmp2;
2564 alu.dst.chan = 1;
2565 alu.dst.write = 1;
2566
2567 alu.src[0].sel = V_SQ_ALU_SRC_0;
2568
2569 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2570
2571 alu.last = 1;
2572 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2573 return r;
2574
2575 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2576 /* it will be a sign of the quotient */
2577 if (!mod) {
2578
2579 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2580 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2581
2582 alu.dst.sel = tmp2;
2583 alu.dst.chan = 2;
2584 alu.dst.write = 1;
2585
2586 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2587 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2588
2589 alu.last = 1;
2590 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2591 return r;
2592 }
2593
2594 /* tmp2.x = |src0| */
2595 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2596 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2597 alu.is_op3 = 1;
2598
2599 alu.dst.sel = tmp2;
2600 alu.dst.chan = 0;
2601 alu.dst.write = 1;
2602
2603 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2604 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2605 alu.src[2].sel = tmp2;
2606 alu.src[2].chan = 0;
2607
2608 alu.last = 1;
2609 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2610 return r;
2611
2612 /* tmp2.y = |src1| */
2613 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2615 alu.is_op3 = 1;
2616
2617 alu.dst.sel = tmp2;
2618 alu.dst.chan = 1;
2619 alu.dst.write = 1;
2620
2621 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2622 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2623 alu.src[2].sel = tmp2;
2624 alu.src[2].chan = 1;
2625
2626 alu.last = 1;
2627 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2628 return r;
2629
2630 }
2631
2632 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2633 if (ctx->bc->chip_class == CAYMAN) {
2634 /* tmp3.x = u2f(src2) */
2635 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2636 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2637
2638 alu.dst.sel = tmp3;
2639 alu.dst.chan = 0;
2640 alu.dst.write = 1;
2641
2642 if (signed_op) {
2643 alu.src[0].sel = tmp2;
2644 alu.src[0].chan = 1;
2645 } else {
2646 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2647 }
2648
2649 alu.last = 1;
2650 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2651 return r;
2652
2653 /* tmp0.x = recip(tmp3.x) */
2654 for (j = 0 ; j < 3; j++) {
2655 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2656 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2657
2658 alu.dst.sel = tmp0;
2659 alu.dst.chan = j;
2660 alu.dst.write = (j == 0);
2661
2662 alu.src[0].sel = tmp3;
2663 alu.src[0].chan = 0;
2664
2665 if (j == 2)
2666 alu.last = 1;
2667 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2668 return r;
2669 }
2670
2671 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2672 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2673
2674 alu.src[0].sel = tmp0;
2675 alu.src[0].chan = 0;
2676
2677 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2678 alu.src[1].value = 0x4f800000;
2679
2680 alu.dst.sel = tmp3;
2681 alu.dst.write = 1;
2682 alu.last = 1;
2683 r = r600_bytecode_add_alu(ctx->bc, &alu);
2684 if (r)
2685 return r;
2686
2687 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2689
2690 alu.dst.sel = tmp0;
2691 alu.dst.chan = 0;
2692 alu.dst.write = 1;
2693
2694 alu.src[0].sel = tmp3;
2695 alu.src[0].chan = 0;
2696
2697 alu.last = 1;
2698 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2699 return r;
2700
2701 } else {
2702 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2703 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2704
2705 alu.dst.sel = tmp0;
2706 alu.dst.chan = 0;
2707 alu.dst.write = 1;
2708
2709 if (signed_op) {
2710 alu.src[0].sel = tmp2;
2711 alu.src[0].chan = 1;
2712 } else {
2713 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2714 }
2715
2716 alu.last = 1;
2717 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2718 return r;
2719 }
2720
2721 /* 2. tmp0.z = lo (tmp0.x * src2) */
2722 if (ctx->bc->chip_class == CAYMAN) {
2723 for (j = 0 ; j < 4; j++) {
2724 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2725 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2726
2727 alu.dst.sel = tmp0;
2728 alu.dst.chan = j;
2729 alu.dst.write = (j == 2);
2730
2731 alu.src[0].sel = tmp0;
2732 alu.src[0].chan = 0;
2733 if (signed_op) {
2734 alu.src[1].sel = tmp2;
2735 alu.src[1].chan = 1;
2736 } else {
2737 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2738 }
2739
2740 alu.last = (j == 3);
2741 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2742 return r;
2743 }
2744 } else {
2745 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2746 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2747
2748 alu.dst.sel = tmp0;
2749 alu.dst.chan = 2;
2750 alu.dst.write = 1;
2751
2752 alu.src[0].sel = tmp0;
2753 alu.src[0].chan = 0;
2754 if (signed_op) {
2755 alu.src[1].sel = tmp2;
2756 alu.src[1].chan = 1;
2757 } else {
2758 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2759 }
2760
2761 alu.last = 1;
2762 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2763 return r;
2764 }
2765
2766 /* 3. tmp0.w = -tmp0.z */
2767 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2768 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2769
2770 alu.dst.sel = tmp0;
2771 alu.dst.chan = 3;
2772 alu.dst.write = 1;
2773
2774 alu.src[0].sel = V_SQ_ALU_SRC_0;
2775 alu.src[1].sel = tmp0;
2776 alu.src[1].chan = 2;
2777
2778 alu.last = 1;
2779 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2780 return r;
2781
2782 /* 4. tmp0.y = hi (tmp0.x * src2) */
2783 if (ctx->bc->chip_class == CAYMAN) {
2784 for (j = 0 ; j < 4; j++) {
2785 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2786 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2787
2788 alu.dst.sel = tmp0;
2789 alu.dst.chan = j;
2790 alu.dst.write = (j == 1);
2791
2792 alu.src[0].sel = tmp0;
2793 alu.src[0].chan = 0;
2794
2795 if (signed_op) {
2796 alu.src[1].sel = tmp2;
2797 alu.src[1].chan = 1;
2798 } else {
2799 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2800 }
2801 alu.last = (j == 3);
2802 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2803 return r;
2804 }
2805 } else {
2806 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2807 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2808
2809 alu.dst.sel = tmp0;
2810 alu.dst.chan = 1;
2811 alu.dst.write = 1;
2812
2813 alu.src[0].sel = tmp0;
2814 alu.src[0].chan = 0;
2815
2816 if (signed_op) {
2817 alu.src[1].sel = tmp2;
2818 alu.src[1].chan = 1;
2819 } else {
2820 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2821 }
2822
2823 alu.last = 1;
2824 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2825 return r;
2826 }
2827
2828 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2829 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2830 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2831 alu.is_op3 = 1;
2832
2833 alu.dst.sel = tmp0;
2834 alu.dst.chan = 2;
2835 alu.dst.write = 1;
2836
2837 alu.src[0].sel = tmp0;
2838 alu.src[0].chan = 1;
2839 alu.src[1].sel = tmp0;
2840 alu.src[1].chan = 3;
2841 alu.src[2].sel = tmp0;
2842 alu.src[2].chan = 2;
2843
2844 alu.last = 1;
2845 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2846 return r;
2847
2848 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2849 if (ctx->bc->chip_class == CAYMAN) {
2850 for (j = 0 ; j < 4; j++) {
2851 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2852 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2853
2854 alu.dst.sel = tmp0;
2855 alu.dst.chan = j;
2856 alu.dst.write = (j == 3);
2857
2858 alu.src[0].sel = tmp0;
2859 alu.src[0].chan = 2;
2860
2861 alu.src[1].sel = tmp0;
2862 alu.src[1].chan = 0;
2863
2864 alu.last = (j == 3);
2865 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2866 return r;
2867 }
2868 } else {
2869 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2870 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2871
2872 alu.dst.sel = tmp0;
2873 alu.dst.chan = 3;
2874 alu.dst.write = 1;
2875
2876 alu.src[0].sel = tmp0;
2877 alu.src[0].chan = 2;
2878
2879 alu.src[1].sel = tmp0;
2880 alu.src[1].chan = 0;
2881
2882 alu.last = 1;
2883 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2884 return r;
2885 }
2886
2887 /* 7. tmp1.x = tmp0.x - tmp0.w */
2888 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2889 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2890
2891 alu.dst.sel = tmp1;
2892 alu.dst.chan = 0;
2893 alu.dst.write = 1;
2894
2895 alu.src[0].sel = tmp0;
2896 alu.src[0].chan = 0;
2897 alu.src[1].sel = tmp0;
2898 alu.src[1].chan = 3;
2899
2900 alu.last = 1;
2901 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2902 return r;
2903
2904 /* 8. tmp1.y = tmp0.x + tmp0.w */
2905 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2906 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2907
2908 alu.dst.sel = tmp1;
2909 alu.dst.chan = 1;
2910 alu.dst.write = 1;
2911
2912 alu.src[0].sel = tmp0;
2913 alu.src[0].chan = 0;
2914 alu.src[1].sel = tmp0;
2915 alu.src[1].chan = 3;
2916
2917 alu.last = 1;
2918 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2919 return r;
2920
2921 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2922 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2923 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2924 alu.is_op3 = 1;
2925
2926 alu.dst.sel = tmp0;
2927 alu.dst.chan = 0;
2928 alu.dst.write = 1;
2929
2930 alu.src[0].sel = tmp0;
2931 alu.src[0].chan = 1;
2932 alu.src[1].sel = tmp1;
2933 alu.src[1].chan = 1;
2934 alu.src[2].sel = tmp1;
2935 alu.src[2].chan = 0;
2936
2937 alu.last = 1;
2938 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2939 return r;
2940
2941 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
2942 if (ctx->bc->chip_class == CAYMAN) {
2943 for (j = 0 ; j < 4; j++) {
2944 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2945 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2946
2947 alu.dst.sel = tmp0;
2948 alu.dst.chan = j;
2949 alu.dst.write = (j == 2);
2950
2951 alu.src[0].sel = tmp0;
2952 alu.src[0].chan = 0;
2953
2954 if (signed_op) {
2955 alu.src[1].sel = tmp2;
2956 alu.src[1].chan = 0;
2957 } else {
2958 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2959 }
2960
2961 alu.last = (j == 3);
2962 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2963 return r;
2964 }
2965 } else {
2966 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2967 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2968
2969 alu.dst.sel = tmp0;
2970 alu.dst.chan = 2;
2971 alu.dst.write = 1;
2972
2973 alu.src[0].sel = tmp0;
2974 alu.src[0].chan = 0;
2975
2976 if (signed_op) {
2977 alu.src[1].sel = tmp2;
2978 alu.src[1].chan = 0;
2979 } else {
2980 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2981 }
2982
2983 alu.last = 1;
2984 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2985 return r;
2986 }
2987
2988 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
2989 if (ctx->bc->chip_class == CAYMAN) {
2990 for (j = 0 ; j < 4; j++) {
2991 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2992 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2993
2994 alu.dst.sel = tmp0;
2995 alu.dst.chan = j;
2996 alu.dst.write = (j == 1);
2997
2998 if (signed_op) {
2999 alu.src[0].sel = tmp2;
3000 alu.src[0].chan = 1;
3001 } else {
3002 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3003 }
3004
3005 alu.src[1].sel = tmp0;
3006 alu.src[1].chan = 2;
3007
3008 alu.last = (j == 3);
3009 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3010 return r;
3011 }
3012 } else {
3013 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3014 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3015
3016 alu.dst.sel = tmp0;
3017 alu.dst.chan = 1;
3018 alu.dst.write = 1;
3019
3020 if (signed_op) {
3021 alu.src[0].sel = tmp2;
3022 alu.src[0].chan = 1;
3023 } else {
3024 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3025 }
3026
3027 alu.src[1].sel = tmp0;
3028 alu.src[1].chan = 2;
3029
3030 alu.last = 1;
3031 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3032 return r;
3033 }
3034
3035 /* 12. tmp0.w = src1 - tmp0.y = r */
3036 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3037 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3038
3039 alu.dst.sel = tmp0;
3040 alu.dst.chan = 3;
3041 alu.dst.write = 1;
3042
3043 if (signed_op) {
3044 alu.src[0].sel = tmp2;
3045 alu.src[0].chan = 0;
3046 } else {
3047 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3048 }
3049
3050 alu.src[1].sel = tmp0;
3051 alu.src[1].chan = 1;
3052
3053 alu.last = 1;
3054 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3055 return r;
3056
3057 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3058 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3059 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3060
3061 alu.dst.sel = tmp1;
3062 alu.dst.chan = 0;
3063 alu.dst.write = 1;
3064
3065 alu.src[0].sel = tmp0;
3066 alu.src[0].chan = 3;
3067 if (signed_op) {
3068 alu.src[1].sel = tmp2;
3069 alu.src[1].chan = 1;
3070 } else {
3071 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3072 }
3073
3074 alu.last = 1;
3075 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3076 return r;
3077
3078 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3079 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3080 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3081
3082 alu.dst.sel = tmp1;
3083 alu.dst.chan = 1;
3084 alu.dst.write = 1;
3085
3086 if (signed_op) {
3087 alu.src[0].sel = tmp2;
3088 alu.src[0].chan = 0;
3089 } else {
3090 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3091 }
3092
3093 alu.src[1].sel = tmp0;
3094 alu.src[1].chan = 1;
3095
3096 alu.last = 1;
3097 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3098 return r;
3099
3100 if (mod) { /* UMOD */
3101
3102 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3103 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3104 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3105
3106 alu.dst.sel = tmp1;
3107 alu.dst.chan = 2;
3108 alu.dst.write = 1;
3109
3110 alu.src[0].sel = tmp0;
3111 alu.src[0].chan = 3;
3112
3113 if (signed_op) {
3114 alu.src[1].sel = tmp2;
3115 alu.src[1].chan = 1;
3116 } else {
3117 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3118 }
3119
3120 alu.last = 1;
3121 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3122 return r;
3123
3124 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3125 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3126 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3127
3128 alu.dst.sel = tmp1;
3129 alu.dst.chan = 3;
3130 alu.dst.write = 1;
3131
3132 alu.src[0].sel = tmp0;
3133 alu.src[0].chan = 3;
3134 if (signed_op) {
3135 alu.src[1].sel = tmp2;
3136 alu.src[1].chan = 1;
3137 } else {
3138 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3139 }
3140
3141 alu.last = 1;
3142 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3143 return r;
3144
3145 } else { /* UDIV */
3146
3147 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3148 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3149 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3150
3151 alu.dst.sel = tmp1;
3152 alu.dst.chan = 2;
3153 alu.dst.write = 1;
3154
3155 alu.src[0].sel = tmp0;
3156 alu.src[0].chan = 2;
3157 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3158
3159 alu.last = 1;
3160 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3161 return r;
3162
3163 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3164 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3165 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3166
3167 alu.dst.sel = tmp1;
3168 alu.dst.chan = 3;
3169 alu.dst.write = 1;
3170
3171 alu.src[0].sel = tmp0;
3172 alu.src[0].chan = 2;
3173 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3174
3175 alu.last = 1;
3176 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3177 return r;
3178
3179 }
3180
3181 /* 17. tmp1.x = tmp1.x & tmp1.y */
3182 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3183 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3184
3185 alu.dst.sel = tmp1;
3186 alu.dst.chan = 0;
3187 alu.dst.write = 1;
3188
3189 alu.src[0].sel = tmp1;
3190 alu.src[0].chan = 0;
3191 alu.src[1].sel = tmp1;
3192 alu.src[1].chan = 1;
3193
3194 alu.last = 1;
3195 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3196 return r;
3197
3198 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3199 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3200 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3202 alu.is_op3 = 1;
3203
3204 alu.dst.sel = tmp0;
3205 alu.dst.chan = 2;
3206 alu.dst.write = 1;
3207
3208 alu.src[0].sel = tmp1;
3209 alu.src[0].chan = 0;
3210 alu.src[1].sel = tmp0;
3211 alu.src[1].chan = mod ? 3 : 2;
3212 alu.src[2].sel = tmp1;
3213 alu.src[2].chan = 2;
3214
3215 alu.last = 1;
3216 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3217 return r;
3218
3219 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3220 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3222 alu.is_op3 = 1;
3223
3224 if (signed_op) {
3225 alu.dst.sel = tmp0;
3226 alu.dst.chan = 2;
3227 alu.dst.write = 1;
3228 } else {
3229 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3230 }
3231
3232 alu.src[0].sel = tmp1;
3233 alu.src[0].chan = 1;
3234 alu.src[1].sel = tmp1;
3235 alu.src[1].chan = 3;
3236 alu.src[2].sel = tmp0;
3237 alu.src[2].chan = 2;
3238
3239 alu.last = 1;
3240 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3241 return r;
3242
3243 if (signed_op) {
3244
3245 /* fix the sign of the result */
3246
3247 if (mod) {
3248
3249 /* tmp0.x = -tmp0.z */
3250 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3251 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3252
3253 alu.dst.sel = tmp0;
3254 alu.dst.chan = 0;
3255 alu.dst.write = 1;
3256
3257 alu.src[0].sel = V_SQ_ALU_SRC_0;
3258 alu.src[1].sel = tmp0;
3259 alu.src[1].chan = 2;
3260
3261 alu.last = 1;
3262 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3263 return r;
3264
3265 /* sign of the remainder is the same as the sign of src0 */
3266 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3267 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3268 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3269 alu.is_op3 = 1;
3270
3271 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3272
3273 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3274 alu.src[1].sel = tmp0;
3275 alu.src[1].chan = 2;
3276 alu.src[2].sel = tmp0;
3277 alu.src[2].chan = 0;
3278
3279 alu.last = 1;
3280 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3281 return r;
3282
3283 } else {
3284
3285 /* tmp0.x = -tmp0.z */
3286 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3287 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3288
3289 alu.dst.sel = tmp0;
3290 alu.dst.chan = 0;
3291 alu.dst.write = 1;
3292
3293 alu.src[0].sel = V_SQ_ALU_SRC_0;
3294 alu.src[1].sel = tmp0;
3295 alu.src[1].chan = 2;
3296
3297 alu.last = 1;
3298 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3299 return r;
3300
3301 /* fix the quotient sign (same as the sign of src0*src1) */
3302 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3303 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3304 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3305 alu.is_op3 = 1;
3306
3307 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3308
3309 alu.src[0].sel = tmp2;
3310 alu.src[0].chan = 2;
3311 alu.src[1].sel = tmp0;
3312 alu.src[1].chan = 2;
3313 alu.src[2].sel = tmp0;
3314 alu.src[2].chan = 0;
3315
3316 alu.last = 1;
3317 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3318 return r;
3319 }
3320 }
3321 }
3322 return 0;
3323 }
3324
/* TGSI UDIV: unsigned integer quotient, generated by tgsi_divmod(). */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;	/* quotient path of tgsi_divmod() */
	const int operands_signed = 0;	/* no sign fix-up */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3329
/* TGSI UMOD: unsigned integer remainder, generated by tgsi_divmod(). */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;	/* remainder path of tgsi_divmod() */
	const int operands_signed = 0;	/* no sign fix-up */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3334
/* TGSI IDIV: signed integer quotient, generated by tgsi_divmod(). */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;	/* quotient path of tgsi_divmod() */
	const int operands_signed = 1;	/* result sign is fixed up afterwards */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3339
/* TGSI IMOD: signed integer remainder, generated by tgsi_divmod(). */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;	/* remainder path of tgsi_divmod() */
	const int operands_signed = 1;	/* result sign is fixed up afterwards */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3344
3345
3346 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3347 {
3348 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3349 struct r600_bytecode_alu alu;
3350 int i, r;
3351 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3352 int last_inst = tgsi_last_instruction(write_mask);
3353
3354 for (i = 0; i < 4; i++) {
3355 if (!(write_mask & (1<<i)))
3356 continue;
3357
3358 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3359 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3360
3361 alu.dst.sel = ctx->temp_reg;
3362 alu.dst.chan = i;
3363 alu.dst.write = 1;
3364
3365 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3366 if (i == last_inst)
3367 alu.last = 1;
3368 r = r600_bytecode_add_alu(ctx->bc, &alu);
3369 if (r)
3370 return r;
3371 }
3372
3373 for (i = 0; i < 4; i++) {
3374 if (!(write_mask & (1<<i)))
3375 continue;
3376
3377 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3378 alu.inst = ctx->inst_info->r600_opcode;
3379
3380 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3381
3382 alu.src[0].sel = ctx->temp_reg;
3383 alu.src[0].chan = i;
3384
3385 if (i == last_inst)
3386 alu.last = 1;
3387 r = r600_bytecode_add_alu(ctx->bc, &alu);
3388 if (r)
3389 return r;
3390 }
3391
3392 return 0;
3393 }
3394
3395 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3396 {
3397 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3398 struct r600_bytecode_alu alu;
3399 int i, r;
3400 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3401 int last_inst = tgsi_last_instruction(write_mask);
3402
3403 /* tmp = -src */
3404 for (i = 0; i < 4; i++) {
3405 if (!(write_mask & (1<<i)))
3406 continue;
3407
3408 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3409 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3410
3411 alu.dst.sel = ctx->temp_reg;
3412 alu.dst.chan = i;
3413 alu.dst.write = 1;
3414
3415 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3416 alu.src[0].sel = V_SQ_ALU_SRC_0;
3417
3418 if (i == last_inst)
3419 alu.last = 1;
3420 r = r600_bytecode_add_alu(ctx->bc, &alu);
3421 if (r)
3422 return r;
3423 }
3424
3425 /* dst = (src >= 0 ? src : tmp) */
3426 for (i = 0; i < 4; i++) {
3427 if (!(write_mask & (1<<i)))
3428 continue;
3429
3430 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3431 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3432 alu.is_op3 = 1;
3433 alu.dst.write = 1;
3434
3435 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3436
3437 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3438 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3439 alu.src[2].sel = ctx->temp_reg;
3440 alu.src[2].chan = i;
3441
3442 if (i == last_inst)
3443 alu.last = 1;
3444 r = r600_bytecode_add_alu(ctx->bc, &alu);
3445 if (r)
3446 return r;
3447 }
3448 return 0;
3449 }
3450
3451 static int tgsi_issg(struct r600_shader_ctx *ctx)
3452 {
3453 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3454 struct r600_bytecode_alu alu;
3455 int i, r;
3456 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3457 int last_inst = tgsi_last_instruction(write_mask);
3458
3459 /* tmp = (src >= 0 ? src : -1) */
3460 for (i = 0; i < 4; i++) {
3461 if (!(write_mask & (1<<i)))
3462 continue;
3463
3464 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3466 alu.is_op3 = 1;
3467
3468 alu.dst.sel = ctx->temp_reg;
3469 alu.dst.chan = i;
3470 alu.dst.write = 1;
3471
3472 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3473 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3474 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3475
3476 if (i == last_inst)
3477 alu.last = 1;
3478 r = r600_bytecode_add_alu(ctx->bc, &alu);
3479 if (r)
3480 return r;
3481 }
3482
3483 /* dst = (tmp > 0 ? 1 : tmp) */
3484 for (i = 0; i < 4; i++) {
3485 if (!(write_mask & (1<<i)))
3486 continue;
3487
3488 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3489 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3490 alu.is_op3 = 1;
3491 alu.dst.write = 1;
3492
3493 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3494
3495 alu.src[0].sel = ctx->temp_reg;
3496 alu.src[0].chan = i;
3497
3498 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3499
3500 alu.src[2].sel = ctx->temp_reg;
3501 alu.src[2].chan = i;
3502
3503 if (i == last_inst)
3504 alu.last = 1;
3505 r = r600_bytecode_add_alu(ctx->bc, &alu);
3506 if (r)
3507 return r;
3508 }
3509 return 0;
3510 }
3511
3512
3513
3514 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3515 {
3516 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3517 struct r600_bytecode_alu alu;
3518 int i, r;
3519
3520 /* tmp = (src > 0 ? 1 : src) */
3521 for (i = 0; i < 4; i++) {
3522 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3524 alu.is_op3 = 1;
3525
3526 alu.dst.sel = ctx->temp_reg;
3527 alu.dst.chan = i;
3528
3529 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3530 alu.src[1].sel = V_SQ_ALU_SRC_1;
3531 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3532
3533 if (i == 3)
3534 alu.last = 1;
3535 r = r600_bytecode_add_alu(ctx->bc, &alu);
3536 if (r)
3537 return r;
3538 }
3539
3540 /* dst = (-tmp > 0 ? -1 : tmp) */
3541 for (i = 0; i < 4; i++) {
3542 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3543 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3544 alu.is_op3 = 1;
3545 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3546
3547 alu.src[0].sel = ctx->temp_reg;
3548 alu.src[0].chan = i;
3549 alu.src[0].neg = 1;
3550
3551 alu.src[1].sel = V_SQ_ALU_SRC_1;
3552 alu.src[1].neg = 1;
3553
3554 alu.src[2].sel = ctx->temp_reg;
3555 alu.src[2].chan = i;
3556
3557 if (i == 3)
3558 alu.last = 1;
3559 r = r600_bytecode_add_alu(ctx->bc, &alu);
3560 if (r)
3561 return r;
3562 }
3563 return 0;
3564 }
3565
3566 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3567 {
3568 struct r600_bytecode_alu alu;
3569 int i, r;
3570
3571 for (i = 0; i < 4; i++) {
3572 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3573 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3574 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3575 alu.dst.chan = i;
3576 } else {
3577 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3578 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3579 alu.src[0].sel = ctx->temp_reg;
3580 alu.src[0].chan = i;
3581 }
3582 if (i == 3) {
3583 alu.last = 1;
3584 }
3585 r = r600_bytecode_add_alu(ctx->bc, &alu);
3586 if (r)
3587 return r;
3588 }
3589 return 0;
3590 }
3591
3592 static int tgsi_op3(struct r600_shader_ctx *ctx)
3593 {
3594 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3595 struct r600_bytecode_alu alu;
3596 int i, j, r;
3597 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3598
3599 for (i = 0; i < lasti + 1; i++) {
3600 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3601 continue;
3602
3603 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3604 alu.inst = ctx->inst_info->r600_opcode;
3605 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3606 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3607 }
3608
3609 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3610 alu.dst.chan = i;
3611 alu.dst.write = 1;
3612 alu.is_op3 = 1;
3613 if (i == lasti) {
3614 alu.last = 1;
3615 }
3616 r = r600_bytecode_add_alu(ctx->bc, &alu);
3617 if (r)
3618 return r;
3619 }
3620 return 0;
3621 }
3622
3623 static int tgsi_dp(struct r600_shader_ctx *ctx)
3624 {
3625 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3626 struct r600_bytecode_alu alu;
3627 int i, j, r;
3628
3629 for (i = 0; i < 4; i++) {
3630 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3631 alu.inst = ctx->inst_info->r600_opcode;
3632 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3633 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3634 }
3635
3636 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3637 alu.dst.chan = i;
3638 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3639 /* handle some special cases */
3640 switch (ctx->inst_info->tgsi_opcode) {
3641 case TGSI_OPCODE_DP2:
3642 if (i > 1) {
3643 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3644 alu.src[0].chan = alu.src[1].chan = 0;
3645 }
3646 break;
3647 case TGSI_OPCODE_DP3:
3648 if (i > 2) {
3649 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3650 alu.src[0].chan = alu.src[1].chan = 0;
3651 }
3652 break;
3653 case TGSI_OPCODE_DPH:
3654 if (i == 3) {
3655 alu.src[0].sel = V_SQ_ALU_SRC_1;
3656 alu.src[0].chan = 0;
3657 alu.src[0].neg = 0;
3658 }
3659 break;
3660 default:
3661 break;
3662 }
3663 if (i == 3) {
3664 alu.last = 1;
3665 }
3666 r = r600_bytecode_add_alu(ctx->bc, &alu);
3667 if (r)
3668 return r;
3669 }
3670 return 0;
3671 }
3672
3673 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3674 unsigned index)
3675 {
3676 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3677 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3678 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3679 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3680 ctx->src[index].neg || ctx->src[index].abs;
3681 }
3682
3683 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3684 unsigned index)
3685 {
3686 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3687 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3688 }
3689
3690 static int tgsi_tex(struct r600_shader_ctx *ctx)
3691 {
3692 static float one_point_five = 1.5f;
3693 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3694 struct r600_bytecode_tex tex;
3695 struct r600_bytecode_alu alu;
3696 unsigned src_gpr;
3697 int r, i, j;
3698 int opcode;
3699 /* Texture fetch instructions can only use gprs as source.
3700 * Also they cannot negate the source or take the absolute value */
3701 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
3702 boolean src_loaded = FALSE;
3703 unsigned sampler_src_reg = 1;
3704 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3705
3706 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3707
3708 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3709 /* get offset values */
3710 if (inst->Texture.NumOffsets) {
3711 assert(inst->Texture.NumOffsets == 1);
3712
3713 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3714 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3715 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3716 }
3717 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3718 /* TGSI moves the sampler to src reg 3 for TXD */
3719 sampler_src_reg = 3;
3720
3721 for (i = 1; i < 3; i++) {
3722 /* set gradients h/v */
3723 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3724 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3725 SQ_TEX_INST_SET_GRADIENTS_V;
3726 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3727 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3728
3729 if (tgsi_tex_src_requires_loading(ctx, i)) {
3730 tex.src_gpr = r600_get_temp(ctx);
3731 tex.src_sel_x = 0;
3732 tex.src_sel_y = 1;
3733 tex.src_sel_z = 2;
3734 tex.src_sel_w = 3;
3735
3736 for (j = 0; j < 4; j++) {
3737 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3738 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3739 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3740 alu.dst.sel = tex.src_gpr;
3741 alu.dst.chan = j;
3742 if (j == 3)
3743 alu.last = 1;
3744 alu.dst.write = 1;
3745 r = r600_bytecode_add_alu(ctx->bc, &alu);
3746 if (r)
3747 return r;
3748 }
3749
3750 } else {
3751 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3752 tex.src_sel_x = ctx->src[i].swizzle[0];
3753 tex.src_sel_y = ctx->src[i].swizzle[1];
3754 tex.src_sel_z = ctx->src[i].swizzle[2];
3755 tex.src_sel_w = ctx->src[i].swizzle[3];
3756 tex.src_rel = ctx->src[i].rel;
3757 }
3758 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3759 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3760 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3761 tex.coord_type_x = 1;
3762 tex.coord_type_y = 1;
3763 tex.coord_type_z = 1;
3764 tex.coord_type_w = 1;
3765 }
3766 r = r600_bytecode_add_tex(ctx->bc, &tex);
3767 if (r)
3768 return r;
3769 }
3770 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3771 int out_chan;
3772 /* Add perspective divide */
3773 if (ctx->bc->chip_class == CAYMAN) {
3774 out_chan = 2;
3775 for (i = 0; i < 3; i++) {
3776 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3777 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3778 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3779
3780 alu.dst.sel = ctx->temp_reg;
3781 alu.dst.chan = i;
3782 if (i == 2)
3783 alu.last = 1;
3784 if (out_chan == i)
3785 alu.dst.write = 1;
3786 r = r600_bytecode_add_alu(ctx->bc, &alu);
3787 if (r)
3788 return r;
3789 }
3790
3791 } else {
3792 out_chan = 3;
3793 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3794 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3795 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3796
3797 alu.dst.sel = ctx->temp_reg;
3798 alu.dst.chan = out_chan;
3799 alu.last = 1;
3800 alu.dst.write = 1;
3801 r = r600_bytecode_add_alu(ctx->bc, &alu);
3802 if (r)
3803 return r;
3804 }
3805
3806 for (i = 0; i < 3; i++) {
3807 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3808 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3809 alu.src[0].sel = ctx->temp_reg;
3810 alu.src[0].chan = out_chan;
3811 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3812 alu.dst.sel = ctx->temp_reg;
3813 alu.dst.chan = i;
3814 alu.dst.write = 1;
3815 r = r600_bytecode_add_alu(ctx->bc, &alu);
3816 if (r)
3817 return r;
3818 }
3819 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3820 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3821 alu.src[0].sel = V_SQ_ALU_SRC_1;
3822 alu.src[0].chan = 0;
3823 alu.dst.sel = ctx->temp_reg;
3824 alu.dst.chan = 3;
3825 alu.last = 1;
3826 alu.dst.write = 1;
3827 r = r600_bytecode_add_alu(ctx->bc, &alu);
3828 if (r)
3829 return r;
3830 src_loaded = TRUE;
3831 src_gpr = ctx->temp_reg;
3832 }
3833
3834 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3835 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3836 inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
3837
3838 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3839 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3840
3841 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3842 for (i = 0; i < 4; i++) {
3843 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3844 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3845 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3846 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3847 alu.dst.sel = ctx->temp_reg;
3848 alu.dst.chan = i;
3849 if (i == 3)
3850 alu.last = 1;
3851 alu.dst.write = 1;
3852 r = r600_bytecode_add_alu(ctx->bc, &alu);
3853 if (r)
3854 return r;
3855 }
3856
3857 /* tmp1.z = RCP_e(|tmp1.z|) */
3858 if (ctx->bc->chip_class == CAYMAN) {
3859 for (i = 0; i < 3; i++) {
3860 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3861 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3862 alu.src[0].sel = ctx->temp_reg;
3863 alu.src[0].chan = 2;
3864 alu.src[0].abs = 1;
3865 alu.dst.sel = ctx->temp_reg;
3866 alu.dst.chan = i;
3867 if (i == 2)
3868 alu.dst.write = 1;
3869 if (i == 2)
3870 alu.last = 1;
3871 r = r600_bytecode_add_alu(ctx->bc, &alu);
3872 if (r)
3873 return r;
3874 }
3875 } else {
3876 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3877 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3878 alu.src[0].sel = ctx->temp_reg;
3879 alu.src[0].chan = 2;
3880 alu.src[0].abs = 1;
3881 alu.dst.sel = ctx->temp_reg;
3882 alu.dst.chan = 2;
3883 alu.dst.write = 1;
3884 alu.last = 1;
3885 r = r600_bytecode_add_alu(ctx->bc, &alu);
3886 if (r)
3887 return r;
3888 }
3889
3890 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3891 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3892 * muladd has no writemask, have to use another temp
3893 */
3894 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3895 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3896 alu.is_op3 = 1;
3897
3898 alu.src[0].sel = ctx->temp_reg;
3899 alu.src[0].chan = 0;
3900 alu.src[1].sel = ctx->temp_reg;
3901 alu.src[1].chan = 2;
3902
3903 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3904 alu.src[2].chan = 0;
3905 alu.src[2].value = *(uint32_t *)&one_point_five;
3906
3907 alu.dst.sel = ctx->temp_reg;
3908 alu.dst.chan = 0;
3909 alu.dst.write = 1;
3910
3911 r = r600_bytecode_add_alu(ctx->bc, &alu);
3912 if (r)
3913 return r;
3914
3915 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3916 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3917 alu.is_op3 = 1;
3918
3919 alu.src[0].sel = ctx->temp_reg;
3920 alu.src[0].chan = 1;
3921 alu.src[1].sel = ctx->temp_reg;
3922 alu.src[1].chan = 2;
3923
3924 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3925 alu.src[2].chan = 0;
3926 alu.src[2].value = *(uint32_t *)&one_point_five;
3927
3928 alu.dst.sel = ctx->temp_reg;
3929 alu.dst.chan = 1;
3930 alu.dst.write = 1;
3931
3932 alu.last = 1;
3933 r = r600_bytecode_add_alu(ctx->bc, &alu);
3934 if (r)
3935 return r;
3936 /* write initial W value into Z component */
3937 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
3938 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3939 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3940 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3941 alu.dst.sel = ctx->temp_reg;
3942 alu.dst.chan = 2;
3943 alu.dst.write = 1;
3944 alu.last = 1;
3945 r = r600_bytecode_add_alu(ctx->bc, &alu);
3946 if (r)
3947 return r;
3948 }
3949 src_loaded = TRUE;
3950 src_gpr = ctx->temp_reg;
3951 }
3952
3953 if (src_requires_loading && !src_loaded) {
3954 for (i = 0; i < 4; i++) {
3955 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3957 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3958 alu.dst.sel = ctx->temp_reg;
3959 alu.dst.chan = i;
3960 if (i == 3)
3961 alu.last = 1;
3962 alu.dst.write = 1;
3963 r = r600_bytecode_add_alu(ctx->bc, &alu);
3964 if (r)
3965 return r;
3966 }
3967 src_loaded = TRUE;
3968 src_gpr = ctx->temp_reg;
3969 }
3970
3971 opcode = ctx->inst_info->r600_opcode;
3972 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
3973 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
3974 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
3975 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
3976 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
3977 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
3978 switch (opcode) {
3979 case SQ_TEX_INST_SAMPLE:
3980 opcode = SQ_TEX_INST_SAMPLE_C;
3981 break;
3982 case SQ_TEX_INST_SAMPLE_L:
3983 opcode = SQ_TEX_INST_SAMPLE_C_L;
3984 break;
3985 case SQ_TEX_INST_SAMPLE_LB:
3986 opcode = SQ_TEX_INST_SAMPLE_C_LB;
3987 break;
3988 case SQ_TEX_INST_SAMPLE_G:
3989 opcode = SQ_TEX_INST_SAMPLE_C_G;
3990 break;
3991 }
3992 }
3993
3994 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3995 tex.inst = opcode;
3996
3997 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3998 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3999 tex.src_gpr = src_gpr;
4000 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4001 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4002 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4003 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4004 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4005 if (src_loaded) {
4006 tex.src_sel_x = 0;
4007 tex.src_sel_y = 1;
4008 tex.src_sel_z = 2;
4009 tex.src_sel_w = 3;
4010 } else {
4011 tex.src_sel_x = ctx->src[0].swizzle[0];
4012 tex.src_sel_y = ctx->src[0].swizzle[1];
4013 tex.src_sel_z = ctx->src[0].swizzle[2];
4014 tex.src_sel_w = ctx->src[0].swizzle[3];
4015 tex.src_rel = ctx->src[0].rel;
4016 }
4017
4018 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
4019 tex.src_sel_x = 1;
4020 tex.src_sel_y = 0;
4021 tex.src_sel_z = 3;
4022 tex.src_sel_w = 1;
4023 }
4024 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4025 tex.src_sel_x = 1;
4026 tex.src_sel_y = 0;
4027 tex.src_sel_z = 3;
4028 tex.src_sel_w = 2; /* route Z compare value into W */
4029 }
4030
4031 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4032 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4033 tex.coord_type_x = 1;
4034 tex.coord_type_y = 1;
4035 }
4036 tex.coord_type_z = 1;
4037 tex.coord_type_w = 1;
4038
4039 tex.offset_x = offset_x;
4040 tex.offset_y = offset_y;
4041 tex.offset_z = offset_z;
4042
4043 /* Put the depth for comparison in W.
4044 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4045 * Some instructions expect the depth in Z. */
4046 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4047 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4048 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4049 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4050 opcode != SQ_TEX_INST_SAMPLE_C_L &&
4051 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4052 tex.src_sel_w = tex.src_sel_z;
4053 }
4054
4055 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4056 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4057 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4058 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4059 /* the array index is read from Y */
4060 tex.coord_type_y = 0;
4061 } else {
4062 /* the array index is read from Z */
4063 tex.coord_type_z = 0;
4064 tex.src_sel_z = tex.src_sel_y;
4065 }
4066 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4067 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4068 /* the array index is read from Z */
4069 tex.coord_type_z = 0;
4070
4071 r = r600_bytecode_add_tex(ctx->bc, &tex);
4072 if (r)
4073 return r;
4074
4075 /* add shadow ambient support - gallium doesn't do it yet */
4076 return 0;
4077 }
4078
4079 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4080 {
4081 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4082 struct r600_bytecode_alu alu;
4083 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4084 unsigned i;
4085 int r;
4086
4087 /* optimize if it's just an equal balance */
4088 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4089 for (i = 0; i < lasti + 1; i++) {
4090 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4091 continue;
4092
4093 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4094 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4095 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4096 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4097 alu.omod = 3;
4098 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4099 alu.dst.chan = i;
4100 if (i == lasti) {
4101 alu.last = 1;
4102 }
4103 r = r600_bytecode_add_alu(ctx->bc, &alu);
4104 if (r)
4105 return r;
4106 }
4107 return 0;
4108 }
4109
4110 /* 1 - src0 */
4111 for (i = 0; i < lasti + 1; i++) {
4112 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4113 continue;
4114
4115 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4116 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4117 alu.src[0].sel = V_SQ_ALU_SRC_1;
4118 alu.src[0].chan = 0;
4119 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4120 r600_bytecode_src_toggle_neg(&alu.src[1]);
4121 alu.dst.sel = ctx->temp_reg;
4122 alu.dst.chan = i;
4123 if (i == lasti) {
4124 alu.last = 1;
4125 }
4126 alu.dst.write = 1;
4127 r = r600_bytecode_add_alu(ctx->bc, &alu);
4128 if (r)
4129 return r;
4130 }
4131
4132 /* (1 - src0) * src2 */
4133 for (i = 0; i < lasti + 1; i++) {
4134 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4135 continue;
4136
4137 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4138 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4139 alu.src[0].sel = ctx->temp_reg;
4140 alu.src[0].chan = i;
4141 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4142 alu.dst.sel = ctx->temp_reg;
4143 alu.dst.chan = i;
4144 if (i == lasti) {
4145 alu.last = 1;
4146 }
4147 alu.dst.write = 1;
4148 r = r600_bytecode_add_alu(ctx->bc, &alu);
4149 if (r)
4150 return r;
4151 }
4152
4153 /* src0 * src1 + (1 - src0) * src2 */
4154 for (i = 0; i < lasti + 1; i++) {
4155 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4156 continue;
4157
4158 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4159 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4160 alu.is_op3 = 1;
4161 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4162 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4163 alu.src[2].sel = ctx->temp_reg;
4164 alu.src[2].chan = i;
4165
4166 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4167 alu.dst.chan = i;
4168 if (i == lasti) {
4169 alu.last = 1;
4170 }
4171 r = r600_bytecode_add_alu(ctx->bc, &alu);
4172 if (r)
4173 return r;
4174 }
4175 return 0;
4176 }
4177
4178 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4179 {
4180 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4181 struct r600_bytecode_alu alu;
4182 int i, r;
4183 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4184
4185 for (i = 0; i < lasti + 1; i++) {
4186 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4187 continue;
4188
4189 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4190 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4191 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4192 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4193 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4194 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4195 alu.dst.chan = i;
4196 alu.dst.write = 1;
4197 alu.is_op3 = 1;
4198 if (i == lasti)
4199 alu.last = 1;
4200 r = r600_bytecode_add_alu(ctx->bc, &alu);
4201 if (r)
4202 return r;
4203 }
4204 return 0;
4205 }
4206
4207 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4208 {
4209 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4210 static const unsigned int src0_swizzle[] = {2, 0, 1};
4211 static const unsigned int src1_swizzle[] = {1, 2, 0};
4212 struct r600_bytecode_alu alu;
4213 uint32_t use_temp = 0;
4214 int i, r;
4215
4216 if (inst->Dst[0].Register.WriteMask != 0xf)
4217 use_temp = 1;
4218
4219 for (i = 0; i < 4; i++) {
4220 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4222 if (i < 3) {
4223 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4224 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4225 } else {
4226 alu.src[0].sel = V_SQ_ALU_SRC_0;
4227 alu.src[0].chan = i;
4228 alu.src[1].sel = V_SQ_ALU_SRC_0;
4229 alu.src[1].chan = i;
4230 }
4231
4232 alu.dst.sel = ctx->temp_reg;
4233 alu.dst.chan = i;
4234 alu.dst.write = 1;
4235
4236 if (i == 3)
4237 alu.last = 1;
4238 r = r600_bytecode_add_alu(ctx->bc, &alu);
4239 if (r)
4240 return r;
4241 }
4242
4243 for (i = 0; i < 4; i++) {
4244 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4245 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4246
4247 if (i < 3) {
4248 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4249 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4250 } else {
4251 alu.src[0].sel = V_SQ_ALU_SRC_0;
4252 alu.src[0].chan = i;
4253 alu.src[1].sel = V_SQ_ALU_SRC_0;
4254 alu.src[1].chan = i;
4255 }
4256
4257 alu.src[2].sel = ctx->temp_reg;
4258 alu.src[2].neg = 1;
4259 alu.src[2].chan = i;
4260
4261 if (use_temp)
4262 alu.dst.sel = ctx->temp_reg;
4263 else
4264 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4265 alu.dst.chan = i;
4266 alu.dst.write = 1;
4267 alu.is_op3 = 1;
4268 if (i == 3)
4269 alu.last = 1;
4270 r = r600_bytecode_add_alu(ctx->bc, &alu);
4271 if (r)
4272 return r;
4273 }
4274 if (use_temp)
4275 return tgsi_helper_copy(ctx, inst);
4276 return 0;
4277 }
4278
4279 static int tgsi_exp(struct r600_shader_ctx *ctx)
4280 {
4281 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4282 struct r600_bytecode_alu alu;
4283 int r;
4284 int i;
4285
4286 /* result.x = 2^floor(src); */
4287 if (inst->Dst[0].Register.WriteMask & 1) {
4288 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4289
4290 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4291 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4292
4293 alu.dst.sel = ctx->temp_reg;
4294 alu.dst.chan = 0;
4295 alu.dst.write = 1;
4296 alu.last = 1;
4297 r = r600_bytecode_add_alu(ctx->bc, &alu);
4298 if (r)
4299 return r;
4300
4301 if (ctx->bc->chip_class == CAYMAN) {
4302 for (i = 0; i < 3; i++) {
4303 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4304 alu.src[0].sel = ctx->temp_reg;
4305 alu.src[0].chan = 0;
4306
4307 alu.dst.sel = ctx->temp_reg;
4308 alu.dst.chan = i;
4309 if (i == 0)
4310 alu.dst.write = 1;
4311 if (i == 2)
4312 alu.last = 1;
4313 r = r600_bytecode_add_alu(ctx->bc, &alu);
4314 if (r)
4315 return r;
4316 }
4317 } else {
4318 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4319 alu.src[0].sel = ctx->temp_reg;
4320 alu.src[0].chan = 0;
4321
4322 alu.dst.sel = ctx->temp_reg;
4323 alu.dst.chan = 0;
4324 alu.dst.write = 1;
4325 alu.last = 1;
4326 r = r600_bytecode_add_alu(ctx->bc, &alu);
4327 if (r)
4328 return r;
4329 }
4330 }
4331
4332 /* result.y = tmp - floor(tmp); */
4333 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4334 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4335
4336 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4337 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4338
4339 alu.dst.sel = ctx->temp_reg;
4340 #if 0
4341 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4342 if (r)
4343 return r;
4344 #endif
4345 alu.dst.write = 1;
4346 alu.dst.chan = 1;
4347
4348 alu.last = 1;
4349
4350 r = r600_bytecode_add_alu(ctx->bc, &alu);
4351 if (r)
4352 return r;
4353 }
4354
4355 /* result.z = RoughApprox2ToX(tmp);*/
4356 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4357 if (ctx->bc->chip_class == CAYMAN) {
4358 for (i = 0; i < 3; i++) {
4359 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4360 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4361 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4362
4363 alu.dst.sel = ctx->temp_reg;
4364 alu.dst.chan = i;
4365 if (i == 2) {
4366 alu.dst.write = 1;
4367 alu.last = 1;
4368 }
4369
4370 r = r600_bytecode_add_alu(ctx->bc, &alu);
4371 if (r)
4372 return r;
4373 }
4374 } else {
4375 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4376 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4377 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4378
4379 alu.dst.sel = ctx->temp_reg;
4380 alu.dst.write = 1;
4381 alu.dst.chan = 2;
4382
4383 alu.last = 1;
4384
4385 r = r600_bytecode_add_alu(ctx->bc, &alu);
4386 if (r)
4387 return r;
4388 }
4389 }
4390
4391 /* result.w = 1.0;*/
4392 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4393 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4394
4395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4396 alu.src[0].sel = V_SQ_ALU_SRC_1;
4397 alu.src[0].chan = 0;
4398
4399 alu.dst.sel = ctx->temp_reg;
4400 alu.dst.chan = 3;
4401 alu.dst.write = 1;
4402 alu.last = 1;
4403 r = r600_bytecode_add_alu(ctx->bc, &alu);
4404 if (r)
4405 return r;
4406 }
4407 return tgsi_helper_copy(ctx, inst);
4408 }
4409
4410 static int tgsi_log(struct r600_shader_ctx *ctx)
4411 {
4412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4413 struct r600_bytecode_alu alu;
4414 int r;
4415 int i;
4416
4417 /* result.x = floor(log2(|src|)); */
4418 if (inst->Dst[0].Register.WriteMask & 1) {
4419 if (ctx->bc->chip_class == CAYMAN) {
4420 for (i = 0; i < 3; i++) {
4421 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4422
4423 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4424 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4425 r600_bytecode_src_set_abs(&alu.src[0]);
4426
4427 alu.dst.sel = ctx->temp_reg;
4428 alu.dst.chan = i;
4429 if (i == 0)
4430 alu.dst.write = 1;
4431 if (i == 2)
4432 alu.last = 1;
4433 r = r600_bytecode_add_alu(ctx->bc, &alu);
4434 if (r)
4435 return r;
4436 }
4437
4438 } else {
4439 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4440
4441 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4442 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4443 r600_bytecode_src_set_abs(&alu.src[0]);
4444
4445 alu.dst.sel = ctx->temp_reg;
4446 alu.dst.chan = 0;
4447 alu.dst.write = 1;
4448 alu.last = 1;
4449 r = r600_bytecode_add_alu(ctx->bc, &alu);
4450 if (r)
4451 return r;
4452 }
4453
4454 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4455 alu.src[0].sel = ctx->temp_reg;
4456 alu.src[0].chan = 0;
4457
4458 alu.dst.sel = ctx->temp_reg;
4459 alu.dst.chan = 0;
4460 alu.dst.write = 1;
4461 alu.last = 1;
4462
4463 r = r600_bytecode_add_alu(ctx->bc, &alu);
4464 if (r)
4465 return r;
4466 }
4467
4468 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4469 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4470
4471 if (ctx->bc->chip_class == CAYMAN) {
4472 for (i = 0; i < 3; i++) {
4473 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4474
4475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4476 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4477 r600_bytecode_src_set_abs(&alu.src[0]);
4478
4479 alu.dst.sel = ctx->temp_reg;
4480 alu.dst.chan = i;
4481 if (i == 1)
4482 alu.dst.write = 1;
4483 if (i == 2)
4484 alu.last = 1;
4485
4486 r = r600_bytecode_add_alu(ctx->bc, &alu);
4487 if (r)
4488 return r;
4489 }
4490 } else {
4491 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4492
4493 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4494 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4495 r600_bytecode_src_set_abs(&alu.src[0]);
4496
4497 alu.dst.sel = ctx->temp_reg;
4498 alu.dst.chan = 1;
4499 alu.dst.write = 1;
4500 alu.last = 1;
4501
4502 r = r600_bytecode_add_alu(ctx->bc, &alu);
4503 if (r)
4504 return r;
4505 }
4506
4507 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4508
4509 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4510 alu.src[0].sel = ctx->temp_reg;
4511 alu.src[0].chan = 1;
4512
4513 alu.dst.sel = ctx->temp_reg;
4514 alu.dst.chan = 1;
4515 alu.dst.write = 1;
4516 alu.last = 1;
4517
4518 r = r600_bytecode_add_alu(ctx->bc, &alu);
4519 if (r)
4520 return r;
4521
4522 if (ctx->bc->chip_class == CAYMAN) {
4523 for (i = 0; i < 3; i++) {
4524 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4525 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4526 alu.src[0].sel = ctx->temp_reg;
4527 alu.src[0].chan = 1;
4528
4529 alu.dst.sel = ctx->temp_reg;
4530 alu.dst.chan = i;
4531 if (i == 1)
4532 alu.dst.write = 1;
4533 if (i == 2)
4534 alu.last = 1;
4535
4536 r = r600_bytecode_add_alu(ctx->bc, &alu);
4537 if (r)
4538 return r;
4539 }
4540 } else {
4541 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4542 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4543 alu.src[0].sel = ctx->temp_reg;
4544 alu.src[0].chan = 1;
4545
4546 alu.dst.sel = ctx->temp_reg;
4547 alu.dst.chan = 1;
4548 alu.dst.write = 1;
4549 alu.last = 1;
4550
4551 r = r600_bytecode_add_alu(ctx->bc, &alu);
4552 if (r)
4553 return r;
4554 }
4555
4556 if (ctx->bc->chip_class == CAYMAN) {
4557 for (i = 0; i < 3; i++) {
4558 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4559 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4560 alu.src[0].sel = ctx->temp_reg;
4561 alu.src[0].chan = 1;
4562
4563 alu.dst.sel = ctx->temp_reg;
4564 alu.dst.chan = i;
4565 if (i == 1)
4566 alu.dst.write = 1;
4567 if (i == 2)
4568 alu.last = 1;
4569
4570 r = r600_bytecode_add_alu(ctx->bc, &alu);
4571 if (r)
4572 return r;
4573 }
4574 } else {
4575 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4576 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4577 alu.src[0].sel = ctx->temp_reg;
4578 alu.src[0].chan = 1;
4579
4580 alu.dst.sel = ctx->temp_reg;
4581 alu.dst.chan = 1;
4582 alu.dst.write = 1;
4583 alu.last = 1;
4584
4585 r = r600_bytecode_add_alu(ctx->bc, &alu);
4586 if (r)
4587 return r;
4588 }
4589
4590 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4591
4592 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4593
4594 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4595 r600_bytecode_src_set_abs(&alu.src[0]);
4596
4597 alu.src[1].sel = ctx->temp_reg;
4598 alu.src[1].chan = 1;
4599
4600 alu.dst.sel = ctx->temp_reg;
4601 alu.dst.chan = 1;
4602 alu.dst.write = 1;
4603 alu.last = 1;
4604
4605 r = r600_bytecode_add_alu(ctx->bc, &alu);
4606 if (r)
4607 return r;
4608 }
4609
4610 /* result.z = log2(|src|);*/
4611 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4612 if (ctx->bc->chip_class == CAYMAN) {
4613 for (i = 0; i < 3; i++) {
4614 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4615
4616 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4617 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4618 r600_bytecode_src_set_abs(&alu.src[0]);
4619
4620 alu.dst.sel = ctx->temp_reg;
4621 if (i == 2)
4622 alu.dst.write = 1;
4623 alu.dst.chan = i;
4624 if (i == 2)
4625 alu.last = 1;
4626
4627 r = r600_bytecode_add_alu(ctx->bc, &alu);
4628 if (r)
4629 return r;
4630 }
4631 } else {
4632 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4633
4634 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4635 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4636 r600_bytecode_src_set_abs(&alu.src[0]);
4637
4638 alu.dst.sel = ctx->temp_reg;
4639 alu.dst.write = 1;
4640 alu.dst.chan = 2;
4641 alu.last = 1;
4642
4643 r = r600_bytecode_add_alu(ctx->bc, &alu);
4644 if (r)
4645 return r;
4646 }
4647 }
4648
4649 /* result.w = 1.0; */
4650 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4651 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4652
4653 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4654 alu.src[0].sel = V_SQ_ALU_SRC_1;
4655 alu.src[0].chan = 0;
4656
4657 alu.dst.sel = ctx->temp_reg;
4658 alu.dst.chan = 3;
4659 alu.dst.write = 1;
4660 alu.last = 1;
4661
4662 r = r600_bytecode_add_alu(ctx->bc, &alu);
4663 if (r)
4664 return r;
4665 }
4666
4667 return tgsi_helper_copy(ctx, inst);
4668 }
4669
4670 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4671 {
4672 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4673 struct r600_bytecode_alu alu;
4674 int r;
4675
4676 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4677
4678 switch (inst->Instruction.Opcode) {
4679 case TGSI_OPCODE_ARL:
4680 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4681 break;
4682 case TGSI_OPCODE_ARR:
4683 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4684 break;
4685 case TGSI_OPCODE_UARL:
4686 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4687 break;
4688 default:
4689 assert(0);
4690 return -1;
4691 }
4692
4693 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4694 alu.last = 1;
4695 alu.dst.sel = ctx->bc->ar_reg;
4696 alu.dst.write = 1;
4697 r = r600_bytecode_add_alu(ctx->bc, &alu);
4698 if (r)
4699 return r;
4700
4701 ctx->bc->ar_loaded = 0;
4702 return 0;
4703 }
4704 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4705 {
4706 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4707 struct r600_bytecode_alu alu;
4708 int r;
4709
4710 switch (inst->Instruction.Opcode) {
4711 case TGSI_OPCODE_ARL:
4712 memset(&alu, 0, sizeof(alu));
4713 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4714 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4715 alu.dst.sel = ctx->bc->ar_reg;
4716 alu.dst.write = 1;
4717 alu.last = 1;
4718
4719 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4720 return r;
4721
4722 memset(&alu, 0, sizeof(alu));
4723 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4724 alu.src[0].sel = ctx->bc->ar_reg;
4725 alu.dst.sel = ctx->bc->ar_reg;
4726 alu.dst.write = 1;
4727 alu.last = 1;
4728
4729 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4730 return r;
4731 break;
4732 case TGSI_OPCODE_ARR:
4733 memset(&alu, 0, sizeof(alu));
4734 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4735 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4736 alu.dst.sel = ctx->bc->ar_reg;
4737 alu.dst.write = 1;
4738 alu.last = 1;
4739
4740 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4741 return r;
4742 break;
4743 case TGSI_OPCODE_UARL:
4744 memset(&alu, 0, sizeof(alu));
4745 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4746 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4747 alu.dst.sel = ctx->bc->ar_reg;
4748 alu.dst.write = 1;
4749 alu.last = 1;
4750
4751 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4752 return r;
4753 break;
4754 default:
4755 assert(0);
4756 return -1;
4757 }
4758
4759 ctx->bc->ar_loaded = 0;
4760 return 0;
4761 }
4762
4763 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4764 {
4765 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4766 struct r600_bytecode_alu alu;
4767 int i, r = 0;
4768
4769 for (i = 0; i < 4; i++) {
4770 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4771
4772 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4773 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4774
4775 if (i == 0 || i == 3) {
4776 alu.src[0].sel = V_SQ_ALU_SRC_1;
4777 } else {
4778 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4779 }
4780
4781 if (i == 0 || i == 2) {
4782 alu.src[1].sel = V_SQ_ALU_SRC_1;
4783 } else {
4784 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4785 }
4786 if (i == 3)
4787 alu.last = 1;
4788 r = r600_bytecode_add_alu(ctx->bc, &alu);
4789 if (r)
4790 return r;
4791 }
4792 return 0;
4793 }
4794
4795 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4796 {
4797 struct r600_bytecode_alu alu;
4798 int r;
4799
4800 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4801 alu.inst = opcode;
4802 alu.predicate = 1;
4803
4804 alu.dst.sel = ctx->temp_reg;
4805 alu.dst.write = 1;
4806 alu.dst.chan = 0;
4807
4808 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4809 alu.src[1].sel = V_SQ_ALU_SRC_0;
4810 alu.src[1].chan = 0;
4811
4812 alu.last = 1;
4813
4814 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4815 if (r)
4816 return r;
4817 return 0;
4818 }
4819
4820 static int pops(struct r600_shader_ctx *ctx, int pops)
4821 {
4822 unsigned force_pop = ctx->bc->force_add_cf;
4823
4824 if (!force_pop) {
4825 int alu_pop = 3;
4826 if (ctx->bc->cf_last) {
4827 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4828 alu_pop = 0;
4829 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4830 alu_pop = 1;
4831 }
4832 alu_pop += pops;
4833 if (alu_pop == 1) {
4834 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4835 ctx->bc->force_add_cf = 1;
4836 } else if (alu_pop == 2) {
4837 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4838 ctx->bc->force_add_cf = 1;
4839 } else {
4840 force_pop = 1;
4841 }
4842 }
4843
4844 if (force_pop) {
4845 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4846 ctx->bc->cf_last->pop_count = pops;
4847 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4848 }
4849
4850 return 0;
4851 }
4852
4853 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4854 {
4855 switch(reason) {
4856 case FC_PUSH_VPM:
4857 ctx->bc->callstack[ctx->bc->call_sp].current--;
4858 break;
4859 case FC_PUSH_WQM:
4860 case FC_LOOP:
4861 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4862 break;
4863 case FC_REP:
4864 /* TOODO : for 16 vp asic should -= 2; */
4865 ctx->bc->callstack[ctx->bc->call_sp].current --;
4866 break;
4867 }
4868 }
4869
4870 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4871 {
4872 if (check_max_only) {
4873 int diff;
4874 switch (reason) {
4875 case FC_PUSH_VPM:
4876 diff = 1;
4877 break;
4878 case FC_PUSH_WQM:
4879 diff = 4;
4880 break;
4881 default:
4882 assert(0);
4883 diff = 0;
4884 }
4885 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4886 ctx->bc->callstack[ctx->bc->call_sp].max) {
4887 ctx->bc->callstack[ctx->bc->call_sp].max =
4888 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4889 }
4890 return;
4891 }
4892 switch (reason) {
4893 case FC_PUSH_VPM:
4894 ctx->bc->callstack[ctx->bc->call_sp].current++;
4895 break;
4896 case FC_PUSH_WQM:
4897 case FC_LOOP:
4898 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4899 break;
4900 case FC_REP:
4901 ctx->bc->callstack[ctx->bc->call_sp].current++;
4902 break;
4903 }
4904
4905 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4906 ctx->bc->callstack[ctx->bc->call_sp].max) {
4907 ctx->bc->callstack[ctx->bc->call_sp].max =
4908 ctx->bc->callstack[ctx->bc->call_sp].current;
4909 }
4910 }
4911
4912 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4913 {
4914 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4915
4916 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
4917 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
4918 sp->mid[sp->num_mid] = ctx->bc->cf_last;
4919 sp->num_mid++;
4920 }
4921
4922 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
4923 {
4924 ctx->bc->fc_sp++;
4925 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
4926 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
4927 }
4928
4929 static void fc_poplevel(struct r600_shader_ctx *ctx)
4930 {
4931 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
4932 if (sp->mid) {
4933 free(sp->mid);
4934 sp->mid = NULL;
4935 }
4936 sp->num_mid = 0;
4937 sp->start = NULL;
4938 sp->type = 0;
4939 ctx->bc->fc_sp--;
4940 }
4941
#if 0
/*
 * Disabled scaffolding for RET / LOOPRET support.  Nothing in here is
 * compiled; several of the helpers are still empty placeholders.
 */

/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a JUMP CF instruction popping `pops` entries; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Placeholder: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Placeholder: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, reset the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, then emit the current instruction's CF opcode with
 * pop_count 1, record it as a mid instruction of level fc_sp and pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
4989
4990 static int tgsi_if(struct r600_shader_ctx *ctx)
4991 {
4992 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
4993
4994 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
4995
4996 fc_pushlevel(ctx, FC_IF);
4997
4998 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
4999 return 0;
5000 }
5001
5002 static int tgsi_else(struct r600_shader_ctx *ctx)
5003 {
5004 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5005 ctx->bc->cf_last->pop_count = 1;
5006
5007 fc_set_mid(ctx, ctx->bc->fc_sp);
5008 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5009 return 0;
5010 }
5011
5012 static int tgsi_endif(struct r600_shader_ctx *ctx)
5013 {
5014 pops(ctx, 1);
5015 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5016 R600_ERR("if/endif unbalanced in shader\n");
5017 return -1;
5018 }
5019
5020 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5021 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5022 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5023 } else {
5024 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5025 }
5026 fc_poplevel(ctx);
5027
5028 callstack_decrease_current(ctx, FC_PUSH_VPM);
5029 return 0;
5030 }
5031
5032 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5033 {
5034 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
5035
5036 fc_pushlevel(ctx, FC_LOOP);
5037
5038 /* check stack depth */
5039 callstack_check_depth(ctx, FC_LOOP, 0);
5040 return 0;
5041 }
5042
5043 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5044 {
5045 int i;
5046
5047 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5048
5049 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5050 R600_ERR("loop/endloop in shader code are not paired.\n");
5051 return -EINVAL;
5052 }
5053
5054 /* fixup loop pointers - from r600isa
5055 LOOP END points to CF after LOOP START,
5056 LOOP START point to CF after LOOP END
5057 BRK/CONT point to LOOP END CF
5058 */
5059 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5060
5061 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5062
5063 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5064 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5065 }
5066 /* XXX add LOOPRET support */
5067 fc_poplevel(ctx);
5068 callstack_decrease_current(ctx, FC_LOOP);
5069 return 0;
5070 }
5071
5072 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5073 {
5074 unsigned int fscp;
5075
5076 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5077 {
5078 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5079 break;
5080 }
5081
5082 if (fscp == 0) {
5083 R600_ERR("Break not inside loop/endloop pair\n");
5084 return -EINVAL;
5085 }
5086
5087 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5088
5089 fc_set_mid(ctx, fscp);
5090
5091 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5092 return 0;
5093 }
5094
5095 static int tgsi_umad(struct r600_shader_ctx *ctx)
5096 {
5097 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5098 struct r600_bytecode_alu alu;
5099 int i, j, r;
5100 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5101
5102 /* src0 * src1 */
5103 for (i = 0; i < lasti + 1; i++) {
5104 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5105 continue;
5106
5107 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5108
5109 alu.dst.chan = i;
5110 alu.dst.sel = ctx->temp_reg;
5111 alu.dst.write = 1;
5112
5113 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5114 for (j = 0; j < 2; j++) {
5115 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5116 }
5117
5118 alu.last = 1;
5119 r = r600_bytecode_add_alu(ctx->bc, &alu);
5120 if (r)
5121 return r;
5122 }
5123
5124
5125 for (i = 0; i < lasti + 1; i++) {
5126 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5127 continue;
5128
5129 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5130 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5131
5132 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5133
5134 alu.src[0].sel = ctx->temp_reg;
5135 alu.src[0].chan = i;
5136
5137 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5138 if (i == lasti) {
5139 alu.last = 1;
5140 }
5141 r = r600_bytecode_add_alu(ctx->bc, &alu);
5142 if (r)
5143 return r;
5144 }
5145 return 0;
5146 }
5147
5148 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5149 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5150 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5151 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5152
5153 /* XXX:
5154 * For state trackers other than OpenGL, we'll want to use
5155 * _RECIP_IEEE instead.
5156 */
5157 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5158
5159 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5160 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5161 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5162 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5163 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5164 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5165 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5166 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5167 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5168 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5169 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5170 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5171 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5172 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5173 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5174 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5175 /* gap */
5176 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5177 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5178 /* gap */
5179 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5180 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5181 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5182 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5183 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5184 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5185 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5186 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5187 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5188 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5189 /* gap */
5190 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5191 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5192 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5193 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5194 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5195 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5196 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5197 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5198 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5199 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5200 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5201 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5202 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5203 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5204 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5205 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5206 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5207 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5208 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5209 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5210 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5211 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5212 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5213 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5214 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5215 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5216 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5217 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5218 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5219 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5220 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5221 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5222 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5223 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5224 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5225 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5226 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5227 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5228 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5229 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5230 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5231 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5232 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5233 /* gap */
5234 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5235 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5236 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5237 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5238 /* gap */
5239 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5240 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5241 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5242 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5243 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5244 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5245 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5246 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5247 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5248 /* gap */
5249 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5250 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5251 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5252 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5253 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5254 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5255 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5256 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5257 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5258 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5259 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5260 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5261 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5262 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5263 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5264 /* gap */
5265 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5266 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5267 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5268 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5269 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5270 /* gap */
5271 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5272 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5273 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5274 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5275 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5276 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5277 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5278 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5279 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5280 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5281 /* gap */
5282 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5283 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5284 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5285 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5286 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5287 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5288 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5289 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5290 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5291 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5292 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5293 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5294 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5295 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5296 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5297 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5298 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5299 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5300 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5301 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5302 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5303 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5304 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5305 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5306 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5307 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5308 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5309 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5310 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5311 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5312 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5313 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5314 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5315 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5316 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5317 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5318 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5319 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5320 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5321 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5322 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5323 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5324 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5325 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5326 };
5327
/*
 * TGSI opcode translation table for the "eg" variant (presumably Evergreen --
 * TODO confirm); its hardware-opcode column uses the EG_-prefixed constants,
 * plus SQ_TEX_INST_* for the texture entries.
 *
 * Each entry has four fields, in this order:
 *   1. the TGSI opcode, or a bare number where a "gap" comment marks an
 *      opcode value that has no TGSI_OPCODE_* name in this table;
 *   2. a flag that is 1 only for MAD, the single entry paired with an OP3
 *      opcode -- presumably an "is three-operand" marker; confirm against the
 *      struct definition;
 *   3. the hardware opcode constant associated with the entry (NOP or a
 *      literal 0 for every tgsi_unsupported entry and for several dedicated
 *      handlers such as tgsi_lit or tgsi_if);
 *   4. the handler function for the opcode.
 *
 * NOTE(review): entries are listed in ascending opcode order with explicit
 * fillers for the gaps, which suggests the table is indexed directly by the
 * opcode value -- keep the order intact and verify against the lookup site.
 */
5328 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5329 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5330 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5331 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5332 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5333 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5334 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5335 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5336 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5337 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5338 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5339 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5340 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5341 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5342 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
/* SLT is listed with SETGT and tgsi_op2_swap -- presumably emitted as (b > a); confirm in tgsi_op2_swap */
5343 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5344 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
/* only entry whose second field is 1; its opcode constant comes from the OP3 group */
5345 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
/* SUB shares the ADD opcode with the ADD entry; any negation must happen in tgsi_op2 (not visible here) */
5346 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5347 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5348 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5349 /* gap */
5350 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5351 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5352 /* gap */
5353 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5354 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5355 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5356 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5357 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5358 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5359 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5360 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5361 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5362 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5363 /* gap */
5364 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5365 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5366 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5367 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5368 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
/* DDX/DDY go through the texture handler with the GET_GRADIENTS_H/V texture opcodes */
5369 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5370 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5371 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5372 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5373 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5374 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5375 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5376 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5377 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5378 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5379 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5380 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5381 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5382 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5383 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5384 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5385 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5386 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5387 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5388 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5389 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5390 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5391 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5392 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5393 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5394 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5395 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5396 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5397 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5398 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5399 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5400 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5401 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5402 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5403 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5404 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
/* BRK (and CONT further down) carry CF-word opcodes rather than ALU opcodes */
5405 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5406 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5407 /* gap */
5408 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5409 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5410 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5411 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5412 /* gap */
5413 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5414 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5415 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5416 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5417 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5418 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5419 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5420 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5421 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5422 /* gap */
5423 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5424 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5425 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5426 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5427 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5428 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5429 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5430 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5431 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5432 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5433 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5434 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5435 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5436 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5437 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5438 /* gap */
5439 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5440 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5441 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5442 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* NOTE(review): TGSI_OPCODE_NOP is routed to tgsi_unsupported, not to a no-op handler -- confirm this is intended */
5443 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5444 /* gap */
5445 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5446 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5447 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5448 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5449 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5450 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5451 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5452 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5453 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5454 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5455 /* gap */
5456 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* F2I and F2U both use the dedicated tgsi_f2i handler in this table */
5457 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5458 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5459 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5460 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5461 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5462 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5463 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5464 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5465 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5466 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5467 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5468 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5469 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5470 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5471 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5472 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5473 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5474 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5475 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5476 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5477 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5478 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5479 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5481 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5482 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* SAMPLE through SAMPLE_INFO are all unsupported and carry a literal 0 opcode */
5483 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5484 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5485 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5486 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5487 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5488 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5489 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5490 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5491 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5492 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5493 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5494 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5495 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5496 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5497 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5498 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
/* final entry: TGSI_OPCODE_LAST itself, mapped to tgsi_unsupported */
5499 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5500 };
5501
/*
 * TGSI opcode translation table for the "cm" variant (presumably Cayman, per
 * the "CAYMAN notes" comment at the top of the file -- TODO confirm).
 *
 * Each entry has four fields, in this order: the TGSI opcode (or a bare
 * number for the "gap" fillers), a flag that is 1 only for MAD, the hardware
 * opcode constant, and the handler function.  The hardware opcodes are the
 * same EG_-prefixed / SQ_TEX_INST_* constants used by
 * eg_shader_tgsi_instruction; the entries that differ from that table are:
 *   - RCP, RSQ, EX2, LG2   -> cayman_emit_float_instr
 *   - POW                  -> cayman_pow
 *   - COS, SIN             -> cayman_trig
 *   - UMUL                 -> cayman_mul_int_instr, with MULLO_INT
 *   - I2F, U2F, F2I, F2U   -> plain tgsi_op2
 *
 * NOTE(review): entries are listed in ascending opcode order with explicit
 * fillers for the gaps, which suggests the table is indexed directly by the
 * opcode value -- keep the order intact and verify against the lookup site.
 */
5502 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5503 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5504 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5505 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
/* RCP/RSQ (and EX2/LG2 below) use the cayman-specific float handler */
5506 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5507 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5508 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5509 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5510 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5511 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5512 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5513 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5514 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5515 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5516 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5517 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5518 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
/* only entry whose second field is 1; its opcode constant comes from the OP3 group */
5519 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5520 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5521 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5522 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5523 /* gap */
5524 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5525 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5526 /* gap */
5527 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5528 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5529 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5530 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5531 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5532 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5533 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5534 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5535 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5536 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5537 /* gap */
5538 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5539 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5540 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5541 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
/* COS (and SIN below) use cayman_trig rather than tgsi_trig */
5542 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
5543 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5544 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5545 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5546 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5547 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5548 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5549 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5550 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5551 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5552 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5553 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5554 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5555 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5556 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5557 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5558 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5559 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5560 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5561 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5562 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5563 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5564 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5565 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5566 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5567 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5568 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5569 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5570 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5571 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5572 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5573 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5574 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5575 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5576 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5577 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5578 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
/* BRK (and CONT further down) carry CF-word opcodes rather than ALU opcodes */
5579 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5580 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5581 /* gap */
5582 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5583 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5584 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5585 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5586 /* gap */
5587 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5588 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5589 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5590 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5591 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
/* I2F uses plain tgsi_op2 here; the file's CAYMAN notes list INT_TO_FLT among ops that became vector ops */
5592 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5593 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5594 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5595 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5596 /* gap */
5597 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5598 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5599 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5600 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5601 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5602 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5603 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5604 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5605 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5606 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5607 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5608 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5609 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5610 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5611 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5612 /* gap */
5613 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5614 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5615 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5616 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* NOTE(review): TGSI_OPCODE_NOP is routed to tgsi_unsupported, not to a no-op handler -- confirm this is intended */
5617 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5618 /* gap */
5619 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5620 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5621 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5622 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5623 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5624 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5625 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5626 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5627 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5628 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5629 /* gap */
5630 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* F2I, F2U and U2F use plain tgsi_op2 here instead of tgsi_f2i / tgsi_op2_trans */
5631 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5632 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5633 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5634 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5635 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5636 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5637 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5638 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5639 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5640 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5641 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5642 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5643 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5644 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5645 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5646 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
/* NOTE(review): UMUL is paired with MULLO_INT here, whereas eg_shader_tgsi_instruction uses MULLO_UINT -- confirm this is deliberate */
5647 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
5648 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5649 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5650 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5651 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5652 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5653 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5655 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5656 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* SAMPLE through SAMPLE_INFO are all unsupported and carry a literal 0 opcode */
5657 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5658 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
5659 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5660 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5661 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5662 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5663 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5664 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5665 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5666 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
5667 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5668 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5669 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5670 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5671 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5672 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
/* final entry: TGSI_OPCODE_LAST itself, mapped to tgsi_unsupported */
5673 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5674 };