d0f5eecaeace70e988d73a01b07aa2d3169ad190
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_context *rctx = (struct r600_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader);
107
108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_context *rctx = (struct r600_context *)ctx;
112 int r;
113
114 /* Would like some magic "get_bool_option_once" routine.
115 */
116 if (dump_shaders == -1)
117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
118
119 if (dump_shaders) {
120 fprintf(stderr, "--------------------------------------------------------------\n");
121 tgsi_dump(shader->tokens, 0);
122
123 if (shader->so.num_outputs) {
124 unsigned i;
125 fprintf(stderr, "STREAMOUT\n");
126 for (i = 0; i < shader->so.num_outputs; i++) {
127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) <<
128 shader->so.output[i].start_component;
129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
130 shader->so.output[i].output_buffer, shader->so.output[i].register_index,
131 mask & 1 ? "x" : "_",
132 (mask >> 1) & 1 ? "y" : "_",
133 (mask >> 2) & 1 ? "z" : "_",
134 (mask >> 3) & 1 ? "w" : "_");
135 }
136 }
137 }
138 r = r600_shader_from_tgsi(rctx, shader);
139 if (r) {
140 R600_ERR("translation from TGSI failed !\n");
141 return r;
142 }
143 r = r600_bytecode_build(&shader->shader.bc);
144 if (r) {
145 R600_ERR("building bytecode failed !\n");
146 return r;
147 }
148 if (dump_shaders) {
149 r600_bytecode_dump(&shader->shader.bc);
150 fprintf(stderr, "______________________________________________________________\n");
151 }
152 return r600_pipe_shader(ctx, shader);
153 }
154
155 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156 {
157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
158 r600_bytecode_clear(&shader->shader.bc);
159
160 memset(&shader->shader,0,sizeof(struct r600_shader));
161 }
162
163 /*
164 * tgsi -> r600 shader
165 */
166 struct r600_shader_tgsi_instruction;
167
/*
 * One decoded source operand of the TGSI instruction being translated;
 * filled in by tgsi_src().
 */
struct r600_shader_src {
	/* register selector; V_SQ_ALU_SRC_LITERAL when the operand is a literal */
	unsigned sel;
	/* swizzle for each of the four components, taken from SwizzleX..SwizzleW */
	unsigned swizzle[4];
	/* taken from the TGSI Negate flag; r600_bytecode_special_constants()
	 * receives a pointer to it for immediates */
	unsigned neg;
	/* taken from the TGSI Absolute flag */
	unsigned abs;
	/* V_SQ_REL_RELATIVE when the TGSI register is indirect, otherwise 0 */
	unsigned rel;
	/* the four dwords of the immediate when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t value[4];
};
176
/* State carried around while one TGSI shader is converted to r600 bytecode. */
struct r600_shader_ctx {
	/* scan results; input/system-value semantics and file_max are read from it */
	struct tgsi_shader_info info;
	/* parse.FullToken holds the declaration or instruction being handled */
	struct tgsi_parse_context parse;
	const struct tgsi_token *tokens;
	/* compared against TGSI_PROCESSOR_VERTEX / TGSI_PROCESSOR_FRAGMENT */
	unsigned type;
	/* per TGSI file: added to a register index to form the selector */
	unsigned file_offset[TGSI_FILE_COUNT];
	/* first register handed out by r600_get_temp() */
	unsigned temp_reg;
	/* opcode table entry of the instruction being translated */
	struct r600_shader_tgsi_instruction *inst_info;
	/* bytecode the instructions are appended to */
	struct r600_bytecode *bc;
	/* shader description (inputs, outputs, flags) being filled in */
	struct r600_shader *shader;
	/* decoded sources of the current instruction, see tgsi_split_constant() */
	struct r600_shader_src src[4];
	/* immediate values, four dwords per TGSI immediate index */
	uint32_t *literals;
	/* NOTE(review): presumably the number of stored literals - confirm */
	uint32_t nliterals;
	/* number of temporaries handed out by r600_get_temp() so far */
	uint32_t max_driver_temp_used;
	/* needed for evergreen interpolation */
	/* the three flags below are computed by evergreen_gpr_count() */
	boolean input_centroid;
	boolean input_linear;
	boolean input_perspective;
	/* 1 plus the registers needed for the barycentric pairs */
	int num_interp_gpr;
	/* register of the FACE input; -1 is treated as "not declared" */
	int face_gpr;
	/* number of COLOR inputs declared by a fragment shader */
	int colors_used;
	/* TRUE once a vertex shader declared a CLIPVERTEX output */
	boolean clip_vertex_write;
	/* index of that CLIPVERTEX output in shader->output[] */
	unsigned cv_output;
	/* index of the POSITION input in shader->input[] (fragment shaders) */
	int fragcoord_input;
	/* when zero, the instance id is converted to float at declaration time */
	int native_integers;
};
203
/*
 * Entry of the per-chip translation tables (r600_shader_tgsi_instruction,
 * eg_shader_tgsi_instruction, cm_shader_tgsi_instruction), which are
 * indexed by TGSI opcode.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry belongs to */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-operand ALU opcodes - confirm */
	unsigned is_op3;
	/* NOTE(review): presumably the hardware opcode to emit - confirm */
	unsigned r600_opcode;
	/* handler that translates the instruction held in ctx */
	int (*process)(struct r600_shader_ctx *ctx);
};
210
211 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
212 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
213 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
214 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
215 static int tgsi_else(struct r600_shader_ctx *ctx);
216 static int tgsi_endif(struct r600_shader_ctx *ctx);
217 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
218 static int tgsi_endloop(struct r600_shader_ctx *ctx);
219 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
220
221 /*
222 * bytestream -> r600 shader
223 *
224 * These functions are used to transform the output of the LLVM backend into
225 * struct r600_bytecode.
226 */
227
228 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
229 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
230 {
231 unsigned i;
232 unsigned sel0, sel1;
233 sel0 = bytes[bytes_read++];
234 sel1 = bytes[bytes_read++];
235 alu->src[src_idx].sel = sel0 | (sel1 << 8);
236 alu->src[src_idx].chan = bytes[bytes_read++];
237 alu->src[src_idx].neg = bytes[bytes_read++];
238 alu->src[src_idx].abs = bytes[bytes_read++];
239 alu->src[src_idx].rel = bytes[bytes_read++];
240 alu->src[src_idx].kc_bank = bytes[bytes_read++];
241 for (i = 0; i < 4; i++) {
242 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
243 }
244 return bytes_read;
245 }
246
247 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
248 unsigned char * bytes, unsigned bytes_read)
249 {
250 unsigned src_idx;
251 unsigned inst0, inst1;
252 struct r600_bytecode_alu alu;
253 memset(&alu, 0, sizeof(alu));
254 for(src_idx = 0; src_idx < 3; src_idx++) {
255 bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
256 &alu, src_idx);
257 }
258
259 alu.dst.sel = bytes[bytes_read++];
260 alu.dst.chan = bytes[bytes_read++];
261 alu.dst.clamp = bytes[bytes_read++];
262 alu.dst.write = bytes[bytes_read++];
263 alu.dst.rel = bytes[bytes_read++];
264 inst0 = bytes[bytes_read++];
265 inst1 = bytes[bytes_read++];
266 alu.inst = inst0 | (inst1 << 8);
267 alu.last = bytes[bytes_read++];
268 alu.is_op3 = bytes[bytes_read++];
269 alu.predicate = bytes[bytes_read++];
270 alu.bank_swizzle = bytes[bytes_read++];
271 alu.bank_swizzle_force = bytes[bytes_read++];
272 alu.omod = bytes[bytes_read++];
273 alu.index_mode = bytes[bytes_read++];
274 r600_bytecode_add_alu(ctx->bc, &alu);
275
276 /* XXX: Handle other KILL instructions */
277 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
278 ctx->shader->uses_kill = 1;
279 /* XXX: This should be enforced in the LLVM backend. */
280 ctx->bc->force_add_cf = 1;
281 }
282 return bytes_read;
283 }
284
285 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
286 unsigned pred_inst)
287 {
288 alu->inst = pred_inst;
289 alu->predicate = 1;
290 alu->src[1].sel = V_SQ_ALU_SRC_0;
291 alu->src[1].chan = 0;
292 alu->last = 1;
293 r600_bytecode_add_alu_type(ctx->bc, alu,
294 CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
295
296 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
297 fc_pushlevel(ctx, FC_IF);
298 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
299 }
300
301 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
302 struct r600_bytecode_alu *alu, unsigned compare_opcode)
303 {
304 unsigned opcode = TGSI_OPCODE_BRK;
305 if (ctx->bc->chip_class == CAYMAN)
306 ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
307 else if (ctx->bc->chip_class >= EVERGREEN)
308 ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
309 else
310 ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
311 llvm_if(ctx, alu, compare_opcode);
312 tgsi_loop_brk_cont(ctx);
313 tgsi_endif(ctx);
314 }
315
316 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
317 unsigned char * bytes, unsigned bytes_read)
318 {
319 struct r600_bytecode_alu alu;
320 unsigned inst;
321 memset(&alu, 0, sizeof(alu));
322 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
323 inst = bytes[bytes_read++];
324 switch (inst) {
325 case 0:
326 llvm_if(ctx, &alu,
327 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
328 break;
329 case 1:
330 tgsi_else(ctx);
331 break;
332 case 2:
333 tgsi_endif(ctx);
334 break;
335 case 3:
336 tgsi_bgnloop(ctx);
337 break;
338 case 4:
339 tgsi_endloop(ctx);
340 break;
341 case 5:
342 r600_break_from_byte_stream(ctx, &alu,
343 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE));
344 break;
345 case 6:
346 r600_break_from_byte_stream(ctx, &alu,
347 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
348 break;
349 case 7:
350 {
351 unsigned opcode = TGSI_OPCODE_CONT;
352 if (ctx->bc->chip_class == CAYMAN) {
353 ctx->inst_info =
354 &cm_shader_tgsi_instruction[opcode];
355 } else if (ctx->bc->chip_class >= EVERGREEN) {
356 ctx->inst_info =
357 &eg_shader_tgsi_instruction[opcode];
358 } else {
359 ctx->inst_info =
360 &r600_shader_tgsi_instruction[opcode];
361 }
362 tgsi_loop_brk_cont(ctx);
363 }
364 break;
365 case 8:
366 r600_break_from_byte_stream(ctx, &alu,
367 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
368 break;
369 }
370
371 return bytes_read;
372 }
373
374 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
375 unsigned char * bytes, unsigned bytes_read)
376 {
377 struct r600_bytecode_tex tex;
378
379 tex.inst = bytes[bytes_read++];
380 tex.resource_id = bytes[bytes_read++];
381 tex.src_gpr = bytes[bytes_read++];
382 tex.src_rel = bytes[bytes_read++];
383 tex.dst_gpr = bytes[bytes_read++];
384 tex.dst_rel = bytes[bytes_read++];
385 tex.dst_sel_x = bytes[bytes_read++];
386 tex.dst_sel_y = bytes[bytes_read++];
387 tex.dst_sel_z = bytes[bytes_read++];
388 tex.dst_sel_w = bytes[bytes_read++];
389 tex.lod_bias = bytes[bytes_read++];
390 tex.coord_type_x = bytes[bytes_read++];
391 tex.coord_type_y = bytes[bytes_read++];
392 tex.coord_type_z = bytes[bytes_read++];
393 tex.coord_type_w = bytes[bytes_read++];
394 tex.offset_x = bytes[bytes_read++];
395 tex.offset_y = bytes[bytes_read++];
396 tex.offset_z = bytes[bytes_read++];
397 tex.sampler_id = bytes[bytes_read++];
398 tex.src_sel_x = bytes[bytes_read++];
399 tex.src_sel_y = bytes[bytes_read++];
400 tex.src_sel_z = bytes[bytes_read++];
401 tex.src_sel_w = bytes[bytes_read++];
402
403 r600_bytecode_add_tex(ctx->bc, &tex);
404
405 return bytes_read;
406 }
407
408 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
409 unsigned char * bytes, unsigned bytes_read)
410 {
411 struct r600_bytecode_vtx vtx;
412 memset(&vtx, 0, sizeof(vtx));
413 vtx.inst = bytes[bytes_read++];
414 vtx.fetch_type = bytes[bytes_read++];
415 vtx.buffer_id = bytes[bytes_read++];
416 vtx.src_gpr = bytes[bytes_read++];
417 vtx.src_sel_x = bytes[bytes_read++];
418 vtx.mega_fetch_count = bytes[bytes_read++];
419 vtx.dst_gpr = bytes[bytes_read++];
420 vtx.dst_sel_x = bytes[bytes_read++];
421 vtx.dst_sel_y = bytes[bytes_read++];
422 vtx.dst_sel_z = bytes[bytes_read++];
423 vtx.dst_sel_w = bytes[bytes_read++];
424 vtx.use_const_fields = bytes[bytes_read++];
425 vtx.data_format = bytes[bytes_read++];
426 vtx.num_format_all = bytes[bytes_read++];
427 vtx.format_comp_all = bytes[bytes_read++];
428 vtx.srf_mode_all = bytes[bytes_read++];
429 vtx.offset = bytes[bytes_read++];
430 vtx.endian = bytes[bytes_read++];
431
432 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
433 fprintf(stderr, "Error adding vtx\n");
434 }
435 /* Use the Texture Cache */
436 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
437 return bytes_read;
438 }
439
440 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
441 unsigned char * bytes, unsigned num_bytes)
442 {
443 unsigned bytes_read = 0;
444 unsigned i, byte;
445 while (bytes_read < num_bytes) {
446 char inst_type = bytes[bytes_read++];
447 switch (inst_type) {
448 case 0:
449 bytes_read = r600_alu_from_byte_stream(ctx, bytes,
450 bytes_read);
451 break;
452 case 1:
453 bytes_read = r600_tex_from_byte_stream(ctx, bytes,
454 bytes_read);
455 break;
456 case 2:
457 bytes_read = r600_fc_from_byte_stream(ctx, bytes,
458 bytes_read);
459 break;
460 case 3:
461 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
462 for (i = 0; i < 2; i++) {
463 for (byte = 0 ; byte < 4; byte++) {
464 ctx->bc->cf_last->isa[i] |=
465 (bytes[bytes_read++] << (byte * 8));
466 }
467 }
468 break;
469
470 case 4:
471 bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
472 bytes_read);
473 break;
474 default:
475 /* XXX: Error here */
476 break;
477 }
478 }
479 }
480
481 /* End bytestream -> r600 shader functions*/
482
483 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
484 {
485 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
486 int j;
487
488 if (i->Instruction.NumDstRegs > 1) {
489 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
490 return -EINVAL;
491 }
492 if (i->Instruction.Predicate) {
493 R600_ERR("predicate unsupported\n");
494 return -EINVAL;
495 }
496 #if 0
497 if (i->Instruction.Label) {
498 R600_ERR("label unsupported\n");
499 return -EINVAL;
500 }
501 #endif
502 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
503 if (i->Src[j].Register.Dimension) {
504 R600_ERR("unsupported src %d (dimension %d)\n", j,
505 i->Src[j].Register.Dimension);
506 return -EINVAL;
507 }
508 }
509 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
510 if (i->Dst[j].Register.Dimension) {
511 R600_ERR("unsupported dst (dimension)\n");
512 return -EINVAL;
513 }
514 }
515 return 0;
516 }
517
518 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
519 {
520 int i, r;
521 struct r600_bytecode_alu alu;
522 int gpr = 0, base_chan = 0;
523 int ij_index = 0;
524
525 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
526 ij_index = 0;
527 if (ctx->shader->input[input].centroid)
528 ij_index++;
529 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
530 ij_index = 0;
531 /* if we have perspective add one */
532 if (ctx->input_perspective) {
533 ij_index++;
534 /* if we have perspective centroid */
535 if (ctx->input_centroid)
536 ij_index++;
537 }
538 if (ctx->shader->input[input].centroid)
539 ij_index++;
540 }
541
542 /* work out gpr and base_chan from index */
543 gpr = ij_index / 2;
544 base_chan = (2 * (ij_index % 2)) + 1;
545
546 for (i = 0; i < 8; i++) {
547 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
548
549 if (i < 4)
550 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
551 else
552 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
553
554 if ((i > 1) && (i < 6)) {
555 alu.dst.sel = ctx->shader->input[input].gpr;
556 alu.dst.write = 1;
557 }
558
559 alu.dst.chan = i % 4;
560
561 alu.src[0].sel = gpr;
562 alu.src[0].chan = (base_chan - (i % 2));
563
564 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
565
566 alu.bank_swizzle_force = SQ_ALU_VEC_210;
567 if ((i % 4) == 3)
568 alu.last = 1;
569 r = r600_bytecode_add_alu(ctx->bc, &alu);
570 if (r)
571 return r;
572 }
573 return 0;
574 }
575
576 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
577 {
578 int i, r;
579 struct r600_bytecode_alu alu;
580
581 for (i = 0; i < 4; i++) {
582 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
583
584 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
585
586 alu.dst.sel = ctx->shader->input[input].gpr;
587 alu.dst.write = 1;
588
589 alu.dst.chan = i;
590
591 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
592 alu.src[0].chan = i;
593
594 if (i == 3)
595 alu.last = 1;
596 r = r600_bytecode_add_alu(ctx->bc, &alu);
597 if (r)
598 return r;
599 }
600 return 0;
601 }
602
603 /*
604 * Special export handling in shaders
605 *
606 * shader export ARRAY_BASE for EXPORT_POS:
607 * 60 is position
608 * 61 is misc vector
609 * 62, 63 are clip distance vectors
610 *
 * The use of the values exported in 61-63 is controlled by PA_CL_VS_OUT_CNTL:
612 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
613 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
614 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
615 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
616 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
617 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
618 * exclusive from render target index)
619 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
620 *
621 *
622 * shader export ARRAY_BASE for EXPORT_PIXEL:
623 * 0-7 CB targets
624 * 61 computed Z vector
625 *
 * The use of the values exported in the computed Z vector is controlled
 * by DB_SHADER_CONTROL:
628 * Z_EXPORT_ENABLE - Z as a float in RED
629 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
630 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
631 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
632 * DB_SOURCE_FORMAT - export control restrictions
633 *
634 */
635
636
637 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
638 static int r600_spi_sid(struct r600_shader_io * io)
639 {
640 int index, name = io->name;
641
642 /* These params are handled differently, they don't need
643 * semantic indices, so we'll use 0 for them.
644 */
645 if (name == TGSI_SEMANTIC_POSITION ||
646 name == TGSI_SEMANTIC_PSIZE ||
647 name == TGSI_SEMANTIC_FACE)
648 index = 0;
649 else {
650 if (name == TGSI_SEMANTIC_GENERIC) {
651 /* For generic params simply use sid from tgsi */
652 index = io->sid;
653 } else {
654 /* For non-generic params - pack name and sid into 8 bits */
655 index = 0x80 | (name<<3) | (io->sid);
656 }
657
658 /* Make sure that all really used indices have nonzero value, so
659 * we can just compare it to 0 later instead of comparing the name
660 * with different values to detect special cases. */
661 index++;
662 }
663
664 return index;
665 };
666
667 /* turn input into interpolate on EG */
668 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
669 {
670 int r = 0;
671
672 if (ctx->shader->input[index].spi_sid) {
673 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
674 if (ctx->shader->input[index].interpolate > 0) {
675 r = evergreen_interp_alu(ctx, index);
676 } else {
677 r = evergreen_interp_flat(ctx, index);
678 }
679 }
680 return r;
681 }
682
683 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
684 {
685 struct r600_bytecode_alu alu;
686 int i, r;
687 int gpr_front = ctx->shader->input[front].gpr;
688 int gpr_back = ctx->shader->input[back].gpr;
689
690 for (i = 0; i < 4; i++) {
691 memset(&alu, 0, sizeof(alu));
692 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
693 alu.is_op3 = 1;
694 alu.dst.write = 1;
695 alu.dst.sel = gpr_front;
696 alu.src[0].sel = ctx->face_gpr;
697 alu.src[1].sel = gpr_front;
698 alu.src[2].sel = gpr_back;
699
700 alu.dst.chan = i;
701 alu.src[1].chan = i;
702 alu.src[2].chan = i;
703 alu.last = (i==3);
704
705 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
706 return r;
707 }
708
709 return 0;
710 }
711
712 static int tgsi_declaration(struct r600_shader_ctx *ctx)
713 {
714 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
715 unsigned i;
716 int r;
717
718 switch (d->Declaration.File) {
719 case TGSI_FILE_INPUT:
720 i = ctx->shader->ninput++;
721 ctx->shader->input[i].name = d->Semantic.Name;
722 ctx->shader->input[i].sid = d->Semantic.Index;
723 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
724 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
725 ctx->shader->input[i].centroid = d->Declaration.Centroid;
726 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
727 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
728 switch (ctx->shader->input[i].name) {
729 case TGSI_SEMANTIC_FACE:
730 ctx->face_gpr = ctx->shader->input[i].gpr;
731 break;
732 case TGSI_SEMANTIC_COLOR:
733 ctx->colors_used++;
734 break;
735 case TGSI_SEMANTIC_POSITION:
736 ctx->fragcoord_input = i;
737 break;
738 }
739 if (ctx->bc->chip_class >= EVERGREEN) {
740 if ((r = evergreen_interp_input(ctx, i)))
741 return r;
742 }
743 }
744 break;
745 case TGSI_FILE_OUTPUT:
746 i = ctx->shader->noutput++;
747 ctx->shader->output[i].name = d->Semantic.Name;
748 ctx->shader->output[i].sid = d->Semantic.Index;
749 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
750 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
751 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
752 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
753 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
754 switch (d->Semantic.Name) {
755 case TGSI_SEMANTIC_CLIPDIST:
756 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
757 break;
758 case TGSI_SEMANTIC_PSIZE:
759 ctx->shader->vs_out_misc_write = 1;
760 ctx->shader->vs_out_point_size = 1;
761 break;
762 case TGSI_SEMANTIC_CLIPVERTEX:
763 ctx->clip_vertex_write = TRUE;
764 ctx->cv_output = i;
765 break;
766 }
767 }
768 break;
769 case TGSI_FILE_CONSTANT:
770 case TGSI_FILE_TEMPORARY:
771 case TGSI_FILE_SAMPLER:
772 case TGSI_FILE_ADDRESS:
773 break;
774
775 case TGSI_FILE_SYSTEM_VALUE:
776 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
777 if (!ctx->native_integers) {
778 struct r600_bytecode_alu alu;
779 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
780
781 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
782 alu.src[0].sel = 0;
783 alu.src[0].chan = 3;
784
785 alu.dst.sel = 0;
786 alu.dst.chan = 3;
787 alu.dst.write = 1;
788 alu.last = 1;
789
790 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
791 return r;
792 }
793 break;
794 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
795 break;
796 default:
797 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
798 return -EINVAL;
799 }
800 return 0;
801 }
802
803 static int r600_get_temp(struct r600_shader_ctx *ctx)
804 {
805 return ctx->temp_reg + ctx->max_driver_temp_used++;
806 }
807
808 /*
809 * for evergreen we need to scan the shader to find the number of GPRs we need to
810 * reserve for interpolation.
811 *
812 * we need to know if we are going to emit
813 * any centroid inputs
814 * if perspective and linear are required
815 */
816 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
817 {
818 int i;
819 int num_baryc;
820
821 ctx->input_linear = FALSE;
822 ctx->input_perspective = FALSE;
823 ctx->input_centroid = FALSE;
824 ctx->num_interp_gpr = 1;
825
826 /* any centroid inputs */
827 for (i = 0; i < ctx->info.num_inputs; i++) {
828 /* skip position/face */
829 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
830 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
831 continue;
832 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
833 ctx->input_linear = TRUE;
834 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
835 ctx->input_perspective = TRUE;
836 if (ctx->info.input_centroid[i])
837 ctx->input_centroid = TRUE;
838 }
839
840 num_baryc = 0;
841 /* ignoring sample for now */
842 if (ctx->input_perspective)
843 num_baryc++;
844 if (ctx->input_linear)
845 num_baryc++;
846 if (ctx->input_centroid)
847 num_baryc *= 2;
848
849 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
850
851 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
852 return ctx->num_interp_gpr;
853 }
854
855 static void tgsi_src(struct r600_shader_ctx *ctx,
856 const struct tgsi_full_src_register *tgsi_src,
857 struct r600_shader_src *r600_src)
858 {
859 memset(r600_src, 0, sizeof(*r600_src));
860 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
861 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
862 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
863 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
864 r600_src->neg = tgsi_src->Register.Negate;
865 r600_src->abs = tgsi_src->Register.Absolute;
866
867 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
868 int index;
869 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
870 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
871 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
872
873 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
874 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
875 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
876 return;
877 }
878 index = tgsi_src->Register.Index;
879 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
880 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
881 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
882 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
883 r600_src->swizzle[0] = 3;
884 r600_src->swizzle[1] = 3;
885 r600_src->swizzle[2] = 3;
886 r600_src->swizzle[3] = 3;
887 r600_src->sel = 0;
888 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
889 r600_src->swizzle[0] = 0;
890 r600_src->swizzle[1] = 0;
891 r600_src->swizzle[2] = 0;
892 r600_src->swizzle[3] = 0;
893 r600_src->sel = 0;
894 }
895 } else {
896 if (tgsi_src->Register.Indirect)
897 r600_src->rel = V_SQ_REL_RELATIVE;
898 r600_src->sel = tgsi_src->Register.Index;
899 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
900 }
901 }
902
903 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
904 {
905 struct r600_bytecode_vtx vtx;
906 unsigned int ar_reg;
907 int r;
908
909 if (offset) {
910 struct r600_bytecode_alu alu;
911
912 memset(&alu, 0, sizeof(alu));
913
914 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
915 alu.src[0].sel = ctx->bc->ar_reg;
916
917 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
918 alu.src[1].value = offset;
919
920 alu.dst.sel = dst_reg;
921 alu.dst.write = 1;
922 alu.last = 1;
923
924 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
925 return r;
926
927 ar_reg = dst_reg;
928 } else {
929 ar_reg = ctx->bc->ar_reg;
930 }
931
932 memset(&vtx, 0, sizeof(vtx));
933 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
934 vtx.src_gpr = ar_reg;
935 vtx.mega_fetch_count = 16;
936 vtx.dst_gpr = dst_reg;
937 vtx.dst_sel_x = 0; /* SEL_X */
938 vtx.dst_sel_y = 1; /* SEL_Y */
939 vtx.dst_sel_z = 2; /* SEL_Z */
940 vtx.dst_sel_w = 3; /* SEL_W */
941 vtx.data_format = FMT_32_32_32_32_FLOAT;
942 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
943 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
944 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
945 vtx.endian = r600_endian_swap(32);
946
947 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
948 return r;
949
950 return 0;
951 }
952
953 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
954 {
955 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
956 struct r600_bytecode_alu alu;
957 int i, j, k, nconst, r;
958
959 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
960 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
961 nconst++;
962 }
963 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
964 }
965 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
966 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
967 continue;
968 }
969
970 if (ctx->src[i].rel) {
971 int treg = r600_get_temp(ctx);
972 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
973 return r;
974
975 ctx->src[i].sel = treg;
976 ctx->src[i].rel = 0;
977 j--;
978 } else if (j > 0) {
979 int treg = r600_get_temp(ctx);
980 for (k = 0; k < 4; k++) {
981 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
982 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
983 alu.src[0].sel = ctx->src[i].sel;
984 alu.src[0].chan = k;
985 alu.src[0].rel = ctx->src[i].rel;
986 alu.dst.sel = treg;
987 alu.dst.chan = k;
988 alu.dst.write = 1;
989 if (k == 3)
990 alu.last = 1;
991 r = r600_bytecode_add_alu(ctx->bc, &alu);
992 if (r)
993 return r;
994 }
995 ctx->src[i].sel = treg;
996 ctx->src[i].rel =0;
997 j--;
998 }
999 }
1000 return 0;
1001 }
1002
1003 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1004 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1005 {
1006 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1007 struct r600_bytecode_alu alu;
1008 int i, j, k, nliteral, r;
1009
1010 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1011 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1012 nliteral++;
1013 }
1014 }
1015 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1016 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1017 int treg = r600_get_temp(ctx);
1018 for (k = 0; k < 4; k++) {
1019 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1020 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1021 alu.src[0].sel = ctx->src[i].sel;
1022 alu.src[0].chan = k;
1023 alu.src[0].value = ctx->src[i].value[k];
1024 alu.dst.sel = treg;
1025 alu.dst.chan = k;
1026 alu.dst.write = 1;
1027 if (k == 3)
1028 alu.last = 1;
1029 r = r600_bytecode_add_alu(ctx->bc, &alu);
1030 if (r)
1031 return r;
1032 }
1033 ctx->src[i].sel = treg;
1034 j--;
1035 }
1036 }
1037 return 0;
1038 }
1039
1040 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1041 {
1042 int i, r, count = ctx->shader->ninput;
1043
1044 /* additional inputs will be allocated right after the existing inputs,
1045 * we won't need them after the color selection, so we don't need to
1046 * reserve these gprs for the rest of the shader code and to adjust
1047 * output offsets etc. */
1048 int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
1049 ctx->info.file_max[TGSI_FILE_INPUT] + 1;
1050
1051 if (ctx->face_gpr == -1) {
1052 i = ctx->shader->ninput++;
1053 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
1054 ctx->shader->input[i].spi_sid = 0;
1055 ctx->shader->input[i].gpr = gpr++;
1056 ctx->face_gpr = ctx->shader->input[i].gpr;
1057 }
1058
1059 for (i = 0; i < count; i++) {
1060 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1061 int ni = ctx->shader->ninput++;
1062 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
1063 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1064 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
1065 ctx->shader->input[ni].gpr = gpr++;
1066
1067 if (ctx->bc->chip_class >= EVERGREEN) {
1068 r = evergreen_interp_input(ctx, ni);
1069 if (r)
1070 return r;
1071 }
1072
1073 r = select_twoside_color(ctx, i, ni);
1074 if (r)
1075 return r;
1076 }
1077 }
1078 return 0;
1079 }
1080
1081 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
1082 {
1083 struct r600_shader *shader = &pipeshader->shader;
1084 struct tgsi_token *tokens = pipeshader->tokens;
1085 struct pipe_stream_output_info so = pipeshader->so;
1086 struct tgsi_full_immediate *immediate;
1087 struct tgsi_full_property *property;
1088 struct r600_shader_ctx ctx;
1089 struct r600_bytecode_output output[32];
1090 unsigned output_done, noutput;
1091 unsigned opcode;
1092 int i, j, k, r = 0;
1093 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1094 /* Declarations used by llvm code */
1095 bool use_llvm = false;
1096 unsigned char * inst_bytes = NULL;
1097 unsigned inst_byte_count = 0;
1098
1099 #ifdef R600_USE_LLVM
1100 use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1101 #endif
1102 ctx.bc = &shader->bc;
1103 ctx.shader = shader;
1104 ctx.native_integers = (rctx->screen->glsl_feature_level >= 130);
1105
1106 r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
1107 ctx.tokens = tokens;
1108 tgsi_scan_shader(tokens, &ctx.info);
1109 tgsi_parse_init(&ctx.parse, tokens);
1110 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1111 shader->processor_type = ctx.type;
1112 ctx.bc->type = shader->processor_type;
1113
1114 ctx.face_gpr = -1;
1115 ctx.fragcoord_input = -1;
1116 ctx.colors_used = 0;
1117 ctx.clip_vertex_write = 0;
1118
1119 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
1120 shader->nr_cbufs = rctx->nr_cbufs;
1121
1122 /* register allocations */
1123 /* Values [0,127] correspond to GPR[0..127].
1124 * Values [128,159] correspond to constant buffer bank 0
1125 * Values [160,191] correspond to constant buffer bank 1
1126 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1127 * Values [256,287] correspond to constant buffer bank 2 (EG)
1128 * Values [288,319] correspond to constant buffer bank 3 (EG)
1129 * Other special values are shown in the list below.
1130 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1131 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1132 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1133 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1134 * 248 SQ_ALU_SRC_0: special constant 0.0.
1135 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1136 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1137 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1138 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1139 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1140 * 254 SQ_ALU_SRC_PV: previous vector result.
1141 * 255 SQ_ALU_SRC_PS: previous scalar result.
1142 */
1143 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1144 ctx.file_offset[i] = 0;
1145 }
1146 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1147 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1148 if (ctx.bc->chip_class >= EVERGREEN) {
1149 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1150 } else {
1151 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1152 }
1153 }
1154 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1155 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1156 }
1157
1158 /* LLVM backend setup */
1159 #ifdef R600_USE_LLVM
1160 if (use_llvm && ctx.info.indirect_files) {
1161 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1162 "indirect adressing. Falling back to TGSI "
1163 "backend.\n");
1164 use_llvm = 0;
1165 }
1166 if (use_llvm) {
1167 struct radeon_llvm_context radeon_llvm_ctx;
1168 LLVMModuleRef mod;
1169 unsigned dump = 0;
1170 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1171 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1172 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1173 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1174 dump = 1;
1175 }
1176 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1177 rctx->family, dump)) {
1178 FREE(inst_bytes);
1179 radeon_llvm_dispose(&radeon_llvm_ctx);
1180 use_llvm = 0;
1181 fprintf(stderr, "R600 LLVM backend failed to compile "
1182 "shader. Falling back to TGSI\n");
1183 } else {
1184 ctx.file_offset[TGSI_FILE_OUTPUT] =
1185 ctx.file_offset[TGSI_FILE_INPUT];
1186 }
1187 radeon_llvm_dispose(&radeon_llvm_ctx);
1188 }
1189 #endif
1190 /* End of LLVM backend setup */
1191
1192 if (!use_llvm) {
1193 ctx.file_offset[TGSI_FILE_OUTPUT] =
1194 ctx.file_offset[TGSI_FILE_INPUT] +
1195 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1196 }
1197 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1198 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1199
1200 /* Outside the GPR range. This will be translated to one of the
1201 * kcache banks later. */
1202 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1203
1204 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1205 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1206 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1207 ctx.temp_reg = ctx.bc->ar_reg + 1;
1208
1209 ctx.nliterals = 0;
1210 ctx.literals = NULL;
1211 shader->fs_write_all = FALSE;
1212 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1213 tgsi_parse_token(&ctx.parse);
1214 switch (ctx.parse.FullToken.Token.Type) {
1215 case TGSI_TOKEN_TYPE_IMMEDIATE:
1216 immediate = &ctx.parse.FullToken.FullImmediate;
1217 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1218 if(ctx.literals == NULL) {
1219 r = -ENOMEM;
1220 goto out_err;
1221 }
1222 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1223 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1224 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1225 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1226 ctx.nliterals++;
1227 break;
1228 case TGSI_TOKEN_TYPE_DECLARATION:
1229 r = tgsi_declaration(&ctx);
1230 if (r)
1231 goto out_err;
1232 break;
1233 case TGSI_TOKEN_TYPE_INSTRUCTION:
1234 break;
1235 case TGSI_TOKEN_TYPE_PROPERTY:
1236 property = &ctx.parse.FullToken.FullProperty;
1237 switch (property->Property.PropertyName) {
1238 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1239 if (property->u[0].Data == 1)
1240 shader->fs_write_all = TRUE;
1241 break;
1242 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1243 if (property->u[0].Data == 1)
1244 shader->vs_prohibit_ucps = TRUE;
1245 break;
1246 }
1247 break;
1248 default:
1249 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1250 r = -EINVAL;
1251 goto out_err;
1252 }
1253 }
1254
1255 if (ctx.fragcoord_input >= 0) {
1256 if (ctx.bc->chip_class == CAYMAN) {
1257 for (j = 0 ; j < 4; j++) {
1258 struct r600_bytecode_alu alu;
1259 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1260 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1261 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1262 alu.src[0].chan = 3;
1263
1264 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1265 alu.dst.chan = j;
1266 alu.dst.write = (j == 3);
1267 alu.last = 1;
1268 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1269 return r;
1270 }
1271 } else {
1272 struct r600_bytecode_alu alu;
1273 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1274 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1275 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1276 alu.src[0].chan = 3;
1277
1278 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1279 alu.dst.chan = 3;
1280 alu.dst.write = 1;
1281 alu.last = 1;
1282 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1283 return r;
1284 }
1285 }
1286
1287 if (shader->two_side && ctx.colors_used) {
1288 if ((r = process_twoside_color_inputs(&ctx)))
1289 return r;
1290 }
1291
1292 tgsi_parse_init(&ctx.parse, tokens);
1293 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1294 tgsi_parse_token(&ctx.parse);
1295 switch (ctx.parse.FullToken.Token.Type) {
1296 case TGSI_TOKEN_TYPE_INSTRUCTION:
1297 if (use_llvm) {
1298 continue;
1299 }
1300 r = tgsi_is_supported(&ctx);
1301 if (r)
1302 goto out_err;
1303 ctx.max_driver_temp_used = 0;
1304 /* reserve first tmp for everyone */
1305 r600_get_temp(&ctx);
1306
1307 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1308 if ((r = tgsi_split_constant(&ctx)))
1309 goto out_err;
1310 if ((r = tgsi_split_literal_constant(&ctx)))
1311 goto out_err;
1312 if (ctx.bc->chip_class == CAYMAN)
1313 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1314 else if (ctx.bc->chip_class >= EVERGREEN)
1315 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1316 else
1317 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1318 r = ctx.inst_info->process(&ctx);
1319 if (r)
1320 goto out_err;
1321 break;
1322 default:
1323 break;
1324 }
1325 }
1326
1327 /* Get instructions if we are using the LLVM backend. */
1328 if (use_llvm) {
1329 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1330 FREE(inst_bytes);
1331 }
1332
1333 noutput = shader->noutput;
1334
1335 if (ctx.clip_vertex_write) {
1336 /* need to convert a clipvertex write into clipdistance writes and not export
1337 the clip vertex anymore */
1338
1339 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1340 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1341 shader->output[noutput].gpr = ctx.temp_reg;
1342 noutput++;
1343 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1344 shader->output[noutput].gpr = ctx.temp_reg+1;
1345 noutput++;
1346
1347 /* reset spi_sid for clipvertex output to avoid confusing spi */
1348 shader->output[ctx.cv_output].spi_sid = 0;
1349
1350 shader->clip_dist_write = 0xFF;
1351
1352 for (i = 0; i < 8; i++) {
1353 int oreg = i >> 2;
1354 int ochan = i & 3;
1355
1356 for (j = 0; j < 4; j++) {
1357 struct r600_bytecode_alu alu;
1358 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1359 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1360 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1361 alu.src[0].chan = j;
1362
1363 alu.src[1].sel = 512 + i;
1364 alu.src[1].kc_bank = 1;
1365 alu.src[1].chan = j;
1366
1367 alu.dst.sel = ctx.temp_reg + oreg;
1368 alu.dst.chan = j;
1369 alu.dst.write = (j == ochan);
1370 if (j == 3)
1371 alu.last = 1;
1372 r = r600_bytecode_add_alu(ctx.bc, &alu);
1373 if (r)
1374 return r;
1375 }
1376 }
1377 }
1378
1379 /* Add stream outputs. */
1380 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1381 for (i = 0; i < so.num_outputs; i++) {
1382 struct r600_bytecode_output output;
1383
1384 if (so.output[i].output_buffer >= 4) {
1385 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1386 so.output[i].output_buffer);
1387 r = -EINVAL;
1388 goto out_err;
1389 }
1390 if (so.output[i].dst_offset < so.output[i].start_component) {
1391 R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1392 r = -EINVAL;
1393 goto out_err;
1394 }
1395
1396 memset(&output, 0, sizeof(struct r600_bytecode_output));
1397 output.gpr = shader->output[so.output[i].register_index].gpr;
1398 output.elem_size = 0;
1399 output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1400 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1401 output.burst_count = 1;
1402 output.barrier = 1;
1403 /* array_size is an upper limit for the burst_count
1404 * with MEM_STREAM instructions */
1405 output.array_size = 0xFFF;
1406 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1407 if (ctx.bc->chip_class >= EVERGREEN) {
1408 switch (so.output[i].output_buffer) {
1409 case 0:
1410 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1411 break;
1412 case 1:
1413 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1414 break;
1415 case 2:
1416 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1417 break;
1418 case 3:
1419 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1420 break;
1421 }
1422 } else {
1423 switch (so.output[i].output_buffer) {
1424 case 0:
1425 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1426 break;
1427 case 1:
1428 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1429 break;
1430 case 2:
1431 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1432 break;
1433 case 3:
1434 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1435 break;
1436 }
1437 }
1438 r = r600_bytecode_add_output(ctx.bc, &output);
1439 if (r)
1440 goto out_err;
1441 }
1442 }
1443
1444 /* export output */
1445 for (i = 0, j = 0; i < noutput; i++, j++) {
1446 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1447 output[j].gpr = shader->output[i].gpr;
1448 output[j].elem_size = 3;
1449 output[j].swizzle_x = 0;
1450 output[j].swizzle_y = 1;
1451 output[j].swizzle_z = 2;
1452 output[j].swizzle_w = 3;
1453 output[j].burst_count = 1;
1454 output[j].barrier = 1;
1455 output[j].type = -1;
1456 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1457 switch (ctx.type) {
1458 case TGSI_PROCESSOR_VERTEX:
1459 switch (shader->output[i].name) {
1460 case TGSI_SEMANTIC_POSITION:
1461 output[j].array_base = next_pos_base++;
1462 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1463 break;
1464
1465 case TGSI_SEMANTIC_PSIZE:
1466 output[j].array_base = next_pos_base++;
1467 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1468 break;
1469 case TGSI_SEMANTIC_CLIPVERTEX:
1470 j--;
1471 break;
1472 case TGSI_SEMANTIC_CLIPDIST:
1473 output[j].array_base = next_pos_base++;
1474 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1475 /* spi_sid is 0 for clipdistance outputs that were generated
1476 * for clipvertex - we don't need to pass them to PS */
1477 if (shader->output[i].spi_sid) {
1478 j++;
1479 /* duplicate it as PARAM to pass to the pixel shader */
1480 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1481 output[j].array_base = next_param_base++;
1482 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1483 }
1484 break;
1485 case TGSI_SEMANTIC_FOG:
1486 output[j].swizzle_y = 4; /* 0 */
1487 output[j].swizzle_z = 4; /* 0 */
1488 output[j].swizzle_w = 5; /* 1 */
1489 break;
1490 }
1491 break;
1492 case TGSI_PROCESSOR_FRAGMENT:
1493 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1494 output[j].array_base = next_pixel_base++;
1495 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1496 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
1497 for (k = 1; k < shader->nr_cbufs; k++) {
1498 j++;
1499 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1500 output[j].gpr = shader->output[i].gpr;
1501 output[j].elem_size = 3;
1502 output[j].swizzle_x = 0;
1503 output[j].swizzle_y = 1;
1504 output[j].swizzle_z = 2;
1505 output[j].swizzle_w = 3;
1506 output[j].burst_count = 1;
1507 output[j].barrier = 1;
1508 output[j].array_base = next_pixel_base++;
1509 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1510 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1511 }
1512 }
1513 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1514 output[j].array_base = 61;
1515 output[j].swizzle_x = 2;
1516 output[j].swizzle_y = 7;
1517 output[j].swizzle_z = output[j].swizzle_w = 7;
1518 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1519 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1520 output[j].array_base = 61;
1521 output[j].swizzle_x = 7;
1522 output[j].swizzle_y = 1;
1523 output[j].swizzle_z = output[j].swizzle_w = 7;
1524 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1525 } else {
1526 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1527 r = -EINVAL;
1528 goto out_err;
1529 }
1530 break;
1531 default:
1532 R600_ERR("unsupported processor type %d\n", ctx.type);
1533 r = -EINVAL;
1534 goto out_err;
1535 }
1536
1537 if (output[j].type==-1) {
1538 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1539 output[j].array_base = next_param_base++;
1540 }
1541 }
1542
1543 /* add fake param output for vertex shader if no param is exported */
1544 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1545 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1546 output[j].gpr = 0;
1547 output[j].elem_size = 3;
1548 output[j].swizzle_x = 7;
1549 output[j].swizzle_y = 7;
1550 output[j].swizzle_z = 7;
1551 output[j].swizzle_w = 7;
1552 output[j].burst_count = 1;
1553 output[j].barrier = 1;
1554 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1555 output[j].array_base = 0;
1556 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1557 j++;
1558 }
1559
1560 /* add fake pixel export */
1561 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
1562 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1563 output[j].gpr = 0;
1564 output[j].elem_size = 3;
1565 output[j].swizzle_x = 7;
1566 output[j].swizzle_y = 7;
1567 output[j].swizzle_z = 7;
1568 output[j].swizzle_w = 7;
1569 output[j].burst_count = 1;
1570 output[j].barrier = 1;
1571 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1572 output[j].array_base = 0;
1573 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1574 j++;
1575 }
1576
1577 noutput = j;
1578
1579 /* set export done on last export of each type */
1580 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1581 if (ctx.bc->chip_class < CAYMAN) {
1582 if (i == (noutput - 1)) {
1583 output[i].end_of_program = 1;
1584 }
1585 }
1586 if (!(output_done & (1 << output[i].type))) {
1587 output_done |= (1 << output[i].type);
1588 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1589 }
1590 }
1591 /* add output to bytecode */
1592 for (i = 0; i < noutput; i++) {
1593 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1594 if (r)
1595 goto out_err;
1596 }
1597 /* add program end */
1598 if (ctx.bc->chip_class == CAYMAN)
1599 cm_bytecode_add_cf_end(ctx.bc);
1600
1601 /* check GPR limit - we have 124 = 128 - 4
1602 * (4 are reserved as alu clause temporary registers) */
1603 if (ctx.bc->ngpr > 124) {
1604 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1605 r = -ENOMEM;
1606 goto out_err;
1607 }
1608
1609 free(ctx.literals);
1610 tgsi_parse_free(&ctx.parse);
1611 return 0;
1612 out_err:
1613 free(ctx.literals);
1614 tgsi_parse_free(&ctx.parse);
1615 return r;
1616 }
1617
1618 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1619 {
1620 R600_ERR("%s tgsi opcode unsupported\n",
1621 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1622 return -EINVAL;
1623 }
1624
/* Instruction handler that emits nothing and always reports success. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx; /* the context is not needed */
	return 0;
}
1629
1630 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1631 const struct r600_shader_src *shader_src,
1632 unsigned chan)
1633 {
1634 bc_src->sel = shader_src->sel;
1635 bc_src->chan = shader_src->swizzle[chan];
1636 bc_src->neg = shader_src->neg;
1637 bc_src->abs = shader_src->abs;
1638 bc_src->rel = shader_src->rel;
1639 bc_src->value = shader_src->value[bc_src->chan];
1640 }
1641
1642 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1643 {
1644 bc_src->abs = 1;
1645 bc_src->neg = 0;
1646 }
1647
1648 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1649 {
1650 bc_src->neg = !bc_src->neg;
1651 }
1652
1653 static void tgsi_dst(struct r600_shader_ctx *ctx,
1654 const struct tgsi_full_dst_register *tgsi_dst,
1655 unsigned swizzle,
1656 struct r600_bytecode_alu_dst *r600_dst)
1657 {
1658 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1659
1660 r600_dst->sel = tgsi_dst->Register.Index;
1661 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1662 r600_dst->chan = swizzle;
1663 r600_dst->write = 1;
1664 if (tgsi_dst->Register.Indirect)
1665 r600_dst->rel = V_SQ_REL_RELATIVE;
1666 if (inst->Instruction.Saturate) {
1667 r600_dst->clamp = 1;
1668 }
1669 }
1670
/*
 * Return the index (0..3) of the highest channel enabled in writemask.
 * Only the low four bits are looked at; 0 is returned when none is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* Scan downwards so the first hit is the highest enabled channel. */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
1682
1683 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1684 {
1685 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1686 struct r600_bytecode_alu alu;
1687 int i, j, r;
1688 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1689
1690 for (i = 0; i < lasti + 1; i++) {
1691 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1692 continue;
1693
1694 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1695 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1696
1697 alu.inst = ctx->inst_info->r600_opcode;
1698 if (!swap) {
1699 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1700 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1701 }
1702 } else {
1703 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1704 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1705 }
1706 /* handle some special cases */
1707 switch (ctx->inst_info->tgsi_opcode) {
1708 case TGSI_OPCODE_SUB:
1709 r600_bytecode_src_toggle_neg(&alu.src[1]);
1710 break;
1711 case TGSI_OPCODE_ABS:
1712 r600_bytecode_src_set_abs(&alu.src[0]);
1713 break;
1714 default:
1715 break;
1716 }
1717 if (i == lasti || trans_only) {
1718 alu.last = 1;
1719 }
1720 r = r600_bytecode_add_alu(ctx->bc, &alu);
1721 if (r)
1722 return r;
1723 }
1724 return 0;
1725 }
1726
/* Per-channel ALU emission: sources in order, grouped as one instruction group. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1731
/* Per-channel ALU emission with the first two source operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1736
/* Per-channel ALU emission where every instruction ends its own group. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1741
1742 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1743 {
1744 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1745 struct r600_bytecode_alu alu;
1746 int i, r;
1747 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1748
1749 for (i = 0; i < lasti + 1; i++) {
1750
1751 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1752 continue;
1753 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1754 alu.inst = ctx->inst_info->r600_opcode;
1755
1756 alu.src[0].sel = V_SQ_ALU_SRC_0;
1757
1758 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1759
1760 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1761
1762 if (i == lasti) {
1763 alu.last = 1;
1764 }
1765 r = r600_bytecode_add_alu(ctx->bc, &alu);
1766 if (r)
1767 return r;
1768 }
1769 return 0;
1770
1771 }
1772
1773 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1774 {
1775 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1776 int i, j, r;
1777 struct r600_bytecode_alu alu;
1778 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1779
1780 for (i = 0 ; i < last_slot; i++) {
1781 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1782 alu.inst = ctx->inst_info->r600_opcode;
1783 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1784 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1785 }
1786 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1787 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1788
1789 if (i == last_slot - 1)
1790 alu.last = 1;
1791 r = r600_bytecode_add_alu(ctx->bc, &alu);
1792 if (r)
1793 return r;
1794 }
1795 return 0;
1796 }
1797
1798 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1799 {
1800 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1801 int i, j, k, r;
1802 struct r600_bytecode_alu alu;
1803 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1804 for (k = 0; k < last_slot; k++) {
1805 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1806 continue;
1807
1808 for (i = 0 ; i < 4; i++) {
1809 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1810 alu.inst = ctx->inst_info->r600_opcode;
1811 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1812 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1813 }
1814 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1815 alu.dst.write = (i == k);
1816 if (i == 3)
1817 alu.last = 1;
1818 r = r600_bytecode_add_alu(ctx->bc, &alu);
1819 if (r)
1820 return r;
1821 }
1822 }
1823 return 0;
1824 }
1825
1826 /*
1827 * r600 - trunc to -PI..PI range
1828 * r700 - normalize by dividing by 2PI
1829 * see fdo bug 27901
1830 */
1831 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1832 {
1833 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1834 static float double_pi = 3.1415926535 * 2;
1835 static float neg_pi = -3.1415926535;
1836
1837 int r;
1838 struct r600_bytecode_alu alu;
1839
1840 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1842 alu.is_op3 = 1;
1843
1844 alu.dst.chan = 0;
1845 alu.dst.sel = ctx->temp_reg;
1846 alu.dst.write = 1;
1847
1848 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1849
1850 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1851 alu.src[1].chan = 0;
1852 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1853 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1854 alu.src[2].chan = 0;
1855 alu.last = 1;
1856 r = r600_bytecode_add_alu(ctx->bc, &alu);
1857 if (r)
1858 return r;
1859
1860 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1861 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1862
1863 alu.dst.chan = 0;
1864 alu.dst.sel = ctx->temp_reg;
1865 alu.dst.write = 1;
1866
1867 alu.src[0].sel = ctx->temp_reg;
1868 alu.src[0].chan = 0;
1869 alu.last = 1;
1870 r = r600_bytecode_add_alu(ctx->bc, &alu);
1871 if (r)
1872 return r;
1873
1874 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1875 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1876 alu.is_op3 = 1;
1877
1878 alu.dst.chan = 0;
1879 alu.dst.sel = ctx->temp_reg;
1880 alu.dst.write = 1;
1881
1882 alu.src[0].sel = ctx->temp_reg;
1883 alu.src[0].chan = 0;
1884
1885 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1886 alu.src[1].chan = 0;
1887 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1888 alu.src[2].chan = 0;
1889
1890 if (ctx->bc->chip_class == R600) {
1891 alu.src[1].value = *(uint32_t *)&double_pi;
1892 alu.src[2].value = *(uint32_t *)&neg_pi;
1893 } else {
1894 alu.src[1].sel = V_SQ_ALU_SRC_1;
1895 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1896 alu.src[2].neg = 1;
1897 }
1898
1899 alu.last = 1;
1900 r = r600_bytecode_add_alu(ctx->bc, &alu);
1901 if (r)
1902 return r;
1903 return 0;
1904 }
1905
1906 static int cayman_trig(struct r600_shader_ctx *ctx)
1907 {
1908 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1909 struct r600_bytecode_alu alu;
1910 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1911 int i, r;
1912
1913 r = tgsi_setup_trig(ctx);
1914 if (r)
1915 return r;
1916
1917
1918 for (i = 0; i < last_slot; i++) {
1919 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1920 alu.inst = ctx->inst_info->r600_opcode;
1921 alu.dst.chan = i;
1922
1923 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1924 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1925
1926 alu.src[0].sel = ctx->temp_reg;
1927 alu.src[0].chan = 0;
1928 if (i == last_slot - 1)
1929 alu.last = 1;
1930 r = r600_bytecode_add_alu(ctx->bc, &alu);
1931 if (r)
1932 return r;
1933 }
1934 return 0;
1935 }
1936
1937 static int tgsi_trig(struct r600_shader_ctx *ctx)
1938 {
1939 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1940 struct r600_bytecode_alu alu;
1941 int i, r;
1942 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1943
1944 r = tgsi_setup_trig(ctx);
1945 if (r)
1946 return r;
1947
1948 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1949 alu.inst = ctx->inst_info->r600_opcode;
1950 alu.dst.chan = 0;
1951 alu.dst.sel = ctx->temp_reg;
1952 alu.dst.write = 1;
1953
1954 alu.src[0].sel = ctx->temp_reg;
1955 alu.src[0].chan = 0;
1956 alu.last = 1;
1957 r = r600_bytecode_add_alu(ctx->bc, &alu);
1958 if (r)
1959 return r;
1960
1961 /* replicate result */
1962 for (i = 0; i < lasti + 1; i++) {
1963 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1964 continue;
1965
1966 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1967 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1968
1969 alu.src[0].sel = ctx->temp_reg;
1970 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1971 if (i == lasti)
1972 alu.last = 1;
1973 r = r600_bytecode_add_alu(ctx->bc, &alu);
1974 if (r)
1975 return r;
1976 }
1977 return 0;
1978 }
1979
1980 static int tgsi_scs(struct r600_shader_ctx *ctx)
1981 {
1982 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1983 struct r600_bytecode_alu alu;
1984 int i, r;
1985
1986 /* We'll only need the trig stuff if we are going to write to the
1987 * X or Y components of the destination vector.
1988 */
1989 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1990 r = tgsi_setup_trig(ctx);
1991 if (r)
1992 return r;
1993 }
1994
1995 /* dst.x = COS */
1996 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1997 if (ctx->bc->chip_class == CAYMAN) {
1998 for (i = 0 ; i < 3; i++) {
1999 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2001 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2002
2003 if (i == 0)
2004 alu.dst.write = 1;
2005 else
2006 alu.dst.write = 0;
2007 alu.src[0].sel = ctx->temp_reg;
2008 alu.src[0].chan = 0;
2009 if (i == 2)
2010 alu.last = 1;
2011 r = r600_bytecode_add_alu(ctx->bc, &alu);
2012 if (r)
2013 return r;
2014 }
2015 } else {
2016 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2017 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2018 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2019
2020 alu.src[0].sel = ctx->temp_reg;
2021 alu.src[0].chan = 0;
2022 alu.last = 1;
2023 r = r600_bytecode_add_alu(ctx->bc, &alu);
2024 if (r)
2025 return r;
2026 }
2027 }
2028
2029 /* dst.y = SIN */
2030 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2031 if (ctx->bc->chip_class == CAYMAN) {
2032 for (i = 0 ; i < 3; i++) {
2033 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2034 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2035 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2036 if (i == 1)
2037 alu.dst.write = 1;
2038 else
2039 alu.dst.write = 0;
2040 alu.src[0].sel = ctx->temp_reg;
2041 alu.src[0].chan = 0;
2042 if (i == 2)
2043 alu.last = 1;
2044 r = r600_bytecode_add_alu(ctx->bc, &alu);
2045 if (r)
2046 return r;
2047 }
2048 } else {
2049 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2050 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2051 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2052
2053 alu.src[0].sel = ctx->temp_reg;
2054 alu.src[0].chan = 0;
2055 alu.last = 1;
2056 r = r600_bytecode_add_alu(ctx->bc, &alu);
2057 if (r)
2058 return r;
2059 }
2060 }
2061
2062 /* dst.z = 0.0; */
2063 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2064 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2065
2066 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2067
2068 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2069
2070 alu.src[0].sel = V_SQ_ALU_SRC_0;
2071 alu.src[0].chan = 0;
2072
2073 alu.last = 1;
2074
2075 r = r600_bytecode_add_alu(ctx->bc, &alu);
2076 if (r)
2077 return r;
2078 }
2079
2080 /* dst.w = 1.0; */
2081 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2082 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2083
2084 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2085
2086 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2087
2088 alu.src[0].sel = V_SQ_ALU_SRC_1;
2089 alu.src[0].chan = 0;
2090
2091 alu.last = 1;
2092
2093 r = r600_bytecode_add_alu(ctx->bc, &alu);
2094 if (r)
2095 return r;
2096 }
2097
2098 return 0;
2099 }
2100
2101 static int tgsi_kill(struct r600_shader_ctx *ctx)
2102 {
2103 struct r600_bytecode_alu alu;
2104 int i, r;
2105
2106 for (i = 0; i < 4; i++) {
2107 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2108 alu.inst = ctx->inst_info->r600_opcode;
2109
2110 alu.dst.chan = i;
2111
2112 alu.src[0].sel = V_SQ_ALU_SRC_0;
2113
2114 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2115 alu.src[1].sel = V_SQ_ALU_SRC_1;
2116 alu.src[1].neg = 1;
2117 } else {
2118 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2119 }
2120 if (i == 3) {
2121 alu.last = 1;
2122 }
2123 r = r600_bytecode_add_alu(ctx->bc, &alu);
2124 if (r)
2125 return r;
2126 }
2127
2128 /* kill must be last in ALU */
2129 ctx->bc->force_add_cf = 1;
2130 ctx->shader->uses_kill = TRUE;
2131 return 0;
2132 }
2133
2134 static int tgsi_lit(struct r600_shader_ctx *ctx)
2135 {
2136 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2137 struct r600_bytecode_alu alu;
2138 int r;
2139
2140 /* tmp.x = max(src.y, 0.0) */
2141 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2142 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2143 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2144 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2145 alu.src[1].chan = 1;
2146
2147 alu.dst.sel = ctx->temp_reg;
2148 alu.dst.chan = 0;
2149 alu.dst.write = 1;
2150
2151 alu.last = 1;
2152 r = r600_bytecode_add_alu(ctx->bc, &alu);
2153 if (r)
2154 return r;
2155
2156 if (inst->Dst[0].Register.WriteMask & (1 << 2))
2157 {
2158 int chan;
2159 int sel;
2160 int i;
2161
2162 if (ctx->bc->chip_class == CAYMAN) {
2163 for (i = 0; i < 3; i++) {
2164 /* tmp.z = log(tmp.x) */
2165 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2166 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2167 alu.src[0].sel = ctx->temp_reg;
2168 alu.src[0].chan = 0;
2169 alu.dst.sel = ctx->temp_reg;
2170 alu.dst.chan = i;
2171 if (i == 2) {
2172 alu.dst.write = 1;
2173 alu.last = 1;
2174 } else
2175 alu.dst.write = 0;
2176
2177 r = r600_bytecode_add_alu(ctx->bc, &alu);
2178 if (r)
2179 return r;
2180 }
2181 } else {
2182 /* tmp.z = log(tmp.x) */
2183 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2185 alu.src[0].sel = ctx->temp_reg;
2186 alu.src[0].chan = 0;
2187 alu.dst.sel = ctx->temp_reg;
2188 alu.dst.chan = 2;
2189 alu.dst.write = 1;
2190 alu.last = 1;
2191 r = r600_bytecode_add_alu(ctx->bc, &alu);
2192 if (r)
2193 return r;
2194 }
2195
2196 chan = alu.dst.chan;
2197 sel = alu.dst.sel;
2198
2199 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2200 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2202 alu.src[0].sel = sel;
2203 alu.src[0].chan = chan;
2204 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2205 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2206 alu.dst.sel = ctx->temp_reg;
2207 alu.dst.chan = 0;
2208 alu.dst.write = 1;
2209 alu.is_op3 = 1;
2210 alu.last = 1;
2211 r = r600_bytecode_add_alu(ctx->bc, &alu);
2212 if (r)
2213 return r;
2214
2215 if (ctx->bc->chip_class == CAYMAN) {
2216 for (i = 0; i < 3; i++) {
2217 /* dst.z = exp(tmp.x) */
2218 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2219 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2220 alu.src[0].sel = ctx->temp_reg;
2221 alu.src[0].chan = 0;
2222 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2223 if (i == 2) {
2224 alu.dst.write = 1;
2225 alu.last = 1;
2226 } else
2227 alu.dst.write = 0;
2228 r = r600_bytecode_add_alu(ctx->bc, &alu);
2229 if (r)
2230 return r;
2231 }
2232 } else {
2233 /* dst.z = exp(tmp.x) */
2234 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2235 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2236 alu.src[0].sel = ctx->temp_reg;
2237 alu.src[0].chan = 0;
2238 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2239 alu.last = 1;
2240 r = r600_bytecode_add_alu(ctx->bc, &alu);
2241 if (r)
2242 return r;
2243 }
2244 }
2245
2246 /* dst.x, <- 1.0 */
2247 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2248 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2249 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
2250 alu.src[0].chan = 0;
2251 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2252 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2253 r = r600_bytecode_add_alu(ctx->bc, &alu);
2254 if (r)
2255 return r;
2256
2257 /* dst.y = max(src.x, 0.0) */
2258 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2259 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2260 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2261 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
2262 alu.src[1].chan = 0;
2263 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2264 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2265 r = r600_bytecode_add_alu(ctx->bc, &alu);
2266 if (r)
2267 return r;
2268
2269 /* dst.w, <- 1.0 */
2270 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2272 alu.src[0].sel = V_SQ_ALU_SRC_1;
2273 alu.src[0].chan = 0;
2274 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2275 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2276 alu.last = 1;
2277 r = r600_bytecode_add_alu(ctx->bc, &alu);
2278 if (r)
2279 return r;
2280
2281 return 0;
2282 }
2283
2284 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2285 {
2286 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2287 struct r600_bytecode_alu alu;
2288 int i, r;
2289
2290 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2291
2292 /* XXX:
2293 * For state trackers other than OpenGL, we'll want to use
2294 * _RECIPSQRT_IEEE instead.
2295 */
2296 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2297
2298 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2299 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2300 r600_bytecode_src_set_abs(&alu.src[i]);
2301 }
2302 alu.dst.sel = ctx->temp_reg;
2303 alu.dst.write = 1;
2304 alu.last = 1;
2305 r = r600_bytecode_add_alu(ctx->bc, &alu);
2306 if (r)
2307 return r;
2308 /* replicate result */
2309 return tgsi_helper_tempx_replicate(ctx);
2310 }
2311
2312 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2313 {
2314 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2315 struct r600_bytecode_alu alu;
2316 int i, r;
2317
2318 for (i = 0; i < 4; i++) {
2319 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2320 alu.src[0].sel = ctx->temp_reg;
2321 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2322 alu.dst.chan = i;
2323 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2324 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2325 if (i == 3)
2326 alu.last = 1;
2327 r = r600_bytecode_add_alu(ctx->bc, &alu);
2328 if (r)
2329 return r;
2330 }
2331 return 0;
2332 }
2333
2334 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2335 {
2336 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2337 struct r600_bytecode_alu alu;
2338 int i, r;
2339
2340 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2341 alu.inst = ctx->inst_info->r600_opcode;
2342 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2343 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2344 }
2345 alu.dst.sel = ctx->temp_reg;
2346 alu.dst.write = 1;
2347 alu.last = 1;
2348 r = r600_bytecode_add_alu(ctx->bc, &alu);
2349 if (r)
2350 return r;
2351 /* replicate result */
2352 return tgsi_helper_tempx_replicate(ctx);
2353 }
2354
2355 static int cayman_pow(struct r600_shader_ctx *ctx)
2356 {
2357 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2358 int i, r;
2359 struct r600_bytecode_alu alu;
2360 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2361
2362 for (i = 0; i < 3; i++) {
2363 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2364 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2365 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2366 alu.dst.sel = ctx->temp_reg;
2367 alu.dst.chan = i;
2368 alu.dst.write = 1;
2369 if (i == 2)
2370 alu.last = 1;
2371 r = r600_bytecode_add_alu(ctx->bc, &alu);
2372 if (r)
2373 return r;
2374 }
2375
2376 /* b * LOG2(a) */
2377 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2378 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2379 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2380 alu.src[1].sel = ctx->temp_reg;
2381 alu.dst.sel = ctx->temp_reg;
2382 alu.dst.write = 1;
2383 alu.last = 1;
2384 r = r600_bytecode_add_alu(ctx->bc, &alu);
2385 if (r)
2386 return r;
2387
2388 for (i = 0; i < last_slot; i++) {
2389 /* POW(a,b) = EXP2(b * LOG2(a))*/
2390 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2391 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2392 alu.src[0].sel = ctx->temp_reg;
2393
2394 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2395 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2396 if (i == last_slot - 1)
2397 alu.last = 1;
2398 r = r600_bytecode_add_alu(ctx->bc, &alu);
2399 if (r)
2400 return r;
2401 }
2402 return 0;
2403 }
2404
2405 static int tgsi_pow(struct r600_shader_ctx *ctx)
2406 {
2407 struct r600_bytecode_alu alu;
2408 int r;
2409
2410 /* LOG2(a) */
2411 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2412 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2413 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2414 alu.dst.sel = ctx->temp_reg;
2415 alu.dst.write = 1;
2416 alu.last = 1;
2417 r = r600_bytecode_add_alu(ctx->bc, &alu);
2418 if (r)
2419 return r;
2420 /* b * LOG2(a) */
2421 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2422 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2423 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2424 alu.src[1].sel = ctx->temp_reg;
2425 alu.dst.sel = ctx->temp_reg;
2426 alu.dst.write = 1;
2427 alu.last = 1;
2428 r = r600_bytecode_add_alu(ctx->bc, &alu);
2429 if (r)
2430 return r;
2431 /* POW(a,b) = EXP2(b * LOG2(a))*/
2432 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2433 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2434 alu.src[0].sel = ctx->temp_reg;
2435 alu.dst.sel = ctx->temp_reg;
2436 alu.dst.write = 1;
2437 alu.last = 1;
2438 r = r600_bytecode_add_alu(ctx->bc, &alu);
2439 if (r)
2440 return r;
2441 return tgsi_helper_tempx_replicate(ctx);
2442 }
2443
2444 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2445 {
2446 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2447 struct r600_bytecode_alu alu;
2448 int i, r, j;
2449 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2450 int tmp0 = ctx->temp_reg;
2451 int tmp1 = r600_get_temp(ctx);
2452 int tmp2 = r600_get_temp(ctx);
2453 int tmp3 = r600_get_temp(ctx);
2454 /* Unsigned path:
2455 *
2456 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2457 *
2458 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2459 * 2. tmp0.z = lo (tmp0.x * src2)
2460 * 3. tmp0.w = -tmp0.z
2461 * 4. tmp0.y = hi (tmp0.x * src2)
2462 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2463 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2464 * 7. tmp1.x = tmp0.x - tmp0.w
2465 * 8. tmp1.y = tmp0.x + tmp0.w
2466 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2467 * 10. tmp0.z = hi(tmp0.x * src1) = q
2468 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2469 *
2470 * 12. tmp0.w = src1 - tmp0.y = r
2471 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2472 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2473 *
2474 * if DIV
2475 *
2476 * 15. tmp1.z = tmp0.z + 1 = q + 1
2477 * 16. tmp1.w = tmp0.z - 1 = q - 1
2478 *
2479 * else MOD
2480 *
2481 * 15. tmp1.z = tmp0.w - src2 = r - src2
2482 * 16. tmp1.w = tmp0.w + src2 = r + src2
2483 *
2484 * endif
2485 *
2486 * 17. tmp1.x = tmp1.x & tmp1.y
2487 *
2488 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2489 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2490 *
2491 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2492 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2493 *
2494 * Signed path:
2495 *
2496 * Same as unsigned, using abs values of the operands,
2497 * and fixing the sign of the result in the end.
2498 */
2499
2500 for (i = 0; i < 4; i++) {
2501 if (!(write_mask & (1<<i)))
2502 continue;
2503
2504 if (signed_op) {
2505
2506 /* tmp2.x = -src0 */
2507 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2508 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2509
2510 alu.dst.sel = tmp2;
2511 alu.dst.chan = 0;
2512 alu.dst.write = 1;
2513
2514 alu.src[0].sel = V_SQ_ALU_SRC_0;
2515
2516 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2517
2518 alu.last = 1;
2519 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2520 return r;
2521
2522 /* tmp2.y = -src1 */
2523 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2524 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2525
2526 alu.dst.sel = tmp2;
2527 alu.dst.chan = 1;
2528 alu.dst.write = 1;
2529
2530 alu.src[0].sel = V_SQ_ALU_SRC_0;
2531
2532 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2533
2534 alu.last = 1;
2535 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2536 return r;
2537
2538 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2539 /* it will be a sign of the quotient */
2540 if (!mod) {
2541
2542 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2543 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2544
2545 alu.dst.sel = tmp2;
2546 alu.dst.chan = 2;
2547 alu.dst.write = 1;
2548
2549 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2550 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2551
2552 alu.last = 1;
2553 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2554 return r;
2555 }
2556
2557 /* tmp2.x = |src0| */
2558 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2559 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2560 alu.is_op3 = 1;
2561
2562 alu.dst.sel = tmp2;
2563 alu.dst.chan = 0;
2564 alu.dst.write = 1;
2565
2566 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2567 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2568 alu.src[2].sel = tmp2;
2569 alu.src[2].chan = 0;
2570
2571 alu.last = 1;
2572 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2573 return r;
2574
2575 /* tmp2.y = |src1| */
2576 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2577 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2578 alu.is_op3 = 1;
2579
2580 alu.dst.sel = tmp2;
2581 alu.dst.chan = 1;
2582 alu.dst.write = 1;
2583
2584 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2585 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2586 alu.src[2].sel = tmp2;
2587 alu.src[2].chan = 1;
2588
2589 alu.last = 1;
2590 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2591 return r;
2592
2593 }
2594
2595 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2596 if (ctx->bc->chip_class == CAYMAN) {
2597 /* tmp3.x = u2f(src2) */
2598 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2599 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2600
2601 alu.dst.sel = tmp3;
2602 alu.dst.chan = 0;
2603 alu.dst.write = 1;
2604
2605 if (signed_op) {
2606 alu.src[0].sel = tmp2;
2607 alu.src[0].chan = 1;
2608 } else {
2609 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2610 }
2611
2612 alu.last = 1;
2613 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2614 return r;
2615
2616 /* tmp0.x = recip(tmp3.x) */
2617 for (j = 0 ; j < 3; j++) {
2618 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2619 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2620
2621 alu.dst.sel = tmp0;
2622 alu.dst.chan = j;
2623 alu.dst.write = (j == 0);
2624
2625 alu.src[0].sel = tmp3;
2626 alu.src[0].chan = 0;
2627
2628 if (j == 2)
2629 alu.last = 1;
2630 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2631 return r;
2632 }
2633
2634 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2635 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2636
2637 alu.src[0].sel = tmp0;
2638 alu.src[0].chan = 0;
2639
2640 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2641 alu.src[1].value = 0x4f800000;
2642
2643 alu.dst.sel = tmp3;
2644 alu.dst.write = 1;
2645 alu.last = 1;
2646 r = r600_bytecode_add_alu(ctx->bc, &alu);
2647 if (r)
2648 return r;
2649
2650 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2651 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2652
2653 alu.dst.sel = tmp0;
2654 alu.dst.chan = 0;
2655 alu.dst.write = 1;
2656
2657 alu.src[0].sel = tmp3;
2658 alu.src[0].chan = 0;
2659
2660 alu.last = 1;
2661 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2662 return r;
2663
2664 } else {
2665 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2666 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2667
2668 alu.dst.sel = tmp0;
2669 alu.dst.chan = 0;
2670 alu.dst.write = 1;
2671
2672 if (signed_op) {
2673 alu.src[0].sel = tmp2;
2674 alu.src[0].chan = 1;
2675 } else {
2676 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2677 }
2678
2679 alu.last = 1;
2680 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2681 return r;
2682 }
2683
2684 /* 2. tmp0.z = lo (tmp0.x * src2) */
2685 if (ctx->bc->chip_class == CAYMAN) {
2686 for (j = 0 ; j < 4; j++) {
2687 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2689
2690 alu.dst.sel = tmp0;
2691 alu.dst.chan = j;
2692 alu.dst.write = (j == 2);
2693
2694 alu.src[0].sel = tmp0;
2695 alu.src[0].chan = 0;
2696 if (signed_op) {
2697 alu.src[1].sel = tmp2;
2698 alu.src[1].chan = 1;
2699 } else {
2700 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2701 }
2702
2703 alu.last = (j == 3);
2704 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2705 return r;
2706 }
2707 } else {
2708 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2709 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2710
2711 alu.dst.sel = tmp0;
2712 alu.dst.chan = 2;
2713 alu.dst.write = 1;
2714
2715 alu.src[0].sel = tmp0;
2716 alu.src[0].chan = 0;
2717 if (signed_op) {
2718 alu.src[1].sel = tmp2;
2719 alu.src[1].chan = 1;
2720 } else {
2721 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2722 }
2723
2724 alu.last = 1;
2725 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2726 return r;
2727 }
2728
2729 /* 3. tmp0.w = -tmp0.z */
2730 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2731 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2732
2733 alu.dst.sel = tmp0;
2734 alu.dst.chan = 3;
2735 alu.dst.write = 1;
2736
2737 alu.src[0].sel = V_SQ_ALU_SRC_0;
2738 alu.src[1].sel = tmp0;
2739 alu.src[1].chan = 2;
2740
2741 alu.last = 1;
2742 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2743 return r;
2744
2745 /* 4. tmp0.y = hi (tmp0.x * src2) */
2746 if (ctx->bc->chip_class == CAYMAN) {
2747 for (j = 0 ; j < 4; j++) {
2748 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2749 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2750
2751 alu.dst.sel = tmp0;
2752 alu.dst.chan = j;
2753 alu.dst.write = (j == 1);
2754
2755 alu.src[0].sel = tmp0;
2756 alu.src[0].chan = 0;
2757
2758 if (signed_op) {
2759 alu.src[1].sel = tmp2;
2760 alu.src[1].chan = 1;
2761 } else {
2762 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2763 }
2764 alu.last = (j == 3);
2765 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2766 return r;
2767 }
2768 } else {
2769 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2770 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2771
2772 alu.dst.sel = tmp0;
2773 alu.dst.chan = 1;
2774 alu.dst.write = 1;
2775
2776 alu.src[0].sel = tmp0;
2777 alu.src[0].chan = 0;
2778
2779 if (signed_op) {
2780 alu.src[1].sel = tmp2;
2781 alu.src[1].chan = 1;
2782 } else {
2783 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2784 }
2785
2786 alu.last = 1;
2787 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2788 return r;
2789 }
2790
2791 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2792 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2793 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2794 alu.is_op3 = 1;
2795
2796 alu.dst.sel = tmp0;
2797 alu.dst.chan = 2;
2798 alu.dst.write = 1;
2799
2800 alu.src[0].sel = tmp0;
2801 alu.src[0].chan = 1;
2802 alu.src[1].sel = tmp0;
2803 alu.src[1].chan = 3;
2804 alu.src[2].sel = tmp0;
2805 alu.src[2].chan = 2;
2806
2807 alu.last = 1;
2808 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2809 return r;
2810
2811 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2812 if (ctx->bc->chip_class == CAYMAN) {
2813 for (j = 0 ; j < 4; j++) {
2814 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2815 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2816
2817 alu.dst.sel = tmp0;
2818 alu.dst.chan = j;
2819 alu.dst.write = (j == 3);
2820
2821 alu.src[0].sel = tmp0;
2822 alu.src[0].chan = 2;
2823
2824 alu.src[1].sel = tmp0;
2825 alu.src[1].chan = 0;
2826
2827 alu.last = (j == 3);
2828 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2829 return r;
2830 }
2831 } else {
2832 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2833 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2834
2835 alu.dst.sel = tmp0;
2836 alu.dst.chan = 3;
2837 alu.dst.write = 1;
2838
2839 alu.src[0].sel = tmp0;
2840 alu.src[0].chan = 2;
2841
2842 alu.src[1].sel = tmp0;
2843 alu.src[1].chan = 0;
2844
2845 alu.last = 1;
2846 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2847 return r;
2848 }
2849
2850 /* 7. tmp1.x = tmp0.x - tmp0.w */
2851 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2852 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2853
2854 alu.dst.sel = tmp1;
2855 alu.dst.chan = 0;
2856 alu.dst.write = 1;
2857
2858 alu.src[0].sel = tmp0;
2859 alu.src[0].chan = 0;
2860 alu.src[1].sel = tmp0;
2861 alu.src[1].chan = 3;
2862
2863 alu.last = 1;
2864 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2865 return r;
2866
2867 /* 8. tmp1.y = tmp0.x + tmp0.w */
2868 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2869 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2870
2871 alu.dst.sel = tmp1;
2872 alu.dst.chan = 1;
2873 alu.dst.write = 1;
2874
2875 alu.src[0].sel = tmp0;
2876 alu.src[0].chan = 0;
2877 alu.src[1].sel = tmp0;
2878 alu.src[1].chan = 3;
2879
2880 alu.last = 1;
2881 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2882 return r;
2883
2884 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2885 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2886 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2887 alu.is_op3 = 1;
2888
2889 alu.dst.sel = tmp0;
2890 alu.dst.chan = 0;
2891 alu.dst.write = 1;
2892
2893 alu.src[0].sel = tmp0;
2894 alu.src[0].chan = 1;
2895 alu.src[1].sel = tmp1;
2896 alu.src[1].chan = 1;
2897 alu.src[2].sel = tmp1;
2898 alu.src[2].chan = 0;
2899
2900 alu.last = 1;
2901 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2902 return r;
2903
2904 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
2905 if (ctx->bc->chip_class == CAYMAN) {
2906 for (j = 0 ; j < 4; j++) {
2907 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2908 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2909
2910 alu.dst.sel = tmp0;
2911 alu.dst.chan = j;
2912 alu.dst.write = (j == 2);
2913
2914 alu.src[0].sel = tmp0;
2915 alu.src[0].chan = 0;
2916
2917 if (signed_op) {
2918 alu.src[1].sel = tmp2;
2919 alu.src[1].chan = 0;
2920 } else {
2921 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2922 }
2923
2924 alu.last = (j == 3);
2925 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2926 return r;
2927 }
2928 } else {
2929 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2930 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2931
2932 alu.dst.sel = tmp0;
2933 alu.dst.chan = 2;
2934 alu.dst.write = 1;
2935
2936 alu.src[0].sel = tmp0;
2937 alu.src[0].chan = 0;
2938
2939 if (signed_op) {
2940 alu.src[1].sel = tmp2;
2941 alu.src[1].chan = 0;
2942 } else {
2943 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2944 }
2945
2946 alu.last = 1;
2947 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2948 return r;
2949 }
2950
2951 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
2952 if (ctx->bc->chip_class == CAYMAN) {
2953 for (j = 0 ; j < 4; j++) {
2954 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2955 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2956
2957 alu.dst.sel = tmp0;
2958 alu.dst.chan = j;
2959 alu.dst.write = (j == 1);
2960
2961 if (signed_op) {
2962 alu.src[0].sel = tmp2;
2963 alu.src[0].chan = 1;
2964 } else {
2965 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2966 }
2967
2968 alu.src[1].sel = tmp0;
2969 alu.src[1].chan = 2;
2970
2971 alu.last = (j == 3);
2972 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2973 return r;
2974 }
2975 } else {
2976 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2977 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2978
2979 alu.dst.sel = tmp0;
2980 alu.dst.chan = 1;
2981 alu.dst.write = 1;
2982
2983 if (signed_op) {
2984 alu.src[0].sel = tmp2;
2985 alu.src[0].chan = 1;
2986 } else {
2987 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2988 }
2989
2990 alu.src[1].sel = tmp0;
2991 alu.src[1].chan = 2;
2992
2993 alu.last = 1;
2994 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2995 return r;
2996 }
2997
2998 /* 12. tmp0.w = src1 - tmp0.y = r */
2999 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3001
3002 alu.dst.sel = tmp0;
3003 alu.dst.chan = 3;
3004 alu.dst.write = 1;
3005
3006 if (signed_op) {
3007 alu.src[0].sel = tmp2;
3008 alu.src[0].chan = 0;
3009 } else {
3010 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3011 }
3012
3013 alu.src[1].sel = tmp0;
3014 alu.src[1].chan = 1;
3015
3016 alu.last = 1;
3017 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3018 return r;
3019
3020 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
3021 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3022 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3023
3024 alu.dst.sel = tmp1;
3025 alu.dst.chan = 0;
3026 alu.dst.write = 1;
3027
3028 alu.src[0].sel = tmp0;
3029 alu.src[0].chan = 3;
3030 if (signed_op) {
3031 alu.src[1].sel = tmp2;
3032 alu.src[1].chan = 1;
3033 } else {
3034 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3035 }
3036
3037 alu.last = 1;
3038 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3039 return r;
3040
3041 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
3042 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3043 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3044
3045 alu.dst.sel = tmp1;
3046 alu.dst.chan = 1;
3047 alu.dst.write = 1;
3048
3049 if (signed_op) {
3050 alu.src[0].sel = tmp2;
3051 alu.src[0].chan = 0;
3052 } else {
3053 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3054 }
3055
3056 alu.src[1].sel = tmp0;
3057 alu.src[1].chan = 1;
3058
3059 alu.last = 1;
3060 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3061 return r;
3062
3063 if (mod) { /* UMOD */
3064
3065 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
3066 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3067 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3068
3069 alu.dst.sel = tmp1;
3070 alu.dst.chan = 2;
3071 alu.dst.write = 1;
3072
3073 alu.src[0].sel = tmp0;
3074 alu.src[0].chan = 3;
3075
3076 if (signed_op) {
3077 alu.src[1].sel = tmp2;
3078 alu.src[1].chan = 1;
3079 } else {
3080 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3081 }
3082
3083 alu.last = 1;
3084 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3085 return r;
3086
3087 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
3088 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3089 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3090
3091 alu.dst.sel = tmp1;
3092 alu.dst.chan = 3;
3093 alu.dst.write = 1;
3094
3095 alu.src[0].sel = tmp0;
3096 alu.src[0].chan = 3;
3097 if (signed_op) {
3098 alu.src[1].sel = tmp2;
3099 alu.src[1].chan = 1;
3100 } else {
3101 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3102 }
3103
3104 alu.last = 1;
3105 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3106 return r;
3107
3108 } else { /* UDIV */
3109
3110 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
3111 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3112 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3113
3114 alu.dst.sel = tmp1;
3115 alu.dst.chan = 2;
3116 alu.dst.write = 1;
3117
3118 alu.src[0].sel = tmp0;
3119 alu.src[0].chan = 2;
3120 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3121
3122 alu.last = 1;
3123 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3124 return r;
3125
3126 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
3127 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3128 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3129
3130 alu.dst.sel = tmp1;
3131 alu.dst.chan = 3;
3132 alu.dst.write = 1;
3133
3134 alu.src[0].sel = tmp0;
3135 alu.src[0].chan = 2;
3136 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3137
3138 alu.last = 1;
3139 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3140 return r;
3141
3142 }
3143
3144 /* 17. tmp1.x = tmp1.x & tmp1.y */
3145 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3146 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3147
3148 alu.dst.sel = tmp1;
3149 alu.dst.chan = 0;
3150 alu.dst.write = 1;
3151
3152 alu.src[0].sel = tmp1;
3153 alu.src[0].chan = 0;
3154 alu.src[1].sel = tmp1;
3155 alu.src[1].chan = 1;
3156
3157 alu.last = 1;
3158 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3159 return r;
3160
3161 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
3162 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
3163 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3164 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3165 alu.is_op3 = 1;
3166
3167 alu.dst.sel = tmp0;
3168 alu.dst.chan = 2;
3169 alu.dst.write = 1;
3170
3171 alu.src[0].sel = tmp1;
3172 alu.src[0].chan = 0;
3173 alu.src[1].sel = tmp0;
3174 alu.src[1].chan = mod ? 3 : 2;
3175 alu.src[2].sel = tmp1;
3176 alu.src[2].chan = 2;
3177
3178 alu.last = 1;
3179 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3180 return r;
3181
3182 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3183 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3185 alu.is_op3 = 1;
3186
3187 if (signed_op) {
3188 alu.dst.sel = tmp0;
3189 alu.dst.chan = 2;
3190 alu.dst.write = 1;
3191 } else {
3192 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3193 }
3194
3195 alu.src[0].sel = tmp1;
3196 alu.src[0].chan = 1;
3197 alu.src[1].sel = tmp1;
3198 alu.src[1].chan = 3;
3199 alu.src[2].sel = tmp0;
3200 alu.src[2].chan = 2;
3201
3202 alu.last = 1;
3203 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3204 return r;
3205
3206 if (signed_op) {
3207
3208 /* fix the sign of the result */
3209
3210 if (mod) {
3211
3212 /* tmp0.x = -tmp0.z */
3213 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3214 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3215
3216 alu.dst.sel = tmp0;
3217 alu.dst.chan = 0;
3218 alu.dst.write = 1;
3219
3220 alu.src[0].sel = V_SQ_ALU_SRC_0;
3221 alu.src[1].sel = tmp0;
3222 alu.src[1].chan = 2;
3223
3224 alu.last = 1;
3225 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3226 return r;
3227
3228 /* sign of the remainder is the same as the sign of src0 */
3229 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3230 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3231 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3232 alu.is_op3 = 1;
3233
3234 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3235
3236 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3237 alu.src[1].sel = tmp0;
3238 alu.src[1].chan = 2;
3239 alu.src[2].sel = tmp0;
3240 alu.src[2].chan = 0;
3241
3242 alu.last = 1;
3243 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3244 return r;
3245
3246 } else {
3247
3248 /* tmp0.x = -tmp0.z */
3249 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3251
3252 alu.dst.sel = tmp0;
3253 alu.dst.chan = 0;
3254 alu.dst.write = 1;
3255
3256 alu.src[0].sel = V_SQ_ALU_SRC_0;
3257 alu.src[1].sel = tmp0;
3258 alu.src[1].chan = 2;
3259
3260 alu.last = 1;
3261 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3262 return r;
3263
3264 /* fix the quotient sign (same as the sign of src0*src1) */
3265 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3266 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3267 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3268 alu.is_op3 = 1;
3269
3270 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3271
3272 alu.src[0].sel = tmp2;
3273 alu.src[0].chan = 2;
3274 alu.src[1].sel = tmp0;
3275 alu.src[1].chan = 2;
3276 alu.src[2].sel = tmp0;
3277 alu.src[2].chan = 0;
3278
3279 alu.last = 1;
3280 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3281 return r;
3282 }
3283 }
3284 }
3285 return 0;
3286 }
3287
/* TGSI UDIV: forwards to the shared divide/modulo lowering with both
 * selector arguments cleared. */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int operands_signed = 0;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3292
/* TGSI UMOD: forwards to the shared divide/modulo lowering with the
 * first selector set and the second cleared. */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int operands_signed = 0;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3297
/* TGSI IDIV: forwards to the shared divide/modulo lowering with the
 * first selector cleared and the second set. */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int operands_signed = 1;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3302
/* TGSI IMOD: forwards to the shared divide/modulo lowering with both
 * selector arguments set. */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int operands_signed = 1;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3307
3308 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3309 {
3310 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3311 struct r600_bytecode_alu alu;
3312 int i, r;
3313 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3314 int last_inst = tgsi_last_instruction(write_mask);
3315
3316 /* tmp = -src */
3317 for (i = 0; i < 4; i++) {
3318 if (!(write_mask & (1<<i)))
3319 continue;
3320
3321 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3322 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3323
3324 alu.dst.sel = ctx->temp_reg;
3325 alu.dst.chan = i;
3326 alu.dst.write = 1;
3327
3328 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3329 alu.src[0].sel = V_SQ_ALU_SRC_0;
3330
3331 if (i == last_inst)
3332 alu.last = 1;
3333 r = r600_bytecode_add_alu(ctx->bc, &alu);
3334 if (r)
3335 return r;
3336 }
3337
3338 /* dst = (src >= 0 ? src : tmp) */
3339 for (i = 0; i < 4; i++) {
3340 if (!(write_mask & (1<<i)))
3341 continue;
3342
3343 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3344 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3345 alu.is_op3 = 1;
3346 alu.dst.write = 1;
3347
3348 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3349
3350 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3351 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3352 alu.src[2].sel = ctx->temp_reg;
3353 alu.src[2].chan = i;
3354
3355 if (i == last_inst)
3356 alu.last = 1;
3357 r = r600_bytecode_add_alu(ctx->bc, &alu);
3358 if (r)
3359 return r;
3360 }
3361 return 0;
3362 }
3363
3364 static int tgsi_issg(struct r600_shader_ctx *ctx)
3365 {
3366 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3367 struct r600_bytecode_alu alu;
3368 int i, r;
3369 unsigned write_mask = inst->Dst[0].Register.WriteMask;
3370 int last_inst = tgsi_last_instruction(write_mask);
3371
3372 /* tmp = (src >= 0 ? src : -1) */
3373 for (i = 0; i < 4; i++) {
3374 if (!(write_mask & (1<<i)))
3375 continue;
3376
3377 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3378 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3379 alu.is_op3 = 1;
3380
3381 alu.dst.sel = ctx->temp_reg;
3382 alu.dst.chan = i;
3383 alu.dst.write = 1;
3384
3385 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3386 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3387 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3388
3389 if (i == last_inst)
3390 alu.last = 1;
3391 r = r600_bytecode_add_alu(ctx->bc, &alu);
3392 if (r)
3393 return r;
3394 }
3395
3396 /* dst = (tmp > 0 ? 1 : tmp) */
3397 for (i = 0; i < 4; i++) {
3398 if (!(write_mask & (1<<i)))
3399 continue;
3400
3401 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3402 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3403 alu.is_op3 = 1;
3404 alu.dst.write = 1;
3405
3406 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3407
3408 alu.src[0].sel = ctx->temp_reg;
3409 alu.src[0].chan = i;
3410
3411 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3412
3413 alu.src[2].sel = ctx->temp_reg;
3414 alu.src[2].chan = i;
3415
3416 if (i == last_inst)
3417 alu.last = 1;
3418 r = r600_bytecode_add_alu(ctx->bc, &alu);
3419 if (r)
3420 return r;
3421 }
3422 return 0;
3423 }
3424
3425
3426
3427 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3428 {
3429 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3430 struct r600_bytecode_alu alu;
3431 int i, r;
3432
3433 /* tmp = (src > 0 ? 1 : src) */
3434 for (i = 0; i < 4; i++) {
3435 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3436 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3437 alu.is_op3 = 1;
3438
3439 alu.dst.sel = ctx->temp_reg;
3440 alu.dst.chan = i;
3441
3442 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3443 alu.src[1].sel = V_SQ_ALU_SRC_1;
3444 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3445
3446 if (i == 3)
3447 alu.last = 1;
3448 r = r600_bytecode_add_alu(ctx->bc, &alu);
3449 if (r)
3450 return r;
3451 }
3452
3453 /* dst = (-tmp > 0 ? -1 : tmp) */
3454 for (i = 0; i < 4; i++) {
3455 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3456 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3457 alu.is_op3 = 1;
3458 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3459
3460 alu.src[0].sel = ctx->temp_reg;
3461 alu.src[0].chan = i;
3462 alu.src[0].neg = 1;
3463
3464 alu.src[1].sel = V_SQ_ALU_SRC_1;
3465 alu.src[1].neg = 1;
3466
3467 alu.src[2].sel = ctx->temp_reg;
3468 alu.src[2].chan = i;
3469
3470 if (i == 3)
3471 alu.last = 1;
3472 r = r600_bytecode_add_alu(ctx->bc, &alu);
3473 if (r)
3474 return r;
3475 }
3476 return 0;
3477 }
3478
3479 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3480 {
3481 struct r600_bytecode_alu alu;
3482 int i, r;
3483
3484 for (i = 0; i < 4; i++) {
3485 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3486 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3487 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3488 alu.dst.chan = i;
3489 } else {
3490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3491 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3492 alu.src[0].sel = ctx->temp_reg;
3493 alu.src[0].chan = i;
3494 }
3495 if (i == 3) {
3496 alu.last = 1;
3497 }
3498 r = r600_bytecode_add_alu(ctx->bc, &alu);
3499 if (r)
3500 return r;
3501 }
3502 return 0;
3503 }
3504
3505 static int tgsi_op3(struct r600_shader_ctx *ctx)
3506 {
3507 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3508 struct r600_bytecode_alu alu;
3509 int i, j, r;
3510 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3511
3512 for (i = 0; i < lasti + 1; i++) {
3513 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3514 continue;
3515
3516 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3517 alu.inst = ctx->inst_info->r600_opcode;
3518 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3519 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3520 }
3521
3522 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3523 alu.dst.chan = i;
3524 alu.dst.write = 1;
3525 alu.is_op3 = 1;
3526 if (i == lasti) {
3527 alu.last = 1;
3528 }
3529 r = r600_bytecode_add_alu(ctx->bc, &alu);
3530 if (r)
3531 return r;
3532 }
3533 return 0;
3534 }
3535
3536 static int tgsi_dp(struct r600_shader_ctx *ctx)
3537 {
3538 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3539 struct r600_bytecode_alu alu;
3540 int i, j, r;
3541
3542 for (i = 0; i < 4; i++) {
3543 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3544 alu.inst = ctx->inst_info->r600_opcode;
3545 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3546 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3547 }
3548
3549 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3550 alu.dst.chan = i;
3551 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3552 /* handle some special cases */
3553 switch (ctx->inst_info->tgsi_opcode) {
3554 case TGSI_OPCODE_DP2:
3555 if (i > 1) {
3556 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3557 alu.src[0].chan = alu.src[1].chan = 0;
3558 }
3559 break;
3560 case TGSI_OPCODE_DP3:
3561 if (i > 2) {
3562 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3563 alu.src[0].chan = alu.src[1].chan = 0;
3564 }
3565 break;
3566 case TGSI_OPCODE_DPH:
3567 if (i == 3) {
3568 alu.src[0].sel = V_SQ_ALU_SRC_1;
3569 alu.src[0].chan = 0;
3570 alu.src[0].neg = 0;
3571 }
3572 break;
3573 default:
3574 break;
3575 }
3576 if (i == 3) {
3577 alu.last = 1;
3578 }
3579 r = r600_bytecode_add_alu(ctx->bc, &alu);
3580 if (r)
3581 return r;
3582 }
3583 return 0;
3584 }
3585
3586 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3587 unsigned index)
3588 {
3589 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3590 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3591 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3592 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3593 ctx->src[index].neg || ctx->src[index].abs;
3594 }
3595
3596 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3597 unsigned index)
3598 {
3599 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3600 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3601 }
3602
3603 static int tgsi_tex(struct r600_shader_ctx *ctx)
3604 {
3605 static float one_point_five = 1.5f;
3606 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3607 struct r600_bytecode_tex tex;
3608 struct r600_bytecode_alu alu;
3609 unsigned src_gpr;
3610 int r, i, j;
3611 int opcode;
3612 /* Texture fetch instructions can only use gprs as source.
3613 * Also they cannot negate the source or take the absolute value */
3614 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
3615 boolean src_loaded = FALSE;
3616 unsigned sampler_src_reg = 1;
3617 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3618
3619 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3620
3621 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3622 /* get offset values */
3623 if (inst->Texture.NumOffsets) {
3624 assert(inst->Texture.NumOffsets == 1);
3625
3626 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3627 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3628 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3629 }
3630 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3631 /* TGSI moves the sampler to src reg 3 for TXD */
3632 sampler_src_reg = 3;
3633
3634 for (i = 1; i < 3; i++) {
3635 /* set gradients h/v */
3636 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3637 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3638 SQ_TEX_INST_SET_GRADIENTS_V;
3639 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3640 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3641
3642 if (tgsi_tex_src_requires_loading(ctx, i)) {
3643 tex.src_gpr = r600_get_temp(ctx);
3644 tex.src_sel_x = 0;
3645 tex.src_sel_y = 1;
3646 tex.src_sel_z = 2;
3647 tex.src_sel_w = 3;
3648
3649 for (j = 0; j < 4; j++) {
3650 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3651 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3652 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3653 alu.dst.sel = tex.src_gpr;
3654 alu.dst.chan = j;
3655 if (j == 3)
3656 alu.last = 1;
3657 alu.dst.write = 1;
3658 r = r600_bytecode_add_alu(ctx->bc, &alu);
3659 if (r)
3660 return r;
3661 }
3662
3663 } else {
3664 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3665 tex.src_sel_x = ctx->src[i].swizzle[0];
3666 tex.src_sel_y = ctx->src[i].swizzle[1];
3667 tex.src_sel_z = ctx->src[i].swizzle[2];
3668 tex.src_sel_w = ctx->src[i].swizzle[3];
3669 tex.src_rel = ctx->src[i].rel;
3670 }
3671 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3672 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3673 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3674 tex.coord_type_x = 1;
3675 tex.coord_type_y = 1;
3676 tex.coord_type_z = 1;
3677 tex.coord_type_w = 1;
3678 }
3679 r = r600_bytecode_add_tex(ctx->bc, &tex);
3680 if (r)
3681 return r;
3682 }
3683 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3684 int out_chan;
3685 /* Add perspective divide */
3686 if (ctx->bc->chip_class == CAYMAN) {
3687 out_chan = 2;
3688 for (i = 0; i < 3; i++) {
3689 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3690 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3691 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3692
3693 alu.dst.sel = ctx->temp_reg;
3694 alu.dst.chan = i;
3695 if (i == 2)
3696 alu.last = 1;
3697 if (out_chan == i)
3698 alu.dst.write = 1;
3699 r = r600_bytecode_add_alu(ctx->bc, &alu);
3700 if (r)
3701 return r;
3702 }
3703
3704 } else {
3705 out_chan = 3;
3706 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3707 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3708 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3709
3710 alu.dst.sel = ctx->temp_reg;
3711 alu.dst.chan = out_chan;
3712 alu.last = 1;
3713 alu.dst.write = 1;
3714 r = r600_bytecode_add_alu(ctx->bc, &alu);
3715 if (r)
3716 return r;
3717 }
3718
3719 for (i = 0; i < 3; i++) {
3720 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3721 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3722 alu.src[0].sel = ctx->temp_reg;
3723 alu.src[0].chan = out_chan;
3724 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3725 alu.dst.sel = ctx->temp_reg;
3726 alu.dst.chan = i;
3727 alu.dst.write = 1;
3728 r = r600_bytecode_add_alu(ctx->bc, &alu);
3729 if (r)
3730 return r;
3731 }
3732 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3733 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3734 alu.src[0].sel = V_SQ_ALU_SRC_1;
3735 alu.src[0].chan = 0;
3736 alu.dst.sel = ctx->temp_reg;
3737 alu.dst.chan = 3;
3738 alu.last = 1;
3739 alu.dst.write = 1;
3740 r = r600_bytecode_add_alu(ctx->bc, &alu);
3741 if (r)
3742 return r;
3743 src_loaded = TRUE;
3744 src_gpr = ctx->temp_reg;
3745 }
3746
3747 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3748 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3749 inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
3750
3751 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3752 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3753
3754 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3755 for (i = 0; i < 4; i++) {
3756 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3757 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3758 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3759 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3760 alu.dst.sel = ctx->temp_reg;
3761 alu.dst.chan = i;
3762 if (i == 3)
3763 alu.last = 1;
3764 alu.dst.write = 1;
3765 r = r600_bytecode_add_alu(ctx->bc, &alu);
3766 if (r)
3767 return r;
3768 }
3769
3770 /* tmp1.z = RCP_e(|tmp1.z|) */
3771 if (ctx->bc->chip_class == CAYMAN) {
3772 for (i = 0; i < 3; i++) {
3773 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3774 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3775 alu.src[0].sel = ctx->temp_reg;
3776 alu.src[0].chan = 2;
3777 alu.src[0].abs = 1;
3778 alu.dst.sel = ctx->temp_reg;
3779 alu.dst.chan = i;
3780 if (i == 2)
3781 alu.dst.write = 1;
3782 if (i == 2)
3783 alu.last = 1;
3784 r = r600_bytecode_add_alu(ctx->bc, &alu);
3785 if (r)
3786 return r;
3787 }
3788 } else {
3789 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3790 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3791 alu.src[0].sel = ctx->temp_reg;
3792 alu.src[0].chan = 2;
3793 alu.src[0].abs = 1;
3794 alu.dst.sel = ctx->temp_reg;
3795 alu.dst.chan = 2;
3796 alu.dst.write = 1;
3797 alu.last = 1;
3798 r = r600_bytecode_add_alu(ctx->bc, &alu);
3799 if (r)
3800 return r;
3801 }
3802
3803 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3804 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3805 * muladd has no writemask, have to use another temp
3806 */
3807 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3808 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3809 alu.is_op3 = 1;
3810
3811 alu.src[0].sel = ctx->temp_reg;
3812 alu.src[0].chan = 0;
3813 alu.src[1].sel = ctx->temp_reg;
3814 alu.src[1].chan = 2;
3815
3816 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3817 alu.src[2].chan = 0;
3818 alu.src[2].value = *(uint32_t *)&one_point_five;
3819
3820 alu.dst.sel = ctx->temp_reg;
3821 alu.dst.chan = 0;
3822 alu.dst.write = 1;
3823
3824 r = r600_bytecode_add_alu(ctx->bc, &alu);
3825 if (r)
3826 return r;
3827
3828 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3829 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3830 alu.is_op3 = 1;
3831
3832 alu.src[0].sel = ctx->temp_reg;
3833 alu.src[0].chan = 1;
3834 alu.src[1].sel = ctx->temp_reg;
3835 alu.src[1].chan = 2;
3836
3837 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3838 alu.src[2].chan = 0;
3839 alu.src[2].value = *(uint32_t *)&one_point_five;
3840
3841 alu.dst.sel = ctx->temp_reg;
3842 alu.dst.chan = 1;
3843 alu.dst.write = 1;
3844
3845 alu.last = 1;
3846 r = r600_bytecode_add_alu(ctx->bc, &alu);
3847 if (r)
3848 return r;
3849 /* write initial W value into Z component */
3850 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
3851 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3852 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3853 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3854 alu.dst.sel = ctx->temp_reg;
3855 alu.dst.chan = 2;
3856 alu.dst.write = 1;
3857 alu.last = 1;
3858 r = r600_bytecode_add_alu(ctx->bc, &alu);
3859 if (r)
3860 return r;
3861 }
3862 src_loaded = TRUE;
3863 src_gpr = ctx->temp_reg;
3864 }
3865
3866 if (src_requires_loading && !src_loaded) {
3867 for (i = 0; i < 4; i++) {
3868 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3869 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3870 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3871 alu.dst.sel = ctx->temp_reg;
3872 alu.dst.chan = i;
3873 if (i == 3)
3874 alu.last = 1;
3875 alu.dst.write = 1;
3876 r = r600_bytecode_add_alu(ctx->bc, &alu);
3877 if (r)
3878 return r;
3879 }
3880 src_loaded = TRUE;
3881 src_gpr = ctx->temp_reg;
3882 }
3883
3884 opcode = ctx->inst_info->r600_opcode;
3885 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
3886 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
3887 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
3888 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
3889 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
3890 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
3891 switch (opcode) {
3892 case SQ_TEX_INST_SAMPLE:
3893 opcode = SQ_TEX_INST_SAMPLE_C;
3894 break;
3895 case SQ_TEX_INST_SAMPLE_L:
3896 opcode = SQ_TEX_INST_SAMPLE_C_L;
3897 break;
3898 case SQ_TEX_INST_SAMPLE_LB:
3899 opcode = SQ_TEX_INST_SAMPLE_C_LB;
3900 break;
3901 case SQ_TEX_INST_SAMPLE_G:
3902 opcode = SQ_TEX_INST_SAMPLE_C_G;
3903 break;
3904 }
3905 }
3906
3907 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3908 tex.inst = opcode;
3909
3910 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3911 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3912 tex.src_gpr = src_gpr;
3913 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
3914 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
3915 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
3916 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
3917 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
3918 if (src_loaded) {
3919 tex.src_sel_x = 0;
3920 tex.src_sel_y = 1;
3921 tex.src_sel_z = 2;
3922 tex.src_sel_w = 3;
3923 } else {
3924 tex.src_sel_x = ctx->src[0].swizzle[0];
3925 tex.src_sel_y = ctx->src[0].swizzle[1];
3926 tex.src_sel_z = ctx->src[0].swizzle[2];
3927 tex.src_sel_w = ctx->src[0].swizzle[3];
3928 tex.src_rel = ctx->src[0].rel;
3929 }
3930
3931 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
3932 tex.src_sel_x = 1;
3933 tex.src_sel_y = 0;
3934 tex.src_sel_z = 3;
3935 tex.src_sel_w = 1;
3936 }
3937 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
3938 tex.src_sel_x = 1;
3939 tex.src_sel_y = 0;
3940 tex.src_sel_z = 3;
3941 tex.src_sel_w = 2; /* route Z compare value into W */
3942 }
3943
3944 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
3945 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
3946 tex.coord_type_x = 1;
3947 tex.coord_type_y = 1;
3948 }
3949 tex.coord_type_z = 1;
3950 tex.coord_type_w = 1;
3951
3952 tex.offset_x = offset_x;
3953 tex.offset_y = offset_y;
3954 tex.offset_z = offset_z;
3955
3956 /* Put the depth for comparison in W.
3957 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
3958 * Some instructions expect the depth in Z. */
3959 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
3960 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
3961 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
3962 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
3963 opcode != SQ_TEX_INST_SAMPLE_C_L &&
3964 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
3965 tex.src_sel_w = tex.src_sel_z;
3966 }
3967
3968 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
3969 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
3970 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
3971 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
3972 /* the array index is read from Y */
3973 tex.coord_type_y = 0;
3974 } else {
3975 /* the array index is read from Z */
3976 tex.coord_type_z = 0;
3977 tex.src_sel_z = tex.src_sel_y;
3978 }
3979 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
3980 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
3981 /* the array index is read from Z */
3982 tex.coord_type_z = 0;
3983
3984 r = r600_bytecode_add_tex(ctx->bc, &tex);
3985 if (r)
3986 return r;
3987
3988 /* add shadow ambient support - gallium doesn't do it yet */
3989 return 0;
3990 }
3991
3992 static int tgsi_lrp(struct r600_shader_ctx *ctx)
3993 {
3994 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3995 struct r600_bytecode_alu alu;
3996 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3997 unsigned i;
3998 int r;
3999
4000 /* optimize if it's just an equal balance */
4001 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4002 for (i = 0; i < lasti + 1; i++) {
4003 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4004 continue;
4005
4006 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4007 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4008 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4009 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4010 alu.omod = 3;
4011 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4012 alu.dst.chan = i;
4013 if (i == lasti) {
4014 alu.last = 1;
4015 }
4016 r = r600_bytecode_add_alu(ctx->bc, &alu);
4017 if (r)
4018 return r;
4019 }
4020 return 0;
4021 }
4022
4023 /* 1 - src0 */
4024 for (i = 0; i < lasti + 1; i++) {
4025 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4026 continue;
4027
4028 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4029 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4030 alu.src[0].sel = V_SQ_ALU_SRC_1;
4031 alu.src[0].chan = 0;
4032 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4033 r600_bytecode_src_toggle_neg(&alu.src[1]);
4034 alu.dst.sel = ctx->temp_reg;
4035 alu.dst.chan = i;
4036 if (i == lasti) {
4037 alu.last = 1;
4038 }
4039 alu.dst.write = 1;
4040 r = r600_bytecode_add_alu(ctx->bc, &alu);
4041 if (r)
4042 return r;
4043 }
4044
4045 /* (1 - src0) * src2 */
4046 for (i = 0; i < lasti + 1; i++) {
4047 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4048 continue;
4049
4050 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4051 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4052 alu.src[0].sel = ctx->temp_reg;
4053 alu.src[0].chan = i;
4054 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4055 alu.dst.sel = ctx->temp_reg;
4056 alu.dst.chan = i;
4057 if (i == lasti) {
4058 alu.last = 1;
4059 }
4060 alu.dst.write = 1;
4061 r = r600_bytecode_add_alu(ctx->bc, &alu);
4062 if (r)
4063 return r;
4064 }
4065
4066 /* src0 * src1 + (1 - src0) * src2 */
4067 for (i = 0; i < lasti + 1; i++) {
4068 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4069 continue;
4070
4071 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4072 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4073 alu.is_op3 = 1;
4074 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4075 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4076 alu.src[2].sel = ctx->temp_reg;
4077 alu.src[2].chan = i;
4078
4079 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4080 alu.dst.chan = i;
4081 if (i == lasti) {
4082 alu.last = 1;
4083 }
4084 r = r600_bytecode_add_alu(ctx->bc, &alu);
4085 if (r)
4086 return r;
4087 }
4088 return 0;
4089 }
4090
4091 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4092 {
4093 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4094 struct r600_bytecode_alu alu;
4095 int i, r;
4096 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4097
4098 for (i = 0; i < lasti + 1; i++) {
4099 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4100 continue;
4101
4102 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4103 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4104 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4105 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4106 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4107 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4108 alu.dst.chan = i;
4109 alu.dst.write = 1;
4110 alu.is_op3 = 1;
4111 if (i == lasti)
4112 alu.last = 1;
4113 r = r600_bytecode_add_alu(ctx->bc, &alu);
4114 if (r)
4115 return r;
4116 }
4117 return 0;
4118 }
4119
4120 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4121 {
4122 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4123 static const unsigned int src0_swizzle[] = {2, 0, 1};
4124 static const unsigned int src1_swizzle[] = {1, 2, 0};
4125 struct r600_bytecode_alu alu;
4126 uint32_t use_temp = 0;
4127 int i, r;
4128
4129 if (inst->Dst[0].Register.WriteMask != 0xf)
4130 use_temp = 1;
4131
4132 for (i = 0; i < 4; i++) {
4133 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4134 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4135 if (i < 3) {
4136 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4137 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4138 } else {
4139 alu.src[0].sel = V_SQ_ALU_SRC_0;
4140 alu.src[0].chan = i;
4141 alu.src[1].sel = V_SQ_ALU_SRC_0;
4142 alu.src[1].chan = i;
4143 }
4144
4145 alu.dst.sel = ctx->temp_reg;
4146 alu.dst.chan = i;
4147 alu.dst.write = 1;
4148
4149 if (i == 3)
4150 alu.last = 1;
4151 r = r600_bytecode_add_alu(ctx->bc, &alu);
4152 if (r)
4153 return r;
4154 }
4155
4156 for (i = 0; i < 4; i++) {
4157 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4158 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4159
4160 if (i < 3) {
4161 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4162 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4163 } else {
4164 alu.src[0].sel = V_SQ_ALU_SRC_0;
4165 alu.src[0].chan = i;
4166 alu.src[1].sel = V_SQ_ALU_SRC_0;
4167 alu.src[1].chan = i;
4168 }
4169
4170 alu.src[2].sel = ctx->temp_reg;
4171 alu.src[2].neg = 1;
4172 alu.src[2].chan = i;
4173
4174 if (use_temp)
4175 alu.dst.sel = ctx->temp_reg;
4176 else
4177 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4178 alu.dst.chan = i;
4179 alu.dst.write = 1;
4180 alu.is_op3 = 1;
4181 if (i == 3)
4182 alu.last = 1;
4183 r = r600_bytecode_add_alu(ctx->bc, &alu);
4184 if (r)
4185 return r;
4186 }
4187 if (use_temp)
4188 return tgsi_helper_copy(ctx, inst);
4189 return 0;
4190 }
4191
4192 static int tgsi_exp(struct r600_shader_ctx *ctx)
4193 {
4194 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4195 struct r600_bytecode_alu alu;
4196 int r;
4197 int i;
4198
4199 /* result.x = 2^floor(src); */
4200 if (inst->Dst[0].Register.WriteMask & 1) {
4201 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4202
4203 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4204 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4205
4206 alu.dst.sel = ctx->temp_reg;
4207 alu.dst.chan = 0;
4208 alu.dst.write = 1;
4209 alu.last = 1;
4210 r = r600_bytecode_add_alu(ctx->bc, &alu);
4211 if (r)
4212 return r;
4213
4214 if (ctx->bc->chip_class == CAYMAN) {
4215 for (i = 0; i < 3; i++) {
4216 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4217 alu.src[0].sel = ctx->temp_reg;
4218 alu.src[0].chan = 0;
4219
4220 alu.dst.sel = ctx->temp_reg;
4221 alu.dst.chan = i;
4222 if (i == 0)
4223 alu.dst.write = 1;
4224 if (i == 2)
4225 alu.last = 1;
4226 r = r600_bytecode_add_alu(ctx->bc, &alu);
4227 if (r)
4228 return r;
4229 }
4230 } else {
4231 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4232 alu.src[0].sel = ctx->temp_reg;
4233 alu.src[0].chan = 0;
4234
4235 alu.dst.sel = ctx->temp_reg;
4236 alu.dst.chan = 0;
4237 alu.dst.write = 1;
4238 alu.last = 1;
4239 r = r600_bytecode_add_alu(ctx->bc, &alu);
4240 if (r)
4241 return r;
4242 }
4243 }
4244
4245 /* result.y = tmp - floor(tmp); */
4246 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4247 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4248
4249 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4250 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4251
4252 alu.dst.sel = ctx->temp_reg;
4253 #if 0
4254 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4255 if (r)
4256 return r;
4257 #endif
4258 alu.dst.write = 1;
4259 alu.dst.chan = 1;
4260
4261 alu.last = 1;
4262
4263 r = r600_bytecode_add_alu(ctx->bc, &alu);
4264 if (r)
4265 return r;
4266 }
4267
4268 /* result.z = RoughApprox2ToX(tmp);*/
4269 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4270 if (ctx->bc->chip_class == CAYMAN) {
4271 for (i = 0; i < 3; i++) {
4272 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4273 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4274 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4275
4276 alu.dst.sel = ctx->temp_reg;
4277 alu.dst.chan = i;
4278 if (i == 2) {
4279 alu.dst.write = 1;
4280 alu.last = 1;
4281 }
4282
4283 r = r600_bytecode_add_alu(ctx->bc, &alu);
4284 if (r)
4285 return r;
4286 }
4287 } else {
4288 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4290 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4291
4292 alu.dst.sel = ctx->temp_reg;
4293 alu.dst.write = 1;
4294 alu.dst.chan = 2;
4295
4296 alu.last = 1;
4297
4298 r = r600_bytecode_add_alu(ctx->bc, &alu);
4299 if (r)
4300 return r;
4301 }
4302 }
4303
4304 /* result.w = 1.0;*/
4305 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4306 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4307
4308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4309 alu.src[0].sel = V_SQ_ALU_SRC_1;
4310 alu.src[0].chan = 0;
4311
4312 alu.dst.sel = ctx->temp_reg;
4313 alu.dst.chan = 3;
4314 alu.dst.write = 1;
4315 alu.last = 1;
4316 r = r600_bytecode_add_alu(ctx->bc, &alu);
4317 if (r)
4318 return r;
4319 }
4320 return tgsi_helper_copy(ctx, inst);
4321 }
4322
4323 static int tgsi_log(struct r600_shader_ctx *ctx)
4324 {
4325 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4326 struct r600_bytecode_alu alu;
4327 int r;
4328 int i;
4329
4330 /* result.x = floor(log2(|src|)); */
4331 if (inst->Dst[0].Register.WriteMask & 1) {
4332 if (ctx->bc->chip_class == CAYMAN) {
4333 for (i = 0; i < 3; i++) {
4334 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4335
4336 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4337 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4338 r600_bytecode_src_set_abs(&alu.src[0]);
4339
4340 alu.dst.sel = ctx->temp_reg;
4341 alu.dst.chan = i;
4342 if (i == 0)
4343 alu.dst.write = 1;
4344 if (i == 2)
4345 alu.last = 1;
4346 r = r600_bytecode_add_alu(ctx->bc, &alu);
4347 if (r)
4348 return r;
4349 }
4350
4351 } else {
4352 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4353
4354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4355 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4356 r600_bytecode_src_set_abs(&alu.src[0]);
4357
4358 alu.dst.sel = ctx->temp_reg;
4359 alu.dst.chan = 0;
4360 alu.dst.write = 1;
4361 alu.last = 1;
4362 r = r600_bytecode_add_alu(ctx->bc, &alu);
4363 if (r)
4364 return r;
4365 }
4366
4367 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4368 alu.src[0].sel = ctx->temp_reg;
4369 alu.src[0].chan = 0;
4370
4371 alu.dst.sel = ctx->temp_reg;
4372 alu.dst.chan = 0;
4373 alu.dst.write = 1;
4374 alu.last = 1;
4375
4376 r = r600_bytecode_add_alu(ctx->bc, &alu);
4377 if (r)
4378 return r;
4379 }
4380
4381 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4382 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4383
4384 if (ctx->bc->chip_class == CAYMAN) {
4385 for (i = 0; i < 3; i++) {
4386 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4387
4388 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4389 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4390 r600_bytecode_src_set_abs(&alu.src[0]);
4391
4392 alu.dst.sel = ctx->temp_reg;
4393 alu.dst.chan = i;
4394 if (i == 1)
4395 alu.dst.write = 1;
4396 if (i == 2)
4397 alu.last = 1;
4398
4399 r = r600_bytecode_add_alu(ctx->bc, &alu);
4400 if (r)
4401 return r;
4402 }
4403 } else {
4404 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4405
4406 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4407 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4408 r600_bytecode_src_set_abs(&alu.src[0]);
4409
4410 alu.dst.sel = ctx->temp_reg;
4411 alu.dst.chan = 1;
4412 alu.dst.write = 1;
4413 alu.last = 1;
4414
4415 r = r600_bytecode_add_alu(ctx->bc, &alu);
4416 if (r)
4417 return r;
4418 }
4419
4420 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4421
4422 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4423 alu.src[0].sel = ctx->temp_reg;
4424 alu.src[0].chan = 1;
4425
4426 alu.dst.sel = ctx->temp_reg;
4427 alu.dst.chan = 1;
4428 alu.dst.write = 1;
4429 alu.last = 1;
4430
4431 r = r600_bytecode_add_alu(ctx->bc, &alu);
4432 if (r)
4433 return r;
4434
4435 if (ctx->bc->chip_class == CAYMAN) {
4436 for (i = 0; i < 3; i++) {
4437 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4438 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4439 alu.src[0].sel = ctx->temp_reg;
4440 alu.src[0].chan = 1;
4441
4442 alu.dst.sel = ctx->temp_reg;
4443 alu.dst.chan = i;
4444 if (i == 1)
4445 alu.dst.write = 1;
4446 if (i == 2)
4447 alu.last = 1;
4448
4449 r = r600_bytecode_add_alu(ctx->bc, &alu);
4450 if (r)
4451 return r;
4452 }
4453 } else {
4454 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4455 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4456 alu.src[0].sel = ctx->temp_reg;
4457 alu.src[0].chan = 1;
4458
4459 alu.dst.sel = ctx->temp_reg;
4460 alu.dst.chan = 1;
4461 alu.dst.write = 1;
4462 alu.last = 1;
4463
4464 r = r600_bytecode_add_alu(ctx->bc, &alu);
4465 if (r)
4466 return r;
4467 }
4468
4469 if (ctx->bc->chip_class == CAYMAN) {
4470 for (i = 0; i < 3; i++) {
4471 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4472 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4473 alu.src[0].sel = ctx->temp_reg;
4474 alu.src[0].chan = 1;
4475
4476 alu.dst.sel = ctx->temp_reg;
4477 alu.dst.chan = i;
4478 if (i == 1)
4479 alu.dst.write = 1;
4480 if (i == 2)
4481 alu.last = 1;
4482
4483 r = r600_bytecode_add_alu(ctx->bc, &alu);
4484 if (r)
4485 return r;
4486 }
4487 } else {
4488 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4489 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4490 alu.src[0].sel = ctx->temp_reg;
4491 alu.src[0].chan = 1;
4492
4493 alu.dst.sel = ctx->temp_reg;
4494 alu.dst.chan = 1;
4495 alu.dst.write = 1;
4496 alu.last = 1;
4497
4498 r = r600_bytecode_add_alu(ctx->bc, &alu);
4499 if (r)
4500 return r;
4501 }
4502
4503 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4504
4505 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4506
4507 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4508 r600_bytecode_src_set_abs(&alu.src[0]);
4509
4510 alu.src[1].sel = ctx->temp_reg;
4511 alu.src[1].chan = 1;
4512
4513 alu.dst.sel = ctx->temp_reg;
4514 alu.dst.chan = 1;
4515 alu.dst.write = 1;
4516 alu.last = 1;
4517
4518 r = r600_bytecode_add_alu(ctx->bc, &alu);
4519 if (r)
4520 return r;
4521 }
4522
4523 /* result.z = log2(|src|);*/
4524 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4525 if (ctx->bc->chip_class == CAYMAN) {
4526 for (i = 0; i < 3; i++) {
4527 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4528
4529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4530 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4531 r600_bytecode_src_set_abs(&alu.src[0]);
4532
4533 alu.dst.sel = ctx->temp_reg;
4534 if (i == 2)
4535 alu.dst.write = 1;
4536 alu.dst.chan = i;
4537 if (i == 2)
4538 alu.last = 1;
4539
4540 r = r600_bytecode_add_alu(ctx->bc, &alu);
4541 if (r)
4542 return r;
4543 }
4544 } else {
4545 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4546
4547 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4548 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4549 r600_bytecode_src_set_abs(&alu.src[0]);
4550
4551 alu.dst.sel = ctx->temp_reg;
4552 alu.dst.write = 1;
4553 alu.dst.chan = 2;
4554 alu.last = 1;
4555
4556 r = r600_bytecode_add_alu(ctx->bc, &alu);
4557 if (r)
4558 return r;
4559 }
4560 }
4561
4562 /* result.w = 1.0; */
4563 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4564 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4565
4566 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4567 alu.src[0].sel = V_SQ_ALU_SRC_1;
4568 alu.src[0].chan = 0;
4569
4570 alu.dst.sel = ctx->temp_reg;
4571 alu.dst.chan = 3;
4572 alu.dst.write = 1;
4573 alu.last = 1;
4574
4575 r = r600_bytecode_add_alu(ctx->bc, &alu);
4576 if (r)
4577 return r;
4578 }
4579
4580 return tgsi_helper_copy(ctx, inst);
4581 }
4582
4583 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4584 {
4585 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4586 struct r600_bytecode_alu alu;
4587 int r;
4588
4589 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4590
4591 switch (inst->Instruction.Opcode) {
4592 case TGSI_OPCODE_ARL:
4593 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4594 break;
4595 case TGSI_OPCODE_ARR:
4596 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4597 break;
4598 case TGSI_OPCODE_UARL:
4599 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4600 break;
4601 default:
4602 assert(0);
4603 return -1;
4604 }
4605
4606 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4607 alu.last = 1;
4608 alu.dst.sel = ctx->bc->ar_reg;
4609 alu.dst.write = 1;
4610 r = r600_bytecode_add_alu(ctx->bc, &alu);
4611 if (r)
4612 return r;
4613
4614 ctx->bc->ar_loaded = 0;
4615 return 0;
4616 }
4617 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4618 {
4619 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4620 struct r600_bytecode_alu alu;
4621 int r;
4622
4623 switch (inst->Instruction.Opcode) {
4624 case TGSI_OPCODE_ARL:
4625 memset(&alu, 0, sizeof(alu));
4626 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4627 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4628 alu.dst.sel = ctx->bc->ar_reg;
4629 alu.dst.write = 1;
4630 alu.last = 1;
4631
4632 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4633 return r;
4634
4635 memset(&alu, 0, sizeof(alu));
4636 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4637 alu.src[0].sel = ctx->bc->ar_reg;
4638 alu.dst.sel = ctx->bc->ar_reg;
4639 alu.dst.write = 1;
4640 alu.last = 1;
4641
4642 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4643 return r;
4644 break;
4645 case TGSI_OPCODE_ARR:
4646 memset(&alu, 0, sizeof(alu));
4647 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4648 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4649 alu.dst.sel = ctx->bc->ar_reg;
4650 alu.dst.write = 1;
4651 alu.last = 1;
4652
4653 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4654 return r;
4655 break;
4656 case TGSI_OPCODE_UARL:
4657 memset(&alu, 0, sizeof(alu));
4658 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4659 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4660 alu.dst.sel = ctx->bc->ar_reg;
4661 alu.dst.write = 1;
4662 alu.last = 1;
4663
4664 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4665 return r;
4666 break;
4667 default:
4668 assert(0);
4669 return -1;
4670 }
4671
4672 ctx->bc->ar_loaded = 0;
4673 return 0;
4674 }
4675
4676 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4677 {
4678 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4679 struct r600_bytecode_alu alu;
4680 int i, r = 0;
4681
4682 for (i = 0; i < 4; i++) {
4683 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4684
4685 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4686 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4687
4688 if (i == 0 || i == 3) {
4689 alu.src[0].sel = V_SQ_ALU_SRC_1;
4690 } else {
4691 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4692 }
4693
4694 if (i == 0 || i == 2) {
4695 alu.src[1].sel = V_SQ_ALU_SRC_1;
4696 } else {
4697 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4698 }
4699 if (i == 3)
4700 alu.last = 1;
4701 r = r600_bytecode_add_alu(ctx->bc, &alu);
4702 if (r)
4703 return r;
4704 }
4705 return 0;
4706 }
4707
4708 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4709 {
4710 struct r600_bytecode_alu alu;
4711 int r;
4712
4713 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4714 alu.inst = opcode;
4715 alu.predicate = 1;
4716
4717 alu.dst.sel = ctx->temp_reg;
4718 alu.dst.write = 1;
4719 alu.dst.chan = 0;
4720
4721 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4722 alu.src[1].sel = V_SQ_ALU_SRC_0;
4723 alu.src[1].chan = 0;
4724
4725 alu.last = 1;
4726
4727 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4728 if (r)
4729 return r;
4730 return 0;
4731 }
4732
4733 static int pops(struct r600_shader_ctx *ctx, int pops)
4734 {
4735 unsigned force_pop = ctx->bc->force_add_cf;
4736
4737 if (!force_pop) {
4738 int alu_pop = 3;
4739 if (ctx->bc->cf_last) {
4740 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4741 alu_pop = 0;
4742 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4743 alu_pop = 1;
4744 }
4745 alu_pop += pops;
4746 if (alu_pop == 1) {
4747 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4748 ctx->bc->force_add_cf = 1;
4749 } else if (alu_pop == 2) {
4750 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4751 ctx->bc->force_add_cf = 1;
4752 } else {
4753 force_pop = 1;
4754 }
4755 }
4756
4757 if (force_pop) {
4758 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4759 ctx->bc->cf_last->pop_count = pops;
4760 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4761 }
4762
4763 return 0;
4764 }
4765
4766 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4767 {
4768 switch(reason) {
4769 case FC_PUSH_VPM:
4770 ctx->bc->callstack[ctx->bc->call_sp].current--;
4771 break;
4772 case FC_PUSH_WQM:
4773 case FC_LOOP:
4774 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4775 break;
4776 case FC_REP:
4777 /* TOODO : for 16 vp asic should -= 2; */
4778 ctx->bc->callstack[ctx->bc->call_sp].current --;
4779 break;
4780 }
4781 }
4782
4783 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4784 {
4785 if (check_max_only) {
4786 int diff;
4787 switch (reason) {
4788 case FC_PUSH_VPM:
4789 diff = 1;
4790 break;
4791 case FC_PUSH_WQM:
4792 diff = 4;
4793 break;
4794 default:
4795 assert(0);
4796 diff = 0;
4797 }
4798 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4799 ctx->bc->callstack[ctx->bc->call_sp].max) {
4800 ctx->bc->callstack[ctx->bc->call_sp].max =
4801 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4802 }
4803 return;
4804 }
4805 switch (reason) {
4806 case FC_PUSH_VPM:
4807 ctx->bc->callstack[ctx->bc->call_sp].current++;
4808 break;
4809 case FC_PUSH_WQM:
4810 case FC_LOOP:
4811 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4812 break;
4813 case FC_REP:
4814 ctx->bc->callstack[ctx->bc->call_sp].current++;
4815 break;
4816 }
4817
4818 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4819 ctx->bc->callstack[ctx->bc->call_sp].max) {
4820 ctx->bc->callstack[ctx->bc->call_sp].max =
4821 ctx->bc->callstack[ctx->bc->call_sp].current;
4822 }
4823 }
4824
4825 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4826 {
4827 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4828
4829 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
4830 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
4831 sp->mid[sp->num_mid] = ctx->bc->cf_last;
4832 sp->num_mid++;
4833 }
4834
4835 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
4836 {
4837 ctx->bc->fc_sp++;
4838 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
4839 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
4840 }
4841
4842 static void fc_poplevel(struct r600_shader_ctx *ctx)
4843 {
4844 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
4845 if (sp->mid) {
4846 free(sp->mid);
4847 sp->mid = NULL;
4848 }
4849 sp->num_mid = 0;
4850 sp->start = NULL;
4851 sp->type = 0;
4852 ctx->bc->fc_sp--;
4853 }
4854
#if 0
/*
 * Disabled helpers (never compiled).  Several of them are empty stubs or
 * carry "XXX" notes, so they are kept for reference only.
 */

/* Emit a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a CF JUMP popping `pops` entries; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, reset the flag, pop ifidx + 1 levels, then return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with pop_count 1,
 * register it as a mid instruction of level fc_sp and pop one entry.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
4902
4903 static int tgsi_if(struct r600_shader_ctx *ctx)
4904 {
4905 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
4906
4907 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
4908
4909 fc_pushlevel(ctx, FC_IF);
4910
4911 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
4912 return 0;
4913 }
4914
4915 static int tgsi_else(struct r600_shader_ctx *ctx)
4916 {
4917 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
4918 ctx->bc->cf_last->pop_count = 1;
4919
4920 fc_set_mid(ctx, ctx->bc->fc_sp);
4921 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
4922 return 0;
4923 }
4924
4925 static int tgsi_endif(struct r600_shader_ctx *ctx)
4926 {
4927 pops(ctx, 1);
4928 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
4929 R600_ERR("if/endif unbalanced in shader\n");
4930 return -1;
4931 }
4932
4933 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
4934 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
4935 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
4936 } else {
4937 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
4938 }
4939 fc_poplevel(ctx);
4940
4941 callstack_decrease_current(ctx, FC_PUSH_VPM);
4942 return 0;
4943 }
4944
4945 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
4946 {
4947 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
4948
4949 fc_pushlevel(ctx, FC_LOOP);
4950
4951 /* check stack depth */
4952 callstack_check_depth(ctx, FC_LOOP, 0);
4953 return 0;
4954 }
4955
4956 static int tgsi_endloop(struct r600_shader_ctx *ctx)
4957 {
4958 int i;
4959
4960 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
4961
4962 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
4963 R600_ERR("loop/endloop in shader code are not paired.\n");
4964 return -EINVAL;
4965 }
4966
4967 /* fixup loop pointers - from r600isa
4968 LOOP END points to CF after LOOP START,
4969 LOOP START point to CF after LOOP END
4970 BRK/CONT point to LOOP END CF
4971 */
4972 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
4973
4974 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
4975
4976 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
4977 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
4978 }
4979 /* XXX add LOOPRET support */
4980 fc_poplevel(ctx);
4981 callstack_decrease_current(ctx, FC_LOOP);
4982 return 0;
4983 }
4984
4985 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
4986 {
4987 unsigned int fscp;
4988
4989 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
4990 {
4991 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
4992 break;
4993 }
4994
4995 if (fscp == 0) {
4996 R600_ERR("Break not inside loop/endloop pair\n");
4997 return -EINVAL;
4998 }
4999
5000 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5001
5002 fc_set_mid(ctx, fscp);
5003
5004 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5005 return 0;
5006 }
5007
5008 static int tgsi_umad(struct r600_shader_ctx *ctx)
5009 {
5010 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5011 struct r600_bytecode_alu alu;
5012 int i, j, r;
5013 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5014
5015 /* src0 * src1 */
5016 for (i = 0; i < lasti + 1; i++) {
5017 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5018 continue;
5019
5020 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5021
5022 alu.dst.chan = i;
5023 alu.dst.sel = ctx->temp_reg;
5024 alu.dst.write = 1;
5025
5026 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5027 for (j = 0; j < 2; j++) {
5028 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5029 }
5030
5031 alu.last = 1;
5032 r = r600_bytecode_add_alu(ctx->bc, &alu);
5033 if (r)
5034 return r;
5035 }
5036
5037
5038 for (i = 0; i < lasti + 1; i++) {
5039 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5040 continue;
5041
5042 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5043 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5044
5045 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5046
5047 alu.src[0].sel = ctx->temp_reg;
5048 alu.src[0].chan = i;
5049
5050 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5051 if (i == lasti) {
5052 alu.last = 1;
5053 }
5054 r = r600_bytecode_add_alu(ctx->bc, &alu);
5055 if (r)
5056 return r;
5057 }
5058 return 0;
5059 }
5060
5061 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5062 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5063 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5064 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5065
5066 /* XXX:
5067 * For state trackers other than OpenGL, we'll want to use
5068 * _RECIP_IEEE instead.
5069 */
5070 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5071
5072 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5073 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5074 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5075 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5076 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5077 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5078 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5079 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5080 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5081 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5082 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5083 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5084 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5085 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5086 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5087 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5088 /* gap */
5089 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5090 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5091 /* gap */
5092 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5093 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5094 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5095 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5096 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5097 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5098 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5099 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5100 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5101 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5102 /* gap */
5103 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5104 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5105 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5106 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5107 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5108 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5109 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5110 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5111 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5112 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5113 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5114 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5115 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5116 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5117 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5118 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5119 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5120 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5121 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5122 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5123 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5124 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5125 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5126 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5127 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5128 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5129 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5130 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5131 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5132 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5133 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5134 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5135 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5136 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5137 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5138 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5139 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5140 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5141 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5142 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5143 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5144 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5145 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5146 /* gap */
5147 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5148 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5149 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5150 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5151 /* gap */
5152 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5153 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5154 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5155 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5156 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5157 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5158 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5159 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5160 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5161 /* gap */
5162 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5163 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5164 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5165 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5166 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5167 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5168 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5169 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5170 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5171 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5172 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5173 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5174 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5175 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5176 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5177 /* gap */
5178 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5179 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5180 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5181 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5182 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5183 /* gap */
5184 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5185 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5186 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5187 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5188 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5189 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5190 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5191 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5192 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5193 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5194 /* gap */
5195 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5196 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5197 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5198 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5199 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5200 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5201 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5202 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5203 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5204 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5205 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5206 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5207 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5208 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5209 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5210 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5211 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5212 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5213 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5214 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5215 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5216 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5217 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5218 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5219 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5220 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5221 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5222 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
5223 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
5224 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5225 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5226 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5227 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5228 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5229 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5230 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5231 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
5232 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5233 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5234 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5235 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5236 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5237 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5238 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5239 };
5240
/*
 * TGSI -> hardware instruction table for the "eg" code path (presumably
 * Evergreen, going by the EG_ opcode prefix and the tgsi_eg_arl handler --
 * confirm against the code that selects this table).
 *
 * Each entry holds four initializers, in order:
 *   1. the TGSI opcode (a bare number under the "gap" markers),
 *   2. an integer flag that is 1 only for MAD, the single entry carrying an
 *      OP3 hardware opcode (presumably an "is three-operand" flag -- verify
 *      against struct r600_shader_tgsi_instruction),
 *   3. the hardware opcode: an ALU OP2/OP3 value, a CF instruction for
 *      BRK/CONT, or a SQ_TEX_INST_* value for texture ops; NOP or 0
 *      elsewhere (presumably a don't-care for handlers that pick their own
 *      opcodes -- confirm),
 *   4. the handler invoked for the opcode; tgsi_unsupported is, by its name,
 *      the handler for opcodes this table does not implement.
 *
 * Entries are positional: the numeric placeholders sit at the index equal to
 * their value (e.g. 20 is the 21st entry), so the array is presumably indexed
 * directly by TGSI opcode.  Do not reorder, insert, or drop rows.
 */
5241 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5242 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5243 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5244 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5245 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5246 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5247 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5248 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5249 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5250 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5251 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5252 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5253 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5254 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5255 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5256 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT via the _swap handler; presumably operands are exchanged -- confirm */
5257 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5258 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry with flag = 1 and an OP3 opcode */
5259 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* shares the ADD opcode; presumably tgsi_op2 negates the second source -- confirm */
5260 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5261 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5262 /* gap */
5263 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5264 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5265 /* gap */
5266 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5267 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5268 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5269 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5270 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5271 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5272 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5273 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5274 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5275 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5276 /* gap */
5277 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5278 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* shares the MOV opcode; presumably tgsi_op2 applies an abs modifier -- confirm */
5279 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5280 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5281 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5282 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5283 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5284 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5285 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5286 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5287 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5288 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5289 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5290 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5291 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5292 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5293 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5294 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, /* SETGE via the _swap handler; presumably operands are exchanged -- confirm */
5295 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5296 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5297 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5298 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5299 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5300 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5301 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5302 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5303 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5304 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5305 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5306 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5307 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5308 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5309 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5310 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5311 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5312 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5313 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5314 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5315 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5316 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5317 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5318 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5319 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5320 /* gap */
5321 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5322 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5323 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5324 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5325 /* gap */
5326 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5327 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5328 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5329 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5330 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5331 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5332 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5333 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5334 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5335 /* gap */
5336 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5337 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5338 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5339 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5340 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5341 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5342 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5343 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5344 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5345 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5346 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5347 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5348 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5349 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5350 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5351 /* gap */
5352 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5353 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5354 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5355 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5356 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5357 /* gap */
5358 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5359 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5360 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5361 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5362 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5363 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5364 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5365 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5366 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5367 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5368 /* gap */
5369 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5370 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5371 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5372 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5373 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5374 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5375 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5376 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5377 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5378 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5379 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5380 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5381 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5382 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5383 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5384 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5385 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5386 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5387 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5388 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5389 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5390 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5391 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5392 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5393 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5394 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5395 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5396 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
5397 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
5398 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5399 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5400 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5401 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5402 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5403 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5404 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5405 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
5406 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5407 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5408 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5409 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5410 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5411 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5412 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* final row, one per opcode value up to and including TGSI_OPCODE_LAST */
5413 };
5414
/*
 * TGSI -> hardware instruction table for the "cm" code path (presumably
 * Cayman, going by the cayman_* handlers -- confirm against the code that
 * selects this table).  It reuses the EG_ opcode constants throughout.
 *
 * Each entry holds four initializers, in order: the TGSI opcode (a bare
 * number under the "gap" markers), an integer flag that is 1 only for MAD
 * (the single OP3 entry; presumably an "is three-operand" flag -- verify
 * against struct r600_shader_tgsi_instruction), the hardware opcode (ALU,
 * CF, or SQ_TEX_INST_*; NOP or 0 elsewhere), and the handler invoked for
 * the opcode.
 *
 * Visible differences from eg_shader_tgsi_instruction include:
 *   - RCP, RSQ, EX2, LG2 are handled by cayman_emit_float_instr,
 *   - POW by cayman_pow, COS and SIN by cayman_trig,
 *   - UMUL by cayman_mul_int_instr with MULLO_INT instead of MULLO_UINT,
 *   - I2F, F2U and U2F use tgsi_op2 rather than tgsi_op2_trans; the CAYMAN
 *     notes at the top of this file list FLT_TO_UINT, INT_TO_FLT and
 *     UINT_TO_FLT among the 8xx t-slot-only ops that are implemented in all
 *     vector slots.
 *
 * Entries are positional: the numeric placeholders sit at the index equal to
 * their value (e.g. 20 is the 21st entry), so the array is presumably indexed
 * directly by TGSI opcode.  Do not reorder, insert, or drop rows.
 */
5415 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5416 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5417 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5418 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5419 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5420 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5421 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5422 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5423 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5424 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5425 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5426 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5427 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5428 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5429 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5430 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT via the _swap handler; presumably operands are exchanged -- confirm */
5431 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5432 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry with flag = 1 and an OP3 opcode */
5433 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* shares the ADD opcode; presumably tgsi_op2 negates the second source -- confirm */
5434 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5435 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5436 /* gap */
5437 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5438 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5439 /* gap */
5440 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5441 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5442 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5443 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5444 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5445 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5446 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5447 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5448 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5449 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5450 /* gap */
5451 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5452 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* shares the MOV opcode; presumably tgsi_op2 applies an abs modifier -- confirm */
5453 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5454 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5455 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
5456 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5457 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5458 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
5459 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5460 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5461 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5462 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5463 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5464 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5465 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5466 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5467 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5468 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, /* SETGE via the _swap handler; presumably operands are exchanged -- confirm */
5469 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5470 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5471 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5472 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5473 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5474 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5475 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5476 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5477 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5478 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5479 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5481 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5482 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5483 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5484 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5485 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5486 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5487 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5488 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5489 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5490 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5491 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5492 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5493 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5494 /* gap */
5495 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5496 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5497 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5498 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5499 /* gap */
5500 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5501 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5502 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5503 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5504 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5505 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5506 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5507 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5508 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5509 /* gap */
5510 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5511 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5512 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5513 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5514 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5515 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5516 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
5517 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5518 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5519 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5520 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5521 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5522 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5523 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5524 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5525 /* gap */
5526 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5527 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5528 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5529 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5530 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5531 /* gap */
5532 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5533 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5534 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5535 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5536 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5537 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5538 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5539 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5540 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
5541 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
5542 /* gap */
5543 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5544 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5545 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5546 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5547 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5548 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5549 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5550 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5551 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5552 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5553 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5554 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5555 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5556 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5557 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5558 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5559 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5560 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr}, /* NOTE(review): eg table uses MULLO_UINT here; presumably equivalent for the low 32 bits -- confirm */
5561 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5562 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5563 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5564 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5565 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5566 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5567 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5568 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5569 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5570 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
5571 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
5572 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
5573 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
5574 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
5575 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5576 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
5577 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
5578 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
5579 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
5580 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5581 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5582 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5583 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
5584 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
5585 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
5586 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* final row, one per opcode value up to and including TGSI_OPCODE_LAST */
5587 };