r600g: add support for shadow array samplers
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
30 #include "r600_asm.h"
31 #include "r600_sq.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
34 #include "r600d.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60
61 int r600_find_vs_semantic_index(struct r600_shader *vs,
62 struct r600_shader *ps, int id)
63 {
64 struct r600_shader_io *input = &ps->input[id];
65
66 for (int i = 0; i < vs->noutput; i++) {
67 if (input->name == vs->output[i].name &&
68 input->sid == vs->output[i].sid) {
69 return i - 1;
70 }
71 }
72 return 0;
73 }
74
75 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
76 {
77 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
78 struct r600_shader *rshader = &shader->shader;
79 uint32_t *ptr;
80 int i;
81
82 /* copy new shader */
83 if (shader->bo == NULL) {
84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
86 if (shader->bo == NULL) {
87 return -ENOMEM;
88 }
89 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
90 if (R600_BIG_ENDIAN) {
91 for (i = 0; i < rshader->bc.ndw; ++i) {
92 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
93 }
94 } else {
95 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
96 }
97 r600_bo_unmap(rctx->radeon, shader->bo);
98 }
99 /* build state */
100 switch (rshader->processor_type) {
101 case TGSI_PROCESSOR_VERTEX:
102 if (rctx->chip_class >= EVERGREEN) {
103 evergreen_pipe_shader_vs(ctx, shader);
104 } else {
105 r600_pipe_shader_vs(ctx, shader);
106 }
107 break;
108 case TGSI_PROCESSOR_FRAGMENT:
109 if (rctx->chip_class >= EVERGREEN) {
110 evergreen_pipe_shader_ps(ctx, shader);
111 } else {
112 r600_pipe_shader_ps(ctx, shader);
113 }
114 break;
115 default:
116 return -EINVAL;
117 }
118 return 0;
119 }
120
121 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
122
123 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
124 {
125 static int dump_shaders = -1;
126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127 int r;
128
129 /* Would like some magic "get_bool_option_once" routine.
130 */
131 if (dump_shaders == -1)
132 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
133
134 if (dump_shaders) {
135 fprintf(stderr, "--------------------------------------------------------------\n");
136 tgsi_dump(shader->tokens, 0);
137 }
138 r = r600_shader_from_tgsi(rctx, shader);
139 if (r) {
140 R600_ERR("translation from TGSI failed !\n");
141 return r;
142 }
143 r = r600_bytecode_build(&shader->shader.bc);
144 if (r) {
145 R600_ERR("building bytecode failed !\n");
146 return r;
147 }
148 if (dump_shaders) {
149 r600_bytecode_dump(&shader->shader.bc);
150 fprintf(stderr, "______________________________________________________________\n");
151 }
152 return r600_pipe_shader(ctx, shader);
153 }
154
155 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156 {
157 r600_bo_reference(&shader->bo, NULL);
158 r600_bytecode_clear(&shader->shader.bc);
159
160 memset(&shader->shader,0,sizeof(struct r600_shader));
161 }
162
163 /*
164 * tgsi -> r600 shader
165 */
166 struct r600_shader_tgsi_instruction;
167
/* One TGSI source operand after translation by tgsi_src(). */
struct r600_shader_src {
	/* register index plus its file offset, or a special ALU source select */
	unsigned		sel;
	/* per-channel swizzle copied from the TGSI register (X, Y, Z, W) */
	unsigned		swizzle[4];
	/* negate and absolute-value source modifiers */
	unsigned		neg;
	unsigned		abs;
	/* V_SQ_REL_RELATIVE when the TGSI register is indirectly addressed */
	unsigned		rel;
	/* the four literal dwords when the source is a TGSI immediate */
	uint32_t		value[4];
};
176
177 struct r600_shader_ctx {
178 struct tgsi_shader_info info;
179 struct tgsi_parse_context parse;
180 const struct tgsi_token *tokens;
181 unsigned type;
182 unsigned file_offset[TGSI_FILE_COUNT];
183 unsigned temp_reg;
184 unsigned ar_reg;
185 struct r600_shader_tgsi_instruction *inst_info;
186 struct r600_bytecode *bc;
187 struct r600_shader *shader;
188 struct r600_shader_src src[4];
189 u32 *literals;
190 u32 nliterals;
191 u32 max_driver_temp_used;
192 /* needed for evergreen interpolation */
193 boolean input_centroid;
194 boolean input_linear;
195 boolean input_perspective;
196 int num_interp_gpr;
197 };
198
/*
 * Entry of the per-chip opcode tables, indexed by TGSI opcode.
 * Field order is kept as-is: the tables may be initialised positionally.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry translates */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-operand ALU opcodes -- confirm */
	unsigned	is_op3;
	/* hardware opcode copied into the emitted ALU instruction */
	unsigned	r600_opcode;
	/* translation callback; returns 0 or a negative errno */
	int		(*process)(struct r600_shader_ctx *ctx);
};
205
206 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
207 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
208
209 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
210 {
211 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
212 int j;
213
214 if (i->Instruction.NumDstRegs > 1) {
215 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
216 return -EINVAL;
217 }
218 if (i->Instruction.Predicate) {
219 R600_ERR("predicate unsupported\n");
220 return -EINVAL;
221 }
222 #if 0
223 if (i->Instruction.Label) {
224 R600_ERR("label unsupported\n");
225 return -EINVAL;
226 }
227 #endif
228 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
229 if (i->Src[j].Register.Dimension) {
230 R600_ERR("unsupported src %d (dimension %d)\n", j,
231 i->Src[j].Register.Dimension);
232 return -EINVAL;
233 }
234 }
235 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
236 if (i->Dst[j].Register.Dimension) {
237 R600_ERR("unsupported dst (dimension)\n");
238 return -EINVAL;
239 }
240 }
241 return 0;
242 }
243
244 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
245 {
246 int i, r;
247 struct r600_bytecode_alu alu;
248 int gpr = 0, base_chan = 0;
249 int ij_index = 0;
250
251 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
252 ij_index = 0;
253 if (ctx->shader->input[input].centroid)
254 ij_index++;
255 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
256 ij_index = 0;
257 /* if we have perspective add one */
258 if (ctx->input_perspective) {
259 ij_index++;
260 /* if we have perspective centroid */
261 if (ctx->input_centroid)
262 ij_index++;
263 }
264 if (ctx->shader->input[input].centroid)
265 ij_index++;
266 }
267
268 /* work out gpr and base_chan from index */
269 gpr = ij_index / 2;
270 base_chan = (2 * (ij_index % 2)) + 1;
271
272 for (i = 0; i < 8; i++) {
273 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
274
275 if (i < 4)
276 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
277 else
278 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
279
280 if ((i > 1) && (i < 6)) {
281 alu.dst.sel = ctx->shader->input[input].gpr;
282 alu.dst.write = 1;
283 }
284
285 alu.dst.chan = i % 4;
286
287 alu.src[0].sel = gpr;
288 alu.src[0].chan = (base_chan - (i % 2));
289
290 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
291
292 alu.bank_swizzle_force = SQ_ALU_VEC_210;
293 if ((i % 4) == 3)
294 alu.last = 1;
295 r = r600_bytecode_add_alu(ctx->bc, &alu);
296 if (r)
297 return r;
298 }
299 return 0;
300 }
301
302
303 static int tgsi_declaration(struct r600_shader_ctx *ctx)
304 {
305 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
306 unsigned i;
307 int r;
308
309 switch (d->Declaration.File) {
310 case TGSI_FILE_INPUT:
311 i = ctx->shader->ninput++;
312 ctx->shader->input[i].name = d->Semantic.Name;
313 ctx->shader->input[i].sid = d->Semantic.Index;
314 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
315 ctx->shader->input[i].centroid = d->Declaration.Centroid;
316 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
317 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
318 /* turn input into interpolate on EG */
319 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
320 if (ctx->shader->input[i].interpolate > 0) {
321 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
322 evergreen_interp_alu(ctx, i);
323 }
324 }
325 }
326 break;
327 case TGSI_FILE_OUTPUT:
328 i = ctx->shader->noutput++;
329 ctx->shader->output[i].name = d->Semantic.Name;
330 ctx->shader->output[i].sid = d->Semantic.Index;
331 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
332 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
333 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
334 /* these don't count as vertex param exports */
335 if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
336 (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
337 ctx->shader->npos++;
338 }
339 break;
340 case TGSI_FILE_CONSTANT:
341 case TGSI_FILE_TEMPORARY:
342 case TGSI_FILE_SAMPLER:
343 case TGSI_FILE_ADDRESS:
344 break;
345
346 case TGSI_FILE_SYSTEM_VALUE:
347 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
348 struct r600_bytecode_alu alu;
349 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
350
351 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
352 alu.src[0].sel = 0;
353 alu.src[0].chan = 3;
354
355 alu.dst.sel = 0;
356 alu.dst.chan = 3;
357 alu.dst.write = 1;
358 alu.last = 1;
359
360 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
361 return r;
362 break;
363 }
364
365 default:
366 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
367 return -EINVAL;
368 }
369 return 0;
370 }
371
372 static int r600_get_temp(struct r600_shader_ctx *ctx)
373 {
374 return ctx->temp_reg + ctx->max_driver_temp_used++;
375 }
376
377 /*
378 * for evergreen we need to scan the shader to find the number of GPRs we need to
379 * reserve for interpolation.
380 *
381 * we need to know if we are going to emit
382 * any centroid inputs
383 * if perspective and linear are required
384 */
385 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
386 {
387 int i;
388 int num_baryc;
389
390 ctx->input_linear = FALSE;
391 ctx->input_perspective = FALSE;
392 ctx->input_centroid = FALSE;
393 ctx->num_interp_gpr = 1;
394
395 /* any centroid inputs */
396 for (i = 0; i < ctx->info.num_inputs; i++) {
397 /* skip position/face */
398 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
399 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
400 continue;
401 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
402 ctx->input_linear = TRUE;
403 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
404 ctx->input_perspective = TRUE;
405 if (ctx->info.input_centroid[i])
406 ctx->input_centroid = TRUE;
407 }
408
409 num_baryc = 0;
410 /* ignoring sample for now */
411 if (ctx->input_perspective)
412 num_baryc++;
413 if (ctx->input_linear)
414 num_baryc++;
415 if (ctx->input_centroid)
416 num_baryc *= 2;
417
418 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
419
420 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
421 return ctx->num_interp_gpr;
422 }
423
424 static void tgsi_src(struct r600_shader_ctx *ctx,
425 const struct tgsi_full_src_register *tgsi_src,
426 struct r600_shader_src *r600_src)
427 {
428 memset(r600_src, 0, sizeof(*r600_src));
429 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
430 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
431 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
432 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
433 r600_src->neg = tgsi_src->Register.Negate;
434 r600_src->abs = tgsi_src->Register.Absolute;
435
436 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
437 int index;
438 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
439 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
440 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
441
442 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
443 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
444 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
445 return;
446 }
447 index = tgsi_src->Register.Index;
448 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
449 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
450 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
451 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
452 r600_src->swizzle[0] = 3;
453 r600_src->swizzle[1] = 3;
454 r600_src->swizzle[2] = 3;
455 r600_src->swizzle[3] = 3;
456 r600_src->sel = 0;
457 } else {
458 if (tgsi_src->Register.Indirect)
459 r600_src->rel = V_SQ_REL_RELATIVE;
460 r600_src->sel = tgsi_src->Register.Index;
461 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
462 }
463 }
464
465 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
466 {
467 struct r600_bytecode_vtx vtx;
468 unsigned int ar_reg;
469 int r;
470
471 if (offset) {
472 struct r600_bytecode_alu alu;
473
474 memset(&alu, 0, sizeof(alu));
475
476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
477 alu.src[0].sel = ctx->ar_reg;
478
479 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
480 alu.src[1].value = offset;
481
482 alu.dst.sel = dst_reg;
483 alu.dst.write = 1;
484 alu.last = 1;
485
486 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
487 return r;
488
489 ar_reg = dst_reg;
490 } else {
491 ar_reg = ctx->ar_reg;
492 }
493
494 memset(&vtx, 0, sizeof(vtx));
495 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
496 vtx.src_gpr = ar_reg;
497 vtx.mega_fetch_count = 16;
498 vtx.dst_gpr = dst_reg;
499 vtx.dst_sel_x = 0; /* SEL_X */
500 vtx.dst_sel_y = 1; /* SEL_Y */
501 vtx.dst_sel_z = 2; /* SEL_Z */
502 vtx.dst_sel_w = 3; /* SEL_W */
503 vtx.data_format = FMT_32_32_32_32_FLOAT;
504 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
505 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
506 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
507 vtx.endian = r600_endian_swap(32);
508
509 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
510 return r;
511
512 return 0;
513 }
514
515 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
516 {
517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
518 struct r600_bytecode_alu alu;
519 int i, j, k, nconst, r;
520
521 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
522 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
523 nconst++;
524 }
525 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
526 }
527 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
528 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
529 continue;
530 }
531
532 if (ctx->src[i].rel) {
533 int treg = r600_get_temp(ctx);
534 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
535 return r;
536
537 ctx->src[i].sel = treg;
538 ctx->src[i].rel = 0;
539 j--;
540 } else if (j > 0) {
541 int treg = r600_get_temp(ctx);
542 for (k = 0; k < 4; k++) {
543 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
545 alu.src[0].sel = ctx->src[i].sel;
546 alu.src[0].chan = k;
547 alu.src[0].rel = ctx->src[i].rel;
548 alu.dst.sel = treg;
549 alu.dst.chan = k;
550 alu.dst.write = 1;
551 if (k == 3)
552 alu.last = 1;
553 r = r600_bytecode_add_alu(ctx->bc, &alu);
554 if (r)
555 return r;
556 }
557 ctx->src[i].sel = treg;
558 ctx->src[i].rel =0;
559 j--;
560 }
561 }
562 return 0;
563 }
564
565 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
566 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
567 {
568 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
569 struct r600_bytecode_alu alu;
570 int i, j, k, nliteral, r;
571
572 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
573 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
574 nliteral++;
575 }
576 }
577 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
578 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
579 int treg = r600_get_temp(ctx);
580 for (k = 0; k < 4; k++) {
581 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
583 alu.src[0].sel = ctx->src[i].sel;
584 alu.src[0].chan = k;
585 alu.src[0].value = ctx->src[i].value[k];
586 alu.dst.sel = treg;
587 alu.dst.chan = k;
588 alu.dst.write = 1;
589 if (k == 3)
590 alu.last = 1;
591 r = r600_bytecode_add_alu(ctx->bc, &alu);
592 if (r)
593 return r;
594 }
595 ctx->src[i].sel = treg;
596 j--;
597 }
598 }
599 return 0;
600 }
601
602 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
603 {
604 struct r600_shader *shader = &pipeshader->shader;
605 struct tgsi_token *tokens = pipeshader->tokens;
606 struct tgsi_full_immediate *immediate;
607 struct tgsi_full_property *property;
608 struct r600_shader_ctx ctx;
609 struct r600_bytecode_output output[32];
610 unsigned output_done, noutput;
611 unsigned opcode;
612 int i, j, r = 0, pos0;
613
614 ctx.bc = &shader->bc;
615 ctx.shader = shader;
616 r600_bytecode_init(ctx.bc, rctx->chip_class);
617 ctx.tokens = tokens;
618 tgsi_scan_shader(tokens, &ctx.info);
619 tgsi_parse_init(&ctx.parse, tokens);
620 ctx.type = ctx.parse.FullHeader.Processor.Processor;
621 shader->processor_type = ctx.type;
622 ctx.bc->type = shader->processor_type;
623
624 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
625 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
626
627 shader->nr_cbufs = rctx->nr_cbufs;
628
629 /* register allocations */
630 /* Values [0,127] correspond to GPR[0..127].
631 * Values [128,159] correspond to constant buffer bank 0
632 * Values [160,191] correspond to constant buffer bank 1
633 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
634 * Values [256,287] correspond to constant buffer bank 2 (EG)
635 * Values [288,319] correspond to constant buffer bank 3 (EG)
636 * Other special values are shown in the list below.
637 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
638 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
639 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
640 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
641 * 248 SQ_ALU_SRC_0: special constant 0.0.
642 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
643 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
644 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
645 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
646 * 253 SQ_ALU_SRC_LITERAL: literal constant.
647 * 254 SQ_ALU_SRC_PV: previous vector result.
648 * 255 SQ_ALU_SRC_PS: previous scalar result.
649 */
650 for (i = 0; i < TGSI_FILE_COUNT; i++) {
651 ctx.file_offset[i] = 0;
652 }
653 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
654 ctx.file_offset[TGSI_FILE_INPUT] = 1;
655 if (ctx.bc->chip_class >= EVERGREEN) {
656 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
657 } else {
658 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
659 }
660 }
661 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
662 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
663 }
664 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
665 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
666 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
667 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
668
669 /* Outside the GPR range. This will be translated to one of the
670 * kcache banks later. */
671 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
672
673 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
674 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
675 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
676 ctx.temp_reg = ctx.ar_reg + 1;
677
678 ctx.nliterals = 0;
679 ctx.literals = NULL;
680 shader->fs_write_all = FALSE;
681 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
682 tgsi_parse_token(&ctx.parse);
683 switch (ctx.parse.FullToken.Token.Type) {
684 case TGSI_TOKEN_TYPE_IMMEDIATE:
685 immediate = &ctx.parse.FullToken.FullImmediate;
686 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
687 if(ctx.literals == NULL) {
688 r = -ENOMEM;
689 goto out_err;
690 }
691 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
692 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
693 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
694 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
695 ctx.nliterals++;
696 break;
697 case TGSI_TOKEN_TYPE_DECLARATION:
698 r = tgsi_declaration(&ctx);
699 if (r)
700 goto out_err;
701 break;
702 case TGSI_TOKEN_TYPE_INSTRUCTION:
703 r = tgsi_is_supported(&ctx);
704 if (r)
705 goto out_err;
706 ctx.max_driver_temp_used = 0;
707 /* reserve first tmp for everyone */
708 r600_get_temp(&ctx);
709
710 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
711 if ((r = tgsi_split_constant(&ctx)))
712 goto out_err;
713 if ((r = tgsi_split_literal_constant(&ctx)))
714 goto out_err;
715 if (ctx.bc->chip_class == CAYMAN)
716 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
717 else if (ctx.bc->chip_class >= EVERGREEN)
718 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
719 else
720 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
721 r = ctx.inst_info->process(&ctx);
722 if (r)
723 goto out_err;
724 break;
725 case TGSI_TOKEN_TYPE_PROPERTY:
726 property = &ctx.parse.FullToken.FullProperty;
727 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
728 if (property->u[0].Data == 1)
729 shader->fs_write_all = TRUE;
730 }
731 break;
732 default:
733 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
734 r = -EINVAL;
735 goto out_err;
736 }
737 }
738
739 noutput = shader->noutput;
740
741 /* clamp color outputs */
742 if (shader->clamp_color) {
743 for (i = 0; i < noutput; i++) {
744 if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
745 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
746
747 int j;
748 for (j = 0; j < 4; j++) {
749 struct r600_bytecode_alu alu;
750 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
751
752 /* MOV_SAT R, R */
753 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
754 alu.dst.sel = shader->output[i].gpr;
755 alu.dst.chan = j;
756 alu.dst.write = 1;
757 alu.dst.clamp = 1;
758 alu.src[0].sel = alu.dst.sel;
759 alu.src[0].chan = j;
760
761 if (j == 3) {
762 alu.last = 1;
763 }
764 r = r600_bytecode_add_alu(ctx.bc, &alu);
765 if (r)
766 return r;
767 }
768 }
769 }
770 }
771
772 /* export output */
773 j = 0;
774 for (i = 0, pos0 = 0; i < noutput; i++) {
775 memset(&output[i], 0, sizeof(struct r600_bytecode_output));
776 output[i + j].gpr = shader->output[i].gpr;
777 output[i + j].elem_size = 3;
778 output[i + j].swizzle_x = 0;
779 output[i + j].swizzle_y = 1;
780 output[i + j].swizzle_z = 2;
781 output[i + j].swizzle_w = 3;
782 output[i + j].burst_count = 1;
783 output[i + j].barrier = 1;
784 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
785 output[i + j].array_base = i - pos0;
786 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
787 switch (ctx.type) {
788 case TGSI_PROCESSOR_VERTEX:
789 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
790 output[i + j].array_base = 60;
791 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
792 /* position doesn't count in array_base */
793 pos0++;
794 }
795 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
796 output[i + j].array_base = 61;
797 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
798 /* position doesn't count in array_base */
799 pos0++;
800 }
801 break;
802 case TGSI_PROCESSOR_FRAGMENT:
803 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
804 output[i + j].array_base = shader->output[i].sid;
805 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
806 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
807 for (j = 1; j < shader->nr_cbufs; j++) {
808 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
809 output[i + j].gpr = shader->output[i].gpr;
810 output[i + j].elem_size = 3;
811 output[i + j].swizzle_x = 0;
812 output[i + j].swizzle_y = 1;
813 output[i + j].swizzle_z = 2;
814 output[i + j].swizzle_w = 3;
815 output[i + j].burst_count = 1;
816 output[i + j].barrier = 1;
817 output[i + j].array_base = shader->output[i].sid + j;
818 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
819 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
820 }
821 j--;
822 }
823 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
824 output[i + j].array_base = 61;
825 output[i + j].swizzle_x = 2;
826 output[i + j].swizzle_y = 7;
827 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
828 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
829 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
830 output[i + j].array_base = 61;
831 output[i + j].swizzle_x = 7;
832 output[i + j].swizzle_y = 1;
833 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
834 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
835 } else {
836 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
837 r = -EINVAL;
838 goto out_err;
839 }
840 break;
841 default:
842 R600_ERR("unsupported processor type %d\n", ctx.type);
843 r = -EINVAL;
844 goto out_err;
845 }
846 }
847 noutput += j;
848 /* add fake param output for vertex shader if no param is exported */
849 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
850 for (i = 0, pos0 = 0; i < noutput; i++) {
851 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
852 pos0 = 1;
853 break;
854 }
855 }
856 if (!pos0) {
857 memset(&output[i], 0, sizeof(struct r600_bytecode_output));
858 output[i].gpr = 0;
859 output[i].elem_size = 3;
860 output[i].swizzle_x = 0;
861 output[i].swizzle_y = 1;
862 output[i].swizzle_z = 2;
863 output[i].swizzle_w = 3;
864 output[i].burst_count = 1;
865 output[i].barrier = 1;
866 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
867 output[i].array_base = 0;
868 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
869 noutput++;
870 }
871 }
872 /* add fake pixel export */
873 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
874 memset(&output[0], 0, sizeof(struct r600_bytecode_output));
875 output[0].gpr = 0;
876 output[0].elem_size = 3;
877 output[0].swizzle_x = 7;
878 output[0].swizzle_y = 7;
879 output[0].swizzle_z = 7;
880 output[0].swizzle_w = 7;
881 output[0].burst_count = 1;
882 output[0].barrier = 1;
883 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
884 output[0].array_base = 0;
885 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
886 noutput++;
887 }
888 /* set export done on last export of each type */
889 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
890 if (ctx.bc->chip_class < CAYMAN) {
891 if (i == (noutput - 1)) {
892 output[i].end_of_program = 1;
893 }
894 }
895 if (!(output_done & (1 << output[i].type))) {
896 output_done |= (1 << output[i].type);
897 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
898 }
899 }
900 /* add output to bytecode */
901 for (i = 0; i < noutput; i++) {
902 r = r600_bytecode_add_output(ctx.bc, &output[i]);
903 if (r)
904 goto out_err;
905 }
906 /* add program end */
907 if (ctx.bc->chip_class == CAYMAN)
908 cm_bytecode_add_cf_end(ctx.bc);
909
910 free(ctx.literals);
911 tgsi_parse_free(&ctx.parse);
912 return 0;
913 out_err:
914 free(ctx.literals);
915 tgsi_parse_free(&ctx.parse);
916 return r;
917 }
918
919 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
920 {
921 R600_ERR("%s tgsi opcode unsupported\n",
922 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
923 return -EINVAL;
924 }
925
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* intentionally unused */
	return 0;
}
930
931 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
932 const struct r600_shader_src *shader_src,
933 unsigned chan)
934 {
935 bc_src->sel = shader_src->sel;
936 bc_src->chan = shader_src->swizzle[chan];
937 bc_src->neg = shader_src->neg;
938 bc_src->abs = shader_src->abs;
939 bc_src->rel = shader_src->rel;
940 bc_src->value = shader_src->value[bc_src->chan];
941 }
942
943 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
944 {
945 bc_src->abs = 1;
946 bc_src->neg = 0;
947 }
948
949 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
950 {
951 bc_src->neg = !bc_src->neg;
952 }
953
954 static void tgsi_dst(struct r600_shader_ctx *ctx,
955 const struct tgsi_full_dst_register *tgsi_dst,
956 unsigned swizzle,
957 struct r600_bytecode_alu_dst *r600_dst)
958 {
959 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
960
961 r600_dst->sel = tgsi_dst->Register.Index;
962 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
963 r600_dst->chan = swizzle;
964 r600_dst->write = 1;
965 if (tgsi_dst->Register.Indirect)
966 r600_dst->rel = V_SQ_REL_RELATIVE;
967 if (inst->Instruction.Saturate) {
968 r600_dst->clamp = 1;
969 }
970 }
971
/*
 * Return the index (0..3) of the highest channel enabled in `writemask`.
 * Only the low four bits are examined; 0 is returned when none is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to X and stop at the first enabled channel */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
983
984 static int tgsi_int_to_flt(struct r600_shader_ctx *ctx)
985 {
986 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
987 struct r600_bytecode_alu alu;
988 int i, j, r;
989 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
990
991 for (i = 0; i < lasti + 1; i++) {
992 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
993 continue;
994
995 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
996 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
997
998 alu.inst = ctx->inst_info->r600_opcode;
999 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1000 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1001 }
1002 alu.last = 1;
1003 r = r600_bytecode_add_alu(ctx->bc, &alu);
1004 if (r)
1005 return r;
1006 }
1007 return 0;
1008 }
1009
1010 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1011 {
1012 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1013 struct r600_bytecode_alu alu;
1014 int i, j, r;
1015 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1016
1017 for (i = 0; i < lasti + 1; i++) {
1018 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1019 continue;
1020
1021 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1022 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1023
1024 alu.inst = ctx->inst_info->r600_opcode;
1025 if (!swap) {
1026 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1027 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1028 }
1029 } else {
1030 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1031 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1032 }
1033 /* handle some special cases */
1034 switch (ctx->inst_info->tgsi_opcode) {
1035 case TGSI_OPCODE_SUB:
1036 r600_bytecode_src_toggle_neg(&alu.src[1]);
1037 break;
1038 case TGSI_OPCODE_ABS:
1039 r600_bytecode_src_set_abs(&alu.src[0]);
1040 break;
1041 default:
1042 break;
1043 }
1044 if (i == lasti) {
1045 alu.last = 1;
1046 }
1047 r = r600_bytecode_add_alu(ctx->bc, &alu);
1048 if (r)
1049 return r;
1050 }
1051 return 0;
1052 }
1053
/* Per-channel ALU op with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1058
/* Per-channel ALU op with TGSI sources 0 and 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1063
1064 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1065 {
1066 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1067 struct r600_bytecode_alu alu;
1068 int i, r;
1069 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1070
1071 for (i = 0; i < lasti + 1; i++) {
1072
1073 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1074 continue;
1075 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1076 alu.inst = ctx->inst_info->r600_opcode;
1077
1078 alu.src[0].sel = V_SQ_ALU_SRC_0;
1079
1080 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1081
1082 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1083
1084 if (i == lasti) {
1085 alu.last = 1;
1086 }
1087 r = r600_bytecode_add_alu(ctx->bc, &alu);
1088 if (r)
1089 return r;
1090 }
1091 return 0;
1092
1093 }
1094
1095 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1096 {
1097 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1098 int i, j, r;
1099 struct r600_bytecode_alu alu;
1100 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1101
1102 for (i = 0 ; i < last_slot; i++) {
1103 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1104 alu.inst = ctx->inst_info->r600_opcode;
1105 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1106 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1107 }
1108 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1109 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1110
1111 if (i == last_slot - 1)
1112 alu.last = 1;
1113 r = r600_bytecode_add_alu(ctx->bc, &alu);
1114 if (r)
1115 return r;
1116 }
1117 return 0;
1118 }
1119
1120 /*
1121 * r600 - trunc to -PI..PI range
1122 * r700 - normalize by dividing by 2PI
1123 * see fdo bug 27901
1124 */
1125 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1126 {
1127 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1128 static float double_pi = 3.1415926535 * 2;
1129 static float neg_pi = -3.1415926535;
1130
1131 int r;
1132 struct r600_bytecode_alu alu;
1133
1134 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1135 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1136 alu.is_op3 = 1;
1137
1138 alu.dst.chan = 0;
1139 alu.dst.sel = ctx->temp_reg;
1140 alu.dst.write = 1;
1141
1142 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1143
1144 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1145 alu.src[1].chan = 0;
1146 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1147 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1148 alu.src[2].chan = 0;
1149 alu.last = 1;
1150 r = r600_bytecode_add_alu(ctx->bc, &alu);
1151 if (r)
1152 return r;
1153
1154 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1155 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1156
1157 alu.dst.chan = 0;
1158 alu.dst.sel = ctx->temp_reg;
1159 alu.dst.write = 1;
1160
1161 alu.src[0].sel = ctx->temp_reg;
1162 alu.src[0].chan = 0;
1163 alu.last = 1;
1164 r = r600_bytecode_add_alu(ctx->bc, &alu);
1165 if (r)
1166 return r;
1167
1168 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1170 alu.is_op3 = 1;
1171
1172 alu.dst.chan = 0;
1173 alu.dst.sel = ctx->temp_reg;
1174 alu.dst.write = 1;
1175
1176 alu.src[0].sel = ctx->temp_reg;
1177 alu.src[0].chan = 0;
1178
1179 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1180 alu.src[1].chan = 0;
1181 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1182 alu.src[2].chan = 0;
1183
1184 if (ctx->bc->chip_class == R600) {
1185 alu.src[1].value = *(uint32_t *)&double_pi;
1186 alu.src[2].value = *(uint32_t *)&neg_pi;
1187 } else {
1188 alu.src[1].sel = V_SQ_ALU_SRC_1;
1189 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1190 alu.src[2].neg = 1;
1191 }
1192
1193 alu.last = 1;
1194 r = r600_bytecode_add_alu(ctx->bc, &alu);
1195 if (r)
1196 return r;
1197 return 0;
1198 }
1199
1200 static int cayman_trig(struct r600_shader_ctx *ctx)
1201 {
1202 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1203 struct r600_bytecode_alu alu;
1204 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1205 int i, r;
1206
1207 r = tgsi_setup_trig(ctx);
1208 if (r)
1209 return r;
1210
1211
1212 for (i = 0; i < last_slot; i++) {
1213 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1214 alu.inst = ctx->inst_info->r600_opcode;
1215 alu.dst.chan = i;
1216
1217 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1218 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1219
1220 alu.src[0].sel = ctx->temp_reg;
1221 alu.src[0].chan = 0;
1222 if (i == last_slot - 1)
1223 alu.last = 1;
1224 r = r600_bytecode_add_alu(ctx->bc, &alu);
1225 if (r)
1226 return r;
1227 }
1228 return 0;
1229 }
1230
1231 static int tgsi_trig(struct r600_shader_ctx *ctx)
1232 {
1233 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1234 struct r600_bytecode_alu alu;
1235 int i, r;
1236 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1237
1238 r = tgsi_setup_trig(ctx);
1239 if (r)
1240 return r;
1241
1242 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1243 alu.inst = ctx->inst_info->r600_opcode;
1244 alu.dst.chan = 0;
1245 alu.dst.sel = ctx->temp_reg;
1246 alu.dst.write = 1;
1247
1248 alu.src[0].sel = ctx->temp_reg;
1249 alu.src[0].chan = 0;
1250 alu.last = 1;
1251 r = r600_bytecode_add_alu(ctx->bc, &alu);
1252 if (r)
1253 return r;
1254
1255 /* replicate result */
1256 for (i = 0; i < lasti + 1; i++) {
1257 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1258 continue;
1259
1260 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1261 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1262
1263 alu.src[0].sel = ctx->temp_reg;
1264 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1265 if (i == lasti)
1266 alu.last = 1;
1267 r = r600_bytecode_add_alu(ctx->bc, &alu);
1268 if (r)
1269 return r;
1270 }
1271 return 0;
1272 }
1273
1274 static int tgsi_scs(struct r600_shader_ctx *ctx)
1275 {
1276 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1277 struct r600_bytecode_alu alu;
1278 int i, r;
1279
1280 /* We'll only need the trig stuff if we are going to write to the
1281 * X or Y components of the destination vector.
1282 */
1283 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1284 r = tgsi_setup_trig(ctx);
1285 if (r)
1286 return r;
1287 }
1288
1289 /* dst.x = COS */
1290 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1291 if (ctx->bc->chip_class == CAYMAN) {
1292 for (i = 0 ; i < 3; i++) {
1293 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1294 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1295 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1296
1297 if (i == 0)
1298 alu.dst.write = 1;
1299 else
1300 alu.dst.write = 0;
1301 alu.src[0].sel = ctx->temp_reg;
1302 alu.src[0].chan = 0;
1303 if (i == 2)
1304 alu.last = 1;
1305 r = r600_bytecode_add_alu(ctx->bc, &alu);
1306 if (r)
1307 return r;
1308 }
1309 } else {
1310 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1311 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1312 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1313
1314 alu.src[0].sel = ctx->temp_reg;
1315 alu.src[0].chan = 0;
1316 alu.last = 1;
1317 r = r600_bytecode_add_alu(ctx->bc, &alu);
1318 if (r)
1319 return r;
1320 }
1321 }
1322
1323 /* dst.y = SIN */
1324 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1325 if (ctx->bc->chip_class == CAYMAN) {
1326 for (i = 0 ; i < 3; i++) {
1327 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1328 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1329 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1330 if (i == 1)
1331 alu.dst.write = 1;
1332 else
1333 alu.dst.write = 0;
1334 alu.src[0].sel = ctx->temp_reg;
1335 alu.src[0].chan = 0;
1336 if (i == 2)
1337 alu.last = 1;
1338 r = r600_bytecode_add_alu(ctx->bc, &alu);
1339 if (r)
1340 return r;
1341 }
1342 } else {
1343 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1344 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1345 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1346
1347 alu.src[0].sel = ctx->temp_reg;
1348 alu.src[0].chan = 0;
1349 alu.last = 1;
1350 r = r600_bytecode_add_alu(ctx->bc, &alu);
1351 if (r)
1352 return r;
1353 }
1354 }
1355
1356 /* dst.z = 0.0; */
1357 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1358 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1359
1360 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1361
1362 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1363
1364 alu.src[0].sel = V_SQ_ALU_SRC_0;
1365 alu.src[0].chan = 0;
1366
1367 alu.last = 1;
1368
1369 r = r600_bytecode_add_alu(ctx->bc, &alu);
1370 if (r)
1371 return r;
1372 }
1373
1374 /* dst.w = 1.0; */
1375 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1376 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1377
1378 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1379
1380 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1381
1382 alu.src[0].sel = V_SQ_ALU_SRC_1;
1383 alu.src[0].chan = 0;
1384
1385 alu.last = 1;
1386
1387 r = r600_bytecode_add_alu(ctx->bc, &alu);
1388 if (r)
1389 return r;
1390 }
1391
1392 return 0;
1393 }
1394
1395 static int tgsi_kill(struct r600_shader_ctx *ctx)
1396 {
1397 struct r600_bytecode_alu alu;
1398 int i, r;
1399
1400 for (i = 0; i < 4; i++) {
1401 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1402 alu.inst = ctx->inst_info->r600_opcode;
1403
1404 alu.dst.chan = i;
1405
1406 alu.src[0].sel = V_SQ_ALU_SRC_0;
1407
1408 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1409 alu.src[1].sel = V_SQ_ALU_SRC_1;
1410 alu.src[1].neg = 1;
1411 } else {
1412 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1413 }
1414 if (i == 3) {
1415 alu.last = 1;
1416 }
1417 r = r600_bytecode_add_alu(ctx->bc, &alu);
1418 if (r)
1419 return r;
1420 }
1421
1422 /* kill must be last in ALU */
1423 ctx->bc->force_add_cf = 1;
1424 ctx->shader->uses_kill = TRUE;
1425 return 0;
1426 }
1427
1428 static int tgsi_lit(struct r600_shader_ctx *ctx)
1429 {
1430 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1431 struct r600_bytecode_alu alu;
1432 int r;
1433
1434 /* tmp.x = max(src.y, 0.0) */
1435 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1436 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1437 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1438 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1439 alu.src[1].chan = 1;
1440
1441 alu.dst.sel = ctx->temp_reg;
1442 alu.dst.chan = 0;
1443 alu.dst.write = 1;
1444
1445 alu.last = 1;
1446 r = r600_bytecode_add_alu(ctx->bc, &alu);
1447 if (r)
1448 return r;
1449
1450 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1451 {
1452 int chan;
1453 int sel;
1454 int i;
1455
1456 if (ctx->bc->chip_class == CAYMAN) {
1457 for (i = 0; i < 3; i++) {
1458 /* tmp.z = log(tmp.x) */
1459 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1460 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1461 alu.src[0].sel = ctx->temp_reg;
1462 alu.src[0].chan = 0;
1463 alu.dst.sel = ctx->temp_reg;
1464 alu.dst.chan = i;
1465 if (i == 2) {
1466 alu.dst.write = 1;
1467 alu.last = 1;
1468 } else
1469 alu.dst.write = 0;
1470
1471 r = r600_bytecode_add_alu(ctx->bc, &alu);
1472 if (r)
1473 return r;
1474 }
1475 } else {
1476 /* tmp.z = log(tmp.x) */
1477 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1478 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1479 alu.src[0].sel = ctx->temp_reg;
1480 alu.src[0].chan = 0;
1481 alu.dst.sel = ctx->temp_reg;
1482 alu.dst.chan = 2;
1483 alu.dst.write = 1;
1484 alu.last = 1;
1485 r = r600_bytecode_add_alu(ctx->bc, &alu);
1486 if (r)
1487 return r;
1488 }
1489
1490 chan = alu.dst.chan;
1491 sel = alu.dst.sel;
1492
1493 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1494 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1495 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1496 alu.src[0].sel = sel;
1497 alu.src[0].chan = chan;
1498 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1499 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1500 alu.dst.sel = ctx->temp_reg;
1501 alu.dst.chan = 0;
1502 alu.dst.write = 1;
1503 alu.is_op3 = 1;
1504 alu.last = 1;
1505 r = r600_bytecode_add_alu(ctx->bc, &alu);
1506 if (r)
1507 return r;
1508
1509 if (ctx->bc->chip_class == CAYMAN) {
1510 for (i = 0; i < 3; i++) {
1511 /* dst.z = exp(tmp.x) */
1512 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1513 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1514 alu.src[0].sel = ctx->temp_reg;
1515 alu.src[0].chan = 0;
1516 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1517 if (i == 2) {
1518 alu.dst.write = 1;
1519 alu.last = 1;
1520 } else
1521 alu.dst.write = 0;
1522 r = r600_bytecode_add_alu(ctx->bc, &alu);
1523 if (r)
1524 return r;
1525 }
1526 } else {
1527 /* dst.z = exp(tmp.x) */
1528 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1530 alu.src[0].sel = ctx->temp_reg;
1531 alu.src[0].chan = 0;
1532 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1533 alu.last = 1;
1534 r = r600_bytecode_add_alu(ctx->bc, &alu);
1535 if (r)
1536 return r;
1537 }
1538 }
1539
1540 /* dst.x, <- 1.0 */
1541 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1542 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1543 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1544 alu.src[0].chan = 0;
1545 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1546 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1547 r = r600_bytecode_add_alu(ctx->bc, &alu);
1548 if (r)
1549 return r;
1550
1551 /* dst.y = max(src.x, 0.0) */
1552 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1554 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1555 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1556 alu.src[1].chan = 0;
1557 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1558 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1559 r = r600_bytecode_add_alu(ctx->bc, &alu);
1560 if (r)
1561 return r;
1562
1563 /* dst.w, <- 1.0 */
1564 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1565 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1566 alu.src[0].sel = V_SQ_ALU_SRC_1;
1567 alu.src[0].chan = 0;
1568 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1569 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1570 alu.last = 1;
1571 r = r600_bytecode_add_alu(ctx->bc, &alu);
1572 if (r)
1573 return r;
1574
1575 return 0;
1576 }
1577
1578 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1579 {
1580 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1581 struct r600_bytecode_alu alu;
1582 int i, r;
1583
1584 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1585
1586 /* FIXME:
1587 * For state trackers other than OpenGL, we'll want to use
1588 * _RECIPSQRT_IEEE instead.
1589 */
1590 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1591
1592 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1593 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1594 r600_bytecode_src_set_abs(&alu.src[i]);
1595 }
1596 alu.dst.sel = ctx->temp_reg;
1597 alu.dst.write = 1;
1598 alu.last = 1;
1599 r = r600_bytecode_add_alu(ctx->bc, &alu);
1600 if (r)
1601 return r;
1602 /* replicate result */
1603 return tgsi_helper_tempx_replicate(ctx);
1604 }
1605
1606 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1607 {
1608 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1609 struct r600_bytecode_alu alu;
1610 int i, r;
1611
1612 for (i = 0; i < 4; i++) {
1613 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1614 alu.src[0].sel = ctx->temp_reg;
1615 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1616 alu.dst.chan = i;
1617 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1618 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1619 if (i == 3)
1620 alu.last = 1;
1621 r = r600_bytecode_add_alu(ctx->bc, &alu);
1622 if (r)
1623 return r;
1624 }
1625 return 0;
1626 }
1627
1628 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1629 {
1630 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1631 struct r600_bytecode_alu alu;
1632 int i, r;
1633
1634 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1635 alu.inst = ctx->inst_info->r600_opcode;
1636 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1637 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1638 }
1639 alu.dst.sel = ctx->temp_reg;
1640 alu.dst.write = 1;
1641 alu.last = 1;
1642 r = r600_bytecode_add_alu(ctx->bc, &alu);
1643 if (r)
1644 return r;
1645 /* replicate result */
1646 return tgsi_helper_tempx_replicate(ctx);
1647 }
1648
1649 static int cayman_pow(struct r600_shader_ctx *ctx)
1650 {
1651 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1652 int i, r;
1653 struct r600_bytecode_alu alu;
1654 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1655
1656 for (i = 0; i < 3; i++) {
1657 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1659 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1660 alu.dst.sel = ctx->temp_reg;
1661 alu.dst.chan = i;
1662 alu.dst.write = 1;
1663 if (i == 2)
1664 alu.last = 1;
1665 r = r600_bytecode_add_alu(ctx->bc, &alu);
1666 if (r)
1667 return r;
1668 }
1669
1670 /* b * LOG2(a) */
1671 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1672 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1673 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1674 alu.src[1].sel = ctx->temp_reg;
1675 alu.dst.sel = ctx->temp_reg;
1676 alu.dst.write = 1;
1677 alu.last = 1;
1678 r = r600_bytecode_add_alu(ctx->bc, &alu);
1679 if (r)
1680 return r;
1681
1682 for (i = 0; i < last_slot; i++) {
1683 /* POW(a,b) = EXP2(b * LOG2(a))*/
1684 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1685 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1686 alu.src[0].sel = ctx->temp_reg;
1687
1688 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1689 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1690 if (i == last_slot - 1)
1691 alu.last = 1;
1692 r = r600_bytecode_add_alu(ctx->bc, &alu);
1693 if (r)
1694 return r;
1695 }
1696 return 0;
1697 }
1698
1699 static int tgsi_pow(struct r600_shader_ctx *ctx)
1700 {
1701 struct r600_bytecode_alu alu;
1702 int r;
1703
1704 /* LOG2(a) */
1705 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1706 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1707 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1708 alu.dst.sel = ctx->temp_reg;
1709 alu.dst.write = 1;
1710 alu.last = 1;
1711 r = r600_bytecode_add_alu(ctx->bc, &alu);
1712 if (r)
1713 return r;
1714 /* b * LOG2(a) */
1715 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1716 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1717 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1718 alu.src[1].sel = ctx->temp_reg;
1719 alu.dst.sel = ctx->temp_reg;
1720 alu.dst.write = 1;
1721 alu.last = 1;
1722 r = r600_bytecode_add_alu(ctx->bc, &alu);
1723 if (r)
1724 return r;
1725 /* POW(a,b) = EXP2(b * LOG2(a))*/
1726 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1727 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1728 alu.src[0].sel = ctx->temp_reg;
1729 alu.dst.sel = ctx->temp_reg;
1730 alu.dst.write = 1;
1731 alu.last = 1;
1732 r = r600_bytecode_add_alu(ctx->bc, &alu);
1733 if (r)
1734 return r;
1735 return tgsi_helper_tempx_replicate(ctx);
1736 }
1737
1738 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1739 {
1740 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1741 struct r600_bytecode_alu alu;
1742 int i, r;
1743
1744 /* tmp = (src > 0 ? 1 : src) */
1745 for (i = 0; i < 4; i++) {
1746 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1747 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1748 alu.is_op3 = 1;
1749
1750 alu.dst.sel = ctx->temp_reg;
1751 alu.dst.chan = i;
1752
1753 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1754 alu.src[1].sel = V_SQ_ALU_SRC_1;
1755 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
1756
1757 if (i == 3)
1758 alu.last = 1;
1759 r = r600_bytecode_add_alu(ctx->bc, &alu);
1760 if (r)
1761 return r;
1762 }
1763
1764 /* dst = (-tmp > 0 ? -1 : tmp) */
1765 for (i = 0; i < 4; i++) {
1766 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1767 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1768 alu.is_op3 = 1;
1769 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1770
1771 alu.src[0].sel = ctx->temp_reg;
1772 alu.src[0].chan = i;
1773 alu.src[0].neg = 1;
1774
1775 alu.src[1].sel = V_SQ_ALU_SRC_1;
1776 alu.src[1].neg = 1;
1777
1778 alu.src[2].sel = ctx->temp_reg;
1779 alu.src[2].chan = i;
1780
1781 if (i == 3)
1782 alu.last = 1;
1783 r = r600_bytecode_add_alu(ctx->bc, &alu);
1784 if (r)
1785 return r;
1786 }
1787 return 0;
1788 }
1789
1790 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1791 {
1792 struct r600_bytecode_alu alu;
1793 int i, r;
1794
1795 for (i = 0; i < 4; i++) {
1796 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1797 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1798 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1799 alu.dst.chan = i;
1800 } else {
1801 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1802 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1803 alu.src[0].sel = ctx->temp_reg;
1804 alu.src[0].chan = i;
1805 }
1806 if (i == 3) {
1807 alu.last = 1;
1808 }
1809 r = r600_bytecode_add_alu(ctx->bc, &alu);
1810 if (r)
1811 return r;
1812 }
1813 return 0;
1814 }
1815
1816 static int tgsi_op3(struct r600_shader_ctx *ctx)
1817 {
1818 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1819 struct r600_bytecode_alu alu;
1820 int i, j, r;
1821 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1822
1823 for (i = 0; i < lasti + 1; i++) {
1824 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1825 continue;
1826
1827 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1828 alu.inst = ctx->inst_info->r600_opcode;
1829 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1830 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1831 }
1832
1833 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1834 alu.dst.chan = i;
1835 alu.dst.write = 1;
1836 alu.is_op3 = 1;
1837 if (i == lasti) {
1838 alu.last = 1;
1839 }
1840 r = r600_bytecode_add_alu(ctx->bc, &alu);
1841 if (r)
1842 return r;
1843 }
1844 return 0;
1845 }
1846
1847 static int tgsi_dp(struct r600_shader_ctx *ctx)
1848 {
1849 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1850 struct r600_bytecode_alu alu;
1851 int i, j, r;
1852
1853 for (i = 0; i < 4; i++) {
1854 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1855 alu.inst = ctx->inst_info->r600_opcode;
1856 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1857 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1858 }
1859
1860 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1861 alu.dst.chan = i;
1862 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1863 /* handle some special cases */
1864 switch (ctx->inst_info->tgsi_opcode) {
1865 case TGSI_OPCODE_DP2:
1866 if (i > 1) {
1867 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1868 alu.src[0].chan = alu.src[1].chan = 0;
1869 }
1870 break;
1871 case TGSI_OPCODE_DP3:
1872 if (i > 2) {
1873 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1874 alu.src[0].chan = alu.src[1].chan = 0;
1875 }
1876 break;
1877 case TGSI_OPCODE_DPH:
1878 if (i == 3) {
1879 alu.src[0].sel = V_SQ_ALU_SRC_1;
1880 alu.src[0].chan = 0;
1881 alu.src[0].neg = 0;
1882 }
1883 break;
1884 default:
1885 break;
1886 }
1887 if (i == 3) {
1888 alu.last = 1;
1889 }
1890 r = r600_bytecode_add_alu(ctx->bc, &alu);
1891 if (r)
1892 return r;
1893 }
1894 return 0;
1895 }
1896
1897 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1898 unsigned index)
1899 {
1900 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1901 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1902 inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1903 ctx->src[index].neg || ctx->src[index].abs;
1904 }
1905
1906 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1907 unsigned index)
1908 {
1909 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1910 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1911 }
1912
1913 static int tgsi_tex(struct r600_shader_ctx *ctx)
1914 {
1915 static float one_point_five = 1.5f;
1916 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1917 struct r600_bytecode_tex tex;
1918 struct r600_bytecode_alu alu;
1919 unsigned src_gpr;
1920 int r, i, j;
1921 int opcode;
1922 /* Texture fetch instructions can only use gprs as source.
1923 * Also they cannot negate the source or take the absolute value */
1924 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
1925 boolean src_loaded = FALSE;
1926 unsigned sampler_src_reg = 1;
1927 u8 offset_x = 0, offset_y = 0, offset_z = 0;
1928
1929 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
1930
1931 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
1932 /* get offset values */
1933 if (inst->Texture.NumOffsets) {
1934 assert(inst->Texture.NumOffsets == 1);
1935
1936 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
1937 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
1938 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
1939 }
1940 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1941 /* TGSI moves the sampler to src reg 3 for TXD */
1942 sampler_src_reg = 3;
1943
1944 for (i = 1; i < 3; i++) {
1945 /* set gradients h/v */
1946 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
1947 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
1948 SQ_TEX_INST_SET_GRADIENTS_V;
1949 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
1950 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1951
1952 if (tgsi_tex_src_requires_loading(ctx, i)) {
1953 tex.src_gpr = r600_get_temp(ctx);
1954 tex.src_sel_x = 0;
1955 tex.src_sel_y = 1;
1956 tex.src_sel_z = 2;
1957 tex.src_sel_w = 3;
1958
1959 for (j = 0; j < 4; j++) {
1960 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1961 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1962 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
1963 alu.dst.sel = tex.src_gpr;
1964 alu.dst.chan = j;
1965 if (j == 3)
1966 alu.last = 1;
1967 alu.dst.write = 1;
1968 r = r600_bytecode_add_alu(ctx->bc, &alu);
1969 if (r)
1970 return r;
1971 }
1972
1973 } else {
1974 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
1975 tex.src_sel_x = ctx->src[i].swizzle[0];
1976 tex.src_sel_y = ctx->src[i].swizzle[1];
1977 tex.src_sel_z = ctx->src[i].swizzle[2];
1978 tex.src_sel_w = ctx->src[i].swizzle[3];
1979 tex.src_rel = ctx->src[i].rel;
1980 }
1981 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
1982 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
1983 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1984 tex.coord_type_x = 1;
1985 tex.coord_type_y = 1;
1986 tex.coord_type_z = 1;
1987 tex.coord_type_w = 1;
1988 }
1989 r = r600_bytecode_add_tex(ctx->bc, &tex);
1990 if (r)
1991 return r;
1992 }
1993 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1994 int out_chan;
1995 /* Add perspective divide */
1996 if (ctx->bc->chip_class == CAYMAN) {
1997 out_chan = 2;
1998 for (i = 0; i < 3; i++) {
1999 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2001 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2002
2003 alu.dst.sel = ctx->temp_reg;
2004 alu.dst.chan = i;
2005 if (i == 2)
2006 alu.last = 1;
2007 if (out_chan == i)
2008 alu.dst.write = 1;
2009 r = r600_bytecode_add_alu(ctx->bc, &alu);
2010 if (r)
2011 return r;
2012 }
2013
2014 } else {
2015 out_chan = 3;
2016 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2017 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2018 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2019
2020 alu.dst.sel = ctx->temp_reg;
2021 alu.dst.chan = out_chan;
2022 alu.last = 1;
2023 alu.dst.write = 1;
2024 r = r600_bytecode_add_alu(ctx->bc, &alu);
2025 if (r)
2026 return r;
2027 }
2028
2029 for (i = 0; i < 3; i++) {
2030 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2031 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2032 alu.src[0].sel = ctx->temp_reg;
2033 alu.src[0].chan = out_chan;
2034 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2035 alu.dst.sel = ctx->temp_reg;
2036 alu.dst.chan = i;
2037 alu.dst.write = 1;
2038 r = r600_bytecode_add_alu(ctx->bc, &alu);
2039 if (r)
2040 return r;
2041 }
2042 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2043 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2044 alu.src[0].sel = V_SQ_ALU_SRC_1;
2045 alu.src[0].chan = 0;
2046 alu.dst.sel = ctx->temp_reg;
2047 alu.dst.chan = 3;
2048 alu.last = 1;
2049 alu.dst.write = 1;
2050 r = r600_bytecode_add_alu(ctx->bc, &alu);
2051 if (r)
2052 return r;
2053 src_loaded = TRUE;
2054 src_gpr = ctx->temp_reg;
2055 }
2056
2057 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2058 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
2059 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
2060
2061 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
2062 for (i = 0; i < 4; i++) {
2063 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2064 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2065 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2066 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2067 alu.dst.sel = ctx->temp_reg;
2068 alu.dst.chan = i;
2069 if (i == 3)
2070 alu.last = 1;
2071 alu.dst.write = 1;
2072 r = r600_bytecode_add_alu(ctx->bc, &alu);
2073 if (r)
2074 return r;
2075 }
2076
2077 /* tmp1.z = RCP_e(|tmp1.z|) */
2078 if (ctx->bc->chip_class == CAYMAN) {
2079 for (i = 0; i < 3; i++) {
2080 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2081 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2082 alu.src[0].sel = ctx->temp_reg;
2083 alu.src[0].chan = 2;
2084 alu.src[0].abs = 1;
2085 alu.dst.sel = ctx->temp_reg;
2086 alu.dst.chan = i;
2087 if (i == 2)
2088 alu.dst.write = 1;
2089 if (i == 2)
2090 alu.last = 1;
2091 r = r600_bytecode_add_alu(ctx->bc, &alu);
2092 if (r)
2093 return r;
2094 }
2095 } else {
2096 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2097 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2098 alu.src[0].sel = ctx->temp_reg;
2099 alu.src[0].chan = 2;
2100 alu.src[0].abs = 1;
2101 alu.dst.sel = ctx->temp_reg;
2102 alu.dst.chan = 2;
2103 alu.dst.write = 1;
2104 alu.last = 1;
2105 r = r600_bytecode_add_alu(ctx->bc, &alu);
2106 if (r)
2107 return r;
2108 }
2109
2110 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
2111 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
2112 * muladd has no writemask, have to use another temp
2113 */
2114 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2115 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2116 alu.is_op3 = 1;
2117
2118 alu.src[0].sel = ctx->temp_reg;
2119 alu.src[0].chan = 0;
2120 alu.src[1].sel = ctx->temp_reg;
2121 alu.src[1].chan = 2;
2122
2123 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2124 alu.src[2].chan = 0;
2125 alu.src[2].value = *(uint32_t *)&one_point_five;
2126
2127 alu.dst.sel = ctx->temp_reg;
2128 alu.dst.chan = 0;
2129 alu.dst.write = 1;
2130
2131 r = r600_bytecode_add_alu(ctx->bc, &alu);
2132 if (r)
2133 return r;
2134
2135 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2136 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2137 alu.is_op3 = 1;
2138
2139 alu.src[0].sel = ctx->temp_reg;
2140 alu.src[0].chan = 1;
2141 alu.src[1].sel = ctx->temp_reg;
2142 alu.src[1].chan = 2;
2143
2144 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2145 alu.src[2].chan = 0;
2146 alu.src[2].value = *(uint32_t *)&one_point_five;
2147
2148 alu.dst.sel = ctx->temp_reg;
2149 alu.dst.chan = 1;
2150 alu.dst.write = 1;
2151
2152 alu.last = 1;
2153 r = r600_bytecode_add_alu(ctx->bc, &alu);
2154 if (r)
2155 return r;
2156
2157 src_loaded = TRUE;
2158 src_gpr = ctx->temp_reg;
2159 }
2160
2161 if (src_requires_loading && !src_loaded) {
2162 for (i = 0; i < 4; i++) {
2163 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2164 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2165 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2166 alu.dst.sel = ctx->temp_reg;
2167 alu.dst.chan = i;
2168 if (i == 3)
2169 alu.last = 1;
2170 alu.dst.write = 1;
2171 r = r600_bytecode_add_alu(ctx->bc, &alu);
2172 if (r)
2173 return r;
2174 }
2175 src_loaded = TRUE;
2176 src_gpr = ctx->temp_reg;
2177 }
2178
2179 opcode = ctx->inst_info->r600_opcode;
2180 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2181 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2182 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
2183 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
2184 switch (opcode) {
2185 case SQ_TEX_INST_SAMPLE:
2186 opcode = SQ_TEX_INST_SAMPLE_C;
2187 break;
2188 case SQ_TEX_INST_SAMPLE_L:
2189 opcode = SQ_TEX_INST_SAMPLE_C_L;
2190 break;
2191 case SQ_TEX_INST_SAMPLE_G:
2192 opcode = SQ_TEX_INST_SAMPLE_C_G;
2193 break;
2194 }
2195 }
2196
2197 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2198 tex.inst = opcode;
2199
2200 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2201 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2202 tex.src_gpr = src_gpr;
2203 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2204 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2205 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2206 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2207 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2208 if (src_loaded) {
2209 tex.src_sel_x = 0;
2210 tex.src_sel_y = 1;
2211 tex.src_sel_z = 2;
2212 tex.src_sel_w = 3;
2213 } else {
2214 tex.src_sel_x = ctx->src[0].swizzle[0];
2215 tex.src_sel_y = ctx->src[0].swizzle[1];
2216 tex.src_sel_z = ctx->src[0].swizzle[2];
2217 tex.src_sel_w = ctx->src[0].swizzle[3];
2218 tex.src_rel = ctx->src[0].rel;
2219 }
2220
2221 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2222 tex.src_sel_x = 1;
2223 tex.src_sel_y = 0;
2224 tex.src_sel_z = 3;
2225 tex.src_sel_w = 1;
2226 }
2227
2228 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2229 tex.coord_type_x = 1;
2230 tex.coord_type_y = 1;
2231 tex.coord_type_z = 1;
2232 tex.coord_type_w = 1;
2233 }
2234
2235 tex.offset_x = offset_x;
2236 tex.offset_y = offset_y;
2237 tex.offset_z = offset_z;
2238
2239 /* Put the depth for comparison in W.
2240 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
2241 * Some instructions expect the depth in Z. */
2242 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2243 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2244 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
2245 opcode != SQ_TEX_INST_SAMPLE_C_L &&
2246 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
2247 tex.src_sel_w = tex.src_sel_z;
2248 }
2249
2250 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
2251 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
2252 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
2253 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
2254 /* the array index is read from Y */
2255 tex.coord_type_y = 0;
2256 } else {
2257 /* the array index is read from Z */
2258 tex.coord_type_z = 0;
2259 tex.src_sel_z = tex.src_sel_y;
2260 }
2261 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
2262 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
2263 /* the array index is read from Z */
2264 tex.coord_type_z = 0;
2265
2266 r = r600_bytecode_add_tex(ctx->bc, &tex);
2267 if (r)
2268 return r;
2269
2270 /* add shadow ambient support - gallium doesn't do it yet */
2271 return 0;
2272 }
2273
2274 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2275 {
2276 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2277 struct r600_bytecode_alu alu;
2278 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2279 unsigned i;
2280 int r;
2281
2282 /* optimize if it's just an equal balance */
2283 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2284 for (i = 0; i < lasti + 1; i++) {
2285 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2286 continue;
2287
2288 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2290 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2291 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2292 alu.omod = 3;
2293 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2294 alu.dst.chan = i;
2295 if (i == lasti) {
2296 alu.last = 1;
2297 }
2298 r = r600_bytecode_add_alu(ctx->bc, &alu);
2299 if (r)
2300 return r;
2301 }
2302 return 0;
2303 }
2304
2305 /* 1 - src0 */
2306 for (i = 0; i < lasti + 1; i++) {
2307 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2308 continue;
2309
2310 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2311 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2312 alu.src[0].sel = V_SQ_ALU_SRC_1;
2313 alu.src[0].chan = 0;
2314 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2315 r600_bytecode_src_toggle_neg(&alu.src[1]);
2316 alu.dst.sel = ctx->temp_reg;
2317 alu.dst.chan = i;
2318 if (i == lasti) {
2319 alu.last = 1;
2320 }
2321 alu.dst.write = 1;
2322 r = r600_bytecode_add_alu(ctx->bc, &alu);
2323 if (r)
2324 return r;
2325 }
2326
2327 /* (1 - src0) * src2 */
2328 for (i = 0; i < lasti + 1; i++) {
2329 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2330 continue;
2331
2332 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2333 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2334 alu.src[0].sel = ctx->temp_reg;
2335 alu.src[0].chan = i;
2336 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2337 alu.dst.sel = ctx->temp_reg;
2338 alu.dst.chan = i;
2339 if (i == lasti) {
2340 alu.last = 1;
2341 }
2342 alu.dst.write = 1;
2343 r = r600_bytecode_add_alu(ctx->bc, &alu);
2344 if (r)
2345 return r;
2346 }
2347
2348 /* src0 * src1 + (1 - src0) * src2 */
2349 for (i = 0; i < lasti + 1; i++) {
2350 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2351 continue;
2352
2353 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2355 alu.is_op3 = 1;
2356 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2357 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2358 alu.src[2].sel = ctx->temp_reg;
2359 alu.src[2].chan = i;
2360
2361 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2362 alu.dst.chan = i;
2363 if (i == lasti) {
2364 alu.last = 1;
2365 }
2366 r = r600_bytecode_add_alu(ctx->bc, &alu);
2367 if (r)
2368 return r;
2369 }
2370 return 0;
2371 }
2372
2373 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2374 {
2375 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2376 struct r600_bytecode_alu alu;
2377 int i, r;
2378 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2379
2380 for (i = 0; i < lasti + 1; i++) {
2381 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2382 continue;
2383
2384 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2385 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2386 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2387 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2388 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
2389 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2390 alu.dst.chan = i;
2391 alu.dst.write = 1;
2392 alu.is_op3 = 1;
2393 if (i == lasti)
2394 alu.last = 1;
2395 r = r600_bytecode_add_alu(ctx->bc, &alu);
2396 if (r)
2397 return r;
2398 }
2399 return 0;
2400 }
2401
2402 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2403 {
2404 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2405 static const unsigned int src0_swizzle[] = {2, 0, 1};
2406 static const unsigned int src1_swizzle[] = {1, 2, 0};
2407 struct r600_bytecode_alu alu;
2408 uint32_t use_temp = 0;
2409 int i, r;
2410
2411 if (inst->Dst[0].Register.WriteMask != 0xf)
2412 use_temp = 1;
2413
2414 for (i = 0; i < 4; i++) {
2415 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2416 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2417 if (i < 3) {
2418 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2419 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2420 } else {
2421 alu.src[0].sel = V_SQ_ALU_SRC_0;
2422 alu.src[0].chan = i;
2423 alu.src[1].sel = V_SQ_ALU_SRC_0;
2424 alu.src[1].chan = i;
2425 }
2426
2427 alu.dst.sel = ctx->temp_reg;
2428 alu.dst.chan = i;
2429 alu.dst.write = 1;
2430
2431 if (i == 3)
2432 alu.last = 1;
2433 r = r600_bytecode_add_alu(ctx->bc, &alu);
2434 if (r)
2435 return r;
2436 }
2437
2438 for (i = 0; i < 4; i++) {
2439 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2440 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2441
2442 if (i < 3) {
2443 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2444 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2445 } else {
2446 alu.src[0].sel = V_SQ_ALU_SRC_0;
2447 alu.src[0].chan = i;
2448 alu.src[1].sel = V_SQ_ALU_SRC_0;
2449 alu.src[1].chan = i;
2450 }
2451
2452 alu.src[2].sel = ctx->temp_reg;
2453 alu.src[2].neg = 1;
2454 alu.src[2].chan = i;
2455
2456 if (use_temp)
2457 alu.dst.sel = ctx->temp_reg;
2458 else
2459 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2460 alu.dst.chan = i;
2461 alu.dst.write = 1;
2462 alu.is_op3 = 1;
2463 if (i == 3)
2464 alu.last = 1;
2465 r = r600_bytecode_add_alu(ctx->bc, &alu);
2466 if (r)
2467 return r;
2468 }
2469 if (use_temp)
2470 return tgsi_helper_copy(ctx, inst);
2471 return 0;
2472 }
2473
2474 static int tgsi_exp(struct r600_shader_ctx *ctx)
2475 {
2476 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2477 struct r600_bytecode_alu alu;
2478 int r;
2479 int i;
2480
2481 /* result.x = 2^floor(src); */
2482 if (inst->Dst[0].Register.WriteMask & 1) {
2483 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2484
2485 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2486 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2487
2488 alu.dst.sel = ctx->temp_reg;
2489 alu.dst.chan = 0;
2490 alu.dst.write = 1;
2491 alu.last = 1;
2492 r = r600_bytecode_add_alu(ctx->bc, &alu);
2493 if (r)
2494 return r;
2495
2496 if (ctx->bc->chip_class == CAYMAN) {
2497 for (i = 0; i < 3; i++) {
2498 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2499 alu.src[0].sel = ctx->temp_reg;
2500 alu.src[0].chan = 0;
2501
2502 alu.dst.sel = ctx->temp_reg;
2503 alu.dst.chan = i;
2504 if (i == 0)
2505 alu.dst.write = 1;
2506 if (i == 2)
2507 alu.last = 1;
2508 r = r600_bytecode_add_alu(ctx->bc, &alu);
2509 if (r)
2510 return r;
2511 }
2512 } else {
2513 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2514 alu.src[0].sel = ctx->temp_reg;
2515 alu.src[0].chan = 0;
2516
2517 alu.dst.sel = ctx->temp_reg;
2518 alu.dst.chan = 0;
2519 alu.dst.write = 1;
2520 alu.last = 1;
2521 r = r600_bytecode_add_alu(ctx->bc, &alu);
2522 if (r)
2523 return r;
2524 }
2525 }
2526
2527 /* result.y = tmp - floor(tmp); */
2528 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2529 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2530
2531 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2532 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2533
2534 alu.dst.sel = ctx->temp_reg;
2535 #if 0
2536 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2537 if (r)
2538 return r;
2539 #endif
2540 alu.dst.write = 1;
2541 alu.dst.chan = 1;
2542
2543 alu.last = 1;
2544
2545 r = r600_bytecode_add_alu(ctx->bc, &alu);
2546 if (r)
2547 return r;
2548 }
2549
2550 /* result.z = RoughApprox2ToX(tmp);*/
2551 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2552 if (ctx->bc->chip_class == CAYMAN) {
2553 for (i = 0; i < 3; i++) {
2554 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2555 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2556 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2557
2558 alu.dst.sel = ctx->temp_reg;
2559 alu.dst.chan = i;
2560 if (i == 2) {
2561 alu.dst.write = 1;
2562 alu.last = 1;
2563 }
2564
2565 r = r600_bytecode_add_alu(ctx->bc, &alu);
2566 if (r)
2567 return r;
2568 }
2569 } else {
2570 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2571 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2572 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2573
2574 alu.dst.sel = ctx->temp_reg;
2575 alu.dst.write = 1;
2576 alu.dst.chan = 2;
2577
2578 alu.last = 1;
2579
2580 r = r600_bytecode_add_alu(ctx->bc, &alu);
2581 if (r)
2582 return r;
2583 }
2584 }
2585
2586 /* result.w = 1.0;*/
2587 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2588 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2589
2590 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2591 alu.src[0].sel = V_SQ_ALU_SRC_1;
2592 alu.src[0].chan = 0;
2593
2594 alu.dst.sel = ctx->temp_reg;
2595 alu.dst.chan = 3;
2596 alu.dst.write = 1;
2597 alu.last = 1;
2598 r = r600_bytecode_add_alu(ctx->bc, &alu);
2599 if (r)
2600 return r;
2601 }
2602 return tgsi_helper_copy(ctx, inst);
2603 }
2604
2605 static int tgsi_log(struct r600_shader_ctx *ctx)
2606 {
2607 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2608 struct r600_bytecode_alu alu;
2609 int r;
2610 int i;
2611
2612 /* result.x = floor(log2(|src|)); */
2613 if (inst->Dst[0].Register.WriteMask & 1) {
2614 if (ctx->bc->chip_class == CAYMAN) {
2615 for (i = 0; i < 3; i++) {
2616 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2617
2618 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2619 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2620 r600_bytecode_src_set_abs(&alu.src[0]);
2621
2622 alu.dst.sel = ctx->temp_reg;
2623 alu.dst.chan = i;
2624 if (i == 0)
2625 alu.dst.write = 1;
2626 if (i == 2)
2627 alu.last = 1;
2628 r = r600_bytecode_add_alu(ctx->bc, &alu);
2629 if (r)
2630 return r;
2631 }
2632
2633 } else {
2634 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2635
2636 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2637 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2638 r600_bytecode_src_set_abs(&alu.src[0]);
2639
2640 alu.dst.sel = ctx->temp_reg;
2641 alu.dst.chan = 0;
2642 alu.dst.write = 1;
2643 alu.last = 1;
2644 r = r600_bytecode_add_alu(ctx->bc, &alu);
2645 if (r)
2646 return r;
2647 }
2648
2649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2650 alu.src[0].sel = ctx->temp_reg;
2651 alu.src[0].chan = 0;
2652
2653 alu.dst.sel = ctx->temp_reg;
2654 alu.dst.chan = 0;
2655 alu.dst.write = 1;
2656 alu.last = 1;
2657
2658 r = r600_bytecode_add_alu(ctx->bc, &alu);
2659 if (r)
2660 return r;
2661 }
2662
2663 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2664 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2665
2666 if (ctx->bc->chip_class == CAYMAN) {
2667 for (i = 0; i < 3; i++) {
2668 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2669
2670 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2671 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2672 r600_bytecode_src_set_abs(&alu.src[0]);
2673
2674 alu.dst.sel = ctx->temp_reg;
2675 alu.dst.chan = i;
2676 if (i == 1)
2677 alu.dst.write = 1;
2678 if (i == 2)
2679 alu.last = 1;
2680
2681 r = r600_bytecode_add_alu(ctx->bc, &alu);
2682 if (r)
2683 return r;
2684 }
2685 } else {
2686 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2687
2688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2689 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2690 r600_bytecode_src_set_abs(&alu.src[0]);
2691
2692 alu.dst.sel = ctx->temp_reg;
2693 alu.dst.chan = 1;
2694 alu.dst.write = 1;
2695 alu.last = 1;
2696
2697 r = r600_bytecode_add_alu(ctx->bc, &alu);
2698 if (r)
2699 return r;
2700 }
2701
2702 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2703
2704 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2705 alu.src[0].sel = ctx->temp_reg;
2706 alu.src[0].chan = 1;
2707
2708 alu.dst.sel = ctx->temp_reg;
2709 alu.dst.chan = 1;
2710 alu.dst.write = 1;
2711 alu.last = 1;
2712
2713 r = r600_bytecode_add_alu(ctx->bc, &alu);
2714 if (r)
2715 return r;
2716
2717 if (ctx->bc->chip_class == CAYMAN) {
2718 for (i = 0; i < 3; i++) {
2719 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2720 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2721 alu.src[0].sel = ctx->temp_reg;
2722 alu.src[0].chan = 1;
2723
2724 alu.dst.sel = ctx->temp_reg;
2725 alu.dst.chan = i;
2726 if (i == 1)
2727 alu.dst.write = 1;
2728 if (i == 2)
2729 alu.last = 1;
2730
2731 r = r600_bytecode_add_alu(ctx->bc, &alu);
2732 if (r)
2733 return r;
2734 }
2735 } else {
2736 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2737 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2738 alu.src[0].sel = ctx->temp_reg;
2739 alu.src[0].chan = 1;
2740
2741 alu.dst.sel = ctx->temp_reg;
2742 alu.dst.chan = 1;
2743 alu.dst.write = 1;
2744 alu.last = 1;
2745
2746 r = r600_bytecode_add_alu(ctx->bc, &alu);
2747 if (r)
2748 return r;
2749 }
2750
2751 if (ctx->bc->chip_class == CAYMAN) {
2752 for (i = 0; i < 3; i++) {
2753 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2754 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2755 alu.src[0].sel = ctx->temp_reg;
2756 alu.src[0].chan = 1;
2757
2758 alu.dst.sel = ctx->temp_reg;
2759 alu.dst.chan = i;
2760 if (i == 1)
2761 alu.dst.write = 1;
2762 if (i == 2)
2763 alu.last = 1;
2764
2765 r = r600_bytecode_add_alu(ctx->bc, &alu);
2766 if (r)
2767 return r;
2768 }
2769 } else {
2770 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2771 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2772 alu.src[0].sel = ctx->temp_reg;
2773 alu.src[0].chan = 1;
2774
2775 alu.dst.sel = ctx->temp_reg;
2776 alu.dst.chan = 1;
2777 alu.dst.write = 1;
2778 alu.last = 1;
2779
2780 r = r600_bytecode_add_alu(ctx->bc, &alu);
2781 if (r)
2782 return r;
2783 }
2784
2785 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2786
2787 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2788
2789 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2790 r600_bytecode_src_set_abs(&alu.src[0]);
2791
2792 alu.src[1].sel = ctx->temp_reg;
2793 alu.src[1].chan = 1;
2794
2795 alu.dst.sel = ctx->temp_reg;
2796 alu.dst.chan = 1;
2797 alu.dst.write = 1;
2798 alu.last = 1;
2799
2800 r = r600_bytecode_add_alu(ctx->bc, &alu);
2801 if (r)
2802 return r;
2803 }
2804
2805 /* result.z = log2(|src|);*/
2806 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2807 if (ctx->bc->chip_class == CAYMAN) {
2808 for (i = 0; i < 3; i++) {
2809 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2810
2811 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2812 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2813 r600_bytecode_src_set_abs(&alu.src[0]);
2814
2815 alu.dst.sel = ctx->temp_reg;
2816 if (i == 2)
2817 alu.dst.write = 1;
2818 alu.dst.chan = i;
2819 if (i == 2)
2820 alu.last = 1;
2821
2822 r = r600_bytecode_add_alu(ctx->bc, &alu);
2823 if (r)
2824 return r;
2825 }
2826 } else {
2827 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2828
2829 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2830 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2831 r600_bytecode_src_set_abs(&alu.src[0]);
2832
2833 alu.dst.sel = ctx->temp_reg;
2834 alu.dst.write = 1;
2835 alu.dst.chan = 2;
2836 alu.last = 1;
2837
2838 r = r600_bytecode_add_alu(ctx->bc, &alu);
2839 if (r)
2840 return r;
2841 }
2842 }
2843
2844 /* result.w = 1.0; */
2845 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2846 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2847
2848 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2849 alu.src[0].sel = V_SQ_ALU_SRC_1;
2850 alu.src[0].chan = 0;
2851
2852 alu.dst.sel = ctx->temp_reg;
2853 alu.dst.chan = 3;
2854 alu.dst.write = 1;
2855 alu.last = 1;
2856
2857 r = r600_bytecode_add_alu(ctx->bc, &alu);
2858 if (r)
2859 return r;
2860 }
2861
2862 return tgsi_helper_copy(ctx, inst);
2863 }
2864
2865 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2866 {
2867 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2868 struct r600_bytecode_alu alu;
2869 int r;
2870
2871 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2872
2873 switch (inst->Instruction.Opcode) {
2874 case TGSI_OPCODE_ARL:
2875 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2876 break;
2877 case TGSI_OPCODE_ARR:
2878 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2879 break;
2880 default:
2881 assert(0);
2882 return -1;
2883 }
2884
2885 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2886 alu.last = 1;
2887 alu.dst.sel = ctx->ar_reg;
2888 alu.dst.write = 1;
2889 r = r600_bytecode_add_alu(ctx->bc, &alu);
2890 if (r)
2891 return r;
2892
2893 /* TODO: Note that the MOVA can be avoided if we never use AR for
2894 * indexing non-CB registers in the current ALU clause. Similarly, we
2895 * need to load AR from ar_reg again if we started a new clause
2896 * between ARL and AR usage. The easy way to do that is to remove
2897 * the MOVA here, and load it for the first AR access after ar_reg
2898 * has been modified in each clause. */
2899 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2900 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2901 alu.src[0].sel = ctx->ar_reg;
2902 alu.src[0].chan = 0;
2903 alu.last = 1;
2904 r = r600_bytecode_add_alu(ctx->bc, &alu);
2905 if (r)
2906 return r;
2907 return 0;
2908 }
2909 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2910 {
2911 /* TODO from r600c, ar values don't persist between clauses */
2912 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2913 struct r600_bytecode_alu alu;
2914 int r;
2915
2916 switch (inst->Instruction.Opcode) {
2917 case TGSI_OPCODE_ARL:
2918 memset(&alu, 0, sizeof(alu));
2919 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2920 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2921 alu.dst.sel = ctx->ar_reg;
2922 alu.dst.write = 1;
2923 alu.last = 1;
2924
2925 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2926 return r;
2927
2928 memset(&alu, 0, sizeof(alu));
2929 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2930 alu.src[0].sel = ctx->ar_reg;
2931 alu.dst.sel = ctx->ar_reg;
2932 alu.dst.write = 1;
2933 alu.last = 1;
2934
2935 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2936 return r;
2937 break;
2938 case TGSI_OPCODE_ARR:
2939 memset(&alu, 0, sizeof(alu));
2940 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2941 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2942 alu.dst.sel = ctx->ar_reg;
2943 alu.dst.write = 1;
2944 alu.last = 1;
2945
2946 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2947 return r;
2948 break;
2949 default:
2950 assert(0);
2951 return -1;
2952 }
2953
2954 memset(&alu, 0, sizeof(alu));
2955 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2956 alu.src[0].sel = ctx->ar_reg;
2957 alu.last = 1;
2958
2959 r = r600_bytecode_add_alu(ctx->bc, &alu);
2960 if (r)
2961 return r;
2962 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2963 return 0;
2964 }
2965
2966 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2967 {
2968 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2969 struct r600_bytecode_alu alu;
2970 int i, r = 0;
2971
2972 for (i = 0; i < 4; i++) {
2973 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2974
2975 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2976 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2977
2978 if (i == 0 || i == 3) {
2979 alu.src[0].sel = V_SQ_ALU_SRC_1;
2980 } else {
2981 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2982 }
2983
2984 if (i == 0 || i == 2) {
2985 alu.src[1].sel = V_SQ_ALU_SRC_1;
2986 } else {
2987 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2988 }
2989 if (i == 3)
2990 alu.last = 1;
2991 r = r600_bytecode_add_alu(ctx->bc, &alu);
2992 if (r)
2993 return r;
2994 }
2995 return 0;
2996 }
2997
2998 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2999 {
3000 struct r600_bytecode_alu alu;
3001 int r;
3002
3003 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3004 alu.inst = opcode;
3005 alu.predicate = 1;
3006
3007 alu.dst.sel = ctx->temp_reg;
3008 alu.dst.write = 1;
3009 alu.dst.chan = 0;
3010
3011 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3012 alu.src[1].sel = V_SQ_ALU_SRC_0;
3013 alu.src[1].chan = 0;
3014
3015 alu.last = 1;
3016
3017 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
3018 if (r)
3019 return r;
3020 return 0;
3021 }
3022
3023 static int pops(struct r600_shader_ctx *ctx, int pops)
3024 {
3025 unsigned force_pop = ctx->bc->force_add_cf;
3026
3027 if (!force_pop) {
3028 int alu_pop = 3;
3029 if (ctx->bc->cf_last) {
3030 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
3031 alu_pop = 0;
3032 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
3033 alu_pop = 1;
3034 }
3035 alu_pop += pops;
3036 if (alu_pop == 1) {
3037 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
3038 ctx->bc->force_add_cf = 1;
3039 } else if (alu_pop == 2) {
3040 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
3041 ctx->bc->force_add_cf = 1;
3042 } else {
3043 force_pop = 1;
3044 }
3045 }
3046
3047 if (force_pop) {
3048 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
3049 ctx->bc->cf_last->pop_count = pops;
3050 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
3051 }
3052
3053 return 0;
3054 }
3055
3056 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
3057 {
3058 switch(reason) {
3059 case FC_PUSH_VPM:
3060 ctx->bc->callstack[ctx->bc->call_sp].current--;
3061 break;
3062 case FC_PUSH_WQM:
3063 case FC_LOOP:
3064 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
3065 break;
3066 case FC_REP:
3067 /* TOODO : for 16 vp asic should -= 2; */
3068 ctx->bc->callstack[ctx->bc->call_sp].current --;
3069 break;
3070 }
3071 }
3072
3073 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
3074 {
3075 if (check_max_only) {
3076 int diff;
3077 switch (reason) {
3078 case FC_PUSH_VPM:
3079 diff = 1;
3080 break;
3081 case FC_PUSH_WQM:
3082 diff = 4;
3083 break;
3084 default:
3085 assert(0);
3086 diff = 0;
3087 }
3088 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
3089 ctx->bc->callstack[ctx->bc->call_sp].max) {
3090 ctx->bc->callstack[ctx->bc->call_sp].max =
3091 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3092 }
3093 return;
3094 }
3095 switch (reason) {
3096 case FC_PUSH_VPM:
3097 ctx->bc->callstack[ctx->bc->call_sp].current++;
3098 break;
3099 case FC_PUSH_WQM:
3100 case FC_LOOP:
3101 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3102 break;
3103 case FC_REP:
3104 ctx->bc->callstack[ctx->bc->call_sp].current++;
3105 break;
3106 }
3107
3108 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3109 ctx->bc->callstack[ctx->bc->call_sp].max) {
3110 ctx->bc->callstack[ctx->bc->call_sp].max =
3111 ctx->bc->callstack[ctx->bc->call_sp].current;
3112 }
3113 }
3114
3115 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3116 {
3117 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3118
3119 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
3120 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
3121 sp->mid[sp->num_mid] = ctx->bc->cf_last;
3122 sp->num_mid++;
3123 }
3124
3125 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3126 {
3127 ctx->bc->fc_sp++;
3128 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3129 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3130 }
3131
3132 static void fc_poplevel(struct r600_shader_ctx *ctx)
3133 {
3134 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3135 if (sp->mid) {
3136 free(sp->mid);
3137 sp->mid = NULL;
3138 }
3139 sp->num_mid = 0;
3140 sp->start = NULL;
3141 sp->type = 0;
3142 ctx->bc->fc_sp--;
3143 }
3144
#if 0
/*
 * Compiled-out flow-control helpers, kept as placeholders; several of
 * them are empty stubs.
 */

/* Add a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Add a CF JUMP popping `pops` entries; the target offset is not set yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, clear the in-loop flag, pop ifidx + 1 levels, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag, add the current instruction's CF opcode as a mid of level fc_sp, pop one. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3192
3193 static int tgsi_if(struct r600_shader_ctx *ctx)
3194 {
3195 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
3196
3197 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3198
3199 fc_pushlevel(ctx, FC_IF);
3200
3201 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3202 return 0;
3203 }
3204
3205 static int tgsi_else(struct r600_shader_ctx *ctx)
3206 {
3207 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3208 ctx->bc->cf_last->pop_count = 1;
3209
3210 fc_set_mid(ctx, ctx->bc->fc_sp);
3211 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3212 return 0;
3213 }
3214
3215 static int tgsi_endif(struct r600_shader_ctx *ctx)
3216 {
3217 pops(ctx, 1);
3218 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3219 R600_ERR("if/endif unbalanced in shader\n");
3220 return -1;
3221 }
3222
3223 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3224 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3225 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3226 } else {
3227 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3228 }
3229 fc_poplevel(ctx);
3230
3231 callstack_decrease_current(ctx, FC_PUSH_VPM);
3232 return 0;
3233 }
3234
3235 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3236 {
3237 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3238
3239 fc_pushlevel(ctx, FC_LOOP);
3240
3241 /* check stack depth */
3242 callstack_check_depth(ctx, FC_LOOP, 0);
3243 return 0;
3244 }
3245
3246 static int tgsi_endloop(struct r600_shader_ctx *ctx)
3247 {
3248 int i;
3249
3250 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3251
3252 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3253 R600_ERR("loop/endloop in shader code are not paired.\n");
3254 return -EINVAL;
3255 }
3256
3257 /* fixup loop pointers - from r600isa
3258 LOOP END points to CF after LOOP START,
3259 LOOP START point to CF after LOOP END
3260 BRK/CONT point to LOOP END CF
3261 */
3262 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3263
3264 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3265
3266 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3267 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3268 }
3269 /* TODO add LOOPRET support */
3270 fc_poplevel(ctx);
3271 callstack_decrease_current(ctx, FC_LOOP);
3272 return 0;
3273 }
3274
3275 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3276 {
3277 unsigned int fscp;
3278
3279 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3280 {
3281 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3282 break;
3283 }
3284
3285 if (fscp == 0) {
3286 R600_ERR("Break not inside loop/endloop pair\n");
3287 return -EINVAL;
3288 }
3289
3290 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3291 ctx->bc->cf_last->pop_count = 1;
3292
3293 fc_set_mid(ctx, fscp);
3294
3295 pops(ctx, 1);
3296 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3297 return 0;
3298 }
3299
3300 static int tgsi_umad(struct r600_shader_ctx *ctx)
3301 {
3302 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3303 struct r600_bytecode_alu alu;
3304 int i, j, r;
3305 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3306
3307 /* src0 * src1 */
3308 for (i = 0; i < lasti + 1; i++) {
3309 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3310 continue;
3311
3312 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3313
3314 alu.dst.chan = i;
3315 alu.dst.sel = ctx->temp_reg;
3316 alu.dst.write = 1;
3317
3318 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT;
3319 for (j = 0; j < 2; j++) {
3320 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3321 }
3322
3323 if (i == lasti) {
3324 alu.last = 1;
3325 }
3326 r = r600_bytecode_add_alu(ctx->bc, &alu);
3327 if (r)
3328 return r;
3329 }
3330
3331
3332 for (i = 0; i < lasti + 1; i++) {
3333 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3334 continue;
3335
3336 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3337 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3338
3339 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT;
3340
3341 alu.src[0].sel = ctx->temp_reg;
3342 alu.src[0].chan = i;
3343
3344 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3345 if (i == lasti) {
3346 alu.last = 1;
3347 }
3348 r = r600_bytecode_add_alu(ctx->bc, &alu);
3349 if (r)
3350 return r;
3351 }
3352 return 0;
3353 }
3354
/*
 * TGSI opcode table that uses the un-prefixed V_SQ_* hardware opcodes.
 *
 * Each entry holds, in order: the TGSI opcode (or a bare number for an
 * unassigned "gap" slot), a flag that is 1 only for the OP3 entry (MAD),
 * the hardware opcode handed to the handler, and the handler that
 * translates the instruction. Opcodes without a translation use
 * tgsi_unsupported.
 *
 * NOTE(review): entries are listed in ascending TGSI opcode order with
 * explicit gap placeholders, presumably so the table can be indexed directly
 * by opcode - confirm against the lookup code before reordering anything.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
	{TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
	{TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
	{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
	{TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
	{TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
	{TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
	{TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
	{TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
	/* gap */
	{118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
3530
3531 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3532 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3533 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3534 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3535 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3536 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3537 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3538 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3539 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3540 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3541 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3542 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3543 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3544 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3545 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3546 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3547 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3548 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3549 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3550 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3551 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3552 /* gap */
3553 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3554 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3555 /* gap */
3556 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3557 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3558 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3559 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3560 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3561 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3562 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3563 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3564 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3565 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3566 /* gap */
3567 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3568 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3569 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3570 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3571 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3572 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3573 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3574 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3575 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3576 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3577 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3578 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3579 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3580 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3581 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3582 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3583 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3584 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3585 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3586 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3587 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3588 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3589 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3590 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3591 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3592 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3593 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3594 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3595 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3596 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3597 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3598 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3599 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3600 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3601 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3602 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3603 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3604 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3605 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3606 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3607 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3608 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3609 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3610 /* gap */
3611 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3612 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3613 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3614 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3615 /* gap */
3616 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3617 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3618 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3619 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3620 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3621 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_int_to_flt},
3622 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3623 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3624 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3625 /* gap */
3626 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3627 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3628 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3629 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3630 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3631 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3632 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3633 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3634 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3635 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3636 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3637 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3638 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3639 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3640 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3641 /* gap */
3642 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3643 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3644 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3645 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3646 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647 /* gap */
3648 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3649 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3650 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3651 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3652 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3653 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3654 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3655 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3656 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3657 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3658 /* gap */
3659 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3660 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
3661 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3662 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3663 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3664 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
3665 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3666 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3667 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3668 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3669 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
3670 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3671 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3672 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3673 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3674 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3675 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3676 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2},
3677 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3678 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3679 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3680 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3681 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
3682 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3683 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3684 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3685 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3686 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3687 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3688 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3689 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3690 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3691 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3692 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3693 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
3694 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
3695 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
3696 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3697 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3698 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3699 };
3700
3701 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3702 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3703 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3704 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3705 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3706 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3707 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3708 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3709 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3710 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3711 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3712 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3713 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3714 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3715 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3716 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3717 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3718 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3719 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3720 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3721 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3722 /* gap */
3723 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3724 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3725 /* gap */
3726 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3727 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3728 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3729 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3730 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3731 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3732 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3733 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3734 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3735 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3736 /* gap */
3737 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3738 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3739 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3740 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3741 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3742 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3743 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3744 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3745 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3746 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3747 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3748 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3749 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3750 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3751 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3752 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3753 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3754 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3755 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3756 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3757 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3758 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3759 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3760 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3761 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3762 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3763 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3764 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3765 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3766 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3767 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3768 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3769 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3770 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3771 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3772 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3773 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3774 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3775 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3776 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3777 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3778 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3779 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3780 /* gap */
3781 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3782 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3783 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3784 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3785 /* gap */
3786 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3787 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3788 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3789 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3790 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3791 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3792 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3793 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3794 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3795 /* gap */
3796 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3797 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3798 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3799 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3800 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3801 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3802 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3803 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3804 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3805 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3806 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3807 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3808 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3809 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3810 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3811 /* gap */
3812 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3813 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3814 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3815 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3816 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3817 /* gap */
3818 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3819 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3820 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3821 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3822 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3823 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3824 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3825 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3826 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3827 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3828 /* gap */
3829 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3830 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3831 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3832 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3833 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3834 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3835 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3836 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3837 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3838 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3839 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3840 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3841 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3842 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3843 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3844 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3845 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3846 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3847 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3848 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3849 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3850 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3851 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3852 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3853 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3854 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3855 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3856 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3857 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3858 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3859 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3860 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3861 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3862 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3863 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
3864 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
3865 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
3866 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3867 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3868 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3869 };