r600g: Get rid of leftover PB_USAGE_* flags.
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
30 #include "r600_asm.h"
31 #include "r600_sq.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
34 #include "r600d.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60
61 int r600_find_vs_semantic_index(struct r600_shader *vs,
62 struct r600_shader *ps, int id)
63 {
64 struct r600_shader_io *input = &ps->input[id];
65
66 for (int i = 0; i < vs->noutput; i++) {
67 if (input->name == vs->output[i].name &&
68 input->sid == vs->output[i].sid) {
69 return i - 1;
70 }
71 }
72 return 0;
73 }
74
75 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
76 {
77 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
78 struct r600_shader *rshader = &shader->shader;
79 uint32_t *ptr;
80 int i;
81
82 /* copy new shader */
83 if (shader->bo == NULL) {
84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
86 if (shader->bo == NULL) {
87 return -ENOMEM;
88 }
89 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
90 if (R600_BIG_ENDIAN) {
91 for (i = 0; i < rshader->bc.ndw; ++i) {
92 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
93 }
94 } else {
95 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
96 }
97 r600_bo_unmap(rctx->radeon, shader->bo);
98 }
99 /* build state */
100 switch (rshader->processor_type) {
101 case TGSI_PROCESSOR_VERTEX:
102 if (rctx->chip_class >= EVERGREEN) {
103 evergreen_pipe_shader_vs(ctx, shader);
104 } else {
105 r600_pipe_shader_vs(ctx, shader);
106 }
107 break;
108 case TGSI_PROCESSOR_FRAGMENT:
109 if (rctx->chip_class >= EVERGREEN) {
110 evergreen_pipe_shader_ps(ctx, shader);
111 } else {
112 r600_pipe_shader_ps(ctx, shader);
113 }
114 break;
115 default:
116 return -EINVAL;
117 }
118 return 0;
119 }
120
121 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
122
123 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
124 {
125 static int dump_shaders = -1;
126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127 int r;
128
129 /* Would like some magic "get_bool_option_once" routine.
130 */
131 if (dump_shaders == -1)
132 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
133
134 if (dump_shaders) {
135 fprintf(stderr, "--------------------------------------------------------------\n");
136 tgsi_dump(shader->tokens, 0);
137 }
138 r = r600_shader_from_tgsi(rctx, shader);
139 if (r) {
140 R600_ERR("translation from TGSI failed !\n");
141 return r;
142 }
143 r = r600_bc_build(&shader->shader.bc);
144 if (r) {
145 R600_ERR("building bytecode failed !\n");
146 return r;
147 }
148 if (dump_shaders) {
149 r600_bc_dump(&shader->shader.bc);
150 fprintf(stderr, "______________________________________________________________\n");
151 }
152 return r600_pipe_shader(ctx, shader);
153 }
154
155 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156 {
157 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
158
159 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
160 r600_bc_clear(&shader->shader.bc);
161
162 memset(&shader->shader,0,sizeof(struct r600_shader));
163 }
164
165 /*
166 * tgsi -> r600 shader
167 */
168 struct r600_shader_tgsi_instruction;
169
/*
 * One TGSI source operand after translation by tgsi_src(); r600_bc_src()
 * turns it into a per-channel ALU source.
 */
struct r600_shader_src {
	/* source selector: register index plus file offset, or a special
	 * value such as V_SQ_ALU_SRC_LITERAL */
	unsigned sel;
	/* source channel read for each of the four destination channels */
	unsigned swizzle[4];
	/* copied from the TGSI Negate flag (may be adjusted for special constants) */
	unsigned neg;
	/* copied from the TGSI Absolute flag */
	unsigned abs;
	/* V_SQ_REL_RELATIVE when the TGSI register is indirect, else 0 */
	unsigned rel;
	/* the four dwords of the immediate when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t value[4];
};
178
179 struct r600_shader_ctx {
180 struct tgsi_shader_info info;
181 struct tgsi_parse_context parse;
182 const struct tgsi_token *tokens;
183 unsigned type;
184 unsigned file_offset[TGSI_FILE_COUNT];
185 unsigned temp_reg;
186 unsigned ar_reg;
187 struct r600_shader_tgsi_instruction *inst_info;
188 struct r600_bc *bc;
189 struct r600_shader *shader;
190 struct r600_shader_src src[4];
191 u32 *literals;
192 u32 nliterals;
193 u32 max_driver_temp_used;
194 /* needed for evergreen interpolation */
195 boolean input_centroid;
196 boolean input_linear;
197 boolean input_perspective;
198 int num_interp_gpr;
199 };
200
/*
 * Entry of the per-chip translation tables, indexed by TGSI opcode.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-source (OP3) hardware
	 * opcodes - confirm against the tables */
	unsigned is_op3;
	/* hardware opcode copied into the emitted ALU instruction */
	unsigned r600_opcode;
	/* handler that emits the bytecode; returns 0 or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};
207
208 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
209 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
210
211 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
212 {
213 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
214 int j;
215
216 if (i->Instruction.NumDstRegs > 1) {
217 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
218 return -EINVAL;
219 }
220 if (i->Instruction.Predicate) {
221 R600_ERR("predicate unsupported\n");
222 return -EINVAL;
223 }
224 #if 0
225 if (i->Instruction.Label) {
226 R600_ERR("label unsupported\n");
227 return -EINVAL;
228 }
229 #endif
230 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
231 if (i->Src[j].Register.Dimension) {
232 R600_ERR("unsupported src %d (dimension %d)\n", j,
233 i->Src[j].Register.Dimension);
234 return -EINVAL;
235 }
236 }
237 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
238 if (i->Dst[j].Register.Dimension) {
239 R600_ERR("unsupported dst (dimension)\n");
240 return -EINVAL;
241 }
242 }
243 return 0;
244 }
245
246 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
247 {
248 int i, r;
249 struct r600_bc_alu alu;
250 int gpr = 0, base_chan = 0;
251 int ij_index = 0;
252
253 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
254 ij_index = 0;
255 if (ctx->shader->input[input].centroid)
256 ij_index++;
257 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
258 ij_index = 0;
259 /* if we have perspective add one */
260 if (ctx->input_perspective) {
261 ij_index++;
262 /* if we have perspective centroid */
263 if (ctx->input_centroid)
264 ij_index++;
265 }
266 if (ctx->shader->input[input].centroid)
267 ij_index++;
268 }
269
270 /* work out gpr and base_chan from index */
271 gpr = ij_index / 2;
272 base_chan = (2 * (ij_index % 2)) + 1;
273
274 for (i = 0; i < 8; i++) {
275 memset(&alu, 0, sizeof(struct r600_bc_alu));
276
277 if (i < 4)
278 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
279 else
280 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
281
282 if ((i > 1) && (i < 6)) {
283 alu.dst.sel = ctx->shader->input[input].gpr;
284 alu.dst.write = 1;
285 }
286
287 alu.dst.chan = i % 4;
288
289 alu.src[0].sel = gpr;
290 alu.src[0].chan = (base_chan - (i % 2));
291
292 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
293
294 alu.bank_swizzle_force = SQ_ALU_VEC_210;
295 if ((i % 4) == 3)
296 alu.last = 1;
297 r = r600_bc_add_alu(ctx->bc, &alu);
298 if (r)
299 return r;
300 }
301 return 0;
302 }
303
304
305 static int tgsi_declaration(struct r600_shader_ctx *ctx)
306 {
307 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
308 unsigned i;
309 int r;
310
311 switch (d->Declaration.File) {
312 case TGSI_FILE_INPUT:
313 i = ctx->shader->ninput++;
314 ctx->shader->input[i].name = d->Semantic.Name;
315 ctx->shader->input[i].sid = d->Semantic.Index;
316 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
317 ctx->shader->input[i].centroid = d->Declaration.Centroid;
318 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
319 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
320 /* turn input into interpolate on EG */
321 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
322 if (ctx->shader->input[i].interpolate > 0) {
323 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
324 evergreen_interp_alu(ctx, i);
325 }
326 }
327 }
328 break;
329 case TGSI_FILE_OUTPUT:
330 i = ctx->shader->noutput++;
331 ctx->shader->output[i].name = d->Semantic.Name;
332 ctx->shader->output[i].sid = d->Semantic.Index;
333 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
334 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
335 break;
336 case TGSI_FILE_CONSTANT:
337 case TGSI_FILE_TEMPORARY:
338 case TGSI_FILE_SAMPLER:
339 case TGSI_FILE_ADDRESS:
340 break;
341
342 case TGSI_FILE_SYSTEM_VALUE:
343 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
344 struct r600_bc_alu alu;
345 memset(&alu, 0, sizeof(struct r600_bc_alu));
346
347 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
348 alu.src[0].sel = 0;
349 alu.src[0].chan = 3;
350
351 alu.dst.sel = 0;
352 alu.dst.chan = 3;
353 alu.dst.write = 1;
354 alu.last = 1;
355
356 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
357 return r;
358 break;
359 }
360
361 default:
362 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
363 return -EINVAL;
364 }
365 return 0;
366 }
367
368 static int r600_get_temp(struct r600_shader_ctx *ctx)
369 {
370 return ctx->temp_reg + ctx->max_driver_temp_used++;
371 }
372
373 /*
374 * for evergreen we need to scan the shader to find the number of GPRs we need to
375 * reserve for interpolation.
376 *
377 * we need to know if we are going to emit
378 * any centroid inputs
379 * if perspective and linear are required
380 */
381 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
382 {
383 int i;
384 int num_baryc;
385
386 ctx->input_linear = FALSE;
387 ctx->input_perspective = FALSE;
388 ctx->input_centroid = FALSE;
389 ctx->num_interp_gpr = 1;
390
391 /* any centroid inputs */
392 for (i = 0; i < ctx->info.num_inputs; i++) {
393 /* skip position/face */
394 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
395 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
396 continue;
397 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
398 ctx->input_linear = TRUE;
399 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
400 ctx->input_perspective = TRUE;
401 if (ctx->info.input_centroid[i])
402 ctx->input_centroid = TRUE;
403 }
404
405 num_baryc = 0;
406 /* ignoring sample for now */
407 if (ctx->input_perspective)
408 num_baryc++;
409 if (ctx->input_linear)
410 num_baryc++;
411 if (ctx->input_centroid)
412 num_baryc *= 2;
413
414 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
415
416 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
417 return ctx->num_interp_gpr;
418 }
419
420 static void tgsi_src(struct r600_shader_ctx *ctx,
421 const struct tgsi_full_src_register *tgsi_src,
422 struct r600_shader_src *r600_src)
423 {
424 memset(r600_src, 0, sizeof(*r600_src));
425 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
426 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
427 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
428 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
429 r600_src->neg = tgsi_src->Register.Negate;
430 r600_src->abs = tgsi_src->Register.Absolute;
431
432 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
433 int index;
434 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
435 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
436 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
437
438 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
439 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
440 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
441 return;
442 }
443 index = tgsi_src->Register.Index;
444 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
445 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
446 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
447 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
448 r600_src->swizzle[0] = 3;
449 r600_src->swizzle[1] = 3;
450 r600_src->swizzle[2] = 3;
451 r600_src->swizzle[3] = 3;
452 r600_src->sel = 0;
453 } else {
454 if (tgsi_src->Register.Indirect)
455 r600_src->rel = V_SQ_REL_RELATIVE;
456 r600_src->sel = tgsi_src->Register.Index;
457 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
458 }
459 }
460
461 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
462 {
463 struct r600_bc_vtx vtx;
464 unsigned int ar_reg;
465 int r;
466
467 if (offset) {
468 struct r600_bc_alu alu;
469
470 memset(&alu, 0, sizeof(alu));
471
472 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
473 alu.src[0].sel = ctx->ar_reg;
474
475 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
476 alu.src[1].value = offset;
477
478 alu.dst.sel = dst_reg;
479 alu.dst.write = 1;
480 alu.last = 1;
481
482 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
483 return r;
484
485 ar_reg = dst_reg;
486 } else {
487 ar_reg = ctx->ar_reg;
488 }
489
490 memset(&vtx, 0, sizeof(vtx));
491 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
492 vtx.src_gpr = ar_reg;
493 vtx.mega_fetch_count = 16;
494 vtx.dst_gpr = dst_reg;
495 vtx.dst_sel_x = 0; /* SEL_X */
496 vtx.dst_sel_y = 1; /* SEL_Y */
497 vtx.dst_sel_z = 2; /* SEL_Z */
498 vtx.dst_sel_w = 3; /* SEL_W */
499 vtx.data_format = FMT_32_32_32_32_FLOAT;
500 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
501 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
502 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
503 vtx.endian = r600_endian_swap(32);
504
505 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
506 return r;
507
508 return 0;
509 }
510
511 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
512 {
513 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
514 struct r600_bc_alu alu;
515 int i, j, k, nconst, r;
516
517 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
518 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
519 nconst++;
520 }
521 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
522 }
523 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
524 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
525 continue;
526 }
527
528 if (ctx->src[i].rel) {
529 int treg = r600_get_temp(ctx);
530 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
531 return r;
532
533 ctx->src[i].sel = treg;
534 ctx->src[i].rel = 0;
535 j--;
536 } else if (j > 0) {
537 int treg = r600_get_temp(ctx);
538 for (k = 0; k < 4; k++) {
539 memset(&alu, 0, sizeof(struct r600_bc_alu));
540 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
541 alu.src[0].sel = ctx->src[i].sel;
542 alu.src[0].chan = k;
543 alu.src[0].rel = ctx->src[i].rel;
544 alu.dst.sel = treg;
545 alu.dst.chan = k;
546 alu.dst.write = 1;
547 if (k == 3)
548 alu.last = 1;
549 r = r600_bc_add_alu(ctx->bc, &alu);
550 if (r)
551 return r;
552 }
553 ctx->src[i].sel = treg;
554 ctx->src[i].rel =0;
555 j--;
556 }
557 }
558 return 0;
559 }
560
561 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
562 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
563 {
564 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
565 struct r600_bc_alu alu;
566 int i, j, k, nliteral, r;
567
568 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
569 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
570 nliteral++;
571 }
572 }
573 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
574 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
575 int treg = r600_get_temp(ctx);
576 for (k = 0; k < 4; k++) {
577 memset(&alu, 0, sizeof(struct r600_bc_alu));
578 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
579 alu.src[0].sel = ctx->src[i].sel;
580 alu.src[0].chan = k;
581 alu.src[0].value = ctx->src[i].value[k];
582 alu.dst.sel = treg;
583 alu.dst.chan = k;
584 alu.dst.write = 1;
585 if (k == 3)
586 alu.last = 1;
587 r = r600_bc_add_alu(ctx->bc, &alu);
588 if (r)
589 return r;
590 }
591 ctx->src[i].sel = treg;
592 j--;
593 }
594 }
595 return 0;
596 }
597
598 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
599 {
600 struct r600_shader *shader = &pipeshader->shader;
601 struct tgsi_token *tokens = pipeshader->tokens;
602 struct tgsi_full_immediate *immediate;
603 struct tgsi_full_property *property;
604 struct r600_shader_ctx ctx;
605 struct r600_bc_output output[32];
606 unsigned output_done, noutput;
607 unsigned opcode;
608 int i, j, r = 0, pos0;
609
610 ctx.bc = &shader->bc;
611 ctx.shader = shader;
612 r600_bc_init(ctx.bc, rctx->chip_class);
613 ctx.tokens = tokens;
614 tgsi_scan_shader(tokens, &ctx.info);
615 tgsi_parse_init(&ctx.parse, tokens);
616 ctx.type = ctx.parse.FullHeader.Processor.Processor;
617 shader->processor_type = ctx.type;
618 ctx.bc->type = shader->processor_type;
619
620 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
621 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
622
623 shader->nr_cbufs = rctx->nr_cbufs;
624
625 /* register allocations */
626 /* Values [0,127] correspond to GPR[0..127].
627 * Values [128,159] correspond to constant buffer bank 0
628 * Values [160,191] correspond to constant buffer bank 1
629 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
630 * Values [256,287] correspond to constant buffer bank 2 (EG)
631 * Values [288,319] correspond to constant buffer bank 3 (EG)
632 * Other special values are shown in the list below.
633 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
634 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
635 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
636 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
637 * 248 SQ_ALU_SRC_0: special constant 0.0.
638 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
639 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
640 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
641 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
642 * 253 SQ_ALU_SRC_LITERAL: literal constant.
643 * 254 SQ_ALU_SRC_PV: previous vector result.
644 * 255 SQ_ALU_SRC_PS: previous scalar result.
645 */
646 for (i = 0; i < TGSI_FILE_COUNT; i++) {
647 ctx.file_offset[i] = 0;
648 }
649 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
650 ctx.file_offset[TGSI_FILE_INPUT] = 1;
651 if (ctx.bc->chip_class >= EVERGREEN) {
652 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
653 } else {
654 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
655 }
656 }
657 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
658 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
659 }
660 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
661 ctx.info.file_count[TGSI_FILE_INPUT];
662 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
663 ctx.info.file_count[TGSI_FILE_OUTPUT];
664
665 /* Outside the GPR range. This will be translated to one of the
666 * kcache banks later. */
667 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
668
669 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
670 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
671 ctx.info.file_count[TGSI_FILE_TEMPORARY];
672 ctx.temp_reg = ctx.ar_reg + 1;
673
674 ctx.nliterals = 0;
675 ctx.literals = NULL;
676 shader->fs_write_all = FALSE;
677 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
678 tgsi_parse_token(&ctx.parse);
679 switch (ctx.parse.FullToken.Token.Type) {
680 case TGSI_TOKEN_TYPE_IMMEDIATE:
681 immediate = &ctx.parse.FullToken.FullImmediate;
682 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
683 if(ctx.literals == NULL) {
684 r = -ENOMEM;
685 goto out_err;
686 }
687 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
688 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
689 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
690 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
691 ctx.nliterals++;
692 break;
693 case TGSI_TOKEN_TYPE_DECLARATION:
694 r = tgsi_declaration(&ctx);
695 if (r)
696 goto out_err;
697 break;
698 case TGSI_TOKEN_TYPE_INSTRUCTION:
699 r = tgsi_is_supported(&ctx);
700 if (r)
701 goto out_err;
702 ctx.max_driver_temp_used = 0;
703 /* reserve first tmp for everyone */
704 r600_get_temp(&ctx);
705
706 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
707 if ((r = tgsi_split_constant(&ctx)))
708 goto out_err;
709 if ((r = tgsi_split_literal_constant(&ctx)))
710 goto out_err;
711 if (ctx.bc->chip_class == CAYMAN)
712 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
713 else if (ctx.bc->chip_class >= EVERGREEN)
714 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
715 else
716 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
717 r = ctx.inst_info->process(&ctx);
718 if (r)
719 goto out_err;
720 break;
721 case TGSI_TOKEN_TYPE_PROPERTY:
722 property = &ctx.parse.FullToken.FullProperty;
723 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
724 if (property->u[0].Data == 1)
725 shader->fs_write_all = TRUE;
726 }
727 break;
728 default:
729 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
730 r = -EINVAL;
731 goto out_err;
732 }
733 }
734
735 noutput = shader->noutput;
736
737 /* clamp color outputs */
738 if (shader->clamp_color) {
739 for (i = 0; i < noutput; i++) {
740 if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
741 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
742
743 int j;
744 for (j = 0; j < 4; j++) {
745 struct r600_bc_alu alu;
746 memset(&alu, 0, sizeof(struct r600_bc_alu));
747
748 /* MOV_SAT R, R */
749 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
750 alu.dst.sel = shader->output[i].gpr;
751 alu.dst.chan = j;
752 alu.dst.write = 1;
753 alu.dst.clamp = 1;
754 alu.src[0].sel = alu.dst.sel;
755 alu.src[0].chan = j;
756
757 if (j == 3) {
758 alu.last = 1;
759 }
760 r = r600_bc_add_alu(ctx.bc, &alu);
761 if (r)
762 return r;
763 }
764 }
765 }
766 }
767
768 /* export output */
769 j = 0;
770 for (i = 0, pos0 = 0; i < noutput; i++) {
771 memset(&output[i], 0, sizeof(struct r600_bc_output));
772 output[i + j].gpr = shader->output[i].gpr;
773 output[i + j].elem_size = 3;
774 output[i + j].swizzle_x = 0;
775 output[i + j].swizzle_y = 1;
776 output[i + j].swizzle_z = 2;
777 output[i + j].swizzle_w = 3;
778 output[i + j].burst_count = 1;
779 output[i + j].barrier = 1;
780 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
781 output[i + j].array_base = i - pos0;
782 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
783 switch (ctx.type) {
784 case TGSI_PROCESSOR_VERTEX:
785 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
786 output[i + j].array_base = 60;
787 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
788 /* position doesn't count in array_base */
789 pos0++;
790 }
791 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
792 output[i + j].array_base = 61;
793 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
794 /* position doesn't count in array_base */
795 pos0++;
796 }
797 break;
798 case TGSI_PROCESSOR_FRAGMENT:
799 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
800 output[i + j].array_base = shader->output[i].sid;
801 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
802 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
803 for (j = 1; j < shader->nr_cbufs; j++) {
804 memset(&output[i + j], 0, sizeof(struct r600_bc_output));
805 output[i + j].gpr = shader->output[i].gpr;
806 output[i + j].elem_size = 3;
807 output[i + j].swizzle_x = 0;
808 output[i + j].swizzle_y = 1;
809 output[i + j].swizzle_z = 2;
810 output[i + j].swizzle_w = 3;
811 output[i + j].burst_count = 1;
812 output[i + j].barrier = 1;
813 output[i + j].array_base = shader->output[i].sid + j;
814 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
815 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
816 }
817 j--;
818 }
819 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
820 output[i + j].array_base = 61;
821 output[i + j].swizzle_x = 2;
822 output[i + j].swizzle_y = 7;
823 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
824 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
825 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
826 output[i + j].array_base = 61;
827 output[i + j].swizzle_x = 7;
828 output[i + j].swizzle_y = 1;
829 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
830 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
831 } else {
832 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
833 r = -EINVAL;
834 goto out_err;
835 }
836 break;
837 default:
838 R600_ERR("unsupported processor type %d\n", ctx.type);
839 r = -EINVAL;
840 goto out_err;
841 }
842 }
843 noutput += j;
844 /* add fake param output for vertex shader if no param is exported */
845 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
846 for (i = 0, pos0 = 0; i < noutput; i++) {
847 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
848 pos0 = 1;
849 break;
850 }
851 }
852 if (!pos0) {
853 memset(&output[i], 0, sizeof(struct r600_bc_output));
854 output[i].gpr = 0;
855 output[i].elem_size = 3;
856 output[i].swizzle_x = 0;
857 output[i].swizzle_y = 1;
858 output[i].swizzle_z = 2;
859 output[i].swizzle_w = 3;
860 output[i].burst_count = 1;
861 output[i].barrier = 1;
862 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
863 output[i].array_base = 0;
864 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
865 noutput++;
866 }
867 }
868 /* add fake pixel export */
869 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
870 memset(&output[0], 0, sizeof(struct r600_bc_output));
871 output[0].gpr = 0;
872 output[0].elem_size = 3;
873 output[0].swizzle_x = 7;
874 output[0].swizzle_y = 7;
875 output[0].swizzle_z = 7;
876 output[0].swizzle_w = 7;
877 output[0].burst_count = 1;
878 output[0].barrier = 1;
879 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
880 output[0].array_base = 0;
881 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
882 noutput++;
883 }
884 /* set export done on last export of each type */
885 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
886 if (ctx.bc->chip_class < CAYMAN) {
887 if (i == (noutput - 1)) {
888 output[i].end_of_program = 1;
889 }
890 }
891 if (!(output_done & (1 << output[i].type))) {
892 output_done |= (1 << output[i].type);
893 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
894 }
895 }
896 /* add output to bytecode */
897 for (i = 0; i < noutput; i++) {
898 r = r600_bc_add_output(ctx.bc, &output[i]);
899 if (r)
900 goto out_err;
901 }
902 /* add program end */
903 if (ctx.bc->chip_class == CAYMAN)
904 cm_bc_add_cf_end(ctx.bc);
905
906 free(ctx.literals);
907 tgsi_parse_free(&ctx.parse);
908 return 0;
909 out_err:
910 free(ctx.literals);
911 tgsi_parse_free(&ctx.parse);
912 return r;
913 }
914
915 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
916 {
917 R600_ERR("%s tgsi opcode unsupported\n",
918 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
919 return -EINVAL;
920 }
921
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
926
927 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
928 const struct r600_shader_src *shader_src,
929 unsigned chan)
930 {
931 bc_src->sel = shader_src->sel;
932 bc_src->chan = shader_src->swizzle[chan];
933 bc_src->neg = shader_src->neg;
934 bc_src->abs = shader_src->abs;
935 bc_src->rel = shader_src->rel;
936 bc_src->value = shader_src->value[bc_src->chan];
937 }
938
939 static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src)
940 {
941 bc_src->abs = 1;
942 bc_src->neg = 0;
943 }
944
945 static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src)
946 {
947 bc_src->neg = !bc_src->neg;
948 }
949
950 static void tgsi_dst(struct r600_shader_ctx *ctx,
951 const struct tgsi_full_dst_register *tgsi_dst,
952 unsigned swizzle,
953 struct r600_bc_alu_dst *r600_dst)
954 {
955 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
956
957 r600_dst->sel = tgsi_dst->Register.Index;
958 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
959 r600_dst->chan = swizzle;
960 r600_dst->write = 1;
961 if (tgsi_dst->Register.Indirect)
962 r600_dst->rel = V_SQ_REL_RELATIVE;
963 if (inst->Instruction.Saturate) {
964 r600_dst->clamp = 1;
965 }
966 }
967
/*
 * Return the highest channel (0..3) enabled in `writemask`.
 * Bits above bit 3 are ignored; an empty mask yields 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to y; x (or nothing) is the fallback */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1)
			break;
	}
	return chan;
}
979
980 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
981 {
982 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
983 struct r600_bc_alu alu;
984 int i, j, r;
985 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
986
987 for (i = 0; i < lasti + 1; i++) {
988 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
989 continue;
990
991 memset(&alu, 0, sizeof(struct r600_bc_alu));
992 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
993
994 alu.inst = ctx->inst_info->r600_opcode;
995 if (!swap) {
996 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
997 r600_bc_src(&alu.src[j], &ctx->src[j], i);
998 }
999 } else {
1000 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1001 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1002 }
1003 /* handle some special cases */
1004 switch (ctx->inst_info->tgsi_opcode) {
1005 case TGSI_OPCODE_SUB:
1006 r600_bc_src_toggle_neg(&alu.src[1]);
1007 break;
1008 case TGSI_OPCODE_ABS:
1009 r600_bc_src_set_abs(&alu.src[0]);
1010 break;
1011 default:
1012 break;
1013 }
1014 if (i == lasti) {
1015 alu.last = 1;
1016 }
1017 r = r600_bc_add_alu(ctx->bc, &alu);
1018 if (r)
1019 return r;
1020 }
1021 return 0;
1022 }
1023
/* Two-operand ALU instruction with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1028
/* Two-operand ALU instruction with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1033
1034 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1035 {
1036 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1037 int i, j, r;
1038 struct r600_bc_alu alu;
1039 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1040
1041 for (i = 0 ; i < last_slot; i++) {
1042 memset(&alu, 0, sizeof(struct r600_bc_alu));
1043 alu.inst = ctx->inst_info->r600_opcode;
1044 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1045 r600_bc_src(&alu.src[j], &ctx->src[j], 0);
1046 }
1047 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1048 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1049
1050 if (i == last_slot - 1)
1051 alu.last = 1;
1052 r = r600_bc_add_alu(ctx->bc, &alu);
1053 if (r)
1054 return r;
1055 }
1056 return 0;
1057 }
1058
1059 /*
1060 * r600 - trunc to -PI..PI range
1061 * r700 - normalize by dividing by 2PI
1062 * see fdo bug 27901
1063 */
1064 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1065 {
1066 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1067 static float double_pi = 3.1415926535 * 2;
1068 static float neg_pi = -3.1415926535;
1069
1070 int r;
1071 struct r600_bc_alu alu;
1072
1073 memset(&alu, 0, sizeof(struct r600_bc_alu));
1074 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1075 alu.is_op3 = 1;
1076
1077 alu.dst.chan = 0;
1078 alu.dst.sel = ctx->temp_reg;
1079 alu.dst.write = 1;
1080
1081 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1082
1083 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1084 alu.src[1].chan = 0;
1085 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1086 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1087 alu.src[2].chan = 0;
1088 alu.last = 1;
1089 r = r600_bc_add_alu(ctx->bc, &alu);
1090 if (r)
1091 return r;
1092
1093 memset(&alu, 0, sizeof(struct r600_bc_alu));
1094 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1095
1096 alu.dst.chan = 0;
1097 alu.dst.sel = ctx->temp_reg;
1098 alu.dst.write = 1;
1099
1100 alu.src[0].sel = ctx->temp_reg;
1101 alu.src[0].chan = 0;
1102 alu.last = 1;
1103 r = r600_bc_add_alu(ctx->bc, &alu);
1104 if (r)
1105 return r;
1106
1107 memset(&alu, 0, sizeof(struct r600_bc_alu));
1108 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1109 alu.is_op3 = 1;
1110
1111 alu.dst.chan = 0;
1112 alu.dst.sel = ctx->temp_reg;
1113 alu.dst.write = 1;
1114
1115 alu.src[0].sel = ctx->temp_reg;
1116 alu.src[0].chan = 0;
1117
1118 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1119 alu.src[1].chan = 0;
1120 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1121 alu.src[2].chan = 0;
1122
1123 if (ctx->bc->chip_class == R600) {
1124 alu.src[1].value = *(uint32_t *)&double_pi;
1125 alu.src[2].value = *(uint32_t *)&neg_pi;
1126 } else {
1127 alu.src[1].sel = V_SQ_ALU_SRC_1;
1128 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1129 alu.src[2].neg = 1;
1130 }
1131
1132 alu.last = 1;
1133 r = r600_bc_add_alu(ctx->bc, &alu);
1134 if (r)
1135 return r;
1136 return 0;
1137 }
1138
1139 static int cayman_trig(struct r600_shader_ctx *ctx)
1140 {
1141 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1142 struct r600_bc_alu alu;
1143 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1144 int i, r;
1145
1146 r = tgsi_setup_trig(ctx);
1147 if (r)
1148 return r;
1149
1150
1151 for (i = 0; i < last_slot; i++) {
1152 memset(&alu, 0, sizeof(struct r600_bc_alu));
1153 alu.inst = ctx->inst_info->r600_opcode;
1154 alu.dst.chan = i;
1155
1156 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1157 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1158
1159 alu.src[0].sel = ctx->temp_reg;
1160 alu.src[0].chan = 0;
1161 if (i == last_slot - 1)
1162 alu.last = 1;
1163 r = r600_bc_add_alu(ctx->bc, &alu);
1164 if (r)
1165 return r;
1166 }
1167 return 0;
1168 }
1169
1170 static int tgsi_trig(struct r600_shader_ctx *ctx)
1171 {
1172 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1173 struct r600_bc_alu alu;
1174 int i, r;
1175 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1176
1177 r = tgsi_setup_trig(ctx);
1178 if (r)
1179 return r;
1180
1181 memset(&alu, 0, sizeof(struct r600_bc_alu));
1182 alu.inst = ctx->inst_info->r600_opcode;
1183 alu.dst.chan = 0;
1184 alu.dst.sel = ctx->temp_reg;
1185 alu.dst.write = 1;
1186
1187 alu.src[0].sel = ctx->temp_reg;
1188 alu.src[0].chan = 0;
1189 alu.last = 1;
1190 r = r600_bc_add_alu(ctx->bc, &alu);
1191 if (r)
1192 return r;
1193
1194 /* replicate result */
1195 for (i = 0; i < lasti + 1; i++) {
1196 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1197 continue;
1198
1199 memset(&alu, 0, sizeof(struct r600_bc_alu));
1200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1201
1202 alu.src[0].sel = ctx->temp_reg;
1203 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1204 if (i == lasti)
1205 alu.last = 1;
1206 r = r600_bc_add_alu(ctx->bc, &alu);
1207 if (r)
1208 return r;
1209 }
1210 return 0;
1211 }
1212
1213 static int tgsi_scs(struct r600_shader_ctx *ctx)
1214 {
1215 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1216 struct r600_bc_alu alu;
1217 int i, r;
1218
1219 /* We'll only need the trig stuff if we are going to write to the
1220 * X or Y components of the destination vector.
1221 */
1222 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1223 r = tgsi_setup_trig(ctx);
1224 if (r)
1225 return r;
1226 }
1227
1228 /* dst.x = COS */
1229 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1230 if (ctx->bc->chip_class == CAYMAN) {
1231 for (i = 0 ; i < 3; i++) {
1232 memset(&alu, 0, sizeof(struct r600_bc_alu));
1233 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1234 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1235
1236 if (i == 0)
1237 alu.dst.write = 1;
1238 else
1239 alu.dst.write = 0;
1240 alu.src[0].sel = ctx->temp_reg;
1241 alu.src[0].chan = 0;
1242 if (i == 2)
1243 alu.last = 1;
1244 r = r600_bc_add_alu(ctx->bc, &alu);
1245 if (r)
1246 return r;
1247 }
1248 } else {
1249 memset(&alu, 0, sizeof(struct r600_bc_alu));
1250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1251 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1252
1253 alu.src[0].sel = ctx->temp_reg;
1254 alu.src[0].chan = 0;
1255 alu.last = 1;
1256 r = r600_bc_add_alu(ctx->bc, &alu);
1257 if (r)
1258 return r;
1259 }
1260 }
1261
1262 /* dst.y = SIN */
1263 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1264 if (ctx->bc->chip_class == CAYMAN) {
1265 for (i = 0 ; i < 3; i++) {
1266 memset(&alu, 0, sizeof(struct r600_bc_alu));
1267 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1268 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1269 if (i == 1)
1270 alu.dst.write = 1;
1271 else
1272 alu.dst.write = 0;
1273 alu.src[0].sel = ctx->temp_reg;
1274 alu.src[0].chan = 0;
1275 if (i == 2)
1276 alu.last = 1;
1277 r = r600_bc_add_alu(ctx->bc, &alu);
1278 if (r)
1279 return r;
1280 }
1281 } else {
1282 memset(&alu, 0, sizeof(struct r600_bc_alu));
1283 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1284 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1285
1286 alu.src[0].sel = ctx->temp_reg;
1287 alu.src[0].chan = 0;
1288 alu.last = 1;
1289 r = r600_bc_add_alu(ctx->bc, &alu);
1290 if (r)
1291 return r;
1292 }
1293 }
1294
1295 /* dst.z = 0.0; */
1296 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1297 memset(&alu, 0, sizeof(struct r600_bc_alu));
1298
1299 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1300
1301 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1302
1303 alu.src[0].sel = V_SQ_ALU_SRC_0;
1304 alu.src[0].chan = 0;
1305
1306 alu.last = 1;
1307
1308 r = r600_bc_add_alu(ctx->bc, &alu);
1309 if (r)
1310 return r;
1311 }
1312
1313 /* dst.w = 1.0; */
1314 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1315 memset(&alu, 0, sizeof(struct r600_bc_alu));
1316
1317 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1318
1319 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1320
1321 alu.src[0].sel = V_SQ_ALU_SRC_1;
1322 alu.src[0].chan = 0;
1323
1324 alu.last = 1;
1325
1326 r = r600_bc_add_alu(ctx->bc, &alu);
1327 if (r)
1328 return r;
1329 }
1330
1331 return 0;
1332 }
1333
1334 static int tgsi_kill(struct r600_shader_ctx *ctx)
1335 {
1336 struct r600_bc_alu alu;
1337 int i, r;
1338
1339 for (i = 0; i < 4; i++) {
1340 memset(&alu, 0, sizeof(struct r600_bc_alu));
1341 alu.inst = ctx->inst_info->r600_opcode;
1342
1343 alu.dst.chan = i;
1344
1345 alu.src[0].sel = V_SQ_ALU_SRC_0;
1346
1347 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1348 alu.src[1].sel = V_SQ_ALU_SRC_1;
1349 alu.src[1].neg = 1;
1350 } else {
1351 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1352 }
1353 if (i == 3) {
1354 alu.last = 1;
1355 }
1356 r = r600_bc_add_alu(ctx->bc, &alu);
1357 if (r)
1358 return r;
1359 }
1360
1361 /* kill must be last in ALU */
1362 ctx->bc->force_add_cf = 1;
1363 ctx->shader->uses_kill = TRUE;
1364 return 0;
1365 }
1366
1367 static int tgsi_lit(struct r600_shader_ctx *ctx)
1368 {
1369 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1370 struct r600_bc_alu alu;
1371 int r;
1372
1373 /* tmp.x = max(src.y, 0.0) */
1374 memset(&alu, 0, sizeof(struct r600_bc_alu));
1375 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1376 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1377 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1378 alu.src[1].chan = 1;
1379
1380 alu.dst.sel = ctx->temp_reg;
1381 alu.dst.chan = 0;
1382 alu.dst.write = 1;
1383
1384 alu.last = 1;
1385 r = r600_bc_add_alu(ctx->bc, &alu);
1386 if (r)
1387 return r;
1388
1389 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1390 {
1391 int chan;
1392 int sel;
1393 int i;
1394
1395 if (ctx->bc->chip_class == CAYMAN) {
1396 for (i = 0; i < 3; i++) {
1397 /* tmp.z = log(tmp.x) */
1398 memset(&alu, 0, sizeof(struct r600_bc_alu));
1399 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1400 alu.src[0].sel = ctx->temp_reg;
1401 alu.src[0].chan = 0;
1402 alu.dst.sel = ctx->temp_reg;
1403 alu.dst.chan = i;
1404 if (i == 2) {
1405 alu.dst.write = 1;
1406 alu.last = 1;
1407 } else
1408 alu.dst.write = 0;
1409
1410 r = r600_bc_add_alu(ctx->bc, &alu);
1411 if (r)
1412 return r;
1413 }
1414 } else {
1415 /* tmp.z = log(tmp.x) */
1416 memset(&alu, 0, sizeof(struct r600_bc_alu));
1417 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1418 alu.src[0].sel = ctx->temp_reg;
1419 alu.src[0].chan = 0;
1420 alu.dst.sel = ctx->temp_reg;
1421 alu.dst.chan = 2;
1422 alu.dst.write = 1;
1423 alu.last = 1;
1424 r = r600_bc_add_alu(ctx->bc, &alu);
1425 if (r)
1426 return r;
1427 }
1428
1429 chan = alu.dst.chan;
1430 sel = alu.dst.sel;
1431
1432 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1433 memset(&alu, 0, sizeof(struct r600_bc_alu));
1434 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1435 alu.src[0].sel = sel;
1436 alu.src[0].chan = chan;
1437 r600_bc_src(&alu.src[1], &ctx->src[0], 3);
1438 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1439 alu.dst.sel = ctx->temp_reg;
1440 alu.dst.chan = 0;
1441 alu.dst.write = 1;
1442 alu.is_op3 = 1;
1443 alu.last = 1;
1444 r = r600_bc_add_alu(ctx->bc, &alu);
1445 if (r)
1446 return r;
1447
1448 if (ctx->bc->chip_class == CAYMAN) {
1449 for (i = 0; i < 3; i++) {
1450 /* dst.z = exp(tmp.x) */
1451 memset(&alu, 0, sizeof(struct r600_bc_alu));
1452 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1453 alu.src[0].sel = ctx->temp_reg;
1454 alu.src[0].chan = 0;
1455 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1456 if (i == 2) {
1457 alu.dst.write = 1;
1458 alu.last = 1;
1459 } else
1460 alu.dst.write = 0;
1461 r = r600_bc_add_alu(ctx->bc, &alu);
1462 if (r)
1463 return r;
1464 }
1465 } else {
1466 /* dst.z = exp(tmp.x) */
1467 memset(&alu, 0, sizeof(struct r600_bc_alu));
1468 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1469 alu.src[0].sel = ctx->temp_reg;
1470 alu.src[0].chan = 0;
1471 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1472 alu.last = 1;
1473 r = r600_bc_add_alu(ctx->bc, &alu);
1474 if (r)
1475 return r;
1476 }
1477 }
1478
1479 /* dst.x, <- 1.0 */
1480 memset(&alu, 0, sizeof(struct r600_bc_alu));
1481 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1482 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1483 alu.src[0].chan = 0;
1484 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1485 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1486 r = r600_bc_add_alu(ctx->bc, &alu);
1487 if (r)
1488 return r;
1489
1490 /* dst.y = max(src.x, 0.0) */
1491 memset(&alu, 0, sizeof(struct r600_bc_alu));
1492 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1493 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1494 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1495 alu.src[1].chan = 0;
1496 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1497 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1498 r = r600_bc_add_alu(ctx->bc, &alu);
1499 if (r)
1500 return r;
1501
1502 /* dst.w, <- 1.0 */
1503 memset(&alu, 0, sizeof(struct r600_bc_alu));
1504 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1505 alu.src[0].sel = V_SQ_ALU_SRC_1;
1506 alu.src[0].chan = 0;
1507 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1508 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1509 alu.last = 1;
1510 r = r600_bc_add_alu(ctx->bc, &alu);
1511 if (r)
1512 return r;
1513
1514 return 0;
1515 }
1516
1517 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1518 {
1519 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1520 struct r600_bc_alu alu;
1521 int i, r;
1522
1523 memset(&alu, 0, sizeof(struct r600_bc_alu));
1524
1525 /* FIXME:
1526 * For state trackers other than OpenGL, we'll want to use
1527 * _RECIPSQRT_IEEE instead.
1528 */
1529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1530
1531 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1532 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1533 r600_bc_src_set_abs(&alu.src[i]);
1534 }
1535 alu.dst.sel = ctx->temp_reg;
1536 alu.dst.write = 1;
1537 alu.last = 1;
1538 r = r600_bc_add_alu(ctx->bc, &alu);
1539 if (r)
1540 return r;
1541 /* replicate result */
1542 return tgsi_helper_tempx_replicate(ctx);
1543 }
1544
1545 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1546 {
1547 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1548 struct r600_bc_alu alu;
1549 int i, r;
1550
1551 for (i = 0; i < 4; i++) {
1552 memset(&alu, 0, sizeof(struct r600_bc_alu));
1553 alu.src[0].sel = ctx->temp_reg;
1554 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1555 alu.dst.chan = i;
1556 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1557 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1558 if (i == 3)
1559 alu.last = 1;
1560 r = r600_bc_add_alu(ctx->bc, &alu);
1561 if (r)
1562 return r;
1563 }
1564 return 0;
1565 }
1566
1567 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1568 {
1569 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1570 struct r600_bc_alu alu;
1571 int i, r;
1572
1573 memset(&alu, 0, sizeof(struct r600_bc_alu));
1574 alu.inst = ctx->inst_info->r600_opcode;
1575 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1576 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1577 }
1578 alu.dst.sel = ctx->temp_reg;
1579 alu.dst.write = 1;
1580 alu.last = 1;
1581 r = r600_bc_add_alu(ctx->bc, &alu);
1582 if (r)
1583 return r;
1584 /* replicate result */
1585 return tgsi_helper_tempx_replicate(ctx);
1586 }
1587
1588 static int cayman_pow(struct r600_shader_ctx *ctx)
1589 {
1590 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1591 int i, r;
1592 struct r600_bc_alu alu;
1593 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1594
1595 for (i = 0; i < 3; i++) {
1596 memset(&alu, 0, sizeof(struct r600_bc_alu));
1597 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1598 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1599 alu.dst.sel = ctx->temp_reg;
1600 alu.dst.chan = i;
1601 alu.dst.write = 1;
1602 if (i == 2)
1603 alu.last = 1;
1604 r = r600_bc_add_alu(ctx->bc, &alu);
1605 if (r)
1606 return r;
1607 }
1608
1609 /* b * LOG2(a) */
1610 memset(&alu, 0, sizeof(struct r600_bc_alu));
1611 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1612 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1613 alu.src[1].sel = ctx->temp_reg;
1614 alu.dst.sel = ctx->temp_reg;
1615 alu.dst.write = 1;
1616 alu.last = 1;
1617 r = r600_bc_add_alu(ctx->bc, &alu);
1618 if (r)
1619 return r;
1620
1621 for (i = 0; i < last_slot; i++) {
1622 /* POW(a,b) = EXP2(b * LOG2(a))*/
1623 memset(&alu, 0, sizeof(struct r600_bc_alu));
1624 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1625 alu.src[0].sel = ctx->temp_reg;
1626
1627 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1628 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1629 if (i == last_slot - 1)
1630 alu.last = 1;
1631 r = r600_bc_add_alu(ctx->bc, &alu);
1632 if (r)
1633 return r;
1634 }
1635 return 0;
1636 }
1637
1638 static int tgsi_pow(struct r600_shader_ctx *ctx)
1639 {
1640 struct r600_bc_alu alu;
1641 int r;
1642
1643 /* LOG2(a) */
1644 memset(&alu, 0, sizeof(struct r600_bc_alu));
1645 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1646 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1647 alu.dst.sel = ctx->temp_reg;
1648 alu.dst.write = 1;
1649 alu.last = 1;
1650 r = r600_bc_add_alu(ctx->bc, &alu);
1651 if (r)
1652 return r;
1653 /* b * LOG2(a) */
1654 memset(&alu, 0, sizeof(struct r600_bc_alu));
1655 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1656 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1657 alu.src[1].sel = ctx->temp_reg;
1658 alu.dst.sel = ctx->temp_reg;
1659 alu.dst.write = 1;
1660 alu.last = 1;
1661 r = r600_bc_add_alu(ctx->bc, &alu);
1662 if (r)
1663 return r;
1664 /* POW(a,b) = EXP2(b * LOG2(a))*/
1665 memset(&alu, 0, sizeof(struct r600_bc_alu));
1666 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1667 alu.src[0].sel = ctx->temp_reg;
1668 alu.dst.sel = ctx->temp_reg;
1669 alu.dst.write = 1;
1670 alu.last = 1;
1671 r = r600_bc_add_alu(ctx->bc, &alu);
1672 if (r)
1673 return r;
1674 return tgsi_helper_tempx_replicate(ctx);
1675 }
1676
1677 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1678 {
1679 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1680 struct r600_bc_alu alu;
1681 int i, r;
1682
1683 /* tmp = (src > 0 ? 1 : src) */
1684 for (i = 0; i < 4; i++) {
1685 memset(&alu, 0, sizeof(struct r600_bc_alu));
1686 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1687 alu.is_op3 = 1;
1688
1689 alu.dst.sel = ctx->temp_reg;
1690 alu.dst.chan = i;
1691
1692 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1693 alu.src[1].sel = V_SQ_ALU_SRC_1;
1694 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1695
1696 if (i == 3)
1697 alu.last = 1;
1698 r = r600_bc_add_alu(ctx->bc, &alu);
1699 if (r)
1700 return r;
1701 }
1702
1703 /* dst = (-tmp > 0 ? -1 : tmp) */
1704 for (i = 0; i < 4; i++) {
1705 memset(&alu, 0, sizeof(struct r600_bc_alu));
1706 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1707 alu.is_op3 = 1;
1708 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1709
1710 alu.src[0].sel = ctx->temp_reg;
1711 alu.src[0].chan = i;
1712 alu.src[0].neg = 1;
1713
1714 alu.src[1].sel = V_SQ_ALU_SRC_1;
1715 alu.src[1].neg = 1;
1716
1717 alu.src[2].sel = ctx->temp_reg;
1718 alu.src[2].chan = i;
1719
1720 if (i == 3)
1721 alu.last = 1;
1722 r = r600_bc_add_alu(ctx->bc, &alu);
1723 if (r)
1724 return r;
1725 }
1726 return 0;
1727 }
1728
1729 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1730 {
1731 struct r600_bc_alu alu;
1732 int i, r;
1733
1734 for (i = 0; i < 4; i++) {
1735 memset(&alu, 0, sizeof(struct r600_bc_alu));
1736 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1737 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1738 alu.dst.chan = i;
1739 } else {
1740 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1741 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1742 alu.src[0].sel = ctx->temp_reg;
1743 alu.src[0].chan = i;
1744 }
1745 if (i == 3) {
1746 alu.last = 1;
1747 }
1748 r = r600_bc_add_alu(ctx->bc, &alu);
1749 if (r)
1750 return r;
1751 }
1752 return 0;
1753 }
1754
1755 static int tgsi_op3(struct r600_shader_ctx *ctx)
1756 {
1757 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1758 struct r600_bc_alu alu;
1759 int i, j, r;
1760 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1761
1762 for (i = 0; i < lasti + 1; i++) {
1763 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1764 continue;
1765
1766 memset(&alu, 0, sizeof(struct r600_bc_alu));
1767 alu.inst = ctx->inst_info->r600_opcode;
1768 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1769 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1770 }
1771
1772 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1773 alu.dst.chan = i;
1774 alu.dst.write = 1;
1775 alu.is_op3 = 1;
1776 if (i == lasti) {
1777 alu.last = 1;
1778 }
1779 r = r600_bc_add_alu(ctx->bc, &alu);
1780 if (r)
1781 return r;
1782 }
1783 return 0;
1784 }
1785
1786 static int tgsi_dp(struct r600_shader_ctx *ctx)
1787 {
1788 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1789 struct r600_bc_alu alu;
1790 int i, j, r;
1791
1792 for (i = 0; i < 4; i++) {
1793 memset(&alu, 0, sizeof(struct r600_bc_alu));
1794 alu.inst = ctx->inst_info->r600_opcode;
1795 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1796 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1797 }
1798
1799 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1800 alu.dst.chan = i;
1801 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1802 /* handle some special cases */
1803 switch (ctx->inst_info->tgsi_opcode) {
1804 case TGSI_OPCODE_DP2:
1805 if (i > 1) {
1806 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1807 alu.src[0].chan = alu.src[1].chan = 0;
1808 }
1809 break;
1810 case TGSI_OPCODE_DP3:
1811 if (i > 2) {
1812 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1813 alu.src[0].chan = alu.src[1].chan = 0;
1814 }
1815 break;
1816 case TGSI_OPCODE_DPH:
1817 if (i == 3) {
1818 alu.src[0].sel = V_SQ_ALU_SRC_1;
1819 alu.src[0].chan = 0;
1820 alu.src[0].neg = 0;
1821 }
1822 break;
1823 default:
1824 break;
1825 }
1826 if (i == 3) {
1827 alu.last = 1;
1828 }
1829 r = r600_bc_add_alu(ctx->bc, &alu);
1830 if (r)
1831 return r;
1832 }
1833 return 0;
1834 }
1835
1836 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1837 unsigned index)
1838 {
1839 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1840 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1841 inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1842 ctx->src[index].neg || ctx->src[index].abs;
1843 }
1844
1845 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1846 unsigned index)
1847 {
1848 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1849 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1850 }
1851
1852 static int tgsi_tex(struct r600_shader_ctx *ctx)
1853 {
1854 static float one_point_five = 1.5f;
1855 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1856 struct r600_bc_tex tex;
1857 struct r600_bc_alu alu;
1858 unsigned src_gpr;
1859 int r, i, j;
1860 int opcode;
1861 /* Texture fetch instructions can only use gprs as source.
1862 * Also they cannot negate the source or take the absolute value */
1863 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
1864 boolean src_loaded = FALSE;
1865 unsigned sampler_src_reg = 1;
1866
1867 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
1868
1869 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1870 /* TGSI moves the sampler to src reg 3 for TXD */
1871 sampler_src_reg = 3;
1872
1873 for (i = 1; i < 3; i++) {
1874 /* set gradients h/v */
1875 memset(&tex, 0, sizeof(struct r600_bc_tex));
1876 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
1877 SQ_TEX_INST_SET_GRADIENTS_V;
1878 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
1879 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1880
1881 if (tgsi_tex_src_requires_loading(ctx, i)) {
1882 tex.src_gpr = r600_get_temp(ctx);
1883 tex.src_sel_x = 0;
1884 tex.src_sel_y = 1;
1885 tex.src_sel_z = 2;
1886 tex.src_sel_w = 3;
1887
1888 for (j = 0; j < 4; j++) {
1889 memset(&alu, 0, sizeof(struct r600_bc_alu));
1890 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1891 r600_bc_src(&alu.src[0], &ctx->src[i], j);
1892 alu.dst.sel = tex.src_gpr;
1893 alu.dst.chan = j;
1894 if (j == 3)
1895 alu.last = 1;
1896 alu.dst.write = 1;
1897 r = r600_bc_add_alu(ctx->bc, &alu);
1898 if (r)
1899 return r;
1900 }
1901
1902 } else {
1903 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
1904 tex.src_sel_x = ctx->src[i].swizzle[0];
1905 tex.src_sel_y = ctx->src[i].swizzle[1];
1906 tex.src_sel_z = ctx->src[i].swizzle[2];
1907 tex.src_sel_w = ctx->src[i].swizzle[3];
1908 tex.src_rel = ctx->src[i].rel;
1909 }
1910 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
1911 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
1912 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1913 tex.coord_type_x = 1;
1914 tex.coord_type_y = 1;
1915 tex.coord_type_z = 1;
1916 tex.coord_type_w = 1;
1917 }
1918 r = r600_bc_add_tex(ctx->bc, &tex);
1919 if (r)
1920 return r;
1921 }
1922 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1923 int out_chan;
1924 /* Add perspective divide */
1925 if (ctx->bc->chip_class == CAYMAN) {
1926 out_chan = 2;
1927 for (i = 0; i < 3; i++) {
1928 memset(&alu, 0, sizeof(struct r600_bc_alu));
1929 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1930 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1931
1932 alu.dst.sel = ctx->temp_reg;
1933 alu.dst.chan = i;
1934 if (i == 2)
1935 alu.last = 1;
1936 if (out_chan == i)
1937 alu.dst.write = 1;
1938 r = r600_bc_add_alu(ctx->bc, &alu);
1939 if (r)
1940 return r;
1941 }
1942
1943 } else {
1944 out_chan = 3;
1945 memset(&alu, 0, sizeof(struct r600_bc_alu));
1946 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1947 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1948
1949 alu.dst.sel = ctx->temp_reg;
1950 alu.dst.chan = out_chan;
1951 alu.last = 1;
1952 alu.dst.write = 1;
1953 r = r600_bc_add_alu(ctx->bc, &alu);
1954 if (r)
1955 return r;
1956 }
1957
1958 for (i = 0; i < 3; i++) {
1959 memset(&alu, 0, sizeof(struct r600_bc_alu));
1960 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1961 alu.src[0].sel = ctx->temp_reg;
1962 alu.src[0].chan = out_chan;
1963 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1964 alu.dst.sel = ctx->temp_reg;
1965 alu.dst.chan = i;
1966 alu.dst.write = 1;
1967 r = r600_bc_add_alu(ctx->bc, &alu);
1968 if (r)
1969 return r;
1970 }
1971 memset(&alu, 0, sizeof(struct r600_bc_alu));
1972 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1973 alu.src[0].sel = V_SQ_ALU_SRC_1;
1974 alu.src[0].chan = 0;
1975 alu.dst.sel = ctx->temp_reg;
1976 alu.dst.chan = 3;
1977 alu.last = 1;
1978 alu.dst.write = 1;
1979 r = r600_bc_add_alu(ctx->bc, &alu);
1980 if (r)
1981 return r;
1982 src_loaded = TRUE;
1983 src_gpr = ctx->temp_reg;
1984 }
1985
1986 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1987 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1988 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1989
1990 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1991 for (i = 0; i < 4; i++) {
1992 memset(&alu, 0, sizeof(struct r600_bc_alu));
1993 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1994 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1995 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1996 alu.dst.sel = ctx->temp_reg;
1997 alu.dst.chan = i;
1998 if (i == 3)
1999 alu.last = 1;
2000 alu.dst.write = 1;
2001 r = r600_bc_add_alu(ctx->bc, &alu);
2002 if (r)
2003 return r;
2004 }
2005
2006 /* tmp1.z = RCP_e(|tmp1.z|) */
2007 if (ctx->bc->chip_class == CAYMAN) {
2008 for (i = 0; i < 3; i++) {
2009 memset(&alu, 0, sizeof(struct r600_bc_alu));
2010 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2011 alu.src[0].sel = ctx->temp_reg;
2012 alu.src[0].chan = 2;
2013 alu.src[0].abs = 1;
2014 alu.dst.sel = ctx->temp_reg;
2015 alu.dst.chan = i;
2016 if (i == 2)
2017 alu.dst.write = 1;
2018 if (i == 2)
2019 alu.last = 1;
2020 r = r600_bc_add_alu(ctx->bc, &alu);
2021 if (r)
2022 return r;
2023 }
2024 } else {
2025 memset(&alu, 0, sizeof(struct r600_bc_alu));
2026 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2027 alu.src[0].sel = ctx->temp_reg;
2028 alu.src[0].chan = 2;
2029 alu.src[0].abs = 1;
2030 alu.dst.sel = ctx->temp_reg;
2031 alu.dst.chan = 2;
2032 alu.dst.write = 1;
2033 alu.last = 1;
2034 r = r600_bc_add_alu(ctx->bc, &alu);
2035 if (r)
2036 return r;
2037 }
2038
2039 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
2040 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
2041 * muladd has no writemask, have to use another temp
2042 */
2043 memset(&alu, 0, sizeof(struct r600_bc_alu));
2044 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2045 alu.is_op3 = 1;
2046
2047 alu.src[0].sel = ctx->temp_reg;
2048 alu.src[0].chan = 0;
2049 alu.src[1].sel = ctx->temp_reg;
2050 alu.src[1].chan = 2;
2051
2052 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2053 alu.src[2].chan = 0;
2054 alu.src[2].value = *(uint32_t *)&one_point_five;
2055
2056 alu.dst.sel = ctx->temp_reg;
2057 alu.dst.chan = 0;
2058 alu.dst.write = 1;
2059
2060 r = r600_bc_add_alu(ctx->bc, &alu);
2061 if (r)
2062 return r;
2063
2064 memset(&alu, 0, sizeof(struct r600_bc_alu));
2065 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2066 alu.is_op3 = 1;
2067
2068 alu.src[0].sel = ctx->temp_reg;
2069 alu.src[0].chan = 1;
2070 alu.src[1].sel = ctx->temp_reg;
2071 alu.src[1].chan = 2;
2072
2073 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2074 alu.src[2].chan = 0;
2075 alu.src[2].value = *(uint32_t *)&one_point_five;
2076
2077 alu.dst.sel = ctx->temp_reg;
2078 alu.dst.chan = 1;
2079 alu.dst.write = 1;
2080
2081 alu.last = 1;
2082 r = r600_bc_add_alu(ctx->bc, &alu);
2083 if (r)
2084 return r;
2085
2086 src_loaded = TRUE;
2087 src_gpr = ctx->temp_reg;
2088 }
2089
2090 if (src_requires_loading && !src_loaded) {
2091 for (i = 0; i < 4; i++) {
2092 memset(&alu, 0, sizeof(struct r600_bc_alu));
2093 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2094 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2095 alu.dst.sel = ctx->temp_reg;
2096 alu.dst.chan = i;
2097 if (i == 3)
2098 alu.last = 1;
2099 alu.dst.write = 1;
2100 r = r600_bc_add_alu(ctx->bc, &alu);
2101 if (r)
2102 return r;
2103 }
2104 src_loaded = TRUE;
2105 src_gpr = ctx->temp_reg;
2106 }
2107
2108 opcode = ctx->inst_info->r600_opcode;
2109 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) {
2110 switch (opcode) {
2111 case SQ_TEX_INST_SAMPLE:
2112 opcode = SQ_TEX_INST_SAMPLE_C;
2113 break;
2114 case SQ_TEX_INST_SAMPLE_L:
2115 opcode = SQ_TEX_INST_SAMPLE_C_L;
2116 break;
2117 case SQ_TEX_INST_SAMPLE_G:
2118 opcode = SQ_TEX_INST_SAMPLE_C_G;
2119 break;
2120 }
2121 }
2122
2123 memset(&tex, 0, sizeof(struct r600_bc_tex));
2124 tex.inst = opcode;
2125
2126 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2127 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2128 tex.src_gpr = src_gpr;
2129 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2130 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2131 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2132 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2133 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2134 if (src_loaded) {
2135 tex.src_sel_x = 0;
2136 tex.src_sel_y = 1;
2137 tex.src_sel_z = 2;
2138 tex.src_sel_w = 3;
2139 } else {
2140 tex.src_sel_x = ctx->src[0].swizzle[0];
2141 tex.src_sel_y = ctx->src[0].swizzle[1];
2142 tex.src_sel_z = ctx->src[0].swizzle[2];
2143 tex.src_sel_w = ctx->src[0].swizzle[3];
2144 tex.src_rel = ctx->src[0].rel;
2145 }
2146
2147 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2148 tex.src_sel_x = 1;
2149 tex.src_sel_y = 0;
2150 tex.src_sel_z = 3;
2151 tex.src_sel_w = 1;
2152 }
2153
2154 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2155 tex.coord_type_x = 1;
2156 tex.coord_type_y = 1;
2157 tex.coord_type_z = 1;
2158 tex.coord_type_w = 1;
2159 }
2160
2161 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
2162 tex.coord_type_z = 0;
2163 tex.src_sel_z = tex.src_sel_y;
2164 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
2165 tex.coord_type_z = 0;
2166
2167 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2168 tex.src_sel_w = tex.src_sel_z;
2169
2170 r = r600_bc_add_tex(ctx->bc, &tex);
2171 if (r)
2172 return r;
2173
2174 /* add shadow ambient support - gallium doesn't do it yet */
2175 return 0;
2176 }
2177
2178 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2179 {
2180 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2181 struct r600_bc_alu alu;
2182 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2183 unsigned i;
2184 int r;
2185
2186 /* optimize if it's just an equal balance */
2187 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2188 for (i = 0; i < lasti + 1; i++) {
2189 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2190 continue;
2191
2192 memset(&alu, 0, sizeof(struct r600_bc_alu));
2193 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2194 r600_bc_src(&alu.src[0], &ctx->src[1], i);
2195 r600_bc_src(&alu.src[1], &ctx->src[2], i);
2196 alu.omod = 3;
2197 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2198 alu.dst.chan = i;
2199 if (i == lasti) {
2200 alu.last = 1;
2201 }
2202 r = r600_bc_add_alu(ctx->bc, &alu);
2203 if (r)
2204 return r;
2205 }
2206 return 0;
2207 }
2208
2209 /* 1 - src0 */
2210 for (i = 0; i < lasti + 1; i++) {
2211 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2212 continue;
2213
2214 memset(&alu, 0, sizeof(struct r600_bc_alu));
2215 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2216 alu.src[0].sel = V_SQ_ALU_SRC_1;
2217 alu.src[0].chan = 0;
2218 r600_bc_src(&alu.src[1], &ctx->src[0], i);
2219 r600_bc_src_toggle_neg(&alu.src[1]);
2220 alu.dst.sel = ctx->temp_reg;
2221 alu.dst.chan = i;
2222 if (i == lasti) {
2223 alu.last = 1;
2224 }
2225 alu.dst.write = 1;
2226 r = r600_bc_add_alu(ctx->bc, &alu);
2227 if (r)
2228 return r;
2229 }
2230
2231 /* (1 - src0) * src2 */
2232 for (i = 0; i < lasti + 1; i++) {
2233 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2234 continue;
2235
2236 memset(&alu, 0, sizeof(struct r600_bc_alu));
2237 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2238 alu.src[0].sel = ctx->temp_reg;
2239 alu.src[0].chan = i;
2240 r600_bc_src(&alu.src[1], &ctx->src[2], i);
2241 alu.dst.sel = ctx->temp_reg;
2242 alu.dst.chan = i;
2243 if (i == lasti) {
2244 alu.last = 1;
2245 }
2246 alu.dst.write = 1;
2247 r = r600_bc_add_alu(ctx->bc, &alu);
2248 if (r)
2249 return r;
2250 }
2251
2252 /* src0 * src1 + (1 - src0) * src2 */
2253 for (i = 0; i < lasti + 1; i++) {
2254 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2255 continue;
2256
2257 memset(&alu, 0, sizeof(struct r600_bc_alu));
2258 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2259 alu.is_op3 = 1;
2260 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2261 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2262 alu.src[2].sel = ctx->temp_reg;
2263 alu.src[2].chan = i;
2264
2265 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2266 alu.dst.chan = i;
2267 if (i == lasti) {
2268 alu.last = 1;
2269 }
2270 r = r600_bc_add_alu(ctx->bc, &alu);
2271 if (r)
2272 return r;
2273 }
2274 return 0;
2275 }
2276
2277 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2278 {
2279 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2280 struct r600_bc_alu alu;
2281 int i, r;
2282 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2283
2284 for (i = 0; i < lasti + 1; i++) {
2285 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2286 continue;
2287
2288 memset(&alu, 0, sizeof(struct r600_bc_alu));
2289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2290 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2291 r600_bc_src(&alu.src[1], &ctx->src[2], i);
2292 r600_bc_src(&alu.src[2], &ctx->src[1], i);
2293 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2294 alu.dst.chan = i;
2295 alu.dst.write = 1;
2296 alu.is_op3 = 1;
2297 if (i == lasti)
2298 alu.last = 1;
2299 r = r600_bc_add_alu(ctx->bc, &alu);
2300 if (r)
2301 return r;
2302 }
2303 return 0;
2304 }
2305
2306 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2307 {
2308 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2309 static const unsigned int src0_swizzle[] = {2, 0, 1};
2310 static const unsigned int src1_swizzle[] = {1, 2, 0};
2311 struct r600_bc_alu alu;
2312 uint32_t use_temp = 0;
2313 int i, r;
2314
2315 if (inst->Dst[0].Register.WriteMask != 0xf)
2316 use_temp = 1;
2317
2318 for (i = 0; i < 4; i++) {
2319 memset(&alu, 0, sizeof(struct r600_bc_alu));
2320 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2321 if (i < 3) {
2322 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2323 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2324 } else {
2325 alu.src[0].sel = V_SQ_ALU_SRC_0;
2326 alu.src[0].chan = i;
2327 alu.src[1].sel = V_SQ_ALU_SRC_0;
2328 alu.src[1].chan = i;
2329 }
2330
2331 alu.dst.sel = ctx->temp_reg;
2332 alu.dst.chan = i;
2333 alu.dst.write = 1;
2334
2335 if (i == 3)
2336 alu.last = 1;
2337 r = r600_bc_add_alu(ctx->bc, &alu);
2338 if (r)
2339 return r;
2340 }
2341
2342 for (i = 0; i < 4; i++) {
2343 memset(&alu, 0, sizeof(struct r600_bc_alu));
2344 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2345
2346 if (i < 3) {
2347 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2348 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2349 } else {
2350 alu.src[0].sel = V_SQ_ALU_SRC_0;
2351 alu.src[0].chan = i;
2352 alu.src[1].sel = V_SQ_ALU_SRC_0;
2353 alu.src[1].chan = i;
2354 }
2355
2356 alu.src[2].sel = ctx->temp_reg;
2357 alu.src[2].neg = 1;
2358 alu.src[2].chan = i;
2359
2360 if (use_temp)
2361 alu.dst.sel = ctx->temp_reg;
2362 else
2363 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2364 alu.dst.chan = i;
2365 alu.dst.write = 1;
2366 alu.is_op3 = 1;
2367 if (i == 3)
2368 alu.last = 1;
2369 r = r600_bc_add_alu(ctx->bc, &alu);
2370 if (r)
2371 return r;
2372 }
2373 if (use_temp)
2374 return tgsi_helper_copy(ctx, inst);
2375 return 0;
2376 }
2377
2378 static int tgsi_exp(struct r600_shader_ctx *ctx)
2379 {
2380 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2381 struct r600_bc_alu alu;
2382 int r;
2383 int i;
2384
2385 /* result.x = 2^floor(src); */
2386 if (inst->Dst[0].Register.WriteMask & 1) {
2387 memset(&alu, 0, sizeof(struct r600_bc_alu));
2388
2389 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2390 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2391
2392 alu.dst.sel = ctx->temp_reg;
2393 alu.dst.chan = 0;
2394 alu.dst.write = 1;
2395 alu.last = 1;
2396 r = r600_bc_add_alu(ctx->bc, &alu);
2397 if (r)
2398 return r;
2399
2400 if (ctx->bc->chip_class == CAYMAN) {
2401 for (i = 0; i < 3; i++) {
2402 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2403 alu.src[0].sel = ctx->temp_reg;
2404 alu.src[0].chan = 0;
2405
2406 alu.dst.sel = ctx->temp_reg;
2407 alu.dst.chan = i;
2408 if (i == 0)
2409 alu.dst.write = 1;
2410 if (i == 2)
2411 alu.last = 1;
2412 r = r600_bc_add_alu(ctx->bc, &alu);
2413 if (r)
2414 return r;
2415 }
2416 } else {
2417 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2418 alu.src[0].sel = ctx->temp_reg;
2419 alu.src[0].chan = 0;
2420
2421 alu.dst.sel = ctx->temp_reg;
2422 alu.dst.chan = 0;
2423 alu.dst.write = 1;
2424 alu.last = 1;
2425 r = r600_bc_add_alu(ctx->bc, &alu);
2426 if (r)
2427 return r;
2428 }
2429 }
2430
2431 /* result.y = tmp - floor(tmp); */
2432 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2433 memset(&alu, 0, sizeof(struct r600_bc_alu));
2434
2435 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2436 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2437
2438 alu.dst.sel = ctx->temp_reg;
2439 #if 0
2440 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2441 if (r)
2442 return r;
2443 #endif
2444 alu.dst.write = 1;
2445 alu.dst.chan = 1;
2446
2447 alu.last = 1;
2448
2449 r = r600_bc_add_alu(ctx->bc, &alu);
2450 if (r)
2451 return r;
2452 }
2453
2454 /* result.z = RoughApprox2ToX(tmp);*/
2455 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2456 if (ctx->bc->chip_class == CAYMAN) {
2457 for (i = 0; i < 3; i++) {
2458 memset(&alu, 0, sizeof(struct r600_bc_alu));
2459 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2460 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2461
2462 alu.dst.sel = ctx->temp_reg;
2463 alu.dst.chan = i;
2464 if (i == 2) {
2465 alu.dst.write = 1;
2466 alu.last = 1;
2467 }
2468
2469 r = r600_bc_add_alu(ctx->bc, &alu);
2470 if (r)
2471 return r;
2472 }
2473 } else {
2474 memset(&alu, 0, sizeof(struct r600_bc_alu));
2475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2476 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2477
2478 alu.dst.sel = ctx->temp_reg;
2479 alu.dst.write = 1;
2480 alu.dst.chan = 2;
2481
2482 alu.last = 1;
2483
2484 r = r600_bc_add_alu(ctx->bc, &alu);
2485 if (r)
2486 return r;
2487 }
2488 }
2489
2490 /* result.w = 1.0;*/
2491 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2492 memset(&alu, 0, sizeof(struct r600_bc_alu));
2493
2494 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2495 alu.src[0].sel = V_SQ_ALU_SRC_1;
2496 alu.src[0].chan = 0;
2497
2498 alu.dst.sel = ctx->temp_reg;
2499 alu.dst.chan = 3;
2500 alu.dst.write = 1;
2501 alu.last = 1;
2502 r = r600_bc_add_alu(ctx->bc, &alu);
2503 if (r)
2504 return r;
2505 }
2506 return tgsi_helper_copy(ctx, inst);
2507 }
2508
2509 static int tgsi_log(struct r600_shader_ctx *ctx)
2510 {
2511 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2512 struct r600_bc_alu alu;
2513 int r;
2514 int i;
2515
2516 /* result.x = floor(log2(|src|)); */
2517 if (inst->Dst[0].Register.WriteMask & 1) {
2518 if (ctx->bc->chip_class == CAYMAN) {
2519 for (i = 0; i < 3; i++) {
2520 memset(&alu, 0, sizeof(struct r600_bc_alu));
2521
2522 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2523 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2524 r600_bc_src_set_abs(&alu.src[0]);
2525
2526 alu.dst.sel = ctx->temp_reg;
2527 alu.dst.chan = i;
2528 if (i == 0)
2529 alu.dst.write = 1;
2530 if (i == 2)
2531 alu.last = 1;
2532 r = r600_bc_add_alu(ctx->bc, &alu);
2533 if (r)
2534 return r;
2535 }
2536
2537 } else {
2538 memset(&alu, 0, sizeof(struct r600_bc_alu));
2539
2540 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2541 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2542 r600_bc_src_set_abs(&alu.src[0]);
2543
2544 alu.dst.sel = ctx->temp_reg;
2545 alu.dst.chan = 0;
2546 alu.dst.write = 1;
2547 alu.last = 1;
2548 r = r600_bc_add_alu(ctx->bc, &alu);
2549 if (r)
2550 return r;
2551 }
2552
2553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2554 alu.src[0].sel = ctx->temp_reg;
2555 alu.src[0].chan = 0;
2556
2557 alu.dst.sel = ctx->temp_reg;
2558 alu.dst.chan = 0;
2559 alu.dst.write = 1;
2560 alu.last = 1;
2561
2562 r = r600_bc_add_alu(ctx->bc, &alu);
2563 if (r)
2564 return r;
2565 }
2566
2567 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2568 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2569
2570 if (ctx->bc->chip_class == CAYMAN) {
2571 for (i = 0; i < 3; i++) {
2572 memset(&alu, 0, sizeof(struct r600_bc_alu));
2573
2574 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2575 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2576 r600_bc_src_set_abs(&alu.src[0]);
2577
2578 alu.dst.sel = ctx->temp_reg;
2579 alu.dst.chan = i;
2580 if (i == 1)
2581 alu.dst.write = 1;
2582 if (i == 2)
2583 alu.last = 1;
2584
2585 r = r600_bc_add_alu(ctx->bc, &alu);
2586 if (r)
2587 return r;
2588 }
2589 } else {
2590 memset(&alu, 0, sizeof(struct r600_bc_alu));
2591
2592 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2593 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2594 r600_bc_src_set_abs(&alu.src[0]);
2595
2596 alu.dst.sel = ctx->temp_reg;
2597 alu.dst.chan = 1;
2598 alu.dst.write = 1;
2599 alu.last = 1;
2600
2601 r = r600_bc_add_alu(ctx->bc, &alu);
2602 if (r)
2603 return r;
2604 }
2605
2606 memset(&alu, 0, sizeof(struct r600_bc_alu));
2607
2608 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2609 alu.src[0].sel = ctx->temp_reg;
2610 alu.src[0].chan = 1;
2611
2612 alu.dst.sel = ctx->temp_reg;
2613 alu.dst.chan = 1;
2614 alu.dst.write = 1;
2615 alu.last = 1;
2616
2617 r = r600_bc_add_alu(ctx->bc, &alu);
2618 if (r)
2619 return r;
2620
2621 if (ctx->bc->chip_class == CAYMAN) {
2622 for (i = 0; i < 3; i++) {
2623 memset(&alu, 0, sizeof(struct r600_bc_alu));
2624 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2625 alu.src[0].sel = ctx->temp_reg;
2626 alu.src[0].chan = 1;
2627
2628 alu.dst.sel = ctx->temp_reg;
2629 alu.dst.chan = i;
2630 if (i == 1)
2631 alu.dst.write = 1;
2632 if (i == 2)
2633 alu.last = 1;
2634
2635 r = r600_bc_add_alu(ctx->bc, &alu);
2636 if (r)
2637 return r;
2638 }
2639 } else {
2640 memset(&alu, 0, sizeof(struct r600_bc_alu));
2641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2642 alu.src[0].sel = ctx->temp_reg;
2643 alu.src[0].chan = 1;
2644
2645 alu.dst.sel = ctx->temp_reg;
2646 alu.dst.chan = 1;
2647 alu.dst.write = 1;
2648 alu.last = 1;
2649
2650 r = r600_bc_add_alu(ctx->bc, &alu);
2651 if (r)
2652 return r;
2653 }
2654
2655 if (ctx->bc->chip_class == CAYMAN) {
2656 for (i = 0; i < 3; i++) {
2657 memset(&alu, 0, sizeof(struct r600_bc_alu));
2658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2659 alu.src[0].sel = ctx->temp_reg;
2660 alu.src[0].chan = 1;
2661
2662 alu.dst.sel = ctx->temp_reg;
2663 alu.dst.chan = i;
2664 if (i == 1)
2665 alu.dst.write = 1;
2666 if (i == 2)
2667 alu.last = 1;
2668
2669 r = r600_bc_add_alu(ctx->bc, &alu);
2670 if (r)
2671 return r;
2672 }
2673 } else {
2674 memset(&alu, 0, sizeof(struct r600_bc_alu));
2675 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2676 alu.src[0].sel = ctx->temp_reg;
2677 alu.src[0].chan = 1;
2678
2679 alu.dst.sel = ctx->temp_reg;
2680 alu.dst.chan = 1;
2681 alu.dst.write = 1;
2682 alu.last = 1;
2683
2684 r = r600_bc_add_alu(ctx->bc, &alu);
2685 if (r)
2686 return r;
2687 }
2688
2689 memset(&alu, 0, sizeof(struct r600_bc_alu));
2690
2691 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2692
2693 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2694 r600_bc_src_set_abs(&alu.src[0]);
2695
2696 alu.src[1].sel = ctx->temp_reg;
2697 alu.src[1].chan = 1;
2698
2699 alu.dst.sel = ctx->temp_reg;
2700 alu.dst.chan = 1;
2701 alu.dst.write = 1;
2702 alu.last = 1;
2703
2704 r = r600_bc_add_alu(ctx->bc, &alu);
2705 if (r)
2706 return r;
2707 }
2708
2709 /* result.z = log2(|src|);*/
2710 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2711 if (ctx->bc->chip_class == CAYMAN) {
2712 for (i = 0; i < 3; i++) {
2713 memset(&alu, 0, sizeof(struct r600_bc_alu));
2714
2715 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2716 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2717 r600_bc_src_set_abs(&alu.src[0]);
2718
2719 alu.dst.sel = ctx->temp_reg;
2720 if (i == 2)
2721 alu.dst.write = 1;
2722 alu.dst.chan = i;
2723 if (i == 2)
2724 alu.last = 1;
2725
2726 r = r600_bc_add_alu(ctx->bc, &alu);
2727 if (r)
2728 return r;
2729 }
2730 } else {
2731 memset(&alu, 0, sizeof(struct r600_bc_alu));
2732
2733 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2734 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2735 r600_bc_src_set_abs(&alu.src[0]);
2736
2737 alu.dst.sel = ctx->temp_reg;
2738 alu.dst.write = 1;
2739 alu.dst.chan = 2;
2740 alu.last = 1;
2741
2742 r = r600_bc_add_alu(ctx->bc, &alu);
2743 if (r)
2744 return r;
2745 }
2746 }
2747
2748 /* result.w = 1.0; */
2749 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2750 memset(&alu, 0, sizeof(struct r600_bc_alu));
2751
2752 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2753 alu.src[0].sel = V_SQ_ALU_SRC_1;
2754 alu.src[0].chan = 0;
2755
2756 alu.dst.sel = ctx->temp_reg;
2757 alu.dst.chan = 3;
2758 alu.dst.write = 1;
2759 alu.last = 1;
2760
2761 r = r600_bc_add_alu(ctx->bc, &alu);
2762 if (r)
2763 return r;
2764 }
2765
2766 return tgsi_helper_copy(ctx, inst);
2767 }
2768
2769 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2770 {
2771 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2772 struct r600_bc_alu alu;
2773 int r;
2774
2775 memset(&alu, 0, sizeof(struct r600_bc_alu));
2776
2777 switch (inst->Instruction.Opcode) {
2778 case TGSI_OPCODE_ARL:
2779 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2780 break;
2781 case TGSI_OPCODE_ARR:
2782 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2783 break;
2784 default:
2785 assert(0);
2786 return -1;
2787 }
2788
2789 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2790 alu.last = 1;
2791 alu.dst.sel = ctx->ar_reg;
2792 alu.dst.write = 1;
2793 r = r600_bc_add_alu(ctx->bc, &alu);
2794 if (r)
2795 return r;
2796
2797 /* TODO: Note that the MOVA can be avoided if we never use AR for
2798 * indexing non-CB registers in the current ALU clause. Similarly, we
2799 * need to load AR from ar_reg again if we started a new clause
2800 * between ARL and AR usage. The easy way to do that is to remove
2801 * the MOVA here, and load it for the first AR access after ar_reg
2802 * has been modified in each clause. */
2803 memset(&alu, 0, sizeof(struct r600_bc_alu));
2804 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2805 alu.src[0].sel = ctx->ar_reg;
2806 alu.src[0].chan = 0;
2807 alu.last = 1;
2808 r = r600_bc_add_alu(ctx->bc, &alu);
2809 if (r)
2810 return r;
2811 return 0;
2812 }
2813 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2814 {
2815 /* TODO from r600c, ar values don't persist between clauses */
2816 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2817 struct r600_bc_alu alu;
2818 int r;
2819
2820 switch (inst->Instruction.Opcode) {
2821 case TGSI_OPCODE_ARL:
2822 memset(&alu, 0, sizeof(alu));
2823 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2824 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2825 alu.dst.sel = ctx->ar_reg;
2826 alu.dst.write = 1;
2827 alu.last = 1;
2828
2829 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2830 return r;
2831
2832 memset(&alu, 0, sizeof(alu));
2833 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2834 alu.src[0].sel = ctx->ar_reg;
2835 alu.dst.sel = ctx->ar_reg;
2836 alu.dst.write = 1;
2837 alu.last = 1;
2838
2839 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2840 return r;
2841 break;
2842 case TGSI_OPCODE_ARR:
2843 memset(&alu, 0, sizeof(alu));
2844 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2845 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2846 alu.dst.sel = ctx->ar_reg;
2847 alu.dst.write = 1;
2848 alu.last = 1;
2849
2850 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2851 return r;
2852 break;
2853 default:
2854 assert(0);
2855 return -1;
2856 }
2857
2858 memset(&alu, 0, sizeof(alu));
2859 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2860 alu.src[0].sel = ctx->ar_reg;
2861 alu.last = 1;
2862
2863 r = r600_bc_add_alu(ctx->bc, &alu);
2864 if (r)
2865 return r;
2866 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2867 return 0;
2868 }
2869
2870 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2871 {
2872 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2873 struct r600_bc_alu alu;
2874 int i, r = 0;
2875
2876 for (i = 0; i < 4; i++) {
2877 memset(&alu, 0, sizeof(struct r600_bc_alu));
2878
2879 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2880 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2881
2882 if (i == 0 || i == 3) {
2883 alu.src[0].sel = V_SQ_ALU_SRC_1;
2884 } else {
2885 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2886 }
2887
2888 if (i == 0 || i == 2) {
2889 alu.src[1].sel = V_SQ_ALU_SRC_1;
2890 } else {
2891 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2892 }
2893 if (i == 3)
2894 alu.last = 1;
2895 r = r600_bc_add_alu(ctx->bc, &alu);
2896 if (r)
2897 return r;
2898 }
2899 return 0;
2900 }
2901
2902 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2903 {
2904 struct r600_bc_alu alu;
2905 int r;
2906
2907 memset(&alu, 0, sizeof(struct r600_bc_alu));
2908 alu.inst = opcode;
2909 alu.predicate = 1;
2910
2911 alu.dst.sel = ctx->temp_reg;
2912 alu.dst.write = 1;
2913 alu.dst.chan = 0;
2914
2915 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2916 alu.src[1].sel = V_SQ_ALU_SRC_0;
2917 alu.src[1].chan = 0;
2918
2919 alu.last = 1;
2920
2921 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2922 if (r)
2923 return r;
2924 return 0;
2925 }
2926
2927 static int pops(struct r600_shader_ctx *ctx, int pops)
2928 {
2929 int alu_pop = 3;
2930 if (ctx->bc->cf_last) {
2931 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2932 alu_pop = 0;
2933 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2934 alu_pop = 1;
2935 }
2936 alu_pop += pops;
2937 if (alu_pop == 1) {
2938 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2939 ctx->bc->force_add_cf = 1;
2940 } else if (alu_pop == 2) {
2941 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2942 ctx->bc->force_add_cf = 1;
2943 } else {
2944 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2945 ctx->bc->cf_last->pop_count = pops;
2946 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2947 }
2948 return 0;
2949 }
2950
2951 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2952 {
2953 switch(reason) {
2954 case FC_PUSH_VPM:
2955 ctx->bc->callstack[ctx->bc->call_sp].current--;
2956 break;
2957 case FC_PUSH_WQM:
2958 case FC_LOOP:
2959 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2960 break;
2961 case FC_REP:
2962 /* TOODO : for 16 vp asic should -= 2; */
2963 ctx->bc->callstack[ctx->bc->call_sp].current --;
2964 break;
2965 }
2966 }
2967
2968 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2969 {
2970 if (check_max_only) {
2971 int diff;
2972 switch (reason) {
2973 case FC_PUSH_VPM:
2974 diff = 1;
2975 break;
2976 case FC_PUSH_WQM:
2977 diff = 4;
2978 break;
2979 default:
2980 assert(0);
2981 diff = 0;
2982 }
2983 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2984 ctx->bc->callstack[ctx->bc->call_sp].max) {
2985 ctx->bc->callstack[ctx->bc->call_sp].max =
2986 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2987 }
2988 return;
2989 }
2990 switch (reason) {
2991 case FC_PUSH_VPM:
2992 ctx->bc->callstack[ctx->bc->call_sp].current++;
2993 break;
2994 case FC_PUSH_WQM:
2995 case FC_LOOP:
2996 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2997 break;
2998 case FC_REP:
2999 ctx->bc->callstack[ctx->bc->call_sp].current++;
3000 break;
3001 }
3002
3003 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3004 ctx->bc->callstack[ctx->bc->call_sp].max) {
3005 ctx->bc->callstack[ctx->bc->call_sp].max =
3006 ctx->bc->callstack[ctx->bc->call_sp].current;
3007 }
3008 }
3009
3010 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3011 {
3012 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3013
3014 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
3015 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
3016 sp->mid[sp->num_mid] = ctx->bc->cf_last;
3017 sp->num_mid++;
3018 }
3019
3020 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3021 {
3022 ctx->bc->fc_sp++;
3023 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3024 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3025 }
3026
3027 static void fc_poplevel(struct r600_shader_ctx *ctx)
3028 {
3029 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3030 if (sp->mid) {
3031 free(sp->mid);
3032 sp->mid = NULL;
3033 }
3034 sp->num_mid = 0;
3035 sp->start = NULL;
3036 sp->type = 0;
3037 ctx->bc->fc_sp--;
3038 }
3039
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It holds unfinished helpers for return / flag based flow control; several
 * of them are empty stubs.
 */
#if 0
/* Add a RETURN CF instruction; always reports success. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Add a JUMP CF instruction with the given pop count. The `offset`
 * argument is not used yet (see the TODO below).
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Calls, in order: the flag test, a jump, the flag reset, ifidx + 1 pops
 * and a return. Results of the helpers are not checked.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Calls the flag test, adds the current instruction's CF opcode with a pop
 * count of 1, records it in the `mid` list of level fc_sp and pops once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3087
3088 static int tgsi_if(struct r600_shader_ctx *ctx)
3089 {
3090 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
3091
3092 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3093
3094 fc_pushlevel(ctx, FC_IF);
3095
3096 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3097 return 0;
3098 }
3099
3100 static int tgsi_else(struct r600_shader_ctx *ctx)
3101 {
3102 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3103 ctx->bc->cf_last->pop_count = 1;
3104
3105 fc_set_mid(ctx, ctx->bc->fc_sp);
3106 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3107 return 0;
3108 }
3109
3110 static int tgsi_endif(struct r600_shader_ctx *ctx)
3111 {
3112 pops(ctx, 1);
3113 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3114 R600_ERR("if/endif unbalanced in shader\n");
3115 return -1;
3116 }
3117
3118 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3119 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3120 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3121 } else {
3122 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3123 }
3124 fc_poplevel(ctx);
3125
3126 callstack_decrease_current(ctx, FC_PUSH_VPM);
3127 return 0;
3128 }
3129
3130 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3131 {
3132 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3133
3134 fc_pushlevel(ctx, FC_LOOP);
3135
3136 /* check stack depth */
3137 callstack_check_depth(ctx, FC_LOOP, 0);
3138 return 0;
3139 }
3140
3141 static int tgsi_endloop(struct r600_shader_ctx *ctx)
3142 {
3143 int i;
3144
3145 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3146
3147 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3148 R600_ERR("loop/endloop in shader code are not paired.\n");
3149 return -EINVAL;
3150 }
3151
3152 /* fixup loop pointers - from r600isa
3153 LOOP END points to CF after LOOP START,
3154 LOOP START point to CF after LOOP END
3155 BRK/CONT point to LOOP END CF
3156 */
3157 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3158
3159 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3160
3161 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3162 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3163 }
3164 /* TODO add LOOPRET support */
3165 fc_poplevel(ctx);
3166 callstack_decrease_current(ctx, FC_LOOP);
3167 return 0;
3168 }
3169
3170 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3171 {
3172 unsigned int fscp;
3173
3174 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3175 {
3176 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3177 break;
3178 }
3179
3180 if (fscp == 0) {
3181 R600_ERR("Break not inside loop/endloop pair\n");
3182 return -EINVAL;
3183 }
3184
3185 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3186 ctx->bc->cf_last->pop_count = 1;
3187
3188 fc_set_mid(ctx, fscp);
3189
3190 pops(ctx, 1);
3191 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3192 return 0;
3193 }
3194
3195 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3196 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3197 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3198 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3199
3200 /* FIXME:
3201 * For state trackers other than OpenGL, we'll want to use
3202 * _RECIP_IEEE instead.
3203 */
3204 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3205
3206 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3207 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3208 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3209 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3210 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3211 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3212 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3213 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3214 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3215 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3216 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3217 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3218 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3219 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3220 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3221 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3222 /* gap */
3223 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3224 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3225 /* gap */
3226 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3227 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3228 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3229 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3230 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3231 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3232 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3233 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3234 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3235 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3236 /* gap */
3237 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3238 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3239 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3241 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3242 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3243 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3244 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3245 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3247 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3249 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3250 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3251 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3253 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3254 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3255 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3256 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3257 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3258 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3259 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3260 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3267 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3268 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3269 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3271 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3272 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3273 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3274 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3275 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3276 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3277 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3278 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3279 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3280 /* gap */
3281 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3282 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3284 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3285 /* gap */
3286 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3289 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3291 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3292 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3294 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3295 /* gap */
3296 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3297 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3298 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3299 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3300 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3301 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3302 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3303 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3304 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3305 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3308 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3310 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311 /* gap */
3312 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3314 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317 /* gap */
3318 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3322 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3323 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3325 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3326 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3327 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3328 /* gap */
3329 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3330 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3331 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3332 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3333 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3334 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3335 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3336 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3341 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3342 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3343 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3344 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3345 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3346 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3347 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3348 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3349 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357 };
3358
/*
 * TGSI translation table built from the EG_-prefixed ALU/CF instruction
 * constants (texture rows use the SQ_TEX_INST_* constants instead).
 *
 * Each row lists four values in order: a TGSI opcode (a bare number where no
 * TGSI_OPCODE_* name is used), a 0/1 flag, a hardware instruction constant,
 * and a handler function. Rows whose handler is tgsi_unsupported all carry
 * the NOP instruction constant as filler.
 *
 * NOTE(review): the numeric "gap" rows suggest the table is indexed directly
 * by TGSI opcode value, so rows presumably must stay in opcode order with no
 * holes -- confirm against the lookup site before reordering or removing any.
 * NOTE(review): the 0/1 flag is 1 only on the MAD row, which is also the only
 * row using an OP3 constant; presumably it marks three-source ALU
 * instructions -- confirm against the struct definition.
 * NOTE(review): "eg" presumably stands for the Evergreen GPU family -- confirm.
 */
3359 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3360 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3361 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3362 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3363 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3364 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3365 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3366 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3367 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3368 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3369 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3370 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3371 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3372 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3373 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
/* NOTE(review): SLT (and SLE below) reuse SETGT/SETGE via tgsi_op2_swap; presumably the handler swaps the two sources -- confirm. */
3374 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3375 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3376 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
/* NOTE(review): SUB is mapped to the ADD constant; presumably tgsi_op2 negates a source for this opcode -- confirm. */
3377 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3378 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3379 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3380 /* gap */
3381 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3382 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3383 /* gap */
3384 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3385 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3386 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3387 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3388 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3389 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3390 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3391 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3392 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3393 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3394 /* gap */
3395 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* NOTE(review): ABS is mapped to the MOV constant; presumably tgsi_op2 applies an absolute-value source modifier -- confirm. */
3396 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3397 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3398 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3399 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3400 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3401 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3402 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3403 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3404 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3405 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3406 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3407 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3408 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3409 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3410 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3411 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3412 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3413 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3414 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3415 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3416 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3417 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3418 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3419 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3420 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3421 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3422 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3423 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3424 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3425 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3426 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3427 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3428 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3429 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3430 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3431 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3432 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3433 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3434 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3435 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3436 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3437 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3438 /* gap */
3439 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3440 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3441 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3442 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3443 /* gap */
3444 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3445 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3446 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3447 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3448 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3449 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3450 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3451 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3452 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3453 /* gap */
3454 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3455 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3456 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3457 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3458 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3459 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3460 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3461 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3462 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3463 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3464 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3465 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3466 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3467 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3468 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3469 /* gap */
3470 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3471 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3472 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3473 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3474 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3475 /* gap */
3476 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3477 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3478 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3479 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3480 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3481 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3482 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3483 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3484 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3485 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3486 /* gap */
3487 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3488 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3489 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3490 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3491 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3492 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3493 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3494 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3495 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3496 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3497 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3498 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3499 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3500 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3501 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3502 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3503 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3504 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3505 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3506 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3507 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3508 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3509 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3510 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3511 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3512 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3513 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3514 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3515 };
3516
/*
 * TGSI translation table that reuses the EG_-prefixed instruction constants
 * but routes some opcodes to cayman_* handlers.
 *
 * Each row lists four values in order: a TGSI opcode (a bare number where no
 * TGSI_OPCODE_* name is used), a 0/1 flag, a hardware instruction constant,
 * and a handler function. Rows whose handler is tgsi_unsupported all carry
 * the NOP instruction constant as filler.
 *
 * Compared with eg_shader_tgsi_instruction, only these handlers differ:
 *   RCP, RSQ, EX2, LG2 -> cayman_emit_float_instr
 *   POW                -> cayman_pow
 *   COS, SIN           -> cayman_trig
 * The instruction constants in every row are the same as in that table.
 *
 * NOTE(review): "cm" presumably stands for Cayman, and the dedicated handlers
 * presumably exist because of the scalar-op differences described in the
 * "CAYMAN notes" comment at the top of this file -- confirm.
 * NOTE(review): the numeric "gap" rows suggest the table is indexed directly
 * by TGSI opcode value, so rows presumably must stay in opcode order with no
 * holes -- confirm against the lookup site before reordering or removing any.
 */
3517 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3518 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3519 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3520 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3521 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3522 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3523 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3524 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3525 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3526 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3527 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3528 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3529 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3530 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3531 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3532 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3533 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3534 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3535 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3536 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3537 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3538 /* gap */
3539 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3540 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3541 /* gap */
3542 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3543 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3544 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3545 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3546 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3547 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3548 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3549 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3550 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3551 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3552 /* gap */
3553 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3554 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3555 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3556 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3557 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3558 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3559 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3560 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3561 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3562 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3563 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3564 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3565 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3566 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3567 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3568 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3569 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3570 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3571 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3572 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3573 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3574 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3575 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3576 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3577 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3578 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3579 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3580 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3581 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3582 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3583 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3584 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3585 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3586 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3587 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3588 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3589 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3590 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3591 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3592 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3593 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3594 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3595 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3596 /* gap */
3597 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3598 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3599 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3600 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3601 /* gap */
3602 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3603 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3604 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3605 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3606 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3607 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3608 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3609 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3610 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3611 /* gap */
3612 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3613 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3614 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3615 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3616 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3617 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3618 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3619 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3620 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3621 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3622 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3623 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3624 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3625 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3626 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3627 /* gap */
3628 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3630 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3631 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3632 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3633 /* gap */
3634 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3635 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3636 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3637 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3638 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3639 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3640 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3641 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3642 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3643 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3644 /* gap */
3645 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3646 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3648 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3649 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3650 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3651 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3652 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3653 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3654 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3655 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3656 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3657 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3658 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3659 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3660 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3661 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3662 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3663 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3664 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3665 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3666 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3667 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3668 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3669 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3670 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3671 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3672 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3673 };