c55cdd707eb707fdb028e87f9b843680a42e9962
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
30 #include "r600_asm.h"
31 #include "r600_sq.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
34 #include "r600d.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60
61 int r600_find_vs_semantic_index(struct r600_shader *vs,
62 struct r600_shader *ps, int id)
63 {
64 struct r600_shader_io *input = &ps->input[id];
65
66 for (int i = 0; i < vs->noutput; i++) {
67 if (input->name == vs->output[i].name &&
68 input->sid == vs->output[i].sid) {
69 return i - 1;
70 }
71 }
72 return 0;
73 }
74
75 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
76 {
77 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
78 struct r600_shader *rshader = &shader->shader;
79 uint32_t *ptr;
80 int i;
81
82 /* copy new shader */
83 if (shader->bo == NULL) {
84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
86 if (shader->bo == NULL) {
87 return -ENOMEM;
88 }
89 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
90 if (R600_BIG_ENDIAN) {
91 for (i = 0; i < rshader->bc.ndw; ++i) {
92 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
93 }
94 } else {
95 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
96 }
97 r600_bo_unmap(rctx->radeon, shader->bo);
98 }
99 /* build state */
100 switch (rshader->processor_type) {
101 case TGSI_PROCESSOR_VERTEX:
102 if (rctx->chip_class >= EVERGREEN) {
103 evergreen_pipe_shader_vs(ctx, shader);
104 } else {
105 r600_pipe_shader_vs(ctx, shader);
106 }
107 break;
108 case TGSI_PROCESSOR_FRAGMENT:
109 if (rctx->chip_class >= EVERGREEN) {
110 evergreen_pipe_shader_ps(ctx, shader);
111 } else {
112 r600_pipe_shader_ps(ctx, shader);
113 }
114 break;
115 default:
116 return -EINVAL;
117 }
118 return 0;
119 }
120
121 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
122
123 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
124 {
125 static int dump_shaders = -1;
126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127 int r;
128
129 /* Would like some magic "get_bool_option_once" routine.
130 */
131 if (dump_shaders == -1)
132 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
133
134 if (dump_shaders) {
135 fprintf(stderr, "--------------------------------------------------------------\n");
136 tgsi_dump(shader->tokens, 0);
137 }
138 r = r600_shader_from_tgsi(rctx, shader);
139 if (r) {
140 R600_ERR("translation from TGSI failed !\n");
141 return r;
142 }
143 r = r600_bc_build(&shader->shader.bc);
144 if (r) {
145 R600_ERR("building bytecode failed !\n");
146 return r;
147 }
148 if (dump_shaders) {
149 r600_bc_dump(&shader->shader.bc);
150 fprintf(stderr, "______________________________________________________________\n");
151 }
152 return r600_pipe_shader(ctx, shader);
153 }
154
155 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156 {
157 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
158
159 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
160 r600_bc_clear(&shader->shader.bc);
161
162 memset(&shader->shader,0,sizeof(struct r600_shader));
163 }
164
165 /*
166 * tgsi -> r600 shader
167 */
/* Forward declaration; r600_shader_ctx keeps a pointer to one of these. */
struct r600_shader_tgsi_instruction;

/*
 * One TGSI source operand after decoding by tgsi_src().
 */
struct r600_shader_src {
	/* source selector: a GPR/constant index or a V_SQ_ALU_SRC_* special value */
	unsigned sel;
	/* per-destination-channel swizzle, copied from the TGSI SwizzleX..W fields */
	unsigned swizzle[4];
	/* negate modifier */
	unsigned neg;
	/* absolute-value modifier */
	unsigned abs;
	/* set to V_SQ_REL_RELATIVE when the TGSI register is indirectly addressed */
	unsigned rel;
	/* the four immediate dwords when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t value[4];
};
178
/*
 * Working state for translating one TGSI shader into r600 bytecode.
 * It is set up and used by r600_shader_from_tgsi().
 */
struct r600_shader_ctx {
	/* filled by tgsi_scan_shader() */
	struct tgsi_shader_info info;
	/* token iterator; the current token is read from parse.FullToken */
	struct tgsi_parse_context parse;
	/* the TGSI token stream being translated */
	const struct tgsi_token *tokens;
	/* TGSI processor type taken from the parsed header */
	unsigned type;
	/* per-TGSI-file base added to a register index to form a selector */
	unsigned file_offset[TGSI_FILE_COUNT];
	/* first register handed out by r600_get_temp() */
	unsigned temp_reg;
	/* register placed right after the TGSI temporaries; used as the index for relative constant fetches */
	unsigned ar_reg;
	/* opcode table entry of the instruction currently being processed */
	struct r600_shader_tgsi_instruction *inst_info;
	/* bytecode being generated (points into the r600_shader) */
	struct r600_bc *bc;
	/* shader record being filled in */
	struct r600_shader *shader;
	/* decoded source operands of the current instruction */
	struct r600_shader_src src[4];
	/* immediates seen so far, four dwords per immediate */
	u32 *literals;
	/* number of immediates stored in literals */
	u32 nliterals;
	/* number of driver temporaries handed out for the current instruction */
	u32 max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean input_centroid;
	boolean input_linear;
	boolean input_perspective;
	/* value computed by evergreen_gpr_count() */
	int num_interp_gpr;
};
200
/*
 * One entry of a TGSI-opcode translation table.  The tables are indexed by
 * TGSI opcode and the entry's process() callback emits the bytecode.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry handles */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably marks three-operand hardware ops - not used in the code visible here */
	unsigned is_op3;
	/* hardware opcode the handlers copy into alu.inst */
	unsigned r600_opcode;
	/* emitter callback; returns 0 on success or a negative errno value */
	int (*process)(struct r600_shader_ctx *ctx);
};

/* per-chip opcode tables (r600/r700, evergreen, cayman), defined later in the file */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
210
211 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
212 {
213 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
214 int j;
215
216 if (i->Instruction.NumDstRegs > 1) {
217 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
218 return -EINVAL;
219 }
220 if (i->Instruction.Predicate) {
221 R600_ERR("predicate unsupported\n");
222 return -EINVAL;
223 }
224 #if 0
225 if (i->Instruction.Label) {
226 R600_ERR("label unsupported\n");
227 return -EINVAL;
228 }
229 #endif
230 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
231 if (i->Src[j].Register.Dimension) {
232 R600_ERR("unsupported src %d (dimension %d)\n", j,
233 i->Src[j].Register.Dimension);
234 return -EINVAL;
235 }
236 }
237 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
238 if (i->Dst[j].Register.Dimension) {
239 R600_ERR("unsupported dst (dimension)\n");
240 return -EINVAL;
241 }
242 }
243 return 0;
244 }
245
246 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
247 {
248 int i, r;
249 struct r600_bc_alu alu;
250 int gpr = 0, base_chan = 0;
251 int ij_index = 0;
252
253 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
254 ij_index = 0;
255 if (ctx->shader->input[input].centroid)
256 ij_index++;
257 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
258 ij_index = 0;
259 /* if we have perspective add one */
260 if (ctx->input_perspective) {
261 ij_index++;
262 /* if we have perspective centroid */
263 if (ctx->input_centroid)
264 ij_index++;
265 }
266 if (ctx->shader->input[input].centroid)
267 ij_index++;
268 }
269
270 /* work out gpr and base_chan from index */
271 gpr = ij_index / 2;
272 base_chan = (2 * (ij_index % 2)) + 1;
273
274 for (i = 0; i < 8; i++) {
275 memset(&alu, 0, sizeof(struct r600_bc_alu));
276
277 if (i < 4)
278 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
279 else
280 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
281
282 if ((i > 1) && (i < 6)) {
283 alu.dst.sel = ctx->shader->input[input].gpr;
284 alu.dst.write = 1;
285 }
286
287 alu.dst.chan = i % 4;
288
289 alu.src[0].sel = gpr;
290 alu.src[0].chan = (base_chan - (i % 2));
291
292 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
293
294 alu.bank_swizzle_force = SQ_ALU_VEC_210;
295 if ((i % 4) == 3)
296 alu.last = 1;
297 r = r600_bc_add_alu(ctx->bc, &alu);
298 if (r)
299 return r;
300 }
301 return 0;
302 }
303
304
305 static int tgsi_declaration(struct r600_shader_ctx *ctx)
306 {
307 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
308 unsigned i;
309 int r;
310
311 switch (d->Declaration.File) {
312 case TGSI_FILE_INPUT:
313 i = ctx->shader->ninput++;
314 ctx->shader->input[i].name = d->Semantic.Name;
315 ctx->shader->input[i].sid = d->Semantic.Index;
316 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
317 ctx->shader->input[i].centroid = d->Declaration.Centroid;
318 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
319 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
320 /* turn input into interpolate on EG */
321 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
322 if (ctx->shader->input[i].interpolate > 0) {
323 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
324 evergreen_interp_alu(ctx, i);
325 }
326 }
327 }
328 break;
329 case TGSI_FILE_OUTPUT:
330 i = ctx->shader->noutput++;
331 ctx->shader->output[i].name = d->Semantic.Name;
332 ctx->shader->output[i].sid = d->Semantic.Index;
333 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
334 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
335 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
336 /* these don't count as vertex param exports */
337 if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
338 (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
339 ctx->shader->npos++;
340 }
341 break;
342 case TGSI_FILE_CONSTANT:
343 case TGSI_FILE_TEMPORARY:
344 case TGSI_FILE_SAMPLER:
345 case TGSI_FILE_ADDRESS:
346 break;
347
348 case TGSI_FILE_SYSTEM_VALUE:
349 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
350 struct r600_bc_alu alu;
351 memset(&alu, 0, sizeof(struct r600_bc_alu));
352
353 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
354 alu.src[0].sel = 0;
355 alu.src[0].chan = 3;
356
357 alu.dst.sel = 0;
358 alu.dst.chan = 3;
359 alu.dst.write = 1;
360 alu.last = 1;
361
362 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
363 return r;
364 break;
365 }
366
367 default:
368 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
369 return -EINVAL;
370 }
371 return 0;
372 }
373
374 static int r600_get_temp(struct r600_shader_ctx *ctx)
375 {
376 return ctx->temp_reg + ctx->max_driver_temp_used++;
377 }
378
379 /*
380 * for evergreen we need to scan the shader to find the number of GPRs we need to
381 * reserve for interpolation.
382 *
383 * we need to know if we are going to emit
384 * any centroid inputs
385 * if perspective and linear are required
386 */
387 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
388 {
389 int i;
390 int num_baryc;
391
392 ctx->input_linear = FALSE;
393 ctx->input_perspective = FALSE;
394 ctx->input_centroid = FALSE;
395 ctx->num_interp_gpr = 1;
396
397 /* any centroid inputs */
398 for (i = 0; i < ctx->info.num_inputs; i++) {
399 /* skip position/face */
400 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
401 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
402 continue;
403 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
404 ctx->input_linear = TRUE;
405 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
406 ctx->input_perspective = TRUE;
407 if (ctx->info.input_centroid[i])
408 ctx->input_centroid = TRUE;
409 }
410
411 num_baryc = 0;
412 /* ignoring sample for now */
413 if (ctx->input_perspective)
414 num_baryc++;
415 if (ctx->input_linear)
416 num_baryc++;
417 if (ctx->input_centroid)
418 num_baryc *= 2;
419
420 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
421
422 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
423 return ctx->num_interp_gpr;
424 }
425
426 static void tgsi_src(struct r600_shader_ctx *ctx,
427 const struct tgsi_full_src_register *tgsi_src,
428 struct r600_shader_src *r600_src)
429 {
430 memset(r600_src, 0, sizeof(*r600_src));
431 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
432 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
433 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
434 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
435 r600_src->neg = tgsi_src->Register.Negate;
436 r600_src->abs = tgsi_src->Register.Absolute;
437
438 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
439 int index;
440 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
441 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
442 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
443
444 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
445 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
446 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
447 return;
448 }
449 index = tgsi_src->Register.Index;
450 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
451 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
452 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
453 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
454 r600_src->swizzle[0] = 3;
455 r600_src->swizzle[1] = 3;
456 r600_src->swizzle[2] = 3;
457 r600_src->swizzle[3] = 3;
458 r600_src->sel = 0;
459 } else {
460 if (tgsi_src->Register.Indirect)
461 r600_src->rel = V_SQ_REL_RELATIVE;
462 r600_src->sel = tgsi_src->Register.Index;
463 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
464 }
465 }
466
467 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
468 {
469 struct r600_bc_vtx vtx;
470 unsigned int ar_reg;
471 int r;
472
473 if (offset) {
474 struct r600_bc_alu alu;
475
476 memset(&alu, 0, sizeof(alu));
477
478 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
479 alu.src[0].sel = ctx->ar_reg;
480
481 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
482 alu.src[1].value = offset;
483
484 alu.dst.sel = dst_reg;
485 alu.dst.write = 1;
486 alu.last = 1;
487
488 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
489 return r;
490
491 ar_reg = dst_reg;
492 } else {
493 ar_reg = ctx->ar_reg;
494 }
495
496 memset(&vtx, 0, sizeof(vtx));
497 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
498 vtx.src_gpr = ar_reg;
499 vtx.mega_fetch_count = 16;
500 vtx.dst_gpr = dst_reg;
501 vtx.dst_sel_x = 0; /* SEL_X */
502 vtx.dst_sel_y = 1; /* SEL_Y */
503 vtx.dst_sel_z = 2; /* SEL_Z */
504 vtx.dst_sel_w = 3; /* SEL_W */
505 vtx.data_format = FMT_32_32_32_32_FLOAT;
506 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
507 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
508 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
509 vtx.endian = r600_endian_swap(32);
510
511 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
512 return r;
513
514 return 0;
515 }
516
517 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
518 {
519 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
520 struct r600_bc_alu alu;
521 int i, j, k, nconst, r;
522
523 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
524 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
525 nconst++;
526 }
527 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
528 }
529 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
530 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
531 continue;
532 }
533
534 if (ctx->src[i].rel) {
535 int treg = r600_get_temp(ctx);
536 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
537 return r;
538
539 ctx->src[i].sel = treg;
540 ctx->src[i].rel = 0;
541 j--;
542 } else if (j > 0) {
543 int treg = r600_get_temp(ctx);
544 for (k = 0; k < 4; k++) {
545 memset(&alu, 0, sizeof(struct r600_bc_alu));
546 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
547 alu.src[0].sel = ctx->src[i].sel;
548 alu.src[0].chan = k;
549 alu.src[0].rel = ctx->src[i].rel;
550 alu.dst.sel = treg;
551 alu.dst.chan = k;
552 alu.dst.write = 1;
553 if (k == 3)
554 alu.last = 1;
555 r = r600_bc_add_alu(ctx->bc, &alu);
556 if (r)
557 return r;
558 }
559 ctx->src[i].sel = treg;
560 ctx->src[i].rel =0;
561 j--;
562 }
563 }
564 return 0;
565 }
566
567 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
568 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
569 {
570 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
571 struct r600_bc_alu alu;
572 int i, j, k, nliteral, r;
573
574 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
575 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
576 nliteral++;
577 }
578 }
579 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
580 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
581 int treg = r600_get_temp(ctx);
582 for (k = 0; k < 4; k++) {
583 memset(&alu, 0, sizeof(struct r600_bc_alu));
584 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
585 alu.src[0].sel = ctx->src[i].sel;
586 alu.src[0].chan = k;
587 alu.src[0].value = ctx->src[i].value[k];
588 alu.dst.sel = treg;
589 alu.dst.chan = k;
590 alu.dst.write = 1;
591 if (k == 3)
592 alu.last = 1;
593 r = r600_bc_add_alu(ctx->bc, &alu);
594 if (r)
595 return r;
596 }
597 ctx->src[i].sel = treg;
598 j--;
599 }
600 }
601 return 0;
602 }
603
604 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
605 {
606 struct r600_shader *shader = &pipeshader->shader;
607 struct tgsi_token *tokens = pipeshader->tokens;
608 struct tgsi_full_immediate *immediate;
609 struct tgsi_full_property *property;
610 struct r600_shader_ctx ctx;
611 struct r600_bc_output output[32];
612 unsigned output_done, noutput;
613 unsigned opcode;
614 int i, j, r = 0, pos0;
615
616 ctx.bc = &shader->bc;
617 ctx.shader = shader;
618 r600_bc_init(ctx.bc, rctx->chip_class);
619 ctx.tokens = tokens;
620 tgsi_scan_shader(tokens, &ctx.info);
621 tgsi_parse_init(&ctx.parse, tokens);
622 ctx.type = ctx.parse.FullHeader.Processor.Processor;
623 shader->processor_type = ctx.type;
624 ctx.bc->type = shader->processor_type;
625
626 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
627 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
628
629 shader->nr_cbufs = rctx->nr_cbufs;
630
631 /* register allocations */
632 /* Values [0,127] correspond to GPR[0..127].
633 * Values [128,159] correspond to constant buffer bank 0
634 * Values [160,191] correspond to constant buffer bank 1
635 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
636 * Values [256,287] correspond to constant buffer bank 2 (EG)
637 * Values [288,319] correspond to constant buffer bank 3 (EG)
638 * Other special values are shown in the list below.
639 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
640 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
641 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
642 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
643 * 248 SQ_ALU_SRC_0: special constant 0.0.
644 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
645 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
646 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
647 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
648 * 253 SQ_ALU_SRC_LITERAL: literal constant.
649 * 254 SQ_ALU_SRC_PV: previous vector result.
650 * 255 SQ_ALU_SRC_PS: previous scalar result.
651 */
652 for (i = 0; i < TGSI_FILE_COUNT; i++) {
653 ctx.file_offset[i] = 0;
654 }
655 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
656 ctx.file_offset[TGSI_FILE_INPUT] = 1;
657 if (ctx.bc->chip_class >= EVERGREEN) {
658 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
659 } else {
660 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
661 }
662 }
663 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
664 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
665 }
666 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
667 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
668 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
669 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
670
671 /* Outside the GPR range. This will be translated to one of the
672 * kcache banks later. */
673 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
674
675 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
676 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
677 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
678 ctx.temp_reg = ctx.ar_reg + 1;
679
680 ctx.nliterals = 0;
681 ctx.literals = NULL;
682 shader->fs_write_all = FALSE;
683 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
684 tgsi_parse_token(&ctx.parse);
685 switch (ctx.parse.FullToken.Token.Type) {
686 case TGSI_TOKEN_TYPE_IMMEDIATE:
687 immediate = &ctx.parse.FullToken.FullImmediate;
688 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
689 if(ctx.literals == NULL) {
690 r = -ENOMEM;
691 goto out_err;
692 }
693 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
694 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
695 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
696 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
697 ctx.nliterals++;
698 break;
699 case TGSI_TOKEN_TYPE_DECLARATION:
700 r = tgsi_declaration(&ctx);
701 if (r)
702 goto out_err;
703 break;
704 case TGSI_TOKEN_TYPE_INSTRUCTION:
705 r = tgsi_is_supported(&ctx);
706 if (r)
707 goto out_err;
708 ctx.max_driver_temp_used = 0;
709 /* reserve first tmp for everyone */
710 r600_get_temp(&ctx);
711
712 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
713 if ((r = tgsi_split_constant(&ctx)))
714 goto out_err;
715 if ((r = tgsi_split_literal_constant(&ctx)))
716 goto out_err;
717 if (ctx.bc->chip_class == CAYMAN)
718 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
719 else if (ctx.bc->chip_class >= EVERGREEN)
720 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
721 else
722 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
723 r = ctx.inst_info->process(&ctx);
724 if (r)
725 goto out_err;
726 break;
727 case TGSI_TOKEN_TYPE_PROPERTY:
728 property = &ctx.parse.FullToken.FullProperty;
729 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
730 if (property->u[0].Data == 1)
731 shader->fs_write_all = TRUE;
732 }
733 break;
734 default:
735 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
736 r = -EINVAL;
737 goto out_err;
738 }
739 }
740
741 noutput = shader->noutput;
742
743 /* clamp color outputs */
744 if (shader->clamp_color) {
745 for (i = 0; i < noutput; i++) {
746 if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
747 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
748
749 int j;
750 for (j = 0; j < 4; j++) {
751 struct r600_bc_alu alu;
752 memset(&alu, 0, sizeof(struct r600_bc_alu));
753
754 /* MOV_SAT R, R */
755 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
756 alu.dst.sel = shader->output[i].gpr;
757 alu.dst.chan = j;
758 alu.dst.write = 1;
759 alu.dst.clamp = 1;
760 alu.src[0].sel = alu.dst.sel;
761 alu.src[0].chan = j;
762
763 if (j == 3) {
764 alu.last = 1;
765 }
766 r = r600_bc_add_alu(ctx.bc, &alu);
767 if (r)
768 return r;
769 }
770 }
771 }
772 }
773
774 /* export output */
775 j = 0;
776 for (i = 0, pos0 = 0; i < noutput; i++) {
777 memset(&output[i], 0, sizeof(struct r600_bc_output));
778 output[i + j].gpr = shader->output[i].gpr;
779 output[i + j].elem_size = 3;
780 output[i + j].swizzle_x = 0;
781 output[i + j].swizzle_y = 1;
782 output[i + j].swizzle_z = 2;
783 output[i + j].swizzle_w = 3;
784 output[i + j].burst_count = 1;
785 output[i + j].barrier = 1;
786 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
787 output[i + j].array_base = i - pos0;
788 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
789 switch (ctx.type) {
790 case TGSI_PROCESSOR_VERTEX:
791 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
792 output[i + j].array_base = 60;
793 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
794 /* position doesn't count in array_base */
795 pos0++;
796 }
797 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
798 output[i + j].array_base = 61;
799 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
800 /* position doesn't count in array_base */
801 pos0++;
802 }
803 break;
804 case TGSI_PROCESSOR_FRAGMENT:
805 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
806 output[i + j].array_base = shader->output[i].sid;
807 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
808 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
809 for (j = 1; j < shader->nr_cbufs; j++) {
810 memset(&output[i + j], 0, sizeof(struct r600_bc_output));
811 output[i + j].gpr = shader->output[i].gpr;
812 output[i + j].elem_size = 3;
813 output[i + j].swizzle_x = 0;
814 output[i + j].swizzle_y = 1;
815 output[i + j].swizzle_z = 2;
816 output[i + j].swizzle_w = 3;
817 output[i + j].burst_count = 1;
818 output[i + j].barrier = 1;
819 output[i + j].array_base = shader->output[i].sid + j;
820 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
821 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
822 }
823 j--;
824 }
825 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
826 output[i + j].array_base = 61;
827 output[i + j].swizzle_x = 2;
828 output[i + j].swizzle_y = 7;
829 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
830 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
831 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
832 output[i + j].array_base = 61;
833 output[i + j].swizzle_x = 7;
834 output[i + j].swizzle_y = 1;
835 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
836 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
837 } else {
838 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
839 r = -EINVAL;
840 goto out_err;
841 }
842 break;
843 default:
844 R600_ERR("unsupported processor type %d\n", ctx.type);
845 r = -EINVAL;
846 goto out_err;
847 }
848 }
849 noutput += j;
850 /* add fake param output for vertex shader if no param is exported */
851 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
852 for (i = 0, pos0 = 0; i < noutput; i++) {
853 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
854 pos0 = 1;
855 break;
856 }
857 }
858 if (!pos0) {
859 memset(&output[i], 0, sizeof(struct r600_bc_output));
860 output[i].gpr = 0;
861 output[i].elem_size = 3;
862 output[i].swizzle_x = 0;
863 output[i].swizzle_y = 1;
864 output[i].swizzle_z = 2;
865 output[i].swizzle_w = 3;
866 output[i].burst_count = 1;
867 output[i].barrier = 1;
868 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
869 output[i].array_base = 0;
870 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
871 noutput++;
872 }
873 }
874 /* add fake pixel export */
875 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
876 memset(&output[0], 0, sizeof(struct r600_bc_output));
877 output[0].gpr = 0;
878 output[0].elem_size = 3;
879 output[0].swizzle_x = 7;
880 output[0].swizzle_y = 7;
881 output[0].swizzle_z = 7;
882 output[0].swizzle_w = 7;
883 output[0].burst_count = 1;
884 output[0].barrier = 1;
885 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
886 output[0].array_base = 0;
887 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
888 noutput++;
889 }
890 /* set export done on last export of each type */
891 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
892 if (ctx.bc->chip_class < CAYMAN) {
893 if (i == (noutput - 1)) {
894 output[i].end_of_program = 1;
895 }
896 }
897 if (!(output_done & (1 << output[i].type))) {
898 output_done |= (1 << output[i].type);
899 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
900 }
901 }
902 /* add output to bytecode */
903 for (i = 0; i < noutput; i++) {
904 r = r600_bc_add_output(ctx.bc, &output[i]);
905 if (r)
906 goto out_err;
907 }
908 /* add program end */
909 if (ctx.bc->chip_class == CAYMAN)
910 cm_bc_add_cf_end(ctx.bc);
911
912 free(ctx.literals);
913 tgsi_parse_free(&ctx.parse);
914 return 0;
915 out_err:
916 free(ctx.literals);
917 tgsi_parse_free(&ctx.parse);
918 return r;
919 }
920
921 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
922 {
923 R600_ERR("%s tgsi opcode unsupported\n",
924 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
925 return -EINVAL;
926 }
927
/*
 * Opcode-table handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* no bytecode is generated here */
	return 0;
}
932
933 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
934 const struct r600_shader_src *shader_src,
935 unsigned chan)
936 {
937 bc_src->sel = shader_src->sel;
938 bc_src->chan = shader_src->swizzle[chan];
939 bc_src->neg = shader_src->neg;
940 bc_src->abs = shader_src->abs;
941 bc_src->rel = shader_src->rel;
942 bc_src->value = shader_src->value[bc_src->chan];
943 }
944
945 static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src)
946 {
947 bc_src->abs = 1;
948 bc_src->neg = 0;
949 }
950
951 static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src)
952 {
953 bc_src->neg = !bc_src->neg;
954 }
955
956 static void tgsi_dst(struct r600_shader_ctx *ctx,
957 const struct tgsi_full_dst_register *tgsi_dst,
958 unsigned swizzle,
959 struct r600_bc_alu_dst *r600_dst)
960 {
961 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
962
963 r600_dst->sel = tgsi_dst->Register.Index;
964 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
965 r600_dst->chan = swizzle;
966 r600_dst->write = 1;
967 if (tgsi_dst->Register.Indirect)
968 r600_dst->rel = V_SQ_REL_RELATIVE;
969 if (inst->Instruction.Saturate) {
970 r600_dst->clamp = 1;
971 }
972 }
973
/*
 * Return the index (0..3) of the highest channel enabled in `writemask`.
 * Only the low four bits are examined; 0 is returned when none is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to y; x (or an empty mask) yields 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
985
986 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
987 {
988 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
989 struct r600_bc_alu alu;
990 int i, j, r;
991 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
992
993 for (i = 0; i < lasti + 1; i++) {
994 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
995 continue;
996
997 memset(&alu, 0, sizeof(struct r600_bc_alu));
998 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
999
1000 alu.inst = ctx->inst_info->r600_opcode;
1001 if (!swap) {
1002 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1003 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1004 }
1005 } else {
1006 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1007 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1008 }
1009 /* handle some special cases */
1010 switch (ctx->inst_info->tgsi_opcode) {
1011 case TGSI_OPCODE_SUB:
1012 r600_bc_src_toggle_neg(&alu.src[1]);
1013 break;
1014 case TGSI_OPCODE_ABS:
1015 r600_bc_src_set_abs(&alu.src[0]);
1016 break;
1017 default:
1018 break;
1019 }
1020 if (i == lasti) {
1021 alu.last = 1;
1022 }
1023 r = r600_bc_add_alu(ctx->bc, &alu);
1024 if (r)
1025 return r;
1026 }
1027 return 0;
1028 }
1029
/* Per-channel ALU op with the TGSI sources in their natural order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1034
/* Per-channel ALU op with TGSI sources 0 and 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1039
1040 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1041 {
1042 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1043 int i, j, r;
1044 struct r600_bc_alu alu;
1045 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1046
1047 for (i = 0 ; i < last_slot; i++) {
1048 memset(&alu, 0, sizeof(struct r600_bc_alu));
1049 alu.inst = ctx->inst_info->r600_opcode;
1050 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1051 r600_bc_src(&alu.src[j], &ctx->src[j], 0);
1052 }
1053 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1054 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1055
1056 if (i == last_slot - 1)
1057 alu.last = 1;
1058 r = r600_bc_add_alu(ctx->bc, &alu);
1059 if (r)
1060 return r;
1061 }
1062 return 0;
1063 }
1064
1065 /*
1066 * r600 - trunc to -PI..PI range
1067 * r700 - normalize by dividing by 2PI
1068 * see fdo bug 27901
1069 */
1070 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1071 {
1072 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1073 static float double_pi = 3.1415926535 * 2;
1074 static float neg_pi = -3.1415926535;
1075
1076 int r;
1077 struct r600_bc_alu alu;
1078
1079 memset(&alu, 0, sizeof(struct r600_bc_alu));
1080 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1081 alu.is_op3 = 1;
1082
1083 alu.dst.chan = 0;
1084 alu.dst.sel = ctx->temp_reg;
1085 alu.dst.write = 1;
1086
1087 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1088
1089 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1090 alu.src[1].chan = 0;
1091 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1092 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1093 alu.src[2].chan = 0;
1094 alu.last = 1;
1095 r = r600_bc_add_alu(ctx->bc, &alu);
1096 if (r)
1097 return r;
1098
1099 memset(&alu, 0, sizeof(struct r600_bc_alu));
1100 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1101
1102 alu.dst.chan = 0;
1103 alu.dst.sel = ctx->temp_reg;
1104 alu.dst.write = 1;
1105
1106 alu.src[0].sel = ctx->temp_reg;
1107 alu.src[0].chan = 0;
1108 alu.last = 1;
1109 r = r600_bc_add_alu(ctx->bc, &alu);
1110 if (r)
1111 return r;
1112
1113 memset(&alu, 0, sizeof(struct r600_bc_alu));
1114 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1115 alu.is_op3 = 1;
1116
1117 alu.dst.chan = 0;
1118 alu.dst.sel = ctx->temp_reg;
1119 alu.dst.write = 1;
1120
1121 alu.src[0].sel = ctx->temp_reg;
1122 alu.src[0].chan = 0;
1123
1124 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1125 alu.src[1].chan = 0;
1126 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1127 alu.src[2].chan = 0;
1128
1129 if (ctx->bc->chip_class == R600) {
1130 alu.src[1].value = *(uint32_t *)&double_pi;
1131 alu.src[2].value = *(uint32_t *)&neg_pi;
1132 } else {
1133 alu.src[1].sel = V_SQ_ALU_SRC_1;
1134 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1135 alu.src[2].neg = 1;
1136 }
1137
1138 alu.last = 1;
1139 r = r600_bc_add_alu(ctx->bc, &alu);
1140 if (r)
1141 return r;
1142 return 0;
1143 }
1144
1145 static int cayman_trig(struct r600_shader_ctx *ctx)
1146 {
1147 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1148 struct r600_bc_alu alu;
1149 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1150 int i, r;
1151
1152 r = tgsi_setup_trig(ctx);
1153 if (r)
1154 return r;
1155
1156
1157 for (i = 0; i < last_slot; i++) {
1158 memset(&alu, 0, sizeof(struct r600_bc_alu));
1159 alu.inst = ctx->inst_info->r600_opcode;
1160 alu.dst.chan = i;
1161
1162 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1163 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1164
1165 alu.src[0].sel = ctx->temp_reg;
1166 alu.src[0].chan = 0;
1167 if (i == last_slot - 1)
1168 alu.last = 1;
1169 r = r600_bc_add_alu(ctx->bc, &alu);
1170 if (r)
1171 return r;
1172 }
1173 return 0;
1174 }
1175
1176 static int tgsi_trig(struct r600_shader_ctx *ctx)
1177 {
1178 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1179 struct r600_bc_alu alu;
1180 int i, r;
1181 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1182
1183 r = tgsi_setup_trig(ctx);
1184 if (r)
1185 return r;
1186
1187 memset(&alu, 0, sizeof(struct r600_bc_alu));
1188 alu.inst = ctx->inst_info->r600_opcode;
1189 alu.dst.chan = 0;
1190 alu.dst.sel = ctx->temp_reg;
1191 alu.dst.write = 1;
1192
1193 alu.src[0].sel = ctx->temp_reg;
1194 alu.src[0].chan = 0;
1195 alu.last = 1;
1196 r = r600_bc_add_alu(ctx->bc, &alu);
1197 if (r)
1198 return r;
1199
1200 /* replicate result */
1201 for (i = 0; i < lasti + 1; i++) {
1202 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1203 continue;
1204
1205 memset(&alu, 0, sizeof(struct r600_bc_alu));
1206 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1207
1208 alu.src[0].sel = ctx->temp_reg;
1209 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1210 if (i == lasti)
1211 alu.last = 1;
1212 r = r600_bc_add_alu(ctx->bc, &alu);
1213 if (r)
1214 return r;
1215 }
1216 return 0;
1217 }
1218
1219 static int tgsi_scs(struct r600_shader_ctx *ctx)
1220 {
1221 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1222 struct r600_bc_alu alu;
1223 int i, r;
1224
1225 /* We'll only need the trig stuff if we are going to write to the
1226 * X or Y components of the destination vector.
1227 */
1228 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1229 r = tgsi_setup_trig(ctx);
1230 if (r)
1231 return r;
1232 }
1233
1234 /* dst.x = COS */
1235 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1236 if (ctx->bc->chip_class == CAYMAN) {
1237 for (i = 0 ; i < 3; i++) {
1238 memset(&alu, 0, sizeof(struct r600_bc_alu));
1239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1240 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1241
1242 if (i == 0)
1243 alu.dst.write = 1;
1244 else
1245 alu.dst.write = 0;
1246 alu.src[0].sel = ctx->temp_reg;
1247 alu.src[0].chan = 0;
1248 if (i == 2)
1249 alu.last = 1;
1250 r = r600_bc_add_alu(ctx->bc, &alu);
1251 if (r)
1252 return r;
1253 }
1254 } else {
1255 memset(&alu, 0, sizeof(struct r600_bc_alu));
1256 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1257 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1258
1259 alu.src[0].sel = ctx->temp_reg;
1260 alu.src[0].chan = 0;
1261 alu.last = 1;
1262 r = r600_bc_add_alu(ctx->bc, &alu);
1263 if (r)
1264 return r;
1265 }
1266 }
1267
1268 /* dst.y = SIN */
1269 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1270 if (ctx->bc->chip_class == CAYMAN) {
1271 for (i = 0 ; i < 3; i++) {
1272 memset(&alu, 0, sizeof(struct r600_bc_alu));
1273 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1274 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1275 if (i == 1)
1276 alu.dst.write = 1;
1277 else
1278 alu.dst.write = 0;
1279 alu.src[0].sel = ctx->temp_reg;
1280 alu.src[0].chan = 0;
1281 if (i == 2)
1282 alu.last = 1;
1283 r = r600_bc_add_alu(ctx->bc, &alu);
1284 if (r)
1285 return r;
1286 }
1287 } else {
1288 memset(&alu, 0, sizeof(struct r600_bc_alu));
1289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1290 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1291
1292 alu.src[0].sel = ctx->temp_reg;
1293 alu.src[0].chan = 0;
1294 alu.last = 1;
1295 r = r600_bc_add_alu(ctx->bc, &alu);
1296 if (r)
1297 return r;
1298 }
1299 }
1300
1301 /* dst.z = 0.0; */
1302 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1303 memset(&alu, 0, sizeof(struct r600_bc_alu));
1304
1305 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1306
1307 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1308
1309 alu.src[0].sel = V_SQ_ALU_SRC_0;
1310 alu.src[0].chan = 0;
1311
1312 alu.last = 1;
1313
1314 r = r600_bc_add_alu(ctx->bc, &alu);
1315 if (r)
1316 return r;
1317 }
1318
1319 /* dst.w = 1.0; */
1320 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1321 memset(&alu, 0, sizeof(struct r600_bc_alu));
1322
1323 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1324
1325 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1326
1327 alu.src[0].sel = V_SQ_ALU_SRC_1;
1328 alu.src[0].chan = 0;
1329
1330 alu.last = 1;
1331
1332 r = r600_bc_add_alu(ctx->bc, &alu);
1333 if (r)
1334 return r;
1335 }
1336
1337 return 0;
1338 }
1339
1340 static int tgsi_kill(struct r600_shader_ctx *ctx)
1341 {
1342 struct r600_bc_alu alu;
1343 int i, r;
1344
1345 for (i = 0; i < 4; i++) {
1346 memset(&alu, 0, sizeof(struct r600_bc_alu));
1347 alu.inst = ctx->inst_info->r600_opcode;
1348
1349 alu.dst.chan = i;
1350
1351 alu.src[0].sel = V_SQ_ALU_SRC_0;
1352
1353 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1354 alu.src[1].sel = V_SQ_ALU_SRC_1;
1355 alu.src[1].neg = 1;
1356 } else {
1357 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1358 }
1359 if (i == 3) {
1360 alu.last = 1;
1361 }
1362 r = r600_bc_add_alu(ctx->bc, &alu);
1363 if (r)
1364 return r;
1365 }
1366
1367 /* kill must be last in ALU */
1368 ctx->bc->force_add_cf = 1;
1369 ctx->shader->uses_kill = TRUE;
1370 return 0;
1371 }
1372
1373 static int tgsi_lit(struct r600_shader_ctx *ctx)
1374 {
1375 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1376 struct r600_bc_alu alu;
1377 int r;
1378
1379 /* tmp.x = max(src.y, 0.0) */
1380 memset(&alu, 0, sizeof(struct r600_bc_alu));
1381 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1382 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1383 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1384 alu.src[1].chan = 1;
1385
1386 alu.dst.sel = ctx->temp_reg;
1387 alu.dst.chan = 0;
1388 alu.dst.write = 1;
1389
1390 alu.last = 1;
1391 r = r600_bc_add_alu(ctx->bc, &alu);
1392 if (r)
1393 return r;
1394
1395 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1396 {
1397 int chan;
1398 int sel;
1399 int i;
1400
1401 if (ctx->bc->chip_class == CAYMAN) {
1402 for (i = 0; i < 3; i++) {
1403 /* tmp.z = log(tmp.x) */
1404 memset(&alu, 0, sizeof(struct r600_bc_alu));
1405 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1406 alu.src[0].sel = ctx->temp_reg;
1407 alu.src[0].chan = 0;
1408 alu.dst.sel = ctx->temp_reg;
1409 alu.dst.chan = i;
1410 if (i == 2) {
1411 alu.dst.write = 1;
1412 alu.last = 1;
1413 } else
1414 alu.dst.write = 0;
1415
1416 r = r600_bc_add_alu(ctx->bc, &alu);
1417 if (r)
1418 return r;
1419 }
1420 } else {
1421 /* tmp.z = log(tmp.x) */
1422 memset(&alu, 0, sizeof(struct r600_bc_alu));
1423 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1424 alu.src[0].sel = ctx->temp_reg;
1425 alu.src[0].chan = 0;
1426 alu.dst.sel = ctx->temp_reg;
1427 alu.dst.chan = 2;
1428 alu.dst.write = 1;
1429 alu.last = 1;
1430 r = r600_bc_add_alu(ctx->bc, &alu);
1431 if (r)
1432 return r;
1433 }
1434
1435 chan = alu.dst.chan;
1436 sel = alu.dst.sel;
1437
1438 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1439 memset(&alu, 0, sizeof(struct r600_bc_alu));
1440 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1441 alu.src[0].sel = sel;
1442 alu.src[0].chan = chan;
1443 r600_bc_src(&alu.src[1], &ctx->src[0], 3);
1444 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1445 alu.dst.sel = ctx->temp_reg;
1446 alu.dst.chan = 0;
1447 alu.dst.write = 1;
1448 alu.is_op3 = 1;
1449 alu.last = 1;
1450 r = r600_bc_add_alu(ctx->bc, &alu);
1451 if (r)
1452 return r;
1453
1454 if (ctx->bc->chip_class == CAYMAN) {
1455 for (i = 0; i < 3; i++) {
1456 /* dst.z = exp(tmp.x) */
1457 memset(&alu, 0, sizeof(struct r600_bc_alu));
1458 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1459 alu.src[0].sel = ctx->temp_reg;
1460 alu.src[0].chan = 0;
1461 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1462 if (i == 2) {
1463 alu.dst.write = 1;
1464 alu.last = 1;
1465 } else
1466 alu.dst.write = 0;
1467 r = r600_bc_add_alu(ctx->bc, &alu);
1468 if (r)
1469 return r;
1470 }
1471 } else {
1472 /* dst.z = exp(tmp.x) */
1473 memset(&alu, 0, sizeof(struct r600_bc_alu));
1474 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1475 alu.src[0].sel = ctx->temp_reg;
1476 alu.src[0].chan = 0;
1477 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1478 alu.last = 1;
1479 r = r600_bc_add_alu(ctx->bc, &alu);
1480 if (r)
1481 return r;
1482 }
1483 }
1484
1485 /* dst.x, <- 1.0 */
1486 memset(&alu, 0, sizeof(struct r600_bc_alu));
1487 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1488 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1489 alu.src[0].chan = 0;
1490 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1491 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1492 r = r600_bc_add_alu(ctx->bc, &alu);
1493 if (r)
1494 return r;
1495
1496 /* dst.y = max(src.x, 0.0) */
1497 memset(&alu, 0, sizeof(struct r600_bc_alu));
1498 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1499 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1500 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1501 alu.src[1].chan = 0;
1502 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1503 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1504 r = r600_bc_add_alu(ctx->bc, &alu);
1505 if (r)
1506 return r;
1507
1508 /* dst.w, <- 1.0 */
1509 memset(&alu, 0, sizeof(struct r600_bc_alu));
1510 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1511 alu.src[0].sel = V_SQ_ALU_SRC_1;
1512 alu.src[0].chan = 0;
1513 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1514 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1515 alu.last = 1;
1516 r = r600_bc_add_alu(ctx->bc, &alu);
1517 if (r)
1518 return r;
1519
1520 return 0;
1521 }
1522
1523 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1524 {
1525 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1526 struct r600_bc_alu alu;
1527 int i, r;
1528
1529 memset(&alu, 0, sizeof(struct r600_bc_alu));
1530
1531 /* FIXME:
1532 * For state trackers other than OpenGL, we'll want to use
1533 * _RECIPSQRT_IEEE instead.
1534 */
1535 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1536
1537 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1538 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1539 r600_bc_src_set_abs(&alu.src[i]);
1540 }
1541 alu.dst.sel = ctx->temp_reg;
1542 alu.dst.write = 1;
1543 alu.last = 1;
1544 r = r600_bc_add_alu(ctx->bc, &alu);
1545 if (r)
1546 return r;
1547 /* replicate result */
1548 return tgsi_helper_tempx_replicate(ctx);
1549 }
1550
1551 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1552 {
1553 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1554 struct r600_bc_alu alu;
1555 int i, r;
1556
1557 for (i = 0; i < 4; i++) {
1558 memset(&alu, 0, sizeof(struct r600_bc_alu));
1559 alu.src[0].sel = ctx->temp_reg;
1560 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1561 alu.dst.chan = i;
1562 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1563 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1564 if (i == 3)
1565 alu.last = 1;
1566 r = r600_bc_add_alu(ctx->bc, &alu);
1567 if (r)
1568 return r;
1569 }
1570 return 0;
1571 }
1572
1573 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1574 {
1575 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1576 struct r600_bc_alu alu;
1577 int i, r;
1578
1579 memset(&alu, 0, sizeof(struct r600_bc_alu));
1580 alu.inst = ctx->inst_info->r600_opcode;
1581 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1582 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1583 }
1584 alu.dst.sel = ctx->temp_reg;
1585 alu.dst.write = 1;
1586 alu.last = 1;
1587 r = r600_bc_add_alu(ctx->bc, &alu);
1588 if (r)
1589 return r;
1590 /* replicate result */
1591 return tgsi_helper_tempx_replicate(ctx);
1592 }
1593
1594 static int cayman_pow(struct r600_shader_ctx *ctx)
1595 {
1596 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1597 int i, r;
1598 struct r600_bc_alu alu;
1599 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1600
1601 for (i = 0; i < 3; i++) {
1602 memset(&alu, 0, sizeof(struct r600_bc_alu));
1603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1604 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1605 alu.dst.sel = ctx->temp_reg;
1606 alu.dst.chan = i;
1607 alu.dst.write = 1;
1608 if (i == 2)
1609 alu.last = 1;
1610 r = r600_bc_add_alu(ctx->bc, &alu);
1611 if (r)
1612 return r;
1613 }
1614
1615 /* b * LOG2(a) */
1616 memset(&alu, 0, sizeof(struct r600_bc_alu));
1617 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1618 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1619 alu.src[1].sel = ctx->temp_reg;
1620 alu.dst.sel = ctx->temp_reg;
1621 alu.dst.write = 1;
1622 alu.last = 1;
1623 r = r600_bc_add_alu(ctx->bc, &alu);
1624 if (r)
1625 return r;
1626
1627 for (i = 0; i < last_slot; i++) {
1628 /* POW(a,b) = EXP2(b * LOG2(a))*/
1629 memset(&alu, 0, sizeof(struct r600_bc_alu));
1630 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1631 alu.src[0].sel = ctx->temp_reg;
1632
1633 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1634 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1635 if (i == last_slot - 1)
1636 alu.last = 1;
1637 r = r600_bc_add_alu(ctx->bc, &alu);
1638 if (r)
1639 return r;
1640 }
1641 return 0;
1642 }
1643
1644 static int tgsi_pow(struct r600_shader_ctx *ctx)
1645 {
1646 struct r600_bc_alu alu;
1647 int r;
1648
1649 /* LOG2(a) */
1650 memset(&alu, 0, sizeof(struct r600_bc_alu));
1651 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1652 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1653 alu.dst.sel = ctx->temp_reg;
1654 alu.dst.write = 1;
1655 alu.last = 1;
1656 r = r600_bc_add_alu(ctx->bc, &alu);
1657 if (r)
1658 return r;
1659 /* b * LOG2(a) */
1660 memset(&alu, 0, sizeof(struct r600_bc_alu));
1661 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1662 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1663 alu.src[1].sel = ctx->temp_reg;
1664 alu.dst.sel = ctx->temp_reg;
1665 alu.dst.write = 1;
1666 alu.last = 1;
1667 r = r600_bc_add_alu(ctx->bc, &alu);
1668 if (r)
1669 return r;
1670 /* POW(a,b) = EXP2(b * LOG2(a))*/
1671 memset(&alu, 0, sizeof(struct r600_bc_alu));
1672 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1673 alu.src[0].sel = ctx->temp_reg;
1674 alu.dst.sel = ctx->temp_reg;
1675 alu.dst.write = 1;
1676 alu.last = 1;
1677 r = r600_bc_add_alu(ctx->bc, &alu);
1678 if (r)
1679 return r;
1680 return tgsi_helper_tempx_replicate(ctx);
1681 }
1682
1683 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1684 {
1685 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1686 struct r600_bc_alu alu;
1687 int i, r;
1688
1689 /* tmp = (src > 0 ? 1 : src) */
1690 for (i = 0; i < 4; i++) {
1691 memset(&alu, 0, sizeof(struct r600_bc_alu));
1692 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1693 alu.is_op3 = 1;
1694
1695 alu.dst.sel = ctx->temp_reg;
1696 alu.dst.chan = i;
1697
1698 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1699 alu.src[1].sel = V_SQ_ALU_SRC_1;
1700 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1701
1702 if (i == 3)
1703 alu.last = 1;
1704 r = r600_bc_add_alu(ctx->bc, &alu);
1705 if (r)
1706 return r;
1707 }
1708
1709 /* dst = (-tmp > 0 ? -1 : tmp) */
1710 for (i = 0; i < 4; i++) {
1711 memset(&alu, 0, sizeof(struct r600_bc_alu));
1712 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1713 alu.is_op3 = 1;
1714 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1715
1716 alu.src[0].sel = ctx->temp_reg;
1717 alu.src[0].chan = i;
1718 alu.src[0].neg = 1;
1719
1720 alu.src[1].sel = V_SQ_ALU_SRC_1;
1721 alu.src[1].neg = 1;
1722
1723 alu.src[2].sel = ctx->temp_reg;
1724 alu.src[2].chan = i;
1725
1726 if (i == 3)
1727 alu.last = 1;
1728 r = r600_bc_add_alu(ctx->bc, &alu);
1729 if (r)
1730 return r;
1731 }
1732 return 0;
1733 }
1734
1735 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1736 {
1737 struct r600_bc_alu alu;
1738 int i, r;
1739
1740 for (i = 0; i < 4; i++) {
1741 memset(&alu, 0, sizeof(struct r600_bc_alu));
1742 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1743 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1744 alu.dst.chan = i;
1745 } else {
1746 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1747 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1748 alu.src[0].sel = ctx->temp_reg;
1749 alu.src[0].chan = i;
1750 }
1751 if (i == 3) {
1752 alu.last = 1;
1753 }
1754 r = r600_bc_add_alu(ctx->bc, &alu);
1755 if (r)
1756 return r;
1757 }
1758 return 0;
1759 }
1760
1761 static int tgsi_op3(struct r600_shader_ctx *ctx)
1762 {
1763 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1764 struct r600_bc_alu alu;
1765 int i, j, r;
1766 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1767
1768 for (i = 0; i < lasti + 1; i++) {
1769 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1770 continue;
1771
1772 memset(&alu, 0, sizeof(struct r600_bc_alu));
1773 alu.inst = ctx->inst_info->r600_opcode;
1774 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1775 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1776 }
1777
1778 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1779 alu.dst.chan = i;
1780 alu.dst.write = 1;
1781 alu.is_op3 = 1;
1782 if (i == lasti) {
1783 alu.last = 1;
1784 }
1785 r = r600_bc_add_alu(ctx->bc, &alu);
1786 if (r)
1787 return r;
1788 }
1789 return 0;
1790 }
1791
1792 static int tgsi_dp(struct r600_shader_ctx *ctx)
1793 {
1794 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1795 struct r600_bc_alu alu;
1796 int i, j, r;
1797
1798 for (i = 0; i < 4; i++) {
1799 memset(&alu, 0, sizeof(struct r600_bc_alu));
1800 alu.inst = ctx->inst_info->r600_opcode;
1801 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1802 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1803 }
1804
1805 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1806 alu.dst.chan = i;
1807 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1808 /* handle some special cases */
1809 switch (ctx->inst_info->tgsi_opcode) {
1810 case TGSI_OPCODE_DP2:
1811 if (i > 1) {
1812 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1813 alu.src[0].chan = alu.src[1].chan = 0;
1814 }
1815 break;
1816 case TGSI_OPCODE_DP3:
1817 if (i > 2) {
1818 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1819 alu.src[0].chan = alu.src[1].chan = 0;
1820 }
1821 break;
1822 case TGSI_OPCODE_DPH:
1823 if (i == 3) {
1824 alu.src[0].sel = V_SQ_ALU_SRC_1;
1825 alu.src[0].chan = 0;
1826 alu.src[0].neg = 0;
1827 }
1828 break;
1829 default:
1830 break;
1831 }
1832 if (i == 3) {
1833 alu.last = 1;
1834 }
1835 r = r600_bc_add_alu(ctx->bc, &alu);
1836 if (r)
1837 return r;
1838 }
1839 return 0;
1840 }
1841
1842 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1843 unsigned index)
1844 {
1845 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1846 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1847 inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1848 ctx->src[index].neg || ctx->src[index].abs;
1849 }
1850
1851 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1852 unsigned index)
1853 {
1854 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1855 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1856 }
1857
1858 static int tgsi_tex(struct r600_shader_ctx *ctx)
1859 {
1860 static float one_point_five = 1.5f;
1861 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1862 struct r600_bc_tex tex;
1863 struct r600_bc_alu alu;
1864 unsigned src_gpr;
1865 int r, i, j;
1866 int opcode;
1867 /* Texture fetch instructions can only use gprs as source.
1868 * Also they cannot negate the source or take the absolute value */
1869 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
1870 boolean src_loaded = FALSE;
1871 unsigned sampler_src_reg = 1;
1872
1873 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
1874
1875 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1876 /* TGSI moves the sampler to src reg 3 for TXD */
1877 sampler_src_reg = 3;
1878
1879 for (i = 1; i < 3; i++) {
1880 /* set gradients h/v */
1881 memset(&tex, 0, sizeof(struct r600_bc_tex));
1882 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
1883 SQ_TEX_INST_SET_GRADIENTS_V;
1884 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
1885 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1886
1887 if (tgsi_tex_src_requires_loading(ctx, i)) {
1888 tex.src_gpr = r600_get_temp(ctx);
1889 tex.src_sel_x = 0;
1890 tex.src_sel_y = 1;
1891 tex.src_sel_z = 2;
1892 tex.src_sel_w = 3;
1893
1894 for (j = 0; j < 4; j++) {
1895 memset(&alu, 0, sizeof(struct r600_bc_alu));
1896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1897 r600_bc_src(&alu.src[0], &ctx->src[i], j);
1898 alu.dst.sel = tex.src_gpr;
1899 alu.dst.chan = j;
1900 if (j == 3)
1901 alu.last = 1;
1902 alu.dst.write = 1;
1903 r = r600_bc_add_alu(ctx->bc, &alu);
1904 if (r)
1905 return r;
1906 }
1907
1908 } else {
1909 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
1910 tex.src_sel_x = ctx->src[i].swizzle[0];
1911 tex.src_sel_y = ctx->src[i].swizzle[1];
1912 tex.src_sel_z = ctx->src[i].swizzle[2];
1913 tex.src_sel_w = ctx->src[i].swizzle[3];
1914 tex.src_rel = ctx->src[i].rel;
1915 }
1916 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
1917 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
1918 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1919 tex.coord_type_x = 1;
1920 tex.coord_type_y = 1;
1921 tex.coord_type_z = 1;
1922 tex.coord_type_w = 1;
1923 }
1924 r = r600_bc_add_tex(ctx->bc, &tex);
1925 if (r)
1926 return r;
1927 }
1928 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1929 int out_chan;
1930 /* Add perspective divide */
1931 if (ctx->bc->chip_class == CAYMAN) {
1932 out_chan = 2;
1933 for (i = 0; i < 3; i++) {
1934 memset(&alu, 0, sizeof(struct r600_bc_alu));
1935 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1936 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1937
1938 alu.dst.sel = ctx->temp_reg;
1939 alu.dst.chan = i;
1940 if (i == 2)
1941 alu.last = 1;
1942 if (out_chan == i)
1943 alu.dst.write = 1;
1944 r = r600_bc_add_alu(ctx->bc, &alu);
1945 if (r)
1946 return r;
1947 }
1948
1949 } else {
1950 out_chan = 3;
1951 memset(&alu, 0, sizeof(struct r600_bc_alu));
1952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1953 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1954
1955 alu.dst.sel = ctx->temp_reg;
1956 alu.dst.chan = out_chan;
1957 alu.last = 1;
1958 alu.dst.write = 1;
1959 r = r600_bc_add_alu(ctx->bc, &alu);
1960 if (r)
1961 return r;
1962 }
1963
1964 for (i = 0; i < 3; i++) {
1965 memset(&alu, 0, sizeof(struct r600_bc_alu));
1966 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1967 alu.src[0].sel = ctx->temp_reg;
1968 alu.src[0].chan = out_chan;
1969 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1970 alu.dst.sel = ctx->temp_reg;
1971 alu.dst.chan = i;
1972 alu.dst.write = 1;
1973 r = r600_bc_add_alu(ctx->bc, &alu);
1974 if (r)
1975 return r;
1976 }
1977 memset(&alu, 0, sizeof(struct r600_bc_alu));
1978 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1979 alu.src[0].sel = V_SQ_ALU_SRC_1;
1980 alu.src[0].chan = 0;
1981 alu.dst.sel = ctx->temp_reg;
1982 alu.dst.chan = 3;
1983 alu.last = 1;
1984 alu.dst.write = 1;
1985 r = r600_bc_add_alu(ctx->bc, &alu);
1986 if (r)
1987 return r;
1988 src_loaded = TRUE;
1989 src_gpr = ctx->temp_reg;
1990 }
1991
1992 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1993 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1994 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1995
1996 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1997 for (i = 0; i < 4; i++) {
1998 memset(&alu, 0, sizeof(struct r600_bc_alu));
1999 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2000 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2001 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2002 alu.dst.sel = ctx->temp_reg;
2003 alu.dst.chan = i;
2004 if (i == 3)
2005 alu.last = 1;
2006 alu.dst.write = 1;
2007 r = r600_bc_add_alu(ctx->bc, &alu);
2008 if (r)
2009 return r;
2010 }
2011
2012 /* tmp1.z = RCP_e(|tmp1.z|) */
2013 if (ctx->bc->chip_class == CAYMAN) {
2014 for (i = 0; i < 3; i++) {
2015 memset(&alu, 0, sizeof(struct r600_bc_alu));
2016 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2017 alu.src[0].sel = ctx->temp_reg;
2018 alu.src[0].chan = 2;
2019 alu.src[0].abs = 1;
2020 alu.dst.sel = ctx->temp_reg;
2021 alu.dst.chan = i;
2022 if (i == 2)
2023 alu.dst.write = 1;
2024 if (i == 2)
2025 alu.last = 1;
2026 r = r600_bc_add_alu(ctx->bc, &alu);
2027 if (r)
2028 return r;
2029 }
2030 } else {
2031 memset(&alu, 0, sizeof(struct r600_bc_alu));
2032 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2033 alu.src[0].sel = ctx->temp_reg;
2034 alu.src[0].chan = 2;
2035 alu.src[0].abs = 1;
2036 alu.dst.sel = ctx->temp_reg;
2037 alu.dst.chan = 2;
2038 alu.dst.write = 1;
2039 alu.last = 1;
2040 r = r600_bc_add_alu(ctx->bc, &alu);
2041 if (r)
2042 return r;
2043 }
2044
2045 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
2046 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
2047 * muladd has no writemask, have to use another temp
2048 */
2049 memset(&alu, 0, sizeof(struct r600_bc_alu));
2050 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2051 alu.is_op3 = 1;
2052
2053 alu.src[0].sel = ctx->temp_reg;
2054 alu.src[0].chan = 0;
2055 alu.src[1].sel = ctx->temp_reg;
2056 alu.src[1].chan = 2;
2057
2058 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2059 alu.src[2].chan = 0;
2060 alu.src[2].value = *(uint32_t *)&one_point_five;
2061
2062 alu.dst.sel = ctx->temp_reg;
2063 alu.dst.chan = 0;
2064 alu.dst.write = 1;
2065
2066 r = r600_bc_add_alu(ctx->bc, &alu);
2067 if (r)
2068 return r;
2069
2070 memset(&alu, 0, sizeof(struct r600_bc_alu));
2071 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2072 alu.is_op3 = 1;
2073
2074 alu.src[0].sel = ctx->temp_reg;
2075 alu.src[0].chan = 1;
2076 alu.src[1].sel = ctx->temp_reg;
2077 alu.src[1].chan = 2;
2078
2079 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2080 alu.src[2].chan = 0;
2081 alu.src[2].value = *(uint32_t *)&one_point_five;
2082
2083 alu.dst.sel = ctx->temp_reg;
2084 alu.dst.chan = 1;
2085 alu.dst.write = 1;
2086
2087 alu.last = 1;
2088 r = r600_bc_add_alu(ctx->bc, &alu);
2089 if (r)
2090 return r;
2091
2092 src_loaded = TRUE;
2093 src_gpr = ctx->temp_reg;
2094 }
2095
2096 if (src_requires_loading && !src_loaded) {
2097 for (i = 0; i < 4; i++) {
2098 memset(&alu, 0, sizeof(struct r600_bc_alu));
2099 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2100 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2101 alu.dst.sel = ctx->temp_reg;
2102 alu.dst.chan = i;
2103 if (i == 3)
2104 alu.last = 1;
2105 alu.dst.write = 1;
2106 r = r600_bc_add_alu(ctx->bc, &alu);
2107 if (r)
2108 return r;
2109 }
2110 src_loaded = TRUE;
2111 src_gpr = ctx->temp_reg;
2112 }
2113
2114 opcode = ctx->inst_info->r600_opcode;
2115 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) {
2116 switch (opcode) {
2117 case SQ_TEX_INST_SAMPLE:
2118 opcode = SQ_TEX_INST_SAMPLE_C;
2119 break;
2120 case SQ_TEX_INST_SAMPLE_L:
2121 opcode = SQ_TEX_INST_SAMPLE_C_L;
2122 break;
2123 case SQ_TEX_INST_SAMPLE_G:
2124 opcode = SQ_TEX_INST_SAMPLE_C_G;
2125 break;
2126 }
2127 }
2128
2129 memset(&tex, 0, sizeof(struct r600_bc_tex));
2130 tex.inst = opcode;
2131
2132 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2133 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2134 tex.src_gpr = src_gpr;
2135 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2136 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2137 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2138 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2139 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2140 if (src_loaded) {
2141 tex.src_sel_x = 0;
2142 tex.src_sel_y = 1;
2143 tex.src_sel_z = 2;
2144 tex.src_sel_w = 3;
2145 } else {
2146 tex.src_sel_x = ctx->src[0].swizzle[0];
2147 tex.src_sel_y = ctx->src[0].swizzle[1];
2148 tex.src_sel_z = ctx->src[0].swizzle[2];
2149 tex.src_sel_w = ctx->src[0].swizzle[3];
2150 tex.src_rel = ctx->src[0].rel;
2151 }
2152
2153 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2154 tex.src_sel_x = 1;
2155 tex.src_sel_y = 0;
2156 tex.src_sel_z = 3;
2157 tex.src_sel_w = 1;
2158 }
2159
2160 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2161 tex.coord_type_x = 1;
2162 tex.coord_type_y = 1;
2163 tex.coord_type_z = 1;
2164 tex.coord_type_w = 1;
2165 }
2166
2167 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
2168 tex.coord_type_z = 0;
2169 tex.src_sel_z = tex.src_sel_y;
2170 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
2171 tex.coord_type_z = 0;
2172
2173 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2174 tex.src_sel_w = tex.src_sel_z;
2175
2176 r = r600_bc_add_tex(ctx->bc, &tex);
2177 if (r)
2178 return r;
2179
2180 /* add shadow ambient support - gallium doesn't do it yet */
2181 return 0;
2182 }
2183
2184 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2185 {
2186 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2187 struct r600_bc_alu alu;
2188 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2189 unsigned i;
2190 int r;
2191
2192 /* optimize if it's just an equal balance */
2193 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2194 for (i = 0; i < lasti + 1; i++) {
2195 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2196 continue;
2197
2198 memset(&alu, 0, sizeof(struct r600_bc_alu));
2199 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2200 r600_bc_src(&alu.src[0], &ctx->src[1], i);
2201 r600_bc_src(&alu.src[1], &ctx->src[2], i);
2202 alu.omod = 3;
2203 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2204 alu.dst.chan = i;
2205 if (i == lasti) {
2206 alu.last = 1;
2207 }
2208 r = r600_bc_add_alu(ctx->bc, &alu);
2209 if (r)
2210 return r;
2211 }
2212 return 0;
2213 }
2214
2215 /* 1 - src0 */
2216 for (i = 0; i < lasti + 1; i++) {
2217 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2218 continue;
2219
2220 memset(&alu, 0, sizeof(struct r600_bc_alu));
2221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2222 alu.src[0].sel = V_SQ_ALU_SRC_1;
2223 alu.src[0].chan = 0;
2224 r600_bc_src(&alu.src[1], &ctx->src[0], i);
2225 r600_bc_src_toggle_neg(&alu.src[1]);
2226 alu.dst.sel = ctx->temp_reg;
2227 alu.dst.chan = i;
2228 if (i == lasti) {
2229 alu.last = 1;
2230 }
2231 alu.dst.write = 1;
2232 r = r600_bc_add_alu(ctx->bc, &alu);
2233 if (r)
2234 return r;
2235 }
2236
2237 /* (1 - src0) * src2 */
2238 for (i = 0; i < lasti + 1; i++) {
2239 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2240 continue;
2241
2242 memset(&alu, 0, sizeof(struct r600_bc_alu));
2243 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2244 alu.src[0].sel = ctx->temp_reg;
2245 alu.src[0].chan = i;
2246 r600_bc_src(&alu.src[1], &ctx->src[2], i);
2247 alu.dst.sel = ctx->temp_reg;
2248 alu.dst.chan = i;
2249 if (i == lasti) {
2250 alu.last = 1;
2251 }
2252 alu.dst.write = 1;
2253 r = r600_bc_add_alu(ctx->bc, &alu);
2254 if (r)
2255 return r;
2256 }
2257
2258 /* src0 * src1 + (1 - src0) * src2 */
2259 for (i = 0; i < lasti + 1; i++) {
2260 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2261 continue;
2262
2263 memset(&alu, 0, sizeof(struct r600_bc_alu));
2264 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2265 alu.is_op3 = 1;
2266 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2267 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2268 alu.src[2].sel = ctx->temp_reg;
2269 alu.src[2].chan = i;
2270
2271 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2272 alu.dst.chan = i;
2273 if (i == lasti) {
2274 alu.last = 1;
2275 }
2276 r = r600_bc_add_alu(ctx->bc, &alu);
2277 if (r)
2278 return r;
2279 }
2280 return 0;
2281 }
2282
2283 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2284 {
2285 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2286 struct r600_bc_alu alu;
2287 int i, r;
2288 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2289
2290 for (i = 0; i < lasti + 1; i++) {
2291 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2292 continue;
2293
2294 memset(&alu, 0, sizeof(struct r600_bc_alu));
2295 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2296 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2297 r600_bc_src(&alu.src[1], &ctx->src[2], i);
2298 r600_bc_src(&alu.src[2], &ctx->src[1], i);
2299 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2300 alu.dst.chan = i;
2301 alu.dst.write = 1;
2302 alu.is_op3 = 1;
2303 if (i == lasti)
2304 alu.last = 1;
2305 r = r600_bc_add_alu(ctx->bc, &alu);
2306 if (r)
2307 return r;
2308 }
2309 return 0;
2310 }
2311
2312 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2313 {
2314 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2315 static const unsigned int src0_swizzle[] = {2, 0, 1};
2316 static const unsigned int src1_swizzle[] = {1, 2, 0};
2317 struct r600_bc_alu alu;
2318 uint32_t use_temp = 0;
2319 int i, r;
2320
2321 if (inst->Dst[0].Register.WriteMask != 0xf)
2322 use_temp = 1;
2323
2324 for (i = 0; i < 4; i++) {
2325 memset(&alu, 0, sizeof(struct r600_bc_alu));
2326 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2327 if (i < 3) {
2328 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2329 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2330 } else {
2331 alu.src[0].sel = V_SQ_ALU_SRC_0;
2332 alu.src[0].chan = i;
2333 alu.src[1].sel = V_SQ_ALU_SRC_0;
2334 alu.src[1].chan = i;
2335 }
2336
2337 alu.dst.sel = ctx->temp_reg;
2338 alu.dst.chan = i;
2339 alu.dst.write = 1;
2340
2341 if (i == 3)
2342 alu.last = 1;
2343 r = r600_bc_add_alu(ctx->bc, &alu);
2344 if (r)
2345 return r;
2346 }
2347
2348 for (i = 0; i < 4; i++) {
2349 memset(&alu, 0, sizeof(struct r600_bc_alu));
2350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2351
2352 if (i < 3) {
2353 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2354 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2355 } else {
2356 alu.src[0].sel = V_SQ_ALU_SRC_0;
2357 alu.src[0].chan = i;
2358 alu.src[1].sel = V_SQ_ALU_SRC_0;
2359 alu.src[1].chan = i;
2360 }
2361
2362 alu.src[2].sel = ctx->temp_reg;
2363 alu.src[2].neg = 1;
2364 alu.src[2].chan = i;
2365
2366 if (use_temp)
2367 alu.dst.sel = ctx->temp_reg;
2368 else
2369 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2370 alu.dst.chan = i;
2371 alu.dst.write = 1;
2372 alu.is_op3 = 1;
2373 if (i == 3)
2374 alu.last = 1;
2375 r = r600_bc_add_alu(ctx->bc, &alu);
2376 if (r)
2377 return r;
2378 }
2379 if (use_temp)
2380 return tgsi_helper_copy(ctx, inst);
2381 return 0;
2382 }
2383
2384 static int tgsi_exp(struct r600_shader_ctx *ctx)
2385 {
2386 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2387 struct r600_bc_alu alu;
2388 int r;
2389 int i;
2390
2391 /* result.x = 2^floor(src); */
2392 if (inst->Dst[0].Register.WriteMask & 1) {
2393 memset(&alu, 0, sizeof(struct r600_bc_alu));
2394
2395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2396 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2397
2398 alu.dst.sel = ctx->temp_reg;
2399 alu.dst.chan = 0;
2400 alu.dst.write = 1;
2401 alu.last = 1;
2402 r = r600_bc_add_alu(ctx->bc, &alu);
2403 if (r)
2404 return r;
2405
2406 if (ctx->bc->chip_class == CAYMAN) {
2407 for (i = 0; i < 3; i++) {
2408 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2409 alu.src[0].sel = ctx->temp_reg;
2410 alu.src[0].chan = 0;
2411
2412 alu.dst.sel = ctx->temp_reg;
2413 alu.dst.chan = i;
2414 if (i == 0)
2415 alu.dst.write = 1;
2416 if (i == 2)
2417 alu.last = 1;
2418 r = r600_bc_add_alu(ctx->bc, &alu);
2419 if (r)
2420 return r;
2421 }
2422 } else {
2423 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2424 alu.src[0].sel = ctx->temp_reg;
2425 alu.src[0].chan = 0;
2426
2427 alu.dst.sel = ctx->temp_reg;
2428 alu.dst.chan = 0;
2429 alu.dst.write = 1;
2430 alu.last = 1;
2431 r = r600_bc_add_alu(ctx->bc, &alu);
2432 if (r)
2433 return r;
2434 }
2435 }
2436
2437 /* result.y = tmp - floor(tmp); */
2438 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2439 memset(&alu, 0, sizeof(struct r600_bc_alu));
2440
2441 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2442 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2443
2444 alu.dst.sel = ctx->temp_reg;
2445 #if 0
2446 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2447 if (r)
2448 return r;
2449 #endif
2450 alu.dst.write = 1;
2451 alu.dst.chan = 1;
2452
2453 alu.last = 1;
2454
2455 r = r600_bc_add_alu(ctx->bc, &alu);
2456 if (r)
2457 return r;
2458 }
2459
2460 /* result.z = RoughApprox2ToX(tmp);*/
2461 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2462 if (ctx->bc->chip_class == CAYMAN) {
2463 for (i = 0; i < 3; i++) {
2464 memset(&alu, 0, sizeof(struct r600_bc_alu));
2465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2466 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2467
2468 alu.dst.sel = ctx->temp_reg;
2469 alu.dst.chan = i;
2470 if (i == 2) {
2471 alu.dst.write = 1;
2472 alu.last = 1;
2473 }
2474
2475 r = r600_bc_add_alu(ctx->bc, &alu);
2476 if (r)
2477 return r;
2478 }
2479 } else {
2480 memset(&alu, 0, sizeof(struct r600_bc_alu));
2481 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2482 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2483
2484 alu.dst.sel = ctx->temp_reg;
2485 alu.dst.write = 1;
2486 alu.dst.chan = 2;
2487
2488 alu.last = 1;
2489
2490 r = r600_bc_add_alu(ctx->bc, &alu);
2491 if (r)
2492 return r;
2493 }
2494 }
2495
2496 /* result.w = 1.0;*/
2497 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2498 memset(&alu, 0, sizeof(struct r600_bc_alu));
2499
2500 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2501 alu.src[0].sel = V_SQ_ALU_SRC_1;
2502 alu.src[0].chan = 0;
2503
2504 alu.dst.sel = ctx->temp_reg;
2505 alu.dst.chan = 3;
2506 alu.dst.write = 1;
2507 alu.last = 1;
2508 r = r600_bc_add_alu(ctx->bc, &alu);
2509 if (r)
2510 return r;
2511 }
2512 return tgsi_helper_copy(ctx, inst);
2513 }
2514
2515 static int tgsi_log(struct r600_shader_ctx *ctx)
2516 {
2517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2518 struct r600_bc_alu alu;
2519 int r;
2520 int i;
2521
2522 /* result.x = floor(log2(|src|)); */
2523 if (inst->Dst[0].Register.WriteMask & 1) {
2524 if (ctx->bc->chip_class == CAYMAN) {
2525 for (i = 0; i < 3; i++) {
2526 memset(&alu, 0, sizeof(struct r600_bc_alu));
2527
2528 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2529 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2530 r600_bc_src_set_abs(&alu.src[0]);
2531
2532 alu.dst.sel = ctx->temp_reg;
2533 alu.dst.chan = i;
2534 if (i == 0)
2535 alu.dst.write = 1;
2536 if (i == 2)
2537 alu.last = 1;
2538 r = r600_bc_add_alu(ctx->bc, &alu);
2539 if (r)
2540 return r;
2541 }
2542
2543 } else {
2544 memset(&alu, 0, sizeof(struct r600_bc_alu));
2545
2546 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2547 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2548 r600_bc_src_set_abs(&alu.src[0]);
2549
2550 alu.dst.sel = ctx->temp_reg;
2551 alu.dst.chan = 0;
2552 alu.dst.write = 1;
2553 alu.last = 1;
2554 r = r600_bc_add_alu(ctx->bc, &alu);
2555 if (r)
2556 return r;
2557 }
2558
2559 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2560 alu.src[0].sel = ctx->temp_reg;
2561 alu.src[0].chan = 0;
2562
2563 alu.dst.sel = ctx->temp_reg;
2564 alu.dst.chan = 0;
2565 alu.dst.write = 1;
2566 alu.last = 1;
2567
2568 r = r600_bc_add_alu(ctx->bc, &alu);
2569 if (r)
2570 return r;
2571 }
2572
2573 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2574 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2575
2576 if (ctx->bc->chip_class == CAYMAN) {
2577 for (i = 0; i < 3; i++) {
2578 memset(&alu, 0, sizeof(struct r600_bc_alu));
2579
2580 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2581 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2582 r600_bc_src_set_abs(&alu.src[0]);
2583
2584 alu.dst.sel = ctx->temp_reg;
2585 alu.dst.chan = i;
2586 if (i == 1)
2587 alu.dst.write = 1;
2588 if (i == 2)
2589 alu.last = 1;
2590
2591 r = r600_bc_add_alu(ctx->bc, &alu);
2592 if (r)
2593 return r;
2594 }
2595 } else {
2596 memset(&alu, 0, sizeof(struct r600_bc_alu));
2597
2598 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2599 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2600 r600_bc_src_set_abs(&alu.src[0]);
2601
2602 alu.dst.sel = ctx->temp_reg;
2603 alu.dst.chan = 1;
2604 alu.dst.write = 1;
2605 alu.last = 1;
2606
2607 r = r600_bc_add_alu(ctx->bc, &alu);
2608 if (r)
2609 return r;
2610 }
2611
2612 memset(&alu, 0, sizeof(struct r600_bc_alu));
2613
2614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2615 alu.src[0].sel = ctx->temp_reg;
2616 alu.src[0].chan = 1;
2617
2618 alu.dst.sel = ctx->temp_reg;
2619 alu.dst.chan = 1;
2620 alu.dst.write = 1;
2621 alu.last = 1;
2622
2623 r = r600_bc_add_alu(ctx->bc, &alu);
2624 if (r)
2625 return r;
2626
2627 if (ctx->bc->chip_class == CAYMAN) {
2628 for (i = 0; i < 3; i++) {
2629 memset(&alu, 0, sizeof(struct r600_bc_alu));
2630 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2631 alu.src[0].sel = ctx->temp_reg;
2632 alu.src[0].chan = 1;
2633
2634 alu.dst.sel = ctx->temp_reg;
2635 alu.dst.chan = i;
2636 if (i == 1)
2637 alu.dst.write = 1;
2638 if (i == 2)
2639 alu.last = 1;
2640
2641 r = r600_bc_add_alu(ctx->bc, &alu);
2642 if (r)
2643 return r;
2644 }
2645 } else {
2646 memset(&alu, 0, sizeof(struct r600_bc_alu));
2647 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2648 alu.src[0].sel = ctx->temp_reg;
2649 alu.src[0].chan = 1;
2650
2651 alu.dst.sel = ctx->temp_reg;
2652 alu.dst.chan = 1;
2653 alu.dst.write = 1;
2654 alu.last = 1;
2655
2656 r = r600_bc_add_alu(ctx->bc, &alu);
2657 if (r)
2658 return r;
2659 }
2660
2661 if (ctx->bc->chip_class == CAYMAN) {
2662 for (i = 0; i < 3; i++) {
2663 memset(&alu, 0, sizeof(struct r600_bc_alu));
2664 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2665 alu.src[0].sel = ctx->temp_reg;
2666 alu.src[0].chan = 1;
2667
2668 alu.dst.sel = ctx->temp_reg;
2669 alu.dst.chan = i;
2670 if (i == 1)
2671 alu.dst.write = 1;
2672 if (i == 2)
2673 alu.last = 1;
2674
2675 r = r600_bc_add_alu(ctx->bc, &alu);
2676 if (r)
2677 return r;
2678 }
2679 } else {
2680 memset(&alu, 0, sizeof(struct r600_bc_alu));
2681 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2682 alu.src[0].sel = ctx->temp_reg;
2683 alu.src[0].chan = 1;
2684
2685 alu.dst.sel = ctx->temp_reg;
2686 alu.dst.chan = 1;
2687 alu.dst.write = 1;
2688 alu.last = 1;
2689
2690 r = r600_bc_add_alu(ctx->bc, &alu);
2691 if (r)
2692 return r;
2693 }
2694
2695 memset(&alu, 0, sizeof(struct r600_bc_alu));
2696
2697 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2698
2699 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2700 r600_bc_src_set_abs(&alu.src[0]);
2701
2702 alu.src[1].sel = ctx->temp_reg;
2703 alu.src[1].chan = 1;
2704
2705 alu.dst.sel = ctx->temp_reg;
2706 alu.dst.chan = 1;
2707 alu.dst.write = 1;
2708 alu.last = 1;
2709
2710 r = r600_bc_add_alu(ctx->bc, &alu);
2711 if (r)
2712 return r;
2713 }
2714
2715 /* result.z = log2(|src|);*/
2716 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2717 if (ctx->bc->chip_class == CAYMAN) {
2718 for (i = 0; i < 3; i++) {
2719 memset(&alu, 0, sizeof(struct r600_bc_alu));
2720
2721 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2722 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2723 r600_bc_src_set_abs(&alu.src[0]);
2724
2725 alu.dst.sel = ctx->temp_reg;
2726 if (i == 2)
2727 alu.dst.write = 1;
2728 alu.dst.chan = i;
2729 if (i == 2)
2730 alu.last = 1;
2731
2732 r = r600_bc_add_alu(ctx->bc, &alu);
2733 if (r)
2734 return r;
2735 }
2736 } else {
2737 memset(&alu, 0, sizeof(struct r600_bc_alu));
2738
2739 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2740 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2741 r600_bc_src_set_abs(&alu.src[0]);
2742
2743 alu.dst.sel = ctx->temp_reg;
2744 alu.dst.write = 1;
2745 alu.dst.chan = 2;
2746 alu.last = 1;
2747
2748 r = r600_bc_add_alu(ctx->bc, &alu);
2749 if (r)
2750 return r;
2751 }
2752 }
2753
2754 /* result.w = 1.0; */
2755 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2756 memset(&alu, 0, sizeof(struct r600_bc_alu));
2757
2758 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2759 alu.src[0].sel = V_SQ_ALU_SRC_1;
2760 alu.src[0].chan = 0;
2761
2762 alu.dst.sel = ctx->temp_reg;
2763 alu.dst.chan = 3;
2764 alu.dst.write = 1;
2765 alu.last = 1;
2766
2767 r = r600_bc_add_alu(ctx->bc, &alu);
2768 if (r)
2769 return r;
2770 }
2771
2772 return tgsi_helper_copy(ctx, inst);
2773 }
2774
2775 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2776 {
2777 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2778 struct r600_bc_alu alu;
2779 int r;
2780
2781 memset(&alu, 0, sizeof(struct r600_bc_alu));
2782
2783 switch (inst->Instruction.Opcode) {
2784 case TGSI_OPCODE_ARL:
2785 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2786 break;
2787 case TGSI_OPCODE_ARR:
2788 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2789 break;
2790 default:
2791 assert(0);
2792 return -1;
2793 }
2794
2795 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2796 alu.last = 1;
2797 alu.dst.sel = ctx->ar_reg;
2798 alu.dst.write = 1;
2799 r = r600_bc_add_alu(ctx->bc, &alu);
2800 if (r)
2801 return r;
2802
2803 /* TODO: Note that the MOVA can be avoided if we never use AR for
2804 * indexing non-CB registers in the current ALU clause. Similarly, we
2805 * need to load AR from ar_reg again if we started a new clause
2806 * between ARL and AR usage. The easy way to do that is to remove
2807 * the MOVA here, and load it for the first AR access after ar_reg
2808 * has been modified in each clause. */
2809 memset(&alu, 0, sizeof(struct r600_bc_alu));
2810 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2811 alu.src[0].sel = ctx->ar_reg;
2812 alu.src[0].chan = 0;
2813 alu.last = 1;
2814 r = r600_bc_add_alu(ctx->bc, &alu);
2815 if (r)
2816 return r;
2817 return 0;
2818 }
2819 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2820 {
2821 /* TODO from r600c, ar values don't persist between clauses */
2822 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2823 struct r600_bc_alu alu;
2824 int r;
2825
2826 switch (inst->Instruction.Opcode) {
2827 case TGSI_OPCODE_ARL:
2828 memset(&alu, 0, sizeof(alu));
2829 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2830 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2831 alu.dst.sel = ctx->ar_reg;
2832 alu.dst.write = 1;
2833 alu.last = 1;
2834
2835 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2836 return r;
2837
2838 memset(&alu, 0, sizeof(alu));
2839 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2840 alu.src[0].sel = ctx->ar_reg;
2841 alu.dst.sel = ctx->ar_reg;
2842 alu.dst.write = 1;
2843 alu.last = 1;
2844
2845 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2846 return r;
2847 break;
2848 case TGSI_OPCODE_ARR:
2849 memset(&alu, 0, sizeof(alu));
2850 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2851 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2852 alu.dst.sel = ctx->ar_reg;
2853 alu.dst.write = 1;
2854 alu.last = 1;
2855
2856 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2857 return r;
2858 break;
2859 default:
2860 assert(0);
2861 return -1;
2862 }
2863
2864 memset(&alu, 0, sizeof(alu));
2865 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2866 alu.src[0].sel = ctx->ar_reg;
2867 alu.last = 1;
2868
2869 r = r600_bc_add_alu(ctx->bc, &alu);
2870 if (r)
2871 return r;
2872 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2873 return 0;
2874 }
2875
2876 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2877 {
2878 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2879 struct r600_bc_alu alu;
2880 int i, r = 0;
2881
2882 for (i = 0; i < 4; i++) {
2883 memset(&alu, 0, sizeof(struct r600_bc_alu));
2884
2885 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2886 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2887
2888 if (i == 0 || i == 3) {
2889 alu.src[0].sel = V_SQ_ALU_SRC_1;
2890 } else {
2891 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2892 }
2893
2894 if (i == 0 || i == 2) {
2895 alu.src[1].sel = V_SQ_ALU_SRC_1;
2896 } else {
2897 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2898 }
2899 if (i == 3)
2900 alu.last = 1;
2901 r = r600_bc_add_alu(ctx->bc, &alu);
2902 if (r)
2903 return r;
2904 }
2905 return 0;
2906 }
2907
2908 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2909 {
2910 struct r600_bc_alu alu;
2911 int r;
2912
2913 memset(&alu, 0, sizeof(struct r600_bc_alu));
2914 alu.inst = opcode;
2915 alu.predicate = 1;
2916
2917 alu.dst.sel = ctx->temp_reg;
2918 alu.dst.write = 1;
2919 alu.dst.chan = 0;
2920
2921 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2922 alu.src[1].sel = V_SQ_ALU_SRC_0;
2923 alu.src[1].chan = 0;
2924
2925 alu.last = 1;
2926
2927 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2928 if (r)
2929 return r;
2930 return 0;
2931 }
2932
2933 static int pops(struct r600_shader_ctx *ctx, int pops)
2934 {
2935 unsigned force_pop = ctx->bc->force_add_cf;
2936
2937 if (!force_pop) {
2938 int alu_pop = 3;
2939 if (ctx->bc->cf_last) {
2940 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2941 alu_pop = 0;
2942 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2943 alu_pop = 1;
2944 }
2945 alu_pop += pops;
2946 if (alu_pop == 1) {
2947 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2948 ctx->bc->force_add_cf = 1;
2949 } else if (alu_pop == 2) {
2950 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2951 ctx->bc->force_add_cf = 1;
2952 } else {
2953 force_pop = 1;
2954 }
2955 }
2956
2957 if (force_pop) {
2958 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2959 ctx->bc->cf_last->pop_count = pops;
2960 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2961 }
2962
2963 return 0;
2964 }
2965
2966 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2967 {
2968 switch(reason) {
2969 case FC_PUSH_VPM:
2970 ctx->bc->callstack[ctx->bc->call_sp].current--;
2971 break;
2972 case FC_PUSH_WQM:
2973 case FC_LOOP:
2974 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2975 break;
2976 case FC_REP:
2977 /* TOODO : for 16 vp asic should -= 2; */
2978 ctx->bc->callstack[ctx->bc->call_sp].current --;
2979 break;
2980 }
2981 }
2982
2983 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2984 {
2985 if (check_max_only) {
2986 int diff;
2987 switch (reason) {
2988 case FC_PUSH_VPM:
2989 diff = 1;
2990 break;
2991 case FC_PUSH_WQM:
2992 diff = 4;
2993 break;
2994 default:
2995 assert(0);
2996 diff = 0;
2997 }
2998 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2999 ctx->bc->callstack[ctx->bc->call_sp].max) {
3000 ctx->bc->callstack[ctx->bc->call_sp].max =
3001 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3002 }
3003 return;
3004 }
3005 switch (reason) {
3006 case FC_PUSH_VPM:
3007 ctx->bc->callstack[ctx->bc->call_sp].current++;
3008 break;
3009 case FC_PUSH_WQM:
3010 case FC_LOOP:
3011 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3012 break;
3013 case FC_REP:
3014 ctx->bc->callstack[ctx->bc->call_sp].current++;
3015 break;
3016 }
3017
3018 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3019 ctx->bc->callstack[ctx->bc->call_sp].max) {
3020 ctx->bc->callstack[ctx->bc->call_sp].max =
3021 ctx->bc->callstack[ctx->bc->call_sp].current;
3022 }
3023 }
3024
3025 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3026 {
3027 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3028
3029 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
3030 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
3031 sp->mid[sp->num_mid] = ctx->bc->cf_last;
3032 sp->num_mid++;
3033 }
3034
3035 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3036 {
3037 ctx->bc->fc_sp++;
3038 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3039 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3040 }
3041
3042 static void fc_poplevel(struct r600_shader_ctx *ctx)
3043 {
3044 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3045 if (sp->mid) {
3046 free(sp->mid);
3047 sp->mid = NULL;
3048 }
3049 sp->num_mid = 0;
3050 sp->start = NULL;
3051 sp->type = 0;
3052 ctx->bc->fc_sp--;
3053 }
3054
#if 0
/*
 * Disabled, unfinished helpers for RETURN / jump / loop-flag handling.
 * They are excluded from the build by the surrounding #if 0.
 */

/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Emit a JUMP CF instruction popping `pops` entries; `offset` is unused. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Placeholder: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Placeholder: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, clear the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag and emit the current instruction's CF opcode as a loop exit. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3102
3103 static int tgsi_if(struct r600_shader_ctx *ctx)
3104 {
3105 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
3106
3107 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3108
3109 fc_pushlevel(ctx, FC_IF);
3110
3111 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3112 return 0;
3113 }
3114
3115 static int tgsi_else(struct r600_shader_ctx *ctx)
3116 {
3117 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3118 ctx->bc->cf_last->pop_count = 1;
3119
3120 fc_set_mid(ctx, ctx->bc->fc_sp);
3121 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3122 return 0;
3123 }
3124
3125 static int tgsi_endif(struct r600_shader_ctx *ctx)
3126 {
3127 pops(ctx, 1);
3128 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3129 R600_ERR("if/endif unbalanced in shader\n");
3130 return -1;
3131 }
3132
3133 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3134 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3135 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3136 } else {
3137 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3138 }
3139 fc_poplevel(ctx);
3140
3141 callstack_decrease_current(ctx, FC_PUSH_VPM);
3142 return 0;
3143 }
3144
3145 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3146 {
3147 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3148
3149 fc_pushlevel(ctx, FC_LOOP);
3150
3151 /* check stack depth */
3152 callstack_check_depth(ctx, FC_LOOP, 0);
3153 return 0;
3154 }
3155
3156 static int tgsi_endloop(struct r600_shader_ctx *ctx)
3157 {
3158 int i;
3159
3160 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3161
3162 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3163 R600_ERR("loop/endloop in shader code are not paired.\n");
3164 return -EINVAL;
3165 }
3166
3167 /* fixup loop pointers - from r600isa
3168 LOOP END points to CF after LOOP START,
3169 LOOP START point to CF after LOOP END
3170 BRK/CONT point to LOOP END CF
3171 */
3172 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3173
3174 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3175
3176 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3177 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3178 }
3179 /* TODO add LOOPRET support */
3180 fc_poplevel(ctx);
3181 callstack_decrease_current(ctx, FC_LOOP);
3182 return 0;
3183 }
3184
3185 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3186 {
3187 unsigned int fscp;
3188
3189 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3190 {
3191 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3192 break;
3193 }
3194
3195 if (fscp == 0) {
3196 R600_ERR("Break not inside loop/endloop pair\n");
3197 return -EINVAL;
3198 }
3199
3200 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3201 ctx->bc->cf_last->pop_count = 1;
3202
3203 fc_set_mid(ctx, fscp);
3204
3205 pops(ctx, 1);
3206 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3207 return 0;
3208 }
3209
3210 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3211 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3212 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3213 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3214
3215 /* FIXME:
3216 * For state trackers other than OpenGL, we'll want to use
3217 * _RECIP_IEEE instead.
3218 */
3219 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3220
3221 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3222 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3223 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3224 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3225 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3226 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3227 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3228 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3229 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3230 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3231 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3232 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3233 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3234 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3235 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3236 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3237 /* gap */
3238 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3239 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240 /* gap */
3241 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3243 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3244 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3246 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3247 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3248 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3249 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3250 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3251 /* gap */
3252 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3254 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3255 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3256 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3257 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3258 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3259 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3260 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3266 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3268 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3269 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3270 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3271 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3272 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3273 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3274 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3275 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3276 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3277 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3278 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3281 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3282 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3284 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3286 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3287 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3288 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3289 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3291 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3292 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3293 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3294 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3295 /* gap */
3296 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3297 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3298 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3299 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3300 /* gap */
3301 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3302 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3303 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3304 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3305 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3309 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310 /* gap */
3311 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3312 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3314 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3320 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3322 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3323 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3325 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3326 /* gap */
3327 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3328 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3329 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3330 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3331 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3332 /* gap */
3333 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3334 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3335 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3336 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3341 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3342 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3343 /* gap */
3344 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3345 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3346 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3347 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3348 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3349 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3359 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3361 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3362 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3363 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3364 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3365 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3366 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3367 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3368 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3369 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3370 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3371 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3372 };
3373
/*
 * TGSI opcode translation table built from EG_-prefixed ALU/CF opcode
 * constants (plus the SQ_TEX_INST_* texture opcodes).
 *
 * Each entry lists, in order:
 *   - the TGSI opcode, or a bare number for the unassigned "gap" values;
 *   - a flag that is 1 only for MAD, the single entry whose hardware opcode
 *     is an OP3 constant (presumably "is a three-operand instruction");
 *   - the hardware opcode constant handed to the handler;
 *   - the handler function for that opcode.
 * Many entries, including every gap placeholder, point at tgsi_unsupported.
 *
 * NOTE(review): the gap placeholders make the entries line up with
 * consecutive opcode numbers, so this table is presumably indexed directly
 * by TGSI opcode value -- keep it dense and in order when editing; confirm
 * against the lookup code.
 */
3374 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3375 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3376 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3377 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3378 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3379 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3380 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3381 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3382 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3383 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3384 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3385 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3386 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3387 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3388 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3389 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3390 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3391 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3392 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3393 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3394 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3395 /* gap */
3396 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3397 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3398 /* gap */
3399 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3400 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3401 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3402 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3403 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3404 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3405 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3406 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3407 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3408 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3409 /* gap */
3410 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3411 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3412 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3413 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3414 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3415 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3416 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3417 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3418 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3419 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3420 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3421 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3422 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3423 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3424 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3425 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3426 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3427 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3428 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3429 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3430 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3431 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3432 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3433 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3434 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3435 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3436 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3437 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3438 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3439 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3440 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3441 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3442 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3443 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3444 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3445 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
/* NOTE(review): TXB shares SQ_TEX_INST_SAMPLE_L with TXL below; confirm that
 * a LOD-bias lookup is really meant to use the same texture opcode as an
 * explicit-LOD lookup, or that tgsi_tex distinguishes the two. */
3446 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3447 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3448 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3449 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3450 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3451 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3452 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3453 /* gap */
3454 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3455 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3456 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3457 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3458 /* gap */
3459 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3460 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3461 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3462 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3463 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3464 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3465 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3466 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3467 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3468 /* gap */
3469 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3470 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3471 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3472 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3473 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3474 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3475 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3476 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3477 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3478 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3479 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3480 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3481 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3482 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3483 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3484 /* gap */
3485 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3486 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3487 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3488 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3489 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3490 /* gap */
3491 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3492 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3493 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3494 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3495 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3496 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3497 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3498 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3499 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3500 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3501 /* gap */
3502 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3503 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3504 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3505 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3506 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3507 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3508 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3509 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3510 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3511 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3512 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3513 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3514 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3515 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3516 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3517 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3518 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3519 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3520 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3521 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3522 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3523 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3524 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3525 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3526 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3527 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3528 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3529 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3530 };
3531
/*
 * TGSI opcode translation table for the "cm" variant (presumably Cayman;
 * see the CAYMAN notes at the top of this file).  It uses the same
 * EG_-prefixed ALU/CF opcode constants and SQ_TEX_INST_* texture opcodes as
 * eg_shader_tgsi_instruction.
 *
 * Each entry lists, in order:
 *   - the TGSI opcode, or a bare number for the unassigned "gap" values;
 *   - a flag that is 1 only for MAD, the single entry whose hardware opcode
 *     is an OP3 constant (presumably "is a three-operand instruction");
 *   - the hardware opcode constant handed to the handler;
 *   - the handler function for that opcode.
 * Many entries, including every gap placeholder, point at tgsi_unsupported.
 *
 * Entries that use cayman_* handlers in this table:
 *   - RCP, RSQ, EX2, LG2  -> cayman_emit_float_instr
 *   - POW                 -> cayman_pow
 *   - COS, SIN            -> cayman_trig
 *
 * NOTE(review): the gap placeholders make the entries line up with
 * consecutive opcode numbers, so this table is presumably indexed directly
 * by TGSI opcode value -- keep it dense and in order when editing; confirm
 * against the lookup code.
 */
3532 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3533 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3534 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3535 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3536 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3537 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3538 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3539 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3540 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3541 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3542 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3543 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3544 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3545 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3546 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3547 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3548 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3549 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3550 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3551 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3552 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3553 /* gap */
3554 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3555 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3556 /* gap */
3557 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3558 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3559 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3560 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3561 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3562 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3563 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3564 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3565 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3566 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3567 /* gap */
3568 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3569 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3570 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3571 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3572 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3573 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3574 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3575 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3576 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3577 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3578 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3579 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3580 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3581 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3582 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3583 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3584 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3585 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3586 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3587 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3588 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3589 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3590 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3591 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3592 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3593 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3594 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3595 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3596 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3597 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3598 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3599 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3600 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3601 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3602 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3603 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
/* NOTE(review): TXB shares SQ_TEX_INST_SAMPLE_L with TXL below; confirm that
 * a LOD-bias lookup is really meant to use the same texture opcode as an
 * explicit-LOD lookup, or that tgsi_tex distinguishes the two. */
3604 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3605 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3606 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3607 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3608 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3609 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3610 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3611 /* gap */
3612 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3613 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3614 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3615 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3616 /* gap */
3617 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3618 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3619 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3620 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3621 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3622 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3623 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3624 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3625 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3626 /* gap */
3627 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3628 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3630 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3631 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3632 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3633 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3634 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3635 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3636 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3637 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3638 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3639 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3640 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3641 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3642 /* gap */
3643 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3644 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3645 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3646 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3648 /* gap */
3649 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3650 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3651 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3652 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3653 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3654 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3655 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3656 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3657 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3658 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3659 /* gap */
3660 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3661 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3662 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3663 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3664 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3665 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3666 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3667 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3668 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3669 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3670 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3671 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3672 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3673 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3674 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3675 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3676 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3677 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3678 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3679 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3680 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3681 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3682 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3683 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3684 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3685 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3686 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3687 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3688 };