r600g: precalculate semantic indices for SPI setup
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
30 #include "r600_asm.h"
31 #include "r600_sq.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
34 #include "r600d.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 Why CAYMAN got loops for lots of instructions is explained here.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
107
108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
112 int r;
113
114 /* Would like some magic "get_bool_option_once" routine.
115 */
116 if (dump_shaders == -1)
117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
118
119 if (dump_shaders) {
120 fprintf(stderr, "--------------------------------------------------------------\n");
121 tgsi_dump(shader->tokens, 0);
122 }
123 r = r600_shader_from_tgsi(rctx, shader);
124 if (r) {
125 R600_ERR("translation from TGSI failed !\n");
126 return r;
127 }
128 r = r600_bytecode_build(&shader->shader.bc);
129 if (r) {
130 R600_ERR("building bytecode failed !\n");
131 return r;
132 }
133 if (dump_shaders) {
134 r600_bytecode_dump(&shader->shader.bc);
135 fprintf(stderr, "______________________________________________________________\n");
136 }
137 return r600_pipe_shader(ctx, shader);
138 }
139
140 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
141 {
142 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
143 r600_bytecode_clear(&shader->shader.bc);
144
145 memset(&shader->shader,0,sizeof(struct r600_shader));
146 }
147
148 /*
149 * tgsi -> r600 shader
150 */
151 struct r600_shader_tgsi_instruction;
152
/* One source operand of a TGSI instruction, in r600 terms (see tgsi_src()). */
struct r600_shader_src {
	unsigned	sel;		/* register select: TGSI index plus file offset, or a special ALU source */
	unsigned	swizzle[4];	/* source channel for each of the x, y, z, w lanes */
	unsigned	neg;		/* negate modifier */
	unsigned	abs;		/* absolute-value modifier */
	unsigned	rel;		/* V_SQ_REL_RELATIVE when the TGSI register is indirect */
	uint32_t	value[4];	/* literal dwords, filled for immediate sources */
};
161
162 struct r600_shader_ctx {
163 struct tgsi_shader_info info;
164 struct tgsi_parse_context parse;
165 const struct tgsi_token *tokens;
166 unsigned type;
167 unsigned file_offset[TGSI_FILE_COUNT];
168 unsigned temp_reg;
169 unsigned ar_reg;
170 struct r600_shader_tgsi_instruction *inst_info;
171 struct r600_bytecode *bc;
172 struct r600_shader *shader;
173 struct r600_shader_src src[4];
174 u32 *literals;
175 u32 nliterals;
176 u32 max_driver_temp_used;
177 /* needed for evergreen interpolation */
178 boolean input_centroid;
179 boolean input_linear;
180 boolean input_perspective;
181 int num_interp_gpr;
182 };
183
/* One row of the per-chip TGSI opcode translation tables. */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI opcode this row describes */
	unsigned	is_op3;
	unsigned	r600_opcode;
	/* emits the bytecode for the current instruction; non-zero return is an error */
	int		(*process)(struct r600_shader_ctx *ctx);
};
190
191 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
192 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
193
194 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
195 {
196 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
197 int j;
198
199 if (i->Instruction.NumDstRegs > 1) {
200 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
201 return -EINVAL;
202 }
203 if (i->Instruction.Predicate) {
204 R600_ERR("predicate unsupported\n");
205 return -EINVAL;
206 }
207 #if 0
208 if (i->Instruction.Label) {
209 R600_ERR("label unsupported\n");
210 return -EINVAL;
211 }
212 #endif
213 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
214 if (i->Src[j].Register.Dimension) {
215 R600_ERR("unsupported src %d (dimension %d)\n", j,
216 i->Src[j].Register.Dimension);
217 return -EINVAL;
218 }
219 }
220 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
221 if (i->Dst[j].Register.Dimension) {
222 R600_ERR("unsupported dst (dimension)\n");
223 return -EINVAL;
224 }
225 }
226 return 0;
227 }
228
229 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
230 {
231 int i, r;
232 struct r600_bytecode_alu alu;
233 int gpr = 0, base_chan = 0;
234 int ij_index = 0;
235
236 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
237 ij_index = 0;
238 if (ctx->shader->input[input].centroid)
239 ij_index++;
240 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
241 ij_index = 0;
242 /* if we have perspective add one */
243 if (ctx->input_perspective) {
244 ij_index++;
245 /* if we have perspective centroid */
246 if (ctx->input_centroid)
247 ij_index++;
248 }
249 if (ctx->shader->input[input].centroid)
250 ij_index++;
251 }
252
253 /* work out gpr and base_chan from index */
254 gpr = ij_index / 2;
255 base_chan = (2 * (ij_index % 2)) + 1;
256
257 for (i = 0; i < 8; i++) {
258 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
259
260 if (i < 4)
261 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
262 else
263 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
264
265 if ((i > 1) && (i < 6)) {
266 alu.dst.sel = ctx->shader->input[input].gpr;
267 alu.dst.write = 1;
268 }
269
270 alu.dst.chan = i % 4;
271
272 alu.src[0].sel = gpr;
273 alu.src[0].chan = (base_chan - (i % 2));
274
275 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
276
277 alu.bank_swizzle_force = SQ_ALU_VEC_210;
278 if ((i % 4) == 3)
279 alu.last = 1;
280 r = r600_bytecode_add_alu(ctx->bc, &alu);
281 if (r)
282 return r;
283 }
284 return 0;
285 }
286
287 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
288 {
289 int i, r;
290 struct r600_bytecode_alu alu;
291
292 for (i = 0; i < 4; i++) {
293 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
294
295 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0;
296
297 alu.dst.sel = ctx->shader->input[input].gpr;
298 alu.dst.write = 1;
299
300 alu.dst.chan = i;
301
302 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
303 alu.src[0].chan = i;
304
305 if (i == 3)
306 alu.last = 1;
307 r = r600_bytecode_add_alu(ctx->bc, &alu);
308 if (r)
309 return r;
310 }
311 return 0;
312 }
313
314 /*
315 * Special export handling in shaders
316 *
317 * shader export ARRAY_BASE for EXPORT_POS:
318 * 60 is position
319 * 61 is misc vector
320 * 62, 63 are clip distance vectors
321 *
322 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
323 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
324 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
325 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
326 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
327 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
328 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
329 * exclusive from render target index)
330 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
331 *
332 *
333 * shader export ARRAY_BASE for EXPORT_PIXEL:
334 * 0-7 CB targets
335 * 61 computed Z vector
336 *
337 * The use of the values exported in the computed Z vector are controlled
338 * by DB_SHADER_CONTROL:
339 * Z_EXPORT_ENABLE - Z as a float in RED
340 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
341 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
342 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
343 * DB_SOURCE_FORMAT - export control restrictions
344 *
345 */
346
347
348 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
349 static int r600_spi_sid(struct r600_shader_io * io)
350 {
351 int index, name = io->name;
352
353 /* These params are handled differently, they don't need
354 * semantic indices, so we'll use 0 for them.
355 */
356 if (name == TGSI_SEMANTIC_POSITION ||
357 name == TGSI_SEMANTIC_PSIZE ||
358 name == TGSI_SEMANTIC_FACE)
359 index = 0;
360 else {
361 if (name == TGSI_SEMANTIC_GENERIC) {
362 /* For generic params simply use sid from tgsi */
363 index = io->sid;
364 } else {
365
366 /* FIXME: two-side rendering is broken in r600g, this will
367 * keep old functionality */
368 if (name == TGSI_SEMANTIC_BCOLOR)
369 name = TGSI_SEMANTIC_COLOR;
370
371 /* For non-generic params - pack name and sid into 8 bits */
372 index = 0x80 | (name<<3) | (io->sid);
373 }
374
375 /* Make sure that all really used indices have nonzero value, so
376 * we can just compare it to 0 later instead of comparing the name
377 * with different values to detect special cases. */
378 index++;
379 }
380
381 return index;
382 };
383
384 static int tgsi_declaration(struct r600_shader_ctx *ctx)
385 {
386 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
387 unsigned i;
388 int r;
389
390 switch (d->Declaration.File) {
391 case TGSI_FILE_INPUT:
392 i = ctx->shader->ninput++;
393 ctx->shader->input[i].name = d->Semantic.Name;
394 ctx->shader->input[i].sid = d->Semantic.Index;
395 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
396 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
397 ctx->shader->input[i].centroid = d->Declaration.Centroid;
398 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
399 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
400 /* turn input into interpolate on EG */
401 if (ctx->shader->input[i].spi_sid) {
402 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
403 if (ctx->shader->input[i].interpolate > 0) {
404 evergreen_interp_alu(ctx, i);
405 } else {
406 evergreen_interp_flat(ctx, i);
407 }
408 }
409 }
410 break;
411 case TGSI_FILE_OUTPUT:
412 i = ctx->shader->noutput++;
413 ctx->shader->output[i].name = d->Semantic.Name;
414 ctx->shader->output[i].sid = d->Semantic.Index;
415 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
416 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
417 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
418 break;
419 case TGSI_FILE_CONSTANT:
420 case TGSI_FILE_TEMPORARY:
421 case TGSI_FILE_SAMPLER:
422 case TGSI_FILE_ADDRESS:
423 break;
424
425 case TGSI_FILE_SYSTEM_VALUE:
426 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
427 struct r600_bytecode_alu alu;
428 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
429
430 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
431 alu.src[0].sel = 0;
432 alu.src[0].chan = 3;
433
434 alu.dst.sel = 0;
435 alu.dst.chan = 3;
436 alu.dst.write = 1;
437 alu.last = 1;
438
439 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
440 return r;
441 break;
442 }
443
444 default:
445 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
446 return -EINVAL;
447 }
448 return 0;
449 }
450
451 static int r600_get_temp(struct r600_shader_ctx *ctx)
452 {
453 return ctx->temp_reg + ctx->max_driver_temp_used++;
454 }
455
456 /*
457 * for evergreen we need to scan the shader to find the number of GPRs we need to
458 * reserve for interpolation.
459 *
460 * we need to know if we are going to emit
461 * any centroid inputs
462 * if perspective and linear are required
463 */
464 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
465 {
466 int i;
467 int num_baryc;
468
469 ctx->input_linear = FALSE;
470 ctx->input_perspective = FALSE;
471 ctx->input_centroid = FALSE;
472 ctx->num_interp_gpr = 1;
473
474 /* any centroid inputs */
475 for (i = 0; i < ctx->info.num_inputs; i++) {
476 /* skip position/face */
477 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
478 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
479 continue;
480 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
481 ctx->input_linear = TRUE;
482 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
483 ctx->input_perspective = TRUE;
484 if (ctx->info.input_centroid[i])
485 ctx->input_centroid = TRUE;
486 }
487
488 num_baryc = 0;
489 /* ignoring sample for now */
490 if (ctx->input_perspective)
491 num_baryc++;
492 if (ctx->input_linear)
493 num_baryc++;
494 if (ctx->input_centroid)
495 num_baryc *= 2;
496
497 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
498
499 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
500 return ctx->num_interp_gpr;
501 }
502
503 static void tgsi_src(struct r600_shader_ctx *ctx,
504 const struct tgsi_full_src_register *tgsi_src,
505 struct r600_shader_src *r600_src)
506 {
507 memset(r600_src, 0, sizeof(*r600_src));
508 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
509 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
510 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
511 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
512 r600_src->neg = tgsi_src->Register.Negate;
513 r600_src->abs = tgsi_src->Register.Absolute;
514
515 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
516 int index;
517 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
518 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
519 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
520
521 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
522 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
523 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
524 return;
525 }
526 index = tgsi_src->Register.Index;
527 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
528 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
529 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
530 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
531 r600_src->swizzle[0] = 3;
532 r600_src->swizzle[1] = 3;
533 r600_src->swizzle[2] = 3;
534 r600_src->swizzle[3] = 3;
535 r600_src->sel = 0;
536 } else {
537 if (tgsi_src->Register.Indirect)
538 r600_src->rel = V_SQ_REL_RELATIVE;
539 r600_src->sel = tgsi_src->Register.Index;
540 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
541 }
542 }
543
544 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
545 {
546 struct r600_bytecode_vtx vtx;
547 unsigned int ar_reg;
548 int r;
549
550 if (offset) {
551 struct r600_bytecode_alu alu;
552
553 memset(&alu, 0, sizeof(alu));
554
555 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
556 alu.src[0].sel = ctx->ar_reg;
557
558 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
559 alu.src[1].value = offset;
560
561 alu.dst.sel = dst_reg;
562 alu.dst.write = 1;
563 alu.last = 1;
564
565 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
566 return r;
567
568 ar_reg = dst_reg;
569 } else {
570 ar_reg = ctx->ar_reg;
571 }
572
573 memset(&vtx, 0, sizeof(vtx));
574 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
575 vtx.src_gpr = ar_reg;
576 vtx.mega_fetch_count = 16;
577 vtx.dst_gpr = dst_reg;
578 vtx.dst_sel_x = 0; /* SEL_X */
579 vtx.dst_sel_y = 1; /* SEL_Y */
580 vtx.dst_sel_z = 2; /* SEL_Z */
581 vtx.dst_sel_w = 3; /* SEL_W */
582 vtx.data_format = FMT_32_32_32_32_FLOAT;
583 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
584 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
585 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
586 vtx.endian = r600_endian_swap(32);
587
588 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
589 return r;
590
591 return 0;
592 }
593
594 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
595 {
596 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
597 struct r600_bytecode_alu alu;
598 int i, j, k, nconst, r;
599
600 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
601 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
602 nconst++;
603 }
604 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
605 }
606 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
607 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
608 continue;
609 }
610
611 if (ctx->src[i].rel) {
612 int treg = r600_get_temp(ctx);
613 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
614 return r;
615
616 ctx->src[i].sel = treg;
617 ctx->src[i].rel = 0;
618 j--;
619 } else if (j > 0) {
620 int treg = r600_get_temp(ctx);
621 for (k = 0; k < 4; k++) {
622 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
623 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
624 alu.src[0].sel = ctx->src[i].sel;
625 alu.src[0].chan = k;
626 alu.src[0].rel = ctx->src[i].rel;
627 alu.dst.sel = treg;
628 alu.dst.chan = k;
629 alu.dst.write = 1;
630 if (k == 3)
631 alu.last = 1;
632 r = r600_bytecode_add_alu(ctx->bc, &alu);
633 if (r)
634 return r;
635 }
636 ctx->src[i].sel = treg;
637 ctx->src[i].rel =0;
638 j--;
639 }
640 }
641 return 0;
642 }
643
644 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
645 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
646 {
647 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
648 struct r600_bytecode_alu alu;
649 int i, j, k, nliteral, r;
650
651 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
652 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
653 nliteral++;
654 }
655 }
656 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
657 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
658 int treg = r600_get_temp(ctx);
659 for (k = 0; k < 4; k++) {
660 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
661 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
662 alu.src[0].sel = ctx->src[i].sel;
663 alu.src[0].chan = k;
664 alu.src[0].value = ctx->src[i].value[k];
665 alu.dst.sel = treg;
666 alu.dst.chan = k;
667 alu.dst.write = 1;
668 if (k == 3)
669 alu.last = 1;
670 r = r600_bytecode_add_alu(ctx->bc, &alu);
671 if (r)
672 return r;
673 }
674 ctx->src[i].sel = treg;
675 j--;
676 }
677 }
678 return 0;
679 }
680
681 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
682 {
683 struct r600_shader *shader = &pipeshader->shader;
684 struct tgsi_token *tokens = pipeshader->tokens;
685 struct tgsi_full_immediate *immediate;
686 struct tgsi_full_property *property;
687 struct r600_shader_ctx ctx;
688 struct r600_bytecode_output output[32];
689 unsigned output_done, noutput;
690 unsigned opcode;
691 int i, j, r = 0, pos0;
692
693 ctx.bc = &shader->bc;
694 ctx.shader = shader;
695 r600_bytecode_init(ctx.bc, rctx->chip_class);
696 ctx.tokens = tokens;
697 tgsi_scan_shader(tokens, &ctx.info);
698 tgsi_parse_init(&ctx.parse, tokens);
699 ctx.type = ctx.parse.FullHeader.Processor.Processor;
700 shader->processor_type = ctx.type;
701 ctx.bc->type = shader->processor_type;
702
703 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
704 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
705
706 shader->nr_cbufs = rctx->nr_cbufs;
707
708 /* register allocations */
709 /* Values [0,127] correspond to GPR[0..127].
710 * Values [128,159] correspond to constant buffer bank 0
711 * Values [160,191] correspond to constant buffer bank 1
712 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
713 * Values [256,287] correspond to constant buffer bank 2 (EG)
714 * Values [288,319] correspond to constant buffer bank 3 (EG)
715 * Other special values are shown in the list below.
716 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
717 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
718 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
719 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
720 * 248 SQ_ALU_SRC_0: special constant 0.0.
721 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
722 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
723 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
724 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
725 * 253 SQ_ALU_SRC_LITERAL: literal constant.
726 * 254 SQ_ALU_SRC_PV: previous vector result.
727 * 255 SQ_ALU_SRC_PS: previous scalar result.
728 */
729 for (i = 0; i < TGSI_FILE_COUNT; i++) {
730 ctx.file_offset[i] = 0;
731 }
732 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
733 ctx.file_offset[TGSI_FILE_INPUT] = 1;
734 if (ctx.bc->chip_class >= EVERGREEN) {
735 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
736 } else {
737 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
738 }
739 }
740 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
741 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
742 }
743 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
744 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
745 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
746 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
747
748 /* Outside the GPR range. This will be translated to one of the
749 * kcache banks later. */
750 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
751
752 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
753 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
754 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
755 ctx.temp_reg = ctx.ar_reg + 1;
756
757 ctx.nliterals = 0;
758 ctx.literals = NULL;
759 shader->fs_write_all = FALSE;
760 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
761 tgsi_parse_token(&ctx.parse);
762 switch (ctx.parse.FullToken.Token.Type) {
763 case TGSI_TOKEN_TYPE_IMMEDIATE:
764 immediate = &ctx.parse.FullToken.FullImmediate;
765 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
766 if(ctx.literals == NULL) {
767 r = -ENOMEM;
768 goto out_err;
769 }
770 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
771 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
772 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
773 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
774 ctx.nliterals++;
775 break;
776 case TGSI_TOKEN_TYPE_DECLARATION:
777 r = tgsi_declaration(&ctx);
778 if (r)
779 goto out_err;
780 break;
781 case TGSI_TOKEN_TYPE_INSTRUCTION:
782 r = tgsi_is_supported(&ctx);
783 if (r)
784 goto out_err;
785 ctx.max_driver_temp_used = 0;
786 /* reserve first tmp for everyone */
787 r600_get_temp(&ctx);
788
789 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
790 if ((r = tgsi_split_constant(&ctx)))
791 goto out_err;
792 if ((r = tgsi_split_literal_constant(&ctx)))
793 goto out_err;
794 if (ctx.bc->chip_class == CAYMAN)
795 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
796 else if (ctx.bc->chip_class >= EVERGREEN)
797 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
798 else
799 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
800 r = ctx.inst_info->process(&ctx);
801 if (r)
802 goto out_err;
803 break;
804 case TGSI_TOKEN_TYPE_PROPERTY:
805 property = &ctx.parse.FullToken.FullProperty;
806 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
807 if (property->u[0].Data == 1)
808 shader->fs_write_all = TRUE;
809 }
810 break;
811 default:
812 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
813 r = -EINVAL;
814 goto out_err;
815 }
816 }
817
818 noutput = shader->noutput;
819
820 /* clamp color outputs */
821 if (shader->clamp_color) {
822 for (i = 0; i < noutput; i++) {
823 if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
824 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
825
826 int j;
827 for (j = 0; j < 4; j++) {
828 struct r600_bytecode_alu alu;
829 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
830
831 /* MOV_SAT R, R */
832 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
833 alu.dst.sel = shader->output[i].gpr;
834 alu.dst.chan = j;
835 alu.dst.write = 1;
836 alu.dst.clamp = 1;
837 alu.src[0].sel = alu.dst.sel;
838 alu.src[0].chan = j;
839
840 if (j == 3) {
841 alu.last = 1;
842 }
843 r = r600_bytecode_add_alu(ctx.bc, &alu);
844 if (r)
845 return r;
846 }
847 }
848 }
849 }
850
851 /* export output */
852 j = 0;
853 for (i = 0, pos0 = 0; i < noutput; i++) {
854 memset(&output[i], 0, sizeof(struct r600_bytecode_output));
855 output[i + j].gpr = shader->output[i].gpr;
856 output[i + j].elem_size = 3;
857 output[i + j].swizzle_x = 0;
858 output[i + j].swizzle_y = 1;
859 output[i + j].swizzle_z = 2;
860 output[i + j].swizzle_w = 3;
861 output[i + j].burst_count = 1;
862 output[i + j].barrier = 1;
863 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
864 output[i + j].array_base = i - pos0;
865 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
866 switch (ctx.type) {
867 case TGSI_PROCESSOR_VERTEX:
868 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
869 output[i + j].array_base = 60;
870 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
871 /* position doesn't count in array_base */
872 pos0++;
873 }
874 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
875 output[i + j].array_base = 61;
876 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
877 /* position doesn't count in array_base */
878 pos0++;
879 }
880 break;
881 case TGSI_PROCESSOR_FRAGMENT:
882 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
883 output[i + j].array_base = shader->output[i].sid;
884 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
885 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
886 for (j = 1; j < shader->nr_cbufs; j++) {
887 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
888 output[i + j].gpr = shader->output[i].gpr;
889 output[i + j].elem_size = 3;
890 output[i + j].swizzle_x = 0;
891 output[i + j].swizzle_y = 1;
892 output[i + j].swizzle_z = 2;
893 output[i + j].swizzle_w = 3;
894 output[i + j].burst_count = 1;
895 output[i + j].barrier = 1;
896 output[i + j].array_base = shader->output[i].sid + j;
897 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
898 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
899 }
900 j--;
901 }
902 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
903 output[i + j].array_base = 61;
904 output[i + j].swizzle_x = 2;
905 output[i + j].swizzle_y = 7;
906 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
907 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
908 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
909 output[i + j].array_base = 61;
910 output[i + j].swizzle_x = 7;
911 output[i + j].swizzle_y = 1;
912 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
913 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
914 } else {
915 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
916 r = -EINVAL;
917 goto out_err;
918 }
919 break;
920 default:
921 R600_ERR("unsupported processor type %d\n", ctx.type);
922 r = -EINVAL;
923 goto out_err;
924 }
925 }
926 noutput += j;
927 /* add fake param output for vertex shader if no param is exported */
928 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
929 for (i = 0, pos0 = 0; i < noutput; i++) {
930 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
931 pos0 = 1;
932 break;
933 }
934 }
935 if (!pos0) {
936 memset(&output[i], 0, sizeof(struct r600_bytecode_output));
937 output[i].gpr = 0;
938 output[i].elem_size = 3;
939 output[i].swizzle_x = 0;
940 output[i].swizzle_y = 1;
941 output[i].swizzle_z = 2;
942 output[i].swizzle_w = 3;
943 output[i].burst_count = 1;
944 output[i].barrier = 1;
945 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
946 output[i].array_base = 0;
947 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
948 noutput++;
949 }
950 }
951 /* add fake pixel export */
952 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
953 memset(&output[0], 0, sizeof(struct r600_bytecode_output));
954 output[0].gpr = 0;
955 output[0].elem_size = 3;
956 output[0].swizzle_x = 7;
957 output[0].swizzle_y = 7;
958 output[0].swizzle_z = 7;
959 output[0].swizzle_w = 7;
960 output[0].burst_count = 1;
961 output[0].barrier = 1;
962 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
963 output[0].array_base = 0;
964 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
965 noutput++;
966 }
967 /* set export done on last export of each type */
968 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
969 if (ctx.bc->chip_class < CAYMAN) {
970 if (i == (noutput - 1)) {
971 output[i].end_of_program = 1;
972 }
973 }
974 if (!(output_done & (1 << output[i].type))) {
975 output_done |= (1 << output[i].type);
976 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
977 }
978 }
979 /* add output to bytecode */
980 for (i = 0; i < noutput; i++) {
981 r = r600_bytecode_add_output(ctx.bc, &output[i]);
982 if (r)
983 goto out_err;
984 }
985 /* add program end */
986 if (ctx.bc->chip_class == CAYMAN)
987 cm_bytecode_add_cf_end(ctx.bc);
988
989 free(ctx.literals);
990 tgsi_parse_free(&ctx.parse);
991 return 0;
992 out_err:
993 free(ctx.literals);
994 tgsi_parse_free(&ctx.parse);
995 return r;
996 }
997
998 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
999 {
1000 R600_ERR("%s tgsi opcode unsupported\n",
1001 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1002 return -EINVAL;
1003 }
1004
/* Handler that emits nothing and always reports success. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
1009
1010 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1011 const struct r600_shader_src *shader_src,
1012 unsigned chan)
1013 {
1014 bc_src->sel = shader_src->sel;
1015 bc_src->chan = shader_src->swizzle[chan];
1016 bc_src->neg = shader_src->neg;
1017 bc_src->abs = shader_src->abs;
1018 bc_src->rel = shader_src->rel;
1019 bc_src->value = shader_src->value[bc_src->chan];
1020 }
1021
1022 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1023 {
1024 bc_src->abs = 1;
1025 bc_src->neg = 0;
1026 }
1027
1028 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1029 {
1030 bc_src->neg = !bc_src->neg;
1031 }
1032
1033 static void tgsi_dst(struct r600_shader_ctx *ctx,
1034 const struct tgsi_full_dst_register *tgsi_dst,
1035 unsigned swizzle,
1036 struct r600_bytecode_alu_dst *r600_dst)
1037 {
1038 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1039
1040 r600_dst->sel = tgsi_dst->Register.Index;
1041 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1042 r600_dst->chan = swizzle;
1043 r600_dst->write = 1;
1044 if (tgsi_dst->Register.Indirect)
1045 r600_dst->rel = V_SQ_REL_RELATIVE;
1046 if (inst->Instruction.Saturate) {
1047 r600_dst->clamp = 1;
1048 }
1049 }
1050
/* Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of those bits is set. */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from the top channel down; the first hit is the last one written */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1)
			return chan;
	}
	return 0;
}
1062
1063 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1064 {
1065 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1066 struct r600_bytecode_alu alu;
1067 int i, j, r;
1068 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1069
1070 for (i = 0; i < lasti + 1; i++) {
1071 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1072 continue;
1073
1074 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1075 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1076
1077 alu.inst = ctx->inst_info->r600_opcode;
1078 if (!swap) {
1079 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1080 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1081 }
1082 } else {
1083 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1084 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1085 }
1086 /* handle some special cases */
1087 switch (ctx->inst_info->tgsi_opcode) {
1088 case TGSI_OPCODE_SUB:
1089 r600_bytecode_src_toggle_neg(&alu.src[1]);
1090 break;
1091 case TGSI_OPCODE_ABS:
1092 r600_bytecode_src_set_abs(&alu.src[0]);
1093 break;
1094 default:
1095 break;
1096 }
1097 if (i == lasti || trans_only) {
1098 alu.last = 1;
1099 }
1100 r = r600_bytecode_add_alu(ctx->bc, &alu);
1101 if (r)
1102 return r;
1103 }
1104 return 0;
1105 }
1106
/* Per-channel ALU emit with sources in TGSI order and normal grouping. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1111
/* Per-channel ALU emit with the first two TGSI sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1116
/* Per-channel ALU emit where every instruction is flagged `last`. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1121
1122 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1123 {
1124 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1125 struct r600_bytecode_alu alu;
1126 int i, r;
1127 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1128
1129 for (i = 0; i < lasti + 1; i++) {
1130
1131 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1132 continue;
1133 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1134 alu.inst = ctx->inst_info->r600_opcode;
1135
1136 alu.src[0].sel = V_SQ_ALU_SRC_0;
1137
1138 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1139
1140 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1141
1142 if (i == lasti) {
1143 alu.last = 1;
1144 }
1145 r = r600_bytecode_add_alu(ctx->bc, &alu);
1146 if (r)
1147 return r;
1148 }
1149 return 0;
1150
1151 }
1152
1153 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1154 {
1155 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1156 int i, j, r;
1157 struct r600_bytecode_alu alu;
1158 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1159
1160 for (i = 0 ; i < last_slot; i++) {
1161 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1162 alu.inst = ctx->inst_info->r600_opcode;
1163 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1164 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1165 }
1166 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1167 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1168
1169 if (i == last_slot - 1)
1170 alu.last = 1;
1171 r = r600_bytecode_add_alu(ctx->bc, &alu);
1172 if (r)
1173 return r;
1174 }
1175 return 0;
1176 }
1177
1178 /*
1179 * r600 - trunc to -PI..PI range
1180 * r700 - normalize by dividing by 2PI
1181 * see fdo bug 27901
1182 */
1183 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1184 {
1185 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1186 static float double_pi = 3.1415926535 * 2;
1187 static float neg_pi = -3.1415926535;
1188
1189 int r;
1190 struct r600_bytecode_alu alu;
1191
1192 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1193 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1194 alu.is_op3 = 1;
1195
1196 alu.dst.chan = 0;
1197 alu.dst.sel = ctx->temp_reg;
1198 alu.dst.write = 1;
1199
1200 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1201
1202 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1203 alu.src[1].chan = 0;
1204 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1205 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1206 alu.src[2].chan = 0;
1207 alu.last = 1;
1208 r = r600_bytecode_add_alu(ctx->bc, &alu);
1209 if (r)
1210 return r;
1211
1212 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1213 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1214
1215 alu.dst.chan = 0;
1216 alu.dst.sel = ctx->temp_reg;
1217 alu.dst.write = 1;
1218
1219 alu.src[0].sel = ctx->temp_reg;
1220 alu.src[0].chan = 0;
1221 alu.last = 1;
1222 r = r600_bytecode_add_alu(ctx->bc, &alu);
1223 if (r)
1224 return r;
1225
1226 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1227 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1228 alu.is_op3 = 1;
1229
1230 alu.dst.chan = 0;
1231 alu.dst.sel = ctx->temp_reg;
1232 alu.dst.write = 1;
1233
1234 alu.src[0].sel = ctx->temp_reg;
1235 alu.src[0].chan = 0;
1236
1237 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1238 alu.src[1].chan = 0;
1239 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1240 alu.src[2].chan = 0;
1241
1242 if (ctx->bc->chip_class == R600) {
1243 alu.src[1].value = *(uint32_t *)&double_pi;
1244 alu.src[2].value = *(uint32_t *)&neg_pi;
1245 } else {
1246 alu.src[1].sel = V_SQ_ALU_SRC_1;
1247 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1248 alu.src[2].neg = 1;
1249 }
1250
1251 alu.last = 1;
1252 r = r600_bytecode_add_alu(ctx->bc, &alu);
1253 if (r)
1254 return r;
1255 return 0;
1256 }
1257
1258 static int cayman_trig(struct r600_shader_ctx *ctx)
1259 {
1260 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1261 struct r600_bytecode_alu alu;
1262 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1263 int i, r;
1264
1265 r = tgsi_setup_trig(ctx);
1266 if (r)
1267 return r;
1268
1269
1270 for (i = 0; i < last_slot; i++) {
1271 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1272 alu.inst = ctx->inst_info->r600_opcode;
1273 alu.dst.chan = i;
1274
1275 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1276 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1277
1278 alu.src[0].sel = ctx->temp_reg;
1279 alu.src[0].chan = 0;
1280 if (i == last_slot - 1)
1281 alu.last = 1;
1282 r = r600_bytecode_add_alu(ctx->bc, &alu);
1283 if (r)
1284 return r;
1285 }
1286 return 0;
1287 }
1288
1289 static int tgsi_trig(struct r600_shader_ctx *ctx)
1290 {
1291 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1292 struct r600_bytecode_alu alu;
1293 int i, r;
1294 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1295
1296 r = tgsi_setup_trig(ctx);
1297 if (r)
1298 return r;
1299
1300 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1301 alu.inst = ctx->inst_info->r600_opcode;
1302 alu.dst.chan = 0;
1303 alu.dst.sel = ctx->temp_reg;
1304 alu.dst.write = 1;
1305
1306 alu.src[0].sel = ctx->temp_reg;
1307 alu.src[0].chan = 0;
1308 alu.last = 1;
1309 r = r600_bytecode_add_alu(ctx->bc, &alu);
1310 if (r)
1311 return r;
1312
1313 /* replicate result */
1314 for (i = 0; i < lasti + 1; i++) {
1315 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1316 continue;
1317
1318 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1319 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1320
1321 alu.src[0].sel = ctx->temp_reg;
1322 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1323 if (i == lasti)
1324 alu.last = 1;
1325 r = r600_bytecode_add_alu(ctx->bc, &alu);
1326 if (r)
1327 return r;
1328 }
1329 return 0;
1330 }
1331
1332 static int tgsi_scs(struct r600_shader_ctx *ctx)
1333 {
1334 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1335 struct r600_bytecode_alu alu;
1336 int i, r;
1337
1338 /* We'll only need the trig stuff if we are going to write to the
1339 * X or Y components of the destination vector.
1340 */
1341 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1342 r = tgsi_setup_trig(ctx);
1343 if (r)
1344 return r;
1345 }
1346
1347 /* dst.x = COS */
1348 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1349 if (ctx->bc->chip_class == CAYMAN) {
1350 for (i = 0 ; i < 3; i++) {
1351 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1352 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1353 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1354
1355 if (i == 0)
1356 alu.dst.write = 1;
1357 else
1358 alu.dst.write = 0;
1359 alu.src[0].sel = ctx->temp_reg;
1360 alu.src[0].chan = 0;
1361 if (i == 2)
1362 alu.last = 1;
1363 r = r600_bytecode_add_alu(ctx->bc, &alu);
1364 if (r)
1365 return r;
1366 }
1367 } else {
1368 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1369 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1370 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1371
1372 alu.src[0].sel = ctx->temp_reg;
1373 alu.src[0].chan = 0;
1374 alu.last = 1;
1375 r = r600_bytecode_add_alu(ctx->bc, &alu);
1376 if (r)
1377 return r;
1378 }
1379 }
1380
1381 /* dst.y = SIN */
1382 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1383 if (ctx->bc->chip_class == CAYMAN) {
1384 for (i = 0 ; i < 3; i++) {
1385 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1386 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1387 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1388 if (i == 1)
1389 alu.dst.write = 1;
1390 else
1391 alu.dst.write = 0;
1392 alu.src[0].sel = ctx->temp_reg;
1393 alu.src[0].chan = 0;
1394 if (i == 2)
1395 alu.last = 1;
1396 r = r600_bytecode_add_alu(ctx->bc, &alu);
1397 if (r)
1398 return r;
1399 }
1400 } else {
1401 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1402 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1403 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1404
1405 alu.src[0].sel = ctx->temp_reg;
1406 alu.src[0].chan = 0;
1407 alu.last = 1;
1408 r = r600_bytecode_add_alu(ctx->bc, &alu);
1409 if (r)
1410 return r;
1411 }
1412 }
1413
1414 /* dst.z = 0.0; */
1415 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1416 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1417
1418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1419
1420 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1421
1422 alu.src[0].sel = V_SQ_ALU_SRC_0;
1423 alu.src[0].chan = 0;
1424
1425 alu.last = 1;
1426
1427 r = r600_bytecode_add_alu(ctx->bc, &alu);
1428 if (r)
1429 return r;
1430 }
1431
1432 /* dst.w = 1.0; */
1433 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1434 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1435
1436 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1437
1438 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1439
1440 alu.src[0].sel = V_SQ_ALU_SRC_1;
1441 alu.src[0].chan = 0;
1442
1443 alu.last = 1;
1444
1445 r = r600_bytecode_add_alu(ctx->bc, &alu);
1446 if (r)
1447 return r;
1448 }
1449
1450 return 0;
1451 }
1452
1453 static int tgsi_kill(struct r600_shader_ctx *ctx)
1454 {
1455 struct r600_bytecode_alu alu;
1456 int i, r;
1457
1458 for (i = 0; i < 4; i++) {
1459 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1460 alu.inst = ctx->inst_info->r600_opcode;
1461
1462 alu.dst.chan = i;
1463
1464 alu.src[0].sel = V_SQ_ALU_SRC_0;
1465
1466 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1467 alu.src[1].sel = V_SQ_ALU_SRC_1;
1468 alu.src[1].neg = 1;
1469 } else {
1470 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1471 }
1472 if (i == 3) {
1473 alu.last = 1;
1474 }
1475 r = r600_bytecode_add_alu(ctx->bc, &alu);
1476 if (r)
1477 return r;
1478 }
1479
1480 /* kill must be last in ALU */
1481 ctx->bc->force_add_cf = 1;
1482 ctx->shader->uses_kill = TRUE;
1483 return 0;
1484 }
1485
1486 static int tgsi_lit(struct r600_shader_ctx *ctx)
1487 {
1488 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1489 struct r600_bytecode_alu alu;
1490 int r;
1491
1492 /* tmp.x = max(src.y, 0.0) */
1493 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1494 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1495 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1496 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1497 alu.src[1].chan = 1;
1498
1499 alu.dst.sel = ctx->temp_reg;
1500 alu.dst.chan = 0;
1501 alu.dst.write = 1;
1502
1503 alu.last = 1;
1504 r = r600_bytecode_add_alu(ctx->bc, &alu);
1505 if (r)
1506 return r;
1507
1508 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1509 {
1510 int chan;
1511 int sel;
1512 int i;
1513
1514 if (ctx->bc->chip_class == CAYMAN) {
1515 for (i = 0; i < 3; i++) {
1516 /* tmp.z = log(tmp.x) */
1517 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1518 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1519 alu.src[0].sel = ctx->temp_reg;
1520 alu.src[0].chan = 0;
1521 alu.dst.sel = ctx->temp_reg;
1522 alu.dst.chan = i;
1523 if (i == 2) {
1524 alu.dst.write = 1;
1525 alu.last = 1;
1526 } else
1527 alu.dst.write = 0;
1528
1529 r = r600_bytecode_add_alu(ctx->bc, &alu);
1530 if (r)
1531 return r;
1532 }
1533 } else {
1534 /* tmp.z = log(tmp.x) */
1535 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1537 alu.src[0].sel = ctx->temp_reg;
1538 alu.src[0].chan = 0;
1539 alu.dst.sel = ctx->temp_reg;
1540 alu.dst.chan = 2;
1541 alu.dst.write = 1;
1542 alu.last = 1;
1543 r = r600_bytecode_add_alu(ctx->bc, &alu);
1544 if (r)
1545 return r;
1546 }
1547
1548 chan = alu.dst.chan;
1549 sel = alu.dst.sel;
1550
1551 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1552 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1554 alu.src[0].sel = sel;
1555 alu.src[0].chan = chan;
1556 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1557 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1558 alu.dst.sel = ctx->temp_reg;
1559 alu.dst.chan = 0;
1560 alu.dst.write = 1;
1561 alu.is_op3 = 1;
1562 alu.last = 1;
1563 r = r600_bytecode_add_alu(ctx->bc, &alu);
1564 if (r)
1565 return r;
1566
1567 if (ctx->bc->chip_class == CAYMAN) {
1568 for (i = 0; i < 3; i++) {
1569 /* dst.z = exp(tmp.x) */
1570 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1571 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1572 alu.src[0].sel = ctx->temp_reg;
1573 alu.src[0].chan = 0;
1574 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1575 if (i == 2) {
1576 alu.dst.write = 1;
1577 alu.last = 1;
1578 } else
1579 alu.dst.write = 0;
1580 r = r600_bytecode_add_alu(ctx->bc, &alu);
1581 if (r)
1582 return r;
1583 }
1584 } else {
1585 /* dst.z = exp(tmp.x) */
1586 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1587 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1588 alu.src[0].sel = ctx->temp_reg;
1589 alu.src[0].chan = 0;
1590 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1591 alu.last = 1;
1592 r = r600_bytecode_add_alu(ctx->bc, &alu);
1593 if (r)
1594 return r;
1595 }
1596 }
1597
1598 /* dst.x, <- 1.0 */
1599 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1600 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1601 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1602 alu.src[0].chan = 0;
1603 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1604 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1605 r = r600_bytecode_add_alu(ctx->bc, &alu);
1606 if (r)
1607 return r;
1608
1609 /* dst.y = max(src.x, 0.0) */
1610 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1611 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1612 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1613 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1614 alu.src[1].chan = 0;
1615 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1616 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1617 r = r600_bytecode_add_alu(ctx->bc, &alu);
1618 if (r)
1619 return r;
1620
1621 /* dst.w, <- 1.0 */
1622 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1623 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1624 alu.src[0].sel = V_SQ_ALU_SRC_1;
1625 alu.src[0].chan = 0;
1626 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1627 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1628 alu.last = 1;
1629 r = r600_bytecode_add_alu(ctx->bc, &alu);
1630 if (r)
1631 return r;
1632
1633 return 0;
1634 }
1635
1636 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1637 {
1638 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1639 struct r600_bytecode_alu alu;
1640 int i, r;
1641
1642 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1643
1644 /* FIXME:
1645 * For state trackers other than OpenGL, we'll want to use
1646 * _RECIPSQRT_IEEE instead.
1647 */
1648 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1649
1650 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1651 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1652 r600_bytecode_src_set_abs(&alu.src[i]);
1653 }
1654 alu.dst.sel = ctx->temp_reg;
1655 alu.dst.write = 1;
1656 alu.last = 1;
1657 r = r600_bytecode_add_alu(ctx->bc, &alu);
1658 if (r)
1659 return r;
1660 /* replicate result */
1661 return tgsi_helper_tempx_replicate(ctx);
1662 }
1663
1664 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1665 {
1666 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1667 struct r600_bytecode_alu alu;
1668 int i, r;
1669
1670 for (i = 0; i < 4; i++) {
1671 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1672 alu.src[0].sel = ctx->temp_reg;
1673 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1674 alu.dst.chan = i;
1675 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1676 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1677 if (i == 3)
1678 alu.last = 1;
1679 r = r600_bytecode_add_alu(ctx->bc, &alu);
1680 if (r)
1681 return r;
1682 }
1683 return 0;
1684 }
1685
1686 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1687 {
1688 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1689 struct r600_bytecode_alu alu;
1690 int i, r;
1691
1692 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1693 alu.inst = ctx->inst_info->r600_opcode;
1694 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1695 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1696 }
1697 alu.dst.sel = ctx->temp_reg;
1698 alu.dst.write = 1;
1699 alu.last = 1;
1700 r = r600_bytecode_add_alu(ctx->bc, &alu);
1701 if (r)
1702 return r;
1703 /* replicate result */
1704 return tgsi_helper_tempx_replicate(ctx);
1705 }
1706
1707 static int cayman_pow(struct r600_shader_ctx *ctx)
1708 {
1709 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1710 int i, r;
1711 struct r600_bytecode_alu alu;
1712 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1713
1714 for (i = 0; i < 3; i++) {
1715 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1716 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1717 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1718 alu.dst.sel = ctx->temp_reg;
1719 alu.dst.chan = i;
1720 alu.dst.write = 1;
1721 if (i == 2)
1722 alu.last = 1;
1723 r = r600_bytecode_add_alu(ctx->bc, &alu);
1724 if (r)
1725 return r;
1726 }
1727
1728 /* b * LOG2(a) */
1729 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1730 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1731 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1732 alu.src[1].sel = ctx->temp_reg;
1733 alu.dst.sel = ctx->temp_reg;
1734 alu.dst.write = 1;
1735 alu.last = 1;
1736 r = r600_bytecode_add_alu(ctx->bc, &alu);
1737 if (r)
1738 return r;
1739
1740 for (i = 0; i < last_slot; i++) {
1741 /* POW(a,b) = EXP2(b * LOG2(a))*/
1742 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1743 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1744 alu.src[0].sel = ctx->temp_reg;
1745
1746 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1747 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1748 if (i == last_slot - 1)
1749 alu.last = 1;
1750 r = r600_bytecode_add_alu(ctx->bc, &alu);
1751 if (r)
1752 return r;
1753 }
1754 return 0;
1755 }
1756
1757 static int tgsi_pow(struct r600_shader_ctx *ctx)
1758 {
1759 struct r600_bytecode_alu alu;
1760 int r;
1761
1762 /* LOG2(a) */
1763 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1764 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1765 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1766 alu.dst.sel = ctx->temp_reg;
1767 alu.dst.write = 1;
1768 alu.last = 1;
1769 r = r600_bytecode_add_alu(ctx->bc, &alu);
1770 if (r)
1771 return r;
1772 /* b * LOG2(a) */
1773 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1774 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1775 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1776 alu.src[1].sel = ctx->temp_reg;
1777 alu.dst.sel = ctx->temp_reg;
1778 alu.dst.write = 1;
1779 alu.last = 1;
1780 r = r600_bytecode_add_alu(ctx->bc, &alu);
1781 if (r)
1782 return r;
1783 /* POW(a,b) = EXP2(b * LOG2(a))*/
1784 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1785 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1786 alu.src[0].sel = ctx->temp_reg;
1787 alu.dst.sel = ctx->temp_reg;
1788 alu.dst.write = 1;
1789 alu.last = 1;
1790 r = r600_bytecode_add_alu(ctx->bc, &alu);
1791 if (r)
1792 return r;
1793 return tgsi_helper_tempx_replicate(ctx);
1794 }
1795
1796 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1797 {
1798 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1799 struct r600_bytecode_alu alu;
1800 int i, r;
1801
1802 /* tmp = (src > 0 ? 1 : src) */
1803 for (i = 0; i < 4; i++) {
1804 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1805 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1806 alu.is_op3 = 1;
1807
1808 alu.dst.sel = ctx->temp_reg;
1809 alu.dst.chan = i;
1810
1811 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1812 alu.src[1].sel = V_SQ_ALU_SRC_1;
1813 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
1814
1815 if (i == 3)
1816 alu.last = 1;
1817 r = r600_bytecode_add_alu(ctx->bc, &alu);
1818 if (r)
1819 return r;
1820 }
1821
1822 /* dst = (-tmp > 0 ? -1 : tmp) */
1823 for (i = 0; i < 4; i++) {
1824 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1825 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1826 alu.is_op3 = 1;
1827 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1828
1829 alu.src[0].sel = ctx->temp_reg;
1830 alu.src[0].chan = i;
1831 alu.src[0].neg = 1;
1832
1833 alu.src[1].sel = V_SQ_ALU_SRC_1;
1834 alu.src[1].neg = 1;
1835
1836 alu.src[2].sel = ctx->temp_reg;
1837 alu.src[2].chan = i;
1838
1839 if (i == 3)
1840 alu.last = 1;
1841 r = r600_bytecode_add_alu(ctx->bc, &alu);
1842 if (r)
1843 return r;
1844 }
1845 return 0;
1846 }
1847
1848 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1849 {
1850 struct r600_bytecode_alu alu;
1851 int i, r;
1852
1853 for (i = 0; i < 4; i++) {
1854 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1855 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1856 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1857 alu.dst.chan = i;
1858 } else {
1859 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1860 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1861 alu.src[0].sel = ctx->temp_reg;
1862 alu.src[0].chan = i;
1863 }
1864 if (i == 3) {
1865 alu.last = 1;
1866 }
1867 r = r600_bytecode_add_alu(ctx->bc, &alu);
1868 if (r)
1869 return r;
1870 }
1871 return 0;
1872 }
1873
1874 static int tgsi_op3(struct r600_shader_ctx *ctx)
1875 {
1876 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1877 struct r600_bytecode_alu alu;
1878 int i, j, r;
1879 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1880
1881 for (i = 0; i < lasti + 1; i++) {
1882 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1883 continue;
1884
1885 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1886 alu.inst = ctx->inst_info->r600_opcode;
1887 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1888 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1889 }
1890
1891 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1892 alu.dst.chan = i;
1893 alu.dst.write = 1;
1894 alu.is_op3 = 1;
1895 if (i == lasti) {
1896 alu.last = 1;
1897 }
1898 r = r600_bytecode_add_alu(ctx->bc, &alu);
1899 if (r)
1900 return r;
1901 }
1902 return 0;
1903 }
1904
1905 static int tgsi_dp(struct r600_shader_ctx *ctx)
1906 {
1907 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1908 struct r600_bytecode_alu alu;
1909 int i, j, r;
1910
1911 for (i = 0; i < 4; i++) {
1912 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1913 alu.inst = ctx->inst_info->r600_opcode;
1914 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1915 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1916 }
1917
1918 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1919 alu.dst.chan = i;
1920 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1921 /* handle some special cases */
1922 switch (ctx->inst_info->tgsi_opcode) {
1923 case TGSI_OPCODE_DP2:
1924 if (i > 1) {
1925 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1926 alu.src[0].chan = alu.src[1].chan = 0;
1927 }
1928 break;
1929 case TGSI_OPCODE_DP3:
1930 if (i > 2) {
1931 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1932 alu.src[0].chan = alu.src[1].chan = 0;
1933 }
1934 break;
1935 case TGSI_OPCODE_DPH:
1936 if (i == 3) {
1937 alu.src[0].sel = V_SQ_ALU_SRC_1;
1938 alu.src[0].chan = 0;
1939 alu.src[0].neg = 0;
1940 }
1941 break;
1942 default:
1943 break;
1944 }
1945 if (i == 3) {
1946 alu.last = 1;
1947 }
1948 r = r600_bytecode_add_alu(ctx->bc, &alu);
1949 if (r)
1950 return r;
1951 }
1952 return 0;
1953 }
1954
1955 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1956 unsigned index)
1957 {
1958 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1959 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1960 inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1961 ctx->src[index].neg || ctx->src[index].abs;
1962 }
1963
1964 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1965 unsigned index)
1966 {
1967 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1968 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1969 }
1970
1971 static int tgsi_tex(struct r600_shader_ctx *ctx)
1972 {
1973 static float one_point_five = 1.5f;
1974 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1975 struct r600_bytecode_tex tex;
1976 struct r600_bytecode_alu alu;
1977 unsigned src_gpr;
1978 int r, i, j;
1979 int opcode;
1980 /* Texture fetch instructions can only use gprs as source.
1981 * Also they cannot negate the source or take the absolute value */
1982 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
1983 boolean src_loaded = FALSE;
1984 unsigned sampler_src_reg = 1;
1985 u8 offset_x = 0, offset_y = 0, offset_z = 0;
1986
1987 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
1988
1989 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
1990 /* get offset values */
1991 if (inst->Texture.NumOffsets) {
1992 assert(inst->Texture.NumOffsets == 1);
1993
1994 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
1995 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
1996 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
1997 }
1998 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1999 /* TGSI moves the sampler to src reg 3 for TXD */
2000 sampler_src_reg = 3;
2001
2002 for (i = 1; i < 3; i++) {
2003 /* set gradients h/v */
2004 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2005 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
2006 SQ_TEX_INST_SET_GRADIENTS_V;
2007 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2008 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2009
2010 if (tgsi_tex_src_requires_loading(ctx, i)) {
2011 tex.src_gpr = r600_get_temp(ctx);
2012 tex.src_sel_x = 0;
2013 tex.src_sel_y = 1;
2014 tex.src_sel_z = 2;
2015 tex.src_sel_w = 3;
2016
2017 for (j = 0; j < 4; j++) {
2018 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2019 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2020 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
2021 alu.dst.sel = tex.src_gpr;
2022 alu.dst.chan = j;
2023 if (j == 3)
2024 alu.last = 1;
2025 alu.dst.write = 1;
2026 r = r600_bytecode_add_alu(ctx->bc, &alu);
2027 if (r)
2028 return r;
2029 }
2030
2031 } else {
2032 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
2033 tex.src_sel_x = ctx->src[i].swizzle[0];
2034 tex.src_sel_y = ctx->src[i].swizzle[1];
2035 tex.src_sel_z = ctx->src[i].swizzle[2];
2036 tex.src_sel_w = ctx->src[i].swizzle[3];
2037 tex.src_rel = ctx->src[i].rel;
2038 }
2039 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
2040 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
2041 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2042 tex.coord_type_x = 1;
2043 tex.coord_type_y = 1;
2044 tex.coord_type_z = 1;
2045 tex.coord_type_w = 1;
2046 }
2047 r = r600_bytecode_add_tex(ctx->bc, &tex);
2048 if (r)
2049 return r;
2050 }
2051 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
2052 int out_chan;
2053 /* Add perspective divide */
2054 if (ctx->bc->chip_class == CAYMAN) {
2055 out_chan = 2;
2056 for (i = 0; i < 3; i++) {
2057 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2058 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2059 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2060
2061 alu.dst.sel = ctx->temp_reg;
2062 alu.dst.chan = i;
2063 if (i == 2)
2064 alu.last = 1;
2065 if (out_chan == i)
2066 alu.dst.write = 1;
2067 r = r600_bytecode_add_alu(ctx->bc, &alu);
2068 if (r)
2069 return r;
2070 }
2071
2072 } else {
2073 out_chan = 3;
2074 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2075 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2076 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2077
2078 alu.dst.sel = ctx->temp_reg;
2079 alu.dst.chan = out_chan;
2080 alu.last = 1;
2081 alu.dst.write = 1;
2082 r = r600_bytecode_add_alu(ctx->bc, &alu);
2083 if (r)
2084 return r;
2085 }
2086
2087 for (i = 0; i < 3; i++) {
2088 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2089 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2090 alu.src[0].sel = ctx->temp_reg;
2091 alu.src[0].chan = out_chan;
2092 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2093 alu.dst.sel = ctx->temp_reg;
2094 alu.dst.chan = i;
2095 alu.dst.write = 1;
2096 r = r600_bytecode_add_alu(ctx->bc, &alu);
2097 if (r)
2098 return r;
2099 }
2100 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2102 alu.src[0].sel = V_SQ_ALU_SRC_1;
2103 alu.src[0].chan = 0;
2104 alu.dst.sel = ctx->temp_reg;
2105 alu.dst.chan = 3;
2106 alu.last = 1;
2107 alu.dst.write = 1;
2108 r = r600_bytecode_add_alu(ctx->bc, &alu);
2109 if (r)
2110 return r;
2111 src_loaded = TRUE;
2112 src_gpr = ctx->temp_reg;
2113 }
2114
2115 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2116 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
2117 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
2118
2119 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
2120 for (i = 0; i < 4; i++) {
2121 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2123 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2124 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2125 alu.dst.sel = ctx->temp_reg;
2126 alu.dst.chan = i;
2127 if (i == 3)
2128 alu.last = 1;
2129 alu.dst.write = 1;
2130 r = r600_bytecode_add_alu(ctx->bc, &alu);
2131 if (r)
2132 return r;
2133 }
2134
2135 /* tmp1.z = RCP_e(|tmp1.z|) */
2136 if (ctx->bc->chip_class == CAYMAN) {
2137 for (i = 0; i < 3; i++) {
2138 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2139 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2140 alu.src[0].sel = ctx->temp_reg;
2141 alu.src[0].chan = 2;
2142 alu.src[0].abs = 1;
2143 alu.dst.sel = ctx->temp_reg;
2144 alu.dst.chan = i;
2145 if (i == 2)
2146 alu.dst.write = 1;
2147 if (i == 2)
2148 alu.last = 1;
2149 r = r600_bytecode_add_alu(ctx->bc, &alu);
2150 if (r)
2151 return r;
2152 }
2153 } else {
2154 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2155 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2156 alu.src[0].sel = ctx->temp_reg;
2157 alu.src[0].chan = 2;
2158 alu.src[0].abs = 1;
2159 alu.dst.sel = ctx->temp_reg;
2160 alu.dst.chan = 2;
2161 alu.dst.write = 1;
2162 alu.last = 1;
2163 r = r600_bytecode_add_alu(ctx->bc, &alu);
2164 if (r)
2165 return r;
2166 }
2167
2168 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
2169 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
2170 * muladd has no writemask, have to use another temp
2171 */
2172 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2173 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2174 alu.is_op3 = 1;
2175
2176 alu.src[0].sel = ctx->temp_reg;
2177 alu.src[0].chan = 0;
2178 alu.src[1].sel = ctx->temp_reg;
2179 alu.src[1].chan = 2;
2180
2181 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2182 alu.src[2].chan = 0;
2183 alu.src[2].value = *(uint32_t *)&one_point_five;
2184
2185 alu.dst.sel = ctx->temp_reg;
2186 alu.dst.chan = 0;
2187 alu.dst.write = 1;
2188
2189 r = r600_bytecode_add_alu(ctx->bc, &alu);
2190 if (r)
2191 return r;
2192
2193 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2194 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2195 alu.is_op3 = 1;
2196
2197 alu.src[0].sel = ctx->temp_reg;
2198 alu.src[0].chan = 1;
2199 alu.src[1].sel = ctx->temp_reg;
2200 alu.src[1].chan = 2;
2201
2202 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2203 alu.src[2].chan = 0;
2204 alu.src[2].value = *(uint32_t *)&one_point_five;
2205
2206 alu.dst.sel = ctx->temp_reg;
2207 alu.dst.chan = 1;
2208 alu.dst.write = 1;
2209
2210 alu.last = 1;
2211 r = r600_bytecode_add_alu(ctx->bc, &alu);
2212 if (r)
2213 return r;
2214
2215 src_loaded = TRUE;
2216 src_gpr = ctx->temp_reg;
2217 }
2218
2219 if (src_requires_loading && !src_loaded) {
2220 for (i = 0; i < 4; i++) {
2221 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2222 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2223 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2224 alu.dst.sel = ctx->temp_reg;
2225 alu.dst.chan = i;
2226 if (i == 3)
2227 alu.last = 1;
2228 alu.dst.write = 1;
2229 r = r600_bytecode_add_alu(ctx->bc, &alu);
2230 if (r)
2231 return r;
2232 }
2233 src_loaded = TRUE;
2234 src_gpr = ctx->temp_reg;
2235 }
2236
2237 opcode = ctx->inst_info->r600_opcode;
2238 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2239 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2240 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2241 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
2242 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
2243 switch (opcode) {
2244 case SQ_TEX_INST_SAMPLE:
2245 opcode = SQ_TEX_INST_SAMPLE_C;
2246 break;
2247 case SQ_TEX_INST_SAMPLE_L:
2248 opcode = SQ_TEX_INST_SAMPLE_C_L;
2249 break;
2250 case SQ_TEX_INST_SAMPLE_LB:
2251 opcode = SQ_TEX_INST_SAMPLE_C_LB;
2252 break;
2253 case SQ_TEX_INST_SAMPLE_G:
2254 opcode = SQ_TEX_INST_SAMPLE_C_G;
2255 break;
2256 }
2257 }
2258
2259 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2260 tex.inst = opcode;
2261
2262 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2263 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2264 tex.src_gpr = src_gpr;
2265 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2266 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2267 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2268 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2269 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2270 if (src_loaded) {
2271 tex.src_sel_x = 0;
2272 tex.src_sel_y = 1;
2273 tex.src_sel_z = 2;
2274 tex.src_sel_w = 3;
2275 } else {
2276 tex.src_sel_x = ctx->src[0].swizzle[0];
2277 tex.src_sel_y = ctx->src[0].swizzle[1];
2278 tex.src_sel_z = ctx->src[0].swizzle[2];
2279 tex.src_sel_w = ctx->src[0].swizzle[3];
2280 tex.src_rel = ctx->src[0].rel;
2281 }
2282
2283 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2284 tex.src_sel_x = 1;
2285 tex.src_sel_y = 0;
2286 tex.src_sel_z = 3;
2287 tex.src_sel_w = 1;
2288 }
2289
2290 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
2291 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
2292 tex.coord_type_x = 1;
2293 tex.coord_type_y = 1;
2294 }
2295 tex.coord_type_z = 1;
2296 tex.coord_type_w = 1;
2297
2298 tex.offset_x = offset_x;
2299 tex.offset_y = offset_y;
2300 tex.offset_z = offset_z;
2301
2302 /* Put the depth for comparison in W.
2303 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
2304 * Some instructions expect the depth in Z. */
2305 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2306 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2307 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2308 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
2309 opcode != SQ_TEX_INST_SAMPLE_C_L &&
2310 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
2311 tex.src_sel_w = tex.src_sel_z;
2312 }
2313
2314 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
2315 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
2316 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
2317 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
2318 /* the array index is read from Y */
2319 tex.coord_type_y = 0;
2320 } else {
2321 /* the array index is read from Z */
2322 tex.coord_type_z = 0;
2323 tex.src_sel_z = tex.src_sel_y;
2324 }
2325 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
2326 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
2327 /* the array index is read from Z */
2328 tex.coord_type_z = 0;
2329
2330 r = r600_bytecode_add_tex(ctx->bc, &tex);
2331 if (r)
2332 return r;
2333
2334 /* add shadow ambient support - gallium doesn't do it yet */
2335 return 0;
2336 }
2337
2338 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2339 {
2340 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2341 struct r600_bytecode_alu alu;
2342 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2343 unsigned i;
2344 int r;
2345
2346 /* optimize if it's just an equal balance */
2347 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2348 for (i = 0; i < lasti + 1; i++) {
2349 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2350 continue;
2351
2352 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2353 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2354 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2355 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2356 alu.omod = 3;
2357 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2358 alu.dst.chan = i;
2359 if (i == lasti) {
2360 alu.last = 1;
2361 }
2362 r = r600_bytecode_add_alu(ctx->bc, &alu);
2363 if (r)
2364 return r;
2365 }
2366 return 0;
2367 }
2368
2369 /* 1 - src0 */
2370 for (i = 0; i < lasti + 1; i++) {
2371 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2372 continue;
2373
2374 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2375 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2376 alu.src[0].sel = V_SQ_ALU_SRC_1;
2377 alu.src[0].chan = 0;
2378 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2379 r600_bytecode_src_toggle_neg(&alu.src[1]);
2380 alu.dst.sel = ctx->temp_reg;
2381 alu.dst.chan = i;
2382 if (i == lasti) {
2383 alu.last = 1;
2384 }
2385 alu.dst.write = 1;
2386 r = r600_bytecode_add_alu(ctx->bc, &alu);
2387 if (r)
2388 return r;
2389 }
2390
2391 /* (1 - src0) * src2 */
2392 for (i = 0; i < lasti + 1; i++) {
2393 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2394 continue;
2395
2396 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2397 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2398 alu.src[0].sel = ctx->temp_reg;
2399 alu.src[0].chan = i;
2400 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2401 alu.dst.sel = ctx->temp_reg;
2402 alu.dst.chan = i;
2403 if (i == lasti) {
2404 alu.last = 1;
2405 }
2406 alu.dst.write = 1;
2407 r = r600_bytecode_add_alu(ctx->bc, &alu);
2408 if (r)
2409 return r;
2410 }
2411
2412 /* src0 * src1 + (1 - src0) * src2 */
2413 for (i = 0; i < lasti + 1; i++) {
2414 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2415 continue;
2416
2417 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2419 alu.is_op3 = 1;
2420 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2421 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2422 alu.src[2].sel = ctx->temp_reg;
2423 alu.src[2].chan = i;
2424
2425 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2426 alu.dst.chan = i;
2427 if (i == lasti) {
2428 alu.last = 1;
2429 }
2430 r = r600_bytecode_add_alu(ctx->bc, &alu);
2431 if (r)
2432 return r;
2433 }
2434 return 0;
2435 }
2436
2437 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2438 {
2439 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2440 struct r600_bytecode_alu alu;
2441 int i, r;
2442 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2443
2444 for (i = 0; i < lasti + 1; i++) {
2445 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2446 continue;
2447
2448 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2449 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2450 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2451 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2452 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
2453 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2454 alu.dst.chan = i;
2455 alu.dst.write = 1;
2456 alu.is_op3 = 1;
2457 if (i == lasti)
2458 alu.last = 1;
2459 r = r600_bytecode_add_alu(ctx->bc, &alu);
2460 if (r)
2461 return r;
2462 }
2463 return 0;
2464 }
2465
2466 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2467 {
2468 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2469 static const unsigned int src0_swizzle[] = {2, 0, 1};
2470 static const unsigned int src1_swizzle[] = {1, 2, 0};
2471 struct r600_bytecode_alu alu;
2472 uint32_t use_temp = 0;
2473 int i, r;
2474
2475 if (inst->Dst[0].Register.WriteMask != 0xf)
2476 use_temp = 1;
2477
2478 for (i = 0; i < 4; i++) {
2479 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2480 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2481 if (i < 3) {
2482 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2483 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2484 } else {
2485 alu.src[0].sel = V_SQ_ALU_SRC_0;
2486 alu.src[0].chan = i;
2487 alu.src[1].sel = V_SQ_ALU_SRC_0;
2488 alu.src[1].chan = i;
2489 }
2490
2491 alu.dst.sel = ctx->temp_reg;
2492 alu.dst.chan = i;
2493 alu.dst.write = 1;
2494
2495 if (i == 3)
2496 alu.last = 1;
2497 r = r600_bytecode_add_alu(ctx->bc, &alu);
2498 if (r)
2499 return r;
2500 }
2501
2502 for (i = 0; i < 4; i++) {
2503 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2504 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2505
2506 if (i < 3) {
2507 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2508 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2509 } else {
2510 alu.src[0].sel = V_SQ_ALU_SRC_0;
2511 alu.src[0].chan = i;
2512 alu.src[1].sel = V_SQ_ALU_SRC_0;
2513 alu.src[1].chan = i;
2514 }
2515
2516 alu.src[2].sel = ctx->temp_reg;
2517 alu.src[2].neg = 1;
2518 alu.src[2].chan = i;
2519
2520 if (use_temp)
2521 alu.dst.sel = ctx->temp_reg;
2522 else
2523 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2524 alu.dst.chan = i;
2525 alu.dst.write = 1;
2526 alu.is_op3 = 1;
2527 if (i == 3)
2528 alu.last = 1;
2529 r = r600_bytecode_add_alu(ctx->bc, &alu);
2530 if (r)
2531 return r;
2532 }
2533 if (use_temp)
2534 return tgsi_helper_copy(ctx, inst);
2535 return 0;
2536 }
2537
2538 static int tgsi_exp(struct r600_shader_ctx *ctx)
2539 {
2540 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2541 struct r600_bytecode_alu alu;
2542 int r;
2543 int i;
2544
2545 /* result.x = 2^floor(src); */
2546 if (inst->Dst[0].Register.WriteMask & 1) {
2547 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2548
2549 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2550 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2551
2552 alu.dst.sel = ctx->temp_reg;
2553 alu.dst.chan = 0;
2554 alu.dst.write = 1;
2555 alu.last = 1;
2556 r = r600_bytecode_add_alu(ctx->bc, &alu);
2557 if (r)
2558 return r;
2559
2560 if (ctx->bc->chip_class == CAYMAN) {
2561 for (i = 0; i < 3; i++) {
2562 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2563 alu.src[0].sel = ctx->temp_reg;
2564 alu.src[0].chan = 0;
2565
2566 alu.dst.sel = ctx->temp_reg;
2567 alu.dst.chan = i;
2568 if (i == 0)
2569 alu.dst.write = 1;
2570 if (i == 2)
2571 alu.last = 1;
2572 r = r600_bytecode_add_alu(ctx->bc, &alu);
2573 if (r)
2574 return r;
2575 }
2576 } else {
2577 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2578 alu.src[0].sel = ctx->temp_reg;
2579 alu.src[0].chan = 0;
2580
2581 alu.dst.sel = ctx->temp_reg;
2582 alu.dst.chan = 0;
2583 alu.dst.write = 1;
2584 alu.last = 1;
2585 r = r600_bytecode_add_alu(ctx->bc, &alu);
2586 if (r)
2587 return r;
2588 }
2589 }
2590
2591 /* result.y = tmp - floor(tmp); */
2592 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2593 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2594
2595 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2596 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2597
2598 alu.dst.sel = ctx->temp_reg;
2599 #if 0
2600 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2601 if (r)
2602 return r;
2603 #endif
2604 alu.dst.write = 1;
2605 alu.dst.chan = 1;
2606
2607 alu.last = 1;
2608
2609 r = r600_bytecode_add_alu(ctx->bc, &alu);
2610 if (r)
2611 return r;
2612 }
2613
2614 /* result.z = RoughApprox2ToX(tmp);*/
2615 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2616 if (ctx->bc->chip_class == CAYMAN) {
2617 for (i = 0; i < 3; i++) {
2618 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2619 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2620 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2621
2622 alu.dst.sel = ctx->temp_reg;
2623 alu.dst.chan = i;
2624 if (i == 2) {
2625 alu.dst.write = 1;
2626 alu.last = 1;
2627 }
2628
2629 r = r600_bytecode_add_alu(ctx->bc, &alu);
2630 if (r)
2631 return r;
2632 }
2633 } else {
2634 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2635 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2636 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2637
2638 alu.dst.sel = ctx->temp_reg;
2639 alu.dst.write = 1;
2640 alu.dst.chan = 2;
2641
2642 alu.last = 1;
2643
2644 r = r600_bytecode_add_alu(ctx->bc, &alu);
2645 if (r)
2646 return r;
2647 }
2648 }
2649
2650 /* result.w = 1.0;*/
2651 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2652 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2653
2654 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2655 alu.src[0].sel = V_SQ_ALU_SRC_1;
2656 alu.src[0].chan = 0;
2657
2658 alu.dst.sel = ctx->temp_reg;
2659 alu.dst.chan = 3;
2660 alu.dst.write = 1;
2661 alu.last = 1;
2662 r = r600_bytecode_add_alu(ctx->bc, &alu);
2663 if (r)
2664 return r;
2665 }
2666 return tgsi_helper_copy(ctx, inst);
2667 }
2668
2669 static int tgsi_log(struct r600_shader_ctx *ctx)
2670 {
2671 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2672 struct r600_bytecode_alu alu;
2673 int r;
2674 int i;
2675
2676 /* result.x = floor(log2(|src|)); */
2677 if (inst->Dst[0].Register.WriteMask & 1) {
2678 if (ctx->bc->chip_class == CAYMAN) {
2679 for (i = 0; i < 3; i++) {
2680 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2681
2682 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2683 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2684 r600_bytecode_src_set_abs(&alu.src[0]);
2685
2686 alu.dst.sel = ctx->temp_reg;
2687 alu.dst.chan = i;
2688 if (i == 0)
2689 alu.dst.write = 1;
2690 if (i == 2)
2691 alu.last = 1;
2692 r = r600_bytecode_add_alu(ctx->bc, &alu);
2693 if (r)
2694 return r;
2695 }
2696
2697 } else {
2698 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2699
2700 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2701 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2702 r600_bytecode_src_set_abs(&alu.src[0]);
2703
2704 alu.dst.sel = ctx->temp_reg;
2705 alu.dst.chan = 0;
2706 alu.dst.write = 1;
2707 alu.last = 1;
2708 r = r600_bytecode_add_alu(ctx->bc, &alu);
2709 if (r)
2710 return r;
2711 }
2712
2713 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2714 alu.src[0].sel = ctx->temp_reg;
2715 alu.src[0].chan = 0;
2716
2717 alu.dst.sel = ctx->temp_reg;
2718 alu.dst.chan = 0;
2719 alu.dst.write = 1;
2720 alu.last = 1;
2721
2722 r = r600_bytecode_add_alu(ctx->bc, &alu);
2723 if (r)
2724 return r;
2725 }
2726
2727 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2728 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2729
2730 if (ctx->bc->chip_class == CAYMAN) {
2731 for (i = 0; i < 3; i++) {
2732 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2733
2734 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2735 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2736 r600_bytecode_src_set_abs(&alu.src[0]);
2737
2738 alu.dst.sel = ctx->temp_reg;
2739 alu.dst.chan = i;
2740 if (i == 1)
2741 alu.dst.write = 1;
2742 if (i == 2)
2743 alu.last = 1;
2744
2745 r = r600_bytecode_add_alu(ctx->bc, &alu);
2746 if (r)
2747 return r;
2748 }
2749 } else {
2750 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2751
2752 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2753 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2754 r600_bytecode_src_set_abs(&alu.src[0]);
2755
2756 alu.dst.sel = ctx->temp_reg;
2757 alu.dst.chan = 1;
2758 alu.dst.write = 1;
2759 alu.last = 1;
2760
2761 r = r600_bytecode_add_alu(ctx->bc, &alu);
2762 if (r)
2763 return r;
2764 }
2765
2766 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2767
2768 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2769 alu.src[0].sel = ctx->temp_reg;
2770 alu.src[0].chan = 1;
2771
2772 alu.dst.sel = ctx->temp_reg;
2773 alu.dst.chan = 1;
2774 alu.dst.write = 1;
2775 alu.last = 1;
2776
2777 r = r600_bytecode_add_alu(ctx->bc, &alu);
2778 if (r)
2779 return r;
2780
2781 if (ctx->bc->chip_class == CAYMAN) {
2782 for (i = 0; i < 3; i++) {
2783 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2784 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2785 alu.src[0].sel = ctx->temp_reg;
2786 alu.src[0].chan = 1;
2787
2788 alu.dst.sel = ctx->temp_reg;
2789 alu.dst.chan = i;
2790 if (i == 1)
2791 alu.dst.write = 1;
2792 if (i == 2)
2793 alu.last = 1;
2794
2795 r = r600_bytecode_add_alu(ctx->bc, &alu);
2796 if (r)
2797 return r;
2798 }
2799 } else {
2800 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2801 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2802 alu.src[0].sel = ctx->temp_reg;
2803 alu.src[0].chan = 1;
2804
2805 alu.dst.sel = ctx->temp_reg;
2806 alu.dst.chan = 1;
2807 alu.dst.write = 1;
2808 alu.last = 1;
2809
2810 r = r600_bytecode_add_alu(ctx->bc, &alu);
2811 if (r)
2812 return r;
2813 }
2814
2815 if (ctx->bc->chip_class == CAYMAN) {
2816 for (i = 0; i < 3; i++) {
2817 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2818 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2819 alu.src[0].sel = ctx->temp_reg;
2820 alu.src[0].chan = 1;
2821
2822 alu.dst.sel = ctx->temp_reg;
2823 alu.dst.chan = i;
2824 if (i == 1)
2825 alu.dst.write = 1;
2826 if (i == 2)
2827 alu.last = 1;
2828
2829 r = r600_bytecode_add_alu(ctx->bc, &alu);
2830 if (r)
2831 return r;
2832 }
2833 } else {
2834 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2835 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2836 alu.src[0].sel = ctx->temp_reg;
2837 alu.src[0].chan = 1;
2838
2839 alu.dst.sel = ctx->temp_reg;
2840 alu.dst.chan = 1;
2841 alu.dst.write = 1;
2842 alu.last = 1;
2843
2844 r = r600_bytecode_add_alu(ctx->bc, &alu);
2845 if (r)
2846 return r;
2847 }
2848
2849 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2850
2851 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2852
2853 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2854 r600_bytecode_src_set_abs(&alu.src[0]);
2855
2856 alu.src[1].sel = ctx->temp_reg;
2857 alu.src[1].chan = 1;
2858
2859 alu.dst.sel = ctx->temp_reg;
2860 alu.dst.chan = 1;
2861 alu.dst.write = 1;
2862 alu.last = 1;
2863
2864 r = r600_bytecode_add_alu(ctx->bc, &alu);
2865 if (r)
2866 return r;
2867 }
2868
2869 /* result.z = log2(|src|);*/
2870 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2871 if (ctx->bc->chip_class == CAYMAN) {
2872 for (i = 0; i < 3; i++) {
2873 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2874
2875 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2876 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2877 r600_bytecode_src_set_abs(&alu.src[0]);
2878
2879 alu.dst.sel = ctx->temp_reg;
2880 if (i == 2)
2881 alu.dst.write = 1;
2882 alu.dst.chan = i;
2883 if (i == 2)
2884 alu.last = 1;
2885
2886 r = r600_bytecode_add_alu(ctx->bc, &alu);
2887 if (r)
2888 return r;
2889 }
2890 } else {
2891 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2892
2893 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2894 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2895 r600_bytecode_src_set_abs(&alu.src[0]);
2896
2897 alu.dst.sel = ctx->temp_reg;
2898 alu.dst.write = 1;
2899 alu.dst.chan = 2;
2900 alu.last = 1;
2901
2902 r = r600_bytecode_add_alu(ctx->bc, &alu);
2903 if (r)
2904 return r;
2905 }
2906 }
2907
2908 /* result.w = 1.0; */
2909 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2910 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2911
2912 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2913 alu.src[0].sel = V_SQ_ALU_SRC_1;
2914 alu.src[0].chan = 0;
2915
2916 alu.dst.sel = ctx->temp_reg;
2917 alu.dst.chan = 3;
2918 alu.dst.write = 1;
2919 alu.last = 1;
2920
2921 r = r600_bytecode_add_alu(ctx->bc, &alu);
2922 if (r)
2923 return r;
2924 }
2925
2926 return tgsi_helper_copy(ctx, inst);
2927 }
2928
2929 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2930 {
2931 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2932 struct r600_bytecode_alu alu;
2933 int r;
2934
2935 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2936
2937 switch (inst->Instruction.Opcode) {
2938 case TGSI_OPCODE_ARL:
2939 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2940 break;
2941 case TGSI_OPCODE_ARR:
2942 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2943 break;
2944 case TGSI_OPCODE_UARL:
2945 break;
2946 default:
2947 assert(0);
2948 return -1;
2949 }
2950
2951 if (alu.inst) {
2952 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2953 alu.last = 1;
2954 alu.dst.sel = ctx->ar_reg;
2955 alu.dst.write = 1;
2956 r = r600_bytecode_add_alu(ctx->bc, &alu);
2957 if (r)
2958 return r;
2959 }
2960
2961 /* TODO: Note that the MOVA can be avoided if we never use AR for
2962 * indexing non-CB registers in the current ALU clause. Similarly, we
2963 * need to load AR from ar_reg again if we started a new clause
2964 * between ARL and AR usage. The easy way to do that is to remove
2965 * the MOVA here, and load it for the first AR access after ar_reg
2966 * has been modified in each clause. */
2967 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2968 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2969 if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
2970 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2971 else {
2972 alu.src[0].sel = ctx->ar_reg;
2973 alu.src[0].chan = 0;
2974 }
2975 alu.last = 1;
2976 r = r600_bytecode_add_alu(ctx->bc, &alu);
2977 if (r)
2978 return r;
2979 return 0;
2980 }
2981 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2982 {
2983 /* TODO from r600c, ar values don't persist between clauses */
2984 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2985 struct r600_bytecode_alu alu;
2986 int r;
2987
2988 switch (inst->Instruction.Opcode) {
2989 case TGSI_OPCODE_ARL:
2990 memset(&alu, 0, sizeof(alu));
2991 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2992 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2993 alu.dst.sel = ctx->ar_reg;
2994 alu.dst.write = 1;
2995 alu.last = 1;
2996
2997 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2998 return r;
2999
3000 memset(&alu, 0, sizeof(alu));
3001 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3002 alu.src[0].sel = ctx->ar_reg;
3003 alu.dst.sel = ctx->ar_reg;
3004 alu.dst.write = 1;
3005 alu.last = 1;
3006
3007 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3008 return r;
3009 break;
3010 case TGSI_OPCODE_ARR:
3011 memset(&alu, 0, sizeof(alu));
3012 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3013 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3014 alu.dst.sel = ctx->ar_reg;
3015 alu.dst.write = 1;
3016 alu.last = 1;
3017
3018 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3019 return r;
3020 break;
3021 case TGSI_OPCODE_UARL:
3022 break;
3023 default:
3024 assert(0);
3025 return -1;
3026 }
3027
3028 memset(&alu, 0, sizeof(alu));
3029 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
3030 if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
3031 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3032 else
3033 alu.src[0].sel = ctx->ar_reg;
3034 alu.last = 1;
3035
3036 r = r600_bytecode_add_alu(ctx->bc, &alu);
3037 if (r)
3038 return r;
3039 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
3040 return 0;
3041 }
3042
3043 static int tgsi_opdst(struct r600_shader_ctx *ctx)
3044 {
3045 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3046 struct r600_bytecode_alu alu;
3047 int i, r = 0;
3048
3049 for (i = 0; i < 4; i++) {
3050 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3051
3052 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3053 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3054
3055 if (i == 0 || i == 3) {
3056 alu.src[0].sel = V_SQ_ALU_SRC_1;
3057 } else {
3058 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3059 }
3060
3061 if (i == 0 || i == 2) {
3062 alu.src[1].sel = V_SQ_ALU_SRC_1;
3063 } else {
3064 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3065 }
3066 if (i == 3)
3067 alu.last = 1;
3068 r = r600_bytecode_add_alu(ctx->bc, &alu);
3069 if (r)
3070 return r;
3071 }
3072 return 0;
3073 }
3074
3075 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
3076 {
3077 struct r600_bytecode_alu alu;
3078 int r;
3079
3080 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3081 alu.inst = opcode;
3082 alu.predicate = 1;
3083
3084 alu.dst.sel = ctx->temp_reg;
3085 alu.dst.write = 1;
3086 alu.dst.chan = 0;
3087
3088 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3089 alu.src[1].sel = V_SQ_ALU_SRC_0;
3090 alu.src[1].chan = 0;
3091
3092 alu.last = 1;
3093
3094 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
3095 if (r)
3096 return r;
3097 return 0;
3098 }
3099
3100 static int pops(struct r600_shader_ctx *ctx, int pops)
3101 {
3102 unsigned force_pop = ctx->bc->force_add_cf;
3103
3104 if (!force_pop) {
3105 int alu_pop = 3;
3106 if (ctx->bc->cf_last) {
3107 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
3108 alu_pop = 0;
3109 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
3110 alu_pop = 1;
3111 }
3112 alu_pop += pops;
3113 if (alu_pop == 1) {
3114 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
3115 ctx->bc->force_add_cf = 1;
3116 } else if (alu_pop == 2) {
3117 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
3118 ctx->bc->force_add_cf = 1;
3119 } else {
3120 force_pop = 1;
3121 }
3122 }
3123
3124 if (force_pop) {
3125 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
3126 ctx->bc->cf_last->pop_count = pops;
3127 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
3128 }
3129
3130 return 0;
3131 }
3132
3133 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
3134 {
3135 switch(reason) {
3136 case FC_PUSH_VPM:
3137 ctx->bc->callstack[ctx->bc->call_sp].current--;
3138 break;
3139 case FC_PUSH_WQM:
3140 case FC_LOOP:
3141 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
3142 break;
3143 case FC_REP:
3144 /* TOODO : for 16 vp asic should -= 2; */
3145 ctx->bc->callstack[ctx->bc->call_sp].current --;
3146 break;
3147 }
3148 }
3149
3150 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
3151 {
3152 if (check_max_only) {
3153 int diff;
3154 switch (reason) {
3155 case FC_PUSH_VPM:
3156 diff = 1;
3157 break;
3158 case FC_PUSH_WQM:
3159 diff = 4;
3160 break;
3161 default:
3162 assert(0);
3163 diff = 0;
3164 }
3165 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
3166 ctx->bc->callstack[ctx->bc->call_sp].max) {
3167 ctx->bc->callstack[ctx->bc->call_sp].max =
3168 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3169 }
3170 return;
3171 }
3172 switch (reason) {
3173 case FC_PUSH_VPM:
3174 ctx->bc->callstack[ctx->bc->call_sp].current++;
3175 break;
3176 case FC_PUSH_WQM:
3177 case FC_LOOP:
3178 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3179 break;
3180 case FC_REP:
3181 ctx->bc->callstack[ctx->bc->call_sp].current++;
3182 break;
3183 }
3184
3185 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3186 ctx->bc->callstack[ctx->bc->call_sp].max) {
3187 ctx->bc->callstack[ctx->bc->call_sp].max =
3188 ctx->bc->callstack[ctx->bc->call_sp].current;
3189 }
3190 }
3191
3192 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3193 {
3194 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3195
3196 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
3197 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
3198 sp->mid[sp->num_mid] = ctx->bc->cf_last;
3199 sp->num_mid++;
3200 }
3201
3202 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3203 {
3204 ctx->bc->fc_sp++;
3205 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3206 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3207 }
3208
3209 static void fc_poplevel(struct r600_shader_ctx *ctx)
3210 {
3211 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3212 if (sp->mid) {
3213 free(sp->mid);
3214 sp->mid = NULL;
3215 }
3216 sp->num_mid = 0;
3217 sp->start = NULL;
3218 sp->type = 0;
3219 ctx->bc->fc_sp--;
3220 }
3221
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It is an unfinished sketch of subroutine-return / flag-based loop-break
 * support: emit_setret_in_loop_flag() and emit_testflag() are empty stubs
 * and emit_jump_to_offset() never sets a jump target.
 */
#if 0
/* Emit a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Emit a CF JUMP with the given pop count; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and always returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Sequence: test flag, jump, set flag, pop ifidx + 1 levels, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, then emit the current instruction's CF opcode with
 * pop_count 1, record it in the mid list of level `fc_sp` and pop one level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3269
3270 static int tgsi_if(struct r600_shader_ctx *ctx)
3271 {
3272 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
3273
3274 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3275
3276 fc_pushlevel(ctx, FC_IF);
3277
3278 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3279 return 0;
3280 }
3281
3282 static int tgsi_else(struct r600_shader_ctx *ctx)
3283 {
3284 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3285 ctx->bc->cf_last->pop_count = 1;
3286
3287 fc_set_mid(ctx, ctx->bc->fc_sp);
3288 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3289 return 0;
3290 }
3291
3292 static int tgsi_endif(struct r600_shader_ctx *ctx)
3293 {
3294 pops(ctx, 1);
3295 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3296 R600_ERR("if/endif unbalanced in shader\n");
3297 return -1;
3298 }
3299
3300 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3301 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3302 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3303 } else {
3304 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3305 }
3306 fc_poplevel(ctx);
3307
3308 callstack_decrease_current(ctx, FC_PUSH_VPM);
3309 return 0;
3310 }
3311
3312 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3313 {
3314 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3315
3316 fc_pushlevel(ctx, FC_LOOP);
3317
3318 /* check stack depth */
3319 callstack_check_depth(ctx, FC_LOOP, 0);
3320 return 0;
3321 }
3322
3323 static int tgsi_endloop(struct r600_shader_ctx *ctx)
3324 {
3325 int i;
3326
3327 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3328
3329 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3330 R600_ERR("loop/endloop in shader code are not paired.\n");
3331 return -EINVAL;
3332 }
3333
3334 /* fixup loop pointers - from r600isa
3335 LOOP END points to CF after LOOP START,
3336 LOOP START point to CF after LOOP END
3337 BRK/CONT point to LOOP END CF
3338 */
3339 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3340
3341 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3342
3343 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3344 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3345 }
3346 /* TODO add LOOPRET support */
3347 fc_poplevel(ctx);
3348 callstack_decrease_current(ctx, FC_LOOP);
3349 return 0;
3350 }
3351
3352 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3353 {
3354 unsigned int fscp;
3355
3356 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3357 {
3358 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3359 break;
3360 }
3361
3362 if (fscp == 0) {
3363 R600_ERR("Break not inside loop/endloop pair\n");
3364 return -EINVAL;
3365 }
3366
3367 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3368 ctx->bc->cf_last->pop_count = 1;
3369
3370 fc_set_mid(ctx, fscp);
3371
3372 pops(ctx, 1);
3373 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3374 return 0;
3375 }
3376
3377 static int tgsi_umad(struct r600_shader_ctx *ctx)
3378 {
3379 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3380 struct r600_bytecode_alu alu;
3381 int i, j, r;
3382 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3383
3384 /* src0 * src1 */
3385 for (i = 0; i < lasti + 1; i++) {
3386 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3387 continue;
3388
3389 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3390
3391 alu.dst.chan = i;
3392 alu.dst.sel = ctx->temp_reg;
3393 alu.dst.write = 1;
3394
3395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3396 for (j = 0; j < 2; j++) {
3397 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3398 }
3399
3400 alu.last = 1;
3401 r = r600_bytecode_add_alu(ctx->bc, &alu);
3402 if (r)
3403 return r;
3404 }
3405
3406
3407 for (i = 0; i < lasti + 1; i++) {
3408 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3409 continue;
3410
3411 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3412 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3413
3414 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3415
3416 alu.src[0].sel = ctx->temp_reg;
3417 alu.src[0].chan = i;
3418
3419 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3420 if (i == lasti) {
3421 alu.last = 1;
3422 }
3423 r = r600_bytecode_add_alu(ctx->bc, &alu);
3424 if (r)
3425 return r;
3426 }
3427 return 0;
3428 }
3429
3430 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3431 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3432 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3433 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3434
3435 /* FIXME:
3436 * For state trackers other than OpenGL, we'll want to use
3437 * _RECIP_IEEE instead.
3438 */
3439 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3440
3441 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3442 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3443 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3444 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3445 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3446 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3447 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3448 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3449 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3450 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3451 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3452 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3453 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3454 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3455 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3456 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3457 /* gap */
3458 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3459 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3460 /* gap */
3461 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3462 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3463 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3464 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3465 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3466 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3467 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3468 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3469 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3470 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3471 /* gap */
3472 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3473 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3474 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3475 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3476 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3477 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3478 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3479 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3480 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3481 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3482 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3483 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3484 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3485 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3486 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3487 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3488 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3489 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3490 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3491 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3492 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3493 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3494 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3495 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3496 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3497 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3498 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3499 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3500 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3501 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3502 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3503 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3504 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3505 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3506 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3507 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3508 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3509 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3510 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3511 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3512 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3513 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3514 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3515 /* gap */
3516 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3517 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3518 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3519 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3520 /* gap */
3521 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3522 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3523 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3524 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3525 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3526 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
3527 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3528 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3529 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3530 /* gap */
3531 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3532 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3533 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3534 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3535 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3536 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3537 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3538 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3539 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3540 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3541 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3542 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3543 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3544 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3545 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3546 /* gap */
3547 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3548 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3549 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3550 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3551 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3552 /* gap */
3553 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3554 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3555 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3556 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3557 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3558 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3559 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3560 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3561 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3562 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3563 /* gap */
3564 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3565 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
3566 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3567 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3568 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3569 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_op2},
3570 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3571 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3572 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2},
3573 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
3574 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
3575 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3576 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3577 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3578 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3579 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3580 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3581 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2},
3582 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3583 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3584 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3585 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3586 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
3587 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3588 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3589 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3590 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3591 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3592 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3593 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3594 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3595 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3596 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3597 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3598 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
3599 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
3600 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
3601 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3602 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3603 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
3604 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
3605 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3606 };
3607
3608 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3609 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3610 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3611 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3612 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3613 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3614 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3615 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3616 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3617 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3618 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3619 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3620 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3621 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3622 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3623 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3624 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3625 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3626 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3627 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3628 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629 /* gap */
3630 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3631 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3632 /* gap */
3633 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3634 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3635 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3636 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3637 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3638 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3639 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3640 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3641 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3642 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3643 /* gap */
3644 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3645 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3646 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3648 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3649 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3650 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3651 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3652 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3653 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3654 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3655 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3656 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3657 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3658 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3659 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3660 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3661 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3662 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3663 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3664 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3665 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3666 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3667 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3668 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3669 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3670 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3671 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3672 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3673 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3674 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3675 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3676 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3677 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3678 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3679 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3680 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3681 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3682 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3683 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3684 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3685 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3686 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3687 /* gap */
3688 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3689 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3690 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3691 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3692 /* gap */
3693 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3694 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3695 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3696 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3697 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3698 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
3699 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3700 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3701 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3702 /* gap */
3703 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3704 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3705 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3706 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3707 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3708 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3709 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3710 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3711 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3712 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3713 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3714 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3715 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3716 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3717 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3718 /* gap */
3719 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3720 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3721 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3722 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3723 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3724 /* gap */
3725 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3726 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3727 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3728 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3729 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3730 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3731 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3732 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3733 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3734 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3735 /* gap */
3736 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3737 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
3738 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3739 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3740 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3741 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
3742 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3743 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3744 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3745 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3746 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
3747 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3748 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3749 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3750 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3751 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3752 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3753 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
3754 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3755 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3756 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3757 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3758 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
3759 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3760 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3761 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3762 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3763 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3764 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3765 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3766 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3767 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3768 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3769 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3770 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
3771 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
3772 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
3773 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3774 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3775 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
3776 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
3777 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3778 };
3779
/*
 * TGSI opcode translation table for the "cm" code path (presumably Cayman,
 * going by the cayman_* handlers referenced below).
 *
 * Each entry lists, in order:
 *   - the TGSI opcode (a named TGSI_OPCODE_* constant, or a bare number for
 *     an opcode value that has no name here),
 *   - a flag that is 1 only for MAD, the single entry whose hardware opcode
 *     is an OP3 encoding (presumably an "is op3" marker -- confirm against
 *     the definition of struct r600_shader_tgsi_instruction),
 *   - a hardware instruction constant (ALU, CF or TEX), or NOP/0 when the
 *     entry does not use one,
 *   - the handler function for that opcode.
 *
 * The table is positional: every explicit numeric "gap" entry equals its
 * zero-based position in the array (20, 22, 23, 32, 75, ...), so the table
 * looks like it is meant to be indexed directly by TGSI opcode value.
 * NOTE(review): confirm at the lookup site. Do not reorder, insert or drop
 * entries without keeping that correspondence intact.
 *
 * All entries whose handler is tgsi_unsupported carry NOP or 0 as their
 * hardware opcode; in this table that includes most of the integer opcodes.
 */
3780 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3781 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3782 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3783 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3784 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3785 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3786 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3787 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3788 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3789 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3790 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3791 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3792 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3793 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3794 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3795 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT through the _swap handler; presumably a < b is emitted as b > a -- confirm in tgsi_op2_swap */
3796 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3797 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry with the second field set and an OP3 opcode */
3798 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* shares the ADD opcode; presumably the handler negates the second source -- confirm in tgsi_op2 */
3799 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3800 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3801 /* gap */
3802 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3803 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3804 /* gap */
3805 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3806 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3807 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3808 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3809 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3810 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3811 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3812 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3813 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3814 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3815 /* gap */
3816 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3817 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* shares the MOV opcode; presumably the handler applies an absolute-value source modifier -- confirm in tgsi_op2 */
3818 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3819 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3820 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3821 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3822 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3823 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3824 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3825 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3826 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3827 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3828 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3829 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3830 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3831 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3832 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3833 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3834 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3835 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3836 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3837 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3838 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3839 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3840 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3841 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3842 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3843 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3844 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3845 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3846 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3847 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3848 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3849 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3850 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3851 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3852 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3853 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3854 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3855 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3856 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3857 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3858 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3859 /* gap */
3860 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3861 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3862 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3863 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3864 /* gap */
3865 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3866 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3867 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3868 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3869 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3870 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3871 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3872 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3873 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3874 /* gap */
3875 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3876 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3877 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3878 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3879 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3880 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3881 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3882 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3883 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3884 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3885 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3886 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3887 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3888 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3889 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3890 /* gap */
3891 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3892 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3893 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3894 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3895 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3896 /* gap */
3897 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3898 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3899 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3900 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3901 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3902 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3903 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3904 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3905 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3906 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3907 /* gap */
3908 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3909 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3910 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3911 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3912 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3913 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3914 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3915 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3916 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3917 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3918 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3919 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3920 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3921 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3922 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3923 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3924 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3925 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3926 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3927 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3928 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3929 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3930 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3931 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3932 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3933 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3934 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3935 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3936 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3937 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3938 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3939 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3940 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3941 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3942 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
3943 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
3944 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
3945 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3946 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3947 {TGSI_OPCODE_UARL, 0, 0, tgsi_unsupported},
3948 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
3949 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3950 };