r600g: make u2f trans only
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
30 #include "r600_asm.h"
31 #include "r600_sq.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
34 #include "r600d.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
/* CAYMAN notes
 * This explains why many instructions are emitted in per-channel loops on
 * CAYMAN.
 *
 * - These 8xx t-slot only ops are implemented in all vector slots.
 *     MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 * - These 8xx t-slot only opcodes become vector ops, with all four
 *   slots expecting the arguments on sources a and b. Result is
 *   broadcast to all channels.
 *     MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
 * - These 8xx t-slot only opcodes become vector ops in the z, y, and
 *   x slots.
 *     EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
 *     RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
 *     SQRT_IEEE/_64
 *     SIN/COS
 *   The w slot may have an independent co-issued operation, or if the
 *   result is required to be in the w slot, the opcode above may be
 *   issued in the w slot as well.
 *   The compiler must issue the source argument to slots z, y, and x.
 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
/* TGSI -> r600 translation entry point; the definition follows further down. */
static int r600_shader_from_tgsi(struct r600_pipe_context *rctx,
				 struct r600_pipe_shader *pipeshader);
107
108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
112 int r;
113
114 /* Would like some magic "get_bool_option_once" routine.
115 */
116 if (dump_shaders == -1)
117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
118
119 if (dump_shaders) {
120 fprintf(stderr, "--------------------------------------------------------------\n");
121 tgsi_dump(shader->tokens, 0);
122
123 if (shader->so.num_outputs) {
124 unsigned i;
125 fprintf(stderr, "STREAMOUT\n");
126 for (i = 0; i < shader->so.num_outputs; i++) {
127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) <<
128 shader->so.output[i].start_component;
129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
130 shader->so.output[i].output_buffer, shader->so.output[i].register_index,
131 mask & 1 ? "x" : "_",
132 (mask >> 1) & 1 ? "y" : "_",
133 (mask >> 2) & 1 ? "z" : "_",
134 (mask >> 3) & 1 ? "w" : "_");
135 }
136 }
137 }
138 r = r600_shader_from_tgsi(rctx, shader);
139 if (r) {
140 R600_ERR("translation from TGSI failed !\n");
141 return r;
142 }
143 r = r600_bytecode_build(&shader->shader.bc);
144 if (r) {
145 R600_ERR("building bytecode failed !\n");
146 return r;
147 }
148 if (dump_shaders) {
149 r600_bytecode_dump(&shader->shader.bc);
150 fprintf(stderr, "______________________________________________________________\n");
151 }
152 return r600_pipe_shader(ctx, shader);
153 }
154
155 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156 {
157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
158 r600_bytecode_clear(&shader->shader.bc);
159
160 memset(&shader->shader,0,sizeof(struct r600_shader));
161 }
162
/*
 * tgsi -> r600 shader
 */
struct r600_shader_tgsi_instruction;

/* One instruction source operand, translated from TGSI by tgsi_src(). */
struct r600_shader_src {
	unsigned		sel;		/* register/constant selector, or V_SQ_ALU_SRC_* special value */
	unsigned		swizzle[4];	/* source channel read for each of x, y, z, w */
	unsigned		neg;		/* negate modifier */
	unsigned		abs;		/* absolute value modifier */
	unsigned		rel;		/* V_SQ_REL_RELATIVE for indirect addressing, else 0 */
	uint32_t		value[4];	/* literal dwords, used when sel is V_SQ_ALU_SRC_LITERAL */
};
176
177 struct r600_shader_ctx {
178 struct tgsi_shader_info info;
179 struct tgsi_parse_context parse;
180 const struct tgsi_token *tokens;
181 unsigned type;
182 unsigned file_offset[TGSI_FILE_COUNT];
183 unsigned temp_reg;
184 struct r600_shader_tgsi_instruction *inst_info;
185 struct r600_bytecode *bc;
186 struct r600_shader *shader;
187 struct r600_shader_src src[4];
188 u32 *literals;
189 u32 nliterals;
190 u32 max_driver_temp_used;
191 /* needed for evergreen interpolation */
192 boolean input_centroid;
193 boolean input_linear;
194 boolean input_perspective;
195 int num_interp_gpr;
196 };
197
/* One entry of a per-chip opcode table, indexed by TGSI opcode. */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI opcode this entry describes */
	unsigned	is_op3;		/* presumably non-zero for three-source (OP3) ALU encodings -- TODO confirm */
	unsigned	r600_opcode;	/* hardware opcode associated with the TGSI opcode */
	/* emits the bytecode for the current instruction; returns 0 or an error code */
	int		(*process)(struct r600_shader_ctx *ctx);
};
204
205 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
206 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
207
208 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
209 {
210 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
211 int j;
212
213 if (i->Instruction.NumDstRegs > 1) {
214 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
215 return -EINVAL;
216 }
217 if (i->Instruction.Predicate) {
218 R600_ERR("predicate unsupported\n");
219 return -EINVAL;
220 }
221 #if 0
222 if (i->Instruction.Label) {
223 R600_ERR("label unsupported\n");
224 return -EINVAL;
225 }
226 #endif
227 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
228 if (i->Src[j].Register.Dimension) {
229 R600_ERR("unsupported src %d (dimension %d)\n", j,
230 i->Src[j].Register.Dimension);
231 return -EINVAL;
232 }
233 }
234 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
235 if (i->Dst[j].Register.Dimension) {
236 R600_ERR("unsupported dst (dimension)\n");
237 return -EINVAL;
238 }
239 }
240 return 0;
241 }
242
243 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
244 {
245 int i, r;
246 struct r600_bytecode_alu alu;
247 int gpr = 0, base_chan = 0;
248 int ij_index = 0;
249
250 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
251 ij_index = 0;
252 if (ctx->shader->input[input].centroid)
253 ij_index++;
254 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
255 ij_index = 0;
256 /* if we have perspective add one */
257 if (ctx->input_perspective) {
258 ij_index++;
259 /* if we have perspective centroid */
260 if (ctx->input_centroid)
261 ij_index++;
262 }
263 if (ctx->shader->input[input].centroid)
264 ij_index++;
265 }
266
267 /* work out gpr and base_chan from index */
268 gpr = ij_index / 2;
269 base_chan = (2 * (ij_index % 2)) + 1;
270
271 for (i = 0; i < 8; i++) {
272 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
273
274 if (i < 4)
275 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
276 else
277 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
278
279 if ((i > 1) && (i < 6)) {
280 alu.dst.sel = ctx->shader->input[input].gpr;
281 alu.dst.write = 1;
282 }
283
284 alu.dst.chan = i % 4;
285
286 alu.src[0].sel = gpr;
287 alu.src[0].chan = (base_chan - (i % 2));
288
289 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
290
291 alu.bank_swizzle_force = SQ_ALU_VEC_210;
292 if ((i % 4) == 3)
293 alu.last = 1;
294 r = r600_bytecode_add_alu(ctx->bc, &alu);
295 if (r)
296 return r;
297 }
298 return 0;
299 }
300
301 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
302 {
303 int i, r;
304 struct r600_bytecode_alu alu;
305
306 for (i = 0; i < 4; i++) {
307 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
308
309 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0;
310
311 alu.dst.sel = ctx->shader->input[input].gpr;
312 alu.dst.write = 1;
313
314 alu.dst.chan = i;
315
316 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
317 alu.src[0].chan = i;
318
319 if (i == 3)
320 alu.last = 1;
321 r = r600_bytecode_add_alu(ctx->bc, &alu);
322 if (r)
323 return r;
324 }
325 return 0;
326 }
327
/*
 * Special export handling in shaders
 *
 * shader export ARRAY_BASE for EXPORT_POS:
 * 60 is position
 * 61 is misc vector
 * 62, 63 are clip distance vectors
 *
 * The use of the values exported in 61-63 is controlled by PA_CL_VS_OUT_CNTL:
 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive with render target index)
 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
 *
 *
 * shader export ARRAY_BASE for EXPORT_PIXEL:
 * 0-7 CB targets
 * 61 computed Z vector
 *
 * The use of the values exported in the computed Z vector is controlled
 * by DB_SHADER_CONTROL:
 * Z_EXPORT_ENABLE - Z as a float in RED
 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
 * DB_SOURCE_FORMAT - export control restrictions
 *
 */
360
361
362 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
363 static int r600_spi_sid(struct r600_shader_io * io)
364 {
365 int index, name = io->name;
366
367 /* These params are handled differently, they don't need
368 * semantic indices, so we'll use 0 for them.
369 */
370 if (name == TGSI_SEMANTIC_POSITION ||
371 name == TGSI_SEMANTIC_PSIZE ||
372 name == TGSI_SEMANTIC_FACE)
373 index = 0;
374 else {
375 if (name == TGSI_SEMANTIC_GENERIC) {
376 /* For generic params simply use sid from tgsi */
377 index = io->sid;
378 } else {
379
380 /* FIXME: two-side rendering is broken in r600g, this will
381 * keep old functionality */
382 if (name == TGSI_SEMANTIC_BCOLOR)
383 name = TGSI_SEMANTIC_COLOR;
384
385 /* For non-generic params - pack name and sid into 8 bits */
386 index = 0x80 | (name<<3) | (io->sid);
387 }
388
389 /* Make sure that all really used indices have nonzero value, so
390 * we can just compare it to 0 later instead of comparing the name
391 * with different values to detect special cases. */
392 index++;
393 }
394
395 return index;
396 };
397
398 static int tgsi_declaration(struct r600_shader_ctx *ctx)
399 {
400 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
401 unsigned i;
402 int r;
403
404 switch (d->Declaration.File) {
405 case TGSI_FILE_INPUT:
406 i = ctx->shader->ninput++;
407 ctx->shader->input[i].name = d->Semantic.Name;
408 ctx->shader->input[i].sid = d->Semantic.Index;
409 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
410 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
411 ctx->shader->input[i].centroid = d->Declaration.Centroid;
412 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
413 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
414 /* turn input into interpolate on EG */
415 if (ctx->shader->input[i].spi_sid) {
416 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
417 if (ctx->shader->input[i].interpolate > 0) {
418 evergreen_interp_alu(ctx, i);
419 } else {
420 evergreen_interp_flat(ctx, i);
421 }
422 }
423 }
424 break;
425 case TGSI_FILE_OUTPUT:
426 i = ctx->shader->noutput++;
427 ctx->shader->output[i].name = d->Semantic.Name;
428 ctx->shader->output[i].sid = d->Semantic.Index;
429 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
430 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
431 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
432 break;
433 case TGSI_FILE_CONSTANT:
434 case TGSI_FILE_TEMPORARY:
435 case TGSI_FILE_SAMPLER:
436 case TGSI_FILE_ADDRESS:
437 break;
438
439 case TGSI_FILE_SYSTEM_VALUE:
440 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
441 struct r600_bytecode_alu alu;
442 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
443
444 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
445 alu.src[0].sel = 0;
446 alu.src[0].chan = 3;
447
448 alu.dst.sel = 0;
449 alu.dst.chan = 3;
450 alu.dst.write = 1;
451 alu.last = 1;
452
453 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
454 return r;
455 break;
456 }
457
458 default:
459 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
460 return -EINVAL;
461 }
462 return 0;
463 }
464
465 static int r600_get_temp(struct r600_shader_ctx *ctx)
466 {
467 return ctx->temp_reg + ctx->max_driver_temp_used++;
468 }
469
470 /*
471 * for evergreen we need to scan the shader to find the number of GPRs we need to
472 * reserve for interpolation.
473 *
474 * we need to know if we are going to emit
475 * any centroid inputs
476 * if perspective and linear are required
477 */
478 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
479 {
480 int i;
481 int num_baryc;
482
483 ctx->input_linear = FALSE;
484 ctx->input_perspective = FALSE;
485 ctx->input_centroid = FALSE;
486 ctx->num_interp_gpr = 1;
487
488 /* any centroid inputs */
489 for (i = 0; i < ctx->info.num_inputs; i++) {
490 /* skip position/face */
491 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
492 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
493 continue;
494 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
495 ctx->input_linear = TRUE;
496 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
497 ctx->input_perspective = TRUE;
498 if (ctx->info.input_centroid[i])
499 ctx->input_centroid = TRUE;
500 }
501
502 num_baryc = 0;
503 /* ignoring sample for now */
504 if (ctx->input_perspective)
505 num_baryc++;
506 if (ctx->input_linear)
507 num_baryc++;
508 if (ctx->input_centroid)
509 num_baryc *= 2;
510
511 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
512
513 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
514 return ctx->num_interp_gpr;
515 }
516
517 static void tgsi_src(struct r600_shader_ctx *ctx,
518 const struct tgsi_full_src_register *tgsi_src,
519 struct r600_shader_src *r600_src)
520 {
521 memset(r600_src, 0, sizeof(*r600_src));
522 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
523 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
524 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
525 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
526 r600_src->neg = tgsi_src->Register.Negate;
527 r600_src->abs = tgsi_src->Register.Absolute;
528
529 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
530 int index;
531 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
532 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
533 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
534
535 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
536 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
537 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
538 return;
539 }
540 index = tgsi_src->Register.Index;
541 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
542 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
543 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
544 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
545 r600_src->swizzle[0] = 3;
546 r600_src->swizzle[1] = 3;
547 r600_src->swizzle[2] = 3;
548 r600_src->swizzle[3] = 3;
549 r600_src->sel = 0;
550 } else {
551 if (tgsi_src->Register.Indirect)
552 r600_src->rel = V_SQ_REL_RELATIVE;
553 r600_src->sel = tgsi_src->Register.Index;
554 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
555 }
556 }
557
558 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
559 {
560 struct r600_bytecode_vtx vtx;
561 unsigned int ar_reg;
562 int r;
563
564 if (offset) {
565 struct r600_bytecode_alu alu;
566
567 memset(&alu, 0, sizeof(alu));
568
569 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
570 alu.src[0].sel = ctx->bc->ar_reg;
571
572 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
573 alu.src[1].value = offset;
574
575 alu.dst.sel = dst_reg;
576 alu.dst.write = 1;
577 alu.last = 1;
578
579 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
580 return r;
581
582 ar_reg = dst_reg;
583 } else {
584 ar_reg = ctx->bc->ar_reg;
585 }
586
587 memset(&vtx, 0, sizeof(vtx));
588 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
589 vtx.src_gpr = ar_reg;
590 vtx.mega_fetch_count = 16;
591 vtx.dst_gpr = dst_reg;
592 vtx.dst_sel_x = 0; /* SEL_X */
593 vtx.dst_sel_y = 1; /* SEL_Y */
594 vtx.dst_sel_z = 2; /* SEL_Z */
595 vtx.dst_sel_w = 3; /* SEL_W */
596 vtx.data_format = FMT_32_32_32_32_FLOAT;
597 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
598 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
599 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
600 vtx.endian = r600_endian_swap(32);
601
602 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
603 return r;
604
605 return 0;
606 }
607
608 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
609 {
610 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
611 struct r600_bytecode_alu alu;
612 int i, j, k, nconst, r;
613
614 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
615 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
616 nconst++;
617 }
618 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
619 }
620 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
621 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
622 continue;
623 }
624
625 if (ctx->src[i].rel) {
626 int treg = r600_get_temp(ctx);
627 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
628 return r;
629
630 ctx->src[i].sel = treg;
631 ctx->src[i].rel = 0;
632 j--;
633 } else if (j > 0) {
634 int treg = r600_get_temp(ctx);
635 for (k = 0; k < 4; k++) {
636 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
637 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
638 alu.src[0].sel = ctx->src[i].sel;
639 alu.src[0].chan = k;
640 alu.src[0].rel = ctx->src[i].rel;
641 alu.dst.sel = treg;
642 alu.dst.chan = k;
643 alu.dst.write = 1;
644 if (k == 3)
645 alu.last = 1;
646 r = r600_bytecode_add_alu(ctx->bc, &alu);
647 if (r)
648 return r;
649 }
650 ctx->src[i].sel = treg;
651 ctx->src[i].rel =0;
652 j--;
653 }
654 }
655 return 0;
656 }
657
658 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
659 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
660 {
661 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
662 struct r600_bytecode_alu alu;
663 int i, j, k, nliteral, r;
664
665 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
666 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
667 nliteral++;
668 }
669 }
670 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
671 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
672 int treg = r600_get_temp(ctx);
673 for (k = 0; k < 4; k++) {
674 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
675 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
676 alu.src[0].sel = ctx->src[i].sel;
677 alu.src[0].chan = k;
678 alu.src[0].value = ctx->src[i].value[k];
679 alu.dst.sel = treg;
680 alu.dst.chan = k;
681 alu.dst.write = 1;
682 if (k == 3)
683 alu.last = 1;
684 r = r600_bytecode_add_alu(ctx->bc, &alu);
685 if (r)
686 return r;
687 }
688 ctx->src[i].sel = treg;
689 j--;
690 }
691 }
692 return 0;
693 }
694
695 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
696 {
697 struct r600_shader *shader = &pipeshader->shader;
698 struct tgsi_token *tokens = pipeshader->tokens;
699 struct pipe_stream_output_info so = pipeshader->so;
700 struct tgsi_full_immediate *immediate;
701 struct tgsi_full_property *property;
702 struct r600_shader_ctx ctx;
703 struct r600_bytecode_output output[32];
704 unsigned output_done, noutput;
705 unsigned opcode;
706 int i, j, r = 0, pos0;
707
708 ctx.bc = &shader->bc;
709 ctx.shader = shader;
710 r600_bytecode_init(ctx.bc, rctx->chip_class);
711 ctx.tokens = tokens;
712 tgsi_scan_shader(tokens, &ctx.info);
713 tgsi_parse_init(&ctx.parse, tokens);
714 ctx.type = ctx.parse.FullHeader.Processor.Processor;
715 shader->processor_type = ctx.type;
716 ctx.bc->type = shader->processor_type;
717
718 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
719 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
720
721 shader->nr_cbufs = rctx->nr_cbufs;
722
723 /* register allocations */
724 /* Values [0,127] correspond to GPR[0..127].
725 * Values [128,159] correspond to constant buffer bank 0
726 * Values [160,191] correspond to constant buffer bank 1
727 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
728 * Values [256,287] correspond to constant buffer bank 2 (EG)
729 * Values [288,319] correspond to constant buffer bank 3 (EG)
730 * Other special values are shown in the list below.
731 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
732 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
733 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
734 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
735 * 248 SQ_ALU_SRC_0: special constant 0.0.
736 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
737 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
738 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
739 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
740 * 253 SQ_ALU_SRC_LITERAL: literal constant.
741 * 254 SQ_ALU_SRC_PV: previous vector result.
742 * 255 SQ_ALU_SRC_PS: previous scalar result.
743 */
744 for (i = 0; i < TGSI_FILE_COUNT; i++) {
745 ctx.file_offset[i] = 0;
746 }
747 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
748 ctx.file_offset[TGSI_FILE_INPUT] = 1;
749 if (ctx.bc->chip_class >= EVERGREEN) {
750 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
751 } else {
752 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
753 }
754 }
755 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
756 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
757 }
758 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
759 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
760 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
761 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
762
763 /* Outside the GPR range. This will be translated to one of the
764 * kcache banks later. */
765 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
766
767 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
768 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
769 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
770 ctx.temp_reg = ctx.bc->ar_reg + 1;
771
772 ctx.nliterals = 0;
773 ctx.literals = NULL;
774 shader->fs_write_all = FALSE;
775 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
776 tgsi_parse_token(&ctx.parse);
777 switch (ctx.parse.FullToken.Token.Type) {
778 case TGSI_TOKEN_TYPE_IMMEDIATE:
779 immediate = &ctx.parse.FullToken.FullImmediate;
780 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
781 if(ctx.literals == NULL) {
782 r = -ENOMEM;
783 goto out_err;
784 }
785 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
786 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
787 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
788 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
789 ctx.nliterals++;
790 break;
791 case TGSI_TOKEN_TYPE_DECLARATION:
792 r = tgsi_declaration(&ctx);
793 if (r)
794 goto out_err;
795 break;
796 case TGSI_TOKEN_TYPE_INSTRUCTION:
797 r = tgsi_is_supported(&ctx);
798 if (r)
799 goto out_err;
800 ctx.max_driver_temp_used = 0;
801 /* reserve first tmp for everyone */
802 r600_get_temp(&ctx);
803
804 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
805 if ((r = tgsi_split_constant(&ctx)))
806 goto out_err;
807 if ((r = tgsi_split_literal_constant(&ctx)))
808 goto out_err;
809 if (ctx.bc->chip_class == CAYMAN)
810 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
811 else if (ctx.bc->chip_class >= EVERGREEN)
812 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
813 else
814 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
815 r = ctx.inst_info->process(&ctx);
816 if (r)
817 goto out_err;
818 break;
819 case TGSI_TOKEN_TYPE_PROPERTY:
820 property = &ctx.parse.FullToken.FullProperty;
821 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
822 if (property->u[0].Data == 1)
823 shader->fs_write_all = TRUE;
824 }
825 break;
826 default:
827 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
828 r = -EINVAL;
829 goto out_err;
830 }
831 }
832
833 noutput = shader->noutput;
834
835 /* clamp color outputs */
836 if (shader->clamp_color) {
837 for (i = 0; i < noutput; i++) {
838 if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
839 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
840
841 int j;
842 for (j = 0; j < 4; j++) {
843 struct r600_bytecode_alu alu;
844 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
845
846 /* MOV_SAT R, R */
847 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
848 alu.dst.sel = shader->output[i].gpr;
849 alu.dst.chan = j;
850 alu.dst.write = 1;
851 alu.dst.clamp = 1;
852 alu.src[0].sel = alu.dst.sel;
853 alu.src[0].chan = j;
854
855 if (j == 3) {
856 alu.last = 1;
857 }
858 r = r600_bytecode_add_alu(ctx.bc, &alu);
859 if (r)
860 return r;
861 }
862 }
863 }
864 }
865
866 /* Add stream outputs. */
867 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
868 for (i = 0; i < so.num_outputs; i++) {
869 struct r600_bytecode_output output;
870
871 if (so.output[i].output_buffer >= 4) {
872 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
873 so.output[i].output_buffer);
874 r = -EINVAL;
875 goto out_err;
876 }
877 if (so.output[i].start_component) {
878 R600_ERR("stream_output - start_component cannot be non-zero\n");
879 r = -EINVAL;
880 goto out_err;
881 }
882
883 memset(&output, 0, sizeof(struct r600_bytecode_output));
884 output.gpr = shader->output[so.output[i].register_index].gpr;
885 output.elem_size = 0;
886 output.array_base = so.output[i].dst_offset;
887 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
888 output.burst_count = 1;
889 output.barrier = 1;
890 output.array_size = 0;
891 output.comp_mask = (1 << so.output[i].num_components) - 1;
892 if (ctx.bc->chip_class >= EVERGREEN) {
893 switch (so.output[i].output_buffer) {
894 case 0:
895 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
896 break;
897 case 1:
898 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
899 break;
900 case 2:
901 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
902 break;
903 case 3:
904 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
905 break;
906 }
907 } else {
908 switch (so.output[i].output_buffer) {
909 case 0:
910 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
911 break;
912 case 1:
913 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
914 break;
915 case 2:
916 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
917 break;
918 case 3:
919 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
920 break;
921 }
922 }
923 r = r600_bytecode_add_output(ctx.bc, &output);
924 if (r)
925 goto out_err;
926 }
927 }
928
929 /* export output */
930 j = 0;
931 for (i = 0, pos0 = 0; i < noutput; i++) {
932 memset(&output[i], 0, sizeof(struct r600_bytecode_output));
933 output[i + j].gpr = shader->output[i].gpr;
934 output[i + j].elem_size = 3;
935 output[i + j].swizzle_x = 0;
936 output[i + j].swizzle_y = 1;
937 output[i + j].swizzle_z = 2;
938 output[i + j].swizzle_w = 3;
939 output[i + j].burst_count = 1;
940 output[i + j].barrier = 1;
941 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
942 output[i + j].array_base = i - pos0;
943 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
944 switch (ctx.type) {
945 case TGSI_PROCESSOR_VERTEX:
946 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
947 output[i + j].array_base = 60;
948 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
949 /* position doesn't count in array_base */
950 pos0++;
951 }
952 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
953 output[i + j].array_base = 61;
954 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
955 /* position doesn't count in array_base */
956 pos0++;
957 }
958 break;
959 case TGSI_PROCESSOR_FRAGMENT:
960 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
961 output[i + j].array_base = shader->output[i].sid;
962 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
963 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
964 for (j = 1; j < shader->nr_cbufs; j++) {
965 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
966 output[i + j].gpr = shader->output[i].gpr;
967 output[i + j].elem_size = 3;
968 output[i + j].swizzle_x = 0;
969 output[i + j].swizzle_y = 1;
970 output[i + j].swizzle_z = 2;
971 output[i + j].swizzle_w = 3;
972 output[i + j].burst_count = 1;
973 output[i + j].barrier = 1;
974 output[i + j].array_base = shader->output[i].sid + j;
975 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
976 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
977 }
978 j = shader->nr_cbufs-1;
979 }
980 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
981 output[i + j].array_base = 61;
982 output[i + j].swizzle_x = 2;
983 output[i + j].swizzle_y = 7;
984 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
985 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
986 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
987 output[i + j].array_base = 61;
988 output[i + j].swizzle_x = 7;
989 output[i + j].swizzle_y = 1;
990 output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
991 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
992 } else {
993 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
994 r = -EINVAL;
995 goto out_err;
996 }
997 break;
998 default:
999 R600_ERR("unsupported processor type %d\n", ctx.type);
1000 r = -EINVAL;
1001 goto out_err;
1002 }
1003 }
1004 noutput += j;
1005 /* add fake param output for vertex shader if no param is exported */
1006 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1007 for (i = 0, pos0 = 0; i < noutput; i++) {
1008 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
1009 pos0 = 1;
1010 break;
1011 }
1012 }
1013 if (!pos0) {
1014 memset(&output[i], 0, sizeof(struct r600_bytecode_output));
1015 output[i].gpr = 0;
1016 output[i].elem_size = 3;
1017 output[i].swizzle_x = 7;
1018 output[i].swizzle_y = 7;
1019 output[i].swizzle_z = 7;
1020 output[i].swizzle_w = 7;
1021 output[i].burst_count = 1;
1022 output[i].barrier = 1;
1023 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1024 output[i].array_base = 0;
1025 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1026 noutput++;
1027 }
1028 }
1029 /* add fake pixel export */
1030 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
1031 memset(&output[0], 0, sizeof(struct r600_bytecode_output));
1032 output[0].gpr = 0;
1033 output[0].elem_size = 3;
1034 output[0].swizzle_x = 7;
1035 output[0].swizzle_y = 7;
1036 output[0].swizzle_z = 7;
1037 output[0].swizzle_w = 7;
1038 output[0].burst_count = 1;
1039 output[0].barrier = 1;
1040 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1041 output[0].array_base = 0;
1042 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1043 noutput++;
1044 }
1045 /* set export done on last export of each type */
1046 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1047 if (ctx.bc->chip_class < CAYMAN) {
1048 if (i == (noutput - 1)) {
1049 output[i].end_of_program = 1;
1050 }
1051 }
1052 if (!(output_done & (1 << output[i].type))) {
1053 output_done |= (1 << output[i].type);
1054 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1055 }
1056 }
1057 /* add output to bytecode */
1058 for (i = 0; i < noutput; i++) {
1059 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1060 if (r)
1061 goto out_err;
1062 }
1063 /* add program end */
1064 if (ctx.bc->chip_class == CAYMAN)
1065 cm_bytecode_add_cf_end(ctx.bc);
1066
1067 free(ctx.literals);
1068 tgsi_parse_free(&ctx.parse);
1069 return 0;
1070 out_err:
1071 free(ctx.literals);
1072 tgsi_parse_free(&ctx.parse);
1073 return r;
1074 }
1075
1076 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1077 {
1078 R600_ERR("%s tgsi opcode unsupported\n",
1079 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1080 return -EINVAL;
1081 }
1082
/* Handler for the TGSI END opcode: emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx; /* context is not needed */
	return 0;
}
1087
1088 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1089 const struct r600_shader_src *shader_src,
1090 unsigned chan)
1091 {
1092 bc_src->sel = shader_src->sel;
1093 bc_src->chan = shader_src->swizzle[chan];
1094 bc_src->neg = shader_src->neg;
1095 bc_src->abs = shader_src->abs;
1096 bc_src->rel = shader_src->rel;
1097 bc_src->value = shader_src->value[bc_src->chan];
1098 }
1099
1100 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1101 {
1102 bc_src->abs = 1;
1103 bc_src->neg = 0;
1104 }
1105
1106 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1107 {
1108 bc_src->neg = !bc_src->neg;
1109 }
1110
1111 static void tgsi_dst(struct r600_shader_ctx *ctx,
1112 const struct tgsi_full_dst_register *tgsi_dst,
1113 unsigned swizzle,
1114 struct r600_bytecode_alu_dst *r600_dst)
1115 {
1116 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1117
1118 r600_dst->sel = tgsi_dst->Register.Index;
1119 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1120 r600_dst->chan = swizzle;
1121 r600_dst->write = 1;
1122 if (tgsi_dst->Register.Indirect)
1123 r600_dst->rel = V_SQ_REL_RELATIVE;
1124 if (inst->Instruction.Saturate) {
1125 r600_dst->clamp = 1;
1126 }
1127 }
1128
/* Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of them is set. */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X (or an empty mask) falls through to 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
1140
1141 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1142 {
1143 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1144 struct r600_bytecode_alu alu;
1145 int i, j, r;
1146 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1147
1148 for (i = 0; i < lasti + 1; i++) {
1149 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1150 continue;
1151
1152 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1153 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1154
1155 alu.inst = ctx->inst_info->r600_opcode;
1156 if (!swap) {
1157 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1158 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1159 }
1160 } else {
1161 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1162 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1163 }
1164 /* handle some special cases */
1165 switch (ctx->inst_info->tgsi_opcode) {
1166 case TGSI_OPCODE_SUB:
1167 r600_bytecode_src_toggle_neg(&alu.src[1]);
1168 break;
1169 case TGSI_OPCODE_ABS:
1170 r600_bytecode_src_set_abs(&alu.src[0]);
1171 break;
1172 default:
1173 break;
1174 }
1175 if (i == lasti || trans_only) {
1176 alu.last = 1;
1177 }
1178 r = r600_bytecode_add_alu(ctx->bc, &alu);
1179 if (r)
1180 return r;
1181 }
1182 return 0;
1183 }
1184
/* Plain per-channel op: sources in TGSI order, channels share one group. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1189
/* Per-channel op with the two source operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1194
/* Per-channel op where every channel is emitted as its own instruction
 * group (trans_only mode of tgsi_op2_s). */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1199
1200 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1201 {
1202 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1203 struct r600_bytecode_alu alu;
1204 int i, r;
1205 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1206
1207 for (i = 0; i < lasti + 1; i++) {
1208
1209 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1210 continue;
1211 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1212 alu.inst = ctx->inst_info->r600_opcode;
1213
1214 alu.src[0].sel = V_SQ_ALU_SRC_0;
1215
1216 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1217
1218 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1219
1220 if (i == lasti) {
1221 alu.last = 1;
1222 }
1223 r = r600_bytecode_add_alu(ctx->bc, &alu);
1224 if (r)
1225 return r;
1226 }
1227 return 0;
1228
1229 }
1230
1231 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1232 {
1233 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1234 int i, j, r;
1235 struct r600_bytecode_alu alu;
1236 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1237
1238 for (i = 0 ; i < last_slot; i++) {
1239 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1240 alu.inst = ctx->inst_info->r600_opcode;
1241 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1242 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1243 }
1244 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1245 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1246
1247 if (i == last_slot - 1)
1248 alu.last = 1;
1249 r = r600_bytecode_add_alu(ctx->bc, &alu);
1250 if (r)
1251 return r;
1252 }
1253 return 0;
1254 }
1255
1256 /*
1257 * r600 - trunc to -PI..PI range
1258 * r700 - normalize by dividing by 2PI
1259 * see fdo bug 27901
1260 */
1261 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1262 {
1263 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1264 static float double_pi = 3.1415926535 * 2;
1265 static float neg_pi = -3.1415926535;
1266
1267 int r;
1268 struct r600_bytecode_alu alu;
1269
1270 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1272 alu.is_op3 = 1;
1273
1274 alu.dst.chan = 0;
1275 alu.dst.sel = ctx->temp_reg;
1276 alu.dst.write = 1;
1277
1278 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1279
1280 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1281 alu.src[1].chan = 0;
1282 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1283 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1284 alu.src[2].chan = 0;
1285 alu.last = 1;
1286 r = r600_bytecode_add_alu(ctx->bc, &alu);
1287 if (r)
1288 return r;
1289
1290 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1291 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1292
1293 alu.dst.chan = 0;
1294 alu.dst.sel = ctx->temp_reg;
1295 alu.dst.write = 1;
1296
1297 alu.src[0].sel = ctx->temp_reg;
1298 alu.src[0].chan = 0;
1299 alu.last = 1;
1300 r = r600_bytecode_add_alu(ctx->bc, &alu);
1301 if (r)
1302 return r;
1303
1304 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1305 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1306 alu.is_op3 = 1;
1307
1308 alu.dst.chan = 0;
1309 alu.dst.sel = ctx->temp_reg;
1310 alu.dst.write = 1;
1311
1312 alu.src[0].sel = ctx->temp_reg;
1313 alu.src[0].chan = 0;
1314
1315 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1316 alu.src[1].chan = 0;
1317 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1318 alu.src[2].chan = 0;
1319
1320 if (ctx->bc->chip_class == R600) {
1321 alu.src[1].value = *(uint32_t *)&double_pi;
1322 alu.src[2].value = *(uint32_t *)&neg_pi;
1323 } else {
1324 alu.src[1].sel = V_SQ_ALU_SRC_1;
1325 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1326 alu.src[2].neg = 1;
1327 }
1328
1329 alu.last = 1;
1330 r = r600_bytecode_add_alu(ctx->bc, &alu);
1331 if (r)
1332 return r;
1333 return 0;
1334 }
1335
1336 static int cayman_trig(struct r600_shader_ctx *ctx)
1337 {
1338 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1339 struct r600_bytecode_alu alu;
1340 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1341 int i, r;
1342
1343 r = tgsi_setup_trig(ctx);
1344 if (r)
1345 return r;
1346
1347
1348 for (i = 0; i < last_slot; i++) {
1349 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1350 alu.inst = ctx->inst_info->r600_opcode;
1351 alu.dst.chan = i;
1352
1353 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1354 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1355
1356 alu.src[0].sel = ctx->temp_reg;
1357 alu.src[0].chan = 0;
1358 if (i == last_slot - 1)
1359 alu.last = 1;
1360 r = r600_bytecode_add_alu(ctx->bc, &alu);
1361 if (r)
1362 return r;
1363 }
1364 return 0;
1365 }
1366
1367 static int tgsi_trig(struct r600_shader_ctx *ctx)
1368 {
1369 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1370 struct r600_bytecode_alu alu;
1371 int i, r;
1372 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1373
1374 r = tgsi_setup_trig(ctx);
1375 if (r)
1376 return r;
1377
1378 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1379 alu.inst = ctx->inst_info->r600_opcode;
1380 alu.dst.chan = 0;
1381 alu.dst.sel = ctx->temp_reg;
1382 alu.dst.write = 1;
1383
1384 alu.src[0].sel = ctx->temp_reg;
1385 alu.src[0].chan = 0;
1386 alu.last = 1;
1387 r = r600_bytecode_add_alu(ctx->bc, &alu);
1388 if (r)
1389 return r;
1390
1391 /* replicate result */
1392 for (i = 0; i < lasti + 1; i++) {
1393 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1394 continue;
1395
1396 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1397 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1398
1399 alu.src[0].sel = ctx->temp_reg;
1400 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1401 if (i == lasti)
1402 alu.last = 1;
1403 r = r600_bytecode_add_alu(ctx->bc, &alu);
1404 if (r)
1405 return r;
1406 }
1407 return 0;
1408 }
1409
1410 static int tgsi_scs(struct r600_shader_ctx *ctx)
1411 {
1412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1413 struct r600_bytecode_alu alu;
1414 int i, r;
1415
1416 /* We'll only need the trig stuff if we are going to write to the
1417 * X or Y components of the destination vector.
1418 */
1419 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1420 r = tgsi_setup_trig(ctx);
1421 if (r)
1422 return r;
1423 }
1424
1425 /* dst.x = COS */
1426 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1427 if (ctx->bc->chip_class == CAYMAN) {
1428 for (i = 0 ; i < 3; i++) {
1429 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1430 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1431 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1432
1433 if (i == 0)
1434 alu.dst.write = 1;
1435 else
1436 alu.dst.write = 0;
1437 alu.src[0].sel = ctx->temp_reg;
1438 alu.src[0].chan = 0;
1439 if (i == 2)
1440 alu.last = 1;
1441 r = r600_bytecode_add_alu(ctx->bc, &alu);
1442 if (r)
1443 return r;
1444 }
1445 } else {
1446 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1447 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1448 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1449
1450 alu.src[0].sel = ctx->temp_reg;
1451 alu.src[0].chan = 0;
1452 alu.last = 1;
1453 r = r600_bytecode_add_alu(ctx->bc, &alu);
1454 if (r)
1455 return r;
1456 }
1457 }
1458
1459 /* dst.y = SIN */
1460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1461 if (ctx->bc->chip_class == CAYMAN) {
1462 for (i = 0 ; i < 3; i++) {
1463 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1464 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1465 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1466 if (i == 1)
1467 alu.dst.write = 1;
1468 else
1469 alu.dst.write = 0;
1470 alu.src[0].sel = ctx->temp_reg;
1471 alu.src[0].chan = 0;
1472 if (i == 2)
1473 alu.last = 1;
1474 r = r600_bytecode_add_alu(ctx->bc, &alu);
1475 if (r)
1476 return r;
1477 }
1478 } else {
1479 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1480 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1481 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1482
1483 alu.src[0].sel = ctx->temp_reg;
1484 alu.src[0].chan = 0;
1485 alu.last = 1;
1486 r = r600_bytecode_add_alu(ctx->bc, &alu);
1487 if (r)
1488 return r;
1489 }
1490 }
1491
1492 /* dst.z = 0.0; */
1493 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1494 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1495
1496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1497
1498 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1499
1500 alu.src[0].sel = V_SQ_ALU_SRC_0;
1501 alu.src[0].chan = 0;
1502
1503 alu.last = 1;
1504
1505 r = r600_bytecode_add_alu(ctx->bc, &alu);
1506 if (r)
1507 return r;
1508 }
1509
1510 /* dst.w = 1.0; */
1511 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1512 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1513
1514 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1515
1516 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1517
1518 alu.src[0].sel = V_SQ_ALU_SRC_1;
1519 alu.src[0].chan = 0;
1520
1521 alu.last = 1;
1522
1523 r = r600_bytecode_add_alu(ctx->bc, &alu);
1524 if (r)
1525 return r;
1526 }
1527
1528 return 0;
1529 }
1530
1531 static int tgsi_kill(struct r600_shader_ctx *ctx)
1532 {
1533 struct r600_bytecode_alu alu;
1534 int i, r;
1535
1536 for (i = 0; i < 4; i++) {
1537 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1538 alu.inst = ctx->inst_info->r600_opcode;
1539
1540 alu.dst.chan = i;
1541
1542 alu.src[0].sel = V_SQ_ALU_SRC_0;
1543
1544 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1545 alu.src[1].sel = V_SQ_ALU_SRC_1;
1546 alu.src[1].neg = 1;
1547 } else {
1548 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1549 }
1550 if (i == 3) {
1551 alu.last = 1;
1552 }
1553 r = r600_bytecode_add_alu(ctx->bc, &alu);
1554 if (r)
1555 return r;
1556 }
1557
1558 /* kill must be last in ALU */
1559 ctx->bc->force_add_cf = 1;
1560 ctx->shader->uses_kill = TRUE;
1561 return 0;
1562 }
1563
1564 static int tgsi_lit(struct r600_shader_ctx *ctx)
1565 {
1566 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1567 struct r600_bytecode_alu alu;
1568 int r;
1569
1570 /* tmp.x = max(src.y, 0.0) */
1571 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1572 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1573 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1574 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1575 alu.src[1].chan = 1;
1576
1577 alu.dst.sel = ctx->temp_reg;
1578 alu.dst.chan = 0;
1579 alu.dst.write = 1;
1580
1581 alu.last = 1;
1582 r = r600_bytecode_add_alu(ctx->bc, &alu);
1583 if (r)
1584 return r;
1585
1586 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1587 {
1588 int chan;
1589 int sel;
1590 int i;
1591
1592 if (ctx->bc->chip_class == CAYMAN) {
1593 for (i = 0; i < 3; i++) {
1594 /* tmp.z = log(tmp.x) */
1595 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1596 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1597 alu.src[0].sel = ctx->temp_reg;
1598 alu.src[0].chan = 0;
1599 alu.dst.sel = ctx->temp_reg;
1600 alu.dst.chan = i;
1601 if (i == 2) {
1602 alu.dst.write = 1;
1603 alu.last = 1;
1604 } else
1605 alu.dst.write = 0;
1606
1607 r = r600_bytecode_add_alu(ctx->bc, &alu);
1608 if (r)
1609 return r;
1610 }
1611 } else {
1612 /* tmp.z = log(tmp.x) */
1613 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1615 alu.src[0].sel = ctx->temp_reg;
1616 alu.src[0].chan = 0;
1617 alu.dst.sel = ctx->temp_reg;
1618 alu.dst.chan = 2;
1619 alu.dst.write = 1;
1620 alu.last = 1;
1621 r = r600_bytecode_add_alu(ctx->bc, &alu);
1622 if (r)
1623 return r;
1624 }
1625
1626 chan = alu.dst.chan;
1627 sel = alu.dst.sel;
1628
1629 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1630 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1631 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1632 alu.src[0].sel = sel;
1633 alu.src[0].chan = chan;
1634 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1635 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1636 alu.dst.sel = ctx->temp_reg;
1637 alu.dst.chan = 0;
1638 alu.dst.write = 1;
1639 alu.is_op3 = 1;
1640 alu.last = 1;
1641 r = r600_bytecode_add_alu(ctx->bc, &alu);
1642 if (r)
1643 return r;
1644
1645 if (ctx->bc->chip_class == CAYMAN) {
1646 for (i = 0; i < 3; i++) {
1647 /* dst.z = exp(tmp.x) */
1648 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1650 alu.src[0].sel = ctx->temp_reg;
1651 alu.src[0].chan = 0;
1652 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1653 if (i == 2) {
1654 alu.dst.write = 1;
1655 alu.last = 1;
1656 } else
1657 alu.dst.write = 0;
1658 r = r600_bytecode_add_alu(ctx->bc, &alu);
1659 if (r)
1660 return r;
1661 }
1662 } else {
1663 /* dst.z = exp(tmp.x) */
1664 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1665 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1666 alu.src[0].sel = ctx->temp_reg;
1667 alu.src[0].chan = 0;
1668 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1669 alu.last = 1;
1670 r = r600_bytecode_add_alu(ctx->bc, &alu);
1671 if (r)
1672 return r;
1673 }
1674 }
1675
1676 /* dst.x, <- 1.0 */
1677 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1678 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1679 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1680 alu.src[0].chan = 0;
1681 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1682 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1683 r = r600_bytecode_add_alu(ctx->bc, &alu);
1684 if (r)
1685 return r;
1686
1687 /* dst.y = max(src.x, 0.0) */
1688 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1689 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1690 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1691 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1692 alu.src[1].chan = 0;
1693 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1694 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1695 r = r600_bytecode_add_alu(ctx->bc, &alu);
1696 if (r)
1697 return r;
1698
1699 /* dst.w, <- 1.0 */
1700 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1701 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1702 alu.src[0].sel = V_SQ_ALU_SRC_1;
1703 alu.src[0].chan = 0;
1704 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1705 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1706 alu.last = 1;
1707 r = r600_bytecode_add_alu(ctx->bc, &alu);
1708 if (r)
1709 return r;
1710
1711 return 0;
1712 }
1713
1714 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1715 {
1716 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1717 struct r600_bytecode_alu alu;
1718 int i, r;
1719
1720 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1721
1722 /* FIXME:
1723 * For state trackers other than OpenGL, we'll want to use
1724 * _RECIPSQRT_IEEE instead.
1725 */
1726 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1727
1728 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1729 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1730 r600_bytecode_src_set_abs(&alu.src[i]);
1731 }
1732 alu.dst.sel = ctx->temp_reg;
1733 alu.dst.write = 1;
1734 alu.last = 1;
1735 r = r600_bytecode_add_alu(ctx->bc, &alu);
1736 if (r)
1737 return r;
1738 /* replicate result */
1739 return tgsi_helper_tempx_replicate(ctx);
1740 }
1741
1742 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1743 {
1744 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1745 struct r600_bytecode_alu alu;
1746 int i, r;
1747
1748 for (i = 0; i < 4; i++) {
1749 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1750 alu.src[0].sel = ctx->temp_reg;
1751 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1752 alu.dst.chan = i;
1753 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1754 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1755 if (i == 3)
1756 alu.last = 1;
1757 r = r600_bytecode_add_alu(ctx->bc, &alu);
1758 if (r)
1759 return r;
1760 }
1761 return 0;
1762 }
1763
1764 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1765 {
1766 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1767 struct r600_bytecode_alu alu;
1768 int i, r;
1769
1770 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1771 alu.inst = ctx->inst_info->r600_opcode;
1772 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1773 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1774 }
1775 alu.dst.sel = ctx->temp_reg;
1776 alu.dst.write = 1;
1777 alu.last = 1;
1778 r = r600_bytecode_add_alu(ctx->bc, &alu);
1779 if (r)
1780 return r;
1781 /* replicate result */
1782 return tgsi_helper_tempx_replicate(ctx);
1783 }
1784
1785 static int cayman_pow(struct r600_shader_ctx *ctx)
1786 {
1787 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1788 int i, r;
1789 struct r600_bytecode_alu alu;
1790 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1791
1792 for (i = 0; i < 3; i++) {
1793 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1794 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1795 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1796 alu.dst.sel = ctx->temp_reg;
1797 alu.dst.chan = i;
1798 alu.dst.write = 1;
1799 if (i == 2)
1800 alu.last = 1;
1801 r = r600_bytecode_add_alu(ctx->bc, &alu);
1802 if (r)
1803 return r;
1804 }
1805
1806 /* b * LOG2(a) */
1807 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1808 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1809 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1810 alu.src[1].sel = ctx->temp_reg;
1811 alu.dst.sel = ctx->temp_reg;
1812 alu.dst.write = 1;
1813 alu.last = 1;
1814 r = r600_bytecode_add_alu(ctx->bc, &alu);
1815 if (r)
1816 return r;
1817
1818 for (i = 0; i < last_slot; i++) {
1819 /* POW(a,b) = EXP2(b * LOG2(a))*/
1820 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1821 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1822 alu.src[0].sel = ctx->temp_reg;
1823
1824 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1825 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1826 if (i == last_slot - 1)
1827 alu.last = 1;
1828 r = r600_bytecode_add_alu(ctx->bc, &alu);
1829 if (r)
1830 return r;
1831 }
1832 return 0;
1833 }
1834
1835 static int tgsi_pow(struct r600_shader_ctx *ctx)
1836 {
1837 struct r600_bytecode_alu alu;
1838 int r;
1839
1840 /* LOG2(a) */
1841 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1842 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1843 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1844 alu.dst.sel = ctx->temp_reg;
1845 alu.dst.write = 1;
1846 alu.last = 1;
1847 r = r600_bytecode_add_alu(ctx->bc, &alu);
1848 if (r)
1849 return r;
1850 /* b * LOG2(a) */
1851 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1852 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1853 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1854 alu.src[1].sel = ctx->temp_reg;
1855 alu.dst.sel = ctx->temp_reg;
1856 alu.dst.write = 1;
1857 alu.last = 1;
1858 r = r600_bytecode_add_alu(ctx->bc, &alu);
1859 if (r)
1860 return r;
1861 /* POW(a,b) = EXP2(b * LOG2(a))*/
1862 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1863 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1864 alu.src[0].sel = ctx->temp_reg;
1865 alu.dst.sel = ctx->temp_reg;
1866 alu.dst.write = 1;
1867 alu.last = 1;
1868 r = r600_bytecode_add_alu(ctx->bc, &alu);
1869 if (r)
1870 return r;
1871 return tgsi_helper_tempx_replicate(ctx);
1872 }
1873
1874 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1875 {
1876 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1877 struct r600_bytecode_alu alu;
1878 int i, r;
1879
1880 /* tmp = (src > 0 ? 1 : src) */
1881 for (i = 0; i < 4; i++) {
1882 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1883 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1884 alu.is_op3 = 1;
1885
1886 alu.dst.sel = ctx->temp_reg;
1887 alu.dst.chan = i;
1888
1889 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1890 alu.src[1].sel = V_SQ_ALU_SRC_1;
1891 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
1892
1893 if (i == 3)
1894 alu.last = 1;
1895 r = r600_bytecode_add_alu(ctx->bc, &alu);
1896 if (r)
1897 return r;
1898 }
1899
1900 /* dst = (-tmp > 0 ? -1 : tmp) */
1901 for (i = 0; i < 4; i++) {
1902 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1903 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1904 alu.is_op3 = 1;
1905 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1906
1907 alu.src[0].sel = ctx->temp_reg;
1908 alu.src[0].chan = i;
1909 alu.src[0].neg = 1;
1910
1911 alu.src[1].sel = V_SQ_ALU_SRC_1;
1912 alu.src[1].neg = 1;
1913
1914 alu.src[2].sel = ctx->temp_reg;
1915 alu.src[2].chan = i;
1916
1917 if (i == 3)
1918 alu.last = 1;
1919 r = r600_bytecode_add_alu(ctx->bc, &alu);
1920 if (r)
1921 return r;
1922 }
1923 return 0;
1924 }
1925
1926 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1927 {
1928 struct r600_bytecode_alu alu;
1929 int i, r;
1930
1931 for (i = 0; i < 4; i++) {
1932 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1933 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1934 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1935 alu.dst.chan = i;
1936 } else {
1937 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1938 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1939 alu.src[0].sel = ctx->temp_reg;
1940 alu.src[0].chan = i;
1941 }
1942 if (i == 3) {
1943 alu.last = 1;
1944 }
1945 r = r600_bytecode_add_alu(ctx->bc, &alu);
1946 if (r)
1947 return r;
1948 }
1949 return 0;
1950 }
1951
1952 static int tgsi_op3(struct r600_shader_ctx *ctx)
1953 {
1954 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1955 struct r600_bytecode_alu alu;
1956 int i, j, r;
1957 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1958
1959 for (i = 0; i < lasti + 1; i++) {
1960 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1961 continue;
1962
1963 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1964 alu.inst = ctx->inst_info->r600_opcode;
1965 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1966 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1967 }
1968
1969 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1970 alu.dst.chan = i;
1971 alu.dst.write = 1;
1972 alu.is_op3 = 1;
1973 if (i == lasti) {
1974 alu.last = 1;
1975 }
1976 r = r600_bytecode_add_alu(ctx->bc, &alu);
1977 if (r)
1978 return r;
1979 }
1980 return 0;
1981 }
1982
1983 static int tgsi_dp(struct r600_shader_ctx *ctx)
1984 {
1985 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1986 struct r600_bytecode_alu alu;
1987 int i, j, r;
1988
1989 for (i = 0; i < 4; i++) {
1990 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1991 alu.inst = ctx->inst_info->r600_opcode;
1992 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1993 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1994 }
1995
1996 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1997 alu.dst.chan = i;
1998 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1999 /* handle some special cases */
2000 switch (ctx->inst_info->tgsi_opcode) {
2001 case TGSI_OPCODE_DP2:
2002 if (i > 1) {
2003 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2004 alu.src[0].chan = alu.src[1].chan = 0;
2005 }
2006 break;
2007 case TGSI_OPCODE_DP3:
2008 if (i > 2) {
2009 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2010 alu.src[0].chan = alu.src[1].chan = 0;
2011 }
2012 break;
2013 case TGSI_OPCODE_DPH:
2014 if (i == 3) {
2015 alu.src[0].sel = V_SQ_ALU_SRC_1;
2016 alu.src[0].chan = 0;
2017 alu.src[0].neg = 0;
2018 }
2019 break;
2020 default:
2021 break;
2022 }
2023 if (i == 3) {
2024 alu.last = 1;
2025 }
2026 r = r600_bytecode_add_alu(ctx->bc, &alu);
2027 if (r)
2028 return r;
2029 }
2030 return 0;
2031 }
2032
2033 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
2034 unsigned index)
2035 {
2036 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2037 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
2038 inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
2039 ctx->src[index].neg || ctx->src[index].abs;
2040 }
2041
2042 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
2043 unsigned index)
2044 {
2045 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2046 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
2047 }
2048
2049 static int tgsi_tex(struct r600_shader_ctx *ctx)
2050 {
2051 static float one_point_five = 1.5f;
2052 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2053 struct r600_bytecode_tex tex;
2054 struct r600_bytecode_alu alu;
2055 unsigned src_gpr;
2056 int r, i, j;
2057 int opcode;
2058 /* Texture fetch instructions can only use gprs as source.
2059 * Also they cannot negate the source or take the absolute value */
2060 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
2061 boolean src_loaded = FALSE;
2062 unsigned sampler_src_reg = 1;
2063 u8 offset_x = 0, offset_y = 0, offset_z = 0;
2064
2065 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
2066
2067 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
2068 /* get offset values */
2069 if (inst->Texture.NumOffsets) {
2070 assert(inst->Texture.NumOffsets == 1);
2071
2072 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
2073 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
2074 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
2075 }
2076 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
2077 /* TGSI moves the sampler to src reg 3 for TXD */
2078 sampler_src_reg = 3;
2079
2080 for (i = 1; i < 3; i++) {
2081 /* set gradients h/v */
2082 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2083 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
2084 SQ_TEX_INST_SET_GRADIENTS_V;
2085 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2086 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2087
2088 if (tgsi_tex_src_requires_loading(ctx, i)) {
2089 tex.src_gpr = r600_get_temp(ctx);
2090 tex.src_sel_x = 0;
2091 tex.src_sel_y = 1;
2092 tex.src_sel_z = 2;
2093 tex.src_sel_w = 3;
2094
2095 for (j = 0; j < 4; j++) {
2096 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2097 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2098 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
2099 alu.dst.sel = tex.src_gpr;
2100 alu.dst.chan = j;
2101 if (j == 3)
2102 alu.last = 1;
2103 alu.dst.write = 1;
2104 r = r600_bytecode_add_alu(ctx->bc, &alu);
2105 if (r)
2106 return r;
2107 }
2108
2109 } else {
2110 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
2111 tex.src_sel_x = ctx->src[i].swizzle[0];
2112 tex.src_sel_y = ctx->src[i].swizzle[1];
2113 tex.src_sel_z = ctx->src[i].swizzle[2];
2114 tex.src_sel_w = ctx->src[i].swizzle[3];
2115 tex.src_rel = ctx->src[i].rel;
2116 }
2117 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
2118 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
2119 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2120 tex.coord_type_x = 1;
2121 tex.coord_type_y = 1;
2122 tex.coord_type_z = 1;
2123 tex.coord_type_w = 1;
2124 }
2125 r = r600_bytecode_add_tex(ctx->bc, &tex);
2126 if (r)
2127 return r;
2128 }
2129 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
2130 int out_chan;
2131 /* Add perspective divide */
2132 if (ctx->bc->chip_class == CAYMAN) {
2133 out_chan = 2;
2134 for (i = 0; i < 3; i++) {
2135 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2136 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2137 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2138
2139 alu.dst.sel = ctx->temp_reg;
2140 alu.dst.chan = i;
2141 if (i == 2)
2142 alu.last = 1;
2143 if (out_chan == i)
2144 alu.dst.write = 1;
2145 r = r600_bytecode_add_alu(ctx->bc, &alu);
2146 if (r)
2147 return r;
2148 }
2149
2150 } else {
2151 out_chan = 3;
2152 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2153 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2154 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2155
2156 alu.dst.sel = ctx->temp_reg;
2157 alu.dst.chan = out_chan;
2158 alu.last = 1;
2159 alu.dst.write = 1;
2160 r = r600_bytecode_add_alu(ctx->bc, &alu);
2161 if (r)
2162 return r;
2163 }
2164
2165 for (i = 0; i < 3; i++) {
2166 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2167 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2168 alu.src[0].sel = ctx->temp_reg;
2169 alu.src[0].chan = out_chan;
2170 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2171 alu.dst.sel = ctx->temp_reg;
2172 alu.dst.chan = i;
2173 alu.dst.write = 1;
2174 r = r600_bytecode_add_alu(ctx->bc, &alu);
2175 if (r)
2176 return r;
2177 }
2178 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2179 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2180 alu.src[0].sel = V_SQ_ALU_SRC_1;
2181 alu.src[0].chan = 0;
2182 alu.dst.sel = ctx->temp_reg;
2183 alu.dst.chan = 3;
2184 alu.last = 1;
2185 alu.dst.write = 1;
2186 r = r600_bytecode_add_alu(ctx->bc, &alu);
2187 if (r)
2188 return r;
2189 src_loaded = TRUE;
2190 src_gpr = ctx->temp_reg;
2191 }
2192
2193 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2194 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
2195 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
2196
2197 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
2198 for (i = 0; i < 4; i++) {
2199 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2201 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2202 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2203 alu.dst.sel = ctx->temp_reg;
2204 alu.dst.chan = i;
2205 if (i == 3)
2206 alu.last = 1;
2207 alu.dst.write = 1;
2208 r = r600_bytecode_add_alu(ctx->bc, &alu);
2209 if (r)
2210 return r;
2211 }
2212
2213 /* tmp1.z = RCP_e(|tmp1.z|) */
2214 if (ctx->bc->chip_class == CAYMAN) {
2215 for (i = 0; i < 3; i++) {
2216 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2218 alu.src[0].sel = ctx->temp_reg;
2219 alu.src[0].chan = 2;
2220 alu.src[0].abs = 1;
2221 alu.dst.sel = ctx->temp_reg;
2222 alu.dst.chan = i;
2223 if (i == 2)
2224 alu.dst.write = 1;
2225 if (i == 2)
2226 alu.last = 1;
2227 r = r600_bytecode_add_alu(ctx->bc, &alu);
2228 if (r)
2229 return r;
2230 }
2231 } else {
2232 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2233 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2234 alu.src[0].sel = ctx->temp_reg;
2235 alu.src[0].chan = 2;
2236 alu.src[0].abs = 1;
2237 alu.dst.sel = ctx->temp_reg;
2238 alu.dst.chan = 2;
2239 alu.dst.write = 1;
2240 alu.last = 1;
2241 r = r600_bytecode_add_alu(ctx->bc, &alu);
2242 if (r)
2243 return r;
2244 }
2245
2246 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
2247 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
2248 * muladd has no writemask, have to use another temp
2249 */
2250 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2251 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2252 alu.is_op3 = 1;
2253
2254 alu.src[0].sel = ctx->temp_reg;
2255 alu.src[0].chan = 0;
2256 alu.src[1].sel = ctx->temp_reg;
2257 alu.src[1].chan = 2;
2258
2259 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2260 alu.src[2].chan = 0;
2261 alu.src[2].value = *(uint32_t *)&one_point_five;
2262
2263 alu.dst.sel = ctx->temp_reg;
2264 alu.dst.chan = 0;
2265 alu.dst.write = 1;
2266
2267 r = r600_bytecode_add_alu(ctx->bc, &alu);
2268 if (r)
2269 return r;
2270
2271 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2273 alu.is_op3 = 1;
2274
2275 alu.src[0].sel = ctx->temp_reg;
2276 alu.src[0].chan = 1;
2277 alu.src[1].sel = ctx->temp_reg;
2278 alu.src[1].chan = 2;
2279
2280 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2281 alu.src[2].chan = 0;
2282 alu.src[2].value = *(uint32_t *)&one_point_five;
2283
2284 alu.dst.sel = ctx->temp_reg;
2285 alu.dst.chan = 1;
2286 alu.dst.write = 1;
2287
2288 alu.last = 1;
2289 r = r600_bytecode_add_alu(ctx->bc, &alu);
2290 if (r)
2291 return r;
2292
2293 src_loaded = TRUE;
2294 src_gpr = ctx->temp_reg;
2295 }
2296
2297 if (src_requires_loading && !src_loaded) {
2298 for (i = 0; i < 4; i++) {
2299 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2301 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2302 alu.dst.sel = ctx->temp_reg;
2303 alu.dst.chan = i;
2304 if (i == 3)
2305 alu.last = 1;
2306 alu.dst.write = 1;
2307 r = r600_bytecode_add_alu(ctx->bc, &alu);
2308 if (r)
2309 return r;
2310 }
2311 src_loaded = TRUE;
2312 src_gpr = ctx->temp_reg;
2313 }
2314
2315 opcode = ctx->inst_info->r600_opcode;
2316 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2317 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2318 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2319 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
2320 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
2321 switch (opcode) {
2322 case SQ_TEX_INST_SAMPLE:
2323 opcode = SQ_TEX_INST_SAMPLE_C;
2324 break;
2325 case SQ_TEX_INST_SAMPLE_L:
2326 opcode = SQ_TEX_INST_SAMPLE_C_L;
2327 break;
2328 case SQ_TEX_INST_SAMPLE_LB:
2329 opcode = SQ_TEX_INST_SAMPLE_C_LB;
2330 break;
2331 case SQ_TEX_INST_SAMPLE_G:
2332 opcode = SQ_TEX_INST_SAMPLE_C_G;
2333 break;
2334 }
2335 }
2336
2337 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2338 tex.inst = opcode;
2339
2340 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2341 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2342 tex.src_gpr = src_gpr;
2343 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2344 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2345 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2346 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2347 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2348 if (src_loaded) {
2349 tex.src_sel_x = 0;
2350 tex.src_sel_y = 1;
2351 tex.src_sel_z = 2;
2352 tex.src_sel_w = 3;
2353 } else {
2354 tex.src_sel_x = ctx->src[0].swizzle[0];
2355 tex.src_sel_y = ctx->src[0].swizzle[1];
2356 tex.src_sel_z = ctx->src[0].swizzle[2];
2357 tex.src_sel_w = ctx->src[0].swizzle[3];
2358 tex.src_rel = ctx->src[0].rel;
2359 }
2360
2361 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2362 tex.src_sel_x = 1;
2363 tex.src_sel_y = 0;
2364 tex.src_sel_z = 3;
2365 tex.src_sel_w = 1;
2366 }
2367
2368 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
2369 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
2370 tex.coord_type_x = 1;
2371 tex.coord_type_y = 1;
2372 }
2373 tex.coord_type_z = 1;
2374 tex.coord_type_w = 1;
2375
2376 tex.offset_x = offset_x;
2377 tex.offset_y = offset_y;
2378 tex.offset_z = offset_z;
2379
2380 /* Put the depth for comparison in W.
2381 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
2382 * Some instructions expect the depth in Z. */
2383 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2384 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2385 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2386 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
2387 opcode != SQ_TEX_INST_SAMPLE_C_L &&
2388 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
2389 tex.src_sel_w = tex.src_sel_z;
2390 }
2391
2392 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
2393 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
2394 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
2395 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
2396 /* the array index is read from Y */
2397 tex.coord_type_y = 0;
2398 } else {
2399 /* the array index is read from Z */
2400 tex.coord_type_z = 0;
2401 tex.src_sel_z = tex.src_sel_y;
2402 }
2403 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
2404 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
2405 /* the array index is read from Z */
2406 tex.coord_type_z = 0;
2407
2408 r = r600_bytecode_add_tex(ctx->bc, &tex);
2409 if (r)
2410 return r;
2411
2412 /* add shadow ambient support - gallium doesn't do it yet */
2413 return 0;
2414 }
2415
2416 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2417 {
2418 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2419 struct r600_bytecode_alu alu;
2420 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2421 unsigned i;
2422 int r;
2423
2424 /* optimize if it's just an equal balance */
2425 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2426 for (i = 0; i < lasti + 1; i++) {
2427 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2428 continue;
2429
2430 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2431 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2432 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2433 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2434 alu.omod = 3;
2435 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2436 alu.dst.chan = i;
2437 if (i == lasti) {
2438 alu.last = 1;
2439 }
2440 r = r600_bytecode_add_alu(ctx->bc, &alu);
2441 if (r)
2442 return r;
2443 }
2444 return 0;
2445 }
2446
2447 /* 1 - src0 */
2448 for (i = 0; i < lasti + 1; i++) {
2449 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2450 continue;
2451
2452 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2453 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2454 alu.src[0].sel = V_SQ_ALU_SRC_1;
2455 alu.src[0].chan = 0;
2456 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2457 r600_bytecode_src_toggle_neg(&alu.src[1]);
2458 alu.dst.sel = ctx->temp_reg;
2459 alu.dst.chan = i;
2460 if (i == lasti) {
2461 alu.last = 1;
2462 }
2463 alu.dst.write = 1;
2464 r = r600_bytecode_add_alu(ctx->bc, &alu);
2465 if (r)
2466 return r;
2467 }
2468
2469 /* (1 - src0) * src2 */
2470 for (i = 0; i < lasti + 1; i++) {
2471 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2472 continue;
2473
2474 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2476 alu.src[0].sel = ctx->temp_reg;
2477 alu.src[0].chan = i;
2478 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2479 alu.dst.sel = ctx->temp_reg;
2480 alu.dst.chan = i;
2481 if (i == lasti) {
2482 alu.last = 1;
2483 }
2484 alu.dst.write = 1;
2485 r = r600_bytecode_add_alu(ctx->bc, &alu);
2486 if (r)
2487 return r;
2488 }
2489
2490 /* src0 * src1 + (1 - src0) * src2 */
2491 for (i = 0; i < lasti + 1; i++) {
2492 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2493 continue;
2494
2495 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2497 alu.is_op3 = 1;
2498 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2499 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2500 alu.src[2].sel = ctx->temp_reg;
2501 alu.src[2].chan = i;
2502
2503 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2504 alu.dst.chan = i;
2505 if (i == lasti) {
2506 alu.last = 1;
2507 }
2508 r = r600_bytecode_add_alu(ctx->bc, &alu);
2509 if (r)
2510 return r;
2511 }
2512 return 0;
2513 }
2514
2515 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2516 {
2517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2518 struct r600_bytecode_alu alu;
2519 int i, r;
2520 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2521
2522 for (i = 0; i < lasti + 1; i++) {
2523 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2524 continue;
2525
2526 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2527 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2528 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2529 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2530 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
2531 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2532 alu.dst.chan = i;
2533 alu.dst.write = 1;
2534 alu.is_op3 = 1;
2535 if (i == lasti)
2536 alu.last = 1;
2537 r = r600_bytecode_add_alu(ctx->bc, &alu);
2538 if (r)
2539 return r;
2540 }
2541 return 0;
2542 }
2543
2544 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2545 {
2546 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2547 static const unsigned int src0_swizzle[] = {2, 0, 1};
2548 static const unsigned int src1_swizzle[] = {1, 2, 0};
2549 struct r600_bytecode_alu alu;
2550 uint32_t use_temp = 0;
2551 int i, r;
2552
2553 if (inst->Dst[0].Register.WriteMask != 0xf)
2554 use_temp = 1;
2555
2556 for (i = 0; i < 4; i++) {
2557 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2558 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2559 if (i < 3) {
2560 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2561 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2562 } else {
2563 alu.src[0].sel = V_SQ_ALU_SRC_0;
2564 alu.src[0].chan = i;
2565 alu.src[1].sel = V_SQ_ALU_SRC_0;
2566 alu.src[1].chan = i;
2567 }
2568
2569 alu.dst.sel = ctx->temp_reg;
2570 alu.dst.chan = i;
2571 alu.dst.write = 1;
2572
2573 if (i == 3)
2574 alu.last = 1;
2575 r = r600_bytecode_add_alu(ctx->bc, &alu);
2576 if (r)
2577 return r;
2578 }
2579
2580 for (i = 0; i < 4; i++) {
2581 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2583
2584 if (i < 3) {
2585 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2586 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2587 } else {
2588 alu.src[0].sel = V_SQ_ALU_SRC_0;
2589 alu.src[0].chan = i;
2590 alu.src[1].sel = V_SQ_ALU_SRC_0;
2591 alu.src[1].chan = i;
2592 }
2593
2594 alu.src[2].sel = ctx->temp_reg;
2595 alu.src[2].neg = 1;
2596 alu.src[2].chan = i;
2597
2598 if (use_temp)
2599 alu.dst.sel = ctx->temp_reg;
2600 else
2601 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2602 alu.dst.chan = i;
2603 alu.dst.write = 1;
2604 alu.is_op3 = 1;
2605 if (i == 3)
2606 alu.last = 1;
2607 r = r600_bytecode_add_alu(ctx->bc, &alu);
2608 if (r)
2609 return r;
2610 }
2611 if (use_temp)
2612 return tgsi_helper_copy(ctx, inst);
2613 return 0;
2614 }
2615
2616 static int tgsi_exp(struct r600_shader_ctx *ctx)
2617 {
2618 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2619 struct r600_bytecode_alu alu;
2620 int r;
2621 int i;
2622
2623 /* result.x = 2^floor(src); */
2624 if (inst->Dst[0].Register.WriteMask & 1) {
2625 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2626
2627 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2628 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2629
2630 alu.dst.sel = ctx->temp_reg;
2631 alu.dst.chan = 0;
2632 alu.dst.write = 1;
2633 alu.last = 1;
2634 r = r600_bytecode_add_alu(ctx->bc, &alu);
2635 if (r)
2636 return r;
2637
2638 if (ctx->bc->chip_class == CAYMAN) {
2639 for (i = 0; i < 3; i++) {
2640 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2641 alu.src[0].sel = ctx->temp_reg;
2642 alu.src[0].chan = 0;
2643
2644 alu.dst.sel = ctx->temp_reg;
2645 alu.dst.chan = i;
2646 if (i == 0)
2647 alu.dst.write = 1;
2648 if (i == 2)
2649 alu.last = 1;
2650 r = r600_bytecode_add_alu(ctx->bc, &alu);
2651 if (r)
2652 return r;
2653 }
2654 } else {
2655 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2656 alu.src[0].sel = ctx->temp_reg;
2657 alu.src[0].chan = 0;
2658
2659 alu.dst.sel = ctx->temp_reg;
2660 alu.dst.chan = 0;
2661 alu.dst.write = 1;
2662 alu.last = 1;
2663 r = r600_bytecode_add_alu(ctx->bc, &alu);
2664 if (r)
2665 return r;
2666 }
2667 }
2668
2669 /* result.y = tmp - floor(tmp); */
2670 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2671 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2672
2673 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2674 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2675
2676 alu.dst.sel = ctx->temp_reg;
2677 #if 0
2678 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2679 if (r)
2680 return r;
2681 #endif
2682 alu.dst.write = 1;
2683 alu.dst.chan = 1;
2684
2685 alu.last = 1;
2686
2687 r = r600_bytecode_add_alu(ctx->bc, &alu);
2688 if (r)
2689 return r;
2690 }
2691
2692 /* result.z = RoughApprox2ToX(tmp);*/
2693 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2694 if (ctx->bc->chip_class == CAYMAN) {
2695 for (i = 0; i < 3; i++) {
2696 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2697 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2698 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2699
2700 alu.dst.sel = ctx->temp_reg;
2701 alu.dst.chan = i;
2702 if (i == 2) {
2703 alu.dst.write = 1;
2704 alu.last = 1;
2705 }
2706
2707 r = r600_bytecode_add_alu(ctx->bc, &alu);
2708 if (r)
2709 return r;
2710 }
2711 } else {
2712 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2713 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2714 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2715
2716 alu.dst.sel = ctx->temp_reg;
2717 alu.dst.write = 1;
2718 alu.dst.chan = 2;
2719
2720 alu.last = 1;
2721
2722 r = r600_bytecode_add_alu(ctx->bc, &alu);
2723 if (r)
2724 return r;
2725 }
2726 }
2727
2728 /* result.w = 1.0;*/
2729 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2730 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2731
2732 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2733 alu.src[0].sel = V_SQ_ALU_SRC_1;
2734 alu.src[0].chan = 0;
2735
2736 alu.dst.sel = ctx->temp_reg;
2737 alu.dst.chan = 3;
2738 alu.dst.write = 1;
2739 alu.last = 1;
2740 r = r600_bytecode_add_alu(ctx->bc, &alu);
2741 if (r)
2742 return r;
2743 }
2744 return tgsi_helper_copy(ctx, inst);
2745 }
2746
2747 static int tgsi_log(struct r600_shader_ctx *ctx)
2748 {
2749 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2750 struct r600_bytecode_alu alu;
2751 int r;
2752 int i;
2753
2754 /* result.x = floor(log2(|src|)); */
2755 if (inst->Dst[0].Register.WriteMask & 1) {
2756 if (ctx->bc->chip_class == CAYMAN) {
2757 for (i = 0; i < 3; i++) {
2758 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2759
2760 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2761 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2762 r600_bytecode_src_set_abs(&alu.src[0]);
2763
2764 alu.dst.sel = ctx->temp_reg;
2765 alu.dst.chan = i;
2766 if (i == 0)
2767 alu.dst.write = 1;
2768 if (i == 2)
2769 alu.last = 1;
2770 r = r600_bytecode_add_alu(ctx->bc, &alu);
2771 if (r)
2772 return r;
2773 }
2774
2775 } else {
2776 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2777
2778 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2779 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2780 r600_bytecode_src_set_abs(&alu.src[0]);
2781
2782 alu.dst.sel = ctx->temp_reg;
2783 alu.dst.chan = 0;
2784 alu.dst.write = 1;
2785 alu.last = 1;
2786 r = r600_bytecode_add_alu(ctx->bc, &alu);
2787 if (r)
2788 return r;
2789 }
2790
2791 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2792 alu.src[0].sel = ctx->temp_reg;
2793 alu.src[0].chan = 0;
2794
2795 alu.dst.sel = ctx->temp_reg;
2796 alu.dst.chan = 0;
2797 alu.dst.write = 1;
2798 alu.last = 1;
2799
2800 r = r600_bytecode_add_alu(ctx->bc, &alu);
2801 if (r)
2802 return r;
2803 }
2804
2805 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2806 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2807
2808 if (ctx->bc->chip_class == CAYMAN) {
2809 for (i = 0; i < 3; i++) {
2810 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2811
2812 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2813 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2814 r600_bytecode_src_set_abs(&alu.src[0]);
2815
2816 alu.dst.sel = ctx->temp_reg;
2817 alu.dst.chan = i;
2818 if (i == 1)
2819 alu.dst.write = 1;
2820 if (i == 2)
2821 alu.last = 1;
2822
2823 r = r600_bytecode_add_alu(ctx->bc, &alu);
2824 if (r)
2825 return r;
2826 }
2827 } else {
2828 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2829
2830 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2831 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2832 r600_bytecode_src_set_abs(&alu.src[0]);
2833
2834 alu.dst.sel = ctx->temp_reg;
2835 alu.dst.chan = 1;
2836 alu.dst.write = 1;
2837 alu.last = 1;
2838
2839 r = r600_bytecode_add_alu(ctx->bc, &alu);
2840 if (r)
2841 return r;
2842 }
2843
2844 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2845
2846 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2847 alu.src[0].sel = ctx->temp_reg;
2848 alu.src[0].chan = 1;
2849
2850 alu.dst.sel = ctx->temp_reg;
2851 alu.dst.chan = 1;
2852 alu.dst.write = 1;
2853 alu.last = 1;
2854
2855 r = r600_bytecode_add_alu(ctx->bc, &alu);
2856 if (r)
2857 return r;
2858
2859 if (ctx->bc->chip_class == CAYMAN) {
2860 for (i = 0; i < 3; i++) {
2861 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2862 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2863 alu.src[0].sel = ctx->temp_reg;
2864 alu.src[0].chan = 1;
2865
2866 alu.dst.sel = ctx->temp_reg;
2867 alu.dst.chan = i;
2868 if (i == 1)
2869 alu.dst.write = 1;
2870 if (i == 2)
2871 alu.last = 1;
2872
2873 r = r600_bytecode_add_alu(ctx->bc, &alu);
2874 if (r)
2875 return r;
2876 }
2877 } else {
2878 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2879 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2880 alu.src[0].sel = ctx->temp_reg;
2881 alu.src[0].chan = 1;
2882
2883 alu.dst.sel = ctx->temp_reg;
2884 alu.dst.chan = 1;
2885 alu.dst.write = 1;
2886 alu.last = 1;
2887
2888 r = r600_bytecode_add_alu(ctx->bc, &alu);
2889 if (r)
2890 return r;
2891 }
2892
2893 if (ctx->bc->chip_class == CAYMAN) {
2894 for (i = 0; i < 3; i++) {
2895 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2897 alu.src[0].sel = ctx->temp_reg;
2898 alu.src[0].chan = 1;
2899
2900 alu.dst.sel = ctx->temp_reg;
2901 alu.dst.chan = i;
2902 if (i == 1)
2903 alu.dst.write = 1;
2904 if (i == 2)
2905 alu.last = 1;
2906
2907 r = r600_bytecode_add_alu(ctx->bc, &alu);
2908 if (r)
2909 return r;
2910 }
2911 } else {
2912 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2913 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2914 alu.src[0].sel = ctx->temp_reg;
2915 alu.src[0].chan = 1;
2916
2917 alu.dst.sel = ctx->temp_reg;
2918 alu.dst.chan = 1;
2919 alu.dst.write = 1;
2920 alu.last = 1;
2921
2922 r = r600_bytecode_add_alu(ctx->bc, &alu);
2923 if (r)
2924 return r;
2925 }
2926
2927 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2928
2929 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2930
2931 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2932 r600_bytecode_src_set_abs(&alu.src[0]);
2933
2934 alu.src[1].sel = ctx->temp_reg;
2935 alu.src[1].chan = 1;
2936
2937 alu.dst.sel = ctx->temp_reg;
2938 alu.dst.chan = 1;
2939 alu.dst.write = 1;
2940 alu.last = 1;
2941
2942 r = r600_bytecode_add_alu(ctx->bc, &alu);
2943 if (r)
2944 return r;
2945 }
2946
2947 /* result.z = log2(|src|);*/
2948 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2949 if (ctx->bc->chip_class == CAYMAN) {
2950 for (i = 0; i < 3; i++) {
2951 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2952
2953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2954 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2955 r600_bytecode_src_set_abs(&alu.src[0]);
2956
2957 alu.dst.sel = ctx->temp_reg;
2958 if (i == 2)
2959 alu.dst.write = 1;
2960 alu.dst.chan = i;
2961 if (i == 2)
2962 alu.last = 1;
2963
2964 r = r600_bytecode_add_alu(ctx->bc, &alu);
2965 if (r)
2966 return r;
2967 }
2968 } else {
2969 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2970
2971 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2972 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2973 r600_bytecode_src_set_abs(&alu.src[0]);
2974
2975 alu.dst.sel = ctx->temp_reg;
2976 alu.dst.write = 1;
2977 alu.dst.chan = 2;
2978 alu.last = 1;
2979
2980 r = r600_bytecode_add_alu(ctx->bc, &alu);
2981 if (r)
2982 return r;
2983 }
2984 }
2985
2986 /* result.w = 1.0; */
2987 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2988 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2989
2990 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2991 alu.src[0].sel = V_SQ_ALU_SRC_1;
2992 alu.src[0].chan = 0;
2993
2994 alu.dst.sel = ctx->temp_reg;
2995 alu.dst.chan = 3;
2996 alu.dst.write = 1;
2997 alu.last = 1;
2998
2999 r = r600_bytecode_add_alu(ctx->bc, &alu);
3000 if (r)
3001 return r;
3002 }
3003
3004 return tgsi_helper_copy(ctx, inst);
3005 }
3006
3007 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
3008 {
3009 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3010 struct r600_bytecode_alu alu;
3011 int r;
3012
3013 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3014
3015 switch (inst->Instruction.Opcode) {
3016 case TGSI_OPCODE_ARL:
3017 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
3018 break;
3019 case TGSI_OPCODE_ARR:
3020 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3021 break;
3022 case TGSI_OPCODE_UARL:
3023 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
3024 break;
3025 default:
3026 assert(0);
3027 return -1;
3028 }
3029
3030 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3031 alu.last = 1;
3032 alu.dst.sel = ctx->bc->ar_reg;
3033 alu.dst.write = 1;
3034 r = r600_bytecode_add_alu(ctx->bc, &alu);
3035 if (r)
3036 return r;
3037
3038 ctx->bc->ar_loaded = 0;
3039 return 0;
3040 }
3041 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
3042 {
3043 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3044 struct r600_bytecode_alu alu;
3045 int r;
3046
3047 switch (inst->Instruction.Opcode) {
3048 case TGSI_OPCODE_ARL:
3049 memset(&alu, 0, sizeof(alu));
3050 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
3051 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3052 alu.dst.sel = ctx->bc->ar_reg;
3053 alu.dst.write = 1;
3054 alu.last = 1;
3055
3056 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3057 return r;
3058
3059 memset(&alu, 0, sizeof(alu));
3060 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3061 alu.src[0].sel = ctx->bc->ar_reg;
3062 alu.dst.sel = ctx->bc->ar_reg;
3063 alu.dst.write = 1;
3064 alu.last = 1;
3065
3066 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3067 return r;
3068 break;
3069 case TGSI_OPCODE_ARR:
3070 memset(&alu, 0, sizeof(alu));
3071 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3072 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3073 alu.dst.sel = ctx->bc->ar_reg;
3074 alu.dst.write = 1;
3075 alu.last = 1;
3076
3077 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3078 return r;
3079 break;
3080 case TGSI_OPCODE_UARL:
3081 memset(&alu, 0, sizeof(alu));
3082 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
3083 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3084 alu.dst.sel = ctx->bc->ar_reg;
3085 alu.dst.write = 1;
3086 alu.last = 1;
3087
3088 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3089 return r;
3090 break;
3091 default:
3092 assert(0);
3093 return -1;
3094 }
3095
3096 ctx->bc->ar_loaded = 0;
3097 return 0;
3098 }
3099
3100 static int tgsi_opdst(struct r600_shader_ctx *ctx)
3101 {
3102 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3103 struct r600_bytecode_alu alu;
3104 int i, r = 0;
3105
3106 for (i = 0; i < 4; i++) {
3107 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3108
3109 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3110 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3111
3112 if (i == 0 || i == 3) {
3113 alu.src[0].sel = V_SQ_ALU_SRC_1;
3114 } else {
3115 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3116 }
3117
3118 if (i == 0 || i == 2) {
3119 alu.src[1].sel = V_SQ_ALU_SRC_1;
3120 } else {
3121 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3122 }
3123 if (i == 3)
3124 alu.last = 1;
3125 r = r600_bytecode_add_alu(ctx->bc, &alu);
3126 if (r)
3127 return r;
3128 }
3129 return 0;
3130 }
3131
3132 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
3133 {
3134 struct r600_bytecode_alu alu;
3135 int r;
3136
3137 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3138 alu.inst = opcode;
3139 alu.predicate = 1;
3140
3141 alu.dst.sel = ctx->temp_reg;
3142 alu.dst.write = 1;
3143 alu.dst.chan = 0;
3144
3145 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3146 alu.src[1].sel = V_SQ_ALU_SRC_0;
3147 alu.src[1].chan = 0;
3148
3149 alu.last = 1;
3150
3151 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
3152 if (r)
3153 return r;
3154 return 0;
3155 }
3156
3157 static int pops(struct r600_shader_ctx *ctx, int pops)
3158 {
3159 unsigned force_pop = ctx->bc->force_add_cf;
3160
3161 if (!force_pop) {
3162 int alu_pop = 3;
3163 if (ctx->bc->cf_last) {
3164 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
3165 alu_pop = 0;
3166 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
3167 alu_pop = 1;
3168 }
3169 alu_pop += pops;
3170 if (alu_pop == 1) {
3171 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
3172 ctx->bc->force_add_cf = 1;
3173 } else if (alu_pop == 2) {
3174 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
3175 ctx->bc->force_add_cf = 1;
3176 } else {
3177 force_pop = 1;
3178 }
3179 }
3180
3181 if (force_pop) {
3182 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
3183 ctx->bc->cf_last->pop_count = pops;
3184 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
3185 }
3186
3187 return 0;
3188 }
3189
3190 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
3191 {
3192 switch(reason) {
3193 case FC_PUSH_VPM:
3194 ctx->bc->callstack[ctx->bc->call_sp].current--;
3195 break;
3196 case FC_PUSH_WQM:
3197 case FC_LOOP:
3198 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
3199 break;
3200 case FC_REP:
3201 /* TOODO : for 16 vp asic should -= 2; */
3202 ctx->bc->callstack[ctx->bc->call_sp].current --;
3203 break;
3204 }
3205 }
3206
3207 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
3208 {
3209 if (check_max_only) {
3210 int diff;
3211 switch (reason) {
3212 case FC_PUSH_VPM:
3213 diff = 1;
3214 break;
3215 case FC_PUSH_WQM:
3216 diff = 4;
3217 break;
3218 default:
3219 assert(0);
3220 diff = 0;
3221 }
3222 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
3223 ctx->bc->callstack[ctx->bc->call_sp].max) {
3224 ctx->bc->callstack[ctx->bc->call_sp].max =
3225 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3226 }
3227 return;
3228 }
3229 switch (reason) {
3230 case FC_PUSH_VPM:
3231 ctx->bc->callstack[ctx->bc->call_sp].current++;
3232 break;
3233 case FC_PUSH_WQM:
3234 case FC_LOOP:
3235 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3236 break;
3237 case FC_REP:
3238 ctx->bc->callstack[ctx->bc->call_sp].current++;
3239 break;
3240 }
3241
3242 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3243 ctx->bc->callstack[ctx->bc->call_sp].max) {
3244 ctx->bc->callstack[ctx->bc->call_sp].max =
3245 ctx->bc->callstack[ctx->bc->call_sp].current;
3246 }
3247 }
3248
3249 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3250 {
3251 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3252
3253 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
3254 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
3255 sp->mid[sp->num_mid] = ctx->bc->cf_last;
3256 sp->num_mid++;
3257 }
3258
3259 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3260 {
3261 ctx->bc->fc_sp++;
3262 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3263 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3264 }
3265
3266 static void fc_poplevel(struct r600_shader_ctx *ctx)
3267 {
3268 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3269 if (sp->mid) {
3270 free(sp->mid);
3271 sp->mid = NULL;
3272 }
3273 sp->num_mid = 0;
3274 sp->start = NULL;
3275 sp->type = 0;
3276 ctx->bc->fc_sp--;
3277 }
3278
#if 0
/*
 * Everything up to the matching #endif is compiled out.  Several of these
 * helpers are empty stubs.
 */

/* Add a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));

	return 0;
}

/* Add a JUMP CF instruction with the given pop count; `offset` is unused. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;

	/* TODO work out offset */
	return 0;
}

/* Stub: does nothing and reports success. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: does nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, set the flag source, pop ifidx + 1 levels, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, add the current instruction's CF opcode with one pop,
 * record it in the `mid` list of level `fc_sp`, then emit one pop.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3326
3327 static int tgsi_if(struct r600_shader_ctx *ctx)
3328 {
3329 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
3330
3331 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3332
3333 fc_pushlevel(ctx, FC_IF);
3334
3335 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3336 return 0;
3337 }
3338
3339 static int tgsi_else(struct r600_shader_ctx *ctx)
3340 {
3341 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3342 ctx->bc->cf_last->pop_count = 1;
3343
3344 fc_set_mid(ctx, ctx->bc->fc_sp);
3345 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3346 return 0;
3347 }
3348
3349 static int tgsi_endif(struct r600_shader_ctx *ctx)
3350 {
3351 pops(ctx, 1);
3352 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3353 R600_ERR("if/endif unbalanced in shader\n");
3354 return -1;
3355 }
3356
3357 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3358 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3359 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3360 } else {
3361 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3362 }
3363 fc_poplevel(ctx);
3364
3365 callstack_decrease_current(ctx, FC_PUSH_VPM);
3366 return 0;
3367 }
3368
3369 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3370 {
3371 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3372
3373 fc_pushlevel(ctx, FC_LOOP);
3374
3375 /* check stack depth */
3376 callstack_check_depth(ctx, FC_LOOP, 0);
3377 return 0;
3378 }
3379
3380 static int tgsi_endloop(struct r600_shader_ctx *ctx)
3381 {
3382 int i;
3383
3384 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3385
3386 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3387 R600_ERR("loop/endloop in shader code are not paired.\n");
3388 return -EINVAL;
3389 }
3390
3391 /* fixup loop pointers - from r600isa
3392 LOOP END points to CF after LOOP START,
3393 LOOP START point to CF after LOOP END
3394 BRK/CONT point to LOOP END CF
3395 */
3396 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3397
3398 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3399
3400 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3401 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3402 }
3403 /* TODO add LOOPRET support */
3404 fc_poplevel(ctx);
3405 callstack_decrease_current(ctx, FC_LOOP);
3406 return 0;
3407 }
3408
3409 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3410 {
3411 unsigned int fscp;
3412
3413 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3414 {
3415 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3416 break;
3417 }
3418
3419 if (fscp == 0) {
3420 R600_ERR("Break not inside loop/endloop pair\n");
3421 return -EINVAL;
3422 }
3423
3424 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3425 ctx->bc->cf_last->pop_count = 1;
3426
3427 fc_set_mid(ctx, fscp);
3428
3429 pops(ctx, 1);
3430 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3431 return 0;
3432 }
3433
3434 static int tgsi_umad(struct r600_shader_ctx *ctx)
3435 {
3436 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3437 struct r600_bytecode_alu alu;
3438 int i, j, r;
3439 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3440
3441 /* src0 * src1 */
3442 for (i = 0; i < lasti + 1; i++) {
3443 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3444 continue;
3445
3446 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3447
3448 alu.dst.chan = i;
3449 alu.dst.sel = ctx->temp_reg;
3450 alu.dst.write = 1;
3451
3452 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3453 for (j = 0; j < 2; j++) {
3454 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3455 }
3456
3457 alu.last = 1;
3458 r = r600_bytecode_add_alu(ctx->bc, &alu);
3459 if (r)
3460 return r;
3461 }
3462
3463
3464 for (i = 0; i < lasti + 1; i++) {
3465 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3466 continue;
3467
3468 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3469 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3470
3471 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3472
3473 alu.src[0].sel = ctx->temp_reg;
3474 alu.src[0].chan = i;
3475
3476 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3477 if (i == lasti) {
3478 alu.last = 1;
3479 }
3480 r = r600_bytecode_add_alu(ctx->bc, &alu);
3481 if (r)
3482 return r;
3483 }
3484 return 0;
3485 }
3486
3487 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3488 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3489 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3490 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3491
3492 /* FIXME:
3493 * For state trackers other than OpenGL, we'll want to use
3494 * _RECIP_IEEE instead.
3495 */
3496 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3497
3498 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3499 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3500 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3501 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3502 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3503 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3504 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3505 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3506 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3507 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3508 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3509 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3510 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3511 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3512 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3513 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3514 /* gap */
3515 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3516 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3517 /* gap */
3518 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3519 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3520 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3521 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3522 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3523 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3524 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3525 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3526 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3527 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3528 /* gap */
3529 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3530 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3531 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3532 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3533 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3534 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3535 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3536 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3537 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3538 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3539 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3540 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3541 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3542 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3543 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3544 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3545 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3546 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3547 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3548 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3549 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3550 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3551 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3552 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3553 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3554 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3555 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3556 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3557 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3558 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3559 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3560 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3561 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3562 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3563 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3564 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3565 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3566 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3567 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3568 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3569 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3570 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3571 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3572 /* gap */
3573 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3574 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3575 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3576 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3577 /* gap */
3578 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3579 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3580 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3581 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3582 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3583 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
3584 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3585 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3586 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3587 /* gap */
3588 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3589 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3590 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3591 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3592 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3593 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3594 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3595 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3596 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3597 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3598 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3599 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3600 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3601 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3602 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3603 /* gap */
3604 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3605 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3606 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3607 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3608 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3609 /* gap */
3610 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3611 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3612 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3613 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3614 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3615 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3616 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3617 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3618 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3619 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3620 /* gap */
3621 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3622 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
3623 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3624 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3625 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3626 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_op2},
3627 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3628 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2},
3630 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
3631 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
3632 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3633 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3634 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3635 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3636 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3637 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3638 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2},
3639 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3640 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3641 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3642 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3643 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
3644 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3645 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3646 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3648 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3649 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3650 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3651 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3652 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3653 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3654 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3655 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
3656 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
3657 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
3658 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3659 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3660 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
3661 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
3662 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3663 };
3664
3665 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3666 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3667 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3668 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3669 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3670 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3671 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3672 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3673 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3674 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3675 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3676 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3677 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3678 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3679 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3680 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3681 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3682 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3683 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3684 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3685 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3686 /* gap */
3687 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3688 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3689 /* gap */
3690 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3691 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3692 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3693 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3694 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3695 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3696 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3697 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3698 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3699 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3700 /* gap */
3701 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3702 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3703 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3704 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3705 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3706 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3707 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3708 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3709 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3710 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3711 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3712 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3713 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3714 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3715 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3716 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3717 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3718 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3719 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3720 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3721 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3722 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3723 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3724 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3725 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3726 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3727 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3728 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3729 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3730 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3731 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3732 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3733 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3734 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3735 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3736 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3737 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3738 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3739 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3740 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3741 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3742 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3743 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3744 /* gap */
3745 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3746 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3747 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3748 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3749 /* gap */
3750 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3751 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3752 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3753 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3754 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3755 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
3756 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3757 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3758 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3759 /* gap */
3760 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3761 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3762 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3763 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3764 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3765 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3766 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3767 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3768 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3769 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3770 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3771 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3772 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3773 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3774 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3775 /* gap */
3776 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3777 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3778 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3779 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3780 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3781 /* gap */
3782 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3783 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3784 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3785 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3786 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3787 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3788 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3789 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3790 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3791 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3792 /* gap */
3793 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3794 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
3795 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3796 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3797 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3798 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
3799 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3800 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3801 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3802 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3803 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
3804 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3805 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3806 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3807 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3808 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3809 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3810 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
3811 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3812 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3813 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3814 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3815 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
3816 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3817 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3818 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3819 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3820 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3821 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3822 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3823 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3824 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3825 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3826 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3827 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
3828 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
3829 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
3830 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3831 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3832 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
3833 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
3834 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3835 };
3836
/*
 * TGSI opcode translation table, "cm" variant (presumably Cayman, going by
 * the CAYMAN notes at the top of this file and the cayman_* handlers used
 * below -- confirm against the code that selects this table).
 *
 * Every row has four initializers, in this order:
 *   1. the TGSI opcode the row describes (a bare number for unnamed values),
 *   2. a 0/1 flag; it is 1 only on the MAD row, the sole row carrying an
 *      ..._OP3_... hardware opcode (presumably an "is three-operand" flag --
 *      the struct definition is not visible here, confirm),
 *   3. the hardware opcode handed to the handler: an ALU opcode for most
 *      rows, an SQ_TEX_INST_* value for the tgsi_tex rows, a CF opcode for
 *      BRK/CONT, and NOP or 0 where the row carries no opcode of its own,
 *   4. the function that emits code for the instruction; tgsi_unsupported
 *      marks opcodes this table does not handle.
 *
 * Row order is significant: each numeric placeholder row ("gap") sits at
 * the array index equal to its value, which is consistent with the table
 * being indexed directly by TGSI opcode (the lookup code is outside this
 * view -- confirm). Do not insert, remove or reorder rows without keeping
 * that correspondence.
 */
3837 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3838 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3839 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3840 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3841 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3842 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3843 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3844 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3845 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3846 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3847 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3848 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3849 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3850 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3851 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3852 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3853 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3854 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only row with the flag set and an OP3 opcode */
3855 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3856 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3857 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3858 /* gap */
3859 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3860 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3861 /* gap */
3862 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3863 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3864 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3865 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3866 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3867 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3868 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3869 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3870 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3871 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3872 /* gap */
3873 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3874 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3875 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3876 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3877 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3878 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3879 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3880 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3881 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3882 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3883 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3884 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3885 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3886 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3887 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3888 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3889 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3890 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3891 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3892 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3893 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3894 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3895 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3896 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3897 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3898 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3899 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3900 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3901 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3902 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3903 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3904 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3905 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3906 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3907 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3908 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3909 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3910 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3911 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3912 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3913 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3914 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, /* CF opcode, not ALU */
3915 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3916 /* gap */
3917 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3918 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3919 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3920 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3921 /* gap */
3922 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3923 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3924 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3925 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3926 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3927 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3928 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3929 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3930 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3931 /* gap */
3932 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3933 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3934 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3935 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3936 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3937 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3938 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
3939 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3940 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, /* CF opcode, not ALU */
3941 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3942 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3943 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3944 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3945 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3946 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3947 /* gap */
3948 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3949 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3950 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3951 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3952 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3953 /* gap */
3954 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3955 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3956 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3957 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3958 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3959 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3960 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3961 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3962 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3963 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3964 /* gap */
3965 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/*
 * NOTE(review): from F2I through UARL this table routes everything except
 * IMAX and IMIN to tgsi_unsupported, whereas the table immediately
 * preceding this one maps several of the same opcodes to real handlers
 * (e.g. F2I, INEG, ISGE, ISLT, U2F, UADD, UMAD, UMUL, USEQ, UARL).
 * Presumably the integer paths are simply not wired up for this chip yet
 * rather than impossible -- confirm before relying on either reading.
 */
3966 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3967 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3968 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3969 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3970 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3971 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3972 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3973 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3974 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3975 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3976 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3977 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3978 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3979 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3980 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3981 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3982 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3983 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3984 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3985 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3986 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3987 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3988 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3989 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3990 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3991 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3992 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
3993 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
3994 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
3995 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
3996 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
3997 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3998 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
3999 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
4000 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
4001 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
4002 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4003 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4004 {TGSI_OPCODE_UARL, 0, 0, tgsi_unsupported},
4005 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
4006 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* final row of the table */
4007 };