r600g: set round_mode to truncate and get rid of tgsi_f2i on evergreen
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_info.h"
25 #include "tgsi/tgsi_parse.h"
26 #include "tgsi/tgsi_scan.h"
27 #include "tgsi/tgsi_dump.h"
28 #include "util/u_format.h"
29 #include "r600_pipe.h"
30 #include "r600_asm.h"
31 #include "r600_sq.h"
32 #include "r600_formats.h"
33 #include "r600_opcodes.h"
34 #include "r600d.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38
39 /* CAYMAN notes
40 This explains why many instructions are emitted in loops on CAYMAN.
41
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59
60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
63 struct r600_shader *rshader = &shader->shader;
64 uint32_t *ptr;
65 int i;
66
67 /* copy new shader */
68 if (shader->bo == NULL) {
69 shader->bo = (struct r600_resource*)
70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 if (shader->bo == NULL) {
72 return -ENOMEM;
73 }
74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
75 if (R600_BIG_ENDIAN) {
76 for (i = 0; i < rshader->bc.ndw; ++i) {
77 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 }
79 } else {
80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 }
82 rctx->ws->buffer_unmap(shader->bo->buf);
83 }
84 /* build state */
85 switch (rshader->processor_type) {
86 case TGSI_PROCESSOR_VERTEX:
87 if (rctx->chip_class >= EVERGREEN) {
88 evergreen_pipe_shader_vs(ctx, shader);
89 } else {
90 r600_pipe_shader_vs(ctx, shader);
91 }
92 break;
93 case TGSI_PROCESSOR_FRAGMENT:
94 if (rctx->chip_class >= EVERGREEN) {
95 evergreen_pipe_shader_ps(ctx, shader);
96 } else {
97 r600_pipe_shader_ps(ctx, shader);
98 }
99 break;
100 default:
101 return -EINVAL;
102 }
103 return 0;
104 }
105
106 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
107
108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 static int dump_shaders = -1;
111 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
112 int r;
113
114 /* Would like some magic "get_bool_option_once" routine.
115 */
116 if (dump_shaders == -1)
117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
118
119 if (dump_shaders) {
120 fprintf(stderr, "--------------------------------------------------------------\n");
121 tgsi_dump(shader->tokens, 0);
122
123 if (shader->so.num_outputs) {
124 unsigned i;
125 fprintf(stderr, "STREAMOUT\n");
126 for (i = 0; i < shader->so.num_outputs; i++) {
127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) <<
128 shader->so.output[i].start_component;
129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
130 shader->so.output[i].output_buffer, shader->so.output[i].register_index,
131 mask & 1 ? "x" : "_",
132 (mask >> 1) & 1 ? "y" : "_",
133 (mask >> 2) & 1 ? "z" : "_",
134 (mask >> 3) & 1 ? "w" : "_");
135 }
136 }
137 }
138 r = r600_shader_from_tgsi(rctx, shader);
139 if (r) {
140 R600_ERR("translation from TGSI failed !\n");
141 return r;
142 }
143 r = r600_bytecode_build(&shader->shader.bc);
144 if (r) {
145 R600_ERR("building bytecode failed !\n");
146 return r;
147 }
148 if (dump_shaders) {
149 r600_bytecode_dump(&shader->shader.bc);
150 fprintf(stderr, "______________________________________________________________\n");
151 }
152 return r600_pipe_shader(ctx, shader);
153 }
154
155 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156 {
157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
158 r600_bytecode_clear(&shader->shader.bc);
159
160 memset(&shader->shader,0,sizeof(struct r600_shader));
161 }
162
163 /*
164 * tgsi -> r600 shader
165 */
struct r600_shader_tgsi_instruction;

/* One TGSI source operand translated by tgsi_src(). */
struct r600_shader_src {
	/* register index plus file offset, or a V_SQ_ALU_SRC_* special value */
	unsigned		sel;
	/* source channel for x, y, z and w */
	unsigned		swizzle[4];
	/* copied from the TGSI Negate / Absolute flags */
	unsigned		neg;
	unsigned		abs;
	/* V_SQ_REL_RELATIVE for an indirect register, 0 otherwise */
	unsigned		rel;
	/* the four literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t		value[4];
};
176
177 struct r600_shader_ctx {
178 struct tgsi_shader_info info;
179 struct tgsi_parse_context parse;
180 const struct tgsi_token *tokens;
181 unsigned type;
182 unsigned file_offset[TGSI_FILE_COUNT];
183 unsigned temp_reg;
184 struct r600_shader_tgsi_instruction *inst_info;
185 struct r600_bytecode *bc;
186 struct r600_shader *shader;
187 struct r600_shader_src src[4];
188 u32 *literals;
189 u32 nliterals;
190 u32 max_driver_temp_used;
191 /* needed for evergreen interpolation */
192 boolean input_centroid;
193 boolean input_linear;
194 boolean input_perspective;
195 int num_interp_gpr;
196 int face_gpr;
197 int colors_used;
198 boolean clip_vertex_write;
199 unsigned cv_output;
200 };
201
/*
 * One entry of the per chip class opcode tables.  The tables are indexed
 * by TGSI opcode and process() is called to translate the instruction.
 */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;
	unsigned	is_op3;
	unsigned	r600_opcode;
	/* translation callback, returns 0 or an error code */
	int		(*process)(struct r600_shader_ctx *ctx);
};

/* opcode tables for r600/r700, evergreen and cayman */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
211
212 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
213 {
214 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
215 int j;
216
217 if (i->Instruction.NumDstRegs > 1) {
218 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
219 return -EINVAL;
220 }
221 if (i->Instruction.Predicate) {
222 R600_ERR("predicate unsupported\n");
223 return -EINVAL;
224 }
225 #if 0
226 if (i->Instruction.Label) {
227 R600_ERR("label unsupported\n");
228 return -EINVAL;
229 }
230 #endif
231 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
232 if (i->Src[j].Register.Dimension) {
233 R600_ERR("unsupported src %d (dimension %d)\n", j,
234 i->Src[j].Register.Dimension);
235 return -EINVAL;
236 }
237 }
238 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
239 if (i->Dst[j].Register.Dimension) {
240 R600_ERR("unsupported dst (dimension)\n");
241 return -EINVAL;
242 }
243 }
244 return 0;
245 }
246
247 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
248 {
249 int i, r;
250 struct r600_bytecode_alu alu;
251 int gpr = 0, base_chan = 0;
252 int ij_index = 0;
253
254 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
255 ij_index = 0;
256 if (ctx->shader->input[input].centroid)
257 ij_index++;
258 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
259 ij_index = 0;
260 /* if we have perspective add one */
261 if (ctx->input_perspective) {
262 ij_index++;
263 /* if we have perspective centroid */
264 if (ctx->input_centroid)
265 ij_index++;
266 }
267 if (ctx->shader->input[input].centroid)
268 ij_index++;
269 }
270
271 /* work out gpr and base_chan from index */
272 gpr = ij_index / 2;
273 base_chan = (2 * (ij_index % 2)) + 1;
274
275 for (i = 0; i < 8; i++) {
276 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
277
278 if (i < 4)
279 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
280 else
281 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
282
283 if ((i > 1) && (i < 6)) {
284 alu.dst.sel = ctx->shader->input[input].gpr;
285 alu.dst.write = 1;
286 }
287
288 alu.dst.chan = i % 4;
289
290 alu.src[0].sel = gpr;
291 alu.src[0].chan = (base_chan - (i % 2));
292
293 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
294
295 alu.bank_swizzle_force = SQ_ALU_VEC_210;
296 if ((i % 4) == 3)
297 alu.last = 1;
298 r = r600_bytecode_add_alu(ctx->bc, &alu);
299 if (r)
300 return r;
301 }
302 return 0;
303 }
304
305 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
306 {
307 int i, r;
308 struct r600_bytecode_alu alu;
309
310 for (i = 0; i < 4; i++) {
311 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
312
313 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0;
314
315 alu.dst.sel = ctx->shader->input[input].gpr;
316 alu.dst.write = 1;
317
318 alu.dst.chan = i;
319
320 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
321 alu.src[0].chan = i;
322
323 if (i == 3)
324 alu.last = 1;
325 r = r600_bytecode_add_alu(ctx->bc, &alu);
326 if (r)
327 return r;
328 }
329 return 0;
330 }
331
332 /*
333 * Special export handling in shaders
334 *
335 * shader export ARRAY_BASE for EXPORT_POS:
336 * 60 is position
337 * 61 is misc vector
338 * 62, 63 are clip distance vectors
339 *
340 * The use of the values exported in 61-63 is controlled by PA_CL_VS_OUT_CNTL:
341 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
342 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
343 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
344 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
345 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
346 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
347 * exclusive from render target index)
348 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
349 *
350 *
351 * shader export ARRAY_BASE for EXPORT_PIXEL:
352 * 0-7 CB targets
353 * 61 computed Z vector
354 *
355 * The use of the values exported in the computed Z vector is controlled
356 * by DB_SHADER_CONTROL:
357 * Z_EXPORT_ENABLE - Z as a float in RED
358 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
359 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
360 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
361 * DB_SOURCE_FORMAT - export control restrictions
362 *
363 */
364
365
366 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
367 static int r600_spi_sid(struct r600_shader_io * io)
368 {
369 int index, name = io->name;
370
371 /* These params are handled differently, they don't need
372 * semantic indices, so we'll use 0 for them.
373 */
374 if (name == TGSI_SEMANTIC_POSITION ||
375 name == TGSI_SEMANTIC_PSIZE ||
376 name == TGSI_SEMANTIC_FACE)
377 index = 0;
378 else {
379 if (name == TGSI_SEMANTIC_GENERIC) {
380 /* For generic params simply use sid from tgsi */
381 index = io->sid;
382 } else {
383 /* For non-generic params - pack name and sid into 8 bits */
384 index = 0x80 | (name<<3) | (io->sid);
385 }
386
387 /* Make sure that all really used indices have nonzero value, so
388 * we can just compare it to 0 later instead of comparing the name
389 * with different values to detect special cases. */
390 index++;
391 }
392
393 return index;
394 };
395
396 /* turn input into interpolate on EG */
397 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
398 {
399 int r = 0;
400
401 if (ctx->shader->input[index].spi_sid) {
402 ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
403 if (ctx->shader->input[index].interpolate > 0) {
404 r = evergreen_interp_alu(ctx, index);
405 } else {
406 r = evergreen_interp_flat(ctx, index);
407 }
408 }
409 return r;
410 }
411
412 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
413 {
414 struct r600_bytecode_alu alu;
415 int i, r;
416 int gpr_front = ctx->shader->input[front].gpr;
417 int gpr_back = ctx->shader->input[back].gpr;
418
419 for (i = 0; i < 4; i++) {
420 memset(&alu, 0, sizeof(alu));
421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
422 alu.is_op3 = 1;
423 alu.dst.write = 1;
424 alu.dst.sel = gpr_front;
425 alu.src[0].sel = ctx->face_gpr;
426 alu.src[1].sel = gpr_front;
427 alu.src[2].sel = gpr_back;
428
429 alu.dst.chan = i;
430 alu.src[1].chan = i;
431 alu.src[2].chan = i;
432 alu.last = (i==3);
433
434 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
435 return r;
436 }
437
438 return 0;
439 }
440
441 static int tgsi_declaration(struct r600_shader_ctx *ctx)
442 {
443 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
444 unsigned i;
445 int r;
446
447 switch (d->Declaration.File) {
448 case TGSI_FILE_INPUT:
449 i = ctx->shader->ninput++;
450 ctx->shader->input[i].name = d->Semantic.Name;
451 ctx->shader->input[i].sid = d->Semantic.Index;
452 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
453 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
454 ctx->shader->input[i].centroid = d->Declaration.Centroid;
455 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
456 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
457 if (ctx->shader->input[i].name == TGSI_SEMANTIC_FACE)
458 ctx->face_gpr = ctx->shader->input[i].gpr;
459 else if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR)
460 ctx->colors_used++;
461 if (ctx->bc->chip_class >= EVERGREEN) {
462 r = evergreen_interp_input(ctx, i);
463 if (r)
464 return r;
465 }
466 }
467 break;
468 case TGSI_FILE_OUTPUT:
469 i = ctx->shader->noutput++;
470 ctx->shader->output[i].name = d->Semantic.Name;
471 ctx->shader->output[i].sid = d->Semantic.Index;
472 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
473 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
474 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
475 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
476 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
477 switch (d->Semantic.Name) {
478 case TGSI_SEMANTIC_CLIPDIST:
479 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
480 break;
481 case TGSI_SEMANTIC_PSIZE:
482 ctx->shader->vs_out_misc_write = 1;
483 break;
484 case TGSI_SEMANTIC_CLIPVERTEX:
485 ctx->clip_vertex_write = TRUE;
486 ctx->cv_output = i;
487 break;
488 }
489 }
490 break;
491 case TGSI_FILE_CONSTANT:
492 case TGSI_FILE_TEMPORARY:
493 case TGSI_FILE_SAMPLER:
494 case TGSI_FILE_ADDRESS:
495 break;
496
497 case TGSI_FILE_SYSTEM_VALUE:
498 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
499 struct r600_bytecode_alu alu;
500 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
501
502 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
503 alu.src[0].sel = 0;
504 alu.src[0].chan = 3;
505
506 alu.dst.sel = 0;
507 alu.dst.chan = 3;
508 alu.dst.write = 1;
509 alu.last = 1;
510
511 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
512 return r;
513 break;
514 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
515 break;
516 default:
517 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
518 return -EINVAL;
519 }
520 return 0;
521 }
522
523 static int r600_get_temp(struct r600_shader_ctx *ctx)
524 {
525 return ctx->temp_reg + ctx->max_driver_temp_used++;
526 }
527
528 /*
529 * for evergreen we need to scan the shader to find the number of GPRs we need to
530 * reserve for interpolation.
531 *
532 * we need to know if we are going to emit
533 * any centroid inputs
534 * if perspective and linear are required
535 */
536 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
537 {
538 int i;
539 int num_baryc;
540
541 ctx->input_linear = FALSE;
542 ctx->input_perspective = FALSE;
543 ctx->input_centroid = FALSE;
544 ctx->num_interp_gpr = 1;
545
546 /* any centroid inputs */
547 for (i = 0; i < ctx->info.num_inputs; i++) {
548 /* skip position/face */
549 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
550 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
551 continue;
552 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
553 ctx->input_linear = TRUE;
554 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
555 ctx->input_perspective = TRUE;
556 if (ctx->info.input_centroid[i])
557 ctx->input_centroid = TRUE;
558 }
559
560 num_baryc = 0;
561 /* ignoring sample for now */
562 if (ctx->input_perspective)
563 num_baryc++;
564 if (ctx->input_linear)
565 num_baryc++;
566 if (ctx->input_centroid)
567 num_baryc *= 2;
568
569 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
570
571 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
572 return ctx->num_interp_gpr;
573 }
574
575 static void tgsi_src(struct r600_shader_ctx *ctx,
576 const struct tgsi_full_src_register *tgsi_src,
577 struct r600_shader_src *r600_src)
578 {
579 memset(r600_src, 0, sizeof(*r600_src));
580 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
581 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
582 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
583 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
584 r600_src->neg = tgsi_src->Register.Negate;
585 r600_src->abs = tgsi_src->Register.Absolute;
586
587 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
588 int index;
589 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
590 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
591 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
592
593 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
594 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
595 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
596 return;
597 }
598 index = tgsi_src->Register.Index;
599 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
600 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
601 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
602 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
603 r600_src->swizzle[0] = 3;
604 r600_src->swizzle[1] = 3;
605 r600_src->swizzle[2] = 3;
606 r600_src->swizzle[3] = 3;
607 r600_src->sel = 0;
608 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
609 r600_src->swizzle[0] = 0;
610 r600_src->swizzle[1] = 0;
611 r600_src->swizzle[2] = 0;
612 r600_src->swizzle[3] = 0;
613 r600_src->sel = 0;
614 }
615 } else {
616 if (tgsi_src->Register.Indirect)
617 r600_src->rel = V_SQ_REL_RELATIVE;
618 r600_src->sel = tgsi_src->Register.Index;
619 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
620 }
621 }
622
623 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
624 {
625 struct r600_bytecode_vtx vtx;
626 unsigned int ar_reg;
627 int r;
628
629 if (offset) {
630 struct r600_bytecode_alu alu;
631
632 memset(&alu, 0, sizeof(alu));
633
634 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
635 alu.src[0].sel = ctx->bc->ar_reg;
636
637 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
638 alu.src[1].value = offset;
639
640 alu.dst.sel = dst_reg;
641 alu.dst.write = 1;
642 alu.last = 1;
643
644 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
645 return r;
646
647 ar_reg = dst_reg;
648 } else {
649 ar_reg = ctx->bc->ar_reg;
650 }
651
652 memset(&vtx, 0, sizeof(vtx));
653 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
654 vtx.src_gpr = ar_reg;
655 vtx.mega_fetch_count = 16;
656 vtx.dst_gpr = dst_reg;
657 vtx.dst_sel_x = 0; /* SEL_X */
658 vtx.dst_sel_y = 1; /* SEL_Y */
659 vtx.dst_sel_z = 2; /* SEL_Z */
660 vtx.dst_sel_w = 3; /* SEL_W */
661 vtx.data_format = FMT_32_32_32_32_FLOAT;
662 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
663 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
664 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
665 vtx.endian = r600_endian_swap(32);
666
667 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
668 return r;
669
670 return 0;
671 }
672
673 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
674 {
675 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
676 struct r600_bytecode_alu alu;
677 int i, j, k, nconst, r;
678
679 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
680 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
681 nconst++;
682 }
683 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
684 }
685 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
686 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
687 continue;
688 }
689
690 if (ctx->src[i].rel) {
691 int treg = r600_get_temp(ctx);
692 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
693 return r;
694
695 ctx->src[i].sel = treg;
696 ctx->src[i].rel = 0;
697 j--;
698 } else if (j > 0) {
699 int treg = r600_get_temp(ctx);
700 for (k = 0; k < 4; k++) {
701 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
702 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
703 alu.src[0].sel = ctx->src[i].sel;
704 alu.src[0].chan = k;
705 alu.src[0].rel = ctx->src[i].rel;
706 alu.dst.sel = treg;
707 alu.dst.chan = k;
708 alu.dst.write = 1;
709 if (k == 3)
710 alu.last = 1;
711 r = r600_bytecode_add_alu(ctx->bc, &alu);
712 if (r)
713 return r;
714 }
715 ctx->src[i].sel = treg;
716 ctx->src[i].rel =0;
717 j--;
718 }
719 }
720 return 0;
721 }
722
723 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
724 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
725 {
726 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
727 struct r600_bytecode_alu alu;
728 int i, j, k, nliteral, r;
729
730 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
731 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
732 nliteral++;
733 }
734 }
735 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
736 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
737 int treg = r600_get_temp(ctx);
738 for (k = 0; k < 4; k++) {
739 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
740 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
741 alu.src[0].sel = ctx->src[i].sel;
742 alu.src[0].chan = k;
743 alu.src[0].value = ctx->src[i].value[k];
744 alu.dst.sel = treg;
745 alu.dst.chan = k;
746 alu.dst.write = 1;
747 if (k == 3)
748 alu.last = 1;
749 r = r600_bytecode_add_alu(ctx->bc, &alu);
750 if (r)
751 return r;
752 }
753 ctx->src[i].sel = treg;
754 j--;
755 }
756 }
757 return 0;
758 }
759
760 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
761 {
762 int i, r, count = ctx->shader->ninput;
763
764 /* additional inputs will be allocated right after the existing inputs,
765 * we won't need them after the color selection, so we don't need to
766 * reserve these gprs for the rest of the shader code and to adjust
767 * output offsets etc. */
768 int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
769 ctx->info.file_max[TGSI_FILE_INPUT] + 1;
770
771 if (ctx->face_gpr == -1) {
772 i = ctx->shader->ninput++;
773 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
774 ctx->shader->input[i].spi_sid = 0;
775 ctx->shader->input[i].gpr = gpr++;
776 ctx->face_gpr = ctx->shader->input[i].gpr;
777 }
778
779 for (i = 0; i < count; i++) {
780 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
781 int ni = ctx->shader->ninput++;
782 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
783 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
784 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
785 ctx->shader->input[ni].gpr = gpr++;
786
787 if (ctx->bc->chip_class >= EVERGREEN) {
788 r = evergreen_interp_input(ctx, ni);
789 if (r)
790 return r;
791 }
792
793 r = select_twoside_color(ctx, i, ni);
794 if (r)
795 return r;
796 }
797 }
798 return 0;
799 }
800
801 static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
802 {
803 struct r600_shader *shader = &pipeshader->shader;
804 struct tgsi_token *tokens = pipeshader->tokens;
805 struct pipe_stream_output_info so = pipeshader->so;
806 struct tgsi_full_immediate *immediate;
807 struct tgsi_full_property *property;
808 struct r600_shader_ctx ctx;
809 struct r600_bytecode_output output[32];
810 unsigned output_done, noutput;
811 unsigned opcode;
812 int i, j, k, r = 0;
813 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
814
815 ctx.bc = &shader->bc;
816 ctx.shader = shader;
817 r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
818 ctx.tokens = tokens;
819 tgsi_scan_shader(tokens, &ctx.info);
820 tgsi_parse_init(&ctx.parse, tokens);
821 ctx.type = ctx.parse.FullHeader.Processor.Processor;
822 shader->processor_type = ctx.type;
823 ctx.bc->type = shader->processor_type;
824
825 ctx.face_gpr = -1;
826 ctx.colors_used = 0;
827 ctx.clip_vertex_write = 0;
828
829 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
830
831 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
832 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
833
834 shader->nr_cbufs = rctx->nr_cbufs;
835
836 /* register allocations */
837 /* Values [0,127] correspond to GPR[0..127].
838 * Values [128,159] correspond to constant buffer bank 0
839 * Values [160,191] correspond to constant buffer bank 1
840 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
841 * Values [256,287] correspond to constant buffer bank 2 (EG)
842 * Values [288,319] correspond to constant buffer bank 3 (EG)
843 * Other special values are shown in the list below.
844 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
845 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
846 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
847 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
848 * 248 SQ_ALU_SRC_0: special constant 0.0.
849 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
850 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
851 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
852 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
853 * 253 SQ_ALU_SRC_LITERAL: literal constant.
854 * 254 SQ_ALU_SRC_PV: previous vector result.
855 * 255 SQ_ALU_SRC_PS: previous scalar result.
856 */
857 for (i = 0; i < TGSI_FILE_COUNT; i++) {
858 ctx.file_offset[i] = 0;
859 }
860 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
861 ctx.file_offset[TGSI_FILE_INPUT] = 1;
862 if (ctx.bc->chip_class >= EVERGREEN) {
863 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
864 } else {
865 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
866 }
867 }
868 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
869 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
870 }
871 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
872 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
873 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
874 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
875
876 /* Outside the GPR range. This will be translated to one of the
877 * kcache banks later. */
878 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
879
880 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
881 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
882 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
883 ctx.temp_reg = ctx.bc->ar_reg + 1;
884
885 ctx.nliterals = 0;
886 ctx.literals = NULL;
887 shader->fs_write_all = FALSE;
888 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
889 tgsi_parse_token(&ctx.parse);
890 switch (ctx.parse.FullToken.Token.Type) {
891 case TGSI_TOKEN_TYPE_IMMEDIATE:
892 immediate = &ctx.parse.FullToken.FullImmediate;
893 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
894 if(ctx.literals == NULL) {
895 r = -ENOMEM;
896 goto out_err;
897 }
898 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
899 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
900 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
901 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
902 ctx.nliterals++;
903 break;
904 case TGSI_TOKEN_TYPE_DECLARATION:
905 r = tgsi_declaration(&ctx);
906 if (r)
907 goto out_err;
908 break;
909 case TGSI_TOKEN_TYPE_INSTRUCTION:
910 break;
911 case TGSI_TOKEN_TYPE_PROPERTY:
912 property = &ctx.parse.FullToken.FullProperty;
913 switch (property->Property.PropertyName) {
914 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
915 if (property->u[0].Data == 1)
916 shader->fs_write_all = TRUE;
917 break;
918 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
919 if (property->u[0].Data == 1)
920 shader->vs_prohibit_ucps = TRUE;
921 break;
922 }
923 break;
924 default:
925 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
926 r = -EINVAL;
927 goto out_err;
928 }
929 }
930
931 if (shader->two_side && ctx.colors_used) {
932 if ((r = process_twoside_color_inputs(&ctx)))
933 return r;
934 }
935
936 tgsi_parse_init(&ctx.parse, tokens);
937 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
938 tgsi_parse_token(&ctx.parse);
939 switch (ctx.parse.FullToken.Token.Type) {
940 case TGSI_TOKEN_TYPE_INSTRUCTION:
941 r = tgsi_is_supported(&ctx);
942 if (r)
943 goto out_err;
944 ctx.max_driver_temp_used = 0;
945 /* reserve first tmp for everyone */
946 r600_get_temp(&ctx);
947
948 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
949 if ((r = tgsi_split_constant(&ctx)))
950 goto out_err;
951 if ((r = tgsi_split_literal_constant(&ctx)))
952 goto out_err;
953 if (ctx.bc->chip_class == CAYMAN)
954 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
955 else if (ctx.bc->chip_class >= EVERGREEN)
956 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
957 else
958 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
959 r = ctx.inst_info->process(&ctx);
960 if (r)
961 goto out_err;
962 break;
963 default:
964 break;
965 }
966 }
967
968 noutput = shader->noutput;
969
970 if (ctx.clip_vertex_write) {
971 /* need to convert a clipvertex write into clipdistance writes and not export
972 the clip vertex anymore */
973
974 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
975 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
976 shader->output[noutput].gpr = ctx.temp_reg;
977 noutput++;
978 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
979 shader->output[noutput].gpr = ctx.temp_reg+1;
980 noutput++;
981
982 shader->clip_dist_write = 0xFF;
983
984 for (i = 0; i < 8; i++) {
985 int oreg = i >> 2;
986 int ochan = i & 3;
987
988 for (j = 0; j < 4; j++) {
989 struct r600_bytecode_alu alu;
990 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
991 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
992 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
993 alu.src[0].chan = j;
994
995 alu.src[1].sel = 512 + i;
996 alu.src[1].kc_bank = 1;
997 alu.src[1].chan = j;
998
999 alu.dst.sel = ctx.temp_reg + oreg;
1000 alu.dst.chan = j;
1001 alu.dst.write = (j == ochan);
1002 if (j == 3)
1003 alu.last = 1;
1004 r = r600_bytecode_add_alu(ctx.bc, &alu);
1005 if (r)
1006 return r;
1007 }
1008 }
1009 }
1010
1011 /* clamp color outputs */
1012 if (shader->clamp_color) {
1013 for (i = 0; i < noutput; i++) {
1014 if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
1015 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
1016
1017 int j;
1018 for (j = 0; j < 4; j++) {
1019 struct r600_bytecode_alu alu;
1020 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1021
1022 /* MOV_SAT R, R */
1023 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1024 alu.dst.sel = shader->output[i].gpr;
1025 alu.dst.chan = j;
1026 alu.dst.write = 1;
1027 alu.dst.clamp = 1;
1028 alu.src[0].sel = alu.dst.sel;
1029 alu.src[0].chan = j;
1030
1031 if (j == 3) {
1032 alu.last = 1;
1033 }
1034 r = r600_bytecode_add_alu(ctx.bc, &alu);
1035 if (r)
1036 return r;
1037 }
1038 }
1039 }
1040 }
1041
1042 /* Add stream outputs. */
1043 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1044 for (i = 0; i < so.num_outputs; i++) {
1045 struct r600_bytecode_output output;
1046
1047 if (so.output[i].output_buffer >= 4) {
1048 R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1049 so.output[i].output_buffer);
1050 r = -EINVAL;
1051 goto out_err;
1052 }
1053 if (so.output[i].start_component) {
1054 R600_ERR("stream_output - start_component cannot be non-zero\n");
1055 r = -EINVAL;
1056 goto out_err;
1057 }
1058
1059 memset(&output, 0, sizeof(struct r600_bytecode_output));
1060 output.gpr = shader->output[so.output[i].register_index].gpr;
1061 output.elem_size = 0;
1062 output.array_base = so.output[i].dst_offset;
1063 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1064 output.burst_count = 1;
1065 output.barrier = 1;
1066 /* array_size is an upper limit for the burst_count
1067 * with MEM_STREAM instructions */
1068 output.array_size = 0xFFF;
1069 output.comp_mask = (1 << so.output[i].num_components) - 1;
1070 if (ctx.bc->chip_class >= EVERGREEN) {
1071 switch (so.output[i].output_buffer) {
1072 case 0:
1073 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1074 break;
1075 case 1:
1076 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1077 break;
1078 case 2:
1079 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1080 break;
1081 case 3:
1082 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1083 break;
1084 }
1085 } else {
1086 switch (so.output[i].output_buffer) {
1087 case 0:
1088 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1089 break;
1090 case 1:
1091 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1092 break;
1093 case 2:
1094 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1095 break;
1096 case 3:
1097 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1098 break;
1099 }
1100 }
1101 r = r600_bytecode_add_output(ctx.bc, &output);
1102 if (r)
1103 goto out_err;
1104 }
1105 }
1106
1107 /* export output */
1108 for (i = 0, j = 0; i < noutput; i++, j++) {
1109 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1110 output[j].gpr = shader->output[i].gpr;
1111 output[j].elem_size = 3;
1112 output[j].swizzle_x = 0;
1113 output[j].swizzle_y = 1;
1114 output[j].swizzle_z = 2;
1115 output[j].swizzle_w = 3;
1116 output[j].burst_count = 1;
1117 output[j].barrier = 1;
1118 output[j].type = -1;
1119 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1120 switch (ctx.type) {
1121 case TGSI_PROCESSOR_VERTEX:
1122 switch (shader->output[i].name) {
1123 case TGSI_SEMANTIC_POSITION:
1124 output[j].array_base = next_pos_base++;
1125 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1126 break;
1127
1128 case TGSI_SEMANTIC_PSIZE:
1129 output[j].array_base = next_pos_base++;
1130 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1131 break;
1132 case TGSI_SEMANTIC_CLIPVERTEX:
1133 j--;
1134 break;
1135 case TGSI_SEMANTIC_CLIPDIST:
1136 output[j].array_base = next_pos_base++;
1137 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1138 /* spi_sid is 0 for clipdistance outputs that were generated
1139 * for clipvertex - we don't need to pass them to PS */
1140 if (shader->output[i].spi_sid) {
1141 j++;
1142 /* duplicate it as PARAM to pass to the pixel shader */
1143 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1144 output[j].array_base = next_param_base++;
1145 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1146 }
1147 break;
1148 }
1149 break;
1150 case TGSI_PROCESSOR_FRAGMENT:
1151 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1152 output[j].array_base = next_pixel_base++;
1153 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1154 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
1155 for (k = 1; k < shader->nr_cbufs; k++) {
1156 j++;
1157 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1158 output[j].gpr = shader->output[i].gpr;
1159 output[j].elem_size = 3;
1160 output[j].swizzle_x = 0;
1161 output[j].swizzle_y = 1;
1162 output[j].swizzle_z = 2;
1163 output[j].swizzle_w = 3;
1164 output[j].burst_count = 1;
1165 output[j].barrier = 1;
1166 output[j].array_base = next_pixel_base++;
1167 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1168 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1169 }
1170 }
1171 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1172 output[j].array_base = 61;
1173 output[j].swizzle_x = 2;
1174 output[j].swizzle_y = 7;
1175 output[j].swizzle_z = output[j].swizzle_w = 7;
1176 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1177 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1178 output[j].array_base = 61;
1179 output[j].swizzle_x = 7;
1180 output[j].swizzle_y = 1;
1181 output[j].swizzle_z = output[j].swizzle_w = 7;
1182 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1183 } else {
1184 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1185 r = -EINVAL;
1186 goto out_err;
1187 }
1188 break;
1189 default:
1190 R600_ERR("unsupported processor type %d\n", ctx.type);
1191 r = -EINVAL;
1192 goto out_err;
1193 }
1194
1195 if (output[j].type==-1) {
1196 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1197 output[j].array_base = next_param_base++;
1198 }
1199 }
1200
1201 /* add fake param output for vertex shader if no param is exported */
1202 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1203 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1204 output[j].gpr = 0;
1205 output[j].elem_size = 3;
1206 output[j].swizzle_x = 7;
1207 output[j].swizzle_y = 7;
1208 output[j].swizzle_z = 7;
1209 output[j].swizzle_w = 7;
1210 output[j].burst_count = 1;
1211 output[j].barrier = 1;
1212 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1213 output[j].array_base = 0;
1214 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1215 j++;
1216 }
1217
1218 /* add fake pixel export */
1219 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
1220 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1221 output[j].gpr = 0;
1222 output[j].elem_size = 3;
1223 output[j].swizzle_x = 7;
1224 output[j].swizzle_y = 7;
1225 output[j].swizzle_z = 7;
1226 output[j].swizzle_w = 7;
1227 output[j].burst_count = 1;
1228 output[j].barrier = 1;
1229 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1230 output[j].array_base = 0;
1231 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1232 j++;
1233 }
1234
1235 noutput = j;
1236
1237 /* set export done on last export of each type */
1238 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1239 if (ctx.bc->chip_class < CAYMAN) {
1240 if (i == (noutput - 1)) {
1241 output[i].end_of_program = 1;
1242 }
1243 }
1244 if (!(output_done & (1 << output[i].type))) {
1245 output_done |= (1 << output[i].type);
1246 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1247 }
1248 }
1249 /* add output to bytecode */
1250 for (i = 0; i < noutput; i++) {
1251 r = r600_bytecode_add_output(ctx.bc, &output[i]);
1252 if (r)
1253 goto out_err;
1254 }
1255 /* add program end */
1256 if (ctx.bc->chip_class == CAYMAN)
1257 cm_bytecode_add_cf_end(ctx.bc);
1258
1259 free(ctx.literals);
1260 tgsi_parse_free(&ctx.parse);
1261 return 0;
1262 out_err:
1263 free(ctx.literals);
1264 tgsi_parse_free(&ctx.parse);
1265 return r;
1266 }
1267
1268 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1269 {
1270 R600_ERR("%s tgsi opcode unsupported\n",
1271 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1272 return -EINVAL;
1273 }
1274
/* Handler that emits nothing and always reports success (0). */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;

	return 0;
}
1279
1280 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1281 const struct r600_shader_src *shader_src,
1282 unsigned chan)
1283 {
1284 bc_src->sel = shader_src->sel;
1285 bc_src->chan = shader_src->swizzle[chan];
1286 bc_src->neg = shader_src->neg;
1287 bc_src->abs = shader_src->abs;
1288 bc_src->rel = shader_src->rel;
1289 bc_src->value = shader_src->value[bc_src->chan];
1290 }
1291
1292 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1293 {
1294 bc_src->abs = 1;
1295 bc_src->neg = 0;
1296 }
1297
1298 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1299 {
1300 bc_src->neg = !bc_src->neg;
1301 }
1302
1303 static void tgsi_dst(struct r600_shader_ctx *ctx,
1304 const struct tgsi_full_dst_register *tgsi_dst,
1305 unsigned swizzle,
1306 struct r600_bytecode_alu_dst *r600_dst)
1307 {
1308 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1309
1310 r600_dst->sel = tgsi_dst->Register.Index;
1311 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1312 r600_dst->chan = swizzle;
1313 r600_dst->write = 1;
1314 if (tgsi_dst->Register.Indirect)
1315 r600_dst->rel = V_SQ_REL_RELATIVE;
1316 if (inst->Instruction.Saturate) {
1317 r600_dst->clamp = 1;
1318 }
1319 }
1320
/*
 * Return the index (0..3) of the highest channel enabled in writemask.
 * Only the low four bits are examined; 0 is returned when none is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; the first hit is the highest enabled channel */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
1332
1333 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1334 {
1335 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1336 struct r600_bytecode_alu alu;
1337 int i, j, r;
1338 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1339
1340 for (i = 0; i < lasti + 1; i++) {
1341 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1342 continue;
1343
1344 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1345 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1346
1347 alu.inst = ctx->inst_info->r600_opcode;
1348 if (!swap) {
1349 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1350 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1351 }
1352 } else {
1353 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1354 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1355 }
1356 /* handle some special cases */
1357 switch (ctx->inst_info->tgsi_opcode) {
1358 case TGSI_OPCODE_SUB:
1359 r600_bytecode_src_toggle_neg(&alu.src[1]);
1360 break;
1361 case TGSI_OPCODE_ABS:
1362 r600_bytecode_src_set_abs(&alu.src[0]);
1363 break;
1364 default:
1365 break;
1366 }
1367 if (i == lasti || trans_only) {
1368 alu.last = 1;
1369 }
1370 r = r600_bytecode_add_alu(ctx->bc, &alu);
1371 if (r)
1372 return r;
1373 }
1374 return 0;
1375 }
1376
/* Per-channel emit with sources in TGSI order and one closing "last" flag. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, /* swap */ 0, /* trans_only */ 0);
}
1381
/* Per-channel emit with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, /* swap */ 1, /* trans_only */ 0);
}
1386
/* Per-channel emit where every instruction is flagged as last. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, /* swap */ 0, /* trans_only */ 1);
}
1391
1392 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1393 {
1394 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1395 struct r600_bytecode_alu alu;
1396 int i, r;
1397 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1398
1399 for (i = 0; i < lasti + 1; i++) {
1400
1401 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1402 continue;
1403 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1404 alu.inst = ctx->inst_info->r600_opcode;
1405
1406 alu.src[0].sel = V_SQ_ALU_SRC_0;
1407
1408 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1409
1410 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1411
1412 if (i == lasti) {
1413 alu.last = 1;
1414 }
1415 r = r600_bytecode_add_alu(ctx->bc, &alu);
1416 if (r)
1417 return r;
1418 }
1419 return 0;
1420
1421 }
1422
1423 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1424 {
1425 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1426 int i, j, r;
1427 struct r600_bytecode_alu alu;
1428 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1429
1430 for (i = 0 ; i < last_slot; i++) {
1431 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1432 alu.inst = ctx->inst_info->r600_opcode;
1433 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1434 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1435 }
1436 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1437 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1438
1439 if (i == last_slot - 1)
1440 alu.last = 1;
1441 r = r600_bytecode_add_alu(ctx->bc, &alu);
1442 if (r)
1443 return r;
1444 }
1445 return 0;
1446 }
1447
1448 /*
1449 * r600 - trunc to -PI..PI range
1450 * r700 - normalize by dividing by 2PI
1451 * see fdo bug 27901
1452 */
1453 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1454 {
1455 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1456 static float double_pi = 3.1415926535 * 2;
1457 static float neg_pi = -3.1415926535;
1458
1459 int r;
1460 struct r600_bytecode_alu alu;
1461
1462 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1464 alu.is_op3 = 1;
1465
1466 alu.dst.chan = 0;
1467 alu.dst.sel = ctx->temp_reg;
1468 alu.dst.write = 1;
1469
1470 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1471
1472 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1473 alu.src[1].chan = 0;
1474 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1475 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1476 alu.src[2].chan = 0;
1477 alu.last = 1;
1478 r = r600_bytecode_add_alu(ctx->bc, &alu);
1479 if (r)
1480 return r;
1481
1482 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1483 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1484
1485 alu.dst.chan = 0;
1486 alu.dst.sel = ctx->temp_reg;
1487 alu.dst.write = 1;
1488
1489 alu.src[0].sel = ctx->temp_reg;
1490 alu.src[0].chan = 0;
1491 alu.last = 1;
1492 r = r600_bytecode_add_alu(ctx->bc, &alu);
1493 if (r)
1494 return r;
1495
1496 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1497 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1498 alu.is_op3 = 1;
1499
1500 alu.dst.chan = 0;
1501 alu.dst.sel = ctx->temp_reg;
1502 alu.dst.write = 1;
1503
1504 alu.src[0].sel = ctx->temp_reg;
1505 alu.src[0].chan = 0;
1506
1507 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1508 alu.src[1].chan = 0;
1509 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1510 alu.src[2].chan = 0;
1511
1512 if (ctx->bc->chip_class == R600) {
1513 alu.src[1].value = *(uint32_t *)&double_pi;
1514 alu.src[2].value = *(uint32_t *)&neg_pi;
1515 } else {
1516 alu.src[1].sel = V_SQ_ALU_SRC_1;
1517 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1518 alu.src[2].neg = 1;
1519 }
1520
1521 alu.last = 1;
1522 r = r600_bytecode_add_alu(ctx->bc, &alu);
1523 if (r)
1524 return r;
1525 return 0;
1526 }
1527
1528 static int cayman_trig(struct r600_shader_ctx *ctx)
1529 {
1530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1531 struct r600_bytecode_alu alu;
1532 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1533 int i, r;
1534
1535 r = tgsi_setup_trig(ctx);
1536 if (r)
1537 return r;
1538
1539
1540 for (i = 0; i < last_slot; i++) {
1541 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1542 alu.inst = ctx->inst_info->r600_opcode;
1543 alu.dst.chan = i;
1544
1545 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1546 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1547
1548 alu.src[0].sel = ctx->temp_reg;
1549 alu.src[0].chan = 0;
1550 if (i == last_slot - 1)
1551 alu.last = 1;
1552 r = r600_bytecode_add_alu(ctx->bc, &alu);
1553 if (r)
1554 return r;
1555 }
1556 return 0;
1557 }
1558
1559 static int tgsi_trig(struct r600_shader_ctx *ctx)
1560 {
1561 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1562 struct r600_bytecode_alu alu;
1563 int i, r;
1564 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1565
1566 r = tgsi_setup_trig(ctx);
1567 if (r)
1568 return r;
1569
1570 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1571 alu.inst = ctx->inst_info->r600_opcode;
1572 alu.dst.chan = 0;
1573 alu.dst.sel = ctx->temp_reg;
1574 alu.dst.write = 1;
1575
1576 alu.src[0].sel = ctx->temp_reg;
1577 alu.src[0].chan = 0;
1578 alu.last = 1;
1579 r = r600_bytecode_add_alu(ctx->bc, &alu);
1580 if (r)
1581 return r;
1582
1583 /* replicate result */
1584 for (i = 0; i < lasti + 1; i++) {
1585 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1586 continue;
1587
1588 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1589 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1590
1591 alu.src[0].sel = ctx->temp_reg;
1592 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1593 if (i == lasti)
1594 alu.last = 1;
1595 r = r600_bytecode_add_alu(ctx->bc, &alu);
1596 if (r)
1597 return r;
1598 }
1599 return 0;
1600 }
1601
1602 static int tgsi_scs(struct r600_shader_ctx *ctx)
1603 {
1604 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1605 struct r600_bytecode_alu alu;
1606 int i, r;
1607
1608 /* We'll only need the trig stuff if we are going to write to the
1609 * X or Y components of the destination vector.
1610 */
1611 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1612 r = tgsi_setup_trig(ctx);
1613 if (r)
1614 return r;
1615 }
1616
1617 /* dst.x = COS */
1618 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1619 if (ctx->bc->chip_class == CAYMAN) {
1620 for (i = 0 ; i < 3; i++) {
1621 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1622 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1623 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1624
1625 if (i == 0)
1626 alu.dst.write = 1;
1627 else
1628 alu.dst.write = 0;
1629 alu.src[0].sel = ctx->temp_reg;
1630 alu.src[0].chan = 0;
1631 if (i == 2)
1632 alu.last = 1;
1633 r = r600_bytecode_add_alu(ctx->bc, &alu);
1634 if (r)
1635 return r;
1636 }
1637 } else {
1638 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1639 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1640 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1641
1642 alu.src[0].sel = ctx->temp_reg;
1643 alu.src[0].chan = 0;
1644 alu.last = 1;
1645 r = r600_bytecode_add_alu(ctx->bc, &alu);
1646 if (r)
1647 return r;
1648 }
1649 }
1650
1651 /* dst.y = SIN */
1652 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1653 if (ctx->bc->chip_class == CAYMAN) {
1654 for (i = 0 ; i < 3; i++) {
1655 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1657 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1658 if (i == 1)
1659 alu.dst.write = 1;
1660 else
1661 alu.dst.write = 0;
1662 alu.src[0].sel = ctx->temp_reg;
1663 alu.src[0].chan = 0;
1664 if (i == 2)
1665 alu.last = 1;
1666 r = r600_bytecode_add_alu(ctx->bc, &alu);
1667 if (r)
1668 return r;
1669 }
1670 } else {
1671 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1672 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1673 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1674
1675 alu.src[0].sel = ctx->temp_reg;
1676 alu.src[0].chan = 0;
1677 alu.last = 1;
1678 r = r600_bytecode_add_alu(ctx->bc, &alu);
1679 if (r)
1680 return r;
1681 }
1682 }
1683
1684 /* dst.z = 0.0; */
1685 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1686 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1687
1688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1689
1690 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1691
1692 alu.src[0].sel = V_SQ_ALU_SRC_0;
1693 alu.src[0].chan = 0;
1694
1695 alu.last = 1;
1696
1697 r = r600_bytecode_add_alu(ctx->bc, &alu);
1698 if (r)
1699 return r;
1700 }
1701
1702 /* dst.w = 1.0; */
1703 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1704 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1705
1706 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1707
1708 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1709
1710 alu.src[0].sel = V_SQ_ALU_SRC_1;
1711 alu.src[0].chan = 0;
1712
1713 alu.last = 1;
1714
1715 r = r600_bytecode_add_alu(ctx->bc, &alu);
1716 if (r)
1717 return r;
1718 }
1719
1720 return 0;
1721 }
1722
1723 static int tgsi_kill(struct r600_shader_ctx *ctx)
1724 {
1725 struct r600_bytecode_alu alu;
1726 int i, r;
1727
1728 for (i = 0; i < 4; i++) {
1729 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1730 alu.inst = ctx->inst_info->r600_opcode;
1731
1732 alu.dst.chan = i;
1733
1734 alu.src[0].sel = V_SQ_ALU_SRC_0;
1735
1736 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1737 alu.src[1].sel = V_SQ_ALU_SRC_1;
1738 alu.src[1].neg = 1;
1739 } else {
1740 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1741 }
1742 if (i == 3) {
1743 alu.last = 1;
1744 }
1745 r = r600_bytecode_add_alu(ctx->bc, &alu);
1746 if (r)
1747 return r;
1748 }
1749
1750 /* kill must be last in ALU */
1751 ctx->bc->force_add_cf = 1;
1752 ctx->shader->uses_kill = TRUE;
1753 return 0;
1754 }
1755
1756 static int tgsi_lit(struct r600_shader_ctx *ctx)
1757 {
1758 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1759 struct r600_bytecode_alu alu;
1760 int r;
1761
1762 /* tmp.x = max(src.y, 0.0) */
1763 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1764 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1765 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1766 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1767 alu.src[1].chan = 1;
1768
1769 alu.dst.sel = ctx->temp_reg;
1770 alu.dst.chan = 0;
1771 alu.dst.write = 1;
1772
1773 alu.last = 1;
1774 r = r600_bytecode_add_alu(ctx->bc, &alu);
1775 if (r)
1776 return r;
1777
1778 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1779 {
1780 int chan;
1781 int sel;
1782 int i;
1783
1784 if (ctx->bc->chip_class == CAYMAN) {
1785 for (i = 0; i < 3; i++) {
1786 /* tmp.z = log(tmp.x) */
1787 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1788 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1789 alu.src[0].sel = ctx->temp_reg;
1790 alu.src[0].chan = 0;
1791 alu.dst.sel = ctx->temp_reg;
1792 alu.dst.chan = i;
1793 if (i == 2) {
1794 alu.dst.write = 1;
1795 alu.last = 1;
1796 } else
1797 alu.dst.write = 0;
1798
1799 r = r600_bytecode_add_alu(ctx->bc, &alu);
1800 if (r)
1801 return r;
1802 }
1803 } else {
1804 /* tmp.z = log(tmp.x) */
1805 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1806 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1807 alu.src[0].sel = ctx->temp_reg;
1808 alu.src[0].chan = 0;
1809 alu.dst.sel = ctx->temp_reg;
1810 alu.dst.chan = 2;
1811 alu.dst.write = 1;
1812 alu.last = 1;
1813 r = r600_bytecode_add_alu(ctx->bc, &alu);
1814 if (r)
1815 return r;
1816 }
1817
1818 chan = alu.dst.chan;
1819 sel = alu.dst.sel;
1820
1821 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1822 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1823 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1824 alu.src[0].sel = sel;
1825 alu.src[0].chan = chan;
1826 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1827 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1828 alu.dst.sel = ctx->temp_reg;
1829 alu.dst.chan = 0;
1830 alu.dst.write = 1;
1831 alu.is_op3 = 1;
1832 alu.last = 1;
1833 r = r600_bytecode_add_alu(ctx->bc, &alu);
1834 if (r)
1835 return r;
1836
1837 if (ctx->bc->chip_class == CAYMAN) {
1838 for (i = 0; i < 3; i++) {
1839 /* dst.z = exp(tmp.x) */
1840 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1842 alu.src[0].sel = ctx->temp_reg;
1843 alu.src[0].chan = 0;
1844 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1845 if (i == 2) {
1846 alu.dst.write = 1;
1847 alu.last = 1;
1848 } else
1849 alu.dst.write = 0;
1850 r = r600_bytecode_add_alu(ctx->bc, &alu);
1851 if (r)
1852 return r;
1853 }
1854 } else {
1855 /* dst.z = exp(tmp.x) */
1856 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1857 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1858 alu.src[0].sel = ctx->temp_reg;
1859 alu.src[0].chan = 0;
1860 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1861 alu.last = 1;
1862 r = r600_bytecode_add_alu(ctx->bc, &alu);
1863 if (r)
1864 return r;
1865 }
1866 }
1867
1868 /* dst.x, <- 1.0 */
1869 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1870 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1871 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1872 alu.src[0].chan = 0;
1873 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1874 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1875 r = r600_bytecode_add_alu(ctx->bc, &alu);
1876 if (r)
1877 return r;
1878
1879 /* dst.y = max(src.x, 0.0) */
1880 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1881 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1882 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1883 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1884 alu.src[1].chan = 0;
1885 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1886 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1887 r = r600_bytecode_add_alu(ctx->bc, &alu);
1888 if (r)
1889 return r;
1890
1891 /* dst.w, <- 1.0 */
1892 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1893 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1894 alu.src[0].sel = V_SQ_ALU_SRC_1;
1895 alu.src[0].chan = 0;
1896 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1897 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1898 alu.last = 1;
1899 r = r600_bytecode_add_alu(ctx->bc, &alu);
1900 if (r)
1901 return r;
1902
1903 return 0;
1904 }
1905
1906 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1907 {
1908 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1909 struct r600_bytecode_alu alu;
1910 int i, r;
1911
1912 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1913
1914 /* FIXME:
1915 * For state trackers other than OpenGL, we'll want to use
1916 * _RECIPSQRT_IEEE instead.
1917 */
1918 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1919
1920 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1921 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1922 r600_bytecode_src_set_abs(&alu.src[i]);
1923 }
1924 alu.dst.sel = ctx->temp_reg;
1925 alu.dst.write = 1;
1926 alu.last = 1;
1927 r = r600_bytecode_add_alu(ctx->bc, &alu);
1928 if (r)
1929 return r;
1930 /* replicate result */
1931 return tgsi_helper_tempx_replicate(ctx);
1932 }
1933
1934 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1935 {
1936 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1937 struct r600_bytecode_alu alu;
1938 int i, r;
1939
1940 for (i = 0; i < 4; i++) {
1941 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1942 alu.src[0].sel = ctx->temp_reg;
1943 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1944 alu.dst.chan = i;
1945 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1946 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1947 if (i == 3)
1948 alu.last = 1;
1949 r = r600_bytecode_add_alu(ctx->bc, &alu);
1950 if (r)
1951 return r;
1952 }
1953 return 0;
1954 }
1955
1956 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1957 {
1958 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1959 struct r600_bytecode_alu alu;
1960 int i, r;
1961
1962 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1963 alu.inst = ctx->inst_info->r600_opcode;
1964 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1965 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1966 }
1967 alu.dst.sel = ctx->temp_reg;
1968 alu.dst.write = 1;
1969 alu.last = 1;
1970 r = r600_bytecode_add_alu(ctx->bc, &alu);
1971 if (r)
1972 return r;
1973 /* replicate result */
1974 return tgsi_helper_tempx_replicate(ctx);
1975 }
1976
1977 static int cayman_pow(struct r600_shader_ctx *ctx)
1978 {
1979 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1980 int i, r;
1981 struct r600_bytecode_alu alu;
1982 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1983
1984 for (i = 0; i < 3; i++) {
1985 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1986 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1987 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1988 alu.dst.sel = ctx->temp_reg;
1989 alu.dst.chan = i;
1990 alu.dst.write = 1;
1991 if (i == 2)
1992 alu.last = 1;
1993 r = r600_bytecode_add_alu(ctx->bc, &alu);
1994 if (r)
1995 return r;
1996 }
1997
1998 /* b * LOG2(a) */
1999 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2001 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2002 alu.src[1].sel = ctx->temp_reg;
2003 alu.dst.sel = ctx->temp_reg;
2004 alu.dst.write = 1;
2005 alu.last = 1;
2006 r = r600_bytecode_add_alu(ctx->bc, &alu);
2007 if (r)
2008 return r;
2009
2010 for (i = 0; i < last_slot; i++) {
2011 /* POW(a,b) = EXP2(b * LOG2(a))*/
2012 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2013 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2014 alu.src[0].sel = ctx->temp_reg;
2015
2016 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2017 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2018 if (i == last_slot - 1)
2019 alu.last = 1;
2020 r = r600_bytecode_add_alu(ctx->bc, &alu);
2021 if (r)
2022 return r;
2023 }
2024 return 0;
2025 }
2026
2027 static int tgsi_pow(struct r600_shader_ctx *ctx)
2028 {
2029 struct r600_bytecode_alu alu;
2030 int r;
2031
2032 /* LOG2(a) */
2033 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2034 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2035 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2036 alu.dst.sel = ctx->temp_reg;
2037 alu.dst.write = 1;
2038 alu.last = 1;
2039 r = r600_bytecode_add_alu(ctx->bc, &alu);
2040 if (r)
2041 return r;
2042 /* b * LOG2(a) */
2043 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2044 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2045 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2046 alu.src[1].sel = ctx->temp_reg;
2047 alu.dst.sel = ctx->temp_reg;
2048 alu.dst.write = 1;
2049 alu.last = 1;
2050 r = r600_bytecode_add_alu(ctx->bc, &alu);
2051 if (r)
2052 return r;
2053 /* POW(a,b) = EXP2(b * LOG2(a))*/
2054 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2055 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2056 alu.src[0].sel = ctx->temp_reg;
2057 alu.dst.sel = ctx->temp_reg;
2058 alu.dst.write = 1;
2059 alu.last = 1;
2060 r = r600_bytecode_add_alu(ctx->bc, &alu);
2061 if (r)
2062 return r;
2063 return tgsi_helper_tempx_replicate(ctx);
2064 }
2065
2066 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2067 {
2068 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2069 struct r600_bytecode_alu alu;
2070 int i, r;
2071 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2072 int tmp0 = ctx->temp_reg;
2073 int tmp1 = r600_get_temp(ctx);
2074 int tmp2 = r600_get_temp(ctx);
2075
2076 /* Unsigned path:
2077 *
2078 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2079 *
2080 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
2081 * 2. tmp0.z = lo (tmp0.x * src2)
2082 * 3. tmp0.w = -tmp0.z
2083 * 4. tmp0.y = hi (tmp0.x * src2)
2084 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
2085 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
2086 * 7. tmp1.x = tmp0.x - tmp0.w
2087 * 8. tmp1.y = tmp0.x + tmp0.w
2088 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2089 * 10. tmp0.z = hi(tmp0.x * src1) = q
2090 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
2091 *
2092 * 12. tmp0.w = src1 - tmp0.y = r
2093 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
2094 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
2095 *
2096 * if DIV
2097 *
2098 * 15. tmp1.z = tmp0.z + 1 = q + 1
2099 * 16. tmp1.w = tmp0.z - 1 = q - 1
2100 *
2101 * else MOD
2102 *
2103 * 15. tmp1.z = tmp0.w - src2 = r - src2
2104 * 16. tmp1.w = tmp0.w + src2 = r + src2
2105 *
2106 * endif
2107 *
2108 * 17. tmp1.x = tmp1.x & tmp1.y
2109 *
2110 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2111 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2112 *
2113 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2114 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2115 *
2116 * Signed path:
2117 *
2118 * Same as unsigned, using abs values of the operands,
2119 * and fixing the sign of the result in the end.
2120 */
2121
2122 for (i = 0; i < 4; i++) {
2123 if (!(write_mask & (1<<i)))
2124 continue;
2125
2126 if (signed_op) {
2127
2128 /* tmp2.x = -src0 */
2129 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2130 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2131
2132 alu.dst.sel = tmp2;
2133 alu.dst.chan = 0;
2134 alu.dst.write = 1;
2135
2136 alu.src[0].sel = V_SQ_ALU_SRC_0;
2137
2138 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2139
2140 alu.last = 1;
2141 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2142 return r;
2143
2144 /* tmp2.y = -src1 */
2145 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2146 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2147
2148 alu.dst.sel = tmp2;
2149 alu.dst.chan = 1;
2150 alu.dst.write = 1;
2151
2152 alu.src[0].sel = V_SQ_ALU_SRC_0;
2153
2154 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2155
2156 alu.last = 1;
2157 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2158 return r;
2159
2160 /* tmp2.z sign bit is set if src0 and src2 signs are different */
2161 /* it will be a sign of the quotient */
2162 if (!mod) {
2163
2164 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2165 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2166
2167 alu.dst.sel = tmp2;
2168 alu.dst.chan = 2;
2169 alu.dst.write = 1;
2170
2171 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2172 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2173
2174 alu.last = 1;
2175 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2176 return r;
2177 }
2178
2179 /* tmp2.x = |src0| */
2180 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2181 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2182 alu.is_op3 = 1;
2183
2184 alu.dst.sel = tmp2;
2185 alu.dst.chan = 0;
2186 alu.dst.write = 1;
2187
2188 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2189 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2190 alu.src[2].sel = tmp2;
2191 alu.src[2].chan = 0;
2192
2193 alu.last = 1;
2194 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2195 return r;
2196
2197 /* tmp2.y = |src1| */
2198 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2199 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2200 alu.is_op3 = 1;
2201
2202 alu.dst.sel = tmp2;
2203 alu.dst.chan = 1;
2204 alu.dst.write = 1;
2205
2206 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2207 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2208 alu.src[2].sel = tmp2;
2209 alu.src[2].chan = 1;
2210
2211 alu.last = 1;
2212 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2213 return r;
2214
2215 }
2216
2217 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
2218 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2219 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2220
2221 alu.dst.sel = tmp0;
2222 alu.dst.chan = 0;
2223 alu.dst.write = 1;
2224
2225 if (signed_op) {
2226 alu.src[0].sel = tmp2;
2227 alu.src[0].chan = 1;
2228 } else {
2229 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2230 }
2231
2232 alu.last = 1;
2233 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2234 return r;
2235
2236 /* 2. tmp0.z = lo (tmp0.x * src2) */
2237 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2238 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2239
2240 alu.dst.sel = tmp0;
2241 alu.dst.chan = 2;
2242 alu.dst.write = 1;
2243
2244 alu.src[0].sel = tmp0;
2245 alu.src[0].chan = 0;
2246 if (signed_op) {
2247 alu.src[1].sel = tmp2;
2248 alu.src[1].chan = 1;
2249 } else {
2250 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2251 }
2252
2253 alu.last = 1;
2254 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2255 return r;
2256
2257 /* 3. tmp0.w = -tmp0.z */
2258 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2259 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2260
2261 alu.dst.sel = tmp0;
2262 alu.dst.chan = 3;
2263 alu.dst.write = 1;
2264
2265 alu.src[0].sel = V_SQ_ALU_SRC_0;
2266 alu.src[1].sel = tmp0;
2267 alu.src[1].chan = 2;
2268
2269 alu.last = 1;
2270 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2271 return r;
2272
2273 /* 4. tmp0.y = hi (tmp0.x * src2) */
2274 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2275 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2276
2277 alu.dst.sel = tmp0;
2278 alu.dst.chan = 1;
2279 alu.dst.write = 1;
2280
2281 alu.src[0].sel = tmp0;
2282 alu.src[0].chan = 0;
2283
2284 if (signed_op) {
2285 alu.src[1].sel = tmp2;
2286 alu.src[1].chan = 1;
2287 } else {
2288 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2289 }
2290
2291 alu.last = 1;
2292 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2293 return r;
2294
2295 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
2296 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2297 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2298 alu.is_op3 = 1;
2299
2300 alu.dst.sel = tmp0;
2301 alu.dst.chan = 2;
2302 alu.dst.write = 1;
2303
2304 alu.src[0].sel = tmp0;
2305 alu.src[0].chan = 1;
2306 alu.src[1].sel = tmp0;
2307 alu.src[1].chan = 3;
2308 alu.src[2].sel = tmp0;
2309 alu.src[2].chan = 2;
2310
2311 alu.last = 1;
2312 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2313 return r;
2314
2315 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
2316 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2317 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2318
2319 alu.dst.sel = tmp0;
2320 alu.dst.chan = 3;
2321 alu.dst.write = 1;
2322
2323 alu.src[0].sel = tmp0;
2324 alu.src[0].chan = 2;
2325
2326 alu.src[1].sel = tmp0;
2327 alu.src[1].chan = 0;
2328
2329 alu.last = 1;
2330 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2331 return r;
2332
2333 /* 7. tmp1.x = tmp0.x - tmp0.w */
2334 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2335 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2336
2337 alu.dst.sel = tmp1;
2338 alu.dst.chan = 0;
2339 alu.dst.write = 1;
2340
2341 alu.src[0].sel = tmp0;
2342 alu.src[0].chan = 0;
2343 alu.src[1].sel = tmp0;
2344 alu.src[1].chan = 3;
2345
2346 alu.last = 1;
2347 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2348 return r;
2349
2350 /* 8. tmp1.y = tmp0.x + tmp0.w */
2351 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2352 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2353
2354 alu.dst.sel = tmp1;
2355 alu.dst.chan = 1;
2356 alu.dst.write = 1;
2357
2358 alu.src[0].sel = tmp0;
2359 alu.src[0].chan = 0;
2360 alu.src[1].sel = tmp0;
2361 alu.src[1].chan = 3;
2362
2363 alu.last = 1;
2364 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2365 return r;
2366
2367 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2368 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2369 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2370 alu.is_op3 = 1;
2371
2372 alu.dst.sel = tmp0;
2373 alu.dst.chan = 0;
2374 alu.dst.write = 1;
2375
2376 alu.src[0].sel = tmp0;
2377 alu.src[0].chan = 1;
2378 alu.src[1].sel = tmp1;
2379 alu.src[1].chan = 1;
2380 alu.src[2].sel = tmp1;
2381 alu.src[2].chan = 0;
2382
2383 alu.last = 1;
2384 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2385 return r;
2386
2387 /* 10. tmp0.z = hi(tmp0.x * src1) = q */
2388 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2389 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2390
2391 alu.dst.sel = tmp0;
2392 alu.dst.chan = 2;
2393 alu.dst.write = 1;
2394
2395 alu.src[0].sel = tmp0;
2396 alu.src[0].chan = 0;
2397
2398 if (signed_op) {
2399 alu.src[1].sel = tmp2;
2400 alu.src[1].chan = 0;
2401 } else {
2402 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2403 }
2404
2405 alu.last = 1;
2406 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2407 return r;
2408
2409 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
2410 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2411 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2412
2413 alu.dst.sel = tmp0;
2414 alu.dst.chan = 1;
2415 alu.dst.write = 1;
2416
2417 if (signed_op) {
2418 alu.src[0].sel = tmp2;
2419 alu.src[0].chan = 1;
2420 } else {
2421 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2422 }
2423
2424 alu.src[1].sel = tmp0;
2425 alu.src[1].chan = 2;
2426
2427 alu.last = 1;
2428 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2429 return r;
2430
2431 /* 12. tmp0.w = src1 - tmp0.y = r */
2432 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2433 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2434
2435 alu.dst.sel = tmp0;
2436 alu.dst.chan = 3;
2437 alu.dst.write = 1;
2438
2439 if (signed_op) {
2440 alu.src[0].sel = tmp2;
2441 alu.src[0].chan = 0;
2442 } else {
2443 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2444 }
2445
2446 alu.src[1].sel = tmp0;
2447 alu.src[1].chan = 1;
2448
2449 alu.last = 1;
2450 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2451 return r;
2452
2453 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
2454 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2455 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
2456
2457 alu.dst.sel = tmp1;
2458 alu.dst.chan = 0;
2459 alu.dst.write = 1;
2460
2461 alu.src[0].sel = tmp0;
2462 alu.src[0].chan = 3;
2463 if (signed_op) {
2464 alu.src[1].sel = tmp2;
2465 alu.src[1].chan = 1;
2466 } else {
2467 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2468 }
2469
2470 alu.last = 1;
2471 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2472 return r;
2473
2474 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
2475 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
2477
2478 alu.dst.sel = tmp1;
2479 alu.dst.chan = 1;
2480 alu.dst.write = 1;
2481
2482 if (signed_op) {
2483 alu.src[0].sel = tmp2;
2484 alu.src[0].chan = 0;
2485 } else {
2486 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2487 }
2488
2489 alu.src[1].sel = tmp0;
2490 alu.src[1].chan = 1;
2491
2492 alu.last = 1;
2493 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2494 return r;
2495
2496 if (mod) { /* UMOD */
2497
2498 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */
2499 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2500 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2501
2502 alu.dst.sel = tmp1;
2503 alu.dst.chan = 2;
2504 alu.dst.write = 1;
2505
2506 alu.src[0].sel = tmp0;
2507 alu.src[0].chan = 3;
2508
2509 if (signed_op) {
2510 alu.src[1].sel = tmp2;
2511 alu.src[1].chan = 1;
2512 } else {
2513 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2514 }
2515
2516 alu.last = 1;
2517 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2518 return r;
2519
2520 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */
2521 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2522 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2523
2524 alu.dst.sel = tmp1;
2525 alu.dst.chan = 3;
2526 alu.dst.write = 1;
2527
2528 alu.src[0].sel = tmp0;
2529 alu.src[0].chan = 3;
2530 if (signed_op) {
2531 alu.src[1].sel = tmp2;
2532 alu.src[1].chan = 1;
2533 } else {
2534 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2535 }
2536
2537 alu.last = 1;
2538 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2539 return r;
2540
2541 } else { /* UDIV */
2542
2543 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
2544 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2545 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2546
2547 alu.dst.sel = tmp1;
2548 alu.dst.chan = 2;
2549 alu.dst.write = 1;
2550
2551 alu.src[0].sel = tmp0;
2552 alu.src[0].chan = 2;
2553 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
2554
2555 alu.last = 1;
2556 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2557 return r;
2558
2559 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
2560 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2561 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2562
2563 alu.dst.sel = tmp1;
2564 alu.dst.chan = 3;
2565 alu.dst.write = 1;
2566
2567 alu.src[0].sel = tmp0;
2568 alu.src[0].chan = 2;
2569 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
2570
2571 alu.last = 1;
2572 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2573 return r;
2574
2575 }
2576
2577 /* 17. tmp1.x = tmp1.x & tmp1.y */
2578 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2579 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
2580
2581 alu.dst.sel = tmp1;
2582 alu.dst.chan = 0;
2583 alu.dst.write = 1;
2584
2585 alu.src[0].sel = tmp1;
2586 alu.src[0].chan = 0;
2587 alu.src[1].sel = tmp1;
2588 alu.src[1].chan = 1;
2589
2590 alu.last = 1;
2591 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2592 return r;
2593
2594 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
2595 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
2596 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2597 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2598 alu.is_op3 = 1;
2599
2600 alu.dst.sel = tmp0;
2601 alu.dst.chan = 2;
2602 alu.dst.write = 1;
2603
2604 alu.src[0].sel = tmp1;
2605 alu.src[0].chan = 0;
2606 alu.src[1].sel = tmp0;
2607 alu.src[1].chan = mod ? 3 : 2;
2608 alu.src[2].sel = tmp1;
2609 alu.src[2].chan = 2;
2610
2611 alu.last = 1;
2612 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2613 return r;
2614
2615 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
2616 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2617 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2618 alu.is_op3 = 1;
2619
2620 if (signed_op) {
2621 alu.dst.sel = tmp0;
2622 alu.dst.chan = 2;
2623 alu.dst.write = 1;
2624 } else {
2625 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2626 }
2627
2628 alu.src[0].sel = tmp1;
2629 alu.src[0].chan = 1;
2630 alu.src[1].sel = tmp1;
2631 alu.src[1].chan = 3;
2632 alu.src[2].sel = tmp0;
2633 alu.src[2].chan = 2;
2634
2635 alu.last = 1;
2636 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2637 return r;
2638
2639 if (signed_op) {
2640
2641 /* fix the sign of the result */
2642
2643 if (mod) {
2644
2645 /* tmp0.x = -tmp0.z */
2646 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2647 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2648
2649 alu.dst.sel = tmp0;
2650 alu.dst.chan = 0;
2651 alu.dst.write = 1;
2652
2653 alu.src[0].sel = V_SQ_ALU_SRC_0;
2654 alu.src[1].sel = tmp0;
2655 alu.src[1].chan = 2;
2656
2657 alu.last = 1;
2658 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2659 return r;
2660
2661 /* sign of the remainder is the same as the sign of src0 */
2662 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
2663 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2664 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2665 alu.is_op3 = 1;
2666
2667 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2668
2669 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2670 alu.src[1].sel = tmp0;
2671 alu.src[1].chan = 2;
2672 alu.src[2].sel = tmp0;
2673 alu.src[2].chan = 0;
2674
2675 alu.last = 1;
2676 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2677 return r;
2678
2679 } else {
2680
2681 /* tmp0.x = -tmp0.z */
2682 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2683 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2684
2685 alu.dst.sel = tmp0;
2686 alu.dst.chan = 0;
2687 alu.dst.write = 1;
2688
2689 alu.src[0].sel = V_SQ_ALU_SRC_0;
2690 alu.src[1].sel = tmp0;
2691 alu.src[1].chan = 2;
2692
2693 alu.last = 1;
2694 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2695 return r;
2696
2697 /* fix the quotient sign (same as the sign of src0*src1) */
2698 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
2699 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2700 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2701 alu.is_op3 = 1;
2702
2703 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2704
2705 alu.src[0].sel = tmp2;
2706 alu.src[0].chan = 2;
2707 alu.src[1].sel = tmp0;
2708 alu.src[1].chan = 2;
2709 alu.src[2].sel = tmp0;
2710 alu.src[2].chan = 0;
2711
2712 alu.last = 1;
2713 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2714 return r;
2715 }
2716 }
2717 }
2718 return 0;
2719 }
2720
/* Unsigned integer division: tgsi_divmod() with mod = 0, signed_op = 0. */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
2725
/* Unsigned integer remainder: tgsi_divmod() with mod = 1, signed_op = 0. */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 0;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
2730
/* Signed integer division: tgsi_divmod() with mod = 0, signed_op = 1. */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
2735
/* Signed integer remainder: tgsi_divmod() with mod = 1, signed_op = 1. */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int is_signed = 1;

	return tgsi_divmod(ctx, want_remainder, is_signed);
}
2740
2741 static int tgsi_iabs(struct r600_shader_ctx *ctx)
2742 {
2743 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2744 struct r600_bytecode_alu alu;
2745 int i, r;
2746 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2747 int last_inst = tgsi_last_instruction(write_mask);
2748
2749 /* tmp = -src */
2750 for (i = 0; i < 4; i++) {
2751 if (!(write_mask & (1<<i)))
2752 continue;
2753
2754 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2755 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2756
2757 alu.dst.sel = ctx->temp_reg;
2758 alu.dst.chan = i;
2759 alu.dst.write = 1;
2760
2761 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2762 alu.src[0].sel = V_SQ_ALU_SRC_0;
2763
2764 if (i == last_inst)
2765 alu.last = 1;
2766 r = r600_bytecode_add_alu(ctx->bc, &alu);
2767 if (r)
2768 return r;
2769 }
2770
2771 /* dst = (src >= 0 ? src : tmp) */
2772 for (i = 0; i < 4; i++) {
2773 if (!(write_mask & (1<<i)))
2774 continue;
2775
2776 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2777 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2778 alu.is_op3 = 1;
2779 alu.dst.write = 1;
2780
2781 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2782
2783 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2784 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2785 alu.src[2].sel = ctx->temp_reg;
2786 alu.src[2].chan = i;
2787
2788 if (i == last_inst)
2789 alu.last = 1;
2790 r = r600_bytecode_add_alu(ctx->bc, &alu);
2791 if (r)
2792 return r;
2793 }
2794 return 0;
2795 }
2796
2797 static int tgsi_issg(struct r600_shader_ctx *ctx)
2798 {
2799 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2800 struct r600_bytecode_alu alu;
2801 int i, r;
2802 unsigned write_mask = inst->Dst[0].Register.WriteMask;
2803 int last_inst = tgsi_last_instruction(write_mask);
2804
2805 /* tmp = (src >= 0 ? src : -1) */
2806 for (i = 0; i < 4; i++) {
2807 if (!(write_mask & (1<<i)))
2808 continue;
2809
2810 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2811 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2812 alu.is_op3 = 1;
2813
2814 alu.dst.sel = ctx->temp_reg;
2815 alu.dst.chan = i;
2816 alu.dst.write = 1;
2817
2818 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2819 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2820 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
2821
2822 if (i == last_inst)
2823 alu.last = 1;
2824 r = r600_bytecode_add_alu(ctx->bc, &alu);
2825 if (r)
2826 return r;
2827 }
2828
2829 /* dst = (tmp > 0 ? 1 : tmp) */
2830 for (i = 0; i < 4; i++) {
2831 if (!(write_mask & (1<<i)))
2832 continue;
2833
2834 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2835 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
2836 alu.is_op3 = 1;
2837 alu.dst.write = 1;
2838
2839 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2840
2841 alu.src[0].sel = ctx->temp_reg;
2842 alu.src[0].chan = i;
2843
2844 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
2845
2846 alu.src[2].sel = ctx->temp_reg;
2847 alu.src[2].chan = i;
2848
2849 if (i == last_inst)
2850 alu.last = 1;
2851 r = r600_bytecode_add_alu(ctx->bc, &alu);
2852 if (r)
2853 return r;
2854 }
2855 return 0;
2856 }
2857
2858
2859
2860 static int tgsi_ssg(struct r600_shader_ctx *ctx)
2861 {
2862 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2863 struct r600_bytecode_alu alu;
2864 int i, r;
2865
2866 /* tmp = (src > 0 ? 1 : src) */
2867 for (i = 0; i < 4; i++) {
2868 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2869 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
2870 alu.is_op3 = 1;
2871
2872 alu.dst.sel = ctx->temp_reg;
2873 alu.dst.chan = i;
2874
2875 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2876 alu.src[1].sel = V_SQ_ALU_SRC_1;
2877 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
2878
2879 if (i == 3)
2880 alu.last = 1;
2881 r = r600_bytecode_add_alu(ctx->bc, &alu);
2882 if (r)
2883 return r;
2884 }
2885
2886 /* dst = (-tmp > 0 ? -1 : tmp) */
2887 for (i = 0; i < 4; i++) {
2888 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2889 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
2890 alu.is_op3 = 1;
2891 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2892
2893 alu.src[0].sel = ctx->temp_reg;
2894 alu.src[0].chan = i;
2895 alu.src[0].neg = 1;
2896
2897 alu.src[1].sel = V_SQ_ALU_SRC_1;
2898 alu.src[1].neg = 1;
2899
2900 alu.src[2].sel = ctx->temp_reg;
2901 alu.src[2].chan = i;
2902
2903 if (i == 3)
2904 alu.last = 1;
2905 r = r600_bytecode_add_alu(ctx->bc, &alu);
2906 if (r)
2907 return r;
2908 }
2909 return 0;
2910 }
2911
2912 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
2913 {
2914 struct r600_bytecode_alu alu;
2915 int i, r;
2916
2917 for (i = 0; i < 4; i++) {
2918 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2919 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
2920 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
2921 alu.dst.chan = i;
2922 } else {
2923 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2924 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2925 alu.src[0].sel = ctx->temp_reg;
2926 alu.src[0].chan = i;
2927 }
2928 if (i == 3) {
2929 alu.last = 1;
2930 }
2931 r = r600_bytecode_add_alu(ctx->bc, &alu);
2932 if (r)
2933 return r;
2934 }
2935 return 0;
2936 }
2937
2938 static int tgsi_op3(struct r600_shader_ctx *ctx)
2939 {
2940 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2941 struct r600_bytecode_alu alu;
2942 int i, j, r;
2943 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2944
2945 for (i = 0; i < lasti + 1; i++) {
2946 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2947 continue;
2948
2949 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2950 alu.inst = ctx->inst_info->r600_opcode;
2951 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
2952 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
2953 }
2954
2955 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2956 alu.dst.chan = i;
2957 alu.dst.write = 1;
2958 alu.is_op3 = 1;
2959 if (i == lasti) {
2960 alu.last = 1;
2961 }
2962 r = r600_bytecode_add_alu(ctx->bc, &alu);
2963 if (r)
2964 return r;
2965 }
2966 return 0;
2967 }
2968
2969 static int tgsi_dp(struct r600_shader_ctx *ctx)
2970 {
2971 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2972 struct r600_bytecode_alu alu;
2973 int i, j, r;
2974
2975 for (i = 0; i < 4; i++) {
2976 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2977 alu.inst = ctx->inst_info->r600_opcode;
2978 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
2979 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
2980 }
2981
2982 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2983 alu.dst.chan = i;
2984 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2985 /* handle some special cases */
2986 switch (ctx->inst_info->tgsi_opcode) {
2987 case TGSI_OPCODE_DP2:
2988 if (i > 1) {
2989 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2990 alu.src[0].chan = alu.src[1].chan = 0;
2991 }
2992 break;
2993 case TGSI_OPCODE_DP3:
2994 if (i > 2) {
2995 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2996 alu.src[0].chan = alu.src[1].chan = 0;
2997 }
2998 break;
2999 case TGSI_OPCODE_DPH:
3000 if (i == 3) {
3001 alu.src[0].sel = V_SQ_ALU_SRC_1;
3002 alu.src[0].chan = 0;
3003 alu.src[0].neg = 0;
3004 }
3005 break;
3006 default:
3007 break;
3008 }
3009 if (i == 3) {
3010 alu.last = 1;
3011 }
3012 r = r600_bytecode_add_alu(ctx->bc, &alu);
3013 if (r)
3014 return r;
3015 }
3016 return 0;
3017 }
3018
3019 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3020 unsigned index)
3021 {
3022 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3023 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3024 inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
3025 ctx->src[index].neg || ctx->src[index].abs;
3026 }
3027
3028 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3029 unsigned index)
3030 {
3031 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3032 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3033 }
3034
3035 static int tgsi_tex(struct r600_shader_ctx *ctx)
3036 {
3037 static float one_point_five = 1.5f;
3038 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3039 struct r600_bytecode_tex tex;
3040 struct r600_bytecode_alu alu;
3041 unsigned src_gpr;
3042 int r, i, j;
3043 int opcode;
3044 /* Texture fetch instructions can only use gprs as source.
3045 * Also they cannot negate the source or take the absolute value */
3046 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
3047 boolean src_loaded = FALSE;
3048 unsigned sampler_src_reg = 1;
3049 u8 offset_x = 0, offset_y = 0, offset_z = 0;
3050
3051 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3052
3053 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3054 /* get offset values */
3055 if (inst->Texture.NumOffsets) {
3056 assert(inst->Texture.NumOffsets == 1);
3057
3058 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3059 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3060 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3061 }
3062 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3063 /* TGSI moves the sampler to src reg 3 for TXD */
3064 sampler_src_reg = 3;
3065
3066 for (i = 1; i < 3; i++) {
3067 /* set gradients h/v */
3068 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3069 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3070 SQ_TEX_INST_SET_GRADIENTS_V;
3071 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3072 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3073
3074 if (tgsi_tex_src_requires_loading(ctx, i)) {
3075 tex.src_gpr = r600_get_temp(ctx);
3076 tex.src_sel_x = 0;
3077 tex.src_sel_y = 1;
3078 tex.src_sel_z = 2;
3079 tex.src_sel_w = 3;
3080
3081 for (j = 0; j < 4; j++) {
3082 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3083 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3084 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3085 alu.dst.sel = tex.src_gpr;
3086 alu.dst.chan = j;
3087 if (j == 3)
3088 alu.last = 1;
3089 alu.dst.write = 1;
3090 r = r600_bytecode_add_alu(ctx->bc, &alu);
3091 if (r)
3092 return r;
3093 }
3094
3095 } else {
3096 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3097 tex.src_sel_x = ctx->src[i].swizzle[0];
3098 tex.src_sel_y = ctx->src[i].swizzle[1];
3099 tex.src_sel_z = ctx->src[i].swizzle[2];
3100 tex.src_sel_w = ctx->src[i].swizzle[3];
3101 tex.src_rel = ctx->src[i].rel;
3102 }
3103 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3104 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3105 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3106 tex.coord_type_x = 1;
3107 tex.coord_type_y = 1;
3108 tex.coord_type_z = 1;
3109 tex.coord_type_w = 1;
3110 }
3111 r = r600_bytecode_add_tex(ctx->bc, &tex);
3112 if (r)
3113 return r;
3114 }
3115 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3116 int out_chan;
3117 /* Add perspective divide */
3118 if (ctx->bc->chip_class == CAYMAN) {
3119 out_chan = 2;
3120 for (i = 0; i < 3; i++) {
3121 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3123 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3124
3125 alu.dst.sel = ctx->temp_reg;
3126 alu.dst.chan = i;
3127 if (i == 2)
3128 alu.last = 1;
3129 if (out_chan == i)
3130 alu.dst.write = 1;
3131 r = r600_bytecode_add_alu(ctx->bc, &alu);
3132 if (r)
3133 return r;
3134 }
3135
3136 } else {
3137 out_chan = 3;
3138 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3139 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3140 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3141
3142 alu.dst.sel = ctx->temp_reg;
3143 alu.dst.chan = out_chan;
3144 alu.last = 1;
3145 alu.dst.write = 1;
3146 r = r600_bytecode_add_alu(ctx->bc, &alu);
3147 if (r)
3148 return r;
3149 }
3150
3151 for (i = 0; i < 3; i++) {
3152 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3153 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3154 alu.src[0].sel = ctx->temp_reg;
3155 alu.src[0].chan = out_chan;
3156 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3157 alu.dst.sel = ctx->temp_reg;
3158 alu.dst.chan = i;
3159 alu.dst.write = 1;
3160 r = r600_bytecode_add_alu(ctx->bc, &alu);
3161 if (r)
3162 return r;
3163 }
3164 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3165 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3166 alu.src[0].sel = V_SQ_ALU_SRC_1;
3167 alu.src[0].chan = 0;
3168 alu.dst.sel = ctx->temp_reg;
3169 alu.dst.chan = 3;
3170 alu.last = 1;
3171 alu.dst.write = 1;
3172 r = r600_bytecode_add_alu(ctx->bc, &alu);
3173 if (r)
3174 return r;
3175 src_loaded = TRUE;
3176 src_gpr = ctx->temp_reg;
3177 }
3178
3179 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE &&
3180 inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
3181
3182 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3183 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3184
3185 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3186 for (i = 0; i < 4; i++) {
3187 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3188 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3189 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3190 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3191 alu.dst.sel = ctx->temp_reg;
3192 alu.dst.chan = i;
3193 if (i == 3)
3194 alu.last = 1;
3195 alu.dst.write = 1;
3196 r = r600_bytecode_add_alu(ctx->bc, &alu);
3197 if (r)
3198 return r;
3199 }
3200
3201 /* tmp1.z = RCP_e(|tmp1.z|) */
3202 if (ctx->bc->chip_class == CAYMAN) {
3203 for (i = 0; i < 3; i++) {
3204 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3205 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3206 alu.src[0].sel = ctx->temp_reg;
3207 alu.src[0].chan = 2;
3208 alu.src[0].abs = 1;
3209 alu.dst.sel = ctx->temp_reg;
3210 alu.dst.chan = i;
3211 if (i == 2)
3212 alu.dst.write = 1;
3213 if (i == 2)
3214 alu.last = 1;
3215 r = r600_bytecode_add_alu(ctx->bc, &alu);
3216 if (r)
3217 return r;
3218 }
3219 } else {
3220 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3222 alu.src[0].sel = ctx->temp_reg;
3223 alu.src[0].chan = 2;
3224 alu.src[0].abs = 1;
3225 alu.dst.sel = ctx->temp_reg;
3226 alu.dst.chan = 2;
3227 alu.dst.write = 1;
3228 alu.last = 1;
3229 r = r600_bytecode_add_alu(ctx->bc, &alu);
3230 if (r)
3231 return r;
3232 }
3233
3234 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3235 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3236 * muladd has no writemask, have to use another temp
3237 */
3238 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3240 alu.is_op3 = 1;
3241
3242 alu.src[0].sel = ctx->temp_reg;
3243 alu.src[0].chan = 0;
3244 alu.src[1].sel = ctx->temp_reg;
3245 alu.src[1].chan = 2;
3246
3247 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3248 alu.src[2].chan = 0;
3249 alu.src[2].value = *(uint32_t *)&one_point_five;
3250
3251 alu.dst.sel = ctx->temp_reg;
3252 alu.dst.chan = 0;
3253 alu.dst.write = 1;
3254
3255 r = r600_bytecode_add_alu(ctx->bc, &alu);
3256 if (r)
3257 return r;
3258
3259 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3260 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3261 alu.is_op3 = 1;
3262
3263 alu.src[0].sel = ctx->temp_reg;
3264 alu.src[0].chan = 1;
3265 alu.src[1].sel = ctx->temp_reg;
3266 alu.src[1].chan = 2;
3267
3268 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3269 alu.src[2].chan = 0;
3270 alu.src[2].value = *(uint32_t *)&one_point_five;
3271
3272 alu.dst.sel = ctx->temp_reg;
3273 alu.dst.chan = 1;
3274 alu.dst.write = 1;
3275
3276 alu.last = 1;
3277 r = r600_bytecode_add_alu(ctx->bc, &alu);
3278 if (r)
3279 return r;
3280
3281 src_loaded = TRUE;
3282 src_gpr = ctx->temp_reg;
3283 }
3284
3285 if (src_requires_loading && !src_loaded) {
3286 for (i = 0; i < 4; i++) {
3287 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3288 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3289 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3290 alu.dst.sel = ctx->temp_reg;
3291 alu.dst.chan = i;
3292 if (i == 3)
3293 alu.last = 1;
3294 alu.dst.write = 1;
3295 r = r600_bytecode_add_alu(ctx->bc, &alu);
3296 if (r)
3297 return r;
3298 }
3299 src_loaded = TRUE;
3300 src_gpr = ctx->temp_reg;
3301 }
3302
3303 opcode = ctx->inst_info->r600_opcode;
3304 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
3305 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
3306 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
3307 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
3308 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
3309 switch (opcode) {
3310 case SQ_TEX_INST_SAMPLE:
3311 opcode = SQ_TEX_INST_SAMPLE_C;
3312 break;
3313 case SQ_TEX_INST_SAMPLE_L:
3314 opcode = SQ_TEX_INST_SAMPLE_C_L;
3315 break;
3316 case SQ_TEX_INST_SAMPLE_LB:
3317 opcode = SQ_TEX_INST_SAMPLE_C_LB;
3318 break;
3319 case SQ_TEX_INST_SAMPLE_G:
3320 opcode = SQ_TEX_INST_SAMPLE_C_G;
3321 break;
3322 }
3323 }
3324
3325 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3326 tex.inst = opcode;
3327
3328 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3329 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3330 tex.src_gpr = src_gpr;
3331 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
3332 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
3333 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
3334 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
3335 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
3336 if (src_loaded) {
3337 tex.src_sel_x = 0;
3338 tex.src_sel_y = 1;
3339 tex.src_sel_z = 2;
3340 tex.src_sel_w = 3;
3341 } else {
3342 tex.src_sel_x = ctx->src[0].swizzle[0];
3343 tex.src_sel_y = ctx->src[0].swizzle[1];
3344 tex.src_sel_z = ctx->src[0].swizzle[2];
3345 tex.src_sel_w = ctx->src[0].swizzle[3];
3346 tex.src_rel = ctx->src[0].rel;
3347 }
3348
3349 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
3350 tex.src_sel_x = 1;
3351 tex.src_sel_y = 0;
3352 tex.src_sel_z = 3;
3353 tex.src_sel_w = 1;
3354 }
3355
3356 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
3357 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
3358 tex.coord_type_x = 1;
3359 tex.coord_type_y = 1;
3360 }
3361 tex.coord_type_z = 1;
3362 tex.coord_type_w = 1;
3363
3364 tex.offset_x = offset_x;
3365 tex.offset_y = offset_y;
3366 tex.offset_z = offset_z;
3367
3368 /* Put the depth for comparison in W.
3369 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
3370 * Some instructions expect the depth in Z. */
3371 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
3372 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
3373 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
3374 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
3375 opcode != SQ_TEX_INST_SAMPLE_C_L &&
3376 opcode != SQ_TEX_INST_SAMPLE_C_LB) {
3377 tex.src_sel_w = tex.src_sel_z;
3378 }
3379
3380 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
3381 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
3382 if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
3383 opcode == SQ_TEX_INST_SAMPLE_C_LB) {
3384 /* the array index is read from Y */
3385 tex.coord_type_y = 0;
3386 } else {
3387 /* the array index is read from Z */
3388 tex.coord_type_z = 0;
3389 tex.src_sel_z = tex.src_sel_y;
3390 }
3391 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
3392 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
3393 /* the array index is read from Z */
3394 tex.coord_type_z = 0;
3395
3396 r = r600_bytecode_add_tex(ctx->bc, &tex);
3397 if (r)
3398 return r;
3399
3400 /* add shadow ambient support - gallium doesn't do it yet */
3401 return 0;
3402 }
3403
3404 static int tgsi_lrp(struct r600_shader_ctx *ctx)
3405 {
3406 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3407 struct r600_bytecode_alu alu;
3408 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3409 unsigned i;
3410 int r;
3411
3412 /* optimize if it's just an equal balance */
3413 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
3414 for (i = 0; i < lasti + 1; i++) {
3415 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3416 continue;
3417
3418 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
3420 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3421 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3422 alu.omod = 3;
3423 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3424 alu.dst.chan = i;
3425 if (i == lasti) {
3426 alu.last = 1;
3427 }
3428 r = r600_bytecode_add_alu(ctx->bc, &alu);
3429 if (r)
3430 return r;
3431 }
3432 return 0;
3433 }
3434
3435 /* 1 - src0 */
3436 for (i = 0; i < lasti + 1; i++) {
3437 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3438 continue;
3439
3440 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3441 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
3442 alu.src[0].sel = V_SQ_ALU_SRC_1;
3443 alu.src[0].chan = 0;
3444 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3445 r600_bytecode_src_toggle_neg(&alu.src[1]);
3446 alu.dst.sel = ctx->temp_reg;
3447 alu.dst.chan = i;
3448 if (i == lasti) {
3449 alu.last = 1;
3450 }
3451 alu.dst.write = 1;
3452 r = r600_bytecode_add_alu(ctx->bc, &alu);
3453 if (r)
3454 return r;
3455 }
3456
3457 /* (1 - src0) * src2 */
3458 for (i = 0; i < lasti + 1; i++) {
3459 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3460 continue;
3461
3462 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3464 alu.src[0].sel = ctx->temp_reg;
3465 alu.src[0].chan = i;
3466 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3467 alu.dst.sel = ctx->temp_reg;
3468 alu.dst.chan = i;
3469 if (i == lasti) {
3470 alu.last = 1;
3471 }
3472 alu.dst.write = 1;
3473 r = r600_bytecode_add_alu(ctx->bc, &alu);
3474 if (r)
3475 return r;
3476 }
3477
3478 /* src0 * src1 + (1 - src0) * src2 */
3479 for (i = 0; i < lasti + 1; i++) {
3480 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3481 continue;
3482
3483 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3484 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3485 alu.is_op3 = 1;
3486 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3487 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3488 alu.src[2].sel = ctx->temp_reg;
3489 alu.src[2].chan = i;
3490
3491 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3492 alu.dst.chan = i;
3493 if (i == lasti) {
3494 alu.last = 1;
3495 }
3496 r = r600_bytecode_add_alu(ctx->bc, &alu);
3497 if (r)
3498 return r;
3499 }
3500 return 0;
3501 }
3502
3503 static int tgsi_cmp(struct r600_shader_ctx *ctx)
3504 {
3505 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3506 struct r600_bytecode_alu alu;
3507 int i, r;
3508 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3509
3510 for (i = 0; i < lasti + 1; i++) {
3511 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3512 continue;
3513
3514 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3515 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
3516 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3517 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3518 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
3519 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3520 alu.dst.chan = i;
3521 alu.dst.write = 1;
3522 alu.is_op3 = 1;
3523 if (i == lasti)
3524 alu.last = 1;
3525 r = r600_bytecode_add_alu(ctx->bc, &alu);
3526 if (r)
3527 return r;
3528 }
3529 return 0;
3530 }
3531
3532 static int tgsi_xpd(struct r600_shader_ctx *ctx)
3533 {
3534 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3535 static const unsigned int src0_swizzle[] = {2, 0, 1};
3536 static const unsigned int src1_swizzle[] = {1, 2, 0};
3537 struct r600_bytecode_alu alu;
3538 uint32_t use_temp = 0;
3539 int i, r;
3540
3541 if (inst->Dst[0].Register.WriteMask != 0xf)
3542 use_temp = 1;
3543
3544 for (i = 0; i < 4; i++) {
3545 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3546 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3547 if (i < 3) {
3548 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3549 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
3550 } else {
3551 alu.src[0].sel = V_SQ_ALU_SRC_0;
3552 alu.src[0].chan = i;
3553 alu.src[1].sel = V_SQ_ALU_SRC_0;
3554 alu.src[1].chan = i;
3555 }
3556
3557 alu.dst.sel = ctx->temp_reg;
3558 alu.dst.chan = i;
3559 alu.dst.write = 1;
3560
3561 if (i == 3)
3562 alu.last = 1;
3563 r = r600_bytecode_add_alu(ctx->bc, &alu);
3564 if (r)
3565 return r;
3566 }
3567
3568 for (i = 0; i < 4; i++) {
3569 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3570 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3571
3572 if (i < 3) {
3573 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
3574 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
3575 } else {
3576 alu.src[0].sel = V_SQ_ALU_SRC_0;
3577 alu.src[0].chan = i;
3578 alu.src[1].sel = V_SQ_ALU_SRC_0;
3579 alu.src[1].chan = i;
3580 }
3581
3582 alu.src[2].sel = ctx->temp_reg;
3583 alu.src[2].neg = 1;
3584 alu.src[2].chan = i;
3585
3586 if (use_temp)
3587 alu.dst.sel = ctx->temp_reg;
3588 else
3589 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3590 alu.dst.chan = i;
3591 alu.dst.write = 1;
3592 alu.is_op3 = 1;
3593 if (i == 3)
3594 alu.last = 1;
3595 r = r600_bytecode_add_alu(ctx->bc, &alu);
3596 if (r)
3597 return r;
3598 }
3599 if (use_temp)
3600 return tgsi_helper_copy(ctx, inst);
3601 return 0;
3602 }
3603
3604 static int tgsi_exp(struct r600_shader_ctx *ctx)
3605 {
3606 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3607 struct r600_bytecode_alu alu;
3608 int r;
3609 int i;
3610
3611 /* result.x = 2^floor(src); */
3612 if (inst->Dst[0].Register.WriteMask & 1) {
3613 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3614
3615 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
3616 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3617
3618 alu.dst.sel = ctx->temp_reg;
3619 alu.dst.chan = 0;
3620 alu.dst.write = 1;
3621 alu.last = 1;
3622 r = r600_bytecode_add_alu(ctx->bc, &alu);
3623 if (r)
3624 return r;
3625
3626 if (ctx->bc->chip_class == CAYMAN) {
3627 for (i = 0; i < 3; i++) {
3628 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3629 alu.src[0].sel = ctx->temp_reg;
3630 alu.src[0].chan = 0;
3631
3632 alu.dst.sel = ctx->temp_reg;
3633 alu.dst.chan = i;
3634 if (i == 0)
3635 alu.dst.write = 1;
3636 if (i == 2)
3637 alu.last = 1;
3638 r = r600_bytecode_add_alu(ctx->bc, &alu);
3639 if (r)
3640 return r;
3641 }
3642 } else {
3643 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3644 alu.src[0].sel = ctx->temp_reg;
3645 alu.src[0].chan = 0;
3646
3647 alu.dst.sel = ctx->temp_reg;
3648 alu.dst.chan = 0;
3649 alu.dst.write = 1;
3650 alu.last = 1;
3651 r = r600_bytecode_add_alu(ctx->bc, &alu);
3652 if (r)
3653 return r;
3654 }
3655 }
3656
3657 /* result.y = tmp - floor(tmp); */
3658 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
3659 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3660
3661 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
3662 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3663
3664 alu.dst.sel = ctx->temp_reg;
3665 #if 0
3666 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3667 if (r)
3668 return r;
3669 #endif
3670 alu.dst.write = 1;
3671 alu.dst.chan = 1;
3672
3673 alu.last = 1;
3674
3675 r = r600_bytecode_add_alu(ctx->bc, &alu);
3676 if (r)
3677 return r;
3678 }
3679
3680 /* result.z = RoughApprox2ToX(tmp);*/
3681 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
3682 if (ctx->bc->chip_class == CAYMAN) {
3683 for (i = 0; i < 3; i++) {
3684 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3685 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3686 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3687
3688 alu.dst.sel = ctx->temp_reg;
3689 alu.dst.chan = i;
3690 if (i == 2) {
3691 alu.dst.write = 1;
3692 alu.last = 1;
3693 }
3694
3695 r = r600_bytecode_add_alu(ctx->bc, &alu);
3696 if (r)
3697 return r;
3698 }
3699 } else {
3700 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3701 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3702 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3703
3704 alu.dst.sel = ctx->temp_reg;
3705 alu.dst.write = 1;
3706 alu.dst.chan = 2;
3707
3708 alu.last = 1;
3709
3710 r = r600_bytecode_add_alu(ctx->bc, &alu);
3711 if (r)
3712 return r;
3713 }
3714 }
3715
3716 /* result.w = 1.0;*/
3717 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
3718 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3719
3720 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3721 alu.src[0].sel = V_SQ_ALU_SRC_1;
3722 alu.src[0].chan = 0;
3723
3724 alu.dst.sel = ctx->temp_reg;
3725 alu.dst.chan = 3;
3726 alu.dst.write = 1;
3727 alu.last = 1;
3728 r = r600_bytecode_add_alu(ctx->bc, &alu);
3729 if (r)
3730 return r;
3731 }
3732 return tgsi_helper_copy(ctx, inst);
3733 }
3734
3735 static int tgsi_log(struct r600_shader_ctx *ctx)
3736 {
3737 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3738 struct r600_bytecode_alu alu;
3739 int r;
3740 int i;
3741
3742 /* result.x = floor(log2(|src|)); */
3743 if (inst->Dst[0].Register.WriteMask & 1) {
3744 if (ctx->bc->chip_class == CAYMAN) {
3745 for (i = 0; i < 3; i++) {
3746 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3747
3748 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3749 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3750 r600_bytecode_src_set_abs(&alu.src[0]);
3751
3752 alu.dst.sel = ctx->temp_reg;
3753 alu.dst.chan = i;
3754 if (i == 0)
3755 alu.dst.write = 1;
3756 if (i == 2)
3757 alu.last = 1;
3758 r = r600_bytecode_add_alu(ctx->bc, &alu);
3759 if (r)
3760 return r;
3761 }
3762
3763 } else {
3764 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3765
3766 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3767 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3768 r600_bytecode_src_set_abs(&alu.src[0]);
3769
3770 alu.dst.sel = ctx->temp_reg;
3771 alu.dst.chan = 0;
3772 alu.dst.write = 1;
3773 alu.last = 1;
3774 r = r600_bytecode_add_alu(ctx->bc, &alu);
3775 if (r)
3776 return r;
3777 }
3778
3779 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
3780 alu.src[0].sel = ctx->temp_reg;
3781 alu.src[0].chan = 0;
3782
3783 alu.dst.sel = ctx->temp_reg;
3784 alu.dst.chan = 0;
3785 alu.dst.write = 1;
3786 alu.last = 1;
3787
3788 r = r600_bytecode_add_alu(ctx->bc, &alu);
3789 if (r)
3790 return r;
3791 }
3792
3793 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
3794 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
3795
3796 if (ctx->bc->chip_class == CAYMAN) {
3797 for (i = 0; i < 3; i++) {
3798 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3799
3800 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3801 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3802 r600_bytecode_src_set_abs(&alu.src[0]);
3803
3804 alu.dst.sel = ctx->temp_reg;
3805 alu.dst.chan = i;
3806 if (i == 1)
3807 alu.dst.write = 1;
3808 if (i == 2)
3809 alu.last = 1;
3810
3811 r = r600_bytecode_add_alu(ctx->bc, &alu);
3812 if (r)
3813 return r;
3814 }
3815 } else {
3816 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3817
3818 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3819 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3820 r600_bytecode_src_set_abs(&alu.src[0]);
3821
3822 alu.dst.sel = ctx->temp_reg;
3823 alu.dst.chan = 1;
3824 alu.dst.write = 1;
3825 alu.last = 1;
3826
3827 r = r600_bytecode_add_alu(ctx->bc, &alu);
3828 if (r)
3829 return r;
3830 }
3831
3832 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3833
3834 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
3835 alu.src[0].sel = ctx->temp_reg;
3836 alu.src[0].chan = 1;
3837
3838 alu.dst.sel = ctx->temp_reg;
3839 alu.dst.chan = 1;
3840 alu.dst.write = 1;
3841 alu.last = 1;
3842
3843 r = r600_bytecode_add_alu(ctx->bc, &alu);
3844 if (r)
3845 return r;
3846
3847 if (ctx->bc->chip_class == CAYMAN) {
3848 for (i = 0; i < 3; i++) {
3849 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3850 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3851 alu.src[0].sel = ctx->temp_reg;
3852 alu.src[0].chan = 1;
3853
3854 alu.dst.sel = ctx->temp_reg;
3855 alu.dst.chan = i;
3856 if (i == 1)
3857 alu.dst.write = 1;
3858 if (i == 2)
3859 alu.last = 1;
3860
3861 r = r600_bytecode_add_alu(ctx->bc, &alu);
3862 if (r)
3863 return r;
3864 }
3865 } else {
3866 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3867 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3868 alu.src[0].sel = ctx->temp_reg;
3869 alu.src[0].chan = 1;
3870
3871 alu.dst.sel = ctx->temp_reg;
3872 alu.dst.chan = 1;
3873 alu.dst.write = 1;
3874 alu.last = 1;
3875
3876 r = r600_bytecode_add_alu(ctx->bc, &alu);
3877 if (r)
3878 return r;
3879 }
3880
3881 if (ctx->bc->chip_class == CAYMAN) {
3882 for (i = 0; i < 3; i++) {
3883 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3884 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3885 alu.src[0].sel = ctx->temp_reg;
3886 alu.src[0].chan = 1;
3887
3888 alu.dst.sel = ctx->temp_reg;
3889 alu.dst.chan = i;
3890 if (i == 1)
3891 alu.dst.write = 1;
3892 if (i == 2)
3893 alu.last = 1;
3894
3895 r = r600_bytecode_add_alu(ctx->bc, &alu);
3896 if (r)
3897 return r;
3898 }
3899 } else {
3900 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3901 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3902 alu.src[0].sel = ctx->temp_reg;
3903 alu.src[0].chan = 1;
3904
3905 alu.dst.sel = ctx->temp_reg;
3906 alu.dst.chan = 1;
3907 alu.dst.write = 1;
3908 alu.last = 1;
3909
3910 r = r600_bytecode_add_alu(ctx->bc, &alu);
3911 if (r)
3912 return r;
3913 }
3914
3915 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3916
3917 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3918
3919 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3920 r600_bytecode_src_set_abs(&alu.src[0]);
3921
3922 alu.src[1].sel = ctx->temp_reg;
3923 alu.src[1].chan = 1;
3924
3925 alu.dst.sel = ctx->temp_reg;
3926 alu.dst.chan = 1;
3927 alu.dst.write = 1;
3928 alu.last = 1;
3929
3930 r = r600_bytecode_add_alu(ctx->bc, &alu);
3931 if (r)
3932 return r;
3933 }
3934
3935 /* result.z = log2(|src|);*/
3936 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
3937 if (ctx->bc->chip_class == CAYMAN) {
3938 for (i = 0; i < 3; i++) {
3939 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3940
3941 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3942 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3943 r600_bytecode_src_set_abs(&alu.src[0]);
3944
3945 alu.dst.sel = ctx->temp_reg;
3946 if (i == 2)
3947 alu.dst.write = 1;
3948 alu.dst.chan = i;
3949 if (i == 2)
3950 alu.last = 1;
3951
3952 r = r600_bytecode_add_alu(ctx->bc, &alu);
3953 if (r)
3954 return r;
3955 }
3956 } else {
3957 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3958
3959 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3960 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3961 r600_bytecode_src_set_abs(&alu.src[0]);
3962
3963 alu.dst.sel = ctx->temp_reg;
3964 alu.dst.write = 1;
3965 alu.dst.chan = 2;
3966 alu.last = 1;
3967
3968 r = r600_bytecode_add_alu(ctx->bc, &alu);
3969 if (r)
3970 return r;
3971 }
3972 }
3973
3974 /* result.w = 1.0; */
3975 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
3976 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3977
3978 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3979 alu.src[0].sel = V_SQ_ALU_SRC_1;
3980 alu.src[0].chan = 0;
3981
3982 alu.dst.sel = ctx->temp_reg;
3983 alu.dst.chan = 3;
3984 alu.dst.write = 1;
3985 alu.last = 1;
3986
3987 r = r600_bytecode_add_alu(ctx->bc, &alu);
3988 if (r)
3989 return r;
3990 }
3991
3992 return tgsi_helper_copy(ctx, inst);
3993 }
3994
3995 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
3996 {
3997 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3998 struct r600_bytecode_alu alu;
3999 int r;
4000
4001 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4002
4003 switch (inst->Instruction.Opcode) {
4004 case TGSI_OPCODE_ARL:
4005 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4006 break;
4007 case TGSI_OPCODE_ARR:
4008 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4009 break;
4010 case TGSI_OPCODE_UARL:
4011 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4012 break;
4013 default:
4014 assert(0);
4015 return -1;
4016 }
4017
4018 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4019 alu.last = 1;
4020 alu.dst.sel = ctx->bc->ar_reg;
4021 alu.dst.write = 1;
4022 r = r600_bytecode_add_alu(ctx->bc, &alu);
4023 if (r)
4024 return r;
4025
4026 ctx->bc->ar_loaded = 0;
4027 return 0;
4028 }
4029 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4030 {
4031 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4032 struct r600_bytecode_alu alu;
4033 int r;
4034
4035 switch (inst->Instruction.Opcode) {
4036 case TGSI_OPCODE_ARL:
4037 memset(&alu, 0, sizeof(alu));
4038 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4039 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4040 alu.dst.sel = ctx->bc->ar_reg;
4041 alu.dst.write = 1;
4042 alu.last = 1;
4043
4044 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4045 return r;
4046
4047 memset(&alu, 0, sizeof(alu));
4048 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4049 alu.src[0].sel = ctx->bc->ar_reg;
4050 alu.dst.sel = ctx->bc->ar_reg;
4051 alu.dst.write = 1;
4052 alu.last = 1;
4053
4054 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4055 return r;
4056 break;
4057 case TGSI_OPCODE_ARR:
4058 memset(&alu, 0, sizeof(alu));
4059 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4060 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4061 alu.dst.sel = ctx->bc->ar_reg;
4062 alu.dst.write = 1;
4063 alu.last = 1;
4064
4065 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4066 return r;
4067 break;
4068 case TGSI_OPCODE_UARL:
4069 memset(&alu, 0, sizeof(alu));
4070 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4071 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4072 alu.dst.sel = ctx->bc->ar_reg;
4073 alu.dst.write = 1;
4074 alu.last = 1;
4075
4076 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4077 return r;
4078 break;
4079 default:
4080 assert(0);
4081 return -1;
4082 }
4083
4084 ctx->bc->ar_loaded = 0;
4085 return 0;
4086 }
4087
4088 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4089 {
4090 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4091 struct r600_bytecode_alu alu;
4092 int i, r = 0;
4093
4094 for (i = 0; i < 4; i++) {
4095 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4096
4097 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4098 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4099
4100 if (i == 0 || i == 3) {
4101 alu.src[0].sel = V_SQ_ALU_SRC_1;
4102 } else {
4103 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4104 }
4105
4106 if (i == 0 || i == 2) {
4107 alu.src[1].sel = V_SQ_ALU_SRC_1;
4108 } else {
4109 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4110 }
4111 if (i == 3)
4112 alu.last = 1;
4113 r = r600_bytecode_add_alu(ctx->bc, &alu);
4114 if (r)
4115 return r;
4116 }
4117 return 0;
4118 }
4119
4120 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4121 {
4122 struct r600_bytecode_alu alu;
4123 int r;
4124
4125 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4126 alu.inst = opcode;
4127 alu.predicate = 1;
4128
4129 alu.dst.sel = ctx->temp_reg;
4130 alu.dst.write = 1;
4131 alu.dst.chan = 0;
4132
4133 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4134 alu.src[1].sel = V_SQ_ALU_SRC_0;
4135 alu.src[1].chan = 0;
4136
4137 alu.last = 1;
4138
4139 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4140 if (r)
4141 return r;
4142 return 0;
4143 }
4144
4145 static int pops(struct r600_shader_ctx *ctx, int pops)
4146 {
4147 unsigned force_pop = ctx->bc->force_add_cf;
4148
4149 if (!force_pop) {
4150 int alu_pop = 3;
4151 if (ctx->bc->cf_last) {
4152 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4153 alu_pop = 0;
4154 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4155 alu_pop = 1;
4156 }
4157 alu_pop += pops;
4158 if (alu_pop == 1) {
4159 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4160 ctx->bc->force_add_cf = 1;
4161 } else if (alu_pop == 2) {
4162 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4163 ctx->bc->force_add_cf = 1;
4164 } else {
4165 force_pop = 1;
4166 }
4167 }
4168
4169 if (force_pop) {
4170 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4171 ctx->bc->cf_last->pop_count = pops;
4172 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4173 }
4174
4175 return 0;
4176 }
4177
4178 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4179 {
4180 switch(reason) {
4181 case FC_PUSH_VPM:
4182 ctx->bc->callstack[ctx->bc->call_sp].current--;
4183 break;
4184 case FC_PUSH_WQM:
4185 case FC_LOOP:
4186 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4187 break;
4188 case FC_REP:
4189 /* TOODO : for 16 vp asic should -= 2; */
4190 ctx->bc->callstack[ctx->bc->call_sp].current --;
4191 break;
4192 }
4193 }
4194
4195 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4196 {
4197 if (check_max_only) {
4198 int diff;
4199 switch (reason) {
4200 case FC_PUSH_VPM:
4201 diff = 1;
4202 break;
4203 case FC_PUSH_WQM:
4204 diff = 4;
4205 break;
4206 default:
4207 assert(0);
4208 diff = 0;
4209 }
4210 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4211 ctx->bc->callstack[ctx->bc->call_sp].max) {
4212 ctx->bc->callstack[ctx->bc->call_sp].max =
4213 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4214 }
4215 return;
4216 }
4217 switch (reason) {
4218 case FC_PUSH_VPM:
4219 ctx->bc->callstack[ctx->bc->call_sp].current++;
4220 break;
4221 case FC_PUSH_WQM:
4222 case FC_LOOP:
4223 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4224 break;
4225 case FC_REP:
4226 ctx->bc->callstack[ctx->bc->call_sp].current++;
4227 break;
4228 }
4229
4230 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4231 ctx->bc->callstack[ctx->bc->call_sp].max) {
4232 ctx->bc->callstack[ctx->bc->call_sp].max =
4233 ctx->bc->callstack[ctx->bc->call_sp].current;
4234 }
4235 }
4236
4237 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4238 {
4239 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4240
4241 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
4242 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
4243 sp->mid[sp->num_mid] = ctx->bc->cf_last;
4244 sp->num_mid++;
4245 }
4246
4247 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
4248 {
4249 ctx->bc->fc_sp++;
4250 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
4251 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
4252 }
4253
4254 static void fc_poplevel(struct r600_shader_ctx *ctx)
4255 {
4256 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
4257 if (sp->mid) {
4258 free(sp->mid);
4259 sp->mid = NULL;
4260 }
4261 sp->num_mid = 0;
4262 sp->start = NULL;
4263 sp->type = 0;
4264 ctx->bc->fc_sp--;
4265 }
4266
#if 0
/*
 * Compiled out.  Unfinished helpers for return / flag-based flow control;
 * several of them are still empty stubs.
 */

/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a JUMP CF instruction popping 'pops' entries; 'offset' is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, then jump / clear the flag / pop ifidx + 1 levels / return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag, emit the current instruction's CF opcode and record it as a 'mid' of level fc_sp. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
4314
4315 static int tgsi_if(struct r600_shader_ctx *ctx)
4316 {
4317 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
4318
4319 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
4320
4321 fc_pushlevel(ctx, FC_IF);
4322
4323 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
4324 return 0;
4325 }
4326
4327 static int tgsi_else(struct r600_shader_ctx *ctx)
4328 {
4329 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
4330 ctx->bc->cf_last->pop_count = 1;
4331
4332 fc_set_mid(ctx, ctx->bc->fc_sp);
4333 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
4334 return 0;
4335 }
4336
4337 static int tgsi_endif(struct r600_shader_ctx *ctx)
4338 {
4339 pops(ctx, 1);
4340 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
4341 R600_ERR("if/endif unbalanced in shader\n");
4342 return -1;
4343 }
4344
4345 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
4346 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
4347 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
4348 } else {
4349 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
4350 }
4351 fc_poplevel(ctx);
4352
4353 callstack_decrease_current(ctx, FC_PUSH_VPM);
4354 return 0;
4355 }
4356
4357 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
4358 {
4359 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
4360
4361 fc_pushlevel(ctx, FC_LOOP);
4362
4363 /* check stack depth */
4364 callstack_check_depth(ctx, FC_LOOP, 0);
4365 return 0;
4366 }
4367
4368 static int tgsi_endloop(struct r600_shader_ctx *ctx)
4369 {
4370 int i;
4371
4372 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
4373
4374 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
4375 R600_ERR("loop/endloop in shader code are not paired.\n");
4376 return -EINVAL;
4377 }
4378
4379 /* fixup loop pointers - from r600isa
4380 LOOP END points to CF after LOOP START,
4381 LOOP START point to CF after LOOP END
4382 BRK/CONT point to LOOP END CF
4383 */
4384 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
4385
4386 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
4387
4388 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
4389 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
4390 }
4391 /* TODO add LOOPRET support */
4392 fc_poplevel(ctx);
4393 callstack_decrease_current(ctx, FC_LOOP);
4394 return 0;
4395 }
4396
4397 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
4398 {
4399 unsigned int fscp;
4400
4401 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
4402 {
4403 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
4404 break;
4405 }
4406
4407 if (fscp == 0) {
4408 R600_ERR("Break not inside loop/endloop pair\n");
4409 return -EINVAL;
4410 }
4411
4412 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
4413 ctx->bc->cf_last->pop_count = 1;
4414
4415 fc_set_mid(ctx, fscp);
4416
4417 pops(ctx, 1);
4418 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
4419 return 0;
4420 }
4421
4422 static int tgsi_umad(struct r600_shader_ctx *ctx)
4423 {
4424 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4425 struct r600_bytecode_alu alu;
4426 int i, j, r;
4427 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4428
4429 /* src0 * src1 */
4430 for (i = 0; i < lasti + 1; i++) {
4431 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4432 continue;
4433
4434 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4435
4436 alu.dst.chan = i;
4437 alu.dst.sel = ctx->temp_reg;
4438 alu.dst.write = 1;
4439
4440 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
4441 for (j = 0; j < 2; j++) {
4442 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
4443 }
4444
4445 alu.last = 1;
4446 r = r600_bytecode_add_alu(ctx->bc, &alu);
4447 if (r)
4448 return r;
4449 }
4450
4451
4452 for (i = 0; i < lasti + 1; i++) {
4453 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4454 continue;
4455
4456 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4457 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4458
4459 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
4460
4461 alu.src[0].sel = ctx->temp_reg;
4462 alu.src[0].chan = i;
4463
4464 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4465 if (i == lasti) {
4466 alu.last = 1;
4467 }
4468 r = r600_bytecode_add_alu(ctx->bc, &alu);
4469 if (r)
4470 return r;
4471 }
4472 return 0;
4473 }
4474
4475 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
4476 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
4477 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4478 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
4479
4480 /* FIXME:
4481 * For state trackers other than OpenGL, we'll want to use
4482 * _RECIP_IEEE instead.
4483 */
4484 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
4485
4486 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
4487 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
4488 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
4489 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
4490 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4491 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4492 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4493 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
4494 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
4495 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
4496 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
4497 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
4498 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
4499 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4500 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
4501 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4502 /* gap */
4503 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4504 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4505 /* gap */
4506 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4507 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4508 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
4509 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4510 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
4511 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
4512 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
4513 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
4514 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
4515 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
4516 /* gap */
4517 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4518 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4519 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4520 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4521 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
4522 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
4523 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
4524 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
4525 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4526 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4527 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4528 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4529 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4530 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
4531 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4532 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
4533 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
4534 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
4535 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
4536 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4537 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4538 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
4539 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4540 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4541 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4542 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4543 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4544 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4545 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4546 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
4547 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4548 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4549 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4550 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
4551 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
4552 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
4553 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
4554 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4555 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4556 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4557 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
4558 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
4559 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
4560 /* gap */
4561 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4562 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4563 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
4564 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
4565 /* gap */
4566 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4567 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4568 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4569 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4570 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4571 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
4572 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
4573 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
4574 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
4575 /* gap */
4576 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4577 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
4578 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
4579 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
4580 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
4581 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4582 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
4583 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
4584 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
4585 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4586 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4587 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
4588 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4589 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
4590 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4591 /* gap */
4592 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4593 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4594 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4595 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4596 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4597 /* gap */
4598 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4599 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4600 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4601 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4602 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4603 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4604 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4605 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4606 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
4607 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
4608 /* gap */
4609 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4610 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
4611 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
4612 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
4613 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
4614 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
4615 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
4616 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
4617 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
4618 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
4619 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
4620 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
4621 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
4622 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
4623 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
4624 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
4625 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
4626 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
4627 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
4628 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
4629 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
4630 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
4631 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
4632 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4633 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4634 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4635 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4636 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
4637 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
4638 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
4639 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
4640 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
4641 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
4642 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
4643 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
4644 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
4645 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
4646 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4647 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4648 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
4649 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
4650 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
4651 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
4652 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4653 };
4654
4655 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
4656 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4657 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4658 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
4659 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
4660 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
4661 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
4662 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
4663 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
4664 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4665 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4666 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4667 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
4668 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
4669 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
4670 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
4671 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
4672 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
4673 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4674 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
4675 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4676 /* gap */
4677 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4678 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4679 /* gap */
4680 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4681 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4682 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
4683 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4684 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
4685 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
4686 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
4687 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
4688 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
4689 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
4690 /* gap */
4691 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4692 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4693 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4694 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4695 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
4696 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
4697 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
4698 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
4699 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4700 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4701 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4702 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4703 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4704 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
4705 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4706 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
4707 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
4708 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
4709 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
4710 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4711 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4712 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
4713 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4714 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4715 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4716 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4717 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4718 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4719 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4720 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4721 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4722 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4723 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4724 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
4725 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
4726 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
4727 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
4728 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4729 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4730 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4731 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
4732 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
4733 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
4734 /* gap */
4735 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4736 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4737 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
4738 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
4739 /* gap */
4740 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4741 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4742 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4743 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4744 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4745 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
4746 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
4747 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
4748 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
4749 /* gap */
4750 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4751 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
4752 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
4753 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
4754 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
4755 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4756 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
4757 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
4758 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
4759 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4760 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4761 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
4762 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4763 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
4764 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4765 /* gap */
4766 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4767 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4768 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4769 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4770 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4771 /* gap */
4772 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4773 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4774 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4775 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4776 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4777 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4778 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4779 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4780 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
4781 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
4782 /* gap */
4783 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4784 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
4785 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
4786 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
4787 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
4788 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
4789 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
4790 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
4791 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
4792 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
4793 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
4794 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
4795 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
4796 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
4797 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
4798 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
4799 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
4800 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
4801 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
4802 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
4803 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
4804 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
4805 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
4806 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4807 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4808 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4809 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4810 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
4811 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
4812 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
4813 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
4814 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
4815 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
4816 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
4817 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
4818 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
4819 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
4820 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4821 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4822 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
4823 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
4824 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
4825 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
4826 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4827 };
4828
4829 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
4830 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4831 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4832 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
4833 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
4834 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
4835 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
4836 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
4837 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
4838 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4839 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4840 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4841 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
4842 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
4843 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
4844 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
4845 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
4846 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
4847 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4848 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
4849 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4850 /* gap */
4851 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4852 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4853 /* gap */
4854 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4855 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4856 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
4857 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4858 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
4859 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
4860 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
4861 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
4862 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
4863 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
4864 /* gap */
4865 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4866 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4867 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4868 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4869 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
4870 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
4871 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
4872 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
4873 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4874 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4875 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4876 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4877 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4878 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
4879 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4880 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
4881 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
4882 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
4883 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
4884 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4885 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4886 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
4887 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4888 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4889 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4890 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4891 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4892 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4893 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4894 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4895 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4896 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4897 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4898 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
4899 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
4900 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
4901 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
4902 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4903 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4904 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4905 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
4906 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
4907 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
4908 /* gap */
4909 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4910 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4911 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
4912 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
4913 /* gap */
4914 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4915 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4916 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4917 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4918 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4919 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4920 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
4921 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
4922 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4923 /* gap */
4924 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4925 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4926 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4927 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4928 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
4929 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4930 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex},
4931 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
4932 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
4933 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4934 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4935 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
4936 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4937 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
4938 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4939 /* gap */
4940 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4941 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4942 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4943 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4944 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4945 /* gap */
4946 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4947 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4948 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4949 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4950 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4951 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4952 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4953 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4954 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
4955 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
4956 /* gap */
4957 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4958 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4959 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4960 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
4961 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
4962 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4963 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4964 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4965 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4966 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4967 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4968 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4969 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4970 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4971 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4972 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4973 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4974 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4975 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4976 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4977 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4978 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4979 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4980 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4981 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4982 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4983 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4984 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported},
4985 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported},
4986 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
4987 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
4988 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
4989 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
4990 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
4991 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
4992 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
4993 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported},
4994 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4995 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4996 {TGSI_OPCODE_UARL, 0, 0, tgsi_unsupported},
4997 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported},
4998 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4999 };