r600g: Unify comment style somewhat.
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_formats.h"
32 #include "r600_opcodes.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36 #include <byteswap.h>
37
38 int r600_find_vs_semantic_index(struct r600_shader *vs,
39 struct r600_shader *ps, int id)
40 {
41 struct r600_shader_io *input = &ps->input[id];
42
43 for (int i = 0; i < vs->noutput; i++) {
44 if (input->name == vs->output[i].name &&
45 input->sid == vs->output[i].sid) {
46 return i - 1;
47 }
48 }
49 return 0;
50 }
51
52 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
53 {
54 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
55 struct r600_shader *rshader = &shader->shader;
56 uint32_t *ptr;
57 int i;
58
59 /* copy new shader */
60 if (shader->bo == NULL) {
61 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
62 if (shader->bo == NULL) {
63 return -ENOMEM;
64 }
65 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
66 if (R600_BIG_ENDIAN) {
67 for (i = 0; i < rshader->bc.ndw; ++i) {
68 ptr[i] = bswap_32(rshader->bc.bytecode[i]);
69 }
70 } else {
71 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
72 }
73 r600_bo_unmap(rctx->radeon, shader->bo);
74 }
75 /* build state */
76 switch (rshader->processor_type) {
77 case TGSI_PROCESSOR_VERTEX:
78 if (rshader->family >= CHIP_CEDAR) {
79 evergreen_pipe_shader_vs(ctx, shader);
80 } else {
81 r600_pipe_shader_vs(ctx, shader);
82 }
83 break;
84 case TGSI_PROCESSOR_FRAGMENT:
85 if (rshader->family >= CHIP_CEDAR) {
86 evergreen_pipe_shader_ps(ctx, shader);
87 } else {
88 r600_pipe_shader_ps(ctx, shader);
89 }
90 break;
91 default:
92 return -EINVAL;
93 }
94 return 0;
95 }
96
97 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
98
99 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
100 {
101 static int dump_shaders = -1;
102 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
103 int r;
104
105 /* Would like some magic "get_bool_option_once" routine.
106 */
107 if (dump_shaders == -1)
108 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
109
110 if (dump_shaders) {
111 fprintf(stderr, "--------------------------------------------------------------\n");
112 tgsi_dump(tokens, 0);
113 }
114 shader->shader.family = r600_get_family(rctx->radeon);
115 r = r600_shader_from_tgsi(tokens, &shader->shader);
116 if (r) {
117 R600_ERR("translation from TGSI failed !\n");
118 return r;
119 }
120 r = r600_bc_build(&shader->shader.bc);
121 if (r) {
122 R600_ERR("building bytecode failed !\n");
123 return r;
124 }
125 if (dump_shaders) {
126 r600_bc_dump(&shader->shader.bc);
127 fprintf(stderr, "______________________________________________________________\n");
128 }
129 return r600_pipe_shader(ctx, shader);
130 }
131
132 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
133 {
134 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
135
136 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
137 r600_bc_clear(&shader->shader.bc);
138 }
139
140 /*
141 * tgsi -> r600 shader
142 */
143 struct r600_shader_tgsi_instruction;
144
/*
 * One decoded TGSI source operand, filled in by tgsi_src() and later
 * copied per channel into an ALU source by r600_bc_src().
 */
struct r600_shader_src {
	unsigned sel;        /* source select: register index plus file offset, or a special/literal selector */
	unsigned swizzle[4]; /* source channel read for each of the four destination channels */
	unsigned neg;        /* negate modifier */
	unsigned abs;        /* absolute-value modifier */
	unsigned rel;        /* V_SQ_REL_RELATIVE when the register is indirectly addressed */
	uint32_t value[4];   /* literal dwords, copied from the immediate when sel is a literal */
};
153
/*
 * Translation state used while converting one TGSI shader to r600 bytecode.
 */
struct r600_shader_ctx {
	struct tgsi_shader_info info;    /* filled by tgsi_scan_shader() */
	struct tgsi_parse_context parse; /* TGSI token parser state */
	const struct tgsi_token *tokens; /* token stream being translated */
	unsigned type;                   /* processor type taken from the parsed header */
	unsigned file_offset[TGSI_FILE_COUNT]; /* base selector added to register indices of each TGSI file */
	unsigned temp_reg;               /* first driver temporary, see r600_get_temp() */
	unsigned ar_reg;                 /* register placed right after the TGSI temporaries; read as the base for relative constant fetches */
	struct r600_shader_tgsi_instruction *inst_info; /* table entry of the instruction being translated */
	struct r600_bc *bc;              /* bytecode being generated */
	struct r600_shader *shader;      /* shader being filled in */
	struct r600_shader_src src[3];   /* decoded sources of the current instruction */
	u32 *literals;                   /* collected immediates, four dwords each */
	u32 nliterals;                   /* number of immediates stored in literals */
	u32 max_driver_temp_used;        /* driver temporaries handed out for the current instruction */
	/* needed for evergreen interpolation */
	boolean input_centroid;          /* some input uses centroid interpolation */
	boolean input_linear;            /* some input uses linear interpolation */
	boolean input_perspective;       /* some input uses perspective interpolation */
	int num_interp_gpr;              /* value computed by evergreen_gpr_count() */
};
175
/*
 * Entry of the per-chip TGSI opcode tables; the translator indexes the
 * table with the TGSI opcode and calls `process` to emit the instruction.
 */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode; /* TGSI opcode this entry describes */
	unsigned is_op3;      /* NOTE(review): presumably marks three-operand ALU opcodes - confirm in the tables */
	unsigned r600_opcode; /* hardware opcode copied into the emitted ALU instruction */
	int (*process)(struct r600_shader_ctx *ctx); /* emitter; returns 0 or a negative errno value */
};
182
183 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
184 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
185
186 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
187 {
188 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
189 int j;
190
191 if (i->Instruction.NumDstRegs > 1) {
192 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
193 return -EINVAL;
194 }
195 if (i->Instruction.Predicate) {
196 R600_ERR("predicate unsupported\n");
197 return -EINVAL;
198 }
199 #if 0
200 if (i->Instruction.Label) {
201 R600_ERR("label unsupported\n");
202 return -EINVAL;
203 }
204 #endif
205 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
206 if (i->Src[j].Register.Dimension) {
207 R600_ERR("unsupported src %d (dimension %d)\n", j,
208 i->Src[j].Register.Dimension);
209 return -EINVAL;
210 }
211 }
212 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
213 if (i->Dst[j].Register.Dimension) {
214 R600_ERR("unsupported dst (dimension)\n");
215 return -EINVAL;
216 }
217 }
218 return 0;
219 }
220
/*
 * Emit the evergreen ALU instructions that interpolate fragment shader
 * input number `input` into its GPR.
 *
 * Eight ALU instructions are added as two groups of four (INTERP_ZW, then
 * INTERP_XY); only the middle four of them write the input's GPR.
 *
 * Returns 0 on success or the error returned by r600_bc_add_alu().
 */
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bc_alu alu;
	int gpr = 0, base_chan = 0;
	int ij_index = 0;

	/* Select which interpolation pair to read, based on this input's
	 * interpolation mode and on which modes the shader uses overall.
	 * NOTE(review): presumably these are the barycentric i/j pairs
	 * counted by evergreen_gpr_count() - confirm. */
	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
		ij_index = 0;
		if (ctx->shader->input[input].centroid)
			ij_index++;
	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
		ij_index = 0;
		/* if we have perspective add one */
		if (ctx->input_perspective) {
			ij_index++;
			/* if we have perspective centroid */
			if (ctx->input_centroid)
				ij_index++;
		}
		if (ctx->shader->input[input].centroid)
			ij_index++;
	}

	/* work out gpr and base_chan from index: two pairs per GPR, the even
	 * pair ends at channel 1 and the odd pair at channel 3 */
	gpr = ij_index / 2;
	base_chan = (2 * (ij_index % 2)) + 1;

	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));

		/* first group of four is INTERP_ZW, second group is INTERP_XY */
		if (i < 4)
			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
		else
			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;

		/* only slots 2..5 (ZW channels 2,3 and XY channels 0,1) store
		 * their result */
		if ((i > 1) && (i < 6)) {
			alu.dst.sel = ctx->shader->input[input].gpr;
			alu.dst.write = 1;
		}

		alu.dst.chan = i % 4;

		/* alternate between the two channels of the selected pair */
		alu.src[0].sel = gpr;
		alu.src[0].chan = (base_chan - (i % 2));

		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;

		alu.bank_swizzle_force = SQ_ALU_VEC_210;
		/* close each group of four */
		if ((i % 4) == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
278
279
280 static int tgsi_declaration(struct r600_shader_ctx *ctx)
281 {
282 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
283 unsigned i;
284 int r;
285
286 switch (d->Declaration.File) {
287 case TGSI_FILE_INPUT:
288 i = ctx->shader->ninput++;
289 ctx->shader->input[i].name = d->Semantic.Name;
290 ctx->shader->input[i].sid = d->Semantic.Index;
291 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
292 ctx->shader->input[i].centroid = d->Declaration.Centroid;
293 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
294 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
295 /* turn input into interpolate on EG */
296 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
297 if (ctx->shader->input[i].interpolate > 0) {
298 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
299 evergreen_interp_alu(ctx, i);
300 }
301 }
302 }
303 break;
304 case TGSI_FILE_OUTPUT:
305 i = ctx->shader->noutput++;
306 ctx->shader->output[i].name = d->Semantic.Name;
307 ctx->shader->output[i].sid = d->Semantic.Index;
308 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
309 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
310 break;
311 case TGSI_FILE_CONSTANT:
312 case TGSI_FILE_TEMPORARY:
313 case TGSI_FILE_SAMPLER:
314 case TGSI_FILE_ADDRESS:
315 break;
316
317 case TGSI_FILE_SYSTEM_VALUE:
318 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
319 struct r600_bc_alu alu;
320 memset(&alu, 0, sizeof(struct r600_bc_alu));
321
322 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
323 alu.src[0].sel = 0;
324 alu.src[0].chan = 3;
325
326 alu.dst.sel = 0;
327 alu.dst.chan = 3;
328 alu.dst.write = 1;
329 alu.last = 1;
330
331 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
332 return r;
333 break;
334 }
335
336 default:
337 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
338 return -EINVAL;
339 }
340 return 0;
341 }
342
343 static int r600_get_temp(struct r600_shader_ctx *ctx)
344 {
345 return ctx->temp_reg + ctx->max_driver_temp_used++;
346 }
347
348 /*
349 * for evergreen we need to scan the shader to find the number of GPRs we need to
350 * reserve for interpolation.
351 *
352 * we need to know if we are going to emit
353 * any centroid inputs
354 * if perspective and linear are required
355 */
356 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
357 {
358 int i;
359 int num_baryc;
360
361 ctx->input_linear = FALSE;
362 ctx->input_perspective = FALSE;
363 ctx->input_centroid = FALSE;
364 ctx->num_interp_gpr = 1;
365
366 /* any centroid inputs */
367 for (i = 0; i < ctx->info.num_inputs; i++) {
368 /* skip position/face */
369 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
370 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
371 continue;
372 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
373 ctx->input_linear = TRUE;
374 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
375 ctx->input_perspective = TRUE;
376 if (ctx->info.input_centroid[i])
377 ctx->input_centroid = TRUE;
378 }
379
380 num_baryc = 0;
381 /* ignoring sample for now */
382 if (ctx->input_perspective)
383 num_baryc++;
384 if (ctx->input_linear)
385 num_baryc++;
386 if (ctx->input_centroid)
387 num_baryc *= 2;
388
389 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
390
391 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
392 return ctx->num_interp_gpr;
393 }
394
395 static void tgsi_src(struct r600_shader_ctx *ctx,
396 const struct tgsi_full_src_register *tgsi_src,
397 struct r600_shader_src *r600_src)
398 {
399 memset(r600_src, 0, sizeof(*r600_src));
400 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
401 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
402 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
403 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
404 r600_src->neg = tgsi_src->Register.Negate;
405 r600_src->abs = tgsi_src->Register.Absolute;
406
407 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
408 int index;
409 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
410 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
411 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
412
413 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
414 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
415 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
416 return;
417 }
418 index = tgsi_src->Register.Index;
419 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
420 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
421 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
422 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
423 r600_src->swizzle[0] = 3;
424 r600_src->swizzle[1] = 3;
425 r600_src->swizzle[2] = 3;
426 r600_src->swizzle[3] = 3;
427 r600_src->sel = 0;
428 } else {
429 if (tgsi_src->Register.Indirect)
430 r600_src->rel = V_SQ_REL_RELATIVE;
431 r600_src->sel = tgsi_src->Register.Index;
432 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
433 }
434 }
435
436 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
437 {
438 struct r600_bc_vtx vtx;
439 unsigned int ar_reg;
440 int r;
441
442 if (offset) {
443 struct r600_bc_alu alu;
444
445 memset(&alu, 0, sizeof(alu));
446
447 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
448 alu.src[0].sel = ctx->ar_reg;
449
450 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
451 alu.src[1].value = offset;
452
453 alu.dst.sel = dst_reg;
454 alu.dst.write = 1;
455 alu.last = 1;
456
457 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
458 return r;
459
460 ar_reg = dst_reg;
461 } else {
462 ar_reg = ctx->ar_reg;
463 }
464
465 memset(&vtx, 0, sizeof(vtx));
466 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
467 vtx.src_gpr = ar_reg;
468 vtx.mega_fetch_count = 16;
469 vtx.dst_gpr = dst_reg;
470 vtx.dst_sel_x = 0; /* SEL_X */
471 vtx.dst_sel_y = 1; /* SEL_Y */
472 vtx.dst_sel_z = 2; /* SEL_Z */
473 vtx.dst_sel_w = 3; /* SEL_W */
474 vtx.data_format = FMT_32_32_32_32_FLOAT;
475 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
476 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
477 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
478 vtx.endian = r600_endian_swap(32);
479
480 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
481 return r;
482
483 return 0;
484 }
485
/*
 * Decode the sources of the current instruction into ctx->src[] and move
 * constant-file operands into driver temporaries where needed.
 *
 * Relatively addressed constants are always fetched into a temporary via
 * tgsi_fetch_rel_const().  Directly addressed constants are copied into a
 * temporary with four MOVs while more than one constant operand is still
 * pending, so the last pending constant is left in place.
 * NOTE(review): presumably this works around a limit on how many
 * constants one ALU instruction may read - confirm.
 *
 * Returns 0 on success or the error from the bytecode builder.
 */
static int tgsi_split_constant(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int i, j, k, nconst, r;

	/* decode every source and count those read from the constant file */
	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
			nconst++;
		}
		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
	}
	/* j counts the constant operands that may still be moved to a temp */
	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
			continue;
		}

		if (ctx->src[i].rel) {
			int treg = r600_get_temp(ctx);
			/* 512 is the base selector assigned to the constant file,
			 * so this passes the plain constant index */
			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
				return r;

			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
			j--;
		} else if (j > 0) {
			int treg = r600_get_temp(ctx);
			/* copy all four channels of the constant into the temp */
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bc_alu));
				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
				alu.src[0].sel = ctx->src[i].sel;
				alu.src[0].chan = k;
				alu.src[0].rel = ctx->src[i].rel;
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bc_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			ctx->src[i].sel = treg;
			ctx->src[i].rel =0;
			j--;
		}
	}
	return 0;
}
535
536 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
537 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
538 {
539 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
540 struct r600_bc_alu alu;
541 int i, j, k, nliteral, r;
542
543 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
544 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
545 nliteral++;
546 }
547 }
548 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
549 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
550 int treg = r600_get_temp(ctx);
551 for (k = 0; k < 4; k++) {
552 memset(&alu, 0, sizeof(struct r600_bc_alu));
553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
554 alu.src[0].sel = ctx->src[i].sel;
555 alu.src[0].chan = k;
556 alu.src[0].value = ctx->src[i].value[k];
557 alu.dst.sel = treg;
558 alu.dst.chan = k;
559 alu.dst.write = 1;
560 if (k == 3)
561 alu.last = 1;
562 r = r600_bc_add_alu(ctx->bc, &alu);
563 if (r)
564 return r;
565 }
566 ctx->src[i].sel = treg;
567 j--;
568 }
569 }
570 return 0;
571 }
572
573 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
574 {
575 struct tgsi_full_immediate *immediate;
576 struct tgsi_full_property *property;
577 struct r600_shader_ctx ctx;
578 struct r600_bc_output output[32];
579 unsigned output_done, noutput;
580 unsigned opcode;
581 int i, r = 0, pos0;
582
583 ctx.bc = &shader->bc;
584 ctx.shader = shader;
585 r = r600_bc_init(ctx.bc, shader->family);
586 if (r)
587 return r;
588 ctx.tokens = tokens;
589 tgsi_scan_shader(tokens, &ctx.info);
590 tgsi_parse_init(&ctx.parse, tokens);
591 ctx.type = ctx.parse.FullHeader.Processor.Processor;
592 shader->processor_type = ctx.type;
593 ctx.bc->type = shader->processor_type;
594
595 /* register allocations */
596 /* Values [0,127] correspond to GPR[0..127].
597 * Values [128,159] correspond to constant buffer bank 0
598 * Values [160,191] correspond to constant buffer bank 1
599 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
600 * Values [256,287] correspond to constant buffer bank 2 (EG)
601 * Values [288,319] correspond to constant buffer bank 3 (EG)
602 * Other special values are shown in the list below.
603 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
604 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
605 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
606 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
607 * 248 SQ_ALU_SRC_0: special constant 0.0.
608 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
609 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
610 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
611 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
612 * 253 SQ_ALU_SRC_LITERAL: literal constant.
613 * 254 SQ_ALU_SRC_PV: previous vector result.
614 * 255 SQ_ALU_SRC_PS: previous scalar result.
615 */
616 for (i = 0; i < TGSI_FILE_COUNT; i++) {
617 ctx.file_offset[i] = 0;
618 }
619 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
620 ctx.file_offset[TGSI_FILE_INPUT] = 1;
621 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
622 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
623 } else {
624 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
625 }
626 }
627 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
628 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
629 }
630 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
631 ctx.info.file_count[TGSI_FILE_INPUT];
632 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
633 ctx.info.file_count[TGSI_FILE_OUTPUT];
634
635 /* Outside the GPR range. This will be translated to one of the
636 * kcache banks later. */
637 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
638
639 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
640 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
641 ctx.info.file_count[TGSI_FILE_TEMPORARY];
642 ctx.temp_reg = ctx.ar_reg + 1;
643
644 ctx.nliterals = 0;
645 ctx.literals = NULL;
646 shader->fs_write_all = FALSE;
647 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
648 tgsi_parse_token(&ctx.parse);
649 switch (ctx.parse.FullToken.Token.Type) {
650 case TGSI_TOKEN_TYPE_IMMEDIATE:
651 immediate = &ctx.parse.FullToken.FullImmediate;
652 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
653 if(ctx.literals == NULL) {
654 r = -ENOMEM;
655 goto out_err;
656 }
657 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
658 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
659 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
660 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
661 ctx.nliterals++;
662 break;
663 case TGSI_TOKEN_TYPE_DECLARATION:
664 r = tgsi_declaration(&ctx);
665 if (r)
666 goto out_err;
667 break;
668 case TGSI_TOKEN_TYPE_INSTRUCTION:
669 r = tgsi_is_supported(&ctx);
670 if (r)
671 goto out_err;
672 ctx.max_driver_temp_used = 0;
673 /* reserve first tmp for everyone */
674 r600_get_temp(&ctx);
675
676 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
677 if ((r = tgsi_split_constant(&ctx)))
678 goto out_err;
679 if ((r = tgsi_split_literal_constant(&ctx)))
680 goto out_err;
681 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
682 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
683 else
684 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
685 r = ctx.inst_info->process(&ctx);
686 if (r)
687 goto out_err;
688 break;
689 case TGSI_TOKEN_TYPE_PROPERTY:
690 property = &ctx.parse.FullToken.FullProperty;
691 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
692 if (property->u[0].Data == 1)
693 shader->fs_write_all = TRUE;
694 }
695 break;
696 default:
697 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
698 r = -EINVAL;
699 goto out_err;
700 }
701 }
702 /* export output */
703 noutput = shader->noutput;
704 for (i = 0, pos0 = 0; i < noutput; i++) {
705 memset(&output[i], 0, sizeof(struct r600_bc_output));
706 output[i].gpr = shader->output[i].gpr;
707 output[i].elem_size = 3;
708 output[i].swizzle_x = 0;
709 output[i].swizzle_y = 1;
710 output[i].swizzle_z = 2;
711 output[i].swizzle_w = 3;
712 output[i].burst_count = 1;
713 output[i].barrier = 1;
714 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
715 output[i].array_base = i - pos0;
716 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
717 switch (ctx.type) {
718 case TGSI_PROCESSOR_VERTEX:
719 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
720 output[i].array_base = 60;
721 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
722 /* position doesn't count in array_base */
723 pos0++;
724 }
725 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
726 output[i].array_base = 61;
727 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
728 /* position doesn't count in array_base */
729 pos0++;
730 }
731 break;
732 case TGSI_PROCESSOR_FRAGMENT:
733 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
734 output[i].array_base = shader->output[i].sid;
735 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
736 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
737 output[i].array_base = 61;
738 output[i].swizzle_x = 2;
739 output[i].swizzle_y = 7;
740 output[i].swizzle_z = output[i].swizzle_w = 7;
741 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
742 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
743 output[i].array_base = 61;
744 output[i].swizzle_x = 7;
745 output[i].swizzle_y = 1;
746 output[i].swizzle_z = output[i].swizzle_w = 7;
747 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
748 } else {
749 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
750 r = -EINVAL;
751 goto out_err;
752 }
753 break;
754 default:
755 R600_ERR("unsupported processor type %d\n", ctx.type);
756 r = -EINVAL;
757 goto out_err;
758 }
759 }
760 /* add fake param output for vertex shader if no param is exported */
761 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
762 for (i = 0, pos0 = 0; i < noutput; i++) {
763 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
764 pos0 = 1;
765 break;
766 }
767 }
768 if (!pos0) {
769 memset(&output[i], 0, sizeof(struct r600_bc_output));
770 output[i].gpr = 0;
771 output[i].elem_size = 3;
772 output[i].swizzle_x = 0;
773 output[i].swizzle_y = 1;
774 output[i].swizzle_z = 2;
775 output[i].swizzle_w = 3;
776 output[i].burst_count = 1;
777 output[i].barrier = 1;
778 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
779 output[i].array_base = 0;
780 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
781 noutput++;
782 }
783 }
784 /* add fake pixel export */
785 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
786 memset(&output[0], 0, sizeof(struct r600_bc_output));
787 output[0].gpr = 0;
788 output[0].elem_size = 3;
789 output[0].swizzle_x = 7;
790 output[0].swizzle_y = 7;
791 output[0].swizzle_z = 7;
792 output[0].swizzle_w = 7;
793 output[0].burst_count = 1;
794 output[0].barrier = 1;
795 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
796 output[0].array_base = 0;
797 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
798 noutput++;
799 }
800 /* set export done on last export of each type */
801 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
802 if (i == (noutput - 1)) {
803 output[i].end_of_program = 1;
804 }
805 if (!(output_done & (1 << output[i].type))) {
806 output_done |= (1 << output[i].type);
807 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
808 }
809 }
810 /* add output to bytecode */
811 for (i = 0; i < noutput; i++) {
812 r = r600_bc_add_output(ctx.bc, &output[i]);
813 if (r)
814 goto out_err;
815 }
816 free(ctx.literals);
817 tgsi_parse_free(&ctx.parse);
818 return 0;
819 out_err:
820 free(ctx.literals);
821 tgsi_parse_free(&ctx.parse);
822 return r;
823 }
824
825 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
826 {
827 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
828 return -EINVAL;
829 }
830
/*
 * Table handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx; /* nothing to emit */
	return 0;
}
835
836 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
837 const struct r600_shader_src *shader_src,
838 unsigned chan)
839 {
840 bc_src->sel = shader_src->sel;
841 bc_src->chan = shader_src->swizzle[chan];
842 bc_src->neg = shader_src->neg;
843 bc_src->abs = shader_src->abs;
844 bc_src->rel = shader_src->rel;
845 bc_src->value = shader_src->value[bc_src->chan];
846 }
847
848 static void tgsi_dst(struct r600_shader_ctx *ctx,
849 const struct tgsi_full_dst_register *tgsi_dst,
850 unsigned swizzle,
851 struct r600_bc_alu_dst *r600_dst)
852 {
853 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
854
855 r600_dst->sel = tgsi_dst->Register.Index;
856 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
857 r600_dst->chan = swizzle;
858 r600_dst->write = 1;
859 if (tgsi_dst->Register.Indirect)
860 r600_dst->rel = V_SQ_REL_RELATIVE;
861 if (inst->Instruction.Saturate) {
862 r600_dst->clamp = 1;
863 }
864 }
865
/*
 * Return the highest channel (0..3) enabled in `writemask`, or 0 when none
 * of the four low bits is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan downwards; channel 0 is also the "nothing set" answer */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
877
878 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
879 {
880 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
881 struct r600_bc_alu alu;
882 int i, j, r;
883 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
884
885 for (i = 0; i < lasti + 1; i++) {
886 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
887 continue;
888
889 memset(&alu, 0, sizeof(struct r600_bc_alu));
890 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
891
892 alu.inst = ctx->inst_info->r600_opcode;
893 if (!swap) {
894 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
895 r600_bc_src(&alu.src[j], &ctx->src[j], i);
896 }
897 } else {
898 r600_bc_src(&alu.src[0], &ctx->src[1], i);
899 r600_bc_src(&alu.src[1], &ctx->src[0], i);
900 }
901 /* handle some special cases */
902 switch (ctx->inst_info->tgsi_opcode) {
903 case TGSI_OPCODE_SUB:
904 alu.src[1].neg = 1;
905 break;
906 case TGSI_OPCODE_ABS:
907 alu.src[0].abs = 1;
908 if (alu.src[0].neg)
909 alu.src[0].neg = 0;
910 break;
911 default:
912 break;
913 }
914 if (i == lasti) {
915 alu.last = 1;
916 }
917 r = r600_bc_add_alu(ctx->bc, &alu);
918 if (r)
919 return r;
920 }
921 return 0;
922 }
923
/* Component-wise opcode with its sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
928
/* Component-wise opcode with its first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
933
934 /*
935 * r600 - trunc to -PI..PI range
936 * r700 - normalize by dividing by 2PI
937 * see fdo bug 27901
938 */
939 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
940 {
941 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
942 static float double_pi = 3.1415926535 * 2;
943 static float neg_pi = -3.1415926535;
944
945 int r;
946 struct r600_bc_alu alu;
947
948 memset(&alu, 0, sizeof(struct r600_bc_alu));
949 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
950 alu.is_op3 = 1;
951
952 alu.dst.chan = 0;
953 alu.dst.sel = ctx->temp_reg;
954 alu.dst.write = 1;
955
956 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
957
958 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
959 alu.src[1].chan = 0;
960 alu.src[1].value = *(uint32_t *)&half_inv_pi;
961 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
962 alu.src[2].chan = 0;
963 alu.last = 1;
964 r = r600_bc_add_alu(ctx->bc, &alu);
965 if (r)
966 return r;
967
968 memset(&alu, 0, sizeof(struct r600_bc_alu));
969 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
970
971 alu.dst.chan = 0;
972 alu.dst.sel = ctx->temp_reg;
973 alu.dst.write = 1;
974
975 alu.src[0].sel = ctx->temp_reg;
976 alu.src[0].chan = 0;
977 alu.last = 1;
978 r = r600_bc_add_alu(ctx->bc, &alu);
979 if (r)
980 return r;
981
982 memset(&alu, 0, sizeof(struct r600_bc_alu));
983 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
984 alu.is_op3 = 1;
985
986 alu.dst.chan = 0;
987 alu.dst.sel = ctx->temp_reg;
988 alu.dst.write = 1;
989
990 alu.src[0].sel = ctx->temp_reg;
991 alu.src[0].chan = 0;
992
993 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
994 alu.src[1].chan = 0;
995 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
996 alu.src[2].chan = 0;
997
998 if (ctx->bc->chiprev == CHIPREV_R600) {
999 alu.src[1].value = *(uint32_t *)&double_pi;
1000 alu.src[2].value = *(uint32_t *)&neg_pi;
1001 } else {
1002 alu.src[1].sel = V_SQ_ALU_SRC_1;
1003 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1004 alu.src[2].neg = 1;
1005 }
1006
1007 alu.last = 1;
1008 r = r600_bc_add_alu(ctx->bc, &alu);
1009 if (r)
1010 return r;
1011 return 0;
1012 }
1013
1014 static int tgsi_trig(struct r600_shader_ctx *ctx)
1015 {
1016 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1017 struct r600_bc_alu alu;
1018 int i, r;
1019 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1020
1021 r = tgsi_setup_trig(ctx);
1022 if (r)
1023 return r;
1024
1025 memset(&alu, 0, sizeof(struct r600_bc_alu));
1026 alu.inst = ctx->inst_info->r600_opcode;
1027 alu.dst.chan = 0;
1028 alu.dst.sel = ctx->temp_reg;
1029 alu.dst.write = 1;
1030
1031 alu.src[0].sel = ctx->temp_reg;
1032 alu.src[0].chan = 0;
1033 alu.last = 1;
1034 r = r600_bc_add_alu(ctx->bc, &alu);
1035 if (r)
1036 return r;
1037
1038 /* replicate result */
1039 for (i = 0; i < lasti + 1; i++) {
1040 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1041 continue;
1042
1043 memset(&alu, 0, sizeof(struct r600_bc_alu));
1044 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1045
1046 alu.src[0].sel = ctx->temp_reg;
1047 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1048 if (i == lasti)
1049 alu.last = 1;
1050 r = r600_bc_add_alu(ctx->bc, &alu);
1051 if (r)
1052 return r;
1053 }
1054 return 0;
1055 }
1056
1057 static int tgsi_scs(struct r600_shader_ctx *ctx)
1058 {
1059 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1060 struct r600_bc_alu alu;
1061 int r;
1062
1063 /* We'll only need the trig stuff if we are going to write to the
1064 * X or Y components of the destination vector.
1065 */
1066 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1067 r = tgsi_setup_trig(ctx);
1068 if (r)
1069 return r;
1070 }
1071
1072 /* dst.x = COS */
1073 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1074 memset(&alu, 0, sizeof(struct r600_bc_alu));
1075 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1076 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1077
1078 alu.src[0].sel = ctx->temp_reg;
1079 alu.src[0].chan = 0;
1080 alu.last = 1;
1081 r = r600_bc_add_alu(ctx->bc, &alu);
1082 if (r)
1083 return r;
1084 }
1085
1086 /* dst.y = SIN */
1087 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1088 memset(&alu, 0, sizeof(struct r600_bc_alu));
1089 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1090 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1091
1092 alu.src[0].sel = ctx->temp_reg;
1093 alu.src[0].chan = 0;
1094 alu.last = 1;
1095 r = r600_bc_add_alu(ctx->bc, &alu);
1096 if (r)
1097 return r;
1098 }
1099
1100 /* dst.z = 0.0; */
1101 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1102 memset(&alu, 0, sizeof(struct r600_bc_alu));
1103
1104 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1105
1106 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1107
1108 alu.src[0].sel = V_SQ_ALU_SRC_0;
1109 alu.src[0].chan = 0;
1110
1111 alu.last = 1;
1112
1113 r = r600_bc_add_alu(ctx->bc, &alu);
1114 if (r)
1115 return r;
1116 }
1117
1118 /* dst.w = 1.0; */
1119 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1120 memset(&alu, 0, sizeof(struct r600_bc_alu));
1121
1122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1123
1124 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1125
1126 alu.src[0].sel = V_SQ_ALU_SRC_1;
1127 alu.src[0].chan = 0;
1128
1129 alu.last = 1;
1130
1131 r = r600_bc_add_alu(ctx->bc, &alu);
1132 if (r)
1133 return r;
1134 }
1135
1136 return 0;
1137 }
1138
1139 static int tgsi_kill(struct r600_shader_ctx *ctx)
1140 {
1141 struct r600_bc_alu alu;
1142 int i, r;
1143
1144 for (i = 0; i < 4; i++) {
1145 memset(&alu, 0, sizeof(struct r600_bc_alu));
1146 alu.inst = ctx->inst_info->r600_opcode;
1147
1148 alu.dst.chan = i;
1149
1150 alu.src[0].sel = V_SQ_ALU_SRC_0;
1151
1152 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1153 alu.src[1].sel = V_SQ_ALU_SRC_1;
1154 alu.src[1].neg = 1;
1155 } else {
1156 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1157 }
1158 if (i == 3) {
1159 alu.last = 1;
1160 }
1161 r = r600_bc_add_alu(ctx->bc, &alu);
1162 if (r)
1163 return r;
1164 }
1165
1166 /* kill must be last in ALU */
1167 ctx->bc->force_add_cf = 1;
1168 ctx->shader->uses_kill = TRUE;
1169 return 0;
1170 }
1171
1172 static int tgsi_lit(struct r600_shader_ctx *ctx)
1173 {
1174 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1175 struct r600_bc_alu alu;
1176 int r;
1177
1178 /* dst.x, <- 1.0 */
1179 memset(&alu, 0, sizeof(struct r600_bc_alu));
1180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1181 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1182 alu.src[0].chan = 0;
1183 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1184 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1185 r = r600_bc_add_alu(ctx->bc, &alu);
1186 if (r)
1187 return r;
1188
1189 /* dst.y = max(src.x, 0.0) */
1190 memset(&alu, 0, sizeof(struct r600_bc_alu));
1191 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1192 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1193 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1194 alu.src[1].chan = 0;
1195 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1196 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1197 r = r600_bc_add_alu(ctx->bc, &alu);
1198 if (r)
1199 return r;
1200
1201 /* dst.w, <- 1.0 */
1202 memset(&alu, 0, sizeof(struct r600_bc_alu));
1203 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1204 alu.src[0].sel = V_SQ_ALU_SRC_1;
1205 alu.src[0].chan = 0;
1206 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1207 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1208 alu.last = 1;
1209 r = r600_bc_add_alu(ctx->bc, &alu);
1210 if (r)
1211 return r;
1212
1213 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1214 {
1215 int chan;
1216 int sel;
1217
1218 /* dst.z = log(src.y) */
1219 memset(&alu, 0, sizeof(struct r600_bc_alu));
1220 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1221 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1222 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1223 alu.last = 1;
1224 r = r600_bc_add_alu(ctx->bc, &alu);
1225 if (r)
1226 return r;
1227
1228 chan = alu.dst.chan;
1229 sel = alu.dst.sel;
1230
1231 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1232 memset(&alu, 0, sizeof(struct r600_bc_alu));
1233 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1234 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1235 alu.src[1].sel = sel;
1236 alu.src[1].chan = chan;
1237
1238 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1239 alu.dst.sel = ctx->temp_reg;
1240 alu.dst.chan = 0;
1241 alu.dst.write = 1;
1242 alu.is_op3 = 1;
1243 alu.last = 1;
1244 r = r600_bc_add_alu(ctx->bc, &alu);
1245 if (r)
1246 return r;
1247
1248 /* dst.z = exp(tmp.x) */
1249 memset(&alu, 0, sizeof(struct r600_bc_alu));
1250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1251 alu.src[0].sel = ctx->temp_reg;
1252 alu.src[0].chan = 0;
1253 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1254 alu.last = 1;
1255 r = r600_bc_add_alu(ctx->bc, &alu);
1256 if (r)
1257 return r;
1258 }
1259 return 0;
1260 }
1261
1262 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1263 {
1264 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1265 struct r600_bc_alu alu;
1266 int i, r;
1267
1268 memset(&alu, 0, sizeof(struct r600_bc_alu));
1269
1270 /* FIXME:
1271 * For state trackers other than OpenGL, we'll want to use
1272 * _RECIPSQRT_IEEE instead.
1273 */
1274 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1275
1276 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1277 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1278 alu.src[i].abs = 1;
1279 }
1280 alu.dst.sel = ctx->temp_reg;
1281 alu.dst.write = 1;
1282 alu.last = 1;
1283 r = r600_bc_add_alu(ctx->bc, &alu);
1284 if (r)
1285 return r;
1286 /* replicate result */
1287 return tgsi_helper_tempx_replicate(ctx);
1288 }
1289
1290 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1291 {
1292 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1293 struct r600_bc_alu alu;
1294 int i, r;
1295
1296 for (i = 0; i < 4; i++) {
1297 memset(&alu, 0, sizeof(struct r600_bc_alu));
1298 alu.src[0].sel = ctx->temp_reg;
1299 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1300 alu.dst.chan = i;
1301 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1302 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1303 if (i == 3)
1304 alu.last = 1;
1305 r = r600_bc_add_alu(ctx->bc, &alu);
1306 if (r)
1307 return r;
1308 }
1309 return 0;
1310 }
1311
1312 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1313 {
1314 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1315 struct r600_bc_alu alu;
1316 int i, r;
1317
1318 memset(&alu, 0, sizeof(struct r600_bc_alu));
1319 alu.inst = ctx->inst_info->r600_opcode;
1320 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1321 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1322 }
1323 alu.dst.sel = ctx->temp_reg;
1324 alu.dst.write = 1;
1325 alu.last = 1;
1326 r = r600_bc_add_alu(ctx->bc, &alu);
1327 if (r)
1328 return r;
1329 /* replicate result */
1330 return tgsi_helper_tempx_replicate(ctx);
1331 }
1332
1333 static int tgsi_pow(struct r600_shader_ctx *ctx)
1334 {
1335 struct r600_bc_alu alu;
1336 int r;
1337
1338 /* LOG2(a) */
1339 memset(&alu, 0, sizeof(struct r600_bc_alu));
1340 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1341 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1342 alu.dst.sel = ctx->temp_reg;
1343 alu.dst.write = 1;
1344 alu.last = 1;
1345 r = r600_bc_add_alu(ctx->bc, &alu);
1346 if (r)
1347 return r;
1348 /* b * LOG2(a) */
1349 memset(&alu, 0, sizeof(struct r600_bc_alu));
1350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1351 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1352 alu.src[1].sel = ctx->temp_reg;
1353 alu.dst.sel = ctx->temp_reg;
1354 alu.dst.write = 1;
1355 alu.last = 1;
1356 r = r600_bc_add_alu(ctx->bc, &alu);
1357 if (r)
1358 return r;
1359 /* POW(a,b) = EXP2(b * LOG2(a))*/
1360 memset(&alu, 0, sizeof(struct r600_bc_alu));
1361 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1362 alu.src[0].sel = ctx->temp_reg;
1363 alu.dst.sel = ctx->temp_reg;
1364 alu.dst.write = 1;
1365 alu.last = 1;
1366 r = r600_bc_add_alu(ctx->bc, &alu);
1367 if (r)
1368 return r;
1369 return tgsi_helper_tempx_replicate(ctx);
1370 }
1371
1372 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1373 {
1374 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1375 struct r600_bc_alu alu;
1376 int i, r;
1377
1378 /* tmp = (src > 0 ? 1 : src) */
1379 for (i = 0; i < 4; i++) {
1380 memset(&alu, 0, sizeof(struct r600_bc_alu));
1381 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1382 alu.is_op3 = 1;
1383
1384 alu.dst.sel = ctx->temp_reg;
1385 alu.dst.chan = i;
1386
1387 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1388 alu.src[1].sel = V_SQ_ALU_SRC_1;
1389 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1390
1391 if (i == 3)
1392 alu.last = 1;
1393 r = r600_bc_add_alu(ctx->bc, &alu);
1394 if (r)
1395 return r;
1396 }
1397
1398 /* dst = (-tmp > 0 ? -1 : tmp) */
1399 for (i = 0; i < 4; i++) {
1400 memset(&alu, 0, sizeof(struct r600_bc_alu));
1401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1402 alu.is_op3 = 1;
1403 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1404
1405 alu.src[0].sel = ctx->temp_reg;
1406 alu.src[0].chan = i;
1407 alu.src[0].neg = 1;
1408
1409 alu.src[1].sel = V_SQ_ALU_SRC_1;
1410 alu.src[1].neg = 1;
1411
1412 alu.src[2].sel = ctx->temp_reg;
1413 alu.src[2].chan = i;
1414
1415 if (i == 3)
1416 alu.last = 1;
1417 r = r600_bc_add_alu(ctx->bc, &alu);
1418 if (r)
1419 return r;
1420 }
1421 return 0;
1422 }
1423
1424 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1425 {
1426 struct r600_bc_alu alu;
1427 int i, r;
1428
1429 for (i = 0; i < 4; i++) {
1430 memset(&alu, 0, sizeof(struct r600_bc_alu));
1431 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1432 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1433 alu.dst.chan = i;
1434 } else {
1435 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1436 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1437 alu.src[0].sel = ctx->temp_reg;
1438 alu.src[0].chan = i;
1439 }
1440 if (i == 3) {
1441 alu.last = 1;
1442 }
1443 r = r600_bc_add_alu(ctx->bc, &alu);
1444 if (r)
1445 return r;
1446 }
1447 return 0;
1448 }
1449
1450 static int tgsi_op3(struct r600_shader_ctx *ctx)
1451 {
1452 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1453 struct r600_bc_alu alu;
1454 int i, j, r;
1455 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1456
1457 for (i = 0; i < lasti + 1; i++) {
1458 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1459 continue;
1460
1461 memset(&alu, 0, sizeof(struct r600_bc_alu));
1462 alu.inst = ctx->inst_info->r600_opcode;
1463 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1464 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1465 }
1466
1467 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1468 alu.dst.chan = i;
1469 alu.dst.write = 1;
1470 alu.is_op3 = 1;
1471 if (i == lasti) {
1472 alu.last = 1;
1473 }
1474 r = r600_bc_add_alu(ctx->bc, &alu);
1475 if (r)
1476 return r;
1477 }
1478 return 0;
1479 }
1480
1481 static int tgsi_dp(struct r600_shader_ctx *ctx)
1482 {
1483 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1484 struct r600_bc_alu alu;
1485 int i, j, r;
1486
1487 for (i = 0; i < 4; i++) {
1488 memset(&alu, 0, sizeof(struct r600_bc_alu));
1489 alu.inst = ctx->inst_info->r600_opcode;
1490 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1491 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1492 }
1493
1494 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1495 alu.dst.chan = i;
1496 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1497 /* handle some special cases */
1498 switch (ctx->inst_info->tgsi_opcode) {
1499 case TGSI_OPCODE_DP2:
1500 if (i > 1) {
1501 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1502 alu.src[0].chan = alu.src[1].chan = 0;
1503 }
1504 break;
1505 case TGSI_OPCODE_DP3:
1506 if (i > 2) {
1507 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1508 alu.src[0].chan = alu.src[1].chan = 0;
1509 }
1510 break;
1511 case TGSI_OPCODE_DPH:
1512 if (i == 3) {
1513 alu.src[0].sel = V_SQ_ALU_SRC_1;
1514 alu.src[0].chan = 0;
1515 alu.src[0].neg = 0;
1516 }
1517 break;
1518 default:
1519 break;
1520 }
1521 if (i == 3) {
1522 alu.last = 1;
1523 }
1524 r = r600_bc_add_alu(ctx->bc, &alu);
1525 if (r)
1526 return r;
1527 }
1528 return 0;
1529 }
1530
1531 static int tgsi_tex(struct r600_shader_ctx *ctx)
1532 {
1533 static float one_point_five = 1.5f;
1534 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1535 struct r600_bc_tex tex;
1536 struct r600_bc_alu alu;
1537 unsigned src_gpr;
1538 int r, i;
1539 int opcode;
1540 /* Texture fetch instructions can only use gprs as source.
1541 * Also they cannot negate the source or take the absolute value */
1542 const boolean src_requires_loading =
1543 (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1544 inst->Src[0].Register.File != TGSI_FILE_INPUT) ||
1545 ctx->src[0].neg || ctx->src[0].abs;
1546 boolean src_loaded = FALSE;
1547
1548 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1549
1550 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1551 /* Add perspective divide */
1552 memset(&alu, 0, sizeof(struct r600_bc_alu));
1553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1554 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1555
1556 alu.dst.sel = ctx->temp_reg;
1557 alu.dst.chan = 3;
1558 alu.last = 1;
1559 alu.dst.write = 1;
1560 r = r600_bc_add_alu(ctx->bc, &alu);
1561 if (r)
1562 return r;
1563
1564 for (i = 0; i < 3; i++) {
1565 memset(&alu, 0, sizeof(struct r600_bc_alu));
1566 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1567 alu.src[0].sel = ctx->temp_reg;
1568 alu.src[0].chan = 3;
1569 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1570 alu.dst.sel = ctx->temp_reg;
1571 alu.dst.chan = i;
1572 alu.dst.write = 1;
1573 r = r600_bc_add_alu(ctx->bc, &alu);
1574 if (r)
1575 return r;
1576 }
1577 memset(&alu, 0, sizeof(struct r600_bc_alu));
1578 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1579 alu.src[0].sel = V_SQ_ALU_SRC_1;
1580 alu.src[0].chan = 0;
1581 alu.dst.sel = ctx->temp_reg;
1582 alu.dst.chan = 3;
1583 alu.last = 1;
1584 alu.dst.write = 1;
1585 r = r600_bc_add_alu(ctx->bc, &alu);
1586 if (r)
1587 return r;
1588 src_loaded = TRUE;
1589 src_gpr = ctx->temp_reg;
1590 }
1591
1592 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1593 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1594 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1595
1596 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1597 for (i = 0; i < 4; i++) {
1598 memset(&alu, 0, sizeof(struct r600_bc_alu));
1599 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1600 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1601 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1602 alu.dst.sel = ctx->temp_reg;
1603 alu.dst.chan = i;
1604 if (i == 3)
1605 alu.last = 1;
1606 alu.dst.write = 1;
1607 r = r600_bc_add_alu(ctx->bc, &alu);
1608 if (r)
1609 return r;
1610 }
1611
1612 /* tmp1.z = RCP_e(|tmp1.z|) */
1613 memset(&alu, 0, sizeof(struct r600_bc_alu));
1614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1615 alu.src[0].sel = ctx->temp_reg;
1616 alu.src[0].chan = 2;
1617 alu.src[0].abs = 1;
1618 alu.dst.sel = ctx->temp_reg;
1619 alu.dst.chan = 2;
1620 alu.dst.write = 1;
1621 alu.last = 1;
1622 r = r600_bc_add_alu(ctx->bc, &alu);
1623 if (r)
1624 return r;
1625
1626 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1627 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1628 * muladd has no writemask, have to use another temp
1629 */
1630 memset(&alu, 0, sizeof(struct r600_bc_alu));
1631 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1632 alu.is_op3 = 1;
1633
1634 alu.src[0].sel = ctx->temp_reg;
1635 alu.src[0].chan = 0;
1636 alu.src[1].sel = ctx->temp_reg;
1637 alu.src[1].chan = 2;
1638
1639 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1640 alu.src[2].chan = 0;
1641 alu.src[2].value = *(uint32_t *)&one_point_five;
1642
1643 alu.dst.sel = ctx->temp_reg;
1644 alu.dst.chan = 0;
1645 alu.dst.write = 1;
1646
1647 r = r600_bc_add_alu(ctx->bc, &alu);
1648 if (r)
1649 return r;
1650
1651 memset(&alu, 0, sizeof(struct r600_bc_alu));
1652 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1653 alu.is_op3 = 1;
1654
1655 alu.src[0].sel = ctx->temp_reg;
1656 alu.src[0].chan = 1;
1657 alu.src[1].sel = ctx->temp_reg;
1658 alu.src[1].chan = 2;
1659
1660 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1661 alu.src[2].chan = 0;
1662 alu.src[2].value = *(uint32_t *)&one_point_five;
1663
1664 alu.dst.sel = ctx->temp_reg;
1665 alu.dst.chan = 1;
1666 alu.dst.write = 1;
1667
1668 alu.last = 1;
1669 r = r600_bc_add_alu(ctx->bc, &alu);
1670 if (r)
1671 return r;
1672
1673 src_loaded = TRUE;
1674 src_gpr = ctx->temp_reg;
1675 }
1676
1677 if (src_requires_loading && !src_loaded) {
1678 for (i = 0; i < 4; i++) {
1679 memset(&alu, 0, sizeof(struct r600_bc_alu));
1680 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1681 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1682 alu.dst.sel = ctx->temp_reg;
1683 alu.dst.chan = i;
1684 if (i == 3)
1685 alu.last = 1;
1686 alu.dst.write = 1;
1687 r = r600_bc_add_alu(ctx->bc, &alu);
1688 if (r)
1689 return r;
1690 }
1691 src_loaded = TRUE;
1692 src_gpr = ctx->temp_reg;
1693 }
1694
1695 opcode = ctx->inst_info->r600_opcode;
1696 if (opcode == SQ_TEX_INST_SAMPLE &&
1697 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1698 opcode = SQ_TEX_INST_SAMPLE_C;
1699
1700 memset(&tex, 0, sizeof(struct r600_bc_tex));
1701 tex.inst = opcode;
1702 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1703 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1704 tex.src_gpr = src_gpr;
1705 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1706 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1707 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1708 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1709 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1710 if (src_loaded) {
1711 tex.src_sel_x = 0;
1712 tex.src_sel_y = 1;
1713 tex.src_sel_z = 2;
1714 tex.src_sel_w = 3;
1715 } else {
1716 tex.src_sel_x = ctx->src[0].swizzle[0];
1717 tex.src_sel_y = ctx->src[0].swizzle[1];
1718 tex.src_sel_z = ctx->src[0].swizzle[2];
1719 tex.src_sel_w = ctx->src[0].swizzle[3];
1720 tex.src_rel = ctx->src[0].rel;
1721 }
1722
1723 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1724 tex.src_sel_x = 1;
1725 tex.src_sel_y = 0;
1726 tex.src_sel_z = 3;
1727 tex.src_sel_w = 1;
1728 }
1729
1730 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1731 tex.coord_type_x = 1;
1732 tex.coord_type_y = 1;
1733 tex.coord_type_z = 1;
1734 tex.coord_type_w = 1;
1735 }
1736
1737 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1738 tex.coord_type_z = 0;
1739 tex.src_sel_z = tex.src_sel_y;
1740 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1741 tex.coord_type_z = 0;
1742
1743 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1744 tex.src_sel_w = tex.src_sel_z;
1745
1746 r = r600_bc_add_tex(ctx->bc, &tex);
1747 if (r)
1748 return r;
1749
1750 /* add shadow ambient support - gallium doesn't do it yet */
1751 return 0;
1752 }
1753
1754 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1755 {
1756 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1757 struct r600_bc_alu alu;
1758 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1759 unsigned i;
1760 int r;
1761
1762 /* optimize if it's just an equal balance */
1763 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1764 for (i = 0; i < lasti + 1; i++) {
1765 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1766 continue;
1767
1768 memset(&alu, 0, sizeof(struct r600_bc_alu));
1769 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1770 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1771 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1772 alu.omod = 3;
1773 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1774 alu.dst.chan = i;
1775 if (i == lasti) {
1776 alu.last = 1;
1777 }
1778 r = r600_bc_add_alu(ctx->bc, &alu);
1779 if (r)
1780 return r;
1781 }
1782 return 0;
1783 }
1784
1785 /* 1 - src0 */
1786 for (i = 0; i < lasti + 1; i++) {
1787 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1788 continue;
1789
1790 memset(&alu, 0, sizeof(struct r600_bc_alu));
1791 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1792 alu.src[0].sel = V_SQ_ALU_SRC_1;
1793 alu.src[0].chan = 0;
1794 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1795 alu.src[1].neg = 1;
1796 alu.dst.sel = ctx->temp_reg;
1797 alu.dst.chan = i;
1798 if (i == lasti) {
1799 alu.last = 1;
1800 }
1801 alu.dst.write = 1;
1802 r = r600_bc_add_alu(ctx->bc, &alu);
1803 if (r)
1804 return r;
1805 }
1806
1807 /* (1 - src0) * src2 */
1808 for (i = 0; i < lasti + 1; i++) {
1809 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1810 continue;
1811
1812 memset(&alu, 0, sizeof(struct r600_bc_alu));
1813 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1814 alu.src[0].sel = ctx->temp_reg;
1815 alu.src[0].chan = i;
1816 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1817 alu.dst.sel = ctx->temp_reg;
1818 alu.dst.chan = i;
1819 if (i == lasti) {
1820 alu.last = 1;
1821 }
1822 alu.dst.write = 1;
1823 r = r600_bc_add_alu(ctx->bc, &alu);
1824 if (r)
1825 return r;
1826 }
1827
1828 /* src0 * src1 + (1 - src0) * src2 */
1829 for (i = 0; i < lasti + 1; i++) {
1830 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1831 continue;
1832
1833 memset(&alu, 0, sizeof(struct r600_bc_alu));
1834 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1835 alu.is_op3 = 1;
1836 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1837 r600_bc_src(&alu.src[1], &ctx->src[1], i);
1838 alu.src[2].sel = ctx->temp_reg;
1839 alu.src[2].chan = i;
1840
1841 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1842 alu.dst.chan = i;
1843 if (i == lasti) {
1844 alu.last = 1;
1845 }
1846 r = r600_bc_add_alu(ctx->bc, &alu);
1847 if (r)
1848 return r;
1849 }
1850 return 0;
1851 }
1852
1853 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1854 {
1855 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1856 struct r600_bc_alu alu;
1857 int i, r;
1858 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1859
1860 for (i = 0; i < lasti + 1; i++) {
1861 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1862 continue;
1863
1864 memset(&alu, 0, sizeof(struct r600_bc_alu));
1865 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1866 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1867 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1868 r600_bc_src(&alu.src[2], &ctx->src[1], i);
1869 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1870 alu.dst.chan = i;
1871 alu.dst.write = 1;
1872 alu.is_op3 = 1;
1873 if (i == lasti)
1874 alu.last = 1;
1875 r = r600_bc_add_alu(ctx->bc, &alu);
1876 if (r)
1877 return r;
1878 }
1879 return 0;
1880 }
1881
1882 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1883 {
1884 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1885 static const unsigned int src0_swizzle[] = {2, 0, 1};
1886 static const unsigned int src1_swizzle[] = {1, 2, 0};
1887 struct r600_bc_alu alu;
1888 uint32_t use_temp = 0;
1889 int i, r;
1890
1891 if (inst->Dst[0].Register.WriteMask != 0xf)
1892 use_temp = 1;
1893
1894 for (i = 0; i < 4; i++) {
1895 memset(&alu, 0, sizeof(struct r600_bc_alu));
1896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1897 if (i < 3) {
1898 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1899 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
1900 } else {
1901 alu.src[0].sel = V_SQ_ALU_SRC_0;
1902 alu.src[0].chan = i;
1903 alu.src[1].sel = V_SQ_ALU_SRC_0;
1904 alu.src[1].chan = i;
1905 }
1906
1907 alu.dst.sel = ctx->temp_reg;
1908 alu.dst.chan = i;
1909 alu.dst.write = 1;
1910
1911 if (i == 3)
1912 alu.last = 1;
1913 r = r600_bc_add_alu(ctx->bc, &alu);
1914 if (r)
1915 return r;
1916 }
1917
1918 for (i = 0; i < 4; i++) {
1919 memset(&alu, 0, sizeof(struct r600_bc_alu));
1920 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1921
1922 if (i < 3) {
1923 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
1924 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
1925 } else {
1926 alu.src[0].sel = V_SQ_ALU_SRC_0;
1927 alu.src[0].chan = i;
1928 alu.src[1].sel = V_SQ_ALU_SRC_0;
1929 alu.src[1].chan = i;
1930 }
1931
1932 alu.src[2].sel = ctx->temp_reg;
1933 alu.src[2].neg = 1;
1934 alu.src[2].chan = i;
1935
1936 if (use_temp)
1937 alu.dst.sel = ctx->temp_reg;
1938 else
1939 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1940 alu.dst.chan = i;
1941 alu.dst.write = 1;
1942 alu.is_op3 = 1;
1943 if (i == 3)
1944 alu.last = 1;
1945 r = r600_bc_add_alu(ctx->bc, &alu);
1946 if (r)
1947 return r;
1948 }
1949 if (use_temp)
1950 return tgsi_helper_copy(ctx, inst);
1951 return 0;
1952 }
1953
1954 static int tgsi_exp(struct r600_shader_ctx *ctx)
1955 {
1956 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1957 struct r600_bc_alu alu;
1958 int r;
1959
1960 /* result.x = 2^floor(src); */
1961 if (inst->Dst[0].Register.WriteMask & 1) {
1962 memset(&alu, 0, sizeof(struct r600_bc_alu));
1963
1964 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1965 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1966
1967 alu.dst.sel = ctx->temp_reg;
1968 alu.dst.chan = 0;
1969 alu.dst.write = 1;
1970 alu.last = 1;
1971 r = r600_bc_add_alu(ctx->bc, &alu);
1972 if (r)
1973 return r;
1974
1975 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1976 alu.src[0].sel = ctx->temp_reg;
1977 alu.src[0].chan = 0;
1978
1979 alu.dst.sel = ctx->temp_reg;
1980 alu.dst.chan = 0;
1981 alu.dst.write = 1;
1982 alu.last = 1;
1983 r = r600_bc_add_alu(ctx->bc, &alu);
1984 if (r)
1985 return r;
1986 }
1987
1988 /* result.y = tmp - floor(tmp); */
1989 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1990 memset(&alu, 0, sizeof(struct r600_bc_alu));
1991
1992 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1993 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1994
1995 alu.dst.sel = ctx->temp_reg;
1996 #if 0
1997 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1998 if (r)
1999 return r;
2000 #endif
2001 alu.dst.write = 1;
2002 alu.dst.chan = 1;
2003
2004 alu.last = 1;
2005
2006 r = r600_bc_add_alu(ctx->bc, &alu);
2007 if (r)
2008 return r;
2009 }
2010
2011 /* result.z = RoughApprox2ToX(tmp);*/
2012 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2013 memset(&alu, 0, sizeof(struct r600_bc_alu));
2014 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2015 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2016
2017 alu.dst.sel = ctx->temp_reg;
2018 alu.dst.write = 1;
2019 alu.dst.chan = 2;
2020
2021 alu.last = 1;
2022
2023 r = r600_bc_add_alu(ctx->bc, &alu);
2024 if (r)
2025 return r;
2026 }
2027
2028 /* result.w = 1.0;*/
2029 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2030 memset(&alu, 0, sizeof(struct r600_bc_alu));
2031
2032 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2033 alu.src[0].sel = V_SQ_ALU_SRC_1;
2034 alu.src[0].chan = 0;
2035
2036 alu.dst.sel = ctx->temp_reg;
2037 alu.dst.chan = 3;
2038 alu.dst.write = 1;
2039 alu.last = 1;
2040 r = r600_bc_add_alu(ctx->bc, &alu);
2041 if (r)
2042 return r;
2043 }
2044 return tgsi_helper_copy(ctx, inst);
2045 }
2046
2047 static int tgsi_log(struct r600_shader_ctx *ctx)
2048 {
2049 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2050 struct r600_bc_alu alu;
2051 int r;
2052
2053 /* result.x = floor(log2(src)); */
2054 if (inst->Dst[0].Register.WriteMask & 1) {
2055 memset(&alu, 0, sizeof(struct r600_bc_alu));
2056
2057 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2058 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2059
2060 alu.dst.sel = ctx->temp_reg;
2061 alu.dst.chan = 0;
2062 alu.dst.write = 1;
2063 alu.last = 1;
2064 r = r600_bc_add_alu(ctx->bc, &alu);
2065 if (r)
2066 return r;
2067
2068 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2069 alu.src[0].sel = ctx->temp_reg;
2070 alu.src[0].chan = 0;
2071
2072 alu.dst.sel = ctx->temp_reg;
2073 alu.dst.chan = 0;
2074 alu.dst.write = 1;
2075 alu.last = 1;
2076
2077 r = r600_bc_add_alu(ctx->bc, &alu);
2078 if (r)
2079 return r;
2080 }
2081
2082 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2083 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2084 memset(&alu, 0, sizeof(struct r600_bc_alu));
2085
2086 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2087 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2088
2089 alu.dst.sel = ctx->temp_reg;
2090 alu.dst.chan = 1;
2091 alu.dst.write = 1;
2092 alu.last = 1;
2093
2094 r = r600_bc_add_alu(ctx->bc, &alu);
2095 if (r)
2096 return r;
2097
2098 memset(&alu, 0, sizeof(struct r600_bc_alu));
2099
2100 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2101 alu.src[0].sel = ctx->temp_reg;
2102 alu.src[0].chan = 1;
2103
2104 alu.dst.sel = ctx->temp_reg;
2105 alu.dst.chan = 1;
2106 alu.dst.write = 1;
2107 alu.last = 1;
2108
2109 r = r600_bc_add_alu(ctx->bc, &alu);
2110 if (r)
2111 return r;
2112
2113 memset(&alu, 0, sizeof(struct r600_bc_alu));
2114
2115 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2116 alu.src[0].sel = ctx->temp_reg;
2117 alu.src[0].chan = 1;
2118
2119 alu.dst.sel = ctx->temp_reg;
2120 alu.dst.chan = 1;
2121 alu.dst.write = 1;
2122 alu.last = 1;
2123
2124 r = r600_bc_add_alu(ctx->bc, &alu);
2125 if (r)
2126 return r;
2127
2128 memset(&alu, 0, sizeof(struct r600_bc_alu));
2129
2130 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2131 alu.src[0].sel = ctx->temp_reg;
2132 alu.src[0].chan = 1;
2133
2134 alu.dst.sel = ctx->temp_reg;
2135 alu.dst.chan = 1;
2136 alu.dst.write = 1;
2137 alu.last = 1;
2138
2139 r = r600_bc_add_alu(ctx->bc, &alu);
2140 if (r)
2141 return r;
2142
2143 memset(&alu, 0, sizeof(struct r600_bc_alu));
2144
2145 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2146
2147 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2148
2149 alu.src[1].sel = ctx->temp_reg;
2150 alu.src[1].chan = 1;
2151
2152 alu.dst.sel = ctx->temp_reg;
2153 alu.dst.chan = 1;
2154 alu.dst.write = 1;
2155 alu.last = 1;
2156
2157 r = r600_bc_add_alu(ctx->bc, &alu);
2158 if (r)
2159 return r;
2160 }
2161
2162 /* result.z = log2(src);*/
2163 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2164 memset(&alu, 0, sizeof(struct r600_bc_alu));
2165
2166 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2167 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2168
2169 alu.dst.sel = ctx->temp_reg;
2170 alu.dst.write = 1;
2171 alu.dst.chan = 2;
2172 alu.last = 1;
2173
2174 r = r600_bc_add_alu(ctx->bc, &alu);
2175 if (r)
2176 return r;
2177 }
2178
2179 /* result.w = 1.0; */
2180 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2181 memset(&alu, 0, sizeof(struct r600_bc_alu));
2182
2183 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2184 alu.src[0].sel = V_SQ_ALU_SRC_1;
2185 alu.src[0].chan = 0;
2186
2187 alu.dst.sel = ctx->temp_reg;
2188 alu.dst.chan = 3;
2189 alu.dst.write = 1;
2190 alu.last = 1;
2191
2192 r = r600_bc_add_alu(ctx->bc, &alu);
2193 if (r)
2194 return r;
2195 }
2196
2197 return tgsi_helper_copy(ctx, inst);
2198 }
2199
2200 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2201 {
2202 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2203 struct r600_bc_alu alu;
2204 int r;
2205
2206 memset(&alu, 0, sizeof(struct r600_bc_alu));
2207
2208 switch (inst->Instruction.Opcode) {
2209 case TGSI_OPCODE_ARL:
2210 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2211 break;
2212 case TGSI_OPCODE_ARR:
2213 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2214 break;
2215 default:
2216 assert(0);
2217 return -1;
2218 }
2219
2220 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2221 alu.last = 1;
2222 alu.dst.sel = ctx->ar_reg;
2223 alu.dst.write = 1;
2224 r = r600_bc_add_alu(ctx->bc, &alu);
2225 if (r)
2226 return r;
2227
2228 /* TODO: Note that the MOVA can be avoided if we never use AR for
2229 * indexing non-CB registers in the current ALU clause. Similarly, we
2230 * need to load AR from ar_reg again if we started a new clause
2231 * between ARL and AR usage. The easy way to do that is to remove
2232 * the MOVA here, and load it for the first AR access after ar_reg
2233 * has been modified in each clause. */
2234 memset(&alu, 0, sizeof(struct r600_bc_alu));
2235 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2236 alu.src[0].sel = ctx->ar_reg;
2237 alu.src[0].chan = 0;
2238 alu.last = 1;
2239 r = r600_bc_add_alu(ctx->bc, &alu);
2240 if (r)
2241 return r;
2242 return 0;
2243 }
2244 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2245 {
2246 /* TODO from r600c, ar values don't persist between clauses */
2247 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2248 struct r600_bc_alu alu;
2249 int r;
2250
2251 switch (inst->Instruction.Opcode) {
2252 case TGSI_OPCODE_ARL:
2253 memset(&alu, 0, sizeof(alu));
2254 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2255 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2256 alu.dst.sel = ctx->ar_reg;
2257 alu.dst.write = 1;
2258 alu.last = 1;
2259
2260 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2261 return r;
2262
2263 memset(&alu, 0, sizeof(alu));
2264 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2265 alu.src[0].sel = ctx->ar_reg;
2266 alu.dst.sel = ctx->ar_reg;
2267 alu.dst.write = 1;
2268 alu.last = 1;
2269
2270 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2271 return r;
2272 break;
2273 case TGSI_OPCODE_ARR:
2274 memset(&alu, 0, sizeof(alu));
2275 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2276 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2277 alu.dst.sel = ctx->ar_reg;
2278 alu.dst.write = 1;
2279 alu.last = 1;
2280
2281 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2282 return r;
2283 break;
2284 default:
2285 assert(0);
2286 return -1;
2287 }
2288
2289 memset(&alu, 0, sizeof(alu));
2290 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2291 alu.src[0].sel = ctx->ar_reg;
2292 alu.last = 1;
2293
2294 r = r600_bc_add_alu(ctx->bc, &alu);
2295 if (r)
2296 return r;
2297 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2298 return 0;
2299 }
2300
2301 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2302 {
2303 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2304 struct r600_bc_alu alu;
2305 int i, r = 0;
2306
2307 for (i = 0; i < 4; i++) {
2308 memset(&alu, 0, sizeof(struct r600_bc_alu));
2309
2310 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2311 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2312
2313 if (i == 0 || i == 3) {
2314 alu.src[0].sel = V_SQ_ALU_SRC_1;
2315 } else {
2316 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2317 }
2318
2319 if (i == 0 || i == 2) {
2320 alu.src[1].sel = V_SQ_ALU_SRC_1;
2321 } else {
2322 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2323 }
2324 if (i == 3)
2325 alu.last = 1;
2326 r = r600_bc_add_alu(ctx->bc, &alu);
2327 if (r)
2328 return r;
2329 }
2330 return 0;
2331 }
2332
2333 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2334 {
2335 struct r600_bc_alu alu;
2336 int r;
2337
2338 memset(&alu, 0, sizeof(struct r600_bc_alu));
2339 alu.inst = opcode;
2340 alu.predicate = 1;
2341
2342 alu.dst.sel = ctx->temp_reg;
2343 alu.dst.write = 1;
2344 alu.dst.chan = 0;
2345
2346 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2347 alu.src[1].sel = V_SQ_ALU_SRC_0;
2348 alu.src[1].chan = 0;
2349
2350 alu.last = 1;
2351
2352 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2353 if (r)
2354 return r;
2355 return 0;
2356 }
2357
2358 static int pops(struct r600_shader_ctx *ctx, int pops)
2359 {
2360 int alu_pop = 3;
2361 if (ctx->bc->cf_last) {
2362 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2363 alu_pop = 0;
2364 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2365 alu_pop = 1;
2366 }
2367 alu_pop += pops;
2368 if (alu_pop == 1) {
2369 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2370 ctx->bc->force_add_cf = 1;
2371 } else if (alu_pop == 2) {
2372 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2373 ctx->bc->force_add_cf = 1;
2374 } else {
2375 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2376 ctx->bc->cf_last->pop_count = pops;
2377 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2378 }
2379 return 0;
2380 }
2381
2382 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2383 {
2384 switch(reason) {
2385 case FC_PUSH_VPM:
2386 ctx->bc->callstack[ctx->bc->call_sp].current--;
2387 break;
2388 case FC_PUSH_WQM:
2389 case FC_LOOP:
2390 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2391 break;
2392 case FC_REP:
2393 /* TOODO : for 16 vp asic should -= 2; */
2394 ctx->bc->callstack[ctx->bc->call_sp].current --;
2395 break;
2396 }
2397 }
2398
2399 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2400 {
2401 if (check_max_only) {
2402 int diff;
2403 switch (reason) {
2404 case FC_PUSH_VPM:
2405 diff = 1;
2406 break;
2407 case FC_PUSH_WQM:
2408 diff = 4;
2409 break;
2410 default:
2411 assert(0);
2412 diff = 0;
2413 }
2414 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2415 ctx->bc->callstack[ctx->bc->call_sp].max) {
2416 ctx->bc->callstack[ctx->bc->call_sp].max =
2417 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2418 }
2419 return;
2420 }
2421 switch (reason) {
2422 case FC_PUSH_VPM:
2423 ctx->bc->callstack[ctx->bc->call_sp].current++;
2424 break;
2425 case FC_PUSH_WQM:
2426 case FC_LOOP:
2427 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2428 break;
2429 case FC_REP:
2430 ctx->bc->callstack[ctx->bc->call_sp].current++;
2431 break;
2432 }
2433
2434 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2435 ctx->bc->callstack[ctx->bc->call_sp].max) {
2436 ctx->bc->callstack[ctx->bc->call_sp].max =
2437 ctx->bc->callstack[ctx->bc->call_sp].current;
2438 }
2439 }
2440
2441 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2442 {
2443 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2444
2445 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2446 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2447 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2448 sp->num_mid++;
2449 }
2450
2451 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2452 {
2453 ctx->bc->fc_sp++;
2454 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2455 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2456 }
2457
2458 static void fc_poplevel(struct r600_shader_ctx *ctx)
2459 {
2460 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2461 if (sp->mid) {
2462 free(sp->mid);
2463 sp->mid = NULL;
2464 }
2465 sp->num_mid = 0;
2466 sp->start = NULL;
2467 sp->type = 0;
2468 ctx->bc->fc_sp--;
2469 }
2470
#if 0
/*
 * Everything down to the matching #endif is compiled out. It is
 * unfinished scaffolding for RETURN / flag-based loop exit handling:
 * several helpers are empty stubs.
 */

/* Add a RETURN CF instruction. Always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Add a JUMP CF instruction with the given pop count.
 * `offset` is not used yet. Always returns 0.
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: does nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: does nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/*
 * Sequence: test flag, jump, set the in-loop return flag to
 * V_SQ_ALU_SRC_0, pop ifidx + 1 levels, then RETURN.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Sequence: test flag, add the CF instruction for the current opcode
 * with pop_count 1, register it in the mid list of stack entry fc_sp,
 * then pop one level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2518
2519 static int tgsi_if(struct r600_shader_ctx *ctx)
2520 {
2521 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2522
2523 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2524
2525 fc_pushlevel(ctx, FC_IF);
2526
2527 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2528 return 0;
2529 }
2530
2531 static int tgsi_else(struct r600_shader_ctx *ctx)
2532 {
2533 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2534 ctx->bc->cf_last->pop_count = 1;
2535
2536 fc_set_mid(ctx, ctx->bc->fc_sp);
2537 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2538 return 0;
2539 }
2540
2541 static int tgsi_endif(struct r600_shader_ctx *ctx)
2542 {
2543 pops(ctx, 1);
2544 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2545 R600_ERR("if/endif unbalanced in shader\n");
2546 return -1;
2547 }
2548
2549 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2550 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2551 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2552 } else {
2553 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2554 }
2555 fc_poplevel(ctx);
2556
2557 callstack_decrease_current(ctx, FC_PUSH_VPM);
2558 return 0;
2559 }
2560
2561 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2562 {
2563 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2564
2565 fc_pushlevel(ctx, FC_LOOP);
2566
2567 /* check stack depth */
2568 callstack_check_depth(ctx, FC_LOOP, 0);
2569 return 0;
2570 }
2571
2572 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2573 {
2574 int i;
2575
2576 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2577
2578 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2579 R600_ERR("loop/endloop in shader code are not paired.\n");
2580 return -EINVAL;
2581 }
2582
2583 /* fixup loop pointers - from r600isa
2584 LOOP END points to CF after LOOP START,
2585 LOOP START point to CF after LOOP END
2586 BRK/CONT point to LOOP END CF
2587 */
2588 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2589
2590 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2591
2592 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2593 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2594 }
2595 /* TODO add LOOPRET support */
2596 fc_poplevel(ctx);
2597 callstack_decrease_current(ctx, FC_LOOP);
2598 return 0;
2599 }
2600
2601 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2602 {
2603 unsigned int fscp;
2604
2605 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2606 {
2607 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2608 break;
2609 }
2610
2611 if (fscp == 0) {
2612 R600_ERR("Break not inside loop/endloop pair\n");
2613 return -EINVAL;
2614 }
2615
2616 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2617 ctx->bc->cf_last->pop_count = 1;
2618
2619 fc_set_mid(ctx, fscp);
2620
2621 pops(ctx, 1);
2622 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2623 return 0;
2624 }
2625
2626 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2627 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2628 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2629 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2630
2631 /* FIXME:
2632 * For state trackers other than OpenGL, we'll want to use
2633 * _RECIP_IEEE instead.
2634 */
2635 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2636
2637 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2638 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2639 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2640 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2641 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2642 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2643 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2644 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2645 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2646 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2647 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2648 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2649 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2650 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2651 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2652 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2653 /* gap */
2654 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2655 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2656 /* gap */
2657 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2658 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2659 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2660 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2661 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2662 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2663 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2664 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2665 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2666 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2667 /* gap */
2668 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2669 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2670 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2671 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2672 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2673 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2674 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2675 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2676 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2677 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2678 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2679 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2680 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2681 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2682 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2683 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2684 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2685 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2686 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2687 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2688 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2689 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2690 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2691 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2692 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2693 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2694 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2695 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2696 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2697 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2698 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2699 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2700 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2701 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2702 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2703 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2704 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2705 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2706 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2707 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2708 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2709 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2710 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2711 /* gap */
2712 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2713 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2714 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2715 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2716 /* gap */
2717 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2718 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2719 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2720 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2721 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2722 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2723 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2724 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2725 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2726 /* gap */
2727 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2728 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2729 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2730 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2731 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2732 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2733 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2734 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2735 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2736 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2737 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2738 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2739 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2740 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2741 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2742 /* gap */
2743 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2744 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2745 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2746 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2747 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2748 /* gap */
2749 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2750 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2751 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2752 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2753 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2754 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2755 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2756 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2757 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2758 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2759 /* gap */
2760 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2761 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2762 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2763 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2764 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2765 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2766 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2767 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2768 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2769 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2770 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2771 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2772 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2773 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2774 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2775 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2776 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2777 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2778 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2779 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2780 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2781 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2782 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2783 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2784 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2785 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2786 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2787 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2788 };
2789
2790 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2791 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2792 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2793 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2794 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2795 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2796 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2797 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2798 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2799 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2800 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2801 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2802 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2803 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2804 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2805 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2806 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2807 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2808 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2809 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2810 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2811 /* gap */
2812 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2814 /* gap */
2815 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2816 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2817 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2818 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2819 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2820 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2821 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2822 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2823 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2824 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2825 /* gap */
2826 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2827 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2828 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2829 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2830 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2831 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2832 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2833 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2834 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2836 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2840 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2842 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2843 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2844 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2845 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2846 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2847 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2849 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2856 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2857 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2858 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2859 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2860 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2861 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2862 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2863 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2866 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2867 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2868 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2869 /* gap */
2870 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2871 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2873 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2874 /* gap */
2875 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2876 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2880 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2883 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884 /* gap */
2885 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2894 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2897 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2899 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 /* gap */
2901 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906 /* gap */
2907 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2916 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2917 /* gap */
2918 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946 };