r600g: Handle texture fetch instructions with neg or abs on source register
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_formats.h"
32 #include "r600_opcodes.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37 int r600_find_vs_semantic_index(struct r600_shader *vs,
38 struct r600_shader *ps, int id)
39 {
40 struct r600_shader_io *input = &ps->input[id];
41
42 for (int i = 0; i < vs->noutput; i++) {
43 if (input->name == vs->output[i].name &&
44 input->sid == vs->output[i].sid) {
45 return i - 1;
46 }
47 }
48 return 0;
49 }
50
51 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
52 {
53 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
54 struct r600_shader *rshader = &shader->shader;
55 void *ptr;
56
57 /* copy new shader */
58 if (shader->bo == NULL) {
59 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
60 if (shader->bo == NULL) {
61 return -ENOMEM;
62 }
63 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
64 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
65 r600_bo_unmap(rctx->radeon, shader->bo);
66 }
67 /* build state */
68 switch (rshader->processor_type) {
69 case TGSI_PROCESSOR_VERTEX:
70 if (rshader->family >= CHIP_CEDAR) {
71 evergreen_pipe_shader_vs(ctx, shader);
72 } else {
73 r600_pipe_shader_vs(ctx, shader);
74 }
75 break;
76 case TGSI_PROCESSOR_FRAGMENT:
77 if (rshader->family >= CHIP_CEDAR) {
78 evergreen_pipe_shader_ps(ctx, shader);
79 } else {
80 r600_pipe_shader_ps(ctx, shader);
81 }
82 break;
83 default:
84 return -EINVAL;
85 }
86 return 0;
87 }
88
/* Forward declaration: the translator is defined later in this file but is
 * already called by r600_pipe_shader_create() below. */
89 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
90
91 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
92 {
93 static int dump_shaders = -1;
94 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
95 int r;
96
97 /* Would like some magic "get_bool_option_once" routine.
98 */
99 if (dump_shaders == -1)
100 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
101
102 if (dump_shaders) {
103 fprintf(stderr, "--------------------------------------------------------------\n");
104 tgsi_dump(tokens, 0);
105 }
106 shader->shader.family = r600_get_family(rctx->radeon);
107 r = r600_shader_from_tgsi(tokens, &shader->shader);
108 if (r) {
109 R600_ERR("translation from TGSI failed !\n");
110 return r;
111 }
112 r = r600_bc_build(&shader->shader.bc);
113 if (r) {
114 R600_ERR("building bytecode failed !\n");
115 return r;
116 }
117 if (dump_shaders) {
118 r600_bc_dump(&shader->shader.bc);
119 fprintf(stderr, "______________________________________________________________\n");
120 }
121 return r600_pipe_shader(ctx, shader);
122 }
123
124 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
125 {
126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127
128 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
129 r600_bc_clear(&shader->shader.bc);
130 }
131
132 /*
133 * tgsi -> r600 shader
134 */
/* Forward declaration so struct r600_shader_ctx can hold a pointer to this
 * type before its full definition appears further down. */
135 struct r600_shader_tgsi_instruction;
136
/*
 * Driver-side description of one instruction source operand.  It is filled
 * in by tgsi_src() and copied, one channel at a time, into an ALU source by
 * r600_bc_src().
 */
137 struct r600_shader_src {
138 unsigned sel; /* source select: TGSI index plus file offset, or a special V_SQ_ALU_SRC_* value */
139 unsigned swizzle[4]; /* source component read for each of the four channels */
140 unsigned neg; /* negate modifier, taken from the TGSI Negate bit */
141 unsigned abs; /* absolute-value modifier, taken from the TGSI Absolute bit */
142 unsigned rel; /* V_SQ_REL_RELATIVE for indirectly addressed registers, else 0 */
143 uint32_t value[4]; /* the four literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
144 };
145
/*
 * State carried through the translation of one TGSI shader; set up and torn
 * down by r600_shader_from_tgsi().
 */
146 struct r600_shader_ctx {
147 struct tgsi_shader_info info; /* filled by tgsi_scan_shader() */
148 struct tgsi_parse_context parse; /* token parser state; holds the current token */
149 const struct tgsi_token *tokens; /* token stream being translated */
150 unsigned type; /* processor type read from the parsed header */
151 unsigned file_offset[TGSI_FILE_COUNT]; /* base added to a register index, per TGSI file */
152 unsigned temp_reg; /* first driver temporary (ar_reg + 1) */
153 unsigned ar_reg; /* register placed right after the TGSI temporaries; index source for relative constant fetches */
154 struct r600_shader_tgsi_instruction *inst_info; /* table entry of the instruction being processed */
155 struct r600_bc *bc; /* bytecode being generated (points into shader) */
156 struct r600_shader *shader; /* shader being filled in */
157 struct r600_shader_src src[3]; /* decoded sources of the current instruction */
158 u32 *literals; /* immediates seen so far, four dwords each */
159 u32 nliterals; /* number of immediates stored in literals */
160 u32 max_driver_temp_used; /* temporaries handed out by r600_get_temp(); reset per instruction */
161 /* needed for evergreen interpolation */
162 boolean input_centroid; /* set by evergreen_gpr_count(): some input uses centroid */
163 boolean input_linear; /* set by evergreen_gpr_count(): some input is linear */
164 boolean input_perspective; /* set by evergreen_gpr_count(): some input is perspective */
165 int num_interp_gpr; /* GPR count computed by evergreen_gpr_count() */
166 };
167
/*
 * One entry of the per-opcode translation tables: describes how a TGSI
 * opcode is turned into hardware instructions.
 */
168 struct r600_shader_tgsi_instruction {
169 unsigned tgsi_opcode; /* TGSI opcode this entry handles */
170 unsigned is_op3; /* NOTE(review): presumably non-zero for three-source hardware ops - confirm */
171 unsigned r600_opcode; /* hardware opcode emitted by the generic handlers */
172 int (*process)(struct r600_shader_ctx *ctx); /* handler that emits the code; returns 0 or a negative errno */
173 };
174
/* Translation tables indexed by TGSI opcode in r600_shader_from_tgsi(): the
 * eg_ table is used when the chip revision is evergreen, the r600_ table
 * otherwise.  Both are defined elsewhere in this file. */
175 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
/* Helper defined elsewhere in this file. */
176 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
177
178 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
179 {
180 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
181 int j;
182
183 if (i->Instruction.NumDstRegs > 1) {
184 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
185 return -EINVAL;
186 }
187 if (i->Instruction.Predicate) {
188 R600_ERR("predicate unsupported\n");
189 return -EINVAL;
190 }
191 #if 0
192 if (i->Instruction.Label) {
193 R600_ERR("label unsupported\n");
194 return -EINVAL;
195 }
196 #endif
197 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
198 if (i->Src[j].Register.Dimension) {
199 R600_ERR("unsupported src %d (dimension %d)\n", j,
200 i->Src[j].Register.Dimension);
201 return -EINVAL;
202 }
203 }
204 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
205 if (i->Dst[j].Register.Dimension) {
206 R600_ERR("unsupported dst (dimension)\n");
207 return -EINVAL;
208 }
209 }
210 return 0;
211 }
212
213 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
214 {
215 int i, r;
216 struct r600_bc_alu alu;
217 int gpr = 0, base_chan = 0;
218 int ij_index = 0;
219
220 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
221 ij_index = 0;
222 if (ctx->shader->input[input].centroid)
223 ij_index++;
224 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
225 ij_index = 0;
226 /* if we have perspective add one */
227 if (ctx->input_perspective) {
228 ij_index++;
229 /* if we have perspective centroid */
230 if (ctx->input_centroid)
231 ij_index++;
232 }
233 if (ctx->shader->input[input].centroid)
234 ij_index++;
235 }
236
237 /* work out gpr and base_chan from index */
238 gpr = ij_index / 2;
239 base_chan = (2 * (ij_index % 2)) + 1;
240
241 for (i = 0; i < 8; i++) {
242 memset(&alu, 0, sizeof(struct r600_bc_alu));
243
244 if (i < 4)
245 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
246 else
247 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
248
249 if ((i > 1) && (i < 6)) {
250 alu.dst.sel = ctx->shader->input[input].gpr;
251 alu.dst.write = 1;
252 }
253
254 alu.dst.chan = i % 4;
255
256 alu.src[0].sel = gpr;
257 alu.src[0].chan = (base_chan - (i % 2));
258
259 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
260
261 alu.bank_swizzle_force = SQ_ALU_VEC_210;
262 if ((i % 4) == 3)
263 alu.last = 1;
264 r = r600_bc_add_alu(ctx->bc, &alu);
265 if (r)
266 return r;
267 }
268 return 0;
269 }
270
271
272 static int tgsi_declaration(struct r600_shader_ctx *ctx)
273 {
274 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
275 unsigned i;
276 int r;
277
278 switch (d->Declaration.File) {
279 case TGSI_FILE_INPUT:
280 i = ctx->shader->ninput++;
281 ctx->shader->input[i].name = d->Semantic.Name;
282 ctx->shader->input[i].sid = d->Semantic.Index;
283 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
284 ctx->shader->input[i].centroid = d->Declaration.Centroid;
285 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
286 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
287 /* turn input into interpolate on EG */
288 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
289 if (ctx->shader->input[i].interpolate > 0) {
290 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
291 evergreen_interp_alu(ctx, i);
292 }
293 }
294 }
295 break;
296 case TGSI_FILE_OUTPUT:
297 i = ctx->shader->noutput++;
298 ctx->shader->output[i].name = d->Semantic.Name;
299 ctx->shader->output[i].sid = d->Semantic.Index;
300 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
301 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
302 break;
303 case TGSI_FILE_CONSTANT:
304 case TGSI_FILE_TEMPORARY:
305 case TGSI_FILE_SAMPLER:
306 case TGSI_FILE_ADDRESS:
307 break;
308
309 case TGSI_FILE_SYSTEM_VALUE:
310 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
311 struct r600_bc_alu alu;
312 memset(&alu, 0, sizeof(struct r600_bc_alu));
313
314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
315 alu.src[0].sel = 0;
316 alu.src[0].chan = 3;
317
318 alu.dst.sel = 0;
319 alu.dst.chan = 3;
320 alu.dst.write = 1;
321 alu.last = 1;
322
323 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
324 return r;
325 break;
326 }
327
328 default:
329 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
330 return -EINVAL;
331 }
332 return 0;
333 }
334
335 static int r600_get_temp(struct r600_shader_ctx *ctx)
336 {
337 return ctx->temp_reg + ctx->max_driver_temp_used++;
338 }
339
340 /*
341 * for evergreen we need to scan the shader to find the number of GPRs we need to
342 * reserve for interpolation.
343 *
344 * we need to know if we are going to emit
345 * any centroid inputs
346 * if perspective and linear are required
347 */
348 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
349 {
350 int i;
351 int num_baryc;
352
353 ctx->input_linear = FALSE;
354 ctx->input_perspective = FALSE;
355 ctx->input_centroid = FALSE;
356 ctx->num_interp_gpr = 1;
357
358 /* any centroid inputs */
359 for (i = 0; i < ctx->info.num_inputs; i++) {
360 /* skip position/face */
361 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
362 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
363 continue;
364 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
365 ctx->input_linear = TRUE;
366 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
367 ctx->input_perspective = TRUE;
368 if (ctx->info.input_centroid[i])
369 ctx->input_centroid = TRUE;
370 }
371
372 num_baryc = 0;
373 /* ignoring sample for now */
374 if (ctx->input_perspective)
375 num_baryc++;
376 if (ctx->input_linear)
377 num_baryc++;
378 if (ctx->input_centroid)
379 num_baryc *= 2;
380
381 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
382
383 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
384 return ctx->num_interp_gpr;
385 }
386
387 static void tgsi_src(struct r600_shader_ctx *ctx,
388 const struct tgsi_full_src_register *tgsi_src,
389 struct r600_shader_src *r600_src)
390 {
391 memset(r600_src, 0, sizeof(*r600_src));
392 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
393 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
394 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
395 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
396 r600_src->neg = tgsi_src->Register.Negate;
397 r600_src->abs = tgsi_src->Register.Absolute;
398
399 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
400 int index;
401 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
402 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
403 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
404
405 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
406 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
407 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
408 return;
409 }
410 index = tgsi_src->Register.Index;
411 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
412 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
413 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
414 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
415 r600_src->swizzle[0] = 3;
416 r600_src->swizzle[1] = 3;
417 r600_src->swizzle[2] = 3;
418 r600_src->swizzle[3] = 3;
419 r600_src->sel = 0;
420 } else {
421 if (tgsi_src->Register.Indirect)
422 r600_src->rel = V_SQ_REL_RELATIVE;
423 r600_src->sel = tgsi_src->Register.Index;
424 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
425 }
426 }
427
428 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
429 {
430 struct r600_bc_vtx vtx;
431 unsigned int ar_reg;
432 int r;
433
434 if (offset) {
435 struct r600_bc_alu alu;
436
437 memset(&alu, 0, sizeof(alu));
438
439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
440 alu.src[0].sel = ctx->ar_reg;
441
442 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
443 alu.src[1].value = offset;
444
445 alu.dst.sel = dst_reg;
446 alu.dst.write = 1;
447 alu.last = 1;
448
449 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
450 return r;
451
452 ar_reg = dst_reg;
453 } else {
454 ar_reg = ctx->ar_reg;
455 }
456
457 memset(&vtx, 0, sizeof(vtx));
458 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
459 vtx.src_gpr = ar_reg;
460 vtx.mega_fetch_count = 16;
461 vtx.dst_gpr = dst_reg;
462 vtx.dst_sel_x = 0; /* SEL_X */
463 vtx.dst_sel_y = 1; /* SEL_Y */
464 vtx.dst_sel_z = 2; /* SEL_Z */
465 vtx.dst_sel_w = 3; /* SEL_W */
466 vtx.data_format = FMT_32_32_32_32_FLOAT;
467 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
468 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
469 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
470
471 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
472 return r;
473
474 return 0;
475 }
476
477 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
478 {
479 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
480 struct r600_bc_alu alu;
481 int i, j, k, nconst, r;
482
483 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
484 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
485 nconst++;
486 }
487 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
488 }
489 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
490 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
491 continue;
492 }
493
494 if (ctx->src[i].rel) {
495 int treg = r600_get_temp(ctx);
496 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
497 return r;
498
499 ctx->src[i].sel = treg;
500 ctx->src[i].rel = 0;
501 j--;
502 } else if (j > 0) {
503 int treg = r600_get_temp(ctx);
504 for (k = 0; k < 4; k++) {
505 memset(&alu, 0, sizeof(struct r600_bc_alu));
506 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
507 alu.src[0].sel = ctx->src[i].sel;
508 alu.src[0].chan = k;
509 alu.src[0].rel = ctx->src[i].rel;
510 alu.dst.sel = treg;
511 alu.dst.chan = k;
512 alu.dst.write = 1;
513 if (k == 3)
514 alu.last = 1;
515 r = r600_bc_add_alu(ctx->bc, &alu);
516 if (r)
517 return r;
518 }
519 ctx->src[i].sel = treg;
520 ctx->src[i].rel =0;
521 j--;
522 }
523 }
524 return 0;
525 }
526
527 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
528 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
529 {
530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
531 struct r600_bc_alu alu;
532 int i, j, k, nliteral, r;
533
534 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
535 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
536 nliteral++;
537 }
538 }
539 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
540 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
541 int treg = r600_get_temp(ctx);
542 for (k = 0; k < 4; k++) {
543 memset(&alu, 0, sizeof(struct r600_bc_alu));
544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
545 alu.src[0].sel = ctx->src[i].sel;
546 alu.src[0].chan = k;
547 alu.src[0].value = ctx->src[i].value[k];
548 alu.dst.sel = treg;
549 alu.dst.chan = k;
550 alu.dst.write = 1;
551 if (k == 3)
552 alu.last = 1;
553 r = r600_bc_add_alu(ctx->bc, &alu);
554 if (r)
555 return r;
556 }
557 ctx->src[i].sel = treg;
558 j--;
559 }
560 }
561 return 0;
562 }
563
564 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
565 {
566 struct tgsi_full_immediate *immediate;
567 struct tgsi_full_property *property;
568 struct r600_shader_ctx ctx;
569 struct r600_bc_output output[32];
570 unsigned output_done, noutput;
571 unsigned opcode;
572 int i, r = 0, pos0;
573
574 ctx.bc = &shader->bc;
575 ctx.shader = shader;
576 r = r600_bc_init(ctx.bc, shader->family);
577 if (r)
578 return r;
579 ctx.tokens = tokens;
580 tgsi_scan_shader(tokens, &ctx.info);
581 tgsi_parse_init(&ctx.parse, tokens);
582 ctx.type = ctx.parse.FullHeader.Processor.Processor;
583 shader->processor_type = ctx.type;
584 ctx.bc->type = shader->processor_type;
585
586 /* register allocations */
587 /* Values [0,127] correspond to GPR[0..127].
588 * Values [128,159] correspond to constant buffer bank 0
589 * Values [160,191] correspond to constant buffer bank 1
590 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
591 * Values [256,287] correspond to constant buffer bank 2 (EG)
592 * Values [288,319] correspond to constant buffer bank 3 (EG)
593 * Other special values are shown in the list below.
594 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
595 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
596 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
597 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
598 * 248 SQ_ALU_SRC_0: special constant 0.0.
599 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
600 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
601 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
602 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
603 * 253 SQ_ALU_SRC_LITERAL: literal constant.
604 * 254 SQ_ALU_SRC_PV: previous vector result.
605 * 255 SQ_ALU_SRC_PS: previous scalar result.
606 */
607 for (i = 0; i < TGSI_FILE_COUNT; i++) {
608 ctx.file_offset[i] = 0;
609 }
610 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
611 ctx.file_offset[TGSI_FILE_INPUT] = 1;
612 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
613 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
614 } else {
615 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
616 }
617 }
618 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
619 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
620 }
621 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
622 ctx.info.file_count[TGSI_FILE_INPUT];
623 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
624 ctx.info.file_count[TGSI_FILE_OUTPUT];
625
626 /* Outside the GPR range. This will be translated to one of the
627 * kcache banks later. */
628 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
629
630 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
631 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
632 ctx.info.file_count[TGSI_FILE_TEMPORARY];
633 ctx.temp_reg = ctx.ar_reg + 1;
634
635 ctx.nliterals = 0;
636 ctx.literals = NULL;
637 shader->fs_write_all = FALSE;
638 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
639 tgsi_parse_token(&ctx.parse);
640 switch (ctx.parse.FullToken.Token.Type) {
641 case TGSI_TOKEN_TYPE_IMMEDIATE:
642 immediate = &ctx.parse.FullToken.FullImmediate;
643 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
644 if(ctx.literals == NULL) {
645 r = -ENOMEM;
646 goto out_err;
647 }
648 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
649 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
650 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
651 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
652 ctx.nliterals++;
653 break;
654 case TGSI_TOKEN_TYPE_DECLARATION:
655 r = tgsi_declaration(&ctx);
656 if (r)
657 goto out_err;
658 break;
659 case TGSI_TOKEN_TYPE_INSTRUCTION:
660 r = tgsi_is_supported(&ctx);
661 if (r)
662 goto out_err;
663 ctx.max_driver_temp_used = 0;
664 /* reserve first tmp for everyone */
665 r600_get_temp(&ctx);
666
667 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
668 if ((r = tgsi_split_constant(&ctx)))
669 goto out_err;
670 if ((r = tgsi_split_literal_constant(&ctx)))
671 goto out_err;
672 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
673 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
674 else
675 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
676 r = ctx.inst_info->process(&ctx);
677 if (r)
678 goto out_err;
679 break;
680 case TGSI_TOKEN_TYPE_PROPERTY:
681 property = &ctx.parse.FullToken.FullProperty;
682 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
683 if (property->u[0].Data == 1)
684 shader->fs_write_all = TRUE;
685 }
686 break;
687 default:
688 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
689 r = -EINVAL;
690 goto out_err;
691 }
692 }
693 /* export output */
694 noutput = shader->noutput;
695 for (i = 0, pos0 = 0; i < noutput; i++) {
696 memset(&output[i], 0, sizeof(struct r600_bc_output));
697 output[i].gpr = shader->output[i].gpr;
698 output[i].elem_size = 3;
699 output[i].swizzle_x = 0;
700 output[i].swizzle_y = 1;
701 output[i].swizzle_z = 2;
702 output[i].swizzle_w = 3;
703 output[i].burst_count = 1;
704 output[i].barrier = 1;
705 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
706 output[i].array_base = i - pos0;
707 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
708 switch (ctx.type) {
709 case TGSI_PROCESSOR_VERTEX:
710 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
711 output[i].array_base = 60;
712 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
713 /* position doesn't count in array_base */
714 pos0++;
715 }
716 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
717 output[i].array_base = 61;
718 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
719 /* position doesn't count in array_base */
720 pos0++;
721 }
722 break;
723 case TGSI_PROCESSOR_FRAGMENT:
724 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
725 output[i].array_base = shader->output[i].sid;
726 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
727 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
728 output[i].array_base = 61;
729 output[i].swizzle_x = 2;
730 output[i].swizzle_y = 7;
731 output[i].swizzle_z = output[i].swizzle_w = 7;
732 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
733 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
734 output[i].array_base = 61;
735 output[i].swizzle_x = 7;
736 output[i].swizzle_y = 1;
737 output[i].swizzle_z = output[i].swizzle_w = 7;
738 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
739 } else {
740 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
741 r = -EINVAL;
742 goto out_err;
743 }
744 break;
745 default:
746 R600_ERR("unsupported processor type %d\n", ctx.type);
747 r = -EINVAL;
748 goto out_err;
749 }
750 }
751 /* add fake param output for vertex shader if no param is exported */
752 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
753 for (i = 0, pos0 = 0; i < noutput; i++) {
754 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
755 pos0 = 1;
756 break;
757 }
758 }
759 if (!pos0) {
760 memset(&output[i], 0, sizeof(struct r600_bc_output));
761 output[i].gpr = 0;
762 output[i].elem_size = 3;
763 output[i].swizzle_x = 0;
764 output[i].swizzle_y = 1;
765 output[i].swizzle_z = 2;
766 output[i].swizzle_w = 3;
767 output[i].burst_count = 1;
768 output[i].barrier = 1;
769 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
770 output[i].array_base = 0;
771 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
772 noutput++;
773 }
774 }
775 /* add fake pixel export */
776 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
777 memset(&output[0], 0, sizeof(struct r600_bc_output));
778 output[0].gpr = 0;
779 output[0].elem_size = 3;
780 output[0].swizzle_x = 7;
781 output[0].swizzle_y = 7;
782 output[0].swizzle_z = 7;
783 output[0].swizzle_w = 7;
784 output[0].burst_count = 1;
785 output[0].barrier = 1;
786 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
787 output[0].array_base = 0;
788 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
789 noutput++;
790 }
791 /* set export done on last export of each type */
792 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
793 if (i == (noutput - 1)) {
794 output[i].end_of_program = 1;
795 }
796 if (!(output_done & (1 << output[i].type))) {
797 output_done |= (1 << output[i].type);
798 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
799 }
800 }
801 /* add output to bytecode */
802 for (i = 0; i < noutput; i++) {
803 r = r600_bc_add_output(ctx.bc, &output[i]);
804 if (r)
805 goto out_err;
806 }
807 free(ctx.literals);
808 tgsi_parse_free(&ctx.parse);
809 return 0;
810 out_err:
811 free(ctx.literals);
812 tgsi_parse_free(&ctx.parse);
813 return r;
814 }
815
816 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
817 {
818 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
819 return -EINVAL;
820 }
821
/*
 * Instruction handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to emit */
	return 0;
}
826
827 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
828 const struct r600_shader_src *shader_src,
829 unsigned chan)
830 {
831 bc_src->sel = shader_src->sel;
832 bc_src->chan = shader_src->swizzle[chan];
833 bc_src->neg = shader_src->neg;
834 bc_src->abs = shader_src->abs;
835 bc_src->rel = shader_src->rel;
836 bc_src->value = shader_src->value[bc_src->chan];
837 }
838
839 static void tgsi_dst(struct r600_shader_ctx *ctx,
840 const struct tgsi_full_dst_register *tgsi_dst,
841 unsigned swizzle,
842 struct r600_bc_alu_dst *r600_dst)
843 {
844 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
845
846 r600_dst->sel = tgsi_dst->Register.Index;
847 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
848 r600_dst->chan = swizzle;
849 r600_dst->write = 1;
850 if (tgsi_dst->Register.Indirect)
851 r600_dst->rel = V_SQ_REL_RELATIVE;
852 if (inst->Instruction.Saturate) {
853 r600_dst->clamp = 1;
854 }
855 }
856
/*
 * Return the index (0..3) of the highest channel enabled in `writemask`.
 * Only the low four bits are considered; 0 is returned when none is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan downwards; channel 0 is the answer if nothing higher is set */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1u)
			break;
	}
	return chan;
}
868
869 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
870 {
871 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
872 struct r600_bc_alu alu;
873 int i, j, r;
874 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
875
876 for (i = 0; i < lasti + 1; i++) {
877 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
878 continue;
879
880 memset(&alu, 0, sizeof(struct r600_bc_alu));
881 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
882
883 alu.inst = ctx->inst_info->r600_opcode;
884 if (!swap) {
885 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
886 r600_bc_src(&alu.src[j], &ctx->src[j], i);
887 }
888 } else {
889 r600_bc_src(&alu.src[0], &ctx->src[1], i);
890 r600_bc_src(&alu.src[1], &ctx->src[0], i);
891 }
892 /* handle some special cases */
893 switch (ctx->inst_info->tgsi_opcode) {
894 case TGSI_OPCODE_SUB:
895 alu.src[1].neg = 1;
896 break;
897 case TGSI_OPCODE_ABS:
898 alu.src[0].abs = 1;
899 break;
900 default:
901 break;
902 }
903 if (i == lasti) {
904 alu.last = 1;
905 }
906 r = r600_bc_add_alu(ctx->bc, &alu);
907 if (r)
908 return r;
909 }
910 return 0;
911 }
912
/*
 * Per-channel two-operand translation with the sources in TGSI order.
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
917
/*
 * Per-channel two-operand translation with the two sources exchanged.
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
922
923 /*
924 * r600 - trunc to -PI..PI range
925 * r700 - normalize by dividing by 2PI
926 * see fdo bug 27901
927 */
928 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
929 {
930 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
931 static float double_pi = 3.1415926535 * 2;
932 static float neg_pi = -3.1415926535;
933
934 int r;
935 struct r600_bc_alu alu;
936
937 memset(&alu, 0, sizeof(struct r600_bc_alu));
938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
939 alu.is_op3 = 1;
940
941 alu.dst.chan = 0;
942 alu.dst.sel = ctx->temp_reg;
943 alu.dst.write = 1;
944
945 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
946
947 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
948 alu.src[1].chan = 0;
949 alu.src[1].value = *(uint32_t *)&half_inv_pi;
950 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
951 alu.src[2].chan = 0;
952 alu.last = 1;
953 r = r600_bc_add_alu(ctx->bc, &alu);
954 if (r)
955 return r;
956
957 memset(&alu, 0, sizeof(struct r600_bc_alu));
958 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
959
960 alu.dst.chan = 0;
961 alu.dst.sel = ctx->temp_reg;
962 alu.dst.write = 1;
963
964 alu.src[0].sel = ctx->temp_reg;
965 alu.src[0].chan = 0;
966 alu.last = 1;
967 r = r600_bc_add_alu(ctx->bc, &alu);
968 if (r)
969 return r;
970
971 memset(&alu, 0, sizeof(struct r600_bc_alu));
972 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
973 alu.is_op3 = 1;
974
975 alu.dst.chan = 0;
976 alu.dst.sel = ctx->temp_reg;
977 alu.dst.write = 1;
978
979 alu.src[0].sel = ctx->temp_reg;
980 alu.src[0].chan = 0;
981
982 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
983 alu.src[1].chan = 0;
984 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
985 alu.src[2].chan = 0;
986
987 if (ctx->bc->chiprev == CHIPREV_R600) {
988 alu.src[1].value = *(uint32_t *)&double_pi;
989 alu.src[2].value = *(uint32_t *)&neg_pi;
990 } else {
991 alu.src[1].sel = V_SQ_ALU_SRC_1;
992 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
993 alu.src[2].neg = 1;
994 }
995
996 alu.last = 1;
997 r = r600_bc_add_alu(ctx->bc, &alu);
998 if (r)
999 return r;
1000 return 0;
1001 }
1002
1003 static int tgsi_trig(struct r600_shader_ctx *ctx)
1004 {
1005 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1006 struct r600_bc_alu alu;
1007 int i, r;
1008 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1009
1010 r = tgsi_setup_trig(ctx);
1011 if (r)
1012 return r;
1013
1014 memset(&alu, 0, sizeof(struct r600_bc_alu));
1015 alu.inst = ctx->inst_info->r600_opcode;
1016 alu.dst.chan = 0;
1017 alu.dst.sel = ctx->temp_reg;
1018 alu.dst.write = 1;
1019
1020 alu.src[0].sel = ctx->temp_reg;
1021 alu.src[0].chan = 0;
1022 alu.last = 1;
1023 r = r600_bc_add_alu(ctx->bc, &alu);
1024 if (r)
1025 return r;
1026
1027 /* replicate result */
1028 for (i = 0; i < lasti + 1; i++) {
1029 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1030 continue;
1031
1032 memset(&alu, 0, sizeof(struct r600_bc_alu));
1033 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1034
1035 alu.src[0].sel = ctx->temp_reg;
1036 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1037 if (i == lasti)
1038 alu.last = 1;
1039 r = r600_bc_add_alu(ctx->bc, &alu);
1040 if (r)
1041 return r;
1042 }
1043 return 0;
1044 }
1045
1046 static int tgsi_scs(struct r600_shader_ctx *ctx)
1047 {
1048 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1049 struct r600_bc_alu alu;
1050 int r;
1051
1052 /* We'll only need the trig stuff if we are going to write to the
1053 * X or Y components of the destination vector.
1054 */
1055 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1056 r = tgsi_setup_trig(ctx);
1057 if (r)
1058 return r;
1059 }
1060
1061 /* dst.x = COS */
1062 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1063 memset(&alu, 0, sizeof(struct r600_bc_alu));
1064 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1065 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1066
1067 alu.src[0].sel = ctx->temp_reg;
1068 alu.src[0].chan = 0;
1069 alu.last = 1;
1070 r = r600_bc_add_alu(ctx->bc, &alu);
1071 if (r)
1072 return r;
1073 }
1074
1075 /* dst.y = SIN */
1076 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1077 memset(&alu, 0, sizeof(struct r600_bc_alu));
1078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1079 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1080
1081 alu.src[0].sel = ctx->temp_reg;
1082 alu.src[0].chan = 0;
1083 alu.last = 1;
1084 r = r600_bc_add_alu(ctx->bc, &alu);
1085 if (r)
1086 return r;
1087 }
1088
1089 /* dst.z = 0.0; */
1090 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1091 memset(&alu, 0, sizeof(struct r600_bc_alu));
1092
1093 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1094
1095 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1096
1097 alu.src[0].sel = V_SQ_ALU_SRC_0;
1098 alu.src[0].chan = 0;
1099
1100 alu.last = 1;
1101
1102 r = r600_bc_add_alu(ctx->bc, &alu);
1103 if (r)
1104 return r;
1105 }
1106
1107 /* dst.w = 1.0; */
1108 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1109 memset(&alu, 0, sizeof(struct r600_bc_alu));
1110
1111 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1112
1113 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1114
1115 alu.src[0].sel = V_SQ_ALU_SRC_1;
1116 alu.src[0].chan = 0;
1117
1118 alu.last = 1;
1119
1120 r = r600_bc_add_alu(ctx->bc, &alu);
1121 if (r)
1122 return r;
1123 }
1124
1125 return 0;
1126 }
1127
1128 static int tgsi_kill(struct r600_shader_ctx *ctx)
1129 {
1130 struct r600_bc_alu alu;
1131 int i, r;
1132
1133 for (i = 0; i < 4; i++) {
1134 memset(&alu, 0, sizeof(struct r600_bc_alu));
1135 alu.inst = ctx->inst_info->r600_opcode;
1136
1137 alu.dst.chan = i;
1138
1139 alu.src[0].sel = V_SQ_ALU_SRC_0;
1140
1141 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1142 alu.src[1].sel = V_SQ_ALU_SRC_1;
1143 alu.src[1].neg = 1;
1144 } else {
1145 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1146 }
1147 if (i == 3) {
1148 alu.last = 1;
1149 }
1150 r = r600_bc_add_alu(ctx->bc, &alu);
1151 if (r)
1152 return r;
1153 }
1154
1155 /* kill must be last in ALU */
1156 ctx->bc->force_add_cf = 1;
1157 ctx->shader->uses_kill = TRUE;
1158 return 0;
1159 }
1160
1161 static int tgsi_lit(struct r600_shader_ctx *ctx)
1162 {
1163 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1164 struct r600_bc_alu alu;
1165 int r;
1166
1167 /* dst.x, <- 1.0 */
1168 memset(&alu, 0, sizeof(struct r600_bc_alu));
1169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1170 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1171 alu.src[0].chan = 0;
1172 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1173 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1174 r = r600_bc_add_alu(ctx->bc, &alu);
1175 if (r)
1176 return r;
1177
1178 /* dst.y = max(src.x, 0.0) */
1179 memset(&alu, 0, sizeof(struct r600_bc_alu));
1180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1181 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1182 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1183 alu.src[1].chan = 0;
1184 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1185 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1186 r = r600_bc_add_alu(ctx->bc, &alu);
1187 if (r)
1188 return r;
1189
1190 /* dst.w, <- 1.0 */
1191 memset(&alu, 0, sizeof(struct r600_bc_alu));
1192 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1193 alu.src[0].sel = V_SQ_ALU_SRC_1;
1194 alu.src[0].chan = 0;
1195 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1196 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1197 alu.last = 1;
1198 r = r600_bc_add_alu(ctx->bc, &alu);
1199 if (r)
1200 return r;
1201
1202 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1203 {
1204 int chan;
1205 int sel;
1206
1207 /* dst.z = log(src.y) */
1208 memset(&alu, 0, sizeof(struct r600_bc_alu));
1209 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1210 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1211 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1212 alu.last = 1;
1213 r = r600_bc_add_alu(ctx->bc, &alu);
1214 if (r)
1215 return r;
1216
1217 chan = alu.dst.chan;
1218 sel = alu.dst.sel;
1219
1220 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1221 memset(&alu, 0, sizeof(struct r600_bc_alu));
1222 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1223 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1224 alu.src[1].sel = sel;
1225 alu.src[1].chan = chan;
1226
1227 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1228 alu.dst.sel = ctx->temp_reg;
1229 alu.dst.chan = 0;
1230 alu.dst.write = 1;
1231 alu.is_op3 = 1;
1232 alu.last = 1;
1233 r = r600_bc_add_alu(ctx->bc, &alu);
1234 if (r)
1235 return r;
1236
1237 /* dst.z = exp(tmp.x) */
1238 memset(&alu, 0, sizeof(struct r600_bc_alu));
1239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1240 alu.src[0].sel = ctx->temp_reg;
1241 alu.src[0].chan = 0;
1242 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1243 alu.last = 1;
1244 r = r600_bc_add_alu(ctx->bc, &alu);
1245 if (r)
1246 return r;
1247 }
1248 return 0;
1249 }
1250
1251 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1252 {
1253 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1254 struct r600_bc_alu alu;
1255 int i, r;
1256
1257 memset(&alu, 0, sizeof(struct r600_bc_alu));
1258
1259 /* FIXME:
1260 * For state trackers other than OpenGL, we'll want to use
1261 * _RECIPSQRT_IEEE instead.
1262 */
1263 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1264
1265 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1266 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1267 alu.src[i].abs = 1;
1268 }
1269 alu.dst.sel = ctx->temp_reg;
1270 alu.dst.write = 1;
1271 alu.last = 1;
1272 r = r600_bc_add_alu(ctx->bc, &alu);
1273 if (r)
1274 return r;
1275 /* replicate result */
1276 return tgsi_helper_tempx_replicate(ctx);
1277 }
1278
1279 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1280 {
1281 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1282 struct r600_bc_alu alu;
1283 int i, r;
1284
1285 for (i = 0; i < 4; i++) {
1286 memset(&alu, 0, sizeof(struct r600_bc_alu));
1287 alu.src[0].sel = ctx->temp_reg;
1288 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1289 alu.dst.chan = i;
1290 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1291 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1292 if (i == 3)
1293 alu.last = 1;
1294 r = r600_bc_add_alu(ctx->bc, &alu);
1295 if (r)
1296 return r;
1297 }
1298 return 0;
1299 }
1300
1301 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1302 {
1303 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1304 struct r600_bc_alu alu;
1305 int i, r;
1306
1307 memset(&alu, 0, sizeof(struct r600_bc_alu));
1308 alu.inst = ctx->inst_info->r600_opcode;
1309 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1310 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1311 }
1312 alu.dst.sel = ctx->temp_reg;
1313 alu.dst.write = 1;
1314 alu.last = 1;
1315 r = r600_bc_add_alu(ctx->bc, &alu);
1316 if (r)
1317 return r;
1318 /* replicate result */
1319 return tgsi_helper_tempx_replicate(ctx);
1320 }
1321
1322 static int tgsi_pow(struct r600_shader_ctx *ctx)
1323 {
1324 struct r600_bc_alu alu;
1325 int r;
1326
1327 /* LOG2(a) */
1328 memset(&alu, 0, sizeof(struct r600_bc_alu));
1329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1330 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1331 alu.dst.sel = ctx->temp_reg;
1332 alu.dst.write = 1;
1333 alu.last = 1;
1334 r = r600_bc_add_alu(ctx->bc, &alu);
1335 if (r)
1336 return r;
1337 /* b * LOG2(a) */
1338 memset(&alu, 0, sizeof(struct r600_bc_alu));
1339 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1340 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1341 alu.src[1].sel = ctx->temp_reg;
1342 alu.dst.sel = ctx->temp_reg;
1343 alu.dst.write = 1;
1344 alu.last = 1;
1345 r = r600_bc_add_alu(ctx->bc, &alu);
1346 if (r)
1347 return r;
1348 /* POW(a,b) = EXP2(b * LOG2(a))*/
1349 memset(&alu, 0, sizeof(struct r600_bc_alu));
1350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1351 alu.src[0].sel = ctx->temp_reg;
1352 alu.dst.sel = ctx->temp_reg;
1353 alu.dst.write = 1;
1354 alu.last = 1;
1355 r = r600_bc_add_alu(ctx->bc, &alu);
1356 if (r)
1357 return r;
1358 return tgsi_helper_tempx_replicate(ctx);
1359 }
1360
1361 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1362 {
1363 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1364 struct r600_bc_alu alu;
1365 int i, r;
1366
1367 /* tmp = (src > 0 ? 1 : src) */
1368 for (i = 0; i < 4; i++) {
1369 memset(&alu, 0, sizeof(struct r600_bc_alu));
1370 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1371 alu.is_op3 = 1;
1372
1373 alu.dst.sel = ctx->temp_reg;
1374 alu.dst.chan = i;
1375
1376 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1377 alu.src[1].sel = V_SQ_ALU_SRC_1;
1378 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1379
1380 if (i == 3)
1381 alu.last = 1;
1382 r = r600_bc_add_alu(ctx->bc, &alu);
1383 if (r)
1384 return r;
1385 }
1386
1387 /* dst = (-tmp > 0 ? -1 : tmp) */
1388 for (i = 0; i < 4; i++) {
1389 memset(&alu, 0, sizeof(struct r600_bc_alu));
1390 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1391 alu.is_op3 = 1;
1392 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1393
1394 alu.src[0].sel = ctx->temp_reg;
1395 alu.src[0].chan = i;
1396 alu.src[0].neg = 1;
1397
1398 alu.src[1].sel = V_SQ_ALU_SRC_1;
1399 alu.src[1].neg = 1;
1400
1401 alu.src[2].sel = ctx->temp_reg;
1402 alu.src[2].chan = i;
1403
1404 if (i == 3)
1405 alu.last = 1;
1406 r = r600_bc_add_alu(ctx->bc, &alu);
1407 if (r)
1408 return r;
1409 }
1410 return 0;
1411 }
1412
1413 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1414 {
1415 struct r600_bc_alu alu;
1416 int i, r;
1417
1418 for (i = 0; i < 4; i++) {
1419 memset(&alu, 0, sizeof(struct r600_bc_alu));
1420 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1422 alu.dst.chan = i;
1423 } else {
1424 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1425 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1426 alu.src[0].sel = ctx->temp_reg;
1427 alu.src[0].chan = i;
1428 }
1429 if (i == 3) {
1430 alu.last = 1;
1431 }
1432 r = r600_bc_add_alu(ctx->bc, &alu);
1433 if (r)
1434 return r;
1435 }
1436 return 0;
1437 }
1438
1439 static int tgsi_op3(struct r600_shader_ctx *ctx)
1440 {
1441 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1442 struct r600_bc_alu alu;
1443 int i, j, r;
1444 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1445
1446 for (i = 0; i < lasti + 1; i++) {
1447 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1448 continue;
1449
1450 memset(&alu, 0, sizeof(struct r600_bc_alu));
1451 alu.inst = ctx->inst_info->r600_opcode;
1452 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1453 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1454 }
1455
1456 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1457 alu.dst.chan = i;
1458 alu.dst.write = 1;
1459 alu.is_op3 = 1;
1460 if (i == lasti) {
1461 alu.last = 1;
1462 }
1463 r = r600_bc_add_alu(ctx->bc, &alu);
1464 if (r)
1465 return r;
1466 }
1467 return 0;
1468 }
1469
1470 static int tgsi_dp(struct r600_shader_ctx *ctx)
1471 {
1472 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1473 struct r600_bc_alu alu;
1474 int i, j, r;
1475
1476 for (i = 0; i < 4; i++) {
1477 memset(&alu, 0, sizeof(struct r600_bc_alu));
1478 alu.inst = ctx->inst_info->r600_opcode;
1479 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1480 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1481 }
1482
1483 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1484 alu.dst.chan = i;
1485 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1486 /* handle some special cases */
1487 switch (ctx->inst_info->tgsi_opcode) {
1488 case TGSI_OPCODE_DP2:
1489 if (i > 1) {
1490 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1491 alu.src[0].chan = alu.src[1].chan = 0;
1492 }
1493 break;
1494 case TGSI_OPCODE_DP3:
1495 if (i > 2) {
1496 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1497 alu.src[0].chan = alu.src[1].chan = 0;
1498 }
1499 break;
1500 case TGSI_OPCODE_DPH:
1501 if (i == 3) {
1502 alu.src[0].sel = V_SQ_ALU_SRC_1;
1503 alu.src[0].chan = 0;
1504 alu.src[0].neg = 0;
1505 }
1506 break;
1507 default:
1508 break;
1509 }
1510 if (i == 3) {
1511 alu.last = 1;
1512 }
1513 r = r600_bc_add_alu(ctx->bc, &alu);
1514 if (r)
1515 return r;
1516 }
1517 return 0;
1518 }
1519
1520 static int tgsi_tex(struct r600_shader_ctx *ctx)
1521 {
1522 static float one_point_five = 1.5f;
1523 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1524 struct r600_bc_tex tex;
1525 struct r600_bc_alu alu;
1526 unsigned src_gpr;
1527 int r, i;
1528 int opcode;
1529 /* Texture fetch instructions can only use gprs as source.
1530 * Also they cannot negate the source or take the absolute value */
1531 const boolean src_requires_loading =
1532 (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1533 inst->Src[0].Register.File != TGSI_FILE_INPUT) ||
1534 ctx->src[0].neg || ctx->src[0].abs;
1535 boolean src_loaded = FALSE;
1536
1537 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1538
1539 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1540 /* Add perspective divide */
1541 memset(&alu, 0, sizeof(struct r600_bc_alu));
1542 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1543 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1544
1545 alu.dst.sel = ctx->temp_reg;
1546 alu.dst.chan = 3;
1547 alu.last = 1;
1548 alu.dst.write = 1;
1549 r = r600_bc_add_alu(ctx->bc, &alu);
1550 if (r)
1551 return r;
1552
1553 for (i = 0; i < 3; i++) {
1554 memset(&alu, 0, sizeof(struct r600_bc_alu));
1555 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1556 alu.src[0].sel = ctx->temp_reg;
1557 alu.src[0].chan = 3;
1558 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1559 alu.dst.sel = ctx->temp_reg;
1560 alu.dst.chan = i;
1561 alu.dst.write = 1;
1562 r = r600_bc_add_alu(ctx->bc, &alu);
1563 if (r)
1564 return r;
1565 }
1566 memset(&alu, 0, sizeof(struct r600_bc_alu));
1567 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1568 alu.src[0].sel = V_SQ_ALU_SRC_1;
1569 alu.src[0].chan = 0;
1570 alu.dst.sel = ctx->temp_reg;
1571 alu.dst.chan = 3;
1572 alu.last = 1;
1573 alu.dst.write = 1;
1574 r = r600_bc_add_alu(ctx->bc, &alu);
1575 if (r)
1576 return r;
1577 src_loaded = TRUE;
1578 src_gpr = ctx->temp_reg;
1579 }
1580
1581 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1582 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1583 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1584
1585 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1586 for (i = 0; i < 4; i++) {
1587 memset(&alu, 0, sizeof(struct r600_bc_alu));
1588 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1589 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1590 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1591 alu.dst.sel = ctx->temp_reg;
1592 alu.dst.chan = i;
1593 if (i == 3)
1594 alu.last = 1;
1595 alu.dst.write = 1;
1596 r = r600_bc_add_alu(ctx->bc, &alu);
1597 if (r)
1598 return r;
1599 }
1600
1601 /* tmp1.z = RCP_e(|tmp1.z|) */
1602 memset(&alu, 0, sizeof(struct r600_bc_alu));
1603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1604 alu.src[0].sel = ctx->temp_reg;
1605 alu.src[0].chan = 2;
1606 alu.src[0].abs = 1;
1607 alu.dst.sel = ctx->temp_reg;
1608 alu.dst.chan = 2;
1609 alu.dst.write = 1;
1610 alu.last = 1;
1611 r = r600_bc_add_alu(ctx->bc, &alu);
1612 if (r)
1613 return r;
1614
1615 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1616 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1617 * muladd has no writemask, have to use another temp
1618 */
1619 memset(&alu, 0, sizeof(struct r600_bc_alu));
1620 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1621 alu.is_op3 = 1;
1622
1623 alu.src[0].sel = ctx->temp_reg;
1624 alu.src[0].chan = 0;
1625 alu.src[1].sel = ctx->temp_reg;
1626 alu.src[1].chan = 2;
1627
1628 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1629 alu.src[2].chan = 0;
1630 alu.src[2].value = *(uint32_t *)&one_point_five;
1631
1632 alu.dst.sel = ctx->temp_reg;
1633 alu.dst.chan = 0;
1634 alu.dst.write = 1;
1635
1636 r = r600_bc_add_alu(ctx->bc, &alu);
1637 if (r)
1638 return r;
1639
1640 memset(&alu, 0, sizeof(struct r600_bc_alu));
1641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1642 alu.is_op3 = 1;
1643
1644 alu.src[0].sel = ctx->temp_reg;
1645 alu.src[0].chan = 1;
1646 alu.src[1].sel = ctx->temp_reg;
1647 alu.src[1].chan = 2;
1648
1649 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1650 alu.src[2].chan = 0;
1651 alu.src[2].value = *(uint32_t *)&one_point_five;
1652
1653 alu.dst.sel = ctx->temp_reg;
1654 alu.dst.chan = 1;
1655 alu.dst.write = 1;
1656
1657 alu.last = 1;
1658 r = r600_bc_add_alu(ctx->bc, &alu);
1659 if (r)
1660 return r;
1661
1662 src_loaded = TRUE;
1663 src_gpr = ctx->temp_reg;
1664 }
1665
1666 if (src_requires_loading && !src_loaded) {
1667 for (i = 0; i < 4; i++) {
1668 memset(&alu, 0, sizeof(struct r600_bc_alu));
1669 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1670 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1671 alu.dst.sel = ctx->temp_reg;
1672 alu.dst.chan = i;
1673 if (i == 3)
1674 alu.last = 1;
1675 alu.dst.write = 1;
1676 r = r600_bc_add_alu(ctx->bc, &alu);
1677 if (r)
1678 return r;
1679 }
1680 src_loaded = TRUE;
1681 src_gpr = ctx->temp_reg;
1682 }
1683
1684 opcode = ctx->inst_info->r600_opcode;
1685 if (opcode == SQ_TEX_INST_SAMPLE &&
1686 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1687 opcode = SQ_TEX_INST_SAMPLE_C;
1688
1689 memset(&tex, 0, sizeof(struct r600_bc_tex));
1690 tex.inst = opcode;
1691 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1692 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1693 tex.src_gpr = src_gpr;
1694 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1695 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1696 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1697 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1698 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1699 if (src_loaded) {
1700 tex.src_sel_x = 0;
1701 tex.src_sel_y = 1;
1702 tex.src_sel_z = 2;
1703 tex.src_sel_w = 3;
1704 } else {
1705 tex.src_sel_x = ctx->src[0].swizzle[0];
1706 tex.src_sel_y = ctx->src[0].swizzle[1];
1707 tex.src_sel_z = ctx->src[0].swizzle[2];
1708 tex.src_sel_w = ctx->src[0].swizzle[3];
1709 }
1710
1711 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1712 tex.src_sel_x = 1;
1713 tex.src_sel_y = 0;
1714 tex.src_sel_z = 3;
1715 tex.src_sel_w = 1;
1716 }
1717
1718 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1719 tex.coord_type_x = 1;
1720 tex.coord_type_y = 1;
1721 tex.coord_type_z = 1;
1722 tex.coord_type_w = 1;
1723 }
1724
1725 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1726 tex.coord_type_z = 0;
1727 tex.src_sel_z = tex.src_sel_y;
1728 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1729 tex.coord_type_z = 0;
1730
1731 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1732 tex.src_sel_w = tex.src_sel_z;
1733
1734 r = r600_bc_add_tex(ctx->bc, &tex);
1735 if (r)
1736 return r;
1737
1738 /* add shadow ambient support - gallium doesn't do it yet */
1739 return 0;
1740 }
1741
1742 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1743 {
1744 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1745 struct r600_bc_alu alu;
1746 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1747 unsigned i;
1748 int r;
1749
1750 /* optimize if it's just an equal balance */
1751 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1752 for (i = 0; i < lasti + 1; i++) {
1753 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1754 continue;
1755
1756 memset(&alu, 0, sizeof(struct r600_bc_alu));
1757 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1758 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1759 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1760 alu.omod = 3;
1761 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1762 alu.dst.chan = i;
1763 if (i == lasti) {
1764 alu.last = 1;
1765 }
1766 r = r600_bc_add_alu(ctx->bc, &alu);
1767 if (r)
1768 return r;
1769 }
1770 return 0;
1771 }
1772
1773 /* 1 - src0 */
1774 for (i = 0; i < lasti + 1; i++) {
1775 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1776 continue;
1777
1778 memset(&alu, 0, sizeof(struct r600_bc_alu));
1779 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1780 alu.src[0].sel = V_SQ_ALU_SRC_1;
1781 alu.src[0].chan = 0;
1782 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1783 alu.src[1].neg = 1;
1784 alu.dst.sel = ctx->temp_reg;
1785 alu.dst.chan = i;
1786 if (i == lasti) {
1787 alu.last = 1;
1788 }
1789 alu.dst.write = 1;
1790 r = r600_bc_add_alu(ctx->bc, &alu);
1791 if (r)
1792 return r;
1793 }
1794
1795 /* (1 - src0) * src2 */
1796 for (i = 0; i < lasti + 1; i++) {
1797 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1798 continue;
1799
1800 memset(&alu, 0, sizeof(struct r600_bc_alu));
1801 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1802 alu.src[0].sel = ctx->temp_reg;
1803 alu.src[0].chan = i;
1804 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1805 alu.dst.sel = ctx->temp_reg;
1806 alu.dst.chan = i;
1807 if (i == lasti) {
1808 alu.last = 1;
1809 }
1810 alu.dst.write = 1;
1811 r = r600_bc_add_alu(ctx->bc, &alu);
1812 if (r)
1813 return r;
1814 }
1815
1816 /* src0 * src1 + (1 - src0) * src2 */
1817 for (i = 0; i < lasti + 1; i++) {
1818 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1819 continue;
1820
1821 memset(&alu, 0, sizeof(struct r600_bc_alu));
1822 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1823 alu.is_op3 = 1;
1824 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1825 r600_bc_src(&alu.src[1], &ctx->src[1], i);
1826 alu.src[2].sel = ctx->temp_reg;
1827 alu.src[2].chan = i;
1828
1829 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1830 alu.dst.chan = i;
1831 if (i == lasti) {
1832 alu.last = 1;
1833 }
1834 r = r600_bc_add_alu(ctx->bc, &alu);
1835 if (r)
1836 return r;
1837 }
1838 return 0;
1839 }
1840
1841 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1842 {
1843 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1844 struct r600_bc_alu alu;
1845 int i, r;
1846 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1847
1848 for (i = 0; i < lasti + 1; i++) {
1849 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1850 continue;
1851
1852 memset(&alu, 0, sizeof(struct r600_bc_alu));
1853 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1854 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1855 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1856 r600_bc_src(&alu.src[2], &ctx->src[1], i);
1857 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1858 alu.dst.chan = i;
1859 alu.dst.write = 1;
1860 alu.is_op3 = 1;
1861 if (i == lasti)
1862 alu.last = 1;
1863 r = r600_bc_add_alu(ctx->bc, &alu);
1864 if (r)
1865 return r;
1866 }
1867 return 0;
1868 }
1869
1870 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1871 {
1872 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1873 static const unsigned int src0_swizzle[] = {2, 0, 1};
1874 static const unsigned int src1_swizzle[] = {1, 2, 0};
1875 struct r600_bc_alu alu;
1876 uint32_t use_temp = 0;
1877 int i, r;
1878
1879 if (inst->Dst[0].Register.WriteMask != 0xf)
1880 use_temp = 1;
1881
1882 for (i = 0; i < 4; i++) {
1883 memset(&alu, 0, sizeof(struct r600_bc_alu));
1884 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1885 if (i < 3) {
1886 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1887 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
1888 } else {
1889 alu.src[0].sel = V_SQ_ALU_SRC_0;
1890 alu.src[0].chan = i;
1891 alu.src[1].sel = V_SQ_ALU_SRC_0;
1892 alu.src[1].chan = i;
1893 }
1894
1895 alu.dst.sel = ctx->temp_reg;
1896 alu.dst.chan = i;
1897 alu.dst.write = 1;
1898
1899 if (i == 3)
1900 alu.last = 1;
1901 r = r600_bc_add_alu(ctx->bc, &alu);
1902 if (r)
1903 return r;
1904 }
1905
1906 for (i = 0; i < 4; i++) {
1907 memset(&alu, 0, sizeof(struct r600_bc_alu));
1908 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1909
1910 if (i < 3) {
1911 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
1912 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
1913 } else {
1914 alu.src[0].sel = V_SQ_ALU_SRC_0;
1915 alu.src[0].chan = i;
1916 alu.src[1].sel = V_SQ_ALU_SRC_0;
1917 alu.src[1].chan = i;
1918 }
1919
1920 alu.src[2].sel = ctx->temp_reg;
1921 alu.src[2].neg = 1;
1922 alu.src[2].chan = i;
1923
1924 if (use_temp)
1925 alu.dst.sel = ctx->temp_reg;
1926 else
1927 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1928 alu.dst.chan = i;
1929 alu.dst.write = 1;
1930 alu.is_op3 = 1;
1931 if (i == 3)
1932 alu.last = 1;
1933 r = r600_bc_add_alu(ctx->bc, &alu);
1934 if (r)
1935 return r;
1936 }
1937 if (use_temp)
1938 return tgsi_helper_copy(ctx, inst);
1939 return 0;
1940 }
1941
1942 static int tgsi_exp(struct r600_shader_ctx *ctx)
1943 {
1944 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1945 struct r600_bc_alu alu;
1946 int r;
1947
1948 /* result.x = 2^floor(src); */
1949 if (inst->Dst[0].Register.WriteMask & 1) {
1950 memset(&alu, 0, sizeof(struct r600_bc_alu));
1951
1952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1953 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1954
1955 alu.dst.sel = ctx->temp_reg;
1956 alu.dst.chan = 0;
1957 alu.dst.write = 1;
1958 alu.last = 1;
1959 r = r600_bc_add_alu(ctx->bc, &alu);
1960 if (r)
1961 return r;
1962
1963 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1964 alu.src[0].sel = ctx->temp_reg;
1965 alu.src[0].chan = 0;
1966
1967 alu.dst.sel = ctx->temp_reg;
1968 alu.dst.chan = 0;
1969 alu.dst.write = 1;
1970 alu.last = 1;
1971 r = r600_bc_add_alu(ctx->bc, &alu);
1972 if (r)
1973 return r;
1974 }
1975
1976 /* result.y = tmp - floor(tmp); */
1977 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1978 memset(&alu, 0, sizeof(struct r600_bc_alu));
1979
1980 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1981 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1982
1983 alu.dst.sel = ctx->temp_reg;
1984 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1985 // if (r)
1986 // return r;
1987 alu.dst.write = 1;
1988 alu.dst.chan = 1;
1989
1990 alu.last = 1;
1991
1992 r = r600_bc_add_alu(ctx->bc, &alu);
1993 if (r)
1994 return r;
1995 }
1996
1997 /* result.z = RoughApprox2ToX(tmp);*/
1998 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1999 memset(&alu, 0, sizeof(struct r600_bc_alu));
2000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2001 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2002
2003 alu.dst.sel = ctx->temp_reg;
2004 alu.dst.write = 1;
2005 alu.dst.chan = 2;
2006
2007 alu.last = 1;
2008
2009 r = r600_bc_add_alu(ctx->bc, &alu);
2010 if (r)
2011 return r;
2012 }
2013
2014 /* result.w = 1.0;*/
2015 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2016 memset(&alu, 0, sizeof(struct r600_bc_alu));
2017
2018 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2019 alu.src[0].sel = V_SQ_ALU_SRC_1;
2020 alu.src[0].chan = 0;
2021
2022 alu.dst.sel = ctx->temp_reg;
2023 alu.dst.chan = 3;
2024 alu.dst.write = 1;
2025 alu.last = 1;
2026 r = r600_bc_add_alu(ctx->bc, &alu);
2027 if (r)
2028 return r;
2029 }
2030 return tgsi_helper_copy(ctx, inst);
2031 }
2032
2033 static int tgsi_log(struct r600_shader_ctx *ctx)
2034 {
2035 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2036 struct r600_bc_alu alu;
2037 int r;
2038
2039 /* result.x = floor(log2(src)); */
2040 if (inst->Dst[0].Register.WriteMask & 1) {
2041 memset(&alu, 0, sizeof(struct r600_bc_alu));
2042
2043 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2044 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2045
2046 alu.dst.sel = ctx->temp_reg;
2047 alu.dst.chan = 0;
2048 alu.dst.write = 1;
2049 alu.last = 1;
2050 r = r600_bc_add_alu(ctx->bc, &alu);
2051 if (r)
2052 return r;
2053
2054 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2055 alu.src[0].sel = ctx->temp_reg;
2056 alu.src[0].chan = 0;
2057
2058 alu.dst.sel = ctx->temp_reg;
2059 alu.dst.chan = 0;
2060 alu.dst.write = 1;
2061 alu.last = 1;
2062
2063 r = r600_bc_add_alu(ctx->bc, &alu);
2064 if (r)
2065 return r;
2066 }
2067
2068 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2069 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2070 memset(&alu, 0, sizeof(struct r600_bc_alu));
2071
2072 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2073 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2074
2075 alu.dst.sel = ctx->temp_reg;
2076 alu.dst.chan = 1;
2077 alu.dst.write = 1;
2078 alu.last = 1;
2079
2080 r = r600_bc_add_alu(ctx->bc, &alu);
2081 if (r)
2082 return r;
2083
2084 memset(&alu, 0, sizeof(struct r600_bc_alu));
2085
2086 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2087 alu.src[0].sel = ctx->temp_reg;
2088 alu.src[0].chan = 1;
2089
2090 alu.dst.sel = ctx->temp_reg;
2091 alu.dst.chan = 1;
2092 alu.dst.write = 1;
2093 alu.last = 1;
2094
2095 r = r600_bc_add_alu(ctx->bc, &alu);
2096 if (r)
2097 return r;
2098
2099 memset(&alu, 0, sizeof(struct r600_bc_alu));
2100
2101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2102 alu.src[0].sel = ctx->temp_reg;
2103 alu.src[0].chan = 1;
2104
2105 alu.dst.sel = ctx->temp_reg;
2106 alu.dst.chan = 1;
2107 alu.dst.write = 1;
2108 alu.last = 1;
2109
2110 r = r600_bc_add_alu(ctx->bc, &alu);
2111 if (r)
2112 return r;
2113
2114 memset(&alu, 0, sizeof(struct r600_bc_alu));
2115
2116 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2117 alu.src[0].sel = ctx->temp_reg;
2118 alu.src[0].chan = 1;
2119
2120 alu.dst.sel = ctx->temp_reg;
2121 alu.dst.chan = 1;
2122 alu.dst.write = 1;
2123 alu.last = 1;
2124
2125 r = r600_bc_add_alu(ctx->bc, &alu);
2126 if (r)
2127 return r;
2128
2129 memset(&alu, 0, sizeof(struct r600_bc_alu));
2130
2131 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2132
2133 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2134
2135 alu.src[1].sel = ctx->temp_reg;
2136 alu.src[1].chan = 1;
2137
2138 alu.dst.sel = ctx->temp_reg;
2139 alu.dst.chan = 1;
2140 alu.dst.write = 1;
2141 alu.last = 1;
2142
2143 r = r600_bc_add_alu(ctx->bc, &alu);
2144 if (r)
2145 return r;
2146 }
2147
2148 /* result.z = log2(src);*/
2149 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2150 memset(&alu, 0, sizeof(struct r600_bc_alu));
2151
2152 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2153 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2154
2155 alu.dst.sel = ctx->temp_reg;
2156 alu.dst.write = 1;
2157 alu.dst.chan = 2;
2158 alu.last = 1;
2159
2160 r = r600_bc_add_alu(ctx->bc, &alu);
2161 if (r)
2162 return r;
2163 }
2164
2165 /* result.w = 1.0; */
2166 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2167 memset(&alu, 0, sizeof(struct r600_bc_alu));
2168
2169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2170 alu.src[0].sel = V_SQ_ALU_SRC_1;
2171 alu.src[0].chan = 0;
2172
2173 alu.dst.sel = ctx->temp_reg;
2174 alu.dst.chan = 3;
2175 alu.dst.write = 1;
2176 alu.last = 1;
2177
2178 r = r600_bc_add_alu(ctx->bc, &alu);
2179 if (r)
2180 return r;
2181 }
2182
2183 return tgsi_helper_copy(ctx, inst);
2184 }
2185
2186 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2187 {
2188 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2189 struct r600_bc_alu alu;
2190 int r;
2191
2192 memset(&alu, 0, sizeof(struct r600_bc_alu));
2193
2194 switch (inst->Instruction.Opcode) {
2195 case TGSI_OPCODE_ARL:
2196 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2197 break;
2198 case TGSI_OPCODE_ARR:
2199 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2200 break;
2201 default:
2202 assert(0);
2203 return -1;
2204 }
2205
2206 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2207 alu.last = 1;
2208 alu.dst.sel = ctx->ar_reg;
2209 alu.dst.write = 1;
2210 r = r600_bc_add_alu(ctx->bc, &alu);
2211 if (r)
2212 return r;
2213
2214 /* TODO: Note that the MOVA can be avoided if we never use AR for
2215 * indexing non-CB registers in the current ALU clause. Similarly, we
2216 * need to load AR from ar_reg again if we started a new clause
2217 * between ARL and AR usage. The easy way to do that is to remove
2218 * the MOVA here, and load it for the first AR access after ar_reg
2219 * has been modified in each clause. */
2220 memset(&alu, 0, sizeof(struct r600_bc_alu));
2221 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2222 alu.src[0].sel = ctx->ar_reg;
2223 alu.src[0].chan = 0;
2224 alu.last = 1;
2225 r = r600_bc_add_alu(ctx->bc, &alu);
2226 if (r)
2227 return r;
2228 return 0;
2229 }
2230 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2231 {
2232 /* TODO from r600c, ar values don't persist between clauses */
2233 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2234 struct r600_bc_alu alu;
2235 int r;
2236
2237 switch (inst->Instruction.Opcode) {
2238 case TGSI_OPCODE_ARL:
2239 memset(&alu, 0, sizeof(alu));
2240 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2241 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2242 alu.dst.sel = ctx->ar_reg;
2243 alu.dst.write = 1;
2244 alu.last = 1;
2245
2246 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2247 return r;
2248
2249 memset(&alu, 0, sizeof(alu));
2250 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2251 alu.src[0].sel = ctx->ar_reg;
2252 alu.dst.sel = ctx->ar_reg;
2253 alu.dst.write = 1;
2254 alu.last = 1;
2255
2256 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2257 return r;
2258 break;
2259 case TGSI_OPCODE_ARR:
2260 memset(&alu, 0, sizeof(alu));
2261 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2262 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2263 alu.dst.sel = ctx->ar_reg;
2264 alu.dst.write = 1;
2265 alu.last = 1;
2266
2267 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2268 return r;
2269 break;
2270 default:
2271 assert(0);
2272 return -1;
2273 }
2274
2275 memset(&alu, 0, sizeof(alu));
2276 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2277 alu.src[0].sel = ctx->ar_reg;
2278 alu.last = 1;
2279
2280 r = r600_bc_add_alu(ctx->bc, &alu);
2281 if (r)
2282 return r;
2283 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2284 return 0;
2285 }
2286
2287 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2288 {
2289 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2290 struct r600_bc_alu alu;
2291 int i, r = 0;
2292
2293 for (i = 0; i < 4; i++) {
2294 memset(&alu, 0, sizeof(struct r600_bc_alu));
2295
2296 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2297 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2298
2299 if (i == 0 || i == 3) {
2300 alu.src[0].sel = V_SQ_ALU_SRC_1;
2301 } else {
2302 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2303 }
2304
2305 if (i == 0 || i == 2) {
2306 alu.src[1].sel = V_SQ_ALU_SRC_1;
2307 } else {
2308 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2309 }
2310 if (i == 3)
2311 alu.last = 1;
2312 r = r600_bc_add_alu(ctx->bc, &alu);
2313 if (r)
2314 return r;
2315 }
2316 return 0;
2317 }
2318
2319 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2320 {
2321 struct r600_bc_alu alu;
2322 int r;
2323
2324 memset(&alu, 0, sizeof(struct r600_bc_alu));
2325 alu.inst = opcode;
2326 alu.predicate = 1;
2327
2328 alu.dst.sel = ctx->temp_reg;
2329 alu.dst.write = 1;
2330 alu.dst.chan = 0;
2331
2332 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2333 alu.src[1].sel = V_SQ_ALU_SRC_0;
2334 alu.src[1].chan = 0;
2335
2336 alu.last = 1;
2337
2338 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2339 if (r)
2340 return r;
2341 return 0;
2342 }
2343
2344 static int pops(struct r600_shader_ctx *ctx, int pops)
2345 {
2346 int alu_pop = 3;
2347 if (ctx->bc->cf_last) {
2348 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2349 alu_pop = 0;
2350 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2351 alu_pop = 1;
2352 }
2353 alu_pop += pops;
2354 if (alu_pop == 1) {
2355 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2356 ctx->bc->force_add_cf = 1;
2357 } else if (alu_pop == 2) {
2358 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2359 ctx->bc->force_add_cf = 1;
2360 } else {
2361 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2362 ctx->bc->cf_last->pop_count = pops;
2363 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2364 }
2365 return 0;
2366 }
2367
2368 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2369 {
2370 switch(reason) {
2371 case FC_PUSH_VPM:
2372 ctx->bc->callstack[ctx->bc->call_sp].current--;
2373 break;
2374 case FC_PUSH_WQM:
2375 case FC_LOOP:
2376 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2377 break;
2378 case FC_REP:
2379 /* TOODO : for 16 vp asic should -= 2; */
2380 ctx->bc->callstack[ctx->bc->call_sp].current --;
2381 break;
2382 }
2383 }
2384
2385 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2386 {
2387 if (check_max_only) {
2388 int diff;
2389 switch (reason) {
2390 case FC_PUSH_VPM:
2391 diff = 1;
2392 break;
2393 case FC_PUSH_WQM:
2394 diff = 4;
2395 break;
2396 default:
2397 assert(0);
2398 diff = 0;
2399 }
2400 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2401 ctx->bc->callstack[ctx->bc->call_sp].max) {
2402 ctx->bc->callstack[ctx->bc->call_sp].max =
2403 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2404 }
2405 return;
2406 }
2407 switch (reason) {
2408 case FC_PUSH_VPM:
2409 ctx->bc->callstack[ctx->bc->call_sp].current++;
2410 break;
2411 case FC_PUSH_WQM:
2412 case FC_LOOP:
2413 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2414 break;
2415 case FC_REP:
2416 ctx->bc->callstack[ctx->bc->call_sp].current++;
2417 break;
2418 }
2419
2420 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2421 ctx->bc->callstack[ctx->bc->call_sp].max) {
2422 ctx->bc->callstack[ctx->bc->call_sp].max =
2423 ctx->bc->callstack[ctx->bc->call_sp].current;
2424 }
2425 }
2426
2427 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2428 {
2429 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2430
2431 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2432 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2433 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2434 sp->num_mid++;
2435 }
2436
2437 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2438 {
2439 ctx->bc->fc_sp++;
2440 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2441 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2442 }
2443
2444 static void fc_poplevel(struct r600_shader_ctx *ctx)
2445 {
2446 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2447 if (sp->mid) {
2448 free(sp->mid);
2449 sp->mid = NULL;
2450 }
2451 sp->num_mid = 0;
2452 sp->start = NULL;
2453 sp->type = 0;
2454 ctx->bc->fc_sp--;
2455 }
2456
#if 0
/*
 * Compiled-out helpers for RETURN / flag-based loop exit handling.
 * Several of them are still empty stubs.
 */

/* Add a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Add a JUMP CF instruction that pops `pops` levels; `offset` is unused so far. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, clear the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag, add the current instruction's CF opcode, record it as a mid of level fc_sp, pop once. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2504
2505 static int tgsi_if(struct r600_shader_ctx *ctx)
2506 {
2507 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2508
2509 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2510
2511 fc_pushlevel(ctx, FC_IF);
2512
2513 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2514 return 0;
2515 }
2516
2517 static int tgsi_else(struct r600_shader_ctx *ctx)
2518 {
2519 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2520 ctx->bc->cf_last->pop_count = 1;
2521
2522 fc_set_mid(ctx, ctx->bc->fc_sp);
2523 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2524 return 0;
2525 }
2526
2527 static int tgsi_endif(struct r600_shader_ctx *ctx)
2528 {
2529 pops(ctx, 1);
2530 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2531 R600_ERR("if/endif unbalanced in shader\n");
2532 return -1;
2533 }
2534
2535 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2536 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2537 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2538 } else {
2539 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2540 }
2541 fc_poplevel(ctx);
2542
2543 callstack_decrease_current(ctx, FC_PUSH_VPM);
2544 return 0;
2545 }
2546
2547 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2548 {
2549 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2550
2551 fc_pushlevel(ctx, FC_LOOP);
2552
2553 /* check stack depth */
2554 callstack_check_depth(ctx, FC_LOOP, 0);
2555 return 0;
2556 }
2557
2558 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2559 {
2560 int i;
2561
2562 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2563
2564 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2565 R600_ERR("loop/endloop in shader code are not paired.\n");
2566 return -EINVAL;
2567 }
2568
2569 /* fixup loop pointers - from r600isa
2570 LOOP END points to CF after LOOP START,
2571 LOOP START point to CF after LOOP END
2572 BRK/CONT point to LOOP END CF
2573 */
2574 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2575
2576 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2577
2578 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2579 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2580 }
2581 /* TODO add LOOPRET support */
2582 fc_poplevel(ctx);
2583 callstack_decrease_current(ctx, FC_LOOP);
2584 return 0;
2585 }
2586
2587 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2588 {
2589 unsigned int fscp;
2590
2591 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2592 {
2593 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2594 break;
2595 }
2596
2597 if (fscp == 0) {
2598 R600_ERR("Break not inside loop/endloop pair\n");
2599 return -EINVAL;
2600 }
2601
2602 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2603 ctx->bc->cf_last->pop_count = 1;
2604
2605 fc_set_mid(ctx, fscp);
2606
2607 pops(ctx, 1);
2608 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2609 return 0;
2610 }
2611
2612 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2613 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2614 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2615 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2616
2617 /* FIXME:
2618 * For state trackers other than OpenGL, we'll want to use
2619 * _RECIP_IEEE instead.
2620 */
2621 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2622
2623 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2624 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2625 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2626 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2627 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2628 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2629 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2630 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2631 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2632 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2633 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2634 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2635 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2636 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2637 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2638 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2639 /* gap */
2640 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2641 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2642 /* gap */
2643 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2644 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2645 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2646 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2647 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2648 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2649 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2650 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2651 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2652 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2653 /* gap */
2654 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2655 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2656 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2657 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2658 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2659 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2660 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2661 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2662 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2663 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2664 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2665 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2666 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2667 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2668 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2669 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2670 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2671 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2672 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2673 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2674 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2675 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2676 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2677 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2678 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2679 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2680 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2681 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2682 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2683 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2684 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2685 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2686 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2687 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2688 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2689 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2690 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2691 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2692 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2693 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2694 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2695 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2696 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2697 /* gap */
2698 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2699 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2700 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2701 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2702 /* gap */
2703 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2704 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2705 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2706 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2707 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2708 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2709 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2710 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2711 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2712 /* gap */
2713 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2714 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2715 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2716 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2717 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2718 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2719 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2720 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2721 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2722 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2723 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2724 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2725 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2726 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2727 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2728 /* gap */
2729 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2730 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2731 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2732 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2733 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2734 /* gap */
2735 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2736 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2737 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2738 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2739 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2740 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2741 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2742 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2743 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2744 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2745 /* gap */
2746 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2747 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2748 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2749 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2750 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2751 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2752 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2753 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2754 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2755 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2756 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2757 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2758 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2759 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2760 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2761 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2762 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2763 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2764 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2765 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2766 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2767 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2768 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2769 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2770 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2771 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2772 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2773 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2774 };
2775
2776 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2777 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2778 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2779 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2780 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2781 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2782 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2783 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2784 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2785 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2786 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2787 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2788 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2789 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2790 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2791 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2792 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2793 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2794 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2795 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2796 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2797 /* gap */
2798 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2799 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2800 /* gap */
2801 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2802 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2803 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2804 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2805 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2806 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2807 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2808 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2809 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2810 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2811 /* gap */
2812 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2814 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2815 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2816 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2817 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2818 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2819 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2820 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2821 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2822 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2823 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2825 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2826 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2827 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2828 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2829 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2830 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2831 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2832 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2833 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2834 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2835 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2836 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2842 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2844 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2846 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2847 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2848 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2849 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2852 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2853 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2854 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2855 /* gap */
2856 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2857 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2858 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2859 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2860 /* gap */
2861 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2868 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2869 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870 /* gap */
2871 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2875 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2876 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2880 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2883 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2885 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886 /* gap */
2887 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892 /* gap */
2893 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2902 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2903 /* gap */
2904 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2918 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932 };