Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_formats.h"
32 #include "r600_opcodes.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37 int r600_find_vs_semantic_index(struct r600_shader *vs,
38 struct r600_shader *ps, int id)
39 {
40 struct r600_shader_io *input = &ps->input[id];
41
42 for (int i = 0; i < vs->noutput; i++) {
43 if (input->name == vs->output[i].name &&
44 input->sid == vs->output[i].sid) {
45 return i - 1;
46 }
47 }
48 return 0;
49 }
50
51 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
52 {
53 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
54 struct r600_shader *rshader = &shader->shader;
55 void *ptr;
56
57 /* copy new shader */
58 if (shader->bo == NULL) {
59 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
60 if (shader->bo == NULL) {
61 return -ENOMEM;
62 }
63 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
64 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
65 r600_bo_unmap(rctx->radeon, shader->bo);
66 }
67 /* build state */
68 switch (rshader->processor_type) {
69 case TGSI_PROCESSOR_VERTEX:
70 if (rshader->family >= CHIP_CEDAR) {
71 evergreen_pipe_shader_vs(ctx, shader);
72 } else {
73 r600_pipe_shader_vs(ctx, shader);
74 }
75 break;
76 case TGSI_PROCESSOR_FRAGMENT:
77 if (rshader->family >= CHIP_CEDAR) {
78 evergreen_pipe_shader_ps(ctx, shader);
79 } else {
80 r600_pipe_shader_ps(ctx, shader);
81 }
82 break;
83 default:
84 return -EINVAL;
85 }
86 return 0;
87 }
88
89 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
90
91 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
92 {
93 static int dump_shaders = -1;
94 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
95 int r;
96
97 /* Would like some magic "get_bool_option_once" routine.
98 */
99 if (dump_shaders == -1)
100 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
101
102 if (dump_shaders) {
103 fprintf(stderr, "--------------------------------------------------------------\n");
104 tgsi_dump(tokens, 0);
105 }
106 shader->shader.family = r600_get_family(rctx->radeon);
107 r = r600_shader_from_tgsi(tokens, &shader->shader);
108 if (r) {
109 R600_ERR("translation from TGSI failed !\n");
110 return r;
111 }
112 r = r600_bc_build(&shader->shader.bc);
113 if (r) {
114 R600_ERR("building bytecode failed !\n");
115 return r;
116 }
117 if (dump_shaders) {
118 r600_bc_dump(&shader->shader.bc);
119 fprintf(stderr, "______________________________________________________________\n");
120 }
121 return r600_pipe_shader(ctx, shader);
122 }
123
124 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
125 {
126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127
128 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
129 r600_bc_clear(&shader->shader.bc);
130 }
131
132 /*
133 * tgsi -> r600 shader
134 */
struct r600_shader_tgsi_instruction;

/*
 * One source operand of the TGSI instruction being translated, in the form
 * produced by tgsi_src() and consumed by r600_bc_src().
 */
struct r600_shader_src {
	unsigned	sel;		/* register / constant selector */
	unsigned	swizzle[4];	/* source channel read for x, y, z, w */
	unsigned	neg;		/* negate modifier */
	unsigned	abs;		/* absolute-value modifier */
	unsigned	rel;		/* V_SQ_REL_RELATIVE for indirect access, else 0 */
	uint32_t	value[4];	/* literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
};
145
146 struct r600_shader_ctx {
147 struct tgsi_shader_info info;
148 struct tgsi_parse_context parse;
149 const struct tgsi_token *tokens;
150 unsigned type;
151 unsigned file_offset[TGSI_FILE_COUNT];
152 unsigned temp_reg;
153 unsigned ar_reg;
154 struct r600_shader_tgsi_instruction *inst_info;
155 struct r600_bc *bc;
156 struct r600_shader *shader;
157 struct r600_shader_src src[3];
158 u32 *literals;
159 u32 nliterals;
160 u32 max_driver_temp_used;
161 /* needed for evergreen interpolation */
162 boolean input_centroid;
163 boolean input_linear;
164 boolean input_perspective;
165 int num_interp_gpr;
166 };
167
/*
 * One entry of the opcode translation tables, which are indexed by TGSI
 * opcode.  Member order must stay as it is in case the tables use
 * positional initializers.
 */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI opcode this entry stands for */
	unsigned	is_op3;		/* NOTE(review): presumably marks OP3 encodings - confirm */
	unsigned	r600_opcode;	/* hardware opcode handed to the ALU emitters */
	/* translation callback; returns 0 or a negative errno value */
	int		(*process)(struct r600_shader_ctx *ctx);
};

/* R600/R700 and Evergreen opcode tables, defined further down the file. */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
177
178 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
179 {
180 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
181 int j;
182
183 if (i->Instruction.NumDstRegs > 1) {
184 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
185 return -EINVAL;
186 }
187 if (i->Instruction.Predicate) {
188 R600_ERR("predicate unsupported\n");
189 return -EINVAL;
190 }
191 #if 0
192 if (i->Instruction.Label) {
193 R600_ERR("label unsupported\n");
194 return -EINVAL;
195 }
196 #endif
197 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
198 if (i->Src[j].Register.Dimension) {
199 R600_ERR("unsupported src %d (dimension %d)\n", j,
200 i->Src[j].Register.Dimension);
201 return -EINVAL;
202 }
203 }
204 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
205 if (i->Dst[j].Register.Dimension) {
206 R600_ERR("unsupported dst (dimension)\n");
207 return -EINVAL;
208 }
209 }
210 return 0;
211 }
212
213 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
214 {
215 int i, r;
216 struct r600_bc_alu alu;
217 int gpr = 0, base_chan = 0;
218 int ij_index = 0;
219
220 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
221 ij_index = 0;
222 if (ctx->shader->input[input].centroid)
223 ij_index++;
224 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
225 ij_index = 0;
226 /* if we have perspective add one */
227 if (ctx->input_perspective) {
228 ij_index++;
229 /* if we have perspective centroid */
230 if (ctx->input_centroid)
231 ij_index++;
232 }
233 if (ctx->shader->input[input].centroid)
234 ij_index++;
235 }
236
237 /* work out gpr and base_chan from index */
238 gpr = ij_index / 2;
239 base_chan = (2 * (ij_index % 2)) + 1;
240
241 for (i = 0; i < 8; i++) {
242 memset(&alu, 0, sizeof(struct r600_bc_alu));
243
244 if (i < 4)
245 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
246 else
247 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
248
249 if ((i > 1) && (i < 6)) {
250 alu.dst.sel = ctx->shader->input[input].gpr;
251 alu.dst.write = 1;
252 }
253
254 alu.dst.chan = i % 4;
255
256 alu.src[0].sel = gpr;
257 alu.src[0].chan = (base_chan - (i % 2));
258
259 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
260
261 alu.bank_swizzle_force = SQ_ALU_VEC_210;
262 if ((i % 4) == 3)
263 alu.last = 1;
264 r = r600_bc_add_alu(ctx->bc, &alu);
265 if (r)
266 return r;
267 }
268 return 0;
269 }
270
271
272 static int tgsi_declaration(struct r600_shader_ctx *ctx)
273 {
274 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
275 unsigned i;
276 int r;
277
278 switch (d->Declaration.File) {
279 case TGSI_FILE_INPUT:
280 i = ctx->shader->ninput++;
281 ctx->shader->input[i].name = d->Semantic.Name;
282 ctx->shader->input[i].sid = d->Semantic.Index;
283 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
284 ctx->shader->input[i].centroid = d->Declaration.Centroid;
285 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
286 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
287 /* turn input into interpolate on EG */
288 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
289 if (ctx->shader->input[i].interpolate > 0) {
290 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
291 evergreen_interp_alu(ctx, i);
292 }
293 }
294 }
295 break;
296 case TGSI_FILE_OUTPUT:
297 i = ctx->shader->noutput++;
298 ctx->shader->output[i].name = d->Semantic.Name;
299 ctx->shader->output[i].sid = d->Semantic.Index;
300 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
301 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
302 break;
303 case TGSI_FILE_CONSTANT:
304 case TGSI_FILE_TEMPORARY:
305 case TGSI_FILE_SAMPLER:
306 case TGSI_FILE_ADDRESS:
307 break;
308
309 case TGSI_FILE_SYSTEM_VALUE:
310 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
311 struct r600_bc_alu alu;
312 memset(&alu, 0, sizeof(struct r600_bc_alu));
313
314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
315 alu.src[0].sel = 0;
316 alu.src[0].chan = 3;
317
318 alu.dst.sel = 0;
319 alu.dst.chan = 3;
320 alu.dst.write = 1;
321 alu.last = 1;
322
323 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
324 return r;
325 break;
326 }
327
328 default:
329 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
330 return -EINVAL;
331 }
332 return 0;
333 }
334
335 static int r600_get_temp(struct r600_shader_ctx *ctx)
336 {
337 return ctx->temp_reg + ctx->max_driver_temp_used++;
338 }
339
340 /*
341 * for evergreen we need to scan the shader to find the number of GPRs we need to
342 * reserve for interpolation.
343 *
344 * we need to know if we are going to emit
345 * any centroid inputs
346 * if perspective and linear are required
347 */
348 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
349 {
350 int i;
351 int num_baryc;
352
353 ctx->input_linear = FALSE;
354 ctx->input_perspective = FALSE;
355 ctx->input_centroid = FALSE;
356 ctx->num_interp_gpr = 1;
357
358 /* any centroid inputs */
359 for (i = 0; i < ctx->info.num_inputs; i++) {
360 /* skip position/face */
361 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
362 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
363 continue;
364 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
365 ctx->input_linear = TRUE;
366 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
367 ctx->input_perspective = TRUE;
368 if (ctx->info.input_centroid[i])
369 ctx->input_centroid = TRUE;
370 }
371
372 num_baryc = 0;
373 /* ignoring sample for now */
374 if (ctx->input_perspective)
375 num_baryc++;
376 if (ctx->input_linear)
377 num_baryc++;
378 if (ctx->input_centroid)
379 num_baryc *= 2;
380
381 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
382
383 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
384 return ctx->num_interp_gpr;
385 }
386
387 static void tgsi_src(struct r600_shader_ctx *ctx,
388 const struct tgsi_full_src_register *tgsi_src,
389 struct r600_shader_src *r600_src)
390 {
391 memset(r600_src, 0, sizeof(*r600_src));
392 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
393 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
394 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
395 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
396 r600_src->neg = tgsi_src->Register.Negate;
397 r600_src->abs = tgsi_src->Register.Absolute;
398
399 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
400 int index;
401 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
402 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
403 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
404
405 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
406 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
407 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
408 return;
409 }
410 index = tgsi_src->Register.Index;
411 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
412 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
413 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
414 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
415 r600_src->swizzle[0] = 3;
416 r600_src->swizzle[1] = 3;
417 r600_src->swizzle[2] = 3;
418 r600_src->swizzle[3] = 3;
419 r600_src->sel = 0;
420 } else {
421 if (tgsi_src->Register.Indirect)
422 r600_src->rel = V_SQ_REL_RELATIVE;
423 r600_src->sel = tgsi_src->Register.Index;
424 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
425 }
426 }
427
428 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
429 {
430 struct r600_bc_vtx vtx;
431 unsigned int ar_reg;
432 int r;
433
434 if (offset) {
435 struct r600_bc_alu alu;
436
437 memset(&alu, 0, sizeof(alu));
438
439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
440 alu.src[0].sel = ctx->ar_reg;
441
442 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
443 alu.src[1].value = offset;
444
445 alu.dst.sel = dst_reg;
446 alu.dst.write = 1;
447 alu.last = 1;
448
449 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
450 return r;
451
452 ar_reg = dst_reg;
453 } else {
454 ar_reg = ctx->ar_reg;
455 }
456
457 memset(&vtx, 0, sizeof(vtx));
458 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
459 vtx.src_gpr = ar_reg;
460 vtx.mega_fetch_count = 16;
461 vtx.dst_gpr = dst_reg;
462 vtx.dst_sel_x = 0; /* SEL_X */
463 vtx.dst_sel_y = 1; /* SEL_Y */
464 vtx.dst_sel_z = 2; /* SEL_Z */
465 vtx.dst_sel_w = 3; /* SEL_W */
466 vtx.data_format = FMT_32_32_32_32_FLOAT;
467 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
468 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
469 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
470
471 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
472 return r;
473
474 return 0;
475 }
476
477 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
478 {
479 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
480 struct r600_bc_alu alu;
481 int i, j, k, nconst, r;
482
483 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
484 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
485 nconst++;
486 }
487 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
488 }
489 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
490 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
491 continue;
492 }
493
494 if (ctx->src[i].rel) {
495 int treg = r600_get_temp(ctx);
496 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
497 return r;
498
499 ctx->src[i].sel = treg;
500 ctx->src[i].rel = 0;
501 j--;
502 } else if (j > 0) {
503 int treg = r600_get_temp(ctx);
504 for (k = 0; k < 4; k++) {
505 memset(&alu, 0, sizeof(struct r600_bc_alu));
506 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
507 alu.src[0].sel = ctx->src[i].sel;
508 alu.src[0].chan = k;
509 alu.src[0].rel = ctx->src[i].rel;
510 alu.dst.sel = treg;
511 alu.dst.chan = k;
512 alu.dst.write = 1;
513 if (k == 3)
514 alu.last = 1;
515 r = r600_bc_add_alu(ctx->bc, &alu);
516 if (r)
517 return r;
518 }
519 ctx->src[i].sel = treg;
520 ctx->src[i].rel =0;
521 j--;
522 }
523 }
524 return 0;
525 }
526
527 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
528 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
529 {
530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
531 struct r600_bc_alu alu;
532 int i, j, k, nliteral, r;
533
534 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
535 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
536 nliteral++;
537 }
538 }
539 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
540 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
541 int treg = r600_get_temp(ctx);
542 for (k = 0; k < 4; k++) {
543 memset(&alu, 0, sizeof(struct r600_bc_alu));
544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
545 alu.src[0].sel = ctx->src[i].sel;
546 alu.src[0].chan = k;
547 alu.src[0].value = ctx->src[i].value[k];
548 alu.dst.sel = treg;
549 alu.dst.chan = k;
550 alu.dst.write = 1;
551 if (k == 3)
552 alu.last = 1;
553 r = r600_bc_add_alu(ctx->bc, &alu);
554 if (r)
555 return r;
556 }
557 ctx->src[i].sel = treg;
558 j--;
559 }
560 }
561 return 0;
562 }
563
564 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
565 {
566 struct tgsi_full_immediate *immediate;
567 struct tgsi_full_property *property;
568 struct r600_shader_ctx ctx;
569 struct r600_bc_output output[32];
570 unsigned noutput;
571 unsigned opcode;
572 int i, r = 0, pos0;
573
574 ctx.bc = &shader->bc;
575 ctx.shader = shader;
576 r = r600_bc_init(ctx.bc, shader->family);
577 if (r)
578 return r;
579 ctx.tokens = tokens;
580 tgsi_scan_shader(tokens, &ctx.info);
581 tgsi_parse_init(&ctx.parse, tokens);
582 ctx.type = ctx.parse.FullHeader.Processor.Processor;
583 shader->processor_type = ctx.type;
584 ctx.bc->type = shader->processor_type;
585
586 /* register allocations */
587 /* Values [0,127] correspond to GPR[0..127].
588 * Values [128,159] correspond to constant buffer bank 0
589 * Values [160,191] correspond to constant buffer bank 1
590 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
591 * Values [256,287] correspond to constant buffer bank 2 (EG)
592 * Values [288,319] correspond to constant buffer bank 3 (EG)
593 * Other special values are shown in the list below.
594 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
595 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
596 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
597 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
598 * 248 SQ_ALU_SRC_0: special constant 0.0.
599 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
600 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
601 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
602 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
603 * 253 SQ_ALU_SRC_LITERAL: literal constant.
604 * 254 SQ_ALU_SRC_PV: previous vector result.
605 * 255 SQ_ALU_SRC_PS: previous scalar result.
606 */
607 for (i = 0; i < TGSI_FILE_COUNT; i++) {
608 ctx.file_offset[i] = 0;
609 }
610 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
611 ctx.file_offset[TGSI_FILE_INPUT] = 1;
612 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
613 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
614 } else {
615 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
616 }
617 }
618 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
619 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
620 }
621 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
622 ctx.info.file_count[TGSI_FILE_INPUT];
623 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
624 ctx.info.file_count[TGSI_FILE_OUTPUT];
625
626 /* Outside the GPR range. This will be translated to one of the
627 * kcache banks later. */
628 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
629
630 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
631 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
632 ctx.info.file_count[TGSI_FILE_TEMPORARY];
633 ctx.temp_reg = ctx.ar_reg + 1;
634
635 ctx.nliterals = 0;
636 ctx.literals = NULL;
637 shader->fs_write_all = FALSE;
638 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
639 tgsi_parse_token(&ctx.parse);
640 switch (ctx.parse.FullToken.Token.Type) {
641 case TGSI_TOKEN_TYPE_IMMEDIATE:
642 immediate = &ctx.parse.FullToken.FullImmediate;
643 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
644 if(ctx.literals == NULL) {
645 r = -ENOMEM;
646 goto out_err;
647 }
648 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
649 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
650 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
651 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
652 ctx.nliterals++;
653 break;
654 case TGSI_TOKEN_TYPE_DECLARATION:
655 r = tgsi_declaration(&ctx);
656 if (r)
657 goto out_err;
658 break;
659 case TGSI_TOKEN_TYPE_INSTRUCTION:
660 r = tgsi_is_supported(&ctx);
661 if (r)
662 goto out_err;
663 ctx.max_driver_temp_used = 0;
664 /* reserve first tmp for everyone */
665 r600_get_temp(&ctx);
666
667 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
668 if ((r = tgsi_split_constant(&ctx)))
669 goto out_err;
670 if ((r = tgsi_split_literal_constant(&ctx)))
671 goto out_err;
672 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
673 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
674 else
675 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
676 r = ctx.inst_info->process(&ctx);
677 if (r)
678 goto out_err;
679 break;
680 case TGSI_TOKEN_TYPE_PROPERTY:
681 property = &ctx.parse.FullToken.FullProperty;
682 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
683 if (property->u[0].Data == 1)
684 shader->fs_write_all = TRUE;
685 }
686 break;
687 default:
688 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
689 r = -EINVAL;
690 goto out_err;
691 }
692 }
693 /* export output */
694 noutput = shader->noutput;
695 for (i = 0, pos0 = 0; i < noutput; i++) {
696 memset(&output[i], 0, sizeof(struct r600_bc_output));
697 output[i].gpr = shader->output[i].gpr;
698 output[i].elem_size = 3;
699 output[i].swizzle_x = 0;
700 output[i].swizzle_y = 1;
701 output[i].swizzle_z = 2;
702 output[i].swizzle_w = 3;
703 output[i].burst_count = 1;
704 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
705 output[i].array_base = i - pos0;
706 switch (ctx.type) {
707 case TGSI_PROCESSOR_VERTEX:
708 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
709 output[i].array_base = 60;
710 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
711 /* position doesn't count in array_base */
712 pos0++;
713 }
714 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
715 output[i].array_base = 61;
716 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
717 /* position doesn't count in array_base */
718 pos0++;
719 }
720 break;
721 case TGSI_PROCESSOR_FRAGMENT:
722 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
723 output[i].array_base = shader->output[i].sid;
724 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
725 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
726 output[i].array_base = 61;
727 output[i].swizzle_x = 2;
728 output[i].swizzle_y = 7;
729 output[i].swizzle_z = output[i].swizzle_w = 7;
730 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
731 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
732 output[i].array_base = 61;
733 output[i].swizzle_x = 7;
734 output[i].swizzle_y = 1;
735 output[i].swizzle_z = output[i].swizzle_w = 7;
736 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
737 } else {
738 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
739 r = -EINVAL;
740 goto out_err;
741 }
742 break;
743 default:
744 R600_ERR("unsupported processor type %d\n", ctx.type);
745 r = -EINVAL;
746 goto out_err;
747 }
748 }
749 /* add fake param output for vertex shader if no param is exported */
750 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
751 for (i = 0, pos0 = 0; i < noutput; i++) {
752 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
753 pos0 = 1;
754 break;
755 }
756 }
757 if (!pos0) {
758 memset(&output[i], 0, sizeof(struct r600_bc_output));
759 output[i].gpr = 0;
760 output[i].elem_size = 3;
761 output[i].swizzle_x = 0;
762 output[i].swizzle_y = 1;
763 output[i].swizzle_z = 2;
764 output[i].swizzle_w = 3;
765 output[i].burst_count = 1;
766 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
767 output[i].array_base = 0;
768 noutput++;
769 }
770 }
771 /* add fake pixel export */
772 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
773 memset(&output[0], 0, sizeof(struct r600_bc_output));
774 output[0].gpr = 0;
775 output[0].elem_size = 3;
776 output[0].swizzle_x = 7;
777 output[0].swizzle_y = 7;
778 output[0].swizzle_z = 7;
779 output[0].swizzle_w = 7;
780 output[0].burst_count = 1;
781 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
782 output[0].array_base = 0;
783 noutput++;
784 }
785 /* add output to bytecode */
786 for (i = 0; i < noutput; i++) {
787 r = r600_bc_add_output(ctx.bc, &output[i]);
788 if (r)
789 goto out_err;
790 }
791 free(ctx.literals);
792 tgsi_parse_free(&ctx.parse);
793 return 0;
794 out_err:
795 free(ctx.literals);
796 tgsi_parse_free(&ctx.parse);
797 return r;
798 }
799
800 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
801 {
802 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
803 return -EINVAL;
804 }
805
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
810
811 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
812 const struct r600_shader_src *shader_src,
813 unsigned chan)
814 {
815 bc_src->sel = shader_src->sel;
816 bc_src->chan = shader_src->swizzle[chan];
817 bc_src->neg = shader_src->neg;
818 bc_src->abs = shader_src->abs;
819 bc_src->rel = shader_src->rel;
820 bc_src->value = shader_src->value[bc_src->chan];
821 }
822
823 static void tgsi_dst(struct r600_shader_ctx *ctx,
824 const struct tgsi_full_dst_register *tgsi_dst,
825 unsigned swizzle,
826 struct r600_bc_alu_dst *r600_dst)
827 {
828 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
829
830 r600_dst->sel = tgsi_dst->Register.Index;
831 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
832 r600_dst->chan = swizzle;
833 r600_dst->write = 1;
834 if (tgsi_dst->Register.Indirect)
835 r600_dst->rel = V_SQ_REL_RELATIVE;
836 if (inst->Instruction.Saturate) {
837 r600_dst->clamp = 1;
838 }
839 }
840
/*
 * Return the highest channel (0..3) enabled in the low four bits of
 * `writemask`, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* walk down from w; channel 0 is also the answer for an empty mask */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			break;
	}
	return chan;
}
852
853 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
854 {
855 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
856 struct r600_bc_alu alu;
857 int i, j, r;
858 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
859
860 for (i = 0; i < lasti + 1; i++) {
861 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
862 continue;
863
864 memset(&alu, 0, sizeof(struct r600_bc_alu));
865 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
866
867 alu.inst = ctx->inst_info->r600_opcode;
868 if (!swap) {
869 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
870 r600_bc_src(&alu.src[j], &ctx->src[j], i);
871 }
872 } else {
873 r600_bc_src(&alu.src[0], &ctx->src[1], i);
874 r600_bc_src(&alu.src[1], &ctx->src[0], i);
875 }
876 /* handle some special cases */
877 switch (ctx->inst_info->tgsi_opcode) {
878 case TGSI_OPCODE_SUB:
879 alu.src[1].neg = 1;
880 break;
881 case TGSI_OPCODE_ABS:
882 alu.src[0].abs = 1;
883 break;
884 default:
885 break;
886 }
887 if (i == lasti) {
888 alu.last = 1;
889 }
890 r = r600_bc_add_alu(ctx->bc, &alu);
891 if (r)
892 return r;
893 }
894 return 0;
895 }
896
/* Per-channel ALU operation with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
901
/* Per-channel ALU operation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
906
907 /*
908 * r600 - trunc to -PI..PI range
909 * r700 - normalize by dividing by 2PI
910 * see fdo bug 27901
911 */
912 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
913 {
914 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
915 static float double_pi = 3.1415926535 * 2;
916 static float neg_pi = -3.1415926535;
917
918 int r;
919 struct r600_bc_alu alu;
920
921 memset(&alu, 0, sizeof(struct r600_bc_alu));
922 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
923 alu.is_op3 = 1;
924
925 alu.dst.chan = 0;
926 alu.dst.sel = ctx->temp_reg;
927 alu.dst.write = 1;
928
929 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
930
931 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
932 alu.src[1].chan = 0;
933 alu.src[1].value = *(uint32_t *)&half_inv_pi;
934 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
935 alu.src[2].chan = 0;
936 alu.last = 1;
937 r = r600_bc_add_alu(ctx->bc, &alu);
938 if (r)
939 return r;
940
941 memset(&alu, 0, sizeof(struct r600_bc_alu));
942 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
943
944 alu.dst.chan = 0;
945 alu.dst.sel = ctx->temp_reg;
946 alu.dst.write = 1;
947
948 alu.src[0].sel = ctx->temp_reg;
949 alu.src[0].chan = 0;
950 alu.last = 1;
951 r = r600_bc_add_alu(ctx->bc, &alu);
952 if (r)
953 return r;
954
955 memset(&alu, 0, sizeof(struct r600_bc_alu));
956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
957 alu.is_op3 = 1;
958
959 alu.dst.chan = 0;
960 alu.dst.sel = ctx->temp_reg;
961 alu.dst.write = 1;
962
963 alu.src[0].sel = ctx->temp_reg;
964 alu.src[0].chan = 0;
965
966 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
967 alu.src[1].chan = 0;
968 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
969 alu.src[2].chan = 0;
970
971 if (ctx->bc->chiprev == CHIPREV_R600) {
972 alu.src[1].value = *(uint32_t *)&double_pi;
973 alu.src[2].value = *(uint32_t *)&neg_pi;
974 } else {
975 alu.src[1].sel = V_SQ_ALU_SRC_1;
976 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
977 alu.src[2].neg = 1;
978 }
979
980 alu.last = 1;
981 r = r600_bc_add_alu(ctx->bc, &alu);
982 if (r)
983 return r;
984 return 0;
985 }
986
987 static int tgsi_trig(struct r600_shader_ctx *ctx)
988 {
989 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
990 struct r600_bc_alu alu;
991 int i, r;
992 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
993
994 r = tgsi_setup_trig(ctx);
995 if (r)
996 return r;
997
998 memset(&alu, 0, sizeof(struct r600_bc_alu));
999 alu.inst = ctx->inst_info->r600_opcode;
1000 alu.dst.chan = 0;
1001 alu.dst.sel = ctx->temp_reg;
1002 alu.dst.write = 1;
1003
1004 alu.src[0].sel = ctx->temp_reg;
1005 alu.src[0].chan = 0;
1006 alu.last = 1;
1007 r = r600_bc_add_alu(ctx->bc, &alu);
1008 if (r)
1009 return r;
1010
1011 /* replicate result */
1012 for (i = 0; i < lasti + 1; i++) {
1013 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1014 continue;
1015
1016 memset(&alu, 0, sizeof(struct r600_bc_alu));
1017 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1018
1019 alu.src[0].sel = ctx->temp_reg;
1020 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1021 if (i == lasti)
1022 alu.last = 1;
1023 r = r600_bc_add_alu(ctx->bc, &alu);
1024 if (r)
1025 return r;
1026 }
1027 return 0;
1028 }
1029
1030 static int tgsi_scs(struct r600_shader_ctx *ctx)
1031 {
1032 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1033 struct r600_bc_alu alu;
1034 int r;
1035
1036 /* We'll only need the trig stuff if we are going to write to the
1037 * X or Y components of the destination vector.
1038 */
1039 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1040 r = tgsi_setup_trig(ctx);
1041 if (r)
1042 return r;
1043 }
1044
1045 /* dst.x = COS */
1046 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1047 memset(&alu, 0, sizeof(struct r600_bc_alu));
1048 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1049 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1050
1051 alu.src[0].sel = ctx->temp_reg;
1052 alu.src[0].chan = 0;
1053 alu.last = 1;
1054 r = r600_bc_add_alu(ctx->bc, &alu);
1055 if (r)
1056 return r;
1057 }
1058
1059 /* dst.y = SIN */
1060 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1061 memset(&alu, 0, sizeof(struct r600_bc_alu));
1062 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1063 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1064
1065 alu.src[0].sel = ctx->temp_reg;
1066 alu.src[0].chan = 0;
1067 alu.last = 1;
1068 r = r600_bc_add_alu(ctx->bc, &alu);
1069 if (r)
1070 return r;
1071 }
1072
1073 /* dst.z = 0.0; */
1074 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1075 memset(&alu, 0, sizeof(struct r600_bc_alu));
1076
1077 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1078
1079 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1080
1081 alu.src[0].sel = V_SQ_ALU_SRC_0;
1082 alu.src[0].chan = 0;
1083
1084 alu.last = 1;
1085
1086 r = r600_bc_add_alu(ctx->bc, &alu);
1087 if (r)
1088 return r;
1089 }
1090
1091 /* dst.w = 1.0; */
1092 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1093 memset(&alu, 0, sizeof(struct r600_bc_alu));
1094
1095 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1096
1097 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1098
1099 alu.src[0].sel = V_SQ_ALU_SRC_1;
1100 alu.src[0].chan = 0;
1101
1102 alu.last = 1;
1103
1104 r = r600_bc_add_alu(ctx->bc, &alu);
1105 if (r)
1106 return r;
1107 }
1108
1109 return 0;
1110 }
1111
1112 static int tgsi_kill(struct r600_shader_ctx *ctx)
1113 {
1114 struct r600_bc_alu alu;
1115 int i, r;
1116
1117 for (i = 0; i < 4; i++) {
1118 memset(&alu, 0, sizeof(struct r600_bc_alu));
1119 alu.inst = ctx->inst_info->r600_opcode;
1120
1121 alu.dst.chan = i;
1122
1123 alu.src[0].sel = V_SQ_ALU_SRC_0;
1124
1125 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1126 alu.src[1].sel = V_SQ_ALU_SRC_1;
1127 alu.src[1].neg = 1;
1128 } else {
1129 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1130 }
1131 if (i == 3) {
1132 alu.last = 1;
1133 }
1134 r = r600_bc_add_alu(ctx->bc, &alu);
1135 if (r)
1136 return r;
1137 }
1138
1139 /* kill must be last in ALU */
1140 ctx->bc->force_add_cf = 1;
1141 ctx->shader->uses_kill = TRUE;
1142 return 0;
1143 }
1144
1145 static int tgsi_lit(struct r600_shader_ctx *ctx)
1146 {
1147 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1148 struct r600_bc_alu alu;
1149 int r;
1150
1151 /* dst.x, <- 1.0 */
1152 memset(&alu, 0, sizeof(struct r600_bc_alu));
1153 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1154 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1155 alu.src[0].chan = 0;
1156 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1157 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1158 r = r600_bc_add_alu(ctx->bc, &alu);
1159 if (r)
1160 return r;
1161
1162 /* dst.y = max(src.x, 0.0) */
1163 memset(&alu, 0, sizeof(struct r600_bc_alu));
1164 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1165 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1166 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1167 alu.src[1].chan = 0;
1168 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1169 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1170 r = r600_bc_add_alu(ctx->bc, &alu);
1171 if (r)
1172 return r;
1173
1174 /* dst.w, <- 1.0 */
1175 memset(&alu, 0, sizeof(struct r600_bc_alu));
1176 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1177 alu.src[0].sel = V_SQ_ALU_SRC_1;
1178 alu.src[0].chan = 0;
1179 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1180 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1181 alu.last = 1;
1182 r = r600_bc_add_alu(ctx->bc, &alu);
1183 if (r)
1184 return r;
1185
1186 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1187 {
1188 int chan;
1189 int sel;
1190
1191 /* dst.z = log(src.y) */
1192 memset(&alu, 0, sizeof(struct r600_bc_alu));
1193 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1194 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1195 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1196 alu.last = 1;
1197 r = r600_bc_add_alu(ctx->bc, &alu);
1198 if (r)
1199 return r;
1200
1201 chan = alu.dst.chan;
1202 sel = alu.dst.sel;
1203
1204 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1205 memset(&alu, 0, sizeof(struct r600_bc_alu));
1206 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1207 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1208 alu.src[1].sel = sel;
1209 alu.src[1].chan = chan;
1210
1211 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1212 alu.dst.sel = ctx->temp_reg;
1213 alu.dst.chan = 0;
1214 alu.dst.write = 1;
1215 alu.is_op3 = 1;
1216 alu.last = 1;
1217 r = r600_bc_add_alu(ctx->bc, &alu);
1218 if (r)
1219 return r;
1220
1221 /* dst.z = exp(tmp.x) */
1222 memset(&alu, 0, sizeof(struct r600_bc_alu));
1223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1224 alu.src[0].sel = ctx->temp_reg;
1225 alu.src[0].chan = 0;
1226 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1227 alu.last = 1;
1228 r = r600_bc_add_alu(ctx->bc, &alu);
1229 if (r)
1230 return r;
1231 }
1232 return 0;
1233 }
1234
1235 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1236 {
1237 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1238 struct r600_bc_alu alu;
1239 int i, r;
1240
1241 memset(&alu, 0, sizeof(struct r600_bc_alu));
1242
1243 /* FIXME:
1244 * For state trackers other than OpenGL, we'll want to use
1245 * _RECIPSQRT_IEEE instead.
1246 */
1247 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1248
1249 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1250 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1251 alu.src[i].abs = 1;
1252 }
1253 alu.dst.sel = ctx->temp_reg;
1254 alu.dst.write = 1;
1255 alu.last = 1;
1256 r = r600_bc_add_alu(ctx->bc, &alu);
1257 if (r)
1258 return r;
1259 /* replicate result */
1260 return tgsi_helper_tempx_replicate(ctx);
1261 }
1262
1263 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1264 {
1265 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1266 struct r600_bc_alu alu;
1267 int i, r;
1268
1269 for (i = 0; i < 4; i++) {
1270 memset(&alu, 0, sizeof(struct r600_bc_alu));
1271 alu.src[0].sel = ctx->temp_reg;
1272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1273 alu.dst.chan = i;
1274 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1275 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1276 if (i == 3)
1277 alu.last = 1;
1278 r = r600_bc_add_alu(ctx->bc, &alu);
1279 if (r)
1280 return r;
1281 }
1282 return 0;
1283 }
1284
1285 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1286 {
1287 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1288 struct r600_bc_alu alu;
1289 int i, r;
1290
1291 memset(&alu, 0, sizeof(struct r600_bc_alu));
1292 alu.inst = ctx->inst_info->r600_opcode;
1293 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1294 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1295 }
1296 alu.dst.sel = ctx->temp_reg;
1297 alu.dst.write = 1;
1298 alu.last = 1;
1299 r = r600_bc_add_alu(ctx->bc, &alu);
1300 if (r)
1301 return r;
1302 /* replicate result */
1303 return tgsi_helper_tempx_replicate(ctx);
1304 }
1305
1306 static int tgsi_pow(struct r600_shader_ctx *ctx)
1307 {
1308 struct r600_bc_alu alu;
1309 int r;
1310
1311 /* LOG2(a) */
1312 memset(&alu, 0, sizeof(struct r600_bc_alu));
1313 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1314 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1315 alu.dst.sel = ctx->temp_reg;
1316 alu.dst.write = 1;
1317 alu.last = 1;
1318 r = r600_bc_add_alu(ctx->bc, &alu);
1319 if (r)
1320 return r;
1321 /* b * LOG2(a) */
1322 memset(&alu, 0, sizeof(struct r600_bc_alu));
1323 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1324 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1325 alu.src[1].sel = ctx->temp_reg;
1326 alu.dst.sel = ctx->temp_reg;
1327 alu.dst.write = 1;
1328 alu.last = 1;
1329 r = r600_bc_add_alu(ctx->bc, &alu);
1330 if (r)
1331 return r;
1332 /* POW(a,b) = EXP2(b * LOG2(a))*/
1333 memset(&alu, 0, sizeof(struct r600_bc_alu));
1334 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1335 alu.src[0].sel = ctx->temp_reg;
1336 alu.dst.sel = ctx->temp_reg;
1337 alu.dst.write = 1;
1338 alu.last = 1;
1339 r = r600_bc_add_alu(ctx->bc, &alu);
1340 if (r)
1341 return r;
1342 return tgsi_helper_tempx_replicate(ctx);
1343 }
1344
1345 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1346 {
1347 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1348 struct r600_bc_alu alu;
1349 int i, r;
1350
1351 /* tmp = (src > 0 ? 1 : src) */
1352 for (i = 0; i < 4; i++) {
1353 memset(&alu, 0, sizeof(struct r600_bc_alu));
1354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1355 alu.is_op3 = 1;
1356
1357 alu.dst.sel = ctx->temp_reg;
1358 alu.dst.chan = i;
1359
1360 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1361 alu.src[1].sel = V_SQ_ALU_SRC_1;
1362 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1363
1364 if (i == 3)
1365 alu.last = 1;
1366 r = r600_bc_add_alu(ctx->bc, &alu);
1367 if (r)
1368 return r;
1369 }
1370
1371 /* dst = (-tmp > 0 ? -1 : tmp) */
1372 for (i = 0; i < 4; i++) {
1373 memset(&alu, 0, sizeof(struct r600_bc_alu));
1374 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1375 alu.is_op3 = 1;
1376 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1377
1378 alu.src[0].sel = ctx->temp_reg;
1379 alu.src[0].chan = i;
1380 alu.src[0].neg = 1;
1381
1382 alu.src[1].sel = V_SQ_ALU_SRC_1;
1383 alu.src[1].neg = 1;
1384
1385 alu.src[2].sel = ctx->temp_reg;
1386 alu.src[2].chan = i;
1387
1388 if (i == 3)
1389 alu.last = 1;
1390 r = r600_bc_add_alu(ctx->bc, &alu);
1391 if (r)
1392 return r;
1393 }
1394 return 0;
1395 }
1396
1397 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1398 {
1399 struct r600_bc_alu alu;
1400 int i, r;
1401
1402 for (i = 0; i < 4; i++) {
1403 memset(&alu, 0, sizeof(struct r600_bc_alu));
1404 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1405 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1406 alu.dst.chan = i;
1407 } else {
1408 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1409 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1410 alu.src[0].sel = ctx->temp_reg;
1411 alu.src[0].chan = i;
1412 }
1413 if (i == 3) {
1414 alu.last = 1;
1415 }
1416 r = r600_bc_add_alu(ctx->bc, &alu);
1417 if (r)
1418 return r;
1419 }
1420 return 0;
1421 }
1422
1423 static int tgsi_op3(struct r600_shader_ctx *ctx)
1424 {
1425 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1426 struct r600_bc_alu alu;
1427 int i, j, r;
1428 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1429
1430 for (i = 0; i < lasti + 1; i++) {
1431 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1432 continue;
1433
1434 memset(&alu, 0, sizeof(struct r600_bc_alu));
1435 alu.inst = ctx->inst_info->r600_opcode;
1436 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1437 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1438 }
1439
1440 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1441 alu.dst.chan = i;
1442 alu.dst.write = 1;
1443 alu.is_op3 = 1;
1444 if (i == lasti) {
1445 alu.last = 1;
1446 }
1447 r = r600_bc_add_alu(ctx->bc, &alu);
1448 if (r)
1449 return r;
1450 }
1451 return 0;
1452 }
1453
1454 static int tgsi_dp(struct r600_shader_ctx *ctx)
1455 {
1456 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1457 struct r600_bc_alu alu;
1458 int i, j, r;
1459
1460 for (i = 0; i < 4; i++) {
1461 memset(&alu, 0, sizeof(struct r600_bc_alu));
1462 alu.inst = ctx->inst_info->r600_opcode;
1463 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1464 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1465 }
1466
1467 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1468 alu.dst.chan = i;
1469 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1470 /* handle some special cases */
1471 switch (ctx->inst_info->tgsi_opcode) {
1472 case TGSI_OPCODE_DP2:
1473 if (i > 1) {
1474 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1475 alu.src[0].chan = alu.src[1].chan = 0;
1476 }
1477 break;
1478 case TGSI_OPCODE_DP3:
1479 if (i > 2) {
1480 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1481 alu.src[0].chan = alu.src[1].chan = 0;
1482 }
1483 break;
1484 case TGSI_OPCODE_DPH:
1485 if (i == 3) {
1486 alu.src[0].sel = V_SQ_ALU_SRC_1;
1487 alu.src[0].chan = 0;
1488 alu.src[0].neg = 0;
1489 }
1490 break;
1491 default:
1492 break;
1493 }
1494 if (i == 3) {
1495 alu.last = 1;
1496 }
1497 r = r600_bc_add_alu(ctx->bc, &alu);
1498 if (r)
1499 return r;
1500 }
1501 return 0;
1502 }
1503
1504 static int tgsi_tex(struct r600_shader_ctx *ctx)
1505 {
1506 static float one_point_five = 1.5f;
1507 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1508 struct r600_bc_tex tex;
1509 struct r600_bc_alu alu;
1510 unsigned src_gpr;
1511 int r, i;
1512 int opcode;
1513 boolean src_not_temp =
1514 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1515 inst->Src[0].Register.File != TGSI_FILE_INPUT;
1516
1517 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1518
1519 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1520 /* Add perspective divide */
1521 memset(&alu, 0, sizeof(struct r600_bc_alu));
1522 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1523 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1524
1525 alu.dst.sel = ctx->temp_reg;
1526 alu.dst.chan = 3;
1527 alu.last = 1;
1528 alu.dst.write = 1;
1529 r = r600_bc_add_alu(ctx->bc, &alu);
1530 if (r)
1531 return r;
1532
1533 for (i = 0; i < 3; i++) {
1534 memset(&alu, 0, sizeof(struct r600_bc_alu));
1535 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1536 alu.src[0].sel = ctx->temp_reg;
1537 alu.src[0].chan = 3;
1538 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1539 alu.dst.sel = ctx->temp_reg;
1540 alu.dst.chan = i;
1541 alu.dst.write = 1;
1542 r = r600_bc_add_alu(ctx->bc, &alu);
1543 if (r)
1544 return r;
1545 }
1546 memset(&alu, 0, sizeof(struct r600_bc_alu));
1547 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1548 alu.src[0].sel = V_SQ_ALU_SRC_1;
1549 alu.src[0].chan = 0;
1550 alu.dst.sel = ctx->temp_reg;
1551 alu.dst.chan = 3;
1552 alu.last = 1;
1553 alu.dst.write = 1;
1554 r = r600_bc_add_alu(ctx->bc, &alu);
1555 if (r)
1556 return r;
1557 src_not_temp = FALSE;
1558 src_gpr = ctx->temp_reg;
1559 }
1560
1561 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1562 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1563 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1564
1565 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1566 for (i = 0; i < 4; i++) {
1567 memset(&alu, 0, sizeof(struct r600_bc_alu));
1568 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1569 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1570 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1571 alu.dst.sel = ctx->temp_reg;
1572 alu.dst.chan = i;
1573 if (i == 3)
1574 alu.last = 1;
1575 alu.dst.write = 1;
1576 r = r600_bc_add_alu(ctx->bc, &alu);
1577 if (r)
1578 return r;
1579 }
1580
1581 /* tmp1.z = RCP_e(|tmp1.z|) */
1582 memset(&alu, 0, sizeof(struct r600_bc_alu));
1583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1584 alu.src[0].sel = ctx->temp_reg;
1585 alu.src[0].chan = 2;
1586 alu.src[0].abs = 1;
1587 alu.dst.sel = ctx->temp_reg;
1588 alu.dst.chan = 2;
1589 alu.dst.write = 1;
1590 alu.last = 1;
1591 r = r600_bc_add_alu(ctx->bc, &alu);
1592 if (r)
1593 return r;
1594
1595 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1596 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1597 * muladd has no writemask, have to use another temp
1598 */
1599 memset(&alu, 0, sizeof(struct r600_bc_alu));
1600 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1601 alu.is_op3 = 1;
1602
1603 alu.src[0].sel = ctx->temp_reg;
1604 alu.src[0].chan = 0;
1605 alu.src[1].sel = ctx->temp_reg;
1606 alu.src[1].chan = 2;
1607
1608 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1609 alu.src[2].chan = 0;
1610 alu.src[2].value = *(uint32_t *)&one_point_five;
1611
1612 alu.dst.sel = ctx->temp_reg;
1613 alu.dst.chan = 0;
1614 alu.dst.write = 1;
1615
1616 r = r600_bc_add_alu(ctx->bc, &alu);
1617 if (r)
1618 return r;
1619
1620 memset(&alu, 0, sizeof(struct r600_bc_alu));
1621 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1622 alu.is_op3 = 1;
1623
1624 alu.src[0].sel = ctx->temp_reg;
1625 alu.src[0].chan = 1;
1626 alu.src[1].sel = ctx->temp_reg;
1627 alu.src[1].chan = 2;
1628
1629 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1630 alu.src[2].chan = 0;
1631 alu.src[2].value = *(uint32_t *)&one_point_five;
1632
1633 alu.dst.sel = ctx->temp_reg;
1634 alu.dst.chan = 1;
1635 alu.dst.write = 1;
1636
1637 alu.last = 1;
1638 r = r600_bc_add_alu(ctx->bc, &alu);
1639 if (r)
1640 return r;
1641
1642 src_not_temp = FALSE;
1643 src_gpr = ctx->temp_reg;
1644 }
1645
1646 if (src_not_temp) {
1647 for (i = 0; i < 4; i++) {
1648 memset(&alu, 0, sizeof(struct r600_bc_alu));
1649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1650 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1651 alu.dst.sel = ctx->temp_reg;
1652 alu.dst.chan = i;
1653 if (i == 3)
1654 alu.last = 1;
1655 alu.dst.write = 1;
1656 r = r600_bc_add_alu(ctx->bc, &alu);
1657 if (r)
1658 return r;
1659 }
1660 src_gpr = ctx->temp_reg;
1661 }
1662
1663 opcode = ctx->inst_info->r600_opcode;
1664 if (opcode == SQ_TEX_INST_SAMPLE &&
1665 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1666 opcode = SQ_TEX_INST_SAMPLE_C;
1667
1668 memset(&tex, 0, sizeof(struct r600_bc_tex));
1669 tex.inst = opcode;
1670 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1671 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1672 tex.src_gpr = src_gpr;
1673 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1674 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1675 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1676 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1677 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1678 tex.src_sel_x = 0;
1679 tex.src_sel_y = 1;
1680 tex.src_sel_z = 2;
1681 tex.src_sel_w = 3;
1682
1683 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1684 tex.src_sel_x = 1;
1685 tex.src_sel_y = 0;
1686 tex.src_sel_z = 3;
1687 tex.src_sel_w = 1;
1688 }
1689
1690 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1691 tex.coord_type_x = 1;
1692 tex.coord_type_y = 1;
1693 tex.coord_type_z = 1;
1694 tex.coord_type_w = 1;
1695 }
1696
1697 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1698 tex.coord_type_z = 0;
1699 tex.src_sel_z = 1;
1700 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1701 tex.coord_type_z = 0;
1702
1703 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1704 tex.src_sel_w = 2;
1705
1706 r = r600_bc_add_tex(ctx->bc, &tex);
1707 if (r)
1708 return r;
1709
1710 /* add shadow ambient support - gallium doesn't do it yet */
1711 return 0;
1712 }
1713
1714 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1715 {
1716 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1717 struct r600_bc_alu alu;
1718 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1719 unsigned i;
1720 int r;
1721
1722 /* optimize if it's just an equal balance */
1723 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1724 for (i = 0; i < lasti + 1; i++) {
1725 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1726 continue;
1727
1728 memset(&alu, 0, sizeof(struct r600_bc_alu));
1729 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1730 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1731 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1732 alu.omod = 3;
1733 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1734 alu.dst.chan = i;
1735 if (i == lasti) {
1736 alu.last = 1;
1737 }
1738 r = r600_bc_add_alu(ctx->bc, &alu);
1739 if (r)
1740 return r;
1741 }
1742 return 0;
1743 }
1744
1745 /* 1 - src0 */
1746 for (i = 0; i < lasti + 1; i++) {
1747 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1748 continue;
1749
1750 memset(&alu, 0, sizeof(struct r600_bc_alu));
1751 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1752 alu.src[0].sel = V_SQ_ALU_SRC_1;
1753 alu.src[0].chan = 0;
1754 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1755 alu.src[1].neg = 1;
1756 alu.dst.sel = ctx->temp_reg;
1757 alu.dst.chan = i;
1758 if (i == lasti) {
1759 alu.last = 1;
1760 }
1761 alu.dst.write = 1;
1762 r = r600_bc_add_alu(ctx->bc, &alu);
1763 if (r)
1764 return r;
1765 }
1766
1767 /* (1 - src0) * src2 */
1768 for (i = 0; i < lasti + 1; i++) {
1769 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1770 continue;
1771
1772 memset(&alu, 0, sizeof(struct r600_bc_alu));
1773 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1774 alu.src[0].sel = ctx->temp_reg;
1775 alu.src[0].chan = i;
1776 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1777 alu.dst.sel = ctx->temp_reg;
1778 alu.dst.chan = i;
1779 if (i == lasti) {
1780 alu.last = 1;
1781 }
1782 alu.dst.write = 1;
1783 r = r600_bc_add_alu(ctx->bc, &alu);
1784 if (r)
1785 return r;
1786 }
1787
1788 /* src0 * src1 + (1 - src0) * src2 */
1789 for (i = 0; i < lasti + 1; i++) {
1790 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1791 continue;
1792
1793 memset(&alu, 0, sizeof(struct r600_bc_alu));
1794 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1795 alu.is_op3 = 1;
1796 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1797 r600_bc_src(&alu.src[1], &ctx->src[1], i);
1798 alu.src[2].sel = ctx->temp_reg;
1799 alu.src[2].chan = i;
1800
1801 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1802 alu.dst.chan = i;
1803 if (i == lasti) {
1804 alu.last = 1;
1805 }
1806 r = r600_bc_add_alu(ctx->bc, &alu);
1807 if (r)
1808 return r;
1809 }
1810 return 0;
1811 }
1812
1813 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1814 {
1815 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1816 struct r600_bc_alu alu;
1817 int i, r;
1818 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1819
1820 for (i = 0; i < lasti + 1; i++) {
1821 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1822 continue;
1823
1824 memset(&alu, 0, sizeof(struct r600_bc_alu));
1825 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1826 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1827 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1828 r600_bc_src(&alu.src[2], &ctx->src[1], i);
1829 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1830 alu.dst.chan = i;
1831 alu.dst.write = 1;
1832 alu.is_op3 = 1;
1833 if (i == lasti)
1834 alu.last = 1;
1835 r = r600_bc_add_alu(ctx->bc, &alu);
1836 if (r)
1837 return r;
1838 }
1839 return 0;
1840 }
1841
1842 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1843 {
1844 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1845 static const unsigned int src0_swizzle[] = {2, 0, 1};
1846 static const unsigned int src1_swizzle[] = {1, 2, 0};
1847 struct r600_bc_alu alu;
1848 uint32_t use_temp = 0;
1849 int i, r;
1850
1851 if (inst->Dst[0].Register.WriteMask != 0xf)
1852 use_temp = 1;
1853
1854 for (i = 0; i < 4; i++) {
1855 memset(&alu, 0, sizeof(struct r600_bc_alu));
1856 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1857 if (i < 3) {
1858 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1859 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
1860 } else {
1861 alu.src[0].sel = V_SQ_ALU_SRC_0;
1862 alu.src[0].chan = i;
1863 alu.src[1].sel = V_SQ_ALU_SRC_0;
1864 alu.src[1].chan = i;
1865 }
1866
1867 alu.dst.sel = ctx->temp_reg;
1868 alu.dst.chan = i;
1869 alu.dst.write = 1;
1870
1871 if (i == 3)
1872 alu.last = 1;
1873 r = r600_bc_add_alu(ctx->bc, &alu);
1874 if (r)
1875 return r;
1876 }
1877
1878 for (i = 0; i < 4; i++) {
1879 memset(&alu, 0, sizeof(struct r600_bc_alu));
1880 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1881
1882 if (i < 3) {
1883 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
1884 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
1885 } else {
1886 alu.src[0].sel = V_SQ_ALU_SRC_0;
1887 alu.src[0].chan = i;
1888 alu.src[1].sel = V_SQ_ALU_SRC_0;
1889 alu.src[1].chan = i;
1890 }
1891
1892 alu.src[2].sel = ctx->temp_reg;
1893 alu.src[2].neg = 1;
1894 alu.src[2].chan = i;
1895
1896 if (use_temp)
1897 alu.dst.sel = ctx->temp_reg;
1898 else
1899 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1900 alu.dst.chan = i;
1901 alu.dst.write = 1;
1902 alu.is_op3 = 1;
1903 if (i == 3)
1904 alu.last = 1;
1905 r = r600_bc_add_alu(ctx->bc, &alu);
1906 if (r)
1907 return r;
1908 }
1909 if (use_temp)
1910 return tgsi_helper_copy(ctx, inst);
1911 return 0;
1912 }
1913
1914 static int tgsi_exp(struct r600_shader_ctx *ctx)
1915 {
1916 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1917 struct r600_bc_alu alu;
1918 int r;
1919
1920 /* result.x = 2^floor(src); */
1921 if (inst->Dst[0].Register.WriteMask & 1) {
1922 memset(&alu, 0, sizeof(struct r600_bc_alu));
1923
1924 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1925 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1926
1927 alu.dst.sel = ctx->temp_reg;
1928 alu.dst.chan = 0;
1929 alu.dst.write = 1;
1930 alu.last = 1;
1931 r = r600_bc_add_alu(ctx->bc, &alu);
1932 if (r)
1933 return r;
1934
1935 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1936 alu.src[0].sel = ctx->temp_reg;
1937 alu.src[0].chan = 0;
1938
1939 alu.dst.sel = ctx->temp_reg;
1940 alu.dst.chan = 0;
1941 alu.dst.write = 1;
1942 alu.last = 1;
1943 r = r600_bc_add_alu(ctx->bc, &alu);
1944 if (r)
1945 return r;
1946 }
1947
1948 /* result.y = tmp - floor(tmp); */
1949 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1950 memset(&alu, 0, sizeof(struct r600_bc_alu));
1951
1952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1953 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1954
1955 alu.dst.sel = ctx->temp_reg;
1956 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1957 // if (r)
1958 // return r;
1959 alu.dst.write = 1;
1960 alu.dst.chan = 1;
1961
1962 alu.last = 1;
1963
1964 r = r600_bc_add_alu(ctx->bc, &alu);
1965 if (r)
1966 return r;
1967 }
1968
1969 /* result.z = RoughApprox2ToX(tmp);*/
1970 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1971 memset(&alu, 0, sizeof(struct r600_bc_alu));
1972 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1973 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1974
1975 alu.dst.sel = ctx->temp_reg;
1976 alu.dst.write = 1;
1977 alu.dst.chan = 2;
1978
1979 alu.last = 1;
1980
1981 r = r600_bc_add_alu(ctx->bc, &alu);
1982 if (r)
1983 return r;
1984 }
1985
1986 /* result.w = 1.0;*/
1987 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1988 memset(&alu, 0, sizeof(struct r600_bc_alu));
1989
1990 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1991 alu.src[0].sel = V_SQ_ALU_SRC_1;
1992 alu.src[0].chan = 0;
1993
1994 alu.dst.sel = ctx->temp_reg;
1995 alu.dst.chan = 3;
1996 alu.dst.write = 1;
1997 alu.last = 1;
1998 r = r600_bc_add_alu(ctx->bc, &alu);
1999 if (r)
2000 return r;
2001 }
2002 return tgsi_helper_copy(ctx, inst);
2003 }
2004
2005 static int tgsi_log(struct r600_shader_ctx *ctx)
2006 {
2007 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2008 struct r600_bc_alu alu;
2009 int r;
2010
2011 /* result.x = floor(log2(src)); */
2012 if (inst->Dst[0].Register.WriteMask & 1) {
2013 memset(&alu, 0, sizeof(struct r600_bc_alu));
2014
2015 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2016 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2017
2018 alu.dst.sel = ctx->temp_reg;
2019 alu.dst.chan = 0;
2020 alu.dst.write = 1;
2021 alu.last = 1;
2022 r = r600_bc_add_alu(ctx->bc, &alu);
2023 if (r)
2024 return r;
2025
2026 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2027 alu.src[0].sel = ctx->temp_reg;
2028 alu.src[0].chan = 0;
2029
2030 alu.dst.sel = ctx->temp_reg;
2031 alu.dst.chan = 0;
2032 alu.dst.write = 1;
2033 alu.last = 1;
2034
2035 r = r600_bc_add_alu(ctx->bc, &alu);
2036 if (r)
2037 return r;
2038 }
2039
2040 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2041 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2042 memset(&alu, 0, sizeof(struct r600_bc_alu));
2043
2044 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2045 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2046
2047 alu.dst.sel = ctx->temp_reg;
2048 alu.dst.chan = 1;
2049 alu.dst.write = 1;
2050 alu.last = 1;
2051
2052 r = r600_bc_add_alu(ctx->bc, &alu);
2053 if (r)
2054 return r;
2055
2056 memset(&alu, 0, sizeof(struct r600_bc_alu));
2057
2058 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2059 alu.src[0].sel = ctx->temp_reg;
2060 alu.src[0].chan = 1;
2061
2062 alu.dst.sel = ctx->temp_reg;
2063 alu.dst.chan = 1;
2064 alu.dst.write = 1;
2065 alu.last = 1;
2066
2067 r = r600_bc_add_alu(ctx->bc, &alu);
2068 if (r)
2069 return r;
2070
2071 memset(&alu, 0, sizeof(struct r600_bc_alu));
2072
2073 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2074 alu.src[0].sel = ctx->temp_reg;
2075 alu.src[0].chan = 1;
2076
2077 alu.dst.sel = ctx->temp_reg;
2078 alu.dst.chan = 1;
2079 alu.dst.write = 1;
2080 alu.last = 1;
2081
2082 r = r600_bc_add_alu(ctx->bc, &alu);
2083 if (r)
2084 return r;
2085
2086 memset(&alu, 0, sizeof(struct r600_bc_alu));
2087
2088 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2089 alu.src[0].sel = ctx->temp_reg;
2090 alu.src[0].chan = 1;
2091
2092 alu.dst.sel = ctx->temp_reg;
2093 alu.dst.chan = 1;
2094 alu.dst.write = 1;
2095 alu.last = 1;
2096
2097 r = r600_bc_add_alu(ctx->bc, &alu);
2098 if (r)
2099 return r;
2100
2101 memset(&alu, 0, sizeof(struct r600_bc_alu));
2102
2103 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2104
2105 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2106
2107 alu.src[1].sel = ctx->temp_reg;
2108 alu.src[1].chan = 1;
2109
2110 alu.dst.sel = ctx->temp_reg;
2111 alu.dst.chan = 1;
2112 alu.dst.write = 1;
2113 alu.last = 1;
2114
2115 r = r600_bc_add_alu(ctx->bc, &alu);
2116 if (r)
2117 return r;
2118 }
2119
2120 /* result.z = log2(src);*/
2121 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2122 memset(&alu, 0, sizeof(struct r600_bc_alu));
2123
2124 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2125 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2126
2127 alu.dst.sel = ctx->temp_reg;
2128 alu.dst.write = 1;
2129 alu.dst.chan = 2;
2130 alu.last = 1;
2131
2132 r = r600_bc_add_alu(ctx->bc, &alu);
2133 if (r)
2134 return r;
2135 }
2136
2137 /* result.w = 1.0; */
2138 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2139 memset(&alu, 0, sizeof(struct r600_bc_alu));
2140
2141 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2142 alu.src[0].sel = V_SQ_ALU_SRC_1;
2143 alu.src[0].chan = 0;
2144
2145 alu.dst.sel = ctx->temp_reg;
2146 alu.dst.chan = 3;
2147 alu.dst.write = 1;
2148 alu.last = 1;
2149
2150 r = r600_bc_add_alu(ctx->bc, &alu);
2151 if (r)
2152 return r;
2153 }
2154
2155 return tgsi_helper_copy(ctx, inst);
2156 }
2157
2158 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2159 {
2160 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2161 struct r600_bc_alu alu;
2162 int r;
2163
2164 memset(&alu, 0, sizeof(struct r600_bc_alu));
2165
2166 switch (inst->Instruction.Opcode) {
2167 case TGSI_OPCODE_ARL:
2168 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2169 break;
2170 case TGSI_OPCODE_ARR:
2171 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2172 break;
2173 default:
2174 assert(0);
2175 return -1;
2176 }
2177
2178 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2179 alu.last = 1;
2180 alu.dst.sel = ctx->ar_reg;
2181 alu.dst.write = 1;
2182 r = r600_bc_add_alu(ctx->bc, &alu);
2183 if (r)
2184 return r;
2185
2186 /* TODO: Note that the MOVA can be avoided if we never use AR for
2187 * indexing non-CB registers in the current ALU clause. Similarly, we
2188 * need to load AR from ar_reg again if we started a new clause
2189 * between ARL and AR usage. The easy way to do that is to remove
2190 * the MOVA here, and load it for the first AR access after ar_reg
2191 * has been modified in each clause. */
2192 memset(&alu, 0, sizeof(struct r600_bc_alu));
2193 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2194 alu.src[0].sel = ctx->ar_reg;
2195 alu.src[0].chan = 0;
2196 alu.last = 1;
2197 r = r600_bc_add_alu(ctx->bc, &alu);
2198 if (r)
2199 return r;
2200 return 0;
2201 }
2202 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2203 {
2204 /* TODO from r600c, ar values don't persist between clauses */
2205 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2206 struct r600_bc_alu alu;
2207 int r;
2208
2209 switch (inst->Instruction.Opcode) {
2210 case TGSI_OPCODE_ARL:
2211 memset(&alu, 0, sizeof(alu));
2212 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2213 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2214 alu.dst.sel = ctx->ar_reg;
2215 alu.dst.write = 1;
2216 alu.last = 1;
2217
2218 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2219 return r;
2220
2221 memset(&alu, 0, sizeof(alu));
2222 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2223 alu.src[0].sel = ctx->ar_reg;
2224 alu.dst.sel = ctx->ar_reg;
2225 alu.dst.write = 1;
2226 alu.last = 1;
2227
2228 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2229 return r;
2230 break;
2231 case TGSI_OPCODE_ARR:
2232 memset(&alu, 0, sizeof(alu));
2233 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2234 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2235 alu.dst.sel = ctx->ar_reg;
2236 alu.dst.write = 1;
2237 alu.last = 1;
2238
2239 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2240 return r;
2241 break;
2242 default:
2243 assert(0);
2244 return -1;
2245 }
2246
2247 memset(&alu, 0, sizeof(alu));
2248 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2249 alu.src[0].sel = ctx->ar_reg;
2250 alu.last = 1;
2251
2252 r = r600_bc_add_alu(ctx->bc, &alu);
2253 if (r)
2254 return r;
2255 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2256 return 0;
2257 }
2258
2259 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2260 {
2261 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2262 struct r600_bc_alu alu;
2263 int i, r = 0;
2264
2265 for (i = 0; i < 4; i++) {
2266 memset(&alu, 0, sizeof(struct r600_bc_alu));
2267
2268 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2269 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2270
2271 if (i == 0 || i == 3) {
2272 alu.src[0].sel = V_SQ_ALU_SRC_1;
2273 } else {
2274 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2275 }
2276
2277 if (i == 0 || i == 2) {
2278 alu.src[1].sel = V_SQ_ALU_SRC_1;
2279 } else {
2280 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2281 }
2282 if (i == 3)
2283 alu.last = 1;
2284 r = r600_bc_add_alu(ctx->bc, &alu);
2285 if (r)
2286 return r;
2287 }
2288 return 0;
2289 }
2290
2291 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2292 {
2293 struct r600_bc_alu alu;
2294 int r;
2295
2296 memset(&alu, 0, sizeof(struct r600_bc_alu));
2297 alu.inst = opcode;
2298 alu.predicate = 1;
2299
2300 alu.dst.sel = ctx->temp_reg;
2301 alu.dst.write = 1;
2302 alu.dst.chan = 0;
2303
2304 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2305 alu.src[1].sel = V_SQ_ALU_SRC_0;
2306 alu.src[1].chan = 0;
2307
2308 alu.last = 1;
2309
2310 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2311 if (r)
2312 return r;
2313 return 0;
2314 }
2315
2316 static int pops(struct r600_shader_ctx *ctx, int pops)
2317 {
2318 int alu_pop = 3;
2319 if (ctx->bc->cf_last) {
2320 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2321 alu_pop = 0;
2322 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2323 alu_pop = 1;
2324 }
2325 alu_pop += pops;
2326 if (alu_pop == 1) {
2327 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2328 ctx->bc->force_add_cf = 1;
2329 } else if (alu_pop == 2) {
2330 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2331 ctx->bc->force_add_cf = 1;
2332 } else {
2333 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2334 ctx->bc->cf_last->pop_count = pops;
2335 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2336 }
2337 return 0;
2338 }
2339
2340 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2341 {
2342 switch(reason) {
2343 case FC_PUSH_VPM:
2344 ctx->bc->callstack[ctx->bc->call_sp].current--;
2345 break;
2346 case FC_PUSH_WQM:
2347 case FC_LOOP:
2348 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2349 break;
2350 case FC_REP:
2351 /* TOODO : for 16 vp asic should -= 2; */
2352 ctx->bc->callstack[ctx->bc->call_sp].current --;
2353 break;
2354 }
2355 }
2356
2357 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2358 {
2359 if (check_max_only) {
2360 int diff;
2361 switch (reason) {
2362 case FC_PUSH_VPM:
2363 diff = 1;
2364 break;
2365 case FC_PUSH_WQM:
2366 diff = 4;
2367 break;
2368 default:
2369 assert(0);
2370 diff = 0;
2371 }
2372 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2373 ctx->bc->callstack[ctx->bc->call_sp].max) {
2374 ctx->bc->callstack[ctx->bc->call_sp].max =
2375 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2376 }
2377 return;
2378 }
2379 switch (reason) {
2380 case FC_PUSH_VPM:
2381 ctx->bc->callstack[ctx->bc->call_sp].current++;
2382 break;
2383 case FC_PUSH_WQM:
2384 case FC_LOOP:
2385 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2386 break;
2387 case FC_REP:
2388 ctx->bc->callstack[ctx->bc->call_sp].current++;
2389 break;
2390 }
2391
2392 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2393 ctx->bc->callstack[ctx->bc->call_sp].max) {
2394 ctx->bc->callstack[ctx->bc->call_sp].max =
2395 ctx->bc->callstack[ctx->bc->call_sp].current;
2396 }
2397 }
2398
2399 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2400 {
2401 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2402
2403 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2404 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2405 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2406 sp->num_mid++;
2407 }
2408
2409 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2410 {
2411 ctx->bc->fc_sp++;
2412 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2413 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2414 }
2415
2416 static void fc_poplevel(struct r600_shader_ctx *ctx)
2417 {
2418 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2419 if (sp->mid) {
2420 free(sp->mid);
2421 sp->mid = NULL;
2422 }
2423 sp->num_mid = 0;
2424 sp->start = NULL;
2425 sp->type = 0;
2426 ctx->bc->fc_sp--;
2427 }
2428
#if 0
/*
 * Disabled (compiled out) sketches for RETURN / flag-based loop exit
 * support. Several of them are still empty stubs.
 */

/* Append a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a JUMP CF instruction popping `pops` entries; `offset` is unused. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: not implemented. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: not implemented. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, clear the flag, pop `ifidx + 1` levels, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with one pop,
 * register it as a fixup on level `fc_sp`, then pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2476
2477 static int tgsi_if(struct r600_shader_ctx *ctx)
2478 {
2479 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2480
2481 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2482
2483 fc_pushlevel(ctx, FC_IF);
2484
2485 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2486 return 0;
2487 }
2488
2489 static int tgsi_else(struct r600_shader_ctx *ctx)
2490 {
2491 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2492 ctx->bc->cf_last->pop_count = 1;
2493
2494 fc_set_mid(ctx, ctx->bc->fc_sp);
2495 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2496 return 0;
2497 }
2498
2499 static int tgsi_endif(struct r600_shader_ctx *ctx)
2500 {
2501 pops(ctx, 1);
2502 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2503 R600_ERR("if/endif unbalanced in shader\n");
2504 return -1;
2505 }
2506
2507 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2508 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2509 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2510 } else {
2511 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2512 }
2513 fc_poplevel(ctx);
2514
2515 callstack_decrease_current(ctx, FC_PUSH_VPM);
2516 return 0;
2517 }
2518
2519 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2520 {
2521 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2522
2523 fc_pushlevel(ctx, FC_LOOP);
2524
2525 /* check stack depth */
2526 callstack_check_depth(ctx, FC_LOOP, 0);
2527 return 0;
2528 }
2529
2530 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2531 {
2532 int i;
2533
2534 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2535
2536 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2537 R600_ERR("loop/endloop in shader code are not paired.\n");
2538 return -EINVAL;
2539 }
2540
2541 /* fixup loop pointers - from r600isa
2542 LOOP END points to CF after LOOP START,
2543 LOOP START point to CF after LOOP END
2544 BRK/CONT point to LOOP END CF
2545 */
2546 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2547
2548 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2549
2550 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2551 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2552 }
2553 /* TODO add LOOPRET support */
2554 fc_poplevel(ctx);
2555 callstack_decrease_current(ctx, FC_LOOP);
2556 return 0;
2557 }
2558
2559 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2560 {
2561 unsigned int fscp;
2562
2563 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2564 {
2565 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2566 break;
2567 }
2568
2569 if (fscp == 0) {
2570 R600_ERR("Break not inside loop/endloop pair\n");
2571 return -EINVAL;
2572 }
2573
2574 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2575 ctx->bc->cf_last->pop_count = 1;
2576
2577 fc_set_mid(ctx, fscp);
2578
2579 pops(ctx, 1);
2580 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2581 return 0;
2582 }
2583
2584 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2585 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2586 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2587 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2588
2589 /* FIXME:
2590 * For state trackers other than OpenGL, we'll want to use
2591 * _RECIP_IEEE instead.
2592 */
2593 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2594
2595 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2596 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2597 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2598 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2599 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2600 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2601 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2602 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2603 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2604 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2605 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2606 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2607 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2608 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2609 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2610 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2611 /* gap */
2612 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2613 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2614 /* gap */
2615 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2616 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2617 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2618 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2619 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2620 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2621 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2622 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2623 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2624 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2625 /* gap */
2626 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2627 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2628 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2629 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2630 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2631 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2632 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2633 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2634 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2635 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2636 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2637 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2638 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2639 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2640 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2641 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2642 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2643 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2644 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2645 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2646 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2647 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2648 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2649 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2650 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2651 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2652 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2653 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2654 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2655 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2656 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2657 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2658 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2659 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2660 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2661 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2662 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2663 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2664 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2665 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2666 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2667 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2668 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2669 /* gap */
2670 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2671 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2672 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2673 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2674 /* gap */
2675 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2676 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2677 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2678 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2679 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2680 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2681 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2682 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2683 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2684 /* gap */
2685 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2686 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2687 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2688 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2689 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2690 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2691 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2692 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2693 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2694 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2695 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2696 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2697 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2698 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2699 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2700 /* gap */
2701 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2702 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2703 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2704 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2705 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2706 /* gap */
2707 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2708 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2709 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2710 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2711 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2712 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2713 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2714 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2715 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2716 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2717 /* gap */
2718 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2719 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2720 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2721 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2722 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2723 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2724 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2725 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2726 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2727 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2728 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2729 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2730 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2731 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2732 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2733 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2734 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2735 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2736 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2737 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2738 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2739 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2740 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2741 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2742 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2743 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2744 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2745 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2746 };
2747
2748 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2749 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2750 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2751 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2752 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2753 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2754 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2755 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2756 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2757 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2758 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2759 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2760 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2761 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2762 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2763 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2764 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2765 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2766 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2767 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2768 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2769 /* gap */
2770 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2771 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2772 /* gap */
2773 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2774 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2775 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2776 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2777 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2778 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2779 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2780 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2781 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2782 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2783 /* gap */
2784 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2785 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2786 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2787 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2788 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2789 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2790 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2791 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2792 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2793 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2794 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2795 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2796 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2797 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2798 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2799 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2800 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2801 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2802 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2803 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2804 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2805 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2806 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2807 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2808 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2809 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2810 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2811 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2812 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2814 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2815 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2816 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2817 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2818 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2819 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2820 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2821 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2822 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2823 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2824 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2825 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2826 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2827 /* gap */
2828 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2829 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2830 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2831 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2832 /* gap */
2833 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2834 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2836 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2841 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2842 /* gap */
2843 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2844 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2846 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2847 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2852 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2855 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2857 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2858 /* gap */
2859 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864 /* gap */
2865 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2868 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2871 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2874 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2875 /* gap */
2876 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2880 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904 };