r600g: Move r600_pipe_shader_ps() to r600_state.c.
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_formats.h"
32 #include "r600_opcodes.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37 int r600_find_vs_semantic_index(struct r600_shader *vs,
38 struct r600_shader *ps, int id)
39 {
40 struct r600_shader_io *input = &ps->input[id];
41
42 for (int i = 0; i < vs->noutput; i++) {
43 if (input->name == vs->output[i].name &&
44 input->sid == vs->output[i].sid) {
45 return i - 1;
46 }
47 }
48 return 0;
49 }
50
51 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
52 {
53 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
54 struct r600_shader *rshader = &shader->shader;
55 void *ptr;
56
57 /* copy new shader */
58 if (shader->bo == NULL) {
59 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
60 if (shader->bo == NULL) {
61 return -ENOMEM;
62 }
63 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
64 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
65 r600_bo_unmap(rctx->radeon, shader->bo);
66 }
67 /* build state */
68 switch (rshader->processor_type) {
69 case TGSI_PROCESSOR_VERTEX:
70 if (rshader->family >= CHIP_CEDAR) {
71 evergreen_pipe_shader_vs(ctx, shader);
72 } else {
73 r600_pipe_shader_vs(ctx, shader);
74 }
75 break;
76 case TGSI_PROCESSOR_FRAGMENT:
77 if (rshader->family >= CHIP_CEDAR) {
78 evergreen_pipe_shader_ps(ctx, shader);
79 } else {
80 r600_pipe_shader_ps(ctx, shader);
81 }
82 break;
83 default:
84 return -EINVAL;
85 }
86 return 0;
87 }
88
89 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
90
91 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
92 {
93 static int dump_shaders = -1;
94 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
95 int r;
96
97 /* Would like some magic "get_bool_option_once" routine.
98 */
99 if (dump_shaders == -1)
100 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
101
102 if (dump_shaders) {
103 fprintf(stderr, "--------------------------------------------------------------\n");
104 tgsi_dump(tokens, 0);
105 }
106 shader->shader.family = r600_get_family(rctx->radeon);
107 r = r600_shader_from_tgsi(tokens, &shader->shader);
108 if (r) {
109 R600_ERR("translation from TGSI failed !\n");
110 return r;
111 }
112 r = r600_bc_build(&shader->shader.bc);
113 if (r) {
114 R600_ERR("building bytecode failed !\n");
115 return r;
116 }
117 if (dump_shaders) {
118 r600_bc_dump(&shader->shader.bc);
119 fprintf(stderr, "______________________________________________________________\n");
120 }
121 return r600_pipe_shader(ctx, shader);
122 }
123
/*
 * Tear down a pipe shader: re-point shader->bo at NULL through
 * r600_bo_reference() (presumably dropping this shader's reference on the
 * bytecode buffer -- confirm in the winsys code) and clear the bytecode
 * container.
 */
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;

	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
	r600_bc_clear(&shader->shader.bc);
}
131
132 /*
133 * tgsi -> r600 shader
134 */
135 struct r600_shader_tgsi_instruction;
136
/* One decoded TGSI source operand, as filled in by tgsi_src(). */
struct r600_shader_src {
	unsigned sel;		/* source selector: register index plus file offset, or a special/literal selector */
	unsigned swizzle[4];	/* per-channel swizzle taken from the TGSI register */
	unsigned neg;		/* TGSI Negate flag (may also be set by the special-constant lookup) */
	unsigned abs;		/* TGSI Absolute flag */
	unsigned rel;		/* V_SQ_REL_RELATIVE for indirectly addressed registers, else 0 */
	uint32_t value[4];	/* the four literal words when the operand is an immediate */
};
145
/* Working state for one TGSI -> r600 translation (see r600_shader_from_tgsi()). */
struct r600_shader_ctx {
	struct tgsi_shader_info info;		/* filled by tgsi_scan_shader() */
	struct tgsi_parse_context parse;	/* token iterator; holds the current token */
	const struct tgsi_token *tokens;	/* the token stream being translated */
	unsigned type;				/* processor type read from the TGSI header */
	unsigned file_offset[TGSI_FILE_COUNT];	/* base added to register indices of each TGSI file */
	unsigned temp_reg;			/* first driver temporary (ar_reg + 1) */
	unsigned ar_reg;			/* register placed right after the TGSI temporaries */
	struct r600_shader_tgsi_instruction *inst_info;	/* table entry of the instruction being translated */
	struct r600_bc *bc;			/* bytecode being emitted (&shader->bc) */
	struct r600_shader *shader;		/* shader being filled in */
	struct r600_shader_src src[3];		/* decoded sources of the current instruction */
	u32 *literals;				/* immediates seen so far, four words each */
	u32 nliterals;				/* number of immediates stored in literals */
	u32 max_driver_temp_used;		/* driver temps handed out for the current instruction */
	/* needed for evergreen interpolation */
	boolean input_centroid;			/* some input uses centroid sampling */
	boolean input_linear;			/* some input uses linear interpolation */
	boolean input_perspective;		/* some input uses perspective interpolation */
	int num_interp_gpr;			/* GPR count computed by evergreen_gpr_count() */
};
167
/* One entry of the opcode translation tables, indexed by TGSI opcode. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;	/* TGSI opcode this entry describes */
	unsigned is_op3;	/* NOTE(review): presumably flags a three-source ALU opcode -- confirm against the tables */
	unsigned r600_opcode;	/* hardware opcode the handlers copy into alu.inst */
	int (*process)(struct r600_shader_ctx *ctx);	/* handler called once per TGSI instruction */
};
174
175 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
176 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
177
178 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
179 {
180 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
181 int j;
182
183 if (i->Instruction.NumDstRegs > 1) {
184 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
185 return -EINVAL;
186 }
187 if (i->Instruction.Predicate) {
188 R600_ERR("predicate unsupported\n");
189 return -EINVAL;
190 }
191 #if 0
192 if (i->Instruction.Label) {
193 R600_ERR("label unsupported\n");
194 return -EINVAL;
195 }
196 #endif
197 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
198 if (i->Src[j].Register.Dimension) {
199 R600_ERR("unsupported src %d (dimension %d)\n", j,
200 i->Src[j].Register.Dimension);
201 return -EINVAL;
202 }
203 }
204 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
205 if (i->Dst[j].Register.Dimension) {
206 R600_ERR("unsupported dst (dimension)\n");
207 return -EINVAL;
208 }
209 }
210 return 0;
211 }
212
213 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
214 {
215 int i, r;
216 struct r600_bc_alu alu;
217 int gpr = 0, base_chan = 0;
218 int ij_index = 0;
219
220 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
221 ij_index = 0;
222 if (ctx->shader->input[input].centroid)
223 ij_index++;
224 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
225 ij_index = 0;
226 /* if we have perspective add one */
227 if (ctx->input_perspective) {
228 ij_index++;
229 /* if we have perspective centroid */
230 if (ctx->input_centroid)
231 ij_index++;
232 }
233 if (ctx->shader->input[input].centroid)
234 ij_index++;
235 }
236
237 /* work out gpr and base_chan from index */
238 gpr = ij_index / 2;
239 base_chan = (2 * (ij_index % 2)) + 1;
240
241 for (i = 0; i < 8; i++) {
242 memset(&alu, 0, sizeof(struct r600_bc_alu));
243
244 if (i < 4)
245 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
246 else
247 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
248
249 if ((i > 1) && (i < 6)) {
250 alu.dst.sel = ctx->shader->input[input].gpr;
251 alu.dst.write = 1;
252 }
253
254 alu.dst.chan = i % 4;
255
256 alu.src[0].sel = gpr;
257 alu.src[0].chan = (base_chan - (i % 2));
258
259 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
260
261 alu.bank_swizzle_force = SQ_ALU_VEC_210;
262 if ((i % 4) == 3)
263 alu.last = 1;
264 r = r600_bc_add_alu(ctx->bc, &alu);
265 if (r)
266 return r;
267 }
268 return 0;
269 }
270
271
272 static int tgsi_declaration(struct r600_shader_ctx *ctx)
273 {
274 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
275 unsigned i;
276 int r;
277
278 switch (d->Declaration.File) {
279 case TGSI_FILE_INPUT:
280 i = ctx->shader->ninput++;
281 ctx->shader->input[i].name = d->Semantic.Name;
282 ctx->shader->input[i].sid = d->Semantic.Index;
283 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
284 ctx->shader->input[i].centroid = d->Declaration.Centroid;
285 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
286 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
287 /* turn input into interpolate on EG */
288 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
289 if (ctx->shader->input[i].interpolate > 0) {
290 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
291 evergreen_interp_alu(ctx, i);
292 }
293 }
294 }
295 break;
296 case TGSI_FILE_OUTPUT:
297 i = ctx->shader->noutput++;
298 ctx->shader->output[i].name = d->Semantic.Name;
299 ctx->shader->output[i].sid = d->Semantic.Index;
300 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
301 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
302 break;
303 case TGSI_FILE_CONSTANT:
304 case TGSI_FILE_TEMPORARY:
305 case TGSI_FILE_SAMPLER:
306 case TGSI_FILE_ADDRESS:
307 break;
308
309 case TGSI_FILE_SYSTEM_VALUE:
310 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
311 struct r600_bc_alu alu;
312 memset(&alu, 0, sizeof(struct r600_bc_alu));
313
314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
315 alu.src[0].sel = 0;
316 alu.src[0].chan = 3;
317
318 alu.dst.sel = 0;
319 alu.dst.chan = 3;
320 alu.dst.write = 1;
321 alu.last = 1;
322
323 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
324 return r;
325 break;
326 }
327
328 default:
329 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
330 return -EINVAL;
331 }
332 return 0;
333 }
334
335 static int r600_get_temp(struct r600_shader_ctx *ctx)
336 {
337 return ctx->temp_reg + ctx->max_driver_temp_used++;
338 }
339
340 /*
341 * for evergreen we need to scan the shader to find the number of GPRs we need to
342 * reserve for interpolation.
343 *
344 * we need to know if we are going to emit
345 * any centroid inputs
346 * if perspective and linear are required
347 */
348 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
349 {
350 int i;
351 int num_baryc;
352
353 ctx->input_linear = FALSE;
354 ctx->input_perspective = FALSE;
355 ctx->input_centroid = FALSE;
356 ctx->num_interp_gpr = 1;
357
358 /* any centroid inputs */
359 for (i = 0; i < ctx->info.num_inputs; i++) {
360 /* skip position/face */
361 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
362 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
363 continue;
364 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
365 ctx->input_linear = TRUE;
366 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
367 ctx->input_perspective = TRUE;
368 if (ctx->info.input_centroid[i])
369 ctx->input_centroid = TRUE;
370 }
371
372 num_baryc = 0;
373 /* ignoring sample for now */
374 if (ctx->input_perspective)
375 num_baryc++;
376 if (ctx->input_linear)
377 num_baryc++;
378 if (ctx->input_centroid)
379 num_baryc *= 2;
380
381 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
382
383 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
384 return ctx->num_interp_gpr;
385 }
386
387 static void tgsi_src(struct r600_shader_ctx *ctx,
388 const struct tgsi_full_src_register *tgsi_src,
389 struct r600_shader_src *r600_src)
390 {
391 memset(r600_src, 0, sizeof(*r600_src));
392 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
393 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
394 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
395 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
396 r600_src->neg = tgsi_src->Register.Negate;
397 r600_src->abs = tgsi_src->Register.Absolute;
398
399 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
400 int index;
401 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
402 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
403 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
404
405 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
406 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
407 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
408 return;
409 }
410 index = tgsi_src->Register.Index;
411 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
412 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
413 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
414 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
415 r600_src->swizzle[0] = 3;
416 r600_src->swizzle[1] = 3;
417 r600_src->swizzle[2] = 3;
418 r600_src->swizzle[3] = 3;
419 r600_src->sel = 0;
420 } else {
421 if (tgsi_src->Register.Indirect)
422 r600_src->rel = V_SQ_REL_RELATIVE;
423 r600_src->sel = tgsi_src->Register.Index;
424 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
425 }
426 }
427
428 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
429 {
430 struct r600_bc_vtx vtx;
431 unsigned int ar_reg;
432 int r;
433
434 if (offset) {
435 struct r600_bc_alu alu;
436
437 memset(&alu, 0, sizeof(alu));
438
439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
440 alu.src[0].sel = ctx->ar_reg;
441
442 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
443 alu.src[1].value = offset;
444
445 alu.dst.sel = dst_reg;
446 alu.dst.write = 1;
447 alu.last = 1;
448
449 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
450 return r;
451
452 ar_reg = dst_reg;
453 } else {
454 ar_reg = ctx->ar_reg;
455 }
456
457 memset(&vtx, 0, sizeof(vtx));
458 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
459 vtx.src_gpr = ar_reg;
460 vtx.mega_fetch_count = 16;
461 vtx.dst_gpr = dst_reg;
462 vtx.dst_sel_x = 0; /* SEL_X */
463 vtx.dst_sel_y = 1; /* SEL_Y */
464 vtx.dst_sel_z = 2; /* SEL_Z */
465 vtx.dst_sel_w = 3; /* SEL_W */
466 vtx.data_format = FMT_32_32_32_32_FLOAT;
467 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
468 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
469 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
470
471 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
472 return r;
473
474 return 0;
475 }
476
477 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
478 {
479 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
480 struct r600_bc_alu alu;
481 int i, j, k, nconst, r;
482
483 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
484 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
485 nconst++;
486 }
487 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
488 }
489 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
490 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
491 continue;
492 }
493
494 if (ctx->src[i].rel) {
495 int treg = r600_get_temp(ctx);
496 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
497 return r;
498
499 ctx->src[i].sel = treg;
500 ctx->src[i].rel = 0;
501 j--;
502 } else if (j > 0) {
503 int treg = r600_get_temp(ctx);
504 for (k = 0; k < 4; k++) {
505 memset(&alu, 0, sizeof(struct r600_bc_alu));
506 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
507 alu.src[0].sel = ctx->src[i].sel;
508 alu.src[0].chan = k;
509 alu.src[0].rel = ctx->src[i].rel;
510 alu.dst.sel = treg;
511 alu.dst.chan = k;
512 alu.dst.write = 1;
513 if (k == 3)
514 alu.last = 1;
515 r = r600_bc_add_alu(ctx->bc, &alu);
516 if (r)
517 return r;
518 }
519 ctx->src[i].sel = treg;
520 ctx->src[i].rel =0;
521 j--;
522 }
523 }
524 return 0;
525 }
526
527 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
528 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
529 {
530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
531 struct r600_bc_alu alu;
532 int i, j, k, nliteral, r;
533
534 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
535 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
536 nliteral++;
537 }
538 }
539 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
540 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
541 int treg = r600_get_temp(ctx);
542 for (k = 0; k < 4; k++) {
543 memset(&alu, 0, sizeof(struct r600_bc_alu));
544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
545 alu.src[0].sel = ctx->src[i].sel;
546 alu.src[0].chan = k;
547 alu.src[0].value = ctx->src[i].value[k];
548 alu.dst.sel = treg;
549 alu.dst.chan = k;
550 alu.dst.write = 1;
551 if (k == 3)
552 alu.last = 1;
553 r = r600_bc_add_alu(ctx->bc, &alu);
554 if (r)
555 return r;
556 }
557 ctx->src[i].sel = treg;
558 j--;
559 }
560 }
561 return 0;
562 }
563
564 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
565 {
566 struct tgsi_full_immediate *immediate;
567 struct tgsi_full_property *property;
568 struct r600_shader_ctx ctx;
569 struct r600_bc_output output[32];
570 unsigned output_done, noutput;
571 unsigned opcode;
572 int i, r = 0, pos0;
573
574 ctx.bc = &shader->bc;
575 ctx.shader = shader;
576 r = r600_bc_init(ctx.bc, shader->family);
577 if (r)
578 return r;
579 ctx.tokens = tokens;
580 tgsi_scan_shader(tokens, &ctx.info);
581 tgsi_parse_init(&ctx.parse, tokens);
582 ctx.type = ctx.parse.FullHeader.Processor.Processor;
583 shader->processor_type = ctx.type;
584 ctx.bc->type = shader->processor_type;
585
586 /* register allocations */
587 /* Values [0,127] correspond to GPR[0..127].
588 * Values [128,159] correspond to constant buffer bank 0
589 * Values [160,191] correspond to constant buffer bank 1
590 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
591 * Values [256,287] correspond to constant buffer bank 2 (EG)
592 * Values [288,319] correspond to constant buffer bank 3 (EG)
593 * Other special values are shown in the list below.
594 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
595 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
596 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
597 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
598 * 248 SQ_ALU_SRC_0: special constant 0.0.
599 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
600 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
601 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
602 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
603 * 253 SQ_ALU_SRC_LITERAL: literal constant.
604 * 254 SQ_ALU_SRC_PV: previous vector result.
605 * 255 SQ_ALU_SRC_PS: previous scalar result.
606 */
607 for (i = 0; i < TGSI_FILE_COUNT; i++) {
608 ctx.file_offset[i] = 0;
609 }
610 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
611 ctx.file_offset[TGSI_FILE_INPUT] = 1;
612 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
613 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
614 } else {
615 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
616 }
617 }
618 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
619 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
620 }
621 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
622 ctx.info.file_count[TGSI_FILE_INPUT];
623 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
624 ctx.info.file_count[TGSI_FILE_OUTPUT];
625
626 /* Outside the GPR range. This will be translated to one of the
627 * kcache banks later. */
628 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
629
630 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
631 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
632 ctx.info.file_count[TGSI_FILE_TEMPORARY];
633 ctx.temp_reg = ctx.ar_reg + 1;
634
635 ctx.nliterals = 0;
636 ctx.literals = NULL;
637 shader->fs_write_all = FALSE;
638 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
639 tgsi_parse_token(&ctx.parse);
640 switch (ctx.parse.FullToken.Token.Type) {
641 case TGSI_TOKEN_TYPE_IMMEDIATE:
642 immediate = &ctx.parse.FullToken.FullImmediate;
643 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
644 if(ctx.literals == NULL) {
645 r = -ENOMEM;
646 goto out_err;
647 }
648 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
649 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
650 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
651 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
652 ctx.nliterals++;
653 break;
654 case TGSI_TOKEN_TYPE_DECLARATION:
655 r = tgsi_declaration(&ctx);
656 if (r)
657 goto out_err;
658 break;
659 case TGSI_TOKEN_TYPE_INSTRUCTION:
660 r = tgsi_is_supported(&ctx);
661 if (r)
662 goto out_err;
663 ctx.max_driver_temp_used = 0;
664 /* reserve first tmp for everyone */
665 r600_get_temp(&ctx);
666
667 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
668 if ((r = tgsi_split_constant(&ctx)))
669 goto out_err;
670 if ((r = tgsi_split_literal_constant(&ctx)))
671 goto out_err;
672 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
673 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
674 else
675 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
676 r = ctx.inst_info->process(&ctx);
677 if (r)
678 goto out_err;
679 break;
680 case TGSI_TOKEN_TYPE_PROPERTY:
681 property = &ctx.parse.FullToken.FullProperty;
682 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
683 if (property->u[0].Data == 1)
684 shader->fs_write_all = TRUE;
685 }
686 break;
687 default:
688 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
689 r = -EINVAL;
690 goto out_err;
691 }
692 }
693 /* export output */
694 noutput = shader->noutput;
695 for (i = 0, pos0 = 0; i < noutput; i++) {
696 memset(&output[i], 0, sizeof(struct r600_bc_output));
697 output[i].gpr = shader->output[i].gpr;
698 output[i].elem_size = 3;
699 output[i].swizzle_x = 0;
700 output[i].swizzle_y = 1;
701 output[i].swizzle_z = 2;
702 output[i].swizzle_w = 3;
703 output[i].burst_count = 1;
704 output[i].barrier = 1;
705 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
706 output[i].array_base = i - pos0;
707 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
708 switch (ctx.type) {
709 case TGSI_PROCESSOR_VERTEX:
710 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
711 output[i].array_base = 60;
712 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
713 /* position doesn't count in array_base */
714 pos0++;
715 }
716 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
717 output[i].array_base = 61;
718 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
719 /* position doesn't count in array_base */
720 pos0++;
721 }
722 break;
723 case TGSI_PROCESSOR_FRAGMENT:
724 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
725 output[i].array_base = shader->output[i].sid;
726 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
727 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
728 output[i].array_base = 61;
729 output[i].swizzle_x = 2;
730 output[i].swizzle_y = 7;
731 output[i].swizzle_z = output[i].swizzle_w = 7;
732 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
733 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
734 output[i].array_base = 61;
735 output[i].swizzle_x = 7;
736 output[i].swizzle_y = 1;
737 output[i].swizzle_z = output[i].swizzle_w = 7;
738 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
739 } else {
740 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
741 r = -EINVAL;
742 goto out_err;
743 }
744 break;
745 default:
746 R600_ERR("unsupported processor type %d\n", ctx.type);
747 r = -EINVAL;
748 goto out_err;
749 }
750 }
751 /* add fake param output for vertex shader if no param is exported */
752 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
753 for (i = 0, pos0 = 0; i < noutput; i++) {
754 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
755 pos0 = 1;
756 break;
757 }
758 }
759 if (!pos0) {
760 memset(&output[i], 0, sizeof(struct r600_bc_output));
761 output[i].gpr = 0;
762 output[i].elem_size = 3;
763 output[i].swizzle_x = 0;
764 output[i].swizzle_y = 1;
765 output[i].swizzle_z = 2;
766 output[i].swizzle_w = 3;
767 output[i].burst_count = 1;
768 output[i].barrier = 1;
769 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
770 output[i].array_base = 0;
771 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
772 noutput++;
773 }
774 }
775 /* add fake pixel export */
776 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
777 memset(&output[0], 0, sizeof(struct r600_bc_output));
778 output[0].gpr = 0;
779 output[0].elem_size = 3;
780 output[0].swizzle_x = 7;
781 output[0].swizzle_y = 7;
782 output[0].swizzle_z = 7;
783 output[0].swizzle_w = 7;
784 output[0].burst_count = 1;
785 output[0].barrier = 1;
786 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
787 output[0].array_base = 0;
788 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
789 noutput++;
790 }
791 /* set export done on last export of each type */
792 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
793 if (i == (noutput - 1)) {
794 output[i].end_of_program = 1;
795 }
796 if (!(output_done & (1 << output[i].type))) {
797 output_done |= (1 << output[i].type);
798 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
799 }
800 }
801 /* add output to bytecode */
802 for (i = 0; i < noutput; i++) {
803 r = r600_bc_add_output(ctx.bc, &output[i]);
804 if (r)
805 goto out_err;
806 }
807 free(ctx.literals);
808 tgsi_parse_free(&ctx.parse);
809 return 0;
810 out_err:
811 free(ctx.literals);
812 tgsi_parse_free(&ctx.parse);
813 return r;
814 }
815
/*
 * Handler for TGSI opcodes that have no translation: logs the opcode stored
 * in the current table entry and fails with -EINVAL.
 */
static int tgsi_unsupported(struct r600_shader_ctx *ctx)
{
	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
	return -EINVAL;
}
821
/* Handler that emits nothing and always succeeds; ctx is not used. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	return 0;
}
826
827 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
828 const struct r600_shader_src *shader_src,
829 unsigned chan)
830 {
831 bc_src->sel = shader_src->sel;
832 bc_src->chan = shader_src->swizzle[chan];
833 bc_src->neg = shader_src->neg;
834 bc_src->abs = shader_src->abs;
835 bc_src->rel = shader_src->rel;
836 bc_src->value = shader_src->value[bc_src->chan];
837 }
838
839 static void tgsi_dst(struct r600_shader_ctx *ctx,
840 const struct tgsi_full_dst_register *tgsi_dst,
841 unsigned swizzle,
842 struct r600_bc_alu_dst *r600_dst)
843 {
844 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
845
846 r600_dst->sel = tgsi_dst->Register.Index;
847 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
848 r600_dst->chan = swizzle;
849 r600_dst->write = 1;
850 if (tgsi_dst->Register.Indirect)
851 r600_dst->rel = V_SQ_REL_RELATIVE;
852 if (inst->Instruction.Saturate) {
853 r600_dst->clamp = 1;
854 }
855 }
856
/*
 * Return the index (0..3) of the highest channel enabled in `writemask`.
 * Only the low four bits are examined; an empty mask yields 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* search downwards; channel 0 is also the fallback answer */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
868
869 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
870 {
871 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
872 struct r600_bc_alu alu;
873 int i, j, r;
874 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
875
876 for (i = 0; i < lasti + 1; i++) {
877 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
878 continue;
879
880 memset(&alu, 0, sizeof(struct r600_bc_alu));
881 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
882
883 alu.inst = ctx->inst_info->r600_opcode;
884 if (!swap) {
885 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
886 r600_bc_src(&alu.src[j], &ctx->src[j], i);
887 }
888 } else {
889 r600_bc_src(&alu.src[0], &ctx->src[1], i);
890 r600_bc_src(&alu.src[1], &ctx->src[0], i);
891 }
892 /* handle some special cases */
893 switch (ctx->inst_info->tgsi_opcode) {
894 case TGSI_OPCODE_SUB:
895 alu.src[1].neg = 1;
896 break;
897 case TGSI_OPCODE_ABS:
898 alu.src[0].abs = 1;
899 break;
900 default:
901 break;
902 }
903 if (i == lasti) {
904 alu.last = 1;
905 }
906 r = r600_bc_add_alu(ctx->bc, &alu);
907 if (r)
908 return r;
909 }
910 return 0;
911 }
912
/* Per-channel ALU translation with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0);
}
917
/* Per-channel ALU translation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1);
}
922
923 /*
924 * r600 - trunc to -PI..PI range
925 * r700 - normalize by dividing by 2PI
926 * see fdo bug 27901
927 */
928 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
929 {
930 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
931 static float double_pi = 3.1415926535 * 2;
932 static float neg_pi = -3.1415926535;
933
934 int r;
935 struct r600_bc_alu alu;
936
937 memset(&alu, 0, sizeof(struct r600_bc_alu));
938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
939 alu.is_op3 = 1;
940
941 alu.dst.chan = 0;
942 alu.dst.sel = ctx->temp_reg;
943 alu.dst.write = 1;
944
945 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
946
947 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
948 alu.src[1].chan = 0;
949 alu.src[1].value = *(uint32_t *)&half_inv_pi;
950 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
951 alu.src[2].chan = 0;
952 alu.last = 1;
953 r = r600_bc_add_alu(ctx->bc, &alu);
954 if (r)
955 return r;
956
957 memset(&alu, 0, sizeof(struct r600_bc_alu));
958 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
959
960 alu.dst.chan = 0;
961 alu.dst.sel = ctx->temp_reg;
962 alu.dst.write = 1;
963
964 alu.src[0].sel = ctx->temp_reg;
965 alu.src[0].chan = 0;
966 alu.last = 1;
967 r = r600_bc_add_alu(ctx->bc, &alu);
968 if (r)
969 return r;
970
971 memset(&alu, 0, sizeof(struct r600_bc_alu));
972 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
973 alu.is_op3 = 1;
974
975 alu.dst.chan = 0;
976 alu.dst.sel = ctx->temp_reg;
977 alu.dst.write = 1;
978
979 alu.src[0].sel = ctx->temp_reg;
980 alu.src[0].chan = 0;
981
982 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
983 alu.src[1].chan = 0;
984 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
985 alu.src[2].chan = 0;
986
987 if (ctx->bc->chiprev == CHIPREV_R600) {
988 alu.src[1].value = *(uint32_t *)&double_pi;
989 alu.src[2].value = *(uint32_t *)&neg_pi;
990 } else {
991 alu.src[1].sel = V_SQ_ALU_SRC_1;
992 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
993 alu.src[2].neg = 1;
994 }
995
996 alu.last = 1;
997 r = r600_bc_add_alu(ctx->bc, &alu);
998 if (r)
999 return r;
1000 return 0;
1001 }
1002
1003 static int tgsi_trig(struct r600_shader_ctx *ctx)
1004 {
1005 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1006 struct r600_bc_alu alu;
1007 int i, r;
1008 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1009
1010 r = tgsi_setup_trig(ctx);
1011 if (r)
1012 return r;
1013
1014 memset(&alu, 0, sizeof(struct r600_bc_alu));
1015 alu.inst = ctx->inst_info->r600_opcode;
1016 alu.dst.chan = 0;
1017 alu.dst.sel = ctx->temp_reg;
1018 alu.dst.write = 1;
1019
1020 alu.src[0].sel = ctx->temp_reg;
1021 alu.src[0].chan = 0;
1022 alu.last = 1;
1023 r = r600_bc_add_alu(ctx->bc, &alu);
1024 if (r)
1025 return r;
1026
1027 /* replicate result */
1028 for (i = 0; i < lasti + 1; i++) {
1029 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1030 continue;
1031
1032 memset(&alu, 0, sizeof(struct r600_bc_alu));
1033 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1034
1035 alu.src[0].sel = ctx->temp_reg;
1036 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1037 if (i == lasti)
1038 alu.last = 1;
1039 r = r600_bc_add_alu(ctx->bc, &alu);
1040 if (r)
1041 return r;
1042 }
1043 return 0;
1044 }
1045
1046 static int tgsi_scs(struct r600_shader_ctx *ctx)
1047 {
1048 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1049 struct r600_bc_alu alu;
1050 int r;
1051
1052 /* We'll only need the trig stuff if we are going to write to the
1053 * X or Y components of the destination vector.
1054 */
1055 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1056 r = tgsi_setup_trig(ctx);
1057 if (r)
1058 return r;
1059 }
1060
1061 /* dst.x = COS */
1062 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1063 memset(&alu, 0, sizeof(struct r600_bc_alu));
1064 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1065 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1066
1067 alu.src[0].sel = ctx->temp_reg;
1068 alu.src[0].chan = 0;
1069 alu.last = 1;
1070 r = r600_bc_add_alu(ctx->bc, &alu);
1071 if (r)
1072 return r;
1073 }
1074
1075 /* dst.y = SIN */
1076 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1077 memset(&alu, 0, sizeof(struct r600_bc_alu));
1078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1079 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1080
1081 alu.src[0].sel = ctx->temp_reg;
1082 alu.src[0].chan = 0;
1083 alu.last = 1;
1084 r = r600_bc_add_alu(ctx->bc, &alu);
1085 if (r)
1086 return r;
1087 }
1088
1089 /* dst.z = 0.0; */
1090 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1091 memset(&alu, 0, sizeof(struct r600_bc_alu));
1092
1093 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1094
1095 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1096
1097 alu.src[0].sel = V_SQ_ALU_SRC_0;
1098 alu.src[0].chan = 0;
1099
1100 alu.last = 1;
1101
1102 r = r600_bc_add_alu(ctx->bc, &alu);
1103 if (r)
1104 return r;
1105 }
1106
1107 /* dst.w = 1.0; */
1108 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1109 memset(&alu, 0, sizeof(struct r600_bc_alu));
1110
1111 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1112
1113 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1114
1115 alu.src[0].sel = V_SQ_ALU_SRC_1;
1116 alu.src[0].chan = 0;
1117
1118 alu.last = 1;
1119
1120 r = r600_bc_add_alu(ctx->bc, &alu);
1121 if (r)
1122 return r;
1123 }
1124
1125 return 0;
1126 }
1127
1128 static int tgsi_kill(struct r600_shader_ctx *ctx)
1129 {
1130 struct r600_bc_alu alu;
1131 int i, r;
1132
1133 for (i = 0; i < 4; i++) {
1134 memset(&alu, 0, sizeof(struct r600_bc_alu));
1135 alu.inst = ctx->inst_info->r600_opcode;
1136
1137 alu.dst.chan = i;
1138
1139 alu.src[0].sel = V_SQ_ALU_SRC_0;
1140
1141 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1142 alu.src[1].sel = V_SQ_ALU_SRC_1;
1143 alu.src[1].neg = 1;
1144 } else {
1145 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1146 }
1147 if (i == 3) {
1148 alu.last = 1;
1149 }
1150 r = r600_bc_add_alu(ctx->bc, &alu);
1151 if (r)
1152 return r;
1153 }
1154
1155 /* kill must be last in ALU */
1156 ctx->bc->force_add_cf = 1;
1157 ctx->shader->uses_kill = TRUE;
1158 return 0;
1159 }
1160
1161 static int tgsi_lit(struct r600_shader_ctx *ctx)
1162 {
1163 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1164 struct r600_bc_alu alu;
1165 int r;
1166
1167 /* dst.x, <- 1.0 */
1168 memset(&alu, 0, sizeof(struct r600_bc_alu));
1169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1170 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1171 alu.src[0].chan = 0;
1172 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1173 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1174 r = r600_bc_add_alu(ctx->bc, &alu);
1175 if (r)
1176 return r;
1177
1178 /* dst.y = max(src.x, 0.0) */
1179 memset(&alu, 0, sizeof(struct r600_bc_alu));
1180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1181 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1182 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1183 alu.src[1].chan = 0;
1184 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1185 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1186 r = r600_bc_add_alu(ctx->bc, &alu);
1187 if (r)
1188 return r;
1189
1190 /* dst.w, <- 1.0 */
1191 memset(&alu, 0, sizeof(struct r600_bc_alu));
1192 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1193 alu.src[0].sel = V_SQ_ALU_SRC_1;
1194 alu.src[0].chan = 0;
1195 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1196 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1197 alu.last = 1;
1198 r = r600_bc_add_alu(ctx->bc, &alu);
1199 if (r)
1200 return r;
1201
1202 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1203 {
1204 int chan;
1205 int sel;
1206
1207 /* dst.z = log(src.y) */
1208 memset(&alu, 0, sizeof(struct r600_bc_alu));
1209 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1210 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1211 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1212 alu.last = 1;
1213 r = r600_bc_add_alu(ctx->bc, &alu);
1214 if (r)
1215 return r;
1216
1217 chan = alu.dst.chan;
1218 sel = alu.dst.sel;
1219
1220 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1221 memset(&alu, 0, sizeof(struct r600_bc_alu));
1222 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1223 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1224 alu.src[1].sel = sel;
1225 alu.src[1].chan = chan;
1226
1227 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1228 alu.dst.sel = ctx->temp_reg;
1229 alu.dst.chan = 0;
1230 alu.dst.write = 1;
1231 alu.is_op3 = 1;
1232 alu.last = 1;
1233 r = r600_bc_add_alu(ctx->bc, &alu);
1234 if (r)
1235 return r;
1236
1237 /* dst.z = exp(tmp.x) */
1238 memset(&alu, 0, sizeof(struct r600_bc_alu));
1239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1240 alu.src[0].sel = ctx->temp_reg;
1241 alu.src[0].chan = 0;
1242 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1243 alu.last = 1;
1244 r = r600_bc_add_alu(ctx->bc, &alu);
1245 if (r)
1246 return r;
1247 }
1248 return 0;
1249 }
1250
1251 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1252 {
1253 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1254 struct r600_bc_alu alu;
1255 int i, r;
1256
1257 memset(&alu, 0, sizeof(struct r600_bc_alu));
1258
1259 /* FIXME:
1260 * For state trackers other than OpenGL, we'll want to use
1261 * _RECIPSQRT_IEEE instead.
1262 */
1263 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1264
1265 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1266 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1267 alu.src[i].abs = 1;
1268 }
1269 alu.dst.sel = ctx->temp_reg;
1270 alu.dst.write = 1;
1271 alu.last = 1;
1272 r = r600_bc_add_alu(ctx->bc, &alu);
1273 if (r)
1274 return r;
1275 /* replicate result */
1276 return tgsi_helper_tempx_replicate(ctx);
1277 }
1278
1279 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1280 {
1281 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1282 struct r600_bc_alu alu;
1283 int i, r;
1284
1285 for (i = 0; i < 4; i++) {
1286 memset(&alu, 0, sizeof(struct r600_bc_alu));
1287 alu.src[0].sel = ctx->temp_reg;
1288 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1289 alu.dst.chan = i;
1290 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1291 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1292 if (i == 3)
1293 alu.last = 1;
1294 r = r600_bc_add_alu(ctx->bc, &alu);
1295 if (r)
1296 return r;
1297 }
1298 return 0;
1299 }
1300
1301 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1302 {
1303 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1304 struct r600_bc_alu alu;
1305 int i, r;
1306
1307 memset(&alu, 0, sizeof(struct r600_bc_alu));
1308 alu.inst = ctx->inst_info->r600_opcode;
1309 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1310 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1311 }
1312 alu.dst.sel = ctx->temp_reg;
1313 alu.dst.write = 1;
1314 alu.last = 1;
1315 r = r600_bc_add_alu(ctx->bc, &alu);
1316 if (r)
1317 return r;
1318 /* replicate result */
1319 return tgsi_helper_tempx_replicate(ctx);
1320 }
1321
1322 static int tgsi_pow(struct r600_shader_ctx *ctx)
1323 {
1324 struct r600_bc_alu alu;
1325 int r;
1326
1327 /* LOG2(a) */
1328 memset(&alu, 0, sizeof(struct r600_bc_alu));
1329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1330 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1331 alu.dst.sel = ctx->temp_reg;
1332 alu.dst.write = 1;
1333 alu.last = 1;
1334 r = r600_bc_add_alu(ctx->bc, &alu);
1335 if (r)
1336 return r;
1337 /* b * LOG2(a) */
1338 memset(&alu, 0, sizeof(struct r600_bc_alu));
1339 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1340 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1341 alu.src[1].sel = ctx->temp_reg;
1342 alu.dst.sel = ctx->temp_reg;
1343 alu.dst.write = 1;
1344 alu.last = 1;
1345 r = r600_bc_add_alu(ctx->bc, &alu);
1346 if (r)
1347 return r;
1348 /* POW(a,b) = EXP2(b * LOG2(a))*/
1349 memset(&alu, 0, sizeof(struct r600_bc_alu));
1350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1351 alu.src[0].sel = ctx->temp_reg;
1352 alu.dst.sel = ctx->temp_reg;
1353 alu.dst.write = 1;
1354 alu.last = 1;
1355 r = r600_bc_add_alu(ctx->bc, &alu);
1356 if (r)
1357 return r;
1358 return tgsi_helper_tempx_replicate(ctx);
1359 }
1360
1361 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1362 {
1363 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1364 struct r600_bc_alu alu;
1365 int i, r;
1366
1367 /* tmp = (src > 0 ? 1 : src) */
1368 for (i = 0; i < 4; i++) {
1369 memset(&alu, 0, sizeof(struct r600_bc_alu));
1370 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1371 alu.is_op3 = 1;
1372
1373 alu.dst.sel = ctx->temp_reg;
1374 alu.dst.chan = i;
1375
1376 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1377 alu.src[1].sel = V_SQ_ALU_SRC_1;
1378 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1379
1380 if (i == 3)
1381 alu.last = 1;
1382 r = r600_bc_add_alu(ctx->bc, &alu);
1383 if (r)
1384 return r;
1385 }
1386
1387 /* dst = (-tmp > 0 ? -1 : tmp) */
1388 for (i = 0; i < 4; i++) {
1389 memset(&alu, 0, sizeof(struct r600_bc_alu));
1390 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1391 alu.is_op3 = 1;
1392 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1393
1394 alu.src[0].sel = ctx->temp_reg;
1395 alu.src[0].chan = i;
1396 alu.src[0].neg = 1;
1397
1398 alu.src[1].sel = V_SQ_ALU_SRC_1;
1399 alu.src[1].neg = 1;
1400
1401 alu.src[2].sel = ctx->temp_reg;
1402 alu.src[2].chan = i;
1403
1404 if (i == 3)
1405 alu.last = 1;
1406 r = r600_bc_add_alu(ctx->bc, &alu);
1407 if (r)
1408 return r;
1409 }
1410 return 0;
1411 }
1412
1413 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1414 {
1415 struct r600_bc_alu alu;
1416 int i, r;
1417
1418 for (i = 0; i < 4; i++) {
1419 memset(&alu, 0, sizeof(struct r600_bc_alu));
1420 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1422 alu.dst.chan = i;
1423 } else {
1424 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1425 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1426 alu.src[0].sel = ctx->temp_reg;
1427 alu.src[0].chan = i;
1428 }
1429 if (i == 3) {
1430 alu.last = 1;
1431 }
1432 r = r600_bc_add_alu(ctx->bc, &alu);
1433 if (r)
1434 return r;
1435 }
1436 return 0;
1437 }
1438
1439 static int tgsi_op3(struct r600_shader_ctx *ctx)
1440 {
1441 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1442 struct r600_bc_alu alu;
1443 int i, j, r;
1444 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1445
1446 for (i = 0; i < lasti + 1; i++) {
1447 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1448 continue;
1449
1450 memset(&alu, 0, sizeof(struct r600_bc_alu));
1451 alu.inst = ctx->inst_info->r600_opcode;
1452 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1453 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1454 }
1455
1456 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1457 alu.dst.chan = i;
1458 alu.dst.write = 1;
1459 alu.is_op3 = 1;
1460 if (i == lasti) {
1461 alu.last = 1;
1462 }
1463 r = r600_bc_add_alu(ctx->bc, &alu);
1464 if (r)
1465 return r;
1466 }
1467 return 0;
1468 }
1469
1470 static int tgsi_dp(struct r600_shader_ctx *ctx)
1471 {
1472 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1473 struct r600_bc_alu alu;
1474 int i, j, r;
1475
1476 for (i = 0; i < 4; i++) {
1477 memset(&alu, 0, sizeof(struct r600_bc_alu));
1478 alu.inst = ctx->inst_info->r600_opcode;
1479 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1480 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1481 }
1482
1483 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1484 alu.dst.chan = i;
1485 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1486 /* handle some special cases */
1487 switch (ctx->inst_info->tgsi_opcode) {
1488 case TGSI_OPCODE_DP2:
1489 if (i > 1) {
1490 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1491 alu.src[0].chan = alu.src[1].chan = 0;
1492 }
1493 break;
1494 case TGSI_OPCODE_DP3:
1495 if (i > 2) {
1496 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1497 alu.src[0].chan = alu.src[1].chan = 0;
1498 }
1499 break;
1500 case TGSI_OPCODE_DPH:
1501 if (i == 3) {
1502 alu.src[0].sel = V_SQ_ALU_SRC_1;
1503 alu.src[0].chan = 0;
1504 alu.src[0].neg = 0;
1505 }
1506 break;
1507 default:
1508 break;
1509 }
1510 if (i == 3) {
1511 alu.last = 1;
1512 }
1513 r = r600_bc_add_alu(ctx->bc, &alu);
1514 if (r)
1515 return r;
1516 }
1517 return 0;
1518 }
1519
1520 static int tgsi_tex(struct r600_shader_ctx *ctx)
1521 {
1522 static float one_point_five = 1.5f;
1523 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1524 struct r600_bc_tex tex;
1525 struct r600_bc_alu alu;
1526 unsigned src_gpr;
1527 int r, i;
1528 int opcode;
1529 boolean src_not_temp =
1530 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1531 inst->Src[0].Register.File != TGSI_FILE_INPUT;
1532
1533 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1534
1535 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1536 /* Add perspective divide */
1537 memset(&alu, 0, sizeof(struct r600_bc_alu));
1538 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1539 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1540
1541 alu.dst.sel = ctx->temp_reg;
1542 alu.dst.chan = 3;
1543 alu.last = 1;
1544 alu.dst.write = 1;
1545 r = r600_bc_add_alu(ctx->bc, &alu);
1546 if (r)
1547 return r;
1548
1549 for (i = 0; i < 3; i++) {
1550 memset(&alu, 0, sizeof(struct r600_bc_alu));
1551 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1552 alu.src[0].sel = ctx->temp_reg;
1553 alu.src[0].chan = 3;
1554 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1555 alu.dst.sel = ctx->temp_reg;
1556 alu.dst.chan = i;
1557 alu.dst.write = 1;
1558 r = r600_bc_add_alu(ctx->bc, &alu);
1559 if (r)
1560 return r;
1561 }
1562 memset(&alu, 0, sizeof(struct r600_bc_alu));
1563 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1564 alu.src[0].sel = V_SQ_ALU_SRC_1;
1565 alu.src[0].chan = 0;
1566 alu.dst.sel = ctx->temp_reg;
1567 alu.dst.chan = 3;
1568 alu.last = 1;
1569 alu.dst.write = 1;
1570 r = r600_bc_add_alu(ctx->bc, &alu);
1571 if (r)
1572 return r;
1573 src_not_temp = FALSE;
1574 src_gpr = ctx->temp_reg;
1575 }
1576
1577 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1578 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1579 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1580
1581 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1582 for (i = 0; i < 4; i++) {
1583 memset(&alu, 0, sizeof(struct r600_bc_alu));
1584 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1585 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1586 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1587 alu.dst.sel = ctx->temp_reg;
1588 alu.dst.chan = i;
1589 if (i == 3)
1590 alu.last = 1;
1591 alu.dst.write = 1;
1592 r = r600_bc_add_alu(ctx->bc, &alu);
1593 if (r)
1594 return r;
1595 }
1596
1597 /* tmp1.z = RCP_e(|tmp1.z|) */
1598 memset(&alu, 0, sizeof(struct r600_bc_alu));
1599 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1600 alu.src[0].sel = ctx->temp_reg;
1601 alu.src[0].chan = 2;
1602 alu.src[0].abs = 1;
1603 alu.dst.sel = ctx->temp_reg;
1604 alu.dst.chan = 2;
1605 alu.dst.write = 1;
1606 alu.last = 1;
1607 r = r600_bc_add_alu(ctx->bc, &alu);
1608 if (r)
1609 return r;
1610
1611 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1612 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1613 * muladd has no writemask, have to use another temp
1614 */
1615 memset(&alu, 0, sizeof(struct r600_bc_alu));
1616 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1617 alu.is_op3 = 1;
1618
1619 alu.src[0].sel = ctx->temp_reg;
1620 alu.src[0].chan = 0;
1621 alu.src[1].sel = ctx->temp_reg;
1622 alu.src[1].chan = 2;
1623
1624 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1625 alu.src[2].chan = 0;
1626 alu.src[2].value = *(uint32_t *)&one_point_five;
1627
1628 alu.dst.sel = ctx->temp_reg;
1629 alu.dst.chan = 0;
1630 alu.dst.write = 1;
1631
1632 r = r600_bc_add_alu(ctx->bc, &alu);
1633 if (r)
1634 return r;
1635
1636 memset(&alu, 0, sizeof(struct r600_bc_alu));
1637 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1638 alu.is_op3 = 1;
1639
1640 alu.src[0].sel = ctx->temp_reg;
1641 alu.src[0].chan = 1;
1642 alu.src[1].sel = ctx->temp_reg;
1643 alu.src[1].chan = 2;
1644
1645 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1646 alu.src[2].chan = 0;
1647 alu.src[2].value = *(uint32_t *)&one_point_five;
1648
1649 alu.dst.sel = ctx->temp_reg;
1650 alu.dst.chan = 1;
1651 alu.dst.write = 1;
1652
1653 alu.last = 1;
1654 r = r600_bc_add_alu(ctx->bc, &alu);
1655 if (r)
1656 return r;
1657
1658 src_not_temp = FALSE;
1659 src_gpr = ctx->temp_reg;
1660 }
1661
1662 if (src_not_temp) {
1663 for (i = 0; i < 4; i++) {
1664 memset(&alu, 0, sizeof(struct r600_bc_alu));
1665 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1666 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1667 alu.dst.sel = ctx->temp_reg;
1668 alu.dst.chan = i;
1669 if (i == 3)
1670 alu.last = 1;
1671 alu.dst.write = 1;
1672 r = r600_bc_add_alu(ctx->bc, &alu);
1673 if (r)
1674 return r;
1675 }
1676 src_gpr = ctx->temp_reg;
1677 }
1678
1679 opcode = ctx->inst_info->r600_opcode;
1680 if (opcode == SQ_TEX_INST_SAMPLE &&
1681 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1682 opcode = SQ_TEX_INST_SAMPLE_C;
1683
1684 memset(&tex, 0, sizeof(struct r600_bc_tex));
1685 tex.inst = opcode;
1686 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1687 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1688 tex.src_gpr = src_gpr;
1689 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1690 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1691 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1692 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1693 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1694 tex.src_sel_x = 0;
1695 tex.src_sel_y = 1;
1696 tex.src_sel_z = 2;
1697 tex.src_sel_w = 3;
1698
1699 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1700 tex.src_sel_x = 1;
1701 tex.src_sel_y = 0;
1702 tex.src_sel_z = 3;
1703 tex.src_sel_w = 1;
1704 }
1705
1706 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1707 tex.coord_type_x = 1;
1708 tex.coord_type_y = 1;
1709 tex.coord_type_z = 1;
1710 tex.coord_type_w = 1;
1711 }
1712
1713 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1714 tex.coord_type_z = 0;
1715 tex.src_sel_z = 1;
1716 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1717 tex.coord_type_z = 0;
1718
1719 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1720 tex.src_sel_w = 2;
1721
1722 r = r600_bc_add_tex(ctx->bc, &tex);
1723 if (r)
1724 return r;
1725
1726 /* add shadow ambient support - gallium doesn't do it yet */
1727 return 0;
1728 }
1729
1730 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1731 {
1732 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1733 struct r600_bc_alu alu;
1734 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1735 unsigned i;
1736 int r;
1737
1738 /* optimize if it's just an equal balance */
1739 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1740 for (i = 0; i < lasti + 1; i++) {
1741 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1742 continue;
1743
1744 memset(&alu, 0, sizeof(struct r600_bc_alu));
1745 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1746 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1747 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1748 alu.omod = 3;
1749 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1750 alu.dst.chan = i;
1751 if (i == lasti) {
1752 alu.last = 1;
1753 }
1754 r = r600_bc_add_alu(ctx->bc, &alu);
1755 if (r)
1756 return r;
1757 }
1758 return 0;
1759 }
1760
1761 /* 1 - src0 */
1762 for (i = 0; i < lasti + 1; i++) {
1763 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1764 continue;
1765
1766 memset(&alu, 0, sizeof(struct r600_bc_alu));
1767 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1768 alu.src[0].sel = V_SQ_ALU_SRC_1;
1769 alu.src[0].chan = 0;
1770 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1771 alu.src[1].neg = 1;
1772 alu.dst.sel = ctx->temp_reg;
1773 alu.dst.chan = i;
1774 if (i == lasti) {
1775 alu.last = 1;
1776 }
1777 alu.dst.write = 1;
1778 r = r600_bc_add_alu(ctx->bc, &alu);
1779 if (r)
1780 return r;
1781 }
1782
1783 /* (1 - src0) * src2 */
1784 for (i = 0; i < lasti + 1; i++) {
1785 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1786 continue;
1787
1788 memset(&alu, 0, sizeof(struct r600_bc_alu));
1789 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1790 alu.src[0].sel = ctx->temp_reg;
1791 alu.src[0].chan = i;
1792 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1793 alu.dst.sel = ctx->temp_reg;
1794 alu.dst.chan = i;
1795 if (i == lasti) {
1796 alu.last = 1;
1797 }
1798 alu.dst.write = 1;
1799 r = r600_bc_add_alu(ctx->bc, &alu);
1800 if (r)
1801 return r;
1802 }
1803
1804 /* src0 * src1 + (1 - src0) * src2 */
1805 for (i = 0; i < lasti + 1; i++) {
1806 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1807 continue;
1808
1809 memset(&alu, 0, sizeof(struct r600_bc_alu));
1810 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1811 alu.is_op3 = 1;
1812 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1813 r600_bc_src(&alu.src[1], &ctx->src[1], i);
1814 alu.src[2].sel = ctx->temp_reg;
1815 alu.src[2].chan = i;
1816
1817 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1818 alu.dst.chan = i;
1819 if (i == lasti) {
1820 alu.last = 1;
1821 }
1822 r = r600_bc_add_alu(ctx->bc, &alu);
1823 if (r)
1824 return r;
1825 }
1826 return 0;
1827 }
1828
1829 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1830 {
1831 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1832 struct r600_bc_alu alu;
1833 int i, r;
1834 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1835
1836 for (i = 0; i < lasti + 1; i++) {
1837 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1838 continue;
1839
1840 memset(&alu, 0, sizeof(struct r600_bc_alu));
1841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1842 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1843 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1844 r600_bc_src(&alu.src[2], &ctx->src[1], i);
1845 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1846 alu.dst.chan = i;
1847 alu.dst.write = 1;
1848 alu.is_op3 = 1;
1849 if (i == lasti)
1850 alu.last = 1;
1851 r = r600_bc_add_alu(ctx->bc, &alu);
1852 if (r)
1853 return r;
1854 }
1855 return 0;
1856 }
1857
1858 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1859 {
1860 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1861 static const unsigned int src0_swizzle[] = {2, 0, 1};
1862 static const unsigned int src1_swizzle[] = {1, 2, 0};
1863 struct r600_bc_alu alu;
1864 uint32_t use_temp = 0;
1865 int i, r;
1866
1867 if (inst->Dst[0].Register.WriteMask != 0xf)
1868 use_temp = 1;
1869
1870 for (i = 0; i < 4; i++) {
1871 memset(&alu, 0, sizeof(struct r600_bc_alu));
1872 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1873 if (i < 3) {
1874 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1875 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
1876 } else {
1877 alu.src[0].sel = V_SQ_ALU_SRC_0;
1878 alu.src[0].chan = i;
1879 alu.src[1].sel = V_SQ_ALU_SRC_0;
1880 alu.src[1].chan = i;
1881 }
1882
1883 alu.dst.sel = ctx->temp_reg;
1884 alu.dst.chan = i;
1885 alu.dst.write = 1;
1886
1887 if (i == 3)
1888 alu.last = 1;
1889 r = r600_bc_add_alu(ctx->bc, &alu);
1890 if (r)
1891 return r;
1892 }
1893
1894 for (i = 0; i < 4; i++) {
1895 memset(&alu, 0, sizeof(struct r600_bc_alu));
1896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1897
1898 if (i < 3) {
1899 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
1900 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
1901 } else {
1902 alu.src[0].sel = V_SQ_ALU_SRC_0;
1903 alu.src[0].chan = i;
1904 alu.src[1].sel = V_SQ_ALU_SRC_0;
1905 alu.src[1].chan = i;
1906 }
1907
1908 alu.src[2].sel = ctx->temp_reg;
1909 alu.src[2].neg = 1;
1910 alu.src[2].chan = i;
1911
1912 if (use_temp)
1913 alu.dst.sel = ctx->temp_reg;
1914 else
1915 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1916 alu.dst.chan = i;
1917 alu.dst.write = 1;
1918 alu.is_op3 = 1;
1919 if (i == 3)
1920 alu.last = 1;
1921 r = r600_bc_add_alu(ctx->bc, &alu);
1922 if (r)
1923 return r;
1924 }
1925 if (use_temp)
1926 return tgsi_helper_copy(ctx, inst);
1927 return 0;
1928 }
1929
1930 static int tgsi_exp(struct r600_shader_ctx *ctx)
1931 {
1932 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1933 struct r600_bc_alu alu;
1934 int r;
1935
1936 /* result.x = 2^floor(src); */
1937 if (inst->Dst[0].Register.WriteMask & 1) {
1938 memset(&alu, 0, sizeof(struct r600_bc_alu));
1939
1940 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1941 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1942
1943 alu.dst.sel = ctx->temp_reg;
1944 alu.dst.chan = 0;
1945 alu.dst.write = 1;
1946 alu.last = 1;
1947 r = r600_bc_add_alu(ctx->bc, &alu);
1948 if (r)
1949 return r;
1950
1951 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1952 alu.src[0].sel = ctx->temp_reg;
1953 alu.src[0].chan = 0;
1954
1955 alu.dst.sel = ctx->temp_reg;
1956 alu.dst.chan = 0;
1957 alu.dst.write = 1;
1958 alu.last = 1;
1959 r = r600_bc_add_alu(ctx->bc, &alu);
1960 if (r)
1961 return r;
1962 }
1963
1964 /* result.y = tmp - floor(tmp); */
1965 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1966 memset(&alu, 0, sizeof(struct r600_bc_alu));
1967
1968 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1969 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1970
1971 alu.dst.sel = ctx->temp_reg;
1972 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1973 // if (r)
1974 // return r;
1975 alu.dst.write = 1;
1976 alu.dst.chan = 1;
1977
1978 alu.last = 1;
1979
1980 r = r600_bc_add_alu(ctx->bc, &alu);
1981 if (r)
1982 return r;
1983 }
1984
1985 /* result.z = RoughApprox2ToX(tmp);*/
1986 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1987 memset(&alu, 0, sizeof(struct r600_bc_alu));
1988 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1989 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1990
1991 alu.dst.sel = ctx->temp_reg;
1992 alu.dst.write = 1;
1993 alu.dst.chan = 2;
1994
1995 alu.last = 1;
1996
1997 r = r600_bc_add_alu(ctx->bc, &alu);
1998 if (r)
1999 return r;
2000 }
2001
2002 /* result.w = 1.0;*/
2003 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2004 memset(&alu, 0, sizeof(struct r600_bc_alu));
2005
2006 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2007 alu.src[0].sel = V_SQ_ALU_SRC_1;
2008 alu.src[0].chan = 0;
2009
2010 alu.dst.sel = ctx->temp_reg;
2011 alu.dst.chan = 3;
2012 alu.dst.write = 1;
2013 alu.last = 1;
2014 r = r600_bc_add_alu(ctx->bc, &alu);
2015 if (r)
2016 return r;
2017 }
2018 return tgsi_helper_copy(ctx, inst);
2019 }
2020
2021 static int tgsi_log(struct r600_shader_ctx *ctx)
2022 {
2023 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2024 struct r600_bc_alu alu;
2025 int r;
2026
2027 /* result.x = floor(log2(src)); */
2028 if (inst->Dst[0].Register.WriteMask & 1) {
2029 memset(&alu, 0, sizeof(struct r600_bc_alu));
2030
2031 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2032 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2033
2034 alu.dst.sel = ctx->temp_reg;
2035 alu.dst.chan = 0;
2036 alu.dst.write = 1;
2037 alu.last = 1;
2038 r = r600_bc_add_alu(ctx->bc, &alu);
2039 if (r)
2040 return r;
2041
2042 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2043 alu.src[0].sel = ctx->temp_reg;
2044 alu.src[0].chan = 0;
2045
2046 alu.dst.sel = ctx->temp_reg;
2047 alu.dst.chan = 0;
2048 alu.dst.write = 1;
2049 alu.last = 1;
2050
2051 r = r600_bc_add_alu(ctx->bc, &alu);
2052 if (r)
2053 return r;
2054 }
2055
2056 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2057 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2058 memset(&alu, 0, sizeof(struct r600_bc_alu));
2059
2060 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2061 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2062
2063 alu.dst.sel = ctx->temp_reg;
2064 alu.dst.chan = 1;
2065 alu.dst.write = 1;
2066 alu.last = 1;
2067
2068 r = r600_bc_add_alu(ctx->bc, &alu);
2069 if (r)
2070 return r;
2071
2072 memset(&alu, 0, sizeof(struct r600_bc_alu));
2073
2074 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2075 alu.src[0].sel = ctx->temp_reg;
2076 alu.src[0].chan = 1;
2077
2078 alu.dst.sel = ctx->temp_reg;
2079 alu.dst.chan = 1;
2080 alu.dst.write = 1;
2081 alu.last = 1;
2082
2083 r = r600_bc_add_alu(ctx->bc, &alu);
2084 if (r)
2085 return r;
2086
2087 memset(&alu, 0, sizeof(struct r600_bc_alu));
2088
2089 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2090 alu.src[0].sel = ctx->temp_reg;
2091 alu.src[0].chan = 1;
2092
2093 alu.dst.sel = ctx->temp_reg;
2094 alu.dst.chan = 1;
2095 alu.dst.write = 1;
2096 alu.last = 1;
2097
2098 r = r600_bc_add_alu(ctx->bc, &alu);
2099 if (r)
2100 return r;
2101
2102 memset(&alu, 0, sizeof(struct r600_bc_alu));
2103
2104 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2105 alu.src[0].sel = ctx->temp_reg;
2106 alu.src[0].chan = 1;
2107
2108 alu.dst.sel = ctx->temp_reg;
2109 alu.dst.chan = 1;
2110 alu.dst.write = 1;
2111 alu.last = 1;
2112
2113 r = r600_bc_add_alu(ctx->bc, &alu);
2114 if (r)
2115 return r;
2116
2117 memset(&alu, 0, sizeof(struct r600_bc_alu));
2118
2119 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2120
2121 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2122
2123 alu.src[1].sel = ctx->temp_reg;
2124 alu.src[1].chan = 1;
2125
2126 alu.dst.sel = ctx->temp_reg;
2127 alu.dst.chan = 1;
2128 alu.dst.write = 1;
2129 alu.last = 1;
2130
2131 r = r600_bc_add_alu(ctx->bc, &alu);
2132 if (r)
2133 return r;
2134 }
2135
2136 /* result.z = log2(src);*/
2137 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2138 memset(&alu, 0, sizeof(struct r600_bc_alu));
2139
2140 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2141 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2142
2143 alu.dst.sel = ctx->temp_reg;
2144 alu.dst.write = 1;
2145 alu.dst.chan = 2;
2146 alu.last = 1;
2147
2148 r = r600_bc_add_alu(ctx->bc, &alu);
2149 if (r)
2150 return r;
2151 }
2152
2153 /* result.w = 1.0; */
2154 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2155 memset(&alu, 0, sizeof(struct r600_bc_alu));
2156
2157 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2158 alu.src[0].sel = V_SQ_ALU_SRC_1;
2159 alu.src[0].chan = 0;
2160
2161 alu.dst.sel = ctx->temp_reg;
2162 alu.dst.chan = 3;
2163 alu.dst.write = 1;
2164 alu.last = 1;
2165
2166 r = r600_bc_add_alu(ctx->bc, &alu);
2167 if (r)
2168 return r;
2169 }
2170
2171 return tgsi_helper_copy(ctx, inst);
2172 }
2173
2174 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2175 {
2176 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2177 struct r600_bc_alu alu;
2178 int r;
2179
2180 memset(&alu, 0, sizeof(struct r600_bc_alu));
2181
2182 switch (inst->Instruction.Opcode) {
2183 case TGSI_OPCODE_ARL:
2184 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2185 break;
2186 case TGSI_OPCODE_ARR:
2187 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2188 break;
2189 default:
2190 assert(0);
2191 return -1;
2192 }
2193
2194 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2195 alu.last = 1;
2196 alu.dst.sel = ctx->ar_reg;
2197 alu.dst.write = 1;
2198 r = r600_bc_add_alu(ctx->bc, &alu);
2199 if (r)
2200 return r;
2201
2202 /* TODO: Note that the MOVA can be avoided if we never use AR for
2203 * indexing non-CB registers in the current ALU clause. Similarly, we
2204 * need to load AR from ar_reg again if we started a new clause
2205 * between ARL and AR usage. The easy way to do that is to remove
2206 * the MOVA here, and load it for the first AR access after ar_reg
2207 * has been modified in each clause. */
2208 memset(&alu, 0, sizeof(struct r600_bc_alu));
2209 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2210 alu.src[0].sel = ctx->ar_reg;
2211 alu.src[0].chan = 0;
2212 alu.last = 1;
2213 r = r600_bc_add_alu(ctx->bc, &alu);
2214 if (r)
2215 return r;
2216 return 0;
2217 }
2218 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2219 {
2220 /* TODO from r600c, ar values don't persist between clauses */
2221 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2222 struct r600_bc_alu alu;
2223 int r;
2224
2225 switch (inst->Instruction.Opcode) {
2226 case TGSI_OPCODE_ARL:
2227 memset(&alu, 0, sizeof(alu));
2228 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2229 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2230 alu.dst.sel = ctx->ar_reg;
2231 alu.dst.write = 1;
2232 alu.last = 1;
2233
2234 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2235 return r;
2236
2237 memset(&alu, 0, sizeof(alu));
2238 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2239 alu.src[0].sel = ctx->ar_reg;
2240 alu.dst.sel = ctx->ar_reg;
2241 alu.dst.write = 1;
2242 alu.last = 1;
2243
2244 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2245 return r;
2246 break;
2247 case TGSI_OPCODE_ARR:
2248 memset(&alu, 0, sizeof(alu));
2249 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2250 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2251 alu.dst.sel = ctx->ar_reg;
2252 alu.dst.write = 1;
2253 alu.last = 1;
2254
2255 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2256 return r;
2257 break;
2258 default:
2259 assert(0);
2260 return -1;
2261 }
2262
2263 memset(&alu, 0, sizeof(alu));
2264 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2265 alu.src[0].sel = ctx->ar_reg;
2266 alu.last = 1;
2267
2268 r = r600_bc_add_alu(ctx->bc, &alu);
2269 if (r)
2270 return r;
2271 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2272 return 0;
2273 }
2274
2275 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2276 {
2277 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2278 struct r600_bc_alu alu;
2279 int i, r = 0;
2280
2281 for (i = 0; i < 4; i++) {
2282 memset(&alu, 0, sizeof(struct r600_bc_alu));
2283
2284 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2285 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2286
2287 if (i == 0 || i == 3) {
2288 alu.src[0].sel = V_SQ_ALU_SRC_1;
2289 } else {
2290 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2291 }
2292
2293 if (i == 0 || i == 2) {
2294 alu.src[1].sel = V_SQ_ALU_SRC_1;
2295 } else {
2296 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2297 }
2298 if (i == 3)
2299 alu.last = 1;
2300 r = r600_bc_add_alu(ctx->bc, &alu);
2301 if (r)
2302 return r;
2303 }
2304 return 0;
2305 }
2306
2307 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2308 {
2309 struct r600_bc_alu alu;
2310 int r;
2311
2312 memset(&alu, 0, sizeof(struct r600_bc_alu));
2313 alu.inst = opcode;
2314 alu.predicate = 1;
2315
2316 alu.dst.sel = ctx->temp_reg;
2317 alu.dst.write = 1;
2318 alu.dst.chan = 0;
2319
2320 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2321 alu.src[1].sel = V_SQ_ALU_SRC_0;
2322 alu.src[1].chan = 0;
2323
2324 alu.last = 1;
2325
2326 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2327 if (r)
2328 return r;
2329 return 0;
2330 }
2331
2332 static int pops(struct r600_shader_ctx *ctx, int pops)
2333 {
2334 int alu_pop = 3;
2335 if (ctx->bc->cf_last) {
2336 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2337 alu_pop = 0;
2338 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2339 alu_pop = 1;
2340 }
2341 alu_pop += pops;
2342 if (alu_pop == 1) {
2343 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2344 ctx->bc->force_add_cf = 1;
2345 } else if (alu_pop == 2) {
2346 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2347 ctx->bc->force_add_cf = 1;
2348 } else {
2349 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2350 ctx->bc->cf_last->pop_count = pops;
2351 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2352 }
2353 return 0;
2354 }
2355
2356 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2357 {
2358 switch(reason) {
2359 case FC_PUSH_VPM:
2360 ctx->bc->callstack[ctx->bc->call_sp].current--;
2361 break;
2362 case FC_PUSH_WQM:
2363 case FC_LOOP:
2364 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2365 break;
2366 case FC_REP:
2367 /* TOODO : for 16 vp asic should -= 2; */
2368 ctx->bc->callstack[ctx->bc->call_sp].current --;
2369 break;
2370 }
2371 }
2372
2373 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2374 {
2375 if (check_max_only) {
2376 int diff;
2377 switch (reason) {
2378 case FC_PUSH_VPM:
2379 diff = 1;
2380 break;
2381 case FC_PUSH_WQM:
2382 diff = 4;
2383 break;
2384 default:
2385 assert(0);
2386 diff = 0;
2387 }
2388 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2389 ctx->bc->callstack[ctx->bc->call_sp].max) {
2390 ctx->bc->callstack[ctx->bc->call_sp].max =
2391 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2392 }
2393 return;
2394 }
2395 switch (reason) {
2396 case FC_PUSH_VPM:
2397 ctx->bc->callstack[ctx->bc->call_sp].current++;
2398 break;
2399 case FC_PUSH_WQM:
2400 case FC_LOOP:
2401 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2402 break;
2403 case FC_REP:
2404 ctx->bc->callstack[ctx->bc->call_sp].current++;
2405 break;
2406 }
2407
2408 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2409 ctx->bc->callstack[ctx->bc->call_sp].max) {
2410 ctx->bc->callstack[ctx->bc->call_sp].max =
2411 ctx->bc->callstack[ctx->bc->call_sp].current;
2412 }
2413 }
2414
2415 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2416 {
2417 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2418
2419 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2420 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2421 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2422 sp->num_mid++;
2423 }
2424
2425 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2426 {
2427 ctx->bc->fc_sp++;
2428 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2429 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2430 }
2431
2432 static void fc_poplevel(struct r600_shader_ctx *ctx)
2433 {
2434 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2435 if (sp->mid) {
2436 free(sp->mid);
2437 sp->mid = NULL;
2438 }
2439 sp->num_mid = 0;
2440 sp->start = NULL;
2441 sp->type = 0;
2442 ctx->bc->fc_sp--;
2443 }
2444
/*
 * Disabled scaffolding: everything up to the matching #endif is compiled
 * out.  Several of these helpers are empty stubs or carry TODOs.
 */
#if 0
/* Adds a RETURN CF instruction; always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Adds a JUMP CF instruction with the given pop count.  `offset` is not
 * used yet (see TODO); always returns 0. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Calls, in order: emit_testflag, emit_jump_to_offset(1, 4),
 * emit_setret_in_loop_flag(V_SQ_ALU_SRC_0), pops(ifidx + 1), emit_return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Calls emit_testflag, adds a CF using the current instruction's
 * r600_opcode with pop_count 1, records it in the mid list of stack entry
 * `fc_sp`, then pops once. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2492
2493 static int tgsi_if(struct r600_shader_ctx *ctx)
2494 {
2495 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2496
2497 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2498
2499 fc_pushlevel(ctx, FC_IF);
2500
2501 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2502 return 0;
2503 }
2504
2505 static int tgsi_else(struct r600_shader_ctx *ctx)
2506 {
2507 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2508 ctx->bc->cf_last->pop_count = 1;
2509
2510 fc_set_mid(ctx, ctx->bc->fc_sp);
2511 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2512 return 0;
2513 }
2514
2515 static int tgsi_endif(struct r600_shader_ctx *ctx)
2516 {
2517 pops(ctx, 1);
2518 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2519 R600_ERR("if/endif unbalanced in shader\n");
2520 return -1;
2521 }
2522
2523 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2524 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2525 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2526 } else {
2527 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2528 }
2529 fc_poplevel(ctx);
2530
2531 callstack_decrease_current(ctx, FC_PUSH_VPM);
2532 return 0;
2533 }
2534
2535 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2536 {
2537 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2538
2539 fc_pushlevel(ctx, FC_LOOP);
2540
2541 /* check stack depth */
2542 callstack_check_depth(ctx, FC_LOOP, 0);
2543 return 0;
2544 }
2545
2546 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2547 {
2548 int i;
2549
2550 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2551
2552 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2553 R600_ERR("loop/endloop in shader code are not paired.\n");
2554 return -EINVAL;
2555 }
2556
2557 /* fixup loop pointers - from r600isa
2558 LOOP END points to CF after LOOP START,
2559 LOOP START point to CF after LOOP END
2560 BRK/CONT point to LOOP END CF
2561 */
2562 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2563
2564 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2565
2566 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2567 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2568 }
2569 /* TODO add LOOPRET support */
2570 fc_poplevel(ctx);
2571 callstack_decrease_current(ctx, FC_LOOP);
2572 return 0;
2573 }
2574
2575 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2576 {
2577 unsigned int fscp;
2578
2579 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2580 {
2581 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2582 break;
2583 }
2584
2585 if (fscp == 0) {
2586 R600_ERR("Break not inside loop/endloop pair\n");
2587 return -EINVAL;
2588 }
2589
2590 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2591 ctx->bc->cf_last->pop_count = 1;
2592
2593 fc_set_mid(ctx, fscp);
2594
2595 pops(ctx, 1);
2596 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2597 return 0;
2598 }
2599
/*
 * TGSI opcode table for the r600 code path: entries use the V_SQ_* opcode
 * constants and tgsi_r600_arl for ARL/ARR.
 *
 * Each entry is { TGSI opcode, flag, hardware opcode, handler }:
 *   - the flag is 1 only for the MULADD (OP3) entry and 0 everywhere else
 *     (presumably an "is three-operand" marker -- confirm against the
 *     struct definition);
 *   - the hardware opcode is an ALU, CF or TEX instruction constant, or
 *     NOP when the handler does not take its opcode from the table;
 *   - tgsi_unsupported marks opcodes that are not implemented.
 *
 * Entries are listed in ascending TGSI opcode order, with bare numeric
 * placeholders filling the "gap" opcode values.
 * NOTE(review): this layout suggests the table is indexed directly by
 * opcode -- confirm at the lookup site before reordering or removing rows.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
	{TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
	{TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
	/* gap */
	{118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
2763
2764 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2765 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2766 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2767 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2768 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2769 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2770 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2771 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2772 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2773 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2774 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2775 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2776 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2777 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2778 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2779 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2780 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2781 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2782 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2783 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2784 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2785 /* gap */
2786 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2787 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2788 /* gap */
2789 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2790 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2791 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2792 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2793 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2794 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2795 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2796 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2797 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2798 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2799 /* gap */
2800 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2801 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2802 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2803 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2804 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2805 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2806 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2807 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2808 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2809 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2810 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2811 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2812 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2814 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2815 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2816 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2817 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2818 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2819 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2820 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2821 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2822 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2823 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2825 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2826 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2827 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2828 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2829 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2830 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2831 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2832 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2833 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2834 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2835 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2836 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2837 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2840 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2841 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2842 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2843 /* gap */
2844 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2846 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2847 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2848 /* gap */
2849 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2857 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2858 /* gap */
2859 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2868 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2871 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2873 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874 /* gap */
2875 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2876 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2880 /* gap */
2881 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2890 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2891 /* gap */
2892 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2918 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920 };