r600g: Don't negate result of ABS instruction
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_formats.h"
32 #include "r600_opcodes.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
38 {
39 struct r600_pipe_state *rstate = &shader->rstate;
40 struct r600_shader *rshader = &shader->shader;
41 unsigned spi_vs_out_id[10];
42 unsigned i, tmp;
43
44 /* clear previous register */
45 rstate->nregs = 0;
46
47 /* so far never got proper semantic id from tgsi */
48 /* FIXME better to move this in config things so they get emited
49 * only one time per cs
50 */
51 for (i = 0; i < 10; i++) {
52 spi_vs_out_id[i] = 0;
53 }
54 for (i = 0; i < 32; i++) {
55 tmp = i << ((i & 3) * 8);
56 spi_vs_out_id[i / 4] |= tmp;
57 }
58 for (i = 0; i < 10; i++) {
59 r600_pipe_state_add_reg(rstate,
60 R_028614_SPI_VS_OUT_ID_0 + i * 4,
61 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
62 }
63
64 r600_pipe_state_add_reg(rstate,
65 R_0286C4_SPI_VS_OUT_CONFIG,
66 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
67 0xFFFFFFFF, NULL);
68 r600_pipe_state_add_reg(rstate,
69 R_028868_SQ_PGM_RESOURCES_VS,
70 S_028868_NUM_GPRS(rshader->bc.ngpr) |
71 S_028868_STACK_SIZE(rshader->bc.nstack),
72 0xFFFFFFFF, NULL);
73 r600_pipe_state_add_reg(rstate,
74 R_0288D0_SQ_PGM_CF_OFFSET_VS,
75 0x00000000, 0xFFFFFFFF, NULL);
76 r600_pipe_state_add_reg(rstate,
77 R_028858_SQ_PGM_START_VS,
78 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
79
80 r600_pipe_state_add_reg(rstate,
81 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
82 0xFFFFFFFF, NULL);
83
84 }
85
86 int r600_find_vs_semantic_index(struct r600_shader *vs,
87 struct r600_shader *ps, int id)
88 {
89 struct r600_shader_io *input = &ps->input[id];
90
91 for (int i = 0; i < vs->noutput; i++) {
92 if (input->name == vs->output[i].name &&
93 input->sid == vs->output[i].sid) {
94 return i - 1;
95 }
96 }
97 return 0;
98 }
99
100 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
101 {
102 struct r600_pipe_state *rstate = &shader->rstate;
103 struct r600_shader *rshader = &shader->shader;
104 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
105 int pos_index = -1, face_index = -1;
106
107 rstate->nregs = 0;
108
109 for (i = 0; i < rshader->ninput; i++) {
110 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
111 pos_index = i;
112 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
113 face_index = i;
114 }
115
116 for (i = 0; i < rshader->noutput; i++) {
117 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
118 r600_pipe_state_add_reg(rstate,
119 R_02880C_DB_SHADER_CONTROL,
120 S_02880C_Z_EXPORT_ENABLE(1),
121 S_02880C_Z_EXPORT_ENABLE(1), NULL);
122 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
123 r600_pipe_state_add_reg(rstate,
124 R_02880C_DB_SHADER_CONTROL,
125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
126 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
127 }
128
129 exports_ps = 0;
130 num_cout = 0;
131 for (i = 0; i < rshader->noutput; i++) {
132 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
133 exports_ps |= 1;
134 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
135 num_cout++;
136 }
137 }
138 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
139 if (!exports_ps) {
140 /* always at least export 1 component per pixel */
141 exports_ps = 2;
142 }
143
144 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
145 S_0286CC_PERSP_GRADIENT_ENA(1);
146 spi_input_z = 0;
147 if (pos_index != -1) {
148 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
149 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
150 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
151 S_0286CC_BARYC_SAMPLE_CNTL(1));
152 spi_input_z |= 1;
153 }
154
155 spi_ps_in_control_1 = 0;
156 if (face_index != -1) {
157 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
158 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
159 }
160
161 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
162 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
163 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
164 r600_pipe_state_add_reg(rstate,
165 R_028840_SQ_PGM_START_PS,
166 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
167 r600_pipe_state_add_reg(rstate,
168 R_028850_SQ_PGM_RESOURCES_PS,
169 S_028868_NUM_GPRS(rshader->bc.ngpr) |
170 S_028868_STACK_SIZE(rshader->bc.nstack),
171 0xFFFFFFFF, NULL);
172 r600_pipe_state_add_reg(rstate,
173 R_028854_SQ_PGM_EXPORTS_PS,
174 exports_ps, 0xFFFFFFFF, NULL);
175 r600_pipe_state_add_reg(rstate,
176 R_0288CC_SQ_PGM_CF_OFFSET_PS,
177 0x00000000, 0xFFFFFFFF, NULL);
178
179 if (rshader->fs_write_all) {
180 r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
181 S_028808_MULTIWRITE_ENABLE(1),
182 S_028808_MULTIWRITE_ENABLE(1),
183 NULL);
184 }
185
186 if (rshader->uses_kill) {
187 /* only set some bits here, the other bits are set in the dsa state */
188 r600_pipe_state_add_reg(rstate,
189 R_02880C_DB_SHADER_CONTROL,
190 S_02880C_KILL_ENABLE(1),
191 S_02880C_KILL_ENABLE(1), NULL);
192 }
193 r600_pipe_state_add_reg(rstate,
194 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
195 0xFFFFFFFF, NULL);
196 }
197
198 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
199 {
200 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
201 struct r600_shader *rshader = &shader->shader;
202 void *ptr;
203
204 /* copy new shader */
205 if (shader->bo == NULL) {
206 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
207 if (shader->bo == NULL) {
208 return -ENOMEM;
209 }
210 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
211 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
212 r600_bo_unmap(rctx->radeon, shader->bo);
213 }
214 /* build state */
215 switch (rshader->processor_type) {
216 case TGSI_PROCESSOR_VERTEX:
217 if (rshader->family >= CHIP_CEDAR) {
218 evergreen_pipe_shader_vs(ctx, shader);
219 } else {
220 r600_pipe_shader_vs(ctx, shader);
221 }
222 break;
223 case TGSI_PROCESSOR_FRAGMENT:
224 if (rshader->family >= CHIP_CEDAR) {
225 evergreen_pipe_shader_ps(ctx, shader);
226 } else {
227 r600_pipe_shader_ps(ctx, shader);
228 }
229 break;
230 default:
231 return -EINVAL;
232 }
233 return 0;
234 }
235
236 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
237
238 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
239 {
240 static int dump_shaders = -1;
241 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
242 int r;
243
244 /* Would like some magic "get_bool_option_once" routine.
245 */
246 if (dump_shaders == -1)
247 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
248
249 if (dump_shaders) {
250 fprintf(stderr, "--------------------------------------------------------------\n");
251 tgsi_dump(tokens, 0);
252 }
253 shader->shader.family = r600_get_family(rctx->radeon);
254 r = r600_shader_from_tgsi(tokens, &shader->shader);
255 if (r) {
256 R600_ERR("translation from TGSI failed !\n");
257 return r;
258 }
259 r = r600_bc_build(&shader->shader.bc);
260 if (r) {
261 R600_ERR("building bytecode failed !\n");
262 return r;
263 }
264 if (dump_shaders) {
265 r600_bc_dump(&shader->shader.bc);
266 fprintf(stderr, "______________________________________________________________\n");
267 }
268 return r600_pipe_shader(ctx, shader);
269 }
270
271 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
272 {
273 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
274
275 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
276 r600_bc_clear(&shader->shader.bc);
277 }
278
/*
 * TGSI -> r600 shader translation
 */
struct r600_shader_tgsi_instruction;

/* One source operand of the instruction being translated, as filled in by
 * tgsi_src(). */
struct r600_shader_src {
	unsigned	sel;		/* register selector, or a special V_SQ_ALU_SRC_* value */
	unsigned	swizzle[4];	/* source channel read for each of the four channels */
	unsigned	neg;		/* negate modifier */
	unsigned	abs;		/* absolute-value modifier */
	unsigned	rel;		/* V_SQ_REL_RELATIVE for indirect addressing, else 0 */
	uint32_t	value[4];	/* the four literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
};
292
293 struct r600_shader_ctx {
294 struct tgsi_shader_info info;
295 struct tgsi_parse_context parse;
296 const struct tgsi_token *tokens;
297 unsigned type;
298 unsigned file_offset[TGSI_FILE_COUNT];
299 unsigned temp_reg;
300 unsigned ar_reg;
301 struct r600_shader_tgsi_instruction *inst_info;
302 struct r600_bc *bc;
303 struct r600_shader *shader;
304 struct r600_shader_src src[3];
305 u32 *literals;
306 u32 nliterals;
307 u32 max_driver_temp_used;
308 /* needed for evergreen interpolation */
309 boolean input_centroid;
310 boolean input_linear;
311 boolean input_perspective;
312 int num_interp_gpr;
313 };
314
/* Entry of the opcode translation tables; the tables are indexed by TGSI
 * opcode. */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI opcode this entry describes */
	unsigned	is_op3;
	unsigned	r600_opcode;	/* hardware opcode handed to the ALU emitter */
	int		(*process)(struct r600_shader_ctx *ctx);	/* translation callback */
};

/* the tables themselves are defined further down in this file */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[];

static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
324
325 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
326 {
327 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
328 int j;
329
330 if (i->Instruction.NumDstRegs > 1) {
331 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
332 return -EINVAL;
333 }
334 if (i->Instruction.Predicate) {
335 R600_ERR("predicate unsupported\n");
336 return -EINVAL;
337 }
338 #if 0
339 if (i->Instruction.Label) {
340 R600_ERR("label unsupported\n");
341 return -EINVAL;
342 }
343 #endif
344 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
345 if (i->Src[j].Register.Dimension) {
346 R600_ERR("unsupported src %d (dimension %d)\n", j,
347 i->Src[j].Register.Dimension);
348 return -EINVAL;
349 }
350 }
351 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
352 if (i->Dst[j].Register.Dimension) {
353 R600_ERR("unsupported dst (dimension)\n");
354 return -EINVAL;
355 }
356 }
357 return 0;
358 }
359
360 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
361 {
362 int i, r;
363 struct r600_bc_alu alu;
364 int gpr = 0, base_chan = 0;
365 int ij_index = 0;
366
367 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
368 ij_index = 0;
369 if (ctx->shader->input[input].centroid)
370 ij_index++;
371 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
372 ij_index = 0;
373 /* if we have perspective add one */
374 if (ctx->input_perspective) {
375 ij_index++;
376 /* if we have perspective centroid */
377 if (ctx->input_centroid)
378 ij_index++;
379 }
380 if (ctx->shader->input[input].centroid)
381 ij_index++;
382 }
383
384 /* work out gpr and base_chan from index */
385 gpr = ij_index / 2;
386 base_chan = (2 * (ij_index % 2)) + 1;
387
388 for (i = 0; i < 8; i++) {
389 memset(&alu, 0, sizeof(struct r600_bc_alu));
390
391 if (i < 4)
392 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
393 else
394 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
395
396 if ((i > 1) && (i < 6)) {
397 alu.dst.sel = ctx->shader->input[input].gpr;
398 alu.dst.write = 1;
399 }
400
401 alu.dst.chan = i % 4;
402
403 alu.src[0].sel = gpr;
404 alu.src[0].chan = (base_chan - (i % 2));
405
406 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
407
408 alu.bank_swizzle_force = SQ_ALU_VEC_210;
409 if ((i % 4) == 3)
410 alu.last = 1;
411 r = r600_bc_add_alu(ctx->bc, &alu);
412 if (r)
413 return r;
414 }
415 return 0;
416 }
417
418
419 static int tgsi_declaration(struct r600_shader_ctx *ctx)
420 {
421 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
422 unsigned i;
423
424 switch (d->Declaration.File) {
425 case TGSI_FILE_INPUT:
426 i = ctx->shader->ninput++;
427 ctx->shader->input[i].name = d->Semantic.Name;
428 ctx->shader->input[i].sid = d->Semantic.Index;
429 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
430 ctx->shader->input[i].centroid = d->Declaration.Centroid;
431 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
432 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
433 /* turn input into interpolate on EG */
434 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
435 if (ctx->shader->input[i].interpolate > 0) {
436 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
437 evergreen_interp_alu(ctx, i);
438 }
439 }
440 }
441 break;
442 case TGSI_FILE_OUTPUT:
443 i = ctx->shader->noutput++;
444 ctx->shader->output[i].name = d->Semantic.Name;
445 ctx->shader->output[i].sid = d->Semantic.Index;
446 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
447 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
448 break;
449 case TGSI_FILE_CONSTANT:
450 case TGSI_FILE_TEMPORARY:
451 case TGSI_FILE_SAMPLER:
452 case TGSI_FILE_ADDRESS:
453 break;
454 default:
455 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
456 return -EINVAL;
457 }
458 return 0;
459 }
460
461 static int r600_get_temp(struct r600_shader_ctx *ctx)
462 {
463 return ctx->temp_reg + ctx->max_driver_temp_used++;
464 }
465
466 /*
467 * for evergreen we need to scan the shader to find the number of GPRs we need to
468 * reserve for interpolation.
469 *
470 * we need to know if we are going to emit
471 * any centroid inputs
472 * if perspective and linear are required
473 */
474 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
475 {
476 int i;
477 int num_baryc;
478
479 ctx->input_linear = FALSE;
480 ctx->input_perspective = FALSE;
481 ctx->input_centroid = FALSE;
482 ctx->num_interp_gpr = 1;
483
484 /* any centroid inputs */
485 for (i = 0; i < ctx->info.num_inputs; i++) {
486 /* skip position/face */
487 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
488 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
489 continue;
490 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
491 ctx->input_linear = TRUE;
492 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
493 ctx->input_perspective = TRUE;
494 if (ctx->info.input_centroid[i])
495 ctx->input_centroid = TRUE;
496 }
497
498 num_baryc = 0;
499 /* ignoring sample for now */
500 if (ctx->input_perspective)
501 num_baryc++;
502 if (ctx->input_linear)
503 num_baryc++;
504 if (ctx->input_centroid)
505 num_baryc *= 2;
506
507 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
508
509 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
510 return ctx->num_interp_gpr;
511 }
512
513 static void tgsi_src(struct r600_shader_ctx *ctx,
514 const struct tgsi_full_src_register *tgsi_src,
515 struct r600_shader_src *r600_src)
516 {
517 memset(r600_src, 0, sizeof(*r600_src));
518 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
519 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
520 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
521 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
522 r600_src->neg = tgsi_src->Register.Negate;
523 r600_src->abs = tgsi_src->Register.Absolute;
524 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
525 int index;
526 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
527 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
528 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
529
530 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
531 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
532 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
533 return;
534 }
535 index = tgsi_src->Register.Index;
536 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
537 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
538 } else {
539 if (tgsi_src->Register.Indirect)
540 r600_src->rel = V_SQ_REL_RELATIVE;
541 r600_src->sel = tgsi_src->Register.Index;
542 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
543 }
544 }
545
546 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
547 {
548 struct r600_bc_vtx vtx;
549 unsigned int ar_reg;
550 int r;
551
552 if (offset) {
553 struct r600_bc_alu alu;
554
555 memset(&alu, 0, sizeof(alu));
556
557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
558 alu.src[0].sel = ctx->ar_reg;
559
560 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
561 alu.src[1].value = offset;
562
563 alu.dst.sel = dst_reg;
564 alu.dst.write = 1;
565 alu.last = 1;
566
567 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
568 return r;
569
570 ar_reg = dst_reg;
571 } else {
572 ar_reg = ctx->ar_reg;
573 }
574
575 memset(&vtx, 0, sizeof(vtx));
576 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
577 vtx.src_gpr = ar_reg;
578 vtx.mega_fetch_count = 16;
579 vtx.dst_gpr = dst_reg;
580 vtx.dst_sel_x = 0; /* SEL_X */
581 vtx.dst_sel_y = 1; /* SEL_Y */
582 vtx.dst_sel_z = 2; /* SEL_Z */
583 vtx.dst_sel_w = 3; /* SEL_W */
584 vtx.data_format = FMT_32_32_32_32_FLOAT;
585 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
586 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
587 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
588
589 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
590 return r;
591
592 return 0;
593 }
594
595 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
596 {
597 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
598 struct r600_bc_alu alu;
599 int i, j, k, nconst, r;
600
601 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
602 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
603 nconst++;
604 }
605 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
606 }
607 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
608 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
609 continue;
610 }
611
612 if (ctx->src[i].rel) {
613 int treg = r600_get_temp(ctx);
614 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
615 return r;
616
617 ctx->src[i].sel = treg;
618 ctx->src[i].rel = 0;
619 j--;
620 } else if (j > 0) {
621 int treg = r600_get_temp(ctx);
622 for (k = 0; k < 4; k++) {
623 memset(&alu, 0, sizeof(struct r600_bc_alu));
624 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
625 alu.src[0].sel = ctx->src[i].sel;
626 alu.src[0].chan = k;
627 alu.src[0].rel = ctx->src[i].rel;
628 alu.dst.sel = treg;
629 alu.dst.chan = k;
630 alu.dst.write = 1;
631 if (k == 3)
632 alu.last = 1;
633 r = r600_bc_add_alu(ctx->bc, &alu);
634 if (r)
635 return r;
636 }
637 ctx->src[i].sel = treg;
638 ctx->src[i].rel =0;
639 j--;
640 }
641 }
642 return 0;
643 }
644
645 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
646 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
647 {
648 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
649 struct r600_bc_alu alu;
650 int i, j, k, nliteral, r;
651
652 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
653 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
654 nliteral++;
655 }
656 }
657 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
658 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
659 int treg = r600_get_temp(ctx);
660 for (k = 0; k < 4; k++) {
661 memset(&alu, 0, sizeof(struct r600_bc_alu));
662 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
663 alu.src[0].sel = ctx->src[i].sel;
664 alu.src[0].chan = k;
665 alu.src[0].value = ctx->src[i].value[k];
666 alu.dst.sel = treg;
667 alu.dst.chan = k;
668 alu.dst.write = 1;
669 if (k == 3)
670 alu.last = 1;
671 r = r600_bc_add_alu(ctx->bc, &alu);
672 if (r)
673 return r;
674 }
675 ctx->src[i].sel = treg;
676 j--;
677 }
678 }
679 return 0;
680 }
681
682 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
683 {
684 struct tgsi_full_immediate *immediate;
685 struct tgsi_full_property *property;
686 struct r600_shader_ctx ctx;
687 struct r600_bc_output output[32];
688 unsigned output_done, noutput;
689 unsigned opcode;
690 int i, r = 0, pos0;
691
692 ctx.bc = &shader->bc;
693 ctx.shader = shader;
694 r = r600_bc_init(ctx.bc, shader->family);
695 if (r)
696 return r;
697 ctx.tokens = tokens;
698 tgsi_scan_shader(tokens, &ctx.info);
699 tgsi_parse_init(&ctx.parse, tokens);
700 ctx.type = ctx.parse.FullHeader.Processor.Processor;
701 shader->processor_type = ctx.type;
702 ctx.bc->type = shader->processor_type;
703
704 /* register allocations */
705 /* Values [0,127] correspond to GPR[0..127].
706 * Values [128,159] correspond to constant buffer bank 0
707 * Values [160,191] correspond to constant buffer bank 1
708 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
709 * Values [256,287] correspond to constant buffer bank 2 (EG)
710 * Values [288,319] correspond to constant buffer bank 3 (EG)
711 * Other special values are shown in the list below.
712 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
713 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
714 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
715 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
716 * 248 SQ_ALU_SRC_0: special constant 0.0.
717 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
718 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
719 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
720 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
721 * 253 SQ_ALU_SRC_LITERAL: literal constant.
722 * 254 SQ_ALU_SRC_PV: previous vector result.
723 * 255 SQ_ALU_SRC_PS: previous scalar result.
724 */
725 for (i = 0; i < TGSI_FILE_COUNT; i++) {
726 ctx.file_offset[i] = 0;
727 }
728 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
729 ctx.file_offset[TGSI_FILE_INPUT] = 1;
730 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
731 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
732 } else {
733 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
734 }
735 }
736 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
737 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
738 }
739 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
740 ctx.info.file_count[TGSI_FILE_INPUT];
741 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
742 ctx.info.file_count[TGSI_FILE_OUTPUT];
743
744 /* Outside the GPR range. This will be translated to one of the
745 * kcache banks later. */
746 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
747
748 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
749 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
750 ctx.info.file_count[TGSI_FILE_TEMPORARY];
751 ctx.temp_reg = ctx.ar_reg + 1;
752
753 ctx.nliterals = 0;
754 ctx.literals = NULL;
755 shader->fs_write_all = FALSE;
756 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
757 tgsi_parse_token(&ctx.parse);
758 switch (ctx.parse.FullToken.Token.Type) {
759 case TGSI_TOKEN_TYPE_IMMEDIATE:
760 immediate = &ctx.parse.FullToken.FullImmediate;
761 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
762 if(ctx.literals == NULL) {
763 r = -ENOMEM;
764 goto out_err;
765 }
766 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
767 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
768 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
769 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
770 ctx.nliterals++;
771 break;
772 case TGSI_TOKEN_TYPE_DECLARATION:
773 r = tgsi_declaration(&ctx);
774 if (r)
775 goto out_err;
776 break;
777 case TGSI_TOKEN_TYPE_INSTRUCTION:
778 r = tgsi_is_supported(&ctx);
779 if (r)
780 goto out_err;
781 ctx.max_driver_temp_used = 0;
782 /* reserve first tmp for everyone */
783 r600_get_temp(&ctx);
784
785 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
786 if ((r = tgsi_split_constant(&ctx)))
787 goto out_err;
788 if ((r = tgsi_split_literal_constant(&ctx)))
789 goto out_err;
790 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
791 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
792 else
793 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
794 r = ctx.inst_info->process(&ctx);
795 if (r)
796 goto out_err;
797 break;
798 case TGSI_TOKEN_TYPE_PROPERTY:
799 property = &ctx.parse.FullToken.FullProperty;
800 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
801 if (property->u[0].Data == 1)
802 shader->fs_write_all = TRUE;
803 }
804 break;
805 default:
806 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
807 r = -EINVAL;
808 goto out_err;
809 }
810 }
811 /* export output */
812 noutput = shader->noutput;
813 for (i = 0, pos0 = 0; i < noutput; i++) {
814 memset(&output[i], 0, sizeof(struct r600_bc_output));
815 output[i].gpr = shader->output[i].gpr;
816 output[i].elem_size = 3;
817 output[i].swizzle_x = 0;
818 output[i].swizzle_y = 1;
819 output[i].swizzle_z = 2;
820 output[i].swizzle_w = 3;
821 output[i].burst_count = 1;
822 output[i].barrier = 1;
823 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
824 output[i].array_base = i - pos0;
825 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
826 switch (ctx.type) {
827 case TGSI_PROCESSOR_VERTEX:
828 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
829 output[i].array_base = 60;
830 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
831 /* position doesn't count in array_base */
832 pos0++;
833 }
834 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
835 output[i].array_base = 61;
836 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
837 /* position doesn't count in array_base */
838 pos0++;
839 }
840 break;
841 case TGSI_PROCESSOR_FRAGMENT:
842 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
843 output[i].array_base = shader->output[i].sid;
844 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
845 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
846 output[i].array_base = 61;
847 output[i].swizzle_x = 2;
848 output[i].swizzle_y = 7;
849 output[i].swizzle_z = output[i].swizzle_w = 7;
850 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
851 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
852 output[i].array_base = 61;
853 output[i].swizzle_x = 7;
854 output[i].swizzle_y = 1;
855 output[i].swizzle_z = output[i].swizzle_w = 7;
856 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
857 } else {
858 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
859 r = -EINVAL;
860 goto out_err;
861 }
862 break;
863 default:
864 R600_ERR("unsupported processor type %d\n", ctx.type);
865 r = -EINVAL;
866 goto out_err;
867 }
868 }
869 /* add fake param output for vertex shader if no param is exported */
870 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
871 for (i = 0, pos0 = 0; i < noutput; i++) {
872 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
873 pos0 = 1;
874 break;
875 }
876 }
877 if (!pos0) {
878 memset(&output[i], 0, sizeof(struct r600_bc_output));
879 output[i].gpr = 0;
880 output[i].elem_size = 3;
881 output[i].swizzle_x = 0;
882 output[i].swizzle_y = 1;
883 output[i].swizzle_z = 2;
884 output[i].swizzle_w = 3;
885 output[i].burst_count = 1;
886 output[i].barrier = 1;
887 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
888 output[i].array_base = 0;
889 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
890 noutput++;
891 }
892 }
893 /* add fake pixel export */
894 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
895 memset(&output[0], 0, sizeof(struct r600_bc_output));
896 output[0].gpr = 0;
897 output[0].elem_size = 3;
898 output[0].swizzle_x = 7;
899 output[0].swizzle_y = 7;
900 output[0].swizzle_z = 7;
901 output[0].swizzle_w = 7;
902 output[0].burst_count = 1;
903 output[0].barrier = 1;
904 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
905 output[0].array_base = 0;
906 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
907 noutput++;
908 }
909 /* set export done on last export of each type */
910 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
911 if (i == (noutput - 1)) {
912 output[i].end_of_program = 1;
913 }
914 if (!(output_done & (1 << output[i].type))) {
915 output_done |= (1 << output[i].type);
916 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
917 }
918 }
919 /* add output to bytecode */
920 for (i = 0; i < noutput; i++) {
921 r = r600_bc_add_output(ctx.bc, &output[i]);
922 if (r)
923 goto out_err;
924 }
925 free(ctx.literals);
926 tgsi_parse_free(&ctx.parse);
927 return 0;
928 out_err:
929 free(ctx.literals);
930 tgsi_parse_free(&ctx.parse);
931 return r;
932 }
933
934 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
935 {
936 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
937 return -EINVAL;
938 }
939
/*
 * Table callback that emits nothing and always succeeds; ctx is not
 * touched.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;

	return 0;
}
944
945 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
946 const struct r600_shader_src *shader_src,
947 unsigned chan)
948 {
949 bc_src->sel = shader_src->sel;
950 bc_src->chan = shader_src->swizzle[chan];
951 bc_src->neg = shader_src->neg;
952 bc_src->abs = shader_src->abs;
953 bc_src->rel = shader_src->rel;
954 bc_src->value = shader_src->value[bc_src->chan];
955 }
956
957 static void tgsi_dst(struct r600_shader_ctx *ctx,
958 const struct tgsi_full_dst_register *tgsi_dst,
959 unsigned swizzle,
960 struct r600_bc_alu_dst *r600_dst)
961 {
962 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
963
964 r600_dst->sel = tgsi_dst->Register.Index;
965 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
966 r600_dst->chan = swizzle;
967 r600_dst->write = 1;
968 if (tgsi_dst->Register.Indirect)
969 r600_dst->rel = V_SQ_REL_RELATIVE;
970 if (inst->Instruction.Saturate) {
971 r600_dst->clamp = 1;
972 }
973 }
974
/*
 * Return the highest channel (0..3) enabled in 'writemask'.
 *
 * Bits above bit 3 are ignored, and 0 is returned when none of the four
 * channel bits is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* search downwards; ending up at channel 0 covers both "only X is
	 * written" and "nothing is written" */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1)
			break;
	}
	return chan;
}
986
987 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
988 {
989 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
990 struct r600_bc_alu alu;
991 int i, j, r;
992 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
993
994 for (i = 0; i < lasti + 1; i++) {
995 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
996 continue;
997
998 memset(&alu, 0, sizeof(struct r600_bc_alu));
999 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1000
1001 alu.inst = ctx->inst_info->r600_opcode;
1002 if (!swap) {
1003 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1004 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1005 }
1006 } else {
1007 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1008 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1009 }
1010 /* handle some special cases */
1011 switch (ctx->inst_info->tgsi_opcode) {
1012 case TGSI_OPCODE_SUB:
1013 alu.src[1].neg = 1;
1014 break;
1015 case TGSI_OPCODE_ABS:
1016 alu.src[0].abs = 1;
1017 /* negation is performed after absolute value is taken */
1018 alu.src[0].neg = 0;
1019 break;
1020 default:
1021 break;
1022 }
1023 if (i == lasti) {
1024 alu.last = 1;
1025 }
1026 r = r600_bc_add_alu(ctx->bc, &alu);
1027 if (r)
1028 return r;
1029 }
1030 return 0;
1031 }
1032
/*
 * Per-channel ALU emission with sources in their TGSI order
 * (tgsi_op2_s() with swap disabled).
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0);
}
1037
/*
 * Per-channel ALU emission with the two sources exchanged
 * (tgsi_op2_s() with swap enabled).
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1);
}
1042
1043 /*
1044 * r600 - trunc to -PI..PI range
1045 * r700 - normalize by dividing by 2PI
1046 * see fdo bug 27901
1047 */
1048 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1049 {
1050 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1051 static float double_pi = 3.1415926535 * 2;
1052 static float neg_pi = -3.1415926535;
1053
1054 int r;
1055 struct r600_bc_alu alu;
1056
1057 memset(&alu, 0, sizeof(struct r600_bc_alu));
1058 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1059 alu.is_op3 = 1;
1060
1061 alu.dst.chan = 0;
1062 alu.dst.sel = ctx->temp_reg;
1063 alu.dst.write = 1;
1064
1065 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1066
1067 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1068 alu.src[1].chan = 0;
1069 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1070 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1071 alu.src[2].chan = 0;
1072 alu.last = 1;
1073 r = r600_bc_add_alu(ctx->bc, &alu);
1074 if (r)
1075 return r;
1076
1077 memset(&alu, 0, sizeof(struct r600_bc_alu));
1078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1079
1080 alu.dst.chan = 0;
1081 alu.dst.sel = ctx->temp_reg;
1082 alu.dst.write = 1;
1083
1084 alu.src[0].sel = ctx->temp_reg;
1085 alu.src[0].chan = 0;
1086 alu.last = 1;
1087 r = r600_bc_add_alu(ctx->bc, &alu);
1088 if (r)
1089 return r;
1090
1091 memset(&alu, 0, sizeof(struct r600_bc_alu));
1092 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1093 alu.is_op3 = 1;
1094
1095 alu.dst.chan = 0;
1096 alu.dst.sel = ctx->temp_reg;
1097 alu.dst.write = 1;
1098
1099 alu.src[0].sel = ctx->temp_reg;
1100 alu.src[0].chan = 0;
1101
1102 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1103 alu.src[1].chan = 0;
1104 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1105 alu.src[2].chan = 0;
1106
1107 if (ctx->bc->chiprev == CHIPREV_R600) {
1108 alu.src[1].value = *(uint32_t *)&double_pi;
1109 alu.src[2].value = *(uint32_t *)&neg_pi;
1110 } else {
1111 alu.src[1].sel = V_SQ_ALU_SRC_1;
1112 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1113 alu.src[2].neg = 1;
1114 }
1115
1116 alu.last = 1;
1117 r = r600_bc_add_alu(ctx->bc, &alu);
1118 if (r)
1119 return r;
1120 return 0;
1121 }
1122
1123 static int tgsi_trig(struct r600_shader_ctx *ctx)
1124 {
1125 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1126 struct r600_bc_alu alu;
1127 int i, r;
1128 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1129
1130 r = tgsi_setup_trig(ctx);
1131 if (r)
1132 return r;
1133
1134 memset(&alu, 0, sizeof(struct r600_bc_alu));
1135 alu.inst = ctx->inst_info->r600_opcode;
1136 alu.dst.chan = 0;
1137 alu.dst.sel = ctx->temp_reg;
1138 alu.dst.write = 1;
1139
1140 alu.src[0].sel = ctx->temp_reg;
1141 alu.src[0].chan = 0;
1142 alu.last = 1;
1143 r = r600_bc_add_alu(ctx->bc, &alu);
1144 if (r)
1145 return r;
1146
1147 /* replicate result */
1148 for (i = 0; i < lasti + 1; i++) {
1149 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1150 continue;
1151
1152 memset(&alu, 0, sizeof(struct r600_bc_alu));
1153 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1154
1155 alu.src[0].sel = ctx->temp_reg;
1156 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1157 if (i == lasti)
1158 alu.last = 1;
1159 r = r600_bc_add_alu(ctx->bc, &alu);
1160 if (r)
1161 return r;
1162 }
1163 return 0;
1164 }
1165
1166 static int tgsi_scs(struct r600_shader_ctx *ctx)
1167 {
1168 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1169 struct r600_bc_alu alu;
1170 int r;
1171
1172 /* We'll only need the trig stuff if we are going to write to the
1173 * X or Y components of the destination vector.
1174 */
1175 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1176 r = tgsi_setup_trig(ctx);
1177 if (r)
1178 return r;
1179 }
1180
1181 /* dst.x = COS */
1182 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1183 memset(&alu, 0, sizeof(struct r600_bc_alu));
1184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1185 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1186
1187 alu.src[0].sel = ctx->temp_reg;
1188 alu.src[0].chan = 0;
1189 alu.last = 1;
1190 r = r600_bc_add_alu(ctx->bc, &alu);
1191 if (r)
1192 return r;
1193 }
1194
1195 /* dst.y = SIN */
1196 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1197 memset(&alu, 0, sizeof(struct r600_bc_alu));
1198 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1199 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1200
1201 alu.src[0].sel = ctx->temp_reg;
1202 alu.src[0].chan = 0;
1203 alu.last = 1;
1204 r = r600_bc_add_alu(ctx->bc, &alu);
1205 if (r)
1206 return r;
1207 }
1208
1209 /* dst.z = 0.0; */
1210 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1211 memset(&alu, 0, sizeof(struct r600_bc_alu));
1212
1213 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1214
1215 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1216
1217 alu.src[0].sel = V_SQ_ALU_SRC_0;
1218 alu.src[0].chan = 0;
1219
1220 alu.last = 1;
1221
1222 r = r600_bc_add_alu(ctx->bc, &alu);
1223 if (r)
1224 return r;
1225 }
1226
1227 /* dst.w = 1.0; */
1228 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1229 memset(&alu, 0, sizeof(struct r600_bc_alu));
1230
1231 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1232
1233 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1234
1235 alu.src[0].sel = V_SQ_ALU_SRC_1;
1236 alu.src[0].chan = 0;
1237
1238 alu.last = 1;
1239
1240 r = r600_bc_add_alu(ctx->bc, &alu);
1241 if (r)
1242 return r;
1243 }
1244
1245 return 0;
1246 }
1247
1248 static int tgsi_kill(struct r600_shader_ctx *ctx)
1249 {
1250 struct r600_bc_alu alu;
1251 int i, r;
1252
1253 for (i = 0; i < 4; i++) {
1254 memset(&alu, 0, sizeof(struct r600_bc_alu));
1255 alu.inst = ctx->inst_info->r600_opcode;
1256
1257 alu.dst.chan = i;
1258
1259 alu.src[0].sel = V_SQ_ALU_SRC_0;
1260
1261 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1262 alu.src[1].sel = V_SQ_ALU_SRC_1;
1263 alu.src[1].neg = 1;
1264 } else {
1265 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1266 }
1267 if (i == 3) {
1268 alu.last = 1;
1269 }
1270 r = r600_bc_add_alu(ctx->bc, &alu);
1271 if (r)
1272 return r;
1273 }
1274
1275 /* kill must be last in ALU */
1276 ctx->bc->force_add_cf = 1;
1277 ctx->shader->uses_kill = TRUE;
1278 return 0;
1279 }
1280
1281 static int tgsi_lit(struct r600_shader_ctx *ctx)
1282 {
1283 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1284 struct r600_bc_alu alu;
1285 int r;
1286
1287 /* dst.x, <- 1.0 */
1288 memset(&alu, 0, sizeof(struct r600_bc_alu));
1289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1290 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1291 alu.src[0].chan = 0;
1292 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1293 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1294 r = r600_bc_add_alu(ctx->bc, &alu);
1295 if (r)
1296 return r;
1297
1298 /* dst.y = max(src.x, 0.0) */
1299 memset(&alu, 0, sizeof(struct r600_bc_alu));
1300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1301 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1302 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1303 alu.src[1].chan = 0;
1304 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1305 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1306 r = r600_bc_add_alu(ctx->bc, &alu);
1307 if (r)
1308 return r;
1309
1310 /* dst.w, <- 1.0 */
1311 memset(&alu, 0, sizeof(struct r600_bc_alu));
1312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1313 alu.src[0].sel = V_SQ_ALU_SRC_1;
1314 alu.src[0].chan = 0;
1315 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1316 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1317 alu.last = 1;
1318 r = r600_bc_add_alu(ctx->bc, &alu);
1319 if (r)
1320 return r;
1321
1322 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1323 {
1324 int chan;
1325 int sel;
1326
1327 /* dst.z = log(src.y) */
1328 memset(&alu, 0, sizeof(struct r600_bc_alu));
1329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1330 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1331 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1332 alu.last = 1;
1333 r = r600_bc_add_alu(ctx->bc, &alu);
1334 if (r)
1335 return r;
1336
1337 chan = alu.dst.chan;
1338 sel = alu.dst.sel;
1339
1340 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1341 memset(&alu, 0, sizeof(struct r600_bc_alu));
1342 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1343 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1344 alu.src[1].sel = sel;
1345 alu.src[1].chan = chan;
1346
1347 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1348 alu.dst.sel = ctx->temp_reg;
1349 alu.dst.chan = 0;
1350 alu.dst.write = 1;
1351 alu.is_op3 = 1;
1352 alu.last = 1;
1353 r = r600_bc_add_alu(ctx->bc, &alu);
1354 if (r)
1355 return r;
1356
1357 /* dst.z = exp(tmp.x) */
1358 memset(&alu, 0, sizeof(struct r600_bc_alu));
1359 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1360 alu.src[0].sel = ctx->temp_reg;
1361 alu.src[0].chan = 0;
1362 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1363 alu.last = 1;
1364 r = r600_bc_add_alu(ctx->bc, &alu);
1365 if (r)
1366 return r;
1367 }
1368 return 0;
1369 }
1370
1371 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1372 {
1373 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1374 struct r600_bc_alu alu;
1375 int i, r;
1376
1377 memset(&alu, 0, sizeof(struct r600_bc_alu));
1378
1379 /* FIXME:
1380 * For state trackers other than OpenGL, we'll want to use
1381 * _RECIPSQRT_IEEE instead.
1382 */
1383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1384
1385 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1386 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1387 alu.src[i].abs = 1;
1388 }
1389 alu.dst.sel = ctx->temp_reg;
1390 alu.dst.write = 1;
1391 alu.last = 1;
1392 r = r600_bc_add_alu(ctx->bc, &alu);
1393 if (r)
1394 return r;
1395 /* replicate result */
1396 return tgsi_helper_tempx_replicate(ctx);
1397 }
1398
1399 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1400 {
1401 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1402 struct r600_bc_alu alu;
1403 int i, r;
1404
1405 for (i = 0; i < 4; i++) {
1406 memset(&alu, 0, sizeof(struct r600_bc_alu));
1407 alu.src[0].sel = ctx->temp_reg;
1408 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1409 alu.dst.chan = i;
1410 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1411 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1412 if (i == 3)
1413 alu.last = 1;
1414 r = r600_bc_add_alu(ctx->bc, &alu);
1415 if (r)
1416 return r;
1417 }
1418 return 0;
1419 }
1420
1421 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1422 {
1423 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1424 struct r600_bc_alu alu;
1425 int i, r;
1426
1427 memset(&alu, 0, sizeof(struct r600_bc_alu));
1428 alu.inst = ctx->inst_info->r600_opcode;
1429 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1430 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1431 }
1432 alu.dst.sel = ctx->temp_reg;
1433 alu.dst.write = 1;
1434 alu.last = 1;
1435 r = r600_bc_add_alu(ctx->bc, &alu);
1436 if (r)
1437 return r;
1438 /* replicate result */
1439 return tgsi_helper_tempx_replicate(ctx);
1440 }
1441
1442 static int tgsi_pow(struct r600_shader_ctx *ctx)
1443 {
1444 struct r600_bc_alu alu;
1445 int r;
1446
1447 /* LOG2(a) */
1448 memset(&alu, 0, sizeof(struct r600_bc_alu));
1449 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1450 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1451 alu.dst.sel = ctx->temp_reg;
1452 alu.dst.write = 1;
1453 alu.last = 1;
1454 r = r600_bc_add_alu(ctx->bc, &alu);
1455 if (r)
1456 return r;
1457 /* b * LOG2(a) */
1458 memset(&alu, 0, sizeof(struct r600_bc_alu));
1459 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1460 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1461 alu.src[1].sel = ctx->temp_reg;
1462 alu.dst.sel = ctx->temp_reg;
1463 alu.dst.write = 1;
1464 alu.last = 1;
1465 r = r600_bc_add_alu(ctx->bc, &alu);
1466 if (r)
1467 return r;
1468 /* POW(a,b) = EXP2(b * LOG2(a))*/
1469 memset(&alu, 0, sizeof(struct r600_bc_alu));
1470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1471 alu.src[0].sel = ctx->temp_reg;
1472 alu.dst.sel = ctx->temp_reg;
1473 alu.dst.write = 1;
1474 alu.last = 1;
1475 r = r600_bc_add_alu(ctx->bc, &alu);
1476 if (r)
1477 return r;
1478 return tgsi_helper_tempx_replicate(ctx);
1479 }
1480
1481 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1482 {
1483 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1484 struct r600_bc_alu alu;
1485 int i, r;
1486
1487 /* tmp = (src > 0 ? 1 : src) */
1488 for (i = 0; i < 4; i++) {
1489 memset(&alu, 0, sizeof(struct r600_bc_alu));
1490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1491 alu.is_op3 = 1;
1492
1493 alu.dst.sel = ctx->temp_reg;
1494 alu.dst.chan = i;
1495
1496 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1497 alu.src[1].sel = V_SQ_ALU_SRC_1;
1498 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1499
1500 if (i == 3)
1501 alu.last = 1;
1502 r = r600_bc_add_alu(ctx->bc, &alu);
1503 if (r)
1504 return r;
1505 }
1506
1507 /* dst = (-tmp > 0 ? -1 : tmp) */
1508 for (i = 0; i < 4; i++) {
1509 memset(&alu, 0, sizeof(struct r600_bc_alu));
1510 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1511 alu.is_op3 = 1;
1512 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1513
1514 alu.src[0].sel = ctx->temp_reg;
1515 alu.src[0].chan = i;
1516 alu.src[0].neg = 1;
1517
1518 alu.src[1].sel = V_SQ_ALU_SRC_1;
1519 alu.src[1].neg = 1;
1520
1521 alu.src[2].sel = ctx->temp_reg;
1522 alu.src[2].chan = i;
1523
1524 if (i == 3)
1525 alu.last = 1;
1526 r = r600_bc_add_alu(ctx->bc, &alu);
1527 if (r)
1528 return r;
1529 }
1530 return 0;
1531 }
1532
1533 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1534 {
1535 struct r600_bc_alu alu;
1536 int i, r;
1537
1538 for (i = 0; i < 4; i++) {
1539 memset(&alu, 0, sizeof(struct r600_bc_alu));
1540 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1541 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1542 alu.dst.chan = i;
1543 } else {
1544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1545 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1546 alu.src[0].sel = ctx->temp_reg;
1547 alu.src[0].chan = i;
1548 }
1549 if (i == 3) {
1550 alu.last = 1;
1551 }
1552 r = r600_bc_add_alu(ctx->bc, &alu);
1553 if (r)
1554 return r;
1555 }
1556 return 0;
1557 }
1558
1559 static int tgsi_op3(struct r600_shader_ctx *ctx)
1560 {
1561 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1562 struct r600_bc_alu alu;
1563 int i, j, r;
1564 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1565
1566 for (i = 0; i < lasti + 1; i++) {
1567 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1568 continue;
1569
1570 memset(&alu, 0, sizeof(struct r600_bc_alu));
1571 alu.inst = ctx->inst_info->r600_opcode;
1572 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1573 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1574 }
1575
1576 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1577 alu.dst.chan = i;
1578 alu.dst.write = 1;
1579 alu.is_op3 = 1;
1580 if (i == lasti) {
1581 alu.last = 1;
1582 }
1583 r = r600_bc_add_alu(ctx->bc, &alu);
1584 if (r)
1585 return r;
1586 }
1587 return 0;
1588 }
1589
1590 static int tgsi_dp(struct r600_shader_ctx *ctx)
1591 {
1592 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1593 struct r600_bc_alu alu;
1594 int i, j, r;
1595
1596 for (i = 0; i < 4; i++) {
1597 memset(&alu, 0, sizeof(struct r600_bc_alu));
1598 alu.inst = ctx->inst_info->r600_opcode;
1599 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1600 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1601 }
1602
1603 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1604 alu.dst.chan = i;
1605 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1606 /* handle some special cases */
1607 switch (ctx->inst_info->tgsi_opcode) {
1608 case TGSI_OPCODE_DP2:
1609 if (i > 1) {
1610 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1611 alu.src[0].chan = alu.src[1].chan = 0;
1612 }
1613 break;
1614 case TGSI_OPCODE_DP3:
1615 if (i > 2) {
1616 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1617 alu.src[0].chan = alu.src[1].chan = 0;
1618 }
1619 break;
1620 case TGSI_OPCODE_DPH:
1621 if (i == 3) {
1622 alu.src[0].sel = V_SQ_ALU_SRC_1;
1623 alu.src[0].chan = 0;
1624 alu.src[0].neg = 0;
1625 }
1626 break;
1627 default:
1628 break;
1629 }
1630 if (i == 3) {
1631 alu.last = 1;
1632 }
1633 r = r600_bc_add_alu(ctx->bc, &alu);
1634 if (r)
1635 return r;
1636 }
1637 return 0;
1638 }
1639
1640 static int tgsi_tex(struct r600_shader_ctx *ctx)
1641 {
1642 static float one_point_five = 1.5f;
1643 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1644 struct r600_bc_tex tex;
1645 struct r600_bc_alu alu;
1646 unsigned src_gpr;
1647 int r, i;
1648 int opcode;
1649 boolean src_not_temp =
1650 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1651 inst->Src[0].Register.File != TGSI_FILE_INPUT;
1652
1653 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1654
1655 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1656 /* Add perspective divide */
1657 memset(&alu, 0, sizeof(struct r600_bc_alu));
1658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1659 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1660
1661 alu.dst.sel = ctx->temp_reg;
1662 alu.dst.chan = 3;
1663 alu.last = 1;
1664 alu.dst.write = 1;
1665 r = r600_bc_add_alu(ctx->bc, &alu);
1666 if (r)
1667 return r;
1668
1669 for (i = 0; i < 3; i++) {
1670 memset(&alu, 0, sizeof(struct r600_bc_alu));
1671 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1672 alu.src[0].sel = ctx->temp_reg;
1673 alu.src[0].chan = 3;
1674 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1675 alu.dst.sel = ctx->temp_reg;
1676 alu.dst.chan = i;
1677 alu.dst.write = 1;
1678 r = r600_bc_add_alu(ctx->bc, &alu);
1679 if (r)
1680 return r;
1681 }
1682 memset(&alu, 0, sizeof(struct r600_bc_alu));
1683 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1684 alu.src[0].sel = V_SQ_ALU_SRC_1;
1685 alu.src[0].chan = 0;
1686 alu.dst.sel = ctx->temp_reg;
1687 alu.dst.chan = 3;
1688 alu.last = 1;
1689 alu.dst.write = 1;
1690 r = r600_bc_add_alu(ctx->bc, &alu);
1691 if (r)
1692 return r;
1693 src_not_temp = FALSE;
1694 src_gpr = ctx->temp_reg;
1695 }
1696
1697 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1698 int src_chan, src2_chan;
1699
1700 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1701 for (i = 0; i < 4; i++) {
1702 memset(&alu, 0, sizeof(struct r600_bc_alu));
1703 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1704 switch (i) {
1705 case 0:
1706 src_chan = 2;
1707 src2_chan = 1;
1708 break;
1709 case 1:
1710 src_chan = 2;
1711 src2_chan = 0;
1712 break;
1713 case 2:
1714 src_chan = 0;
1715 src2_chan = 2;
1716 break;
1717 case 3:
1718 src_chan = 1;
1719 src2_chan = 2;
1720 break;
1721 default:
1722 assert(0);
1723 src_chan = 0;
1724 src2_chan = 0;
1725 break;
1726 }
1727 r600_bc_src(&alu.src[0], &ctx->src[0], src_chan);
1728 r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan);
1729 alu.dst.sel = ctx->temp_reg;
1730 alu.dst.chan = i;
1731 if (i == 3)
1732 alu.last = 1;
1733 alu.dst.write = 1;
1734 r = r600_bc_add_alu(ctx->bc, &alu);
1735 if (r)
1736 return r;
1737 }
1738
1739 /* tmp1.z = RCP_e(|tmp1.z|) */
1740 memset(&alu, 0, sizeof(struct r600_bc_alu));
1741 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1742 alu.src[0].sel = ctx->temp_reg;
1743 alu.src[0].chan = 2;
1744 alu.src[0].abs = 1;
1745 alu.dst.sel = ctx->temp_reg;
1746 alu.dst.chan = 2;
1747 alu.dst.write = 1;
1748 alu.last = 1;
1749 r = r600_bc_add_alu(ctx->bc, &alu);
1750 if (r)
1751 return r;
1752
1753 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1754 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1755 * muladd has no writemask, have to use another temp
1756 */
1757 memset(&alu, 0, sizeof(struct r600_bc_alu));
1758 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1759 alu.is_op3 = 1;
1760
1761 alu.src[0].sel = ctx->temp_reg;
1762 alu.src[0].chan = 0;
1763 alu.src[1].sel = ctx->temp_reg;
1764 alu.src[1].chan = 2;
1765
1766 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1767 alu.src[2].chan = 0;
1768 alu.src[2].value = *(uint32_t *)&one_point_five;
1769
1770 alu.dst.sel = ctx->temp_reg;
1771 alu.dst.chan = 0;
1772 alu.dst.write = 1;
1773
1774 r = r600_bc_add_alu(ctx->bc, &alu);
1775 if (r)
1776 return r;
1777
1778 memset(&alu, 0, sizeof(struct r600_bc_alu));
1779 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1780 alu.is_op3 = 1;
1781
1782 alu.src[0].sel = ctx->temp_reg;
1783 alu.src[0].chan = 1;
1784 alu.src[1].sel = ctx->temp_reg;
1785 alu.src[1].chan = 2;
1786
1787 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1788 alu.src[2].chan = 0;
1789 alu.src[2].value = *(uint32_t *)&one_point_five;
1790
1791 alu.dst.sel = ctx->temp_reg;
1792 alu.dst.chan = 1;
1793 alu.dst.write = 1;
1794
1795 alu.last = 1;
1796 r = r600_bc_add_alu(ctx->bc, &alu);
1797 if (r)
1798 return r;
1799
1800 src_not_temp = FALSE;
1801 src_gpr = ctx->temp_reg;
1802 }
1803
1804 if (src_not_temp) {
1805 for (i = 0; i < 4; i++) {
1806 memset(&alu, 0, sizeof(struct r600_bc_alu));
1807 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1808 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1809 alu.dst.sel = ctx->temp_reg;
1810 alu.dst.chan = i;
1811 if (i == 3)
1812 alu.last = 1;
1813 alu.dst.write = 1;
1814 r = r600_bc_add_alu(ctx->bc, &alu);
1815 if (r)
1816 return r;
1817 }
1818 src_gpr = ctx->temp_reg;
1819 }
1820
1821 opcode = ctx->inst_info->r600_opcode;
1822 if (opcode == SQ_TEX_INST_SAMPLE &&
1823 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1824 opcode = SQ_TEX_INST_SAMPLE_C;
1825
1826 memset(&tex, 0, sizeof(struct r600_bc_tex));
1827 tex.inst = opcode;
1828 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1829 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1830 tex.src_gpr = src_gpr;
1831 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1832 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1833 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1834 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1835 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1836 tex.src_sel_x = 0;
1837 tex.src_sel_y = 1;
1838 tex.src_sel_z = 2;
1839 tex.src_sel_w = 3;
1840
1841 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1842 tex.src_sel_x = 1;
1843 tex.src_sel_y = 0;
1844 tex.src_sel_z = 3;
1845 tex.src_sel_w = 1;
1846 }
1847
1848 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1849 tex.coord_type_x = 1;
1850 tex.coord_type_y = 1;
1851 tex.coord_type_z = 1;
1852 tex.coord_type_w = 1;
1853 }
1854
1855 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1856 tex.coord_type_z = 0;
1857 tex.src_sel_z = 1;
1858 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1859 tex.coord_type_z = 0;
1860
1861 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1862 tex.src_sel_w = 2;
1863
1864 r = r600_bc_add_tex(ctx->bc, &tex);
1865 if (r)
1866 return r;
1867
1868 /* add shadow ambient support - gallium doesn't do it yet */
1869 return 0;
1870 }
1871
1872 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1873 {
1874 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1875 struct r600_bc_alu alu;
1876 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1877 unsigned i;
1878 int r;
1879
1880 /* optimize if it's just an equal balance */
1881 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1882 for (i = 0; i < lasti + 1; i++) {
1883 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1884 continue;
1885
1886 memset(&alu, 0, sizeof(struct r600_bc_alu));
1887 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1888 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1889 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1890 alu.omod = 3;
1891 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1892 alu.dst.chan = i;
1893 if (i == lasti) {
1894 alu.last = 1;
1895 }
1896 r = r600_bc_add_alu(ctx->bc, &alu);
1897 if (r)
1898 return r;
1899 }
1900 return 0;
1901 }
1902
1903 /* 1 - src0 */
1904 for (i = 0; i < lasti + 1; i++) {
1905 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1906 continue;
1907
1908 memset(&alu, 0, sizeof(struct r600_bc_alu));
1909 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1910 alu.src[0].sel = V_SQ_ALU_SRC_1;
1911 alu.src[0].chan = 0;
1912 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1913 alu.src[1].neg = 1;
1914 alu.dst.sel = ctx->temp_reg;
1915 alu.dst.chan = i;
1916 if (i == lasti) {
1917 alu.last = 1;
1918 }
1919 alu.dst.write = 1;
1920 r = r600_bc_add_alu(ctx->bc, &alu);
1921 if (r)
1922 return r;
1923 }
1924
1925 /* (1 - src0) * src2 */
1926 for (i = 0; i < lasti + 1; i++) {
1927 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1928 continue;
1929
1930 memset(&alu, 0, sizeof(struct r600_bc_alu));
1931 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1932 alu.src[0].sel = ctx->temp_reg;
1933 alu.src[0].chan = i;
1934 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1935 alu.dst.sel = ctx->temp_reg;
1936 alu.dst.chan = i;
1937 if (i == lasti) {
1938 alu.last = 1;
1939 }
1940 alu.dst.write = 1;
1941 r = r600_bc_add_alu(ctx->bc, &alu);
1942 if (r)
1943 return r;
1944 }
1945
1946 /* src0 * src1 + (1 - src0) * src2 */
1947 for (i = 0; i < lasti + 1; i++) {
1948 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1949 continue;
1950
1951 memset(&alu, 0, sizeof(struct r600_bc_alu));
1952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1953 alu.is_op3 = 1;
1954 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1955 r600_bc_src(&alu.src[1], &ctx->src[1], i);
1956 alu.src[2].sel = ctx->temp_reg;
1957 alu.src[2].chan = i;
1958
1959 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1960 alu.dst.chan = i;
1961 if (i == lasti) {
1962 alu.last = 1;
1963 }
1964 r = r600_bc_add_alu(ctx->bc, &alu);
1965 if (r)
1966 return r;
1967 }
1968 return 0;
1969 }
1970
1971 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1972 {
1973 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1974 struct r600_bc_alu alu;
1975 int i, r;
1976 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1977
1978 for (i = 0; i < lasti + 1; i++) {
1979 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1980 continue;
1981
1982 memset(&alu, 0, sizeof(struct r600_bc_alu));
1983 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1984 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1985 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1986 r600_bc_src(&alu.src[2], &ctx->src[1], i);
1987 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1988 alu.dst.chan = i;
1989 alu.dst.write = 1;
1990 alu.is_op3 = 1;
1991 if (i == lasti)
1992 alu.last = 1;
1993 r = r600_bc_add_alu(ctx->bc, &alu);
1994 if (r)
1995 return r;
1996 }
1997 return 0;
1998 }
1999
2000 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2001 {
2002 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2003 struct r600_bc_alu alu;
2004 uint32_t use_temp = 0;
2005 int i, r;
2006
2007 if (inst->Dst[0].Register.WriteMask != 0xf)
2008 use_temp = 1;
2009
2010 for (i = 0; i < 4; i++) {
2011 memset(&alu, 0, sizeof(struct r600_bc_alu));
2012 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2013
2014 switch (i) {
2015 case 0:
2016 r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2017 break;
2018 case 1:
2019 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2020 break;
2021 case 2:
2022 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2023 break;
2024 case 3:
2025 alu.src[0].sel = V_SQ_ALU_SRC_0;
2026 alu.src[0].chan = i;
2027 }
2028
2029 switch (i) {
2030 case 0:
2031 r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2032 break;
2033 case 1:
2034 r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2035 break;
2036 case 2:
2037 r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2038 break;
2039 case 3:
2040 alu.src[1].sel = V_SQ_ALU_SRC_0;
2041 alu.src[1].chan = i;
2042 }
2043
2044 alu.dst.sel = ctx->temp_reg;
2045 alu.dst.chan = i;
2046 alu.dst.write = 1;
2047
2048 if (i == 3)
2049 alu.last = 1;
2050 r = r600_bc_add_alu(ctx->bc, &alu);
2051 if (r)
2052 return r;
2053 }
2054
2055 for (i = 0; i < 4; i++) {
2056 memset(&alu, 0, sizeof(struct r600_bc_alu));
2057 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2058
2059 switch (i) {
2060 case 0:
2061 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2062 break;
2063 case 1:
2064 r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2065 break;
2066 case 2:
2067 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2068 break;
2069 case 3:
2070 alu.src[0].sel = V_SQ_ALU_SRC_0;
2071 alu.src[0].chan = i;
2072 }
2073
2074 switch (i) {
2075 case 0:
2076 r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2077 break;
2078 case 1:
2079 r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2080 break;
2081 case 2:
2082 r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2083 break;
2084 case 3:
2085 alu.src[1].sel = V_SQ_ALU_SRC_0;
2086 alu.src[1].chan = i;
2087 }
2088
2089 alu.src[2].sel = ctx->temp_reg;
2090 alu.src[2].neg = 1;
2091 alu.src[2].chan = i;
2092
2093 if (use_temp)
2094 alu.dst.sel = ctx->temp_reg;
2095 else
2096 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2097 alu.dst.chan = i;
2098 alu.dst.write = 1;
2099 alu.is_op3 = 1;
2100 if (i == 3)
2101 alu.last = 1;
2102 r = r600_bc_add_alu(ctx->bc, &alu);
2103 if (r)
2104 return r;
2105 }
2106 if (use_temp)
2107 return tgsi_helper_copy(ctx, inst);
2108 return 0;
2109 }
2110
2111 static int tgsi_exp(struct r600_shader_ctx *ctx)
2112 {
2113 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2114 struct r600_bc_alu alu;
2115 int r;
2116
2117 /* result.x = 2^floor(src); */
2118 if (inst->Dst[0].Register.WriteMask & 1) {
2119 memset(&alu, 0, sizeof(struct r600_bc_alu));
2120
2121 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2122 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2123
2124 alu.dst.sel = ctx->temp_reg;
2125 alu.dst.chan = 0;
2126 alu.dst.write = 1;
2127 alu.last = 1;
2128 r = r600_bc_add_alu(ctx->bc, &alu);
2129 if (r)
2130 return r;
2131
2132 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2133 alu.src[0].sel = ctx->temp_reg;
2134 alu.src[0].chan = 0;
2135
2136 alu.dst.sel = ctx->temp_reg;
2137 alu.dst.chan = 0;
2138 alu.dst.write = 1;
2139 alu.last = 1;
2140 r = r600_bc_add_alu(ctx->bc, &alu);
2141 if (r)
2142 return r;
2143 }
2144
2145 /* result.y = tmp - floor(tmp); */
2146 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2147 memset(&alu, 0, sizeof(struct r600_bc_alu));
2148
2149 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2150 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2151
2152 alu.dst.sel = ctx->temp_reg;
2153 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2154 // if (r)
2155 // return r;
2156 alu.dst.write = 1;
2157 alu.dst.chan = 1;
2158
2159 alu.last = 1;
2160
2161 r = r600_bc_add_alu(ctx->bc, &alu);
2162 if (r)
2163 return r;
2164 }
2165
2166 /* result.z = RoughApprox2ToX(tmp);*/
2167 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2168 memset(&alu, 0, sizeof(struct r600_bc_alu));
2169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2170 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2171
2172 alu.dst.sel = ctx->temp_reg;
2173 alu.dst.write = 1;
2174 alu.dst.chan = 2;
2175
2176 alu.last = 1;
2177
2178 r = r600_bc_add_alu(ctx->bc, &alu);
2179 if (r)
2180 return r;
2181 }
2182
2183 /* result.w = 1.0;*/
2184 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2185 memset(&alu, 0, sizeof(struct r600_bc_alu));
2186
2187 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2188 alu.src[0].sel = V_SQ_ALU_SRC_1;
2189 alu.src[0].chan = 0;
2190
2191 alu.dst.sel = ctx->temp_reg;
2192 alu.dst.chan = 3;
2193 alu.dst.write = 1;
2194 alu.last = 1;
2195 r = r600_bc_add_alu(ctx->bc, &alu);
2196 if (r)
2197 return r;
2198 }
2199 return tgsi_helper_copy(ctx, inst);
2200 }
2201
2202 static int tgsi_log(struct r600_shader_ctx *ctx)
2203 {
2204 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2205 struct r600_bc_alu alu;
2206 int r;
2207
2208 /* result.x = floor(log2(src)); */
2209 if (inst->Dst[0].Register.WriteMask & 1) {
2210 memset(&alu, 0, sizeof(struct r600_bc_alu));
2211
2212 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2213 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2214
2215 alu.dst.sel = ctx->temp_reg;
2216 alu.dst.chan = 0;
2217 alu.dst.write = 1;
2218 alu.last = 1;
2219 r = r600_bc_add_alu(ctx->bc, &alu);
2220 if (r)
2221 return r;
2222
2223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2224 alu.src[0].sel = ctx->temp_reg;
2225 alu.src[0].chan = 0;
2226
2227 alu.dst.sel = ctx->temp_reg;
2228 alu.dst.chan = 0;
2229 alu.dst.write = 1;
2230 alu.last = 1;
2231
2232 r = r600_bc_add_alu(ctx->bc, &alu);
2233 if (r)
2234 return r;
2235 }
2236
2237 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2238 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2239 memset(&alu, 0, sizeof(struct r600_bc_alu));
2240
2241 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2242 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2243
2244 alu.dst.sel = ctx->temp_reg;
2245 alu.dst.chan = 1;
2246 alu.dst.write = 1;
2247 alu.last = 1;
2248
2249 r = r600_bc_add_alu(ctx->bc, &alu);
2250 if (r)
2251 return r;
2252
2253 memset(&alu, 0, sizeof(struct r600_bc_alu));
2254
2255 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2256 alu.src[0].sel = ctx->temp_reg;
2257 alu.src[0].chan = 1;
2258
2259 alu.dst.sel = ctx->temp_reg;
2260 alu.dst.chan = 1;
2261 alu.dst.write = 1;
2262 alu.last = 1;
2263
2264 r = r600_bc_add_alu(ctx->bc, &alu);
2265 if (r)
2266 return r;
2267
2268 memset(&alu, 0, sizeof(struct r600_bc_alu));
2269
2270 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2271 alu.src[0].sel = ctx->temp_reg;
2272 alu.src[0].chan = 1;
2273
2274 alu.dst.sel = ctx->temp_reg;
2275 alu.dst.chan = 1;
2276 alu.dst.write = 1;
2277 alu.last = 1;
2278
2279 r = r600_bc_add_alu(ctx->bc, &alu);
2280 if (r)
2281 return r;
2282
2283 memset(&alu, 0, sizeof(struct r600_bc_alu));
2284
2285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2286 alu.src[0].sel = ctx->temp_reg;
2287 alu.src[0].chan = 1;
2288
2289 alu.dst.sel = ctx->temp_reg;
2290 alu.dst.chan = 1;
2291 alu.dst.write = 1;
2292 alu.last = 1;
2293
2294 r = r600_bc_add_alu(ctx->bc, &alu);
2295 if (r)
2296 return r;
2297
2298 memset(&alu, 0, sizeof(struct r600_bc_alu));
2299
2300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2301
2302 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2303
2304 alu.src[1].sel = ctx->temp_reg;
2305 alu.src[1].chan = 1;
2306
2307 alu.dst.sel = ctx->temp_reg;
2308 alu.dst.chan = 1;
2309 alu.dst.write = 1;
2310 alu.last = 1;
2311
2312 r = r600_bc_add_alu(ctx->bc, &alu);
2313 if (r)
2314 return r;
2315 }
2316
2317 /* result.z = log2(src);*/
2318 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2319 memset(&alu, 0, sizeof(struct r600_bc_alu));
2320
2321 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2322 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2323
2324 alu.dst.sel = ctx->temp_reg;
2325 alu.dst.write = 1;
2326 alu.dst.chan = 2;
2327 alu.last = 1;
2328
2329 r = r600_bc_add_alu(ctx->bc, &alu);
2330 if (r)
2331 return r;
2332 }
2333
2334 /* result.w = 1.0; */
2335 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2336 memset(&alu, 0, sizeof(struct r600_bc_alu));
2337
2338 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2339 alu.src[0].sel = V_SQ_ALU_SRC_1;
2340 alu.src[0].chan = 0;
2341
2342 alu.dst.sel = ctx->temp_reg;
2343 alu.dst.chan = 3;
2344 alu.dst.write = 1;
2345 alu.last = 1;
2346
2347 r = r600_bc_add_alu(ctx->bc, &alu);
2348 if (r)
2349 return r;
2350 }
2351
2352 return tgsi_helper_copy(ctx, inst);
2353 }
2354
2355 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2356 {
2357 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2358 struct r600_bc_alu alu;
2359 int r;
2360
2361 memset(&alu, 0, sizeof(struct r600_bc_alu));
2362
2363 switch (inst->Instruction.Opcode) {
2364 case TGSI_OPCODE_ARL:
2365 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2366 break;
2367 case TGSI_OPCODE_ARR:
2368 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2369 break;
2370 default:
2371 assert(0);
2372 return -1;
2373 }
2374
2375 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2376 alu.last = 1;
2377 alu.dst.sel = ctx->ar_reg;
2378 alu.dst.write = 1;
2379 r = r600_bc_add_alu(ctx->bc, &alu);
2380 if (r)
2381 return r;
2382
2383 /* TODO: Note that the MOVA can be avoided if we never use AR for
2384 * indexing non-CB registers in the current ALU clause. Similarly, we
2385 * need to load AR from ar_reg again if we started a new clause
2386 * between ARL and AR usage. The easy way to do that is to remove
2387 * the MOVA here, and load it for the first AR access after ar_reg
2388 * has been modified in each clause. */
2389 memset(&alu, 0, sizeof(struct r600_bc_alu));
2390 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2391 alu.src[0].sel = ctx->ar_reg;
2392 alu.src[0].chan = 0;
2393 alu.last = 1;
2394 r = r600_bc_add_alu(ctx->bc, &alu);
2395 if (r)
2396 return r;
2397 return 0;
2398 }
2399 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2400 {
2401 /* TODO from r600c, ar values don't persist between clauses */
2402 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2403 struct r600_bc_alu alu;
2404 int r;
2405
2406 switch (inst->Instruction.Opcode) {
2407 case TGSI_OPCODE_ARL:
2408 memset(&alu, 0, sizeof(alu));
2409 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2410 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2411 alu.dst.sel = ctx->ar_reg;
2412 alu.dst.write = 1;
2413 alu.last = 1;
2414
2415 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2416 return r;
2417
2418 memset(&alu, 0, sizeof(alu));
2419 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2420 alu.src[0].sel = ctx->ar_reg;
2421 alu.dst.sel = ctx->ar_reg;
2422 alu.dst.write = 1;
2423 alu.last = 1;
2424
2425 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2426 return r;
2427 break;
2428 case TGSI_OPCODE_ARR:
2429 memset(&alu, 0, sizeof(alu));
2430 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2431 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2432 alu.dst.sel = ctx->ar_reg;
2433 alu.dst.write = 1;
2434 alu.last = 1;
2435
2436 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2437 return r;
2438 break;
2439 default:
2440 assert(0);
2441 return -1;
2442 }
2443
2444 memset(&alu, 0, sizeof(alu));
2445 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2446 alu.src[0].sel = ctx->ar_reg;
2447 alu.last = 1;
2448
2449 r = r600_bc_add_alu(ctx->bc, &alu);
2450 if (r)
2451 return r;
2452 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2453 return 0;
2454 }
2455
2456 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2457 {
2458 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2459 struct r600_bc_alu alu;
2460 int i, r = 0;
2461
2462 for (i = 0; i < 4; i++) {
2463 memset(&alu, 0, sizeof(struct r600_bc_alu));
2464
2465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2466 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2467
2468 if (i == 0 || i == 3) {
2469 alu.src[0].sel = V_SQ_ALU_SRC_1;
2470 } else {
2471 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2472 }
2473
2474 if (i == 0 || i == 2) {
2475 alu.src[1].sel = V_SQ_ALU_SRC_1;
2476 } else {
2477 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2478 }
2479 if (i == 3)
2480 alu.last = 1;
2481 r = r600_bc_add_alu(ctx->bc, &alu);
2482 if (r)
2483 return r;
2484 }
2485 return 0;
2486 }
2487
2488 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2489 {
2490 struct r600_bc_alu alu;
2491 int r;
2492
2493 memset(&alu, 0, sizeof(struct r600_bc_alu));
2494 alu.inst = opcode;
2495 alu.predicate = 1;
2496
2497 alu.dst.sel = ctx->temp_reg;
2498 alu.dst.write = 1;
2499 alu.dst.chan = 0;
2500
2501 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2502 alu.src[1].sel = V_SQ_ALU_SRC_0;
2503 alu.src[1].chan = 0;
2504
2505 alu.last = 1;
2506
2507 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2508 if (r)
2509 return r;
2510 return 0;
2511 }
2512
2513 static int pops(struct r600_shader_ctx *ctx, int pops)
2514 {
2515 int alu_pop = 3;
2516 if (ctx->bc->cf_last) {
2517 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2518 alu_pop = 0;
2519 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2520 alu_pop = 1;
2521 }
2522 alu_pop += pops;
2523 if (alu_pop == 1) {
2524 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2525 ctx->bc->force_add_cf = 1;
2526 } else if (alu_pop == 2) {
2527 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2528 ctx->bc->force_add_cf = 1;
2529 } else {
2530 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2531 ctx->bc->cf_last->pop_count = pops;
2532 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2533 }
2534 return 0;
2535 }
2536
2537 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2538 {
2539 switch(reason) {
2540 case FC_PUSH_VPM:
2541 ctx->bc->callstack[ctx->bc->call_sp].current--;
2542 break;
2543 case FC_PUSH_WQM:
2544 case FC_LOOP:
2545 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2546 break;
2547 case FC_REP:
2548 /* TOODO : for 16 vp asic should -= 2; */
2549 ctx->bc->callstack[ctx->bc->call_sp].current --;
2550 break;
2551 }
2552 }
2553
2554 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2555 {
2556 if (check_max_only) {
2557 int diff;
2558 switch (reason) {
2559 case FC_PUSH_VPM:
2560 diff = 1;
2561 break;
2562 case FC_PUSH_WQM:
2563 diff = 4;
2564 break;
2565 default:
2566 assert(0);
2567 diff = 0;
2568 }
2569 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2570 ctx->bc->callstack[ctx->bc->call_sp].max) {
2571 ctx->bc->callstack[ctx->bc->call_sp].max =
2572 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2573 }
2574 return;
2575 }
2576 switch (reason) {
2577 case FC_PUSH_VPM:
2578 ctx->bc->callstack[ctx->bc->call_sp].current++;
2579 break;
2580 case FC_PUSH_WQM:
2581 case FC_LOOP:
2582 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2583 break;
2584 case FC_REP:
2585 ctx->bc->callstack[ctx->bc->call_sp].current++;
2586 break;
2587 }
2588
2589 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2590 ctx->bc->callstack[ctx->bc->call_sp].max) {
2591 ctx->bc->callstack[ctx->bc->call_sp].max =
2592 ctx->bc->callstack[ctx->bc->call_sp].current;
2593 }
2594 }
2595
2596 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2597 {
2598 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2599
2600 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2601 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2602 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2603 sp->num_mid++;
2604 }
2605
2606 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2607 {
2608 ctx->bc->fc_sp++;
2609 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2610 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2611 }
2612
2613 static void fc_poplevel(struct r600_shader_ctx *ctx)
2614 {
2615 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2616 if (sp->mid) {
2617 free(sp->mid);
2618 sp->mid = NULL;
2619 }
2620 sp->num_mid = 0;
2621 sp->start = NULL;
2622 sp->type = 0;
2623 ctx->bc->fc_sp--;
2624 }
2625
#if 0
/*
 * Everything up to the matching #endif is compiled out.  These are
 * unfinished helpers for RETURN / flag-based loop exits; several of them are
 * empty stubs.
 */

/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Emit a JUMP CF instruction popping `pops` entries; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: does nothing and reports success. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: does nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, clear the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with pop_count 1,
 * record it in the "mid" list of level fc_sp and pop one entry.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2673
2674 static int tgsi_if(struct r600_shader_ctx *ctx)
2675 {
2676 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2677
2678 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2679
2680 fc_pushlevel(ctx, FC_IF);
2681
2682 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2683 return 0;
2684 }
2685
2686 static int tgsi_else(struct r600_shader_ctx *ctx)
2687 {
2688 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2689 ctx->bc->cf_last->pop_count = 1;
2690
2691 fc_set_mid(ctx, ctx->bc->fc_sp);
2692 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2693 return 0;
2694 }
2695
2696 static int tgsi_endif(struct r600_shader_ctx *ctx)
2697 {
2698 pops(ctx, 1);
2699 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2700 R600_ERR("if/endif unbalanced in shader\n");
2701 return -1;
2702 }
2703
2704 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2705 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2706 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2707 } else {
2708 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2709 }
2710 fc_poplevel(ctx);
2711
2712 callstack_decrease_current(ctx, FC_PUSH_VPM);
2713 return 0;
2714 }
2715
2716 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2717 {
2718 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2719
2720 fc_pushlevel(ctx, FC_LOOP);
2721
2722 /* check stack depth */
2723 callstack_check_depth(ctx, FC_LOOP, 0);
2724 return 0;
2725 }
2726
2727 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2728 {
2729 int i;
2730
2731 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2732
2733 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2734 R600_ERR("loop/endloop in shader code are not paired.\n");
2735 return -EINVAL;
2736 }
2737
2738 /* fixup loop pointers - from r600isa
2739 LOOP END points to CF after LOOP START,
2740 LOOP START point to CF after LOOP END
2741 BRK/CONT point to LOOP END CF
2742 */
2743 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2744
2745 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2746
2747 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2748 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2749 }
2750 /* TODO add LOOPRET support */
2751 fc_poplevel(ctx);
2752 callstack_decrease_current(ctx, FC_LOOP);
2753 return 0;
2754 }
2755
2756 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2757 {
2758 unsigned int fscp;
2759
2760 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2761 {
2762 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2763 break;
2764 }
2765
2766 if (fscp == 0) {
2767 R600_ERR("Break not inside loop/endloop pair\n");
2768 return -EINVAL;
2769 }
2770
2771 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2772 ctx->bc->cf_last->pop_count = 1;
2773
2774 fc_set_mid(ctx, fscp);
2775
2776 pops(ctx, 1);
2777 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2778 return 0;
2779 }
2780
2781 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2782 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2783 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2784 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2785
2786 /* FIXME:
2787 * For state trackers other than OpenGL, we'll want to use
2788 * _RECIP_IEEE instead.
2789 */
2790 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2791
2792 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2793 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2794 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2795 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2796 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2797 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2798 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2799 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2800 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2801 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2802 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2803 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2804 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2805 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2806 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2807 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2808 /* gap */
2809 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2810 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2811 /* gap */
2812 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2814 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2815 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2816 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2817 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2818 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2819 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2820 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2821 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2822 /* gap */
2823 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2825 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2826 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2827 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2828 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2829 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2830 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2831 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2832 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2833 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2834 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2836 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2837 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2839 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2840 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2841 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2842 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2844 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2846 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2847 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2853 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2857 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2858 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2859 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2860 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2863 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2864 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2865 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2866 /* gap */
2867 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2868 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2870 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2871 /* gap */
2872 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2875 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2876 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2880 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881 /* gap */
2882 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2891 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2894 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2896 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897 /* gap */
2898 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903 /* gap */
2904 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2913 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2914 /* gap */
2915 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2918 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943 };
2944
2945 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2946 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2947 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2948 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2949 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2950 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2951 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2952 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2954 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2955 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2956 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2957 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2958 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2959 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2960 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2961 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2962 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2963 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2964 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2965 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2966 /* gap */
2967 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2968 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2969 /* gap */
2970 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2971 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2972 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2973 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2974 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2975 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2976 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2977 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2978 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2979 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2980 /* gap */
2981 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2982 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2983 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2984 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2985 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2986 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2987 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2988 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2989 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2990 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2991 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2992 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2993 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2994 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2995 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2996 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2997 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2998 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2999 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3000 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3001 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3002 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3003 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3004 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3005 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3006 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3007 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3008 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3009 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3010 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3011 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3012 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3013 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3014 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3015 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3016 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3017 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3018 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3020 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3021 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3022 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3023 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3024 /* gap */
3025 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3026 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3028 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3029 /* gap */
3030 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3031 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3032 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3034 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3035 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3036 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3037 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3038 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3039 /* gap */
3040 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3041 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3042 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3043 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3048 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3049 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3050 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3051 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3052 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3054 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055 /* gap */
3056 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3060 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061 /* gap */
3062 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3065 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3067 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3071 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3072 /* gap */
3073 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3079 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101 };