Merge remote branch 'origin/nvc0'
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_formats.h"
32 #include "r600_opcodes.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
38 {
39 struct r600_pipe_state *rstate = &shader->rstate;
40 struct r600_shader *rshader = &shader->shader;
41 unsigned spi_vs_out_id[10];
42 unsigned i, tmp;
43
44 /* clear previous register */
45 rstate->nregs = 0;
46
47 /* so far never got proper semantic id from tgsi */
48 /* FIXME better to move this in config things so they get emited
49 * only one time per cs
50 */
51 for (i = 0; i < 10; i++) {
52 spi_vs_out_id[i] = 0;
53 }
54 for (i = 0; i < 32; i++) {
55 tmp = i << ((i & 3) * 8);
56 spi_vs_out_id[i / 4] |= tmp;
57 }
58 for (i = 0; i < 10; i++) {
59 r600_pipe_state_add_reg(rstate,
60 R_028614_SPI_VS_OUT_ID_0 + i * 4,
61 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
62 }
63
64 r600_pipe_state_add_reg(rstate,
65 R_0286C4_SPI_VS_OUT_CONFIG,
66 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
67 0xFFFFFFFF, NULL);
68 r600_pipe_state_add_reg(rstate,
69 R_028868_SQ_PGM_RESOURCES_VS,
70 S_028868_NUM_GPRS(rshader->bc.ngpr) |
71 S_028868_STACK_SIZE(rshader->bc.nstack),
72 0xFFFFFFFF, NULL);
73 r600_pipe_state_add_reg(rstate,
74 R_0288D0_SQ_PGM_CF_OFFSET_VS,
75 0x00000000, 0xFFFFFFFF, NULL);
76 r600_pipe_state_add_reg(rstate,
77 R_028858_SQ_PGM_START_VS,
78 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
79
80 r600_pipe_state_add_reg(rstate,
81 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
82 0xFFFFFFFF, NULL);
83
84 }
85
86 int r600_find_vs_semantic_index(struct r600_shader *vs,
87 struct r600_shader *ps, int id)
88 {
89 struct r600_shader_io *input = &ps->input[id];
90
91 for (int i = 0; i < vs->noutput; i++) {
92 if (input->name == vs->output[i].name &&
93 input->sid == vs->output[i].sid) {
94 return i - 1;
95 }
96 }
97 return 0;
98 }
99
100 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
101 {
102 struct r600_pipe_state *rstate = &shader->rstate;
103 struct r600_shader *rshader = &shader->shader;
104 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
105 int pos_index = -1, face_index = -1;
106
107 rstate->nregs = 0;
108
109 for (i = 0; i < rshader->ninput; i++) {
110 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
111 pos_index = i;
112 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
113 face_index = i;
114 }
115
116 for (i = 0; i < rshader->noutput; i++) {
117 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
118 r600_pipe_state_add_reg(rstate,
119 R_02880C_DB_SHADER_CONTROL,
120 S_02880C_Z_EXPORT_ENABLE(1),
121 S_02880C_Z_EXPORT_ENABLE(1), NULL);
122 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
123 r600_pipe_state_add_reg(rstate,
124 R_02880C_DB_SHADER_CONTROL,
125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
126 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
127 }
128
129 exports_ps = 0;
130 num_cout = 0;
131 for (i = 0; i < rshader->noutput; i++) {
132 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
133 exports_ps |= 1;
134 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
135 num_cout++;
136 }
137 }
138 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
139 if (!exports_ps) {
140 /* always at least export 1 component per pixel */
141 exports_ps = 2;
142 }
143
144 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
145 S_0286CC_PERSP_GRADIENT_ENA(1);
146 spi_input_z = 0;
147 if (pos_index != -1) {
148 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
149 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
150 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
151 S_0286CC_BARYC_SAMPLE_CNTL(1));
152 spi_input_z |= 1;
153 }
154
155 spi_ps_in_control_1 = 0;
156 if (face_index != -1) {
157 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
158 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
159 }
160
161 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
162 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
163 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
164 r600_pipe_state_add_reg(rstate,
165 R_028840_SQ_PGM_START_PS,
166 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
167 r600_pipe_state_add_reg(rstate,
168 R_028850_SQ_PGM_RESOURCES_PS,
169 S_028868_NUM_GPRS(rshader->bc.ngpr) |
170 S_028868_STACK_SIZE(rshader->bc.nstack),
171 0xFFFFFFFF, NULL);
172 r600_pipe_state_add_reg(rstate,
173 R_028854_SQ_PGM_EXPORTS_PS,
174 exports_ps, 0xFFFFFFFF, NULL);
175 r600_pipe_state_add_reg(rstate,
176 R_0288CC_SQ_PGM_CF_OFFSET_PS,
177 0x00000000, 0xFFFFFFFF, NULL);
178
179 if (rshader->fs_write_all) {
180 r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
181 S_028808_MULTIWRITE_ENABLE(1),
182 S_028808_MULTIWRITE_ENABLE(1),
183 NULL);
184 }
185
186 if (rshader->uses_kill) {
187 /* only set some bits here, the other bits are set in the dsa state */
188 r600_pipe_state_add_reg(rstate,
189 R_02880C_DB_SHADER_CONTROL,
190 S_02880C_KILL_ENABLE(1),
191 S_02880C_KILL_ENABLE(1), NULL);
192 }
193 r600_pipe_state_add_reg(rstate,
194 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
195 0xFFFFFFFF, NULL);
196 }
197
198 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
199 {
200 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
201 struct r600_shader *rshader = &shader->shader;
202 void *ptr;
203
204 /* copy new shader */
205 if (shader->bo == NULL) {
206 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
207 if (shader->bo == NULL) {
208 return -ENOMEM;
209 }
210 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
211 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
212 r600_bo_unmap(rctx->radeon, shader->bo);
213 }
214 /* build state */
215 switch (rshader->processor_type) {
216 case TGSI_PROCESSOR_VERTEX:
217 if (rshader->family >= CHIP_CEDAR) {
218 evergreen_pipe_shader_vs(ctx, shader);
219 } else {
220 r600_pipe_shader_vs(ctx, shader);
221 }
222 break;
223 case TGSI_PROCESSOR_FRAGMENT:
224 if (rshader->family >= CHIP_CEDAR) {
225 evergreen_pipe_shader_ps(ctx, shader);
226 } else {
227 r600_pipe_shader_ps(ctx, shader);
228 }
229 break;
230 default:
231 return -EINVAL;
232 }
233 return 0;
234 }
235
236 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
237
238 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
239 {
240 static int dump_shaders = -1;
241 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
242 int r;
243
244 /* Would like some magic "get_bool_option_once" routine.
245 */
246 if (dump_shaders == -1)
247 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
248
249 if (dump_shaders) {
250 fprintf(stderr, "--------------------------------------------------------------\n");
251 tgsi_dump(tokens, 0);
252 }
253 shader->shader.family = r600_get_family(rctx->radeon);
254 r = r600_shader_from_tgsi(tokens, &shader->shader);
255 if (r) {
256 R600_ERR("translation from TGSI failed !\n");
257 return r;
258 }
259 r = r600_bc_build(&shader->shader.bc);
260 if (r) {
261 R600_ERR("building bytecode failed !\n");
262 return r;
263 }
264 if (dump_shaders) {
265 r600_bc_dump(&shader->shader.bc);
266 fprintf(stderr, "______________________________________________________________\n");
267 }
268 return r600_pipe_shader(ctx, shader);
269 }
270
271 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
272 {
273 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
274
275 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
276 r600_bc_clear(&shader->shader.bc);
277 }
278
/*
 * tgsi -> r600 shader
 */
struct r600_shader_tgsi_instruction;

/*
 * One TGSI source operand after translation by tgsi_src().
 */
struct r600_shader_src {
	unsigned	sel;		/* register / constant / special selector */
	unsigned	swizzle[4];	/* source channel for each of x, y, z, w */
	unsigned	neg, abs, rel;	/* negate, absolute and relative-addressing flags */
	uint32_t	value[4];	/* literal dwords, filled in when sel is V_SQ_ALU_SRC_LITERAL */
};
292
293 struct r600_shader_ctx {
294 struct tgsi_shader_info info;
295 struct tgsi_parse_context parse;
296 const struct tgsi_token *tokens;
297 unsigned type;
298 unsigned file_offset[TGSI_FILE_COUNT];
299 unsigned temp_reg;
300 unsigned ar_reg;
301 struct r600_shader_tgsi_instruction *inst_info;
302 struct r600_bc *bc;
303 struct r600_shader *shader;
304 struct r600_shader_src src[3];
305 u32 *literals;
306 u32 nliterals;
307 u32 max_driver_temp_used;
308 /* needed for evergreen interpolation */
309 boolean input_centroid;
310 boolean input_linear;
311 boolean input_perspective;
312 int num_interp_gpr;
313 };
314
/*
 * One row of the per-chip opcode tables: how a TGSI opcode is translated.
 */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI opcode this row describes */
	unsigned	is_op3;
	unsigned	r600_opcode;	/* hardware opcode copied into the emitted ALU instruction */
	int (*process)(struct r600_shader_ctx *ctx);	/* translator, returns 0 or a negative errno */
};

/* opcode tables (indexed by TGSI opcode), defined further down in this file */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
324
325 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
326 {
327 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
328 int j;
329
330 if (i->Instruction.NumDstRegs > 1) {
331 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
332 return -EINVAL;
333 }
334 if (i->Instruction.Predicate) {
335 R600_ERR("predicate unsupported\n");
336 return -EINVAL;
337 }
338 #if 0
339 if (i->Instruction.Label) {
340 R600_ERR("label unsupported\n");
341 return -EINVAL;
342 }
343 #endif
344 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
345 if (i->Src[j].Register.Dimension) {
346 R600_ERR("unsupported src %d (dimension %d)\n", j,
347 i->Src[j].Register.Dimension);
348 return -EINVAL;
349 }
350 }
351 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
352 if (i->Dst[j].Register.Dimension) {
353 R600_ERR("unsupported dst (dimension)\n");
354 return -EINVAL;
355 }
356 }
357 return 0;
358 }
359
360 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
361 {
362 int i, r;
363 struct r600_bc_alu alu;
364 int gpr = 0, base_chan = 0;
365 int ij_index = 0;
366
367 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
368 ij_index = 0;
369 if (ctx->shader->input[input].centroid)
370 ij_index++;
371 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
372 ij_index = 0;
373 /* if we have perspective add one */
374 if (ctx->input_perspective) {
375 ij_index++;
376 /* if we have perspective centroid */
377 if (ctx->input_centroid)
378 ij_index++;
379 }
380 if (ctx->shader->input[input].centroid)
381 ij_index++;
382 }
383
384 /* work out gpr and base_chan from index */
385 gpr = ij_index / 2;
386 base_chan = (2 * (ij_index % 2)) + 1;
387
388 for (i = 0; i < 8; i++) {
389 memset(&alu, 0, sizeof(struct r600_bc_alu));
390
391 if (i < 4)
392 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
393 else
394 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
395
396 if ((i > 1) && (i < 6)) {
397 alu.dst.sel = ctx->shader->input[input].gpr;
398 alu.dst.write = 1;
399 }
400
401 alu.dst.chan = i % 4;
402
403 alu.src[0].sel = gpr;
404 alu.src[0].chan = (base_chan - (i % 2));
405
406 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
407
408 alu.bank_swizzle_force = SQ_ALU_VEC_210;
409 if ((i % 4) == 3)
410 alu.last = 1;
411 r = r600_bc_add_alu(ctx->bc, &alu);
412 if (r)
413 return r;
414 }
415 return 0;
416 }
417
418
419 static int tgsi_declaration(struct r600_shader_ctx *ctx)
420 {
421 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
422 unsigned i;
423 int r;
424
425 switch (d->Declaration.File) {
426 case TGSI_FILE_INPUT:
427 i = ctx->shader->ninput++;
428 ctx->shader->input[i].name = d->Semantic.Name;
429 ctx->shader->input[i].sid = d->Semantic.Index;
430 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
431 ctx->shader->input[i].centroid = d->Declaration.Centroid;
432 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
433 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
434 /* turn input into interpolate on EG */
435 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
436 if (ctx->shader->input[i].interpolate > 0) {
437 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
438 evergreen_interp_alu(ctx, i);
439 }
440 }
441 }
442 break;
443 case TGSI_FILE_OUTPUT:
444 i = ctx->shader->noutput++;
445 ctx->shader->output[i].name = d->Semantic.Name;
446 ctx->shader->output[i].sid = d->Semantic.Index;
447 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
448 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
449 break;
450 case TGSI_FILE_CONSTANT:
451 case TGSI_FILE_TEMPORARY:
452 case TGSI_FILE_SAMPLER:
453 case TGSI_FILE_ADDRESS:
454 break;
455
456 case TGSI_FILE_SYSTEM_VALUE:
457 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
458 struct r600_bc_alu alu;
459 memset(&alu, 0, sizeof(struct r600_bc_alu));
460
461 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
462 alu.src[0].sel = 0;
463 alu.src[0].chan = 3;
464
465 alu.dst.sel = 0;
466 alu.dst.chan = 3;
467 alu.dst.write = 1;
468 alu.last = 1;
469
470 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
471 return r;
472 break;
473 }
474
475 default:
476 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
477 return -EINVAL;
478 }
479 return 0;
480 }
481
482 static int r600_get_temp(struct r600_shader_ctx *ctx)
483 {
484 return ctx->temp_reg + ctx->max_driver_temp_used++;
485 }
486
487 /*
488 * for evergreen we need to scan the shader to find the number of GPRs we need to
489 * reserve for interpolation.
490 *
491 * we need to know if we are going to emit
492 * any centroid inputs
493 * if perspective and linear are required
494 */
495 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
496 {
497 int i;
498 int num_baryc;
499
500 ctx->input_linear = FALSE;
501 ctx->input_perspective = FALSE;
502 ctx->input_centroid = FALSE;
503 ctx->num_interp_gpr = 1;
504
505 /* any centroid inputs */
506 for (i = 0; i < ctx->info.num_inputs; i++) {
507 /* skip position/face */
508 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
509 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
510 continue;
511 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
512 ctx->input_linear = TRUE;
513 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
514 ctx->input_perspective = TRUE;
515 if (ctx->info.input_centroid[i])
516 ctx->input_centroid = TRUE;
517 }
518
519 num_baryc = 0;
520 /* ignoring sample for now */
521 if (ctx->input_perspective)
522 num_baryc++;
523 if (ctx->input_linear)
524 num_baryc++;
525 if (ctx->input_centroid)
526 num_baryc *= 2;
527
528 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
529
530 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
531 return ctx->num_interp_gpr;
532 }
533
534 static void tgsi_src(struct r600_shader_ctx *ctx,
535 const struct tgsi_full_src_register *tgsi_src,
536 struct r600_shader_src *r600_src)
537 {
538 memset(r600_src, 0, sizeof(*r600_src));
539 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
540 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
541 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
542 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
543 r600_src->neg = tgsi_src->Register.Negate;
544 r600_src->abs = tgsi_src->Register.Absolute;
545
546 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
547 int index;
548 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
549 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
550 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
551
552 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
553 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
554 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
555 return;
556 }
557 index = tgsi_src->Register.Index;
558 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
559 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
560 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
561 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
562 r600_src->swizzle[0] = 3;
563 r600_src->swizzle[1] = 3;
564 r600_src->swizzle[2] = 3;
565 r600_src->swizzle[3] = 3;
566 r600_src->sel = 0;
567 } else {
568 if (tgsi_src->Register.Indirect)
569 r600_src->rel = V_SQ_REL_RELATIVE;
570 r600_src->sel = tgsi_src->Register.Index;
571 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
572 }
573 }
574
575 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
576 {
577 struct r600_bc_vtx vtx;
578 unsigned int ar_reg;
579 int r;
580
581 if (offset) {
582 struct r600_bc_alu alu;
583
584 memset(&alu, 0, sizeof(alu));
585
586 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
587 alu.src[0].sel = ctx->ar_reg;
588
589 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
590 alu.src[1].value = offset;
591
592 alu.dst.sel = dst_reg;
593 alu.dst.write = 1;
594 alu.last = 1;
595
596 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
597 return r;
598
599 ar_reg = dst_reg;
600 } else {
601 ar_reg = ctx->ar_reg;
602 }
603
604 memset(&vtx, 0, sizeof(vtx));
605 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
606 vtx.src_gpr = ar_reg;
607 vtx.mega_fetch_count = 16;
608 vtx.dst_gpr = dst_reg;
609 vtx.dst_sel_x = 0; /* SEL_X */
610 vtx.dst_sel_y = 1; /* SEL_Y */
611 vtx.dst_sel_z = 2; /* SEL_Z */
612 vtx.dst_sel_w = 3; /* SEL_W */
613 vtx.data_format = FMT_32_32_32_32_FLOAT;
614 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
615 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
616 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
617
618 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
619 return r;
620
621 return 0;
622 }
623
624 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
625 {
626 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
627 struct r600_bc_alu alu;
628 int i, j, k, nconst, r;
629
630 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
631 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
632 nconst++;
633 }
634 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
635 }
636 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
637 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
638 continue;
639 }
640
641 if (ctx->src[i].rel) {
642 int treg = r600_get_temp(ctx);
643 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
644 return r;
645
646 ctx->src[i].sel = treg;
647 ctx->src[i].rel = 0;
648 j--;
649 } else if (j > 0) {
650 int treg = r600_get_temp(ctx);
651 for (k = 0; k < 4; k++) {
652 memset(&alu, 0, sizeof(struct r600_bc_alu));
653 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
654 alu.src[0].sel = ctx->src[i].sel;
655 alu.src[0].chan = k;
656 alu.src[0].rel = ctx->src[i].rel;
657 alu.dst.sel = treg;
658 alu.dst.chan = k;
659 alu.dst.write = 1;
660 if (k == 3)
661 alu.last = 1;
662 r = r600_bc_add_alu(ctx->bc, &alu);
663 if (r)
664 return r;
665 }
666 ctx->src[i].sel = treg;
667 ctx->src[i].rel =0;
668 j--;
669 }
670 }
671 return 0;
672 }
673
674 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
675 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
676 {
677 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
678 struct r600_bc_alu alu;
679 int i, j, k, nliteral, r;
680
681 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
682 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
683 nliteral++;
684 }
685 }
686 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
687 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
688 int treg = r600_get_temp(ctx);
689 for (k = 0; k < 4; k++) {
690 memset(&alu, 0, sizeof(struct r600_bc_alu));
691 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
692 alu.src[0].sel = ctx->src[i].sel;
693 alu.src[0].chan = k;
694 alu.src[0].value = ctx->src[i].value[k];
695 alu.dst.sel = treg;
696 alu.dst.chan = k;
697 alu.dst.write = 1;
698 if (k == 3)
699 alu.last = 1;
700 r = r600_bc_add_alu(ctx->bc, &alu);
701 if (r)
702 return r;
703 }
704 ctx->src[i].sel = treg;
705 j--;
706 }
707 }
708 return 0;
709 }
710
711 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
712 {
713 struct tgsi_full_immediate *immediate;
714 struct tgsi_full_property *property;
715 struct r600_shader_ctx ctx;
716 struct r600_bc_output output[32];
717 unsigned output_done, noutput;
718 unsigned opcode;
719 int i, r = 0, pos0;
720
721 ctx.bc = &shader->bc;
722 ctx.shader = shader;
723 r = r600_bc_init(ctx.bc, shader->family);
724 if (r)
725 return r;
726 ctx.tokens = tokens;
727 tgsi_scan_shader(tokens, &ctx.info);
728 tgsi_parse_init(&ctx.parse, tokens);
729 ctx.type = ctx.parse.FullHeader.Processor.Processor;
730 shader->processor_type = ctx.type;
731 ctx.bc->type = shader->processor_type;
732
733 /* register allocations */
734 /* Values [0,127] correspond to GPR[0..127].
735 * Values [128,159] correspond to constant buffer bank 0
736 * Values [160,191] correspond to constant buffer bank 1
737 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
738 * Values [256,287] correspond to constant buffer bank 2 (EG)
739 * Values [288,319] correspond to constant buffer bank 3 (EG)
740 * Other special values are shown in the list below.
741 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
742 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
743 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
744 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
745 * 248 SQ_ALU_SRC_0: special constant 0.0.
746 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
747 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
748 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
749 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
750 * 253 SQ_ALU_SRC_LITERAL: literal constant.
751 * 254 SQ_ALU_SRC_PV: previous vector result.
752 * 255 SQ_ALU_SRC_PS: previous scalar result.
753 */
754 for (i = 0; i < TGSI_FILE_COUNT; i++) {
755 ctx.file_offset[i] = 0;
756 }
757 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
758 ctx.file_offset[TGSI_FILE_INPUT] = 1;
759 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
760 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
761 } else {
762 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
763 }
764 }
765 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
766 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
767 }
768 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
769 ctx.info.file_count[TGSI_FILE_INPUT];
770 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
771 ctx.info.file_count[TGSI_FILE_OUTPUT];
772
773 /* Outside the GPR range. This will be translated to one of the
774 * kcache banks later. */
775 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
776
777 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
778 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
779 ctx.info.file_count[TGSI_FILE_TEMPORARY];
780 ctx.temp_reg = ctx.ar_reg + 1;
781
782 ctx.nliterals = 0;
783 ctx.literals = NULL;
784 shader->fs_write_all = FALSE;
785 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
786 tgsi_parse_token(&ctx.parse);
787 switch (ctx.parse.FullToken.Token.Type) {
788 case TGSI_TOKEN_TYPE_IMMEDIATE:
789 immediate = &ctx.parse.FullToken.FullImmediate;
790 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
791 if(ctx.literals == NULL) {
792 r = -ENOMEM;
793 goto out_err;
794 }
795 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
796 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
797 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
798 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
799 ctx.nliterals++;
800 break;
801 case TGSI_TOKEN_TYPE_DECLARATION:
802 r = tgsi_declaration(&ctx);
803 if (r)
804 goto out_err;
805 break;
806 case TGSI_TOKEN_TYPE_INSTRUCTION:
807 r = tgsi_is_supported(&ctx);
808 if (r)
809 goto out_err;
810 ctx.max_driver_temp_used = 0;
811 /* reserve first tmp for everyone */
812 r600_get_temp(&ctx);
813
814 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
815 if ((r = tgsi_split_constant(&ctx)))
816 goto out_err;
817 if ((r = tgsi_split_literal_constant(&ctx)))
818 goto out_err;
819 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
820 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
821 else
822 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
823 r = ctx.inst_info->process(&ctx);
824 if (r)
825 goto out_err;
826 break;
827 case TGSI_TOKEN_TYPE_PROPERTY:
828 property = &ctx.parse.FullToken.FullProperty;
829 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
830 if (property->u[0].Data == 1)
831 shader->fs_write_all = TRUE;
832 }
833 break;
834 default:
835 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
836 r = -EINVAL;
837 goto out_err;
838 }
839 }
840 /* export output */
841 noutput = shader->noutput;
842 for (i = 0, pos0 = 0; i < noutput; i++) {
843 memset(&output[i], 0, sizeof(struct r600_bc_output));
844 output[i].gpr = shader->output[i].gpr;
845 output[i].elem_size = 3;
846 output[i].swizzle_x = 0;
847 output[i].swizzle_y = 1;
848 output[i].swizzle_z = 2;
849 output[i].swizzle_w = 3;
850 output[i].burst_count = 1;
851 output[i].barrier = 1;
852 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
853 output[i].array_base = i - pos0;
854 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
855 switch (ctx.type) {
856 case TGSI_PROCESSOR_VERTEX:
857 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
858 output[i].array_base = 60;
859 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
860 /* position doesn't count in array_base */
861 pos0++;
862 }
863 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
864 output[i].array_base = 61;
865 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
866 /* position doesn't count in array_base */
867 pos0++;
868 }
869 break;
870 case TGSI_PROCESSOR_FRAGMENT:
871 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
872 output[i].array_base = shader->output[i].sid;
873 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
874 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
875 output[i].array_base = 61;
876 output[i].swizzle_x = 2;
877 output[i].swizzle_y = 7;
878 output[i].swizzle_z = output[i].swizzle_w = 7;
879 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
880 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
881 output[i].array_base = 61;
882 output[i].swizzle_x = 7;
883 output[i].swizzle_y = 1;
884 output[i].swizzle_z = output[i].swizzle_w = 7;
885 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
886 } else {
887 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
888 r = -EINVAL;
889 goto out_err;
890 }
891 break;
892 default:
893 R600_ERR("unsupported processor type %d\n", ctx.type);
894 r = -EINVAL;
895 goto out_err;
896 }
897 }
898 /* add fake param output for vertex shader if no param is exported */
899 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
900 for (i = 0, pos0 = 0; i < noutput; i++) {
901 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
902 pos0 = 1;
903 break;
904 }
905 }
906 if (!pos0) {
907 memset(&output[i], 0, sizeof(struct r600_bc_output));
908 output[i].gpr = 0;
909 output[i].elem_size = 3;
910 output[i].swizzle_x = 0;
911 output[i].swizzle_y = 1;
912 output[i].swizzle_z = 2;
913 output[i].swizzle_w = 3;
914 output[i].burst_count = 1;
915 output[i].barrier = 1;
916 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
917 output[i].array_base = 0;
918 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
919 noutput++;
920 }
921 }
922 /* add fake pixel export */
923 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
924 memset(&output[0], 0, sizeof(struct r600_bc_output));
925 output[0].gpr = 0;
926 output[0].elem_size = 3;
927 output[0].swizzle_x = 7;
928 output[0].swizzle_y = 7;
929 output[0].swizzle_z = 7;
930 output[0].swizzle_w = 7;
931 output[0].burst_count = 1;
932 output[0].barrier = 1;
933 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
934 output[0].array_base = 0;
935 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
936 noutput++;
937 }
938 /* set export done on last export of each type */
939 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
940 if (i == (noutput - 1)) {
941 output[i].end_of_program = 1;
942 }
943 if (!(output_done & (1 << output[i].type))) {
944 output_done |= (1 << output[i].type);
945 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
946 }
947 }
948 /* add output to bytecode */
949 for (i = 0; i < noutput; i++) {
950 r = r600_bc_add_output(ctx.bc, &output[i]);
951 if (r)
952 goto out_err;
953 }
954 free(ctx.literals);
955 tgsi_parse_free(&ctx.parse);
956 return 0;
957 out_err:
958 free(ctx.literals);
959 tgsi_parse_free(&ctx.parse);
960 return r;
961 }
962
963 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
964 {
965 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
966 return -EINVAL;
967 }
968
/*
 * Handler that emits nothing and always succeeds; ctx is not used.
 * NOTE(review): presumably bound to TGSI_OPCODE_END in the opcode tables -
 * confirm there.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	/* nothing to emit */
	return 0;
}
973
974 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
975 const struct r600_shader_src *shader_src,
976 unsigned chan)
977 {
978 bc_src->sel = shader_src->sel;
979 bc_src->chan = shader_src->swizzle[chan];
980 bc_src->neg = shader_src->neg;
981 bc_src->abs = shader_src->abs;
982 bc_src->rel = shader_src->rel;
983 bc_src->value = shader_src->value[bc_src->chan];
984 }
985
986 static void tgsi_dst(struct r600_shader_ctx *ctx,
987 const struct tgsi_full_dst_register *tgsi_dst,
988 unsigned swizzle,
989 struct r600_bc_alu_dst *r600_dst)
990 {
991 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
992
993 r600_dst->sel = tgsi_dst->Register.Index;
994 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
995 r600_dst->chan = swizzle;
996 r600_dst->write = 1;
997 if (tgsi_dst->Register.Indirect)
998 r600_dst->rel = V_SQ_REL_RELATIVE;
999 if (inst->Instruction.Saturate) {
1000 r600_dst->clamp = 1;
1001 }
1002 }
1003
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
1015
1016 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1017 {
1018 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1019 struct r600_bc_alu alu;
1020 int i, j, r;
1021 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1022
1023 for (i = 0; i < lasti + 1; i++) {
1024 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1025 continue;
1026
1027 memset(&alu, 0, sizeof(struct r600_bc_alu));
1028 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1029
1030 alu.inst = ctx->inst_info->r600_opcode;
1031 if (!swap) {
1032 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1033 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1034 }
1035 } else {
1036 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1037 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1038 }
1039 /* handle some special cases */
1040 switch (ctx->inst_info->tgsi_opcode) {
1041 case TGSI_OPCODE_SUB:
1042 alu.src[1].neg = 1;
1043 break;
1044 case TGSI_OPCODE_ABS:
1045 alu.src[0].abs = 1;
1046 break;
1047 default:
1048 break;
1049 }
1050 if (i == lasti) {
1051 alu.last = 1;
1052 }
1053 r = r600_bc_add_alu(ctx->bc, &alu);
1054 if (r)
1055 return r;
1056 }
1057 return 0;
1058 }
1059
/* Two-operand ALU opcode with the sources in their TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1064
/* Two-operand ALU opcode with source 0 and source 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1069
1070 /*
1071 * r600 - trunc to -PI..PI range
1072 * r700 - normalize by dividing by 2PI
1073 * see fdo bug 27901
1074 */
1075 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1076 {
1077 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1078 static float double_pi = 3.1415926535 * 2;
1079 static float neg_pi = -3.1415926535;
1080
1081 int r;
1082 struct r600_bc_alu alu;
1083
1084 memset(&alu, 0, sizeof(struct r600_bc_alu));
1085 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1086 alu.is_op3 = 1;
1087
1088 alu.dst.chan = 0;
1089 alu.dst.sel = ctx->temp_reg;
1090 alu.dst.write = 1;
1091
1092 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1093
1094 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1095 alu.src[1].chan = 0;
1096 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1097 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1098 alu.src[2].chan = 0;
1099 alu.last = 1;
1100 r = r600_bc_add_alu(ctx->bc, &alu);
1101 if (r)
1102 return r;
1103
1104 memset(&alu, 0, sizeof(struct r600_bc_alu));
1105 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1106
1107 alu.dst.chan = 0;
1108 alu.dst.sel = ctx->temp_reg;
1109 alu.dst.write = 1;
1110
1111 alu.src[0].sel = ctx->temp_reg;
1112 alu.src[0].chan = 0;
1113 alu.last = 1;
1114 r = r600_bc_add_alu(ctx->bc, &alu);
1115 if (r)
1116 return r;
1117
1118 memset(&alu, 0, sizeof(struct r600_bc_alu));
1119 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1120 alu.is_op3 = 1;
1121
1122 alu.dst.chan = 0;
1123 alu.dst.sel = ctx->temp_reg;
1124 alu.dst.write = 1;
1125
1126 alu.src[0].sel = ctx->temp_reg;
1127 alu.src[0].chan = 0;
1128
1129 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1130 alu.src[1].chan = 0;
1131 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1132 alu.src[2].chan = 0;
1133
1134 if (ctx->bc->chiprev == CHIPREV_R600) {
1135 alu.src[1].value = *(uint32_t *)&double_pi;
1136 alu.src[2].value = *(uint32_t *)&neg_pi;
1137 } else {
1138 alu.src[1].sel = V_SQ_ALU_SRC_1;
1139 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1140 alu.src[2].neg = 1;
1141 }
1142
1143 alu.last = 1;
1144 r = r600_bc_add_alu(ctx->bc, &alu);
1145 if (r)
1146 return r;
1147 return 0;
1148 }
1149
1150 static int tgsi_trig(struct r600_shader_ctx *ctx)
1151 {
1152 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1153 struct r600_bc_alu alu;
1154 int i, r;
1155 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1156
1157 r = tgsi_setup_trig(ctx);
1158 if (r)
1159 return r;
1160
1161 memset(&alu, 0, sizeof(struct r600_bc_alu));
1162 alu.inst = ctx->inst_info->r600_opcode;
1163 alu.dst.chan = 0;
1164 alu.dst.sel = ctx->temp_reg;
1165 alu.dst.write = 1;
1166
1167 alu.src[0].sel = ctx->temp_reg;
1168 alu.src[0].chan = 0;
1169 alu.last = 1;
1170 r = r600_bc_add_alu(ctx->bc, &alu);
1171 if (r)
1172 return r;
1173
1174 /* replicate result */
1175 for (i = 0; i < lasti + 1; i++) {
1176 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1177 continue;
1178
1179 memset(&alu, 0, sizeof(struct r600_bc_alu));
1180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1181
1182 alu.src[0].sel = ctx->temp_reg;
1183 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1184 if (i == lasti)
1185 alu.last = 1;
1186 r = r600_bc_add_alu(ctx->bc, &alu);
1187 if (r)
1188 return r;
1189 }
1190 return 0;
1191 }
1192
1193 static int tgsi_scs(struct r600_shader_ctx *ctx)
1194 {
1195 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1196 struct r600_bc_alu alu;
1197 int r;
1198
1199 /* We'll only need the trig stuff if we are going to write to the
1200 * X or Y components of the destination vector.
1201 */
1202 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1203 r = tgsi_setup_trig(ctx);
1204 if (r)
1205 return r;
1206 }
1207
1208 /* dst.x = COS */
1209 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1210 memset(&alu, 0, sizeof(struct r600_bc_alu));
1211 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1212 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1213
1214 alu.src[0].sel = ctx->temp_reg;
1215 alu.src[0].chan = 0;
1216 alu.last = 1;
1217 r = r600_bc_add_alu(ctx->bc, &alu);
1218 if (r)
1219 return r;
1220 }
1221
1222 /* dst.y = SIN */
1223 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1224 memset(&alu, 0, sizeof(struct r600_bc_alu));
1225 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1226 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1227
1228 alu.src[0].sel = ctx->temp_reg;
1229 alu.src[0].chan = 0;
1230 alu.last = 1;
1231 r = r600_bc_add_alu(ctx->bc, &alu);
1232 if (r)
1233 return r;
1234 }
1235
1236 /* dst.z = 0.0; */
1237 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1238 memset(&alu, 0, sizeof(struct r600_bc_alu));
1239
1240 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1241
1242 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1243
1244 alu.src[0].sel = V_SQ_ALU_SRC_0;
1245 alu.src[0].chan = 0;
1246
1247 alu.last = 1;
1248
1249 r = r600_bc_add_alu(ctx->bc, &alu);
1250 if (r)
1251 return r;
1252 }
1253
1254 /* dst.w = 1.0; */
1255 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1256 memset(&alu, 0, sizeof(struct r600_bc_alu));
1257
1258 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1259
1260 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1261
1262 alu.src[0].sel = V_SQ_ALU_SRC_1;
1263 alu.src[0].chan = 0;
1264
1265 alu.last = 1;
1266
1267 r = r600_bc_add_alu(ctx->bc, &alu);
1268 if (r)
1269 return r;
1270 }
1271
1272 return 0;
1273 }
1274
1275 static int tgsi_kill(struct r600_shader_ctx *ctx)
1276 {
1277 struct r600_bc_alu alu;
1278 int i, r;
1279
1280 for (i = 0; i < 4; i++) {
1281 memset(&alu, 0, sizeof(struct r600_bc_alu));
1282 alu.inst = ctx->inst_info->r600_opcode;
1283
1284 alu.dst.chan = i;
1285
1286 alu.src[0].sel = V_SQ_ALU_SRC_0;
1287
1288 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1289 alu.src[1].sel = V_SQ_ALU_SRC_1;
1290 alu.src[1].neg = 1;
1291 } else {
1292 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1293 }
1294 if (i == 3) {
1295 alu.last = 1;
1296 }
1297 r = r600_bc_add_alu(ctx->bc, &alu);
1298 if (r)
1299 return r;
1300 }
1301
1302 /* kill must be last in ALU */
1303 ctx->bc->force_add_cf = 1;
1304 ctx->shader->uses_kill = TRUE;
1305 return 0;
1306 }
1307
1308 static int tgsi_lit(struct r600_shader_ctx *ctx)
1309 {
1310 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1311 struct r600_bc_alu alu;
1312 int r;
1313
1314 /* dst.x, <- 1.0 */
1315 memset(&alu, 0, sizeof(struct r600_bc_alu));
1316 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1317 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1318 alu.src[0].chan = 0;
1319 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1320 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1321 r = r600_bc_add_alu(ctx->bc, &alu);
1322 if (r)
1323 return r;
1324
1325 /* dst.y = max(src.x, 0.0) */
1326 memset(&alu, 0, sizeof(struct r600_bc_alu));
1327 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1328 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1329 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1330 alu.src[1].chan = 0;
1331 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1332 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1333 r = r600_bc_add_alu(ctx->bc, &alu);
1334 if (r)
1335 return r;
1336
1337 /* dst.w, <- 1.0 */
1338 memset(&alu, 0, sizeof(struct r600_bc_alu));
1339 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1340 alu.src[0].sel = V_SQ_ALU_SRC_1;
1341 alu.src[0].chan = 0;
1342 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1343 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1344 alu.last = 1;
1345 r = r600_bc_add_alu(ctx->bc, &alu);
1346 if (r)
1347 return r;
1348
1349 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1350 {
1351 int chan;
1352 int sel;
1353
1354 /* dst.z = log(src.y) */
1355 memset(&alu, 0, sizeof(struct r600_bc_alu));
1356 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1357 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1358 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1359 alu.last = 1;
1360 r = r600_bc_add_alu(ctx->bc, &alu);
1361 if (r)
1362 return r;
1363
1364 chan = alu.dst.chan;
1365 sel = alu.dst.sel;
1366
1367 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1368 memset(&alu, 0, sizeof(struct r600_bc_alu));
1369 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1370 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1371 alu.src[1].sel = sel;
1372 alu.src[1].chan = chan;
1373
1374 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1375 alu.dst.sel = ctx->temp_reg;
1376 alu.dst.chan = 0;
1377 alu.dst.write = 1;
1378 alu.is_op3 = 1;
1379 alu.last = 1;
1380 r = r600_bc_add_alu(ctx->bc, &alu);
1381 if (r)
1382 return r;
1383
1384 /* dst.z = exp(tmp.x) */
1385 memset(&alu, 0, sizeof(struct r600_bc_alu));
1386 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1387 alu.src[0].sel = ctx->temp_reg;
1388 alu.src[0].chan = 0;
1389 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1390 alu.last = 1;
1391 r = r600_bc_add_alu(ctx->bc, &alu);
1392 if (r)
1393 return r;
1394 }
1395 return 0;
1396 }
1397
1398 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1399 {
1400 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1401 struct r600_bc_alu alu;
1402 int i, r;
1403
1404 memset(&alu, 0, sizeof(struct r600_bc_alu));
1405
1406 /* FIXME:
1407 * For state trackers other than OpenGL, we'll want to use
1408 * _RECIPSQRT_IEEE instead.
1409 */
1410 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1411
1412 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1413 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1414 alu.src[i].abs = 1;
1415 }
1416 alu.dst.sel = ctx->temp_reg;
1417 alu.dst.write = 1;
1418 alu.last = 1;
1419 r = r600_bc_add_alu(ctx->bc, &alu);
1420 if (r)
1421 return r;
1422 /* replicate result */
1423 return tgsi_helper_tempx_replicate(ctx);
1424 }
1425
1426 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1427 {
1428 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1429 struct r600_bc_alu alu;
1430 int i, r;
1431
1432 for (i = 0; i < 4; i++) {
1433 memset(&alu, 0, sizeof(struct r600_bc_alu));
1434 alu.src[0].sel = ctx->temp_reg;
1435 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1436 alu.dst.chan = i;
1437 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1438 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1439 if (i == 3)
1440 alu.last = 1;
1441 r = r600_bc_add_alu(ctx->bc, &alu);
1442 if (r)
1443 return r;
1444 }
1445 return 0;
1446 }
1447
1448 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1449 {
1450 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1451 struct r600_bc_alu alu;
1452 int i, r;
1453
1454 memset(&alu, 0, sizeof(struct r600_bc_alu));
1455 alu.inst = ctx->inst_info->r600_opcode;
1456 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1457 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1458 }
1459 alu.dst.sel = ctx->temp_reg;
1460 alu.dst.write = 1;
1461 alu.last = 1;
1462 r = r600_bc_add_alu(ctx->bc, &alu);
1463 if (r)
1464 return r;
1465 /* replicate result */
1466 return tgsi_helper_tempx_replicate(ctx);
1467 }
1468
1469 static int tgsi_pow(struct r600_shader_ctx *ctx)
1470 {
1471 struct r600_bc_alu alu;
1472 int r;
1473
1474 /* LOG2(a) */
1475 memset(&alu, 0, sizeof(struct r600_bc_alu));
1476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1477 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1478 alu.dst.sel = ctx->temp_reg;
1479 alu.dst.write = 1;
1480 alu.last = 1;
1481 r = r600_bc_add_alu(ctx->bc, &alu);
1482 if (r)
1483 return r;
1484 /* b * LOG2(a) */
1485 memset(&alu, 0, sizeof(struct r600_bc_alu));
1486 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1487 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1488 alu.src[1].sel = ctx->temp_reg;
1489 alu.dst.sel = ctx->temp_reg;
1490 alu.dst.write = 1;
1491 alu.last = 1;
1492 r = r600_bc_add_alu(ctx->bc, &alu);
1493 if (r)
1494 return r;
1495 /* POW(a,b) = EXP2(b * LOG2(a))*/
1496 memset(&alu, 0, sizeof(struct r600_bc_alu));
1497 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1498 alu.src[0].sel = ctx->temp_reg;
1499 alu.dst.sel = ctx->temp_reg;
1500 alu.dst.write = 1;
1501 alu.last = 1;
1502 r = r600_bc_add_alu(ctx->bc, &alu);
1503 if (r)
1504 return r;
1505 return tgsi_helper_tempx_replicate(ctx);
1506 }
1507
1508 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1509 {
1510 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1511 struct r600_bc_alu alu;
1512 int i, r;
1513
1514 /* tmp = (src > 0 ? 1 : src) */
1515 for (i = 0; i < 4; i++) {
1516 memset(&alu, 0, sizeof(struct r600_bc_alu));
1517 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1518 alu.is_op3 = 1;
1519
1520 alu.dst.sel = ctx->temp_reg;
1521 alu.dst.chan = i;
1522
1523 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1524 alu.src[1].sel = V_SQ_ALU_SRC_1;
1525 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1526
1527 if (i == 3)
1528 alu.last = 1;
1529 r = r600_bc_add_alu(ctx->bc, &alu);
1530 if (r)
1531 return r;
1532 }
1533
1534 /* dst = (-tmp > 0 ? -1 : tmp) */
1535 for (i = 0; i < 4; i++) {
1536 memset(&alu, 0, sizeof(struct r600_bc_alu));
1537 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1538 alu.is_op3 = 1;
1539 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1540
1541 alu.src[0].sel = ctx->temp_reg;
1542 alu.src[0].chan = i;
1543 alu.src[0].neg = 1;
1544
1545 alu.src[1].sel = V_SQ_ALU_SRC_1;
1546 alu.src[1].neg = 1;
1547
1548 alu.src[2].sel = ctx->temp_reg;
1549 alu.src[2].chan = i;
1550
1551 if (i == 3)
1552 alu.last = 1;
1553 r = r600_bc_add_alu(ctx->bc, &alu);
1554 if (r)
1555 return r;
1556 }
1557 return 0;
1558 }
1559
1560 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1561 {
1562 struct r600_bc_alu alu;
1563 int i, r;
1564
1565 for (i = 0; i < 4; i++) {
1566 memset(&alu, 0, sizeof(struct r600_bc_alu));
1567 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1568 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1569 alu.dst.chan = i;
1570 } else {
1571 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1572 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1573 alu.src[0].sel = ctx->temp_reg;
1574 alu.src[0].chan = i;
1575 }
1576 if (i == 3) {
1577 alu.last = 1;
1578 }
1579 r = r600_bc_add_alu(ctx->bc, &alu);
1580 if (r)
1581 return r;
1582 }
1583 return 0;
1584 }
1585
1586 static int tgsi_op3(struct r600_shader_ctx *ctx)
1587 {
1588 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1589 struct r600_bc_alu alu;
1590 int i, j, r;
1591 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1592
1593 for (i = 0; i < lasti + 1; i++) {
1594 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1595 continue;
1596
1597 memset(&alu, 0, sizeof(struct r600_bc_alu));
1598 alu.inst = ctx->inst_info->r600_opcode;
1599 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1600 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1601 }
1602
1603 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1604 alu.dst.chan = i;
1605 alu.dst.write = 1;
1606 alu.is_op3 = 1;
1607 if (i == lasti) {
1608 alu.last = 1;
1609 }
1610 r = r600_bc_add_alu(ctx->bc, &alu);
1611 if (r)
1612 return r;
1613 }
1614 return 0;
1615 }
1616
1617 static int tgsi_dp(struct r600_shader_ctx *ctx)
1618 {
1619 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1620 struct r600_bc_alu alu;
1621 int i, j, r;
1622
1623 for (i = 0; i < 4; i++) {
1624 memset(&alu, 0, sizeof(struct r600_bc_alu));
1625 alu.inst = ctx->inst_info->r600_opcode;
1626 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1627 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1628 }
1629
1630 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1631 alu.dst.chan = i;
1632 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1633 /* handle some special cases */
1634 switch (ctx->inst_info->tgsi_opcode) {
1635 case TGSI_OPCODE_DP2:
1636 if (i > 1) {
1637 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1638 alu.src[0].chan = alu.src[1].chan = 0;
1639 }
1640 break;
1641 case TGSI_OPCODE_DP3:
1642 if (i > 2) {
1643 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1644 alu.src[0].chan = alu.src[1].chan = 0;
1645 }
1646 break;
1647 case TGSI_OPCODE_DPH:
1648 if (i == 3) {
1649 alu.src[0].sel = V_SQ_ALU_SRC_1;
1650 alu.src[0].chan = 0;
1651 alu.src[0].neg = 0;
1652 }
1653 break;
1654 default:
1655 break;
1656 }
1657 if (i == 3) {
1658 alu.last = 1;
1659 }
1660 r = r600_bc_add_alu(ctx->bc, &alu);
1661 if (r)
1662 return r;
1663 }
1664 return 0;
1665 }
1666
1667 static int tgsi_tex(struct r600_shader_ctx *ctx)
1668 {
1669 static float one_point_five = 1.5f;
1670 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1671 struct r600_bc_tex tex;
1672 struct r600_bc_alu alu;
1673 unsigned src_gpr;
1674 int r, i;
1675 int opcode;
1676 boolean src_not_temp =
1677 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1678 inst->Src[0].Register.File != TGSI_FILE_INPUT;
1679
1680 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1681
1682 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1683 /* Add perspective divide */
1684 memset(&alu, 0, sizeof(struct r600_bc_alu));
1685 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1686 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1687
1688 alu.dst.sel = ctx->temp_reg;
1689 alu.dst.chan = 3;
1690 alu.last = 1;
1691 alu.dst.write = 1;
1692 r = r600_bc_add_alu(ctx->bc, &alu);
1693 if (r)
1694 return r;
1695
1696 for (i = 0; i < 3; i++) {
1697 memset(&alu, 0, sizeof(struct r600_bc_alu));
1698 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1699 alu.src[0].sel = ctx->temp_reg;
1700 alu.src[0].chan = 3;
1701 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1702 alu.dst.sel = ctx->temp_reg;
1703 alu.dst.chan = i;
1704 alu.dst.write = 1;
1705 r = r600_bc_add_alu(ctx->bc, &alu);
1706 if (r)
1707 return r;
1708 }
1709 memset(&alu, 0, sizeof(struct r600_bc_alu));
1710 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1711 alu.src[0].sel = V_SQ_ALU_SRC_1;
1712 alu.src[0].chan = 0;
1713 alu.dst.sel = ctx->temp_reg;
1714 alu.dst.chan = 3;
1715 alu.last = 1;
1716 alu.dst.write = 1;
1717 r = r600_bc_add_alu(ctx->bc, &alu);
1718 if (r)
1719 return r;
1720 src_not_temp = FALSE;
1721 src_gpr = ctx->temp_reg;
1722 }
1723
1724 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1725 int src_chan, src2_chan;
1726
1727 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1728 for (i = 0; i < 4; i++) {
1729 memset(&alu, 0, sizeof(struct r600_bc_alu));
1730 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1731 switch (i) {
1732 case 0:
1733 src_chan = 2;
1734 src2_chan = 1;
1735 break;
1736 case 1:
1737 src_chan = 2;
1738 src2_chan = 0;
1739 break;
1740 case 2:
1741 src_chan = 0;
1742 src2_chan = 2;
1743 break;
1744 case 3:
1745 src_chan = 1;
1746 src2_chan = 2;
1747 break;
1748 default:
1749 assert(0);
1750 src_chan = 0;
1751 src2_chan = 0;
1752 break;
1753 }
1754 r600_bc_src(&alu.src[0], &ctx->src[0], src_chan);
1755 r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan);
1756 alu.dst.sel = ctx->temp_reg;
1757 alu.dst.chan = i;
1758 if (i == 3)
1759 alu.last = 1;
1760 alu.dst.write = 1;
1761 r = r600_bc_add_alu(ctx->bc, &alu);
1762 if (r)
1763 return r;
1764 }
1765
1766 /* tmp1.z = RCP_e(|tmp1.z|) */
1767 memset(&alu, 0, sizeof(struct r600_bc_alu));
1768 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1769 alu.src[0].sel = ctx->temp_reg;
1770 alu.src[0].chan = 2;
1771 alu.src[0].abs = 1;
1772 alu.dst.sel = ctx->temp_reg;
1773 alu.dst.chan = 2;
1774 alu.dst.write = 1;
1775 alu.last = 1;
1776 r = r600_bc_add_alu(ctx->bc, &alu);
1777 if (r)
1778 return r;
1779
1780 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1781 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1782 * muladd has no writemask, have to use another temp
1783 */
1784 memset(&alu, 0, sizeof(struct r600_bc_alu));
1785 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1786 alu.is_op3 = 1;
1787
1788 alu.src[0].sel = ctx->temp_reg;
1789 alu.src[0].chan = 0;
1790 alu.src[1].sel = ctx->temp_reg;
1791 alu.src[1].chan = 2;
1792
1793 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1794 alu.src[2].chan = 0;
1795 alu.src[2].value = *(uint32_t *)&one_point_five;
1796
1797 alu.dst.sel = ctx->temp_reg;
1798 alu.dst.chan = 0;
1799 alu.dst.write = 1;
1800
1801 r = r600_bc_add_alu(ctx->bc, &alu);
1802 if (r)
1803 return r;
1804
1805 memset(&alu, 0, sizeof(struct r600_bc_alu));
1806 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1807 alu.is_op3 = 1;
1808
1809 alu.src[0].sel = ctx->temp_reg;
1810 alu.src[0].chan = 1;
1811 alu.src[1].sel = ctx->temp_reg;
1812 alu.src[1].chan = 2;
1813
1814 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1815 alu.src[2].chan = 0;
1816 alu.src[2].value = *(uint32_t *)&one_point_five;
1817
1818 alu.dst.sel = ctx->temp_reg;
1819 alu.dst.chan = 1;
1820 alu.dst.write = 1;
1821
1822 alu.last = 1;
1823 r = r600_bc_add_alu(ctx->bc, &alu);
1824 if (r)
1825 return r;
1826
1827 src_not_temp = FALSE;
1828 src_gpr = ctx->temp_reg;
1829 }
1830
1831 if (src_not_temp) {
1832 for (i = 0; i < 4; i++) {
1833 memset(&alu, 0, sizeof(struct r600_bc_alu));
1834 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1835 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1836 alu.dst.sel = ctx->temp_reg;
1837 alu.dst.chan = i;
1838 if (i == 3)
1839 alu.last = 1;
1840 alu.dst.write = 1;
1841 r = r600_bc_add_alu(ctx->bc, &alu);
1842 if (r)
1843 return r;
1844 }
1845 src_gpr = ctx->temp_reg;
1846 }
1847
1848 opcode = ctx->inst_info->r600_opcode;
1849 if (opcode == SQ_TEX_INST_SAMPLE &&
1850 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1851 opcode = SQ_TEX_INST_SAMPLE_C;
1852
1853 memset(&tex, 0, sizeof(struct r600_bc_tex));
1854 tex.inst = opcode;
1855 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1856 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1857 tex.src_gpr = src_gpr;
1858 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1859 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1860 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1861 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1862 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1863 tex.src_sel_x = 0;
1864 tex.src_sel_y = 1;
1865 tex.src_sel_z = 2;
1866 tex.src_sel_w = 3;
1867
1868 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1869 tex.src_sel_x = 1;
1870 tex.src_sel_y = 0;
1871 tex.src_sel_z = 3;
1872 tex.src_sel_w = 1;
1873 }
1874
1875 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1876 tex.coord_type_x = 1;
1877 tex.coord_type_y = 1;
1878 tex.coord_type_z = 1;
1879 tex.coord_type_w = 1;
1880 }
1881
1882 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1883 tex.coord_type_z = 0;
1884 tex.src_sel_z = 1;
1885 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1886 tex.coord_type_z = 0;
1887
1888 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1889 tex.src_sel_w = 2;
1890
1891 r = r600_bc_add_tex(ctx->bc, &tex);
1892 if (r)
1893 return r;
1894
1895 /* add shadow ambient support - gallium doesn't do it yet */
1896 return 0;
1897 }
1898
1899 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1900 {
1901 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1902 struct r600_bc_alu alu;
1903 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1904 unsigned i;
1905 int r;
1906
1907 /* optimize if it's just an equal balance */
1908 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1909 for (i = 0; i < lasti + 1; i++) {
1910 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1911 continue;
1912
1913 memset(&alu, 0, sizeof(struct r600_bc_alu));
1914 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1915 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1916 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1917 alu.omod = 3;
1918 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1919 alu.dst.chan = i;
1920 if (i == lasti) {
1921 alu.last = 1;
1922 }
1923 r = r600_bc_add_alu(ctx->bc, &alu);
1924 if (r)
1925 return r;
1926 }
1927 return 0;
1928 }
1929
1930 /* 1 - src0 */
1931 for (i = 0; i < lasti + 1; i++) {
1932 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1933 continue;
1934
1935 memset(&alu, 0, sizeof(struct r600_bc_alu));
1936 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1937 alu.src[0].sel = V_SQ_ALU_SRC_1;
1938 alu.src[0].chan = 0;
1939 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1940 alu.src[1].neg = 1;
1941 alu.dst.sel = ctx->temp_reg;
1942 alu.dst.chan = i;
1943 if (i == lasti) {
1944 alu.last = 1;
1945 }
1946 alu.dst.write = 1;
1947 r = r600_bc_add_alu(ctx->bc, &alu);
1948 if (r)
1949 return r;
1950 }
1951
1952 /* (1 - src0) * src2 */
1953 for (i = 0; i < lasti + 1; i++) {
1954 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1955 continue;
1956
1957 memset(&alu, 0, sizeof(struct r600_bc_alu));
1958 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1959 alu.src[0].sel = ctx->temp_reg;
1960 alu.src[0].chan = i;
1961 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1962 alu.dst.sel = ctx->temp_reg;
1963 alu.dst.chan = i;
1964 if (i == lasti) {
1965 alu.last = 1;
1966 }
1967 alu.dst.write = 1;
1968 r = r600_bc_add_alu(ctx->bc, &alu);
1969 if (r)
1970 return r;
1971 }
1972
1973 /* src0 * src1 + (1 - src0) * src2 */
1974 for (i = 0; i < lasti + 1; i++) {
1975 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1976 continue;
1977
1978 memset(&alu, 0, sizeof(struct r600_bc_alu));
1979 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1980 alu.is_op3 = 1;
1981 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1982 r600_bc_src(&alu.src[1], &ctx->src[1], i);
1983 alu.src[2].sel = ctx->temp_reg;
1984 alu.src[2].chan = i;
1985
1986 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1987 alu.dst.chan = i;
1988 if (i == lasti) {
1989 alu.last = 1;
1990 }
1991 r = r600_bc_add_alu(ctx->bc, &alu);
1992 if (r)
1993 return r;
1994 }
1995 return 0;
1996 }
1997
1998 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1999 {
2000 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2001 struct r600_bc_alu alu;
2002 int i, r;
2003 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2004
2005 for (i = 0; i < lasti + 1; i++) {
2006 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2007 continue;
2008
2009 memset(&alu, 0, sizeof(struct r600_bc_alu));
2010 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2011 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2012 r600_bc_src(&alu.src[1], &ctx->src[2], i);
2013 r600_bc_src(&alu.src[2], &ctx->src[1], i);
2014 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2015 alu.dst.chan = i;
2016 alu.dst.write = 1;
2017 alu.is_op3 = 1;
2018 if (i == lasti)
2019 alu.last = 1;
2020 r = r600_bc_add_alu(ctx->bc, &alu);
2021 if (r)
2022 return r;
2023 }
2024 return 0;
2025 }
2026
2027 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2028 {
2029 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2030 struct r600_bc_alu alu;
2031 uint32_t use_temp = 0;
2032 int i, r;
2033
2034 if (inst->Dst[0].Register.WriteMask != 0xf)
2035 use_temp = 1;
2036
2037 for (i = 0; i < 4; i++) {
2038 memset(&alu, 0, sizeof(struct r600_bc_alu));
2039 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2040
2041 switch (i) {
2042 case 0:
2043 r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2044 break;
2045 case 1:
2046 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2047 break;
2048 case 2:
2049 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2050 break;
2051 case 3:
2052 alu.src[0].sel = V_SQ_ALU_SRC_0;
2053 alu.src[0].chan = i;
2054 }
2055
2056 switch (i) {
2057 case 0:
2058 r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2059 break;
2060 case 1:
2061 r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2062 break;
2063 case 2:
2064 r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2065 break;
2066 case 3:
2067 alu.src[1].sel = V_SQ_ALU_SRC_0;
2068 alu.src[1].chan = i;
2069 }
2070
2071 alu.dst.sel = ctx->temp_reg;
2072 alu.dst.chan = i;
2073 alu.dst.write = 1;
2074
2075 if (i == 3)
2076 alu.last = 1;
2077 r = r600_bc_add_alu(ctx->bc, &alu);
2078 if (r)
2079 return r;
2080 }
2081
2082 for (i = 0; i < 4; i++) {
2083 memset(&alu, 0, sizeof(struct r600_bc_alu));
2084 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2085
2086 switch (i) {
2087 case 0:
2088 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2089 break;
2090 case 1:
2091 r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2092 break;
2093 case 2:
2094 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2095 break;
2096 case 3:
2097 alu.src[0].sel = V_SQ_ALU_SRC_0;
2098 alu.src[0].chan = i;
2099 }
2100
2101 switch (i) {
2102 case 0:
2103 r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2104 break;
2105 case 1:
2106 r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2107 break;
2108 case 2:
2109 r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2110 break;
2111 case 3:
2112 alu.src[1].sel = V_SQ_ALU_SRC_0;
2113 alu.src[1].chan = i;
2114 }
2115
2116 alu.src[2].sel = ctx->temp_reg;
2117 alu.src[2].neg = 1;
2118 alu.src[2].chan = i;
2119
2120 if (use_temp)
2121 alu.dst.sel = ctx->temp_reg;
2122 else
2123 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2124 alu.dst.chan = i;
2125 alu.dst.write = 1;
2126 alu.is_op3 = 1;
2127 if (i == 3)
2128 alu.last = 1;
2129 r = r600_bc_add_alu(ctx->bc, &alu);
2130 if (r)
2131 return r;
2132 }
2133 if (use_temp)
2134 return tgsi_helper_copy(ctx, inst);
2135 return 0;
2136 }
2137
2138 static int tgsi_exp(struct r600_shader_ctx *ctx)
2139 {
2140 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2141 struct r600_bc_alu alu;
2142 int r;
2143
2144 /* result.x = 2^floor(src); */
2145 if (inst->Dst[0].Register.WriteMask & 1) {
2146 memset(&alu, 0, sizeof(struct r600_bc_alu));
2147
2148 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2149 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2150
2151 alu.dst.sel = ctx->temp_reg;
2152 alu.dst.chan = 0;
2153 alu.dst.write = 1;
2154 alu.last = 1;
2155 r = r600_bc_add_alu(ctx->bc, &alu);
2156 if (r)
2157 return r;
2158
2159 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2160 alu.src[0].sel = ctx->temp_reg;
2161 alu.src[0].chan = 0;
2162
2163 alu.dst.sel = ctx->temp_reg;
2164 alu.dst.chan = 0;
2165 alu.dst.write = 1;
2166 alu.last = 1;
2167 r = r600_bc_add_alu(ctx->bc, &alu);
2168 if (r)
2169 return r;
2170 }
2171
2172 /* result.y = tmp - floor(tmp); */
2173 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2174 memset(&alu, 0, sizeof(struct r600_bc_alu));
2175
2176 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2177 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2178
2179 alu.dst.sel = ctx->temp_reg;
2180 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2181 // if (r)
2182 // return r;
2183 alu.dst.write = 1;
2184 alu.dst.chan = 1;
2185
2186 alu.last = 1;
2187
2188 r = r600_bc_add_alu(ctx->bc, &alu);
2189 if (r)
2190 return r;
2191 }
2192
2193 /* result.z = RoughApprox2ToX(tmp);*/
2194 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2195 memset(&alu, 0, sizeof(struct r600_bc_alu));
2196 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2197 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2198
2199 alu.dst.sel = ctx->temp_reg;
2200 alu.dst.write = 1;
2201 alu.dst.chan = 2;
2202
2203 alu.last = 1;
2204
2205 r = r600_bc_add_alu(ctx->bc, &alu);
2206 if (r)
2207 return r;
2208 }
2209
2210 /* result.w = 1.0;*/
2211 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2212 memset(&alu, 0, sizeof(struct r600_bc_alu));
2213
2214 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2215 alu.src[0].sel = V_SQ_ALU_SRC_1;
2216 alu.src[0].chan = 0;
2217
2218 alu.dst.sel = ctx->temp_reg;
2219 alu.dst.chan = 3;
2220 alu.dst.write = 1;
2221 alu.last = 1;
2222 r = r600_bc_add_alu(ctx->bc, &alu);
2223 if (r)
2224 return r;
2225 }
2226 return tgsi_helper_copy(ctx, inst);
2227 }
2228
2229 static int tgsi_log(struct r600_shader_ctx *ctx)
2230 {
2231 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2232 struct r600_bc_alu alu;
2233 int r;
2234
2235 /* result.x = floor(log2(src)); */
2236 if (inst->Dst[0].Register.WriteMask & 1) {
2237 memset(&alu, 0, sizeof(struct r600_bc_alu));
2238
2239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2240 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2241
2242 alu.dst.sel = ctx->temp_reg;
2243 alu.dst.chan = 0;
2244 alu.dst.write = 1;
2245 alu.last = 1;
2246 r = r600_bc_add_alu(ctx->bc, &alu);
2247 if (r)
2248 return r;
2249
2250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2251 alu.src[0].sel = ctx->temp_reg;
2252 alu.src[0].chan = 0;
2253
2254 alu.dst.sel = ctx->temp_reg;
2255 alu.dst.chan = 0;
2256 alu.dst.write = 1;
2257 alu.last = 1;
2258
2259 r = r600_bc_add_alu(ctx->bc, &alu);
2260 if (r)
2261 return r;
2262 }
2263
2264 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2265 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2266 memset(&alu, 0, sizeof(struct r600_bc_alu));
2267
2268 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2269 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2270
2271 alu.dst.sel = ctx->temp_reg;
2272 alu.dst.chan = 1;
2273 alu.dst.write = 1;
2274 alu.last = 1;
2275
2276 r = r600_bc_add_alu(ctx->bc, &alu);
2277 if (r)
2278 return r;
2279
2280 memset(&alu, 0, sizeof(struct r600_bc_alu));
2281
2282 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2283 alu.src[0].sel = ctx->temp_reg;
2284 alu.src[0].chan = 1;
2285
2286 alu.dst.sel = ctx->temp_reg;
2287 alu.dst.chan = 1;
2288 alu.dst.write = 1;
2289 alu.last = 1;
2290
2291 r = r600_bc_add_alu(ctx->bc, &alu);
2292 if (r)
2293 return r;
2294
2295 memset(&alu, 0, sizeof(struct r600_bc_alu));
2296
2297 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2298 alu.src[0].sel = ctx->temp_reg;
2299 alu.src[0].chan = 1;
2300
2301 alu.dst.sel = ctx->temp_reg;
2302 alu.dst.chan = 1;
2303 alu.dst.write = 1;
2304 alu.last = 1;
2305
2306 r = r600_bc_add_alu(ctx->bc, &alu);
2307 if (r)
2308 return r;
2309
2310 memset(&alu, 0, sizeof(struct r600_bc_alu));
2311
2312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2313 alu.src[0].sel = ctx->temp_reg;
2314 alu.src[0].chan = 1;
2315
2316 alu.dst.sel = ctx->temp_reg;
2317 alu.dst.chan = 1;
2318 alu.dst.write = 1;
2319 alu.last = 1;
2320
2321 r = r600_bc_add_alu(ctx->bc, &alu);
2322 if (r)
2323 return r;
2324
2325 memset(&alu, 0, sizeof(struct r600_bc_alu));
2326
2327 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2328
2329 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2330
2331 alu.src[1].sel = ctx->temp_reg;
2332 alu.src[1].chan = 1;
2333
2334 alu.dst.sel = ctx->temp_reg;
2335 alu.dst.chan = 1;
2336 alu.dst.write = 1;
2337 alu.last = 1;
2338
2339 r = r600_bc_add_alu(ctx->bc, &alu);
2340 if (r)
2341 return r;
2342 }
2343
2344 /* result.z = log2(src);*/
2345 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2346 memset(&alu, 0, sizeof(struct r600_bc_alu));
2347
2348 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2349 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2350
2351 alu.dst.sel = ctx->temp_reg;
2352 alu.dst.write = 1;
2353 alu.dst.chan = 2;
2354 alu.last = 1;
2355
2356 r = r600_bc_add_alu(ctx->bc, &alu);
2357 if (r)
2358 return r;
2359 }
2360
2361 /* result.w = 1.0; */
2362 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2363 memset(&alu, 0, sizeof(struct r600_bc_alu));
2364
2365 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2366 alu.src[0].sel = V_SQ_ALU_SRC_1;
2367 alu.src[0].chan = 0;
2368
2369 alu.dst.sel = ctx->temp_reg;
2370 alu.dst.chan = 3;
2371 alu.dst.write = 1;
2372 alu.last = 1;
2373
2374 r = r600_bc_add_alu(ctx->bc, &alu);
2375 if (r)
2376 return r;
2377 }
2378
2379 return tgsi_helper_copy(ctx, inst);
2380 }
2381
2382 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2383 {
2384 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2385 struct r600_bc_alu alu;
2386 int r;
2387
2388 memset(&alu, 0, sizeof(struct r600_bc_alu));
2389
2390 switch (inst->Instruction.Opcode) {
2391 case TGSI_OPCODE_ARL:
2392 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2393 break;
2394 case TGSI_OPCODE_ARR:
2395 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2396 break;
2397 default:
2398 assert(0);
2399 return -1;
2400 }
2401
2402 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2403 alu.last = 1;
2404 alu.dst.sel = ctx->ar_reg;
2405 alu.dst.write = 1;
2406 r = r600_bc_add_alu(ctx->bc, &alu);
2407 if (r)
2408 return r;
2409
2410 /* TODO: Note that the MOVA can be avoided if we never use AR for
2411 * indexing non-CB registers in the current ALU clause. Similarly, we
2412 * need to load AR from ar_reg again if we started a new clause
2413 * between ARL and AR usage. The easy way to do that is to remove
2414 * the MOVA here, and load it for the first AR access after ar_reg
2415 * has been modified in each clause. */
2416 memset(&alu, 0, sizeof(struct r600_bc_alu));
2417 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2418 alu.src[0].sel = ctx->ar_reg;
2419 alu.src[0].chan = 0;
2420 alu.last = 1;
2421 r = r600_bc_add_alu(ctx->bc, &alu);
2422 if (r)
2423 return r;
2424 return 0;
2425 }
2426 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2427 {
2428 /* TODO from r600c, ar values don't persist between clauses */
2429 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2430 struct r600_bc_alu alu;
2431 int r;
2432
2433 switch (inst->Instruction.Opcode) {
2434 case TGSI_OPCODE_ARL:
2435 memset(&alu, 0, sizeof(alu));
2436 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2437 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2438 alu.dst.sel = ctx->ar_reg;
2439 alu.dst.write = 1;
2440 alu.last = 1;
2441
2442 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2443 return r;
2444
2445 memset(&alu, 0, sizeof(alu));
2446 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2447 alu.src[0].sel = ctx->ar_reg;
2448 alu.dst.sel = ctx->ar_reg;
2449 alu.dst.write = 1;
2450 alu.last = 1;
2451
2452 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2453 return r;
2454 break;
2455 case TGSI_OPCODE_ARR:
2456 memset(&alu, 0, sizeof(alu));
2457 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2458 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2459 alu.dst.sel = ctx->ar_reg;
2460 alu.dst.write = 1;
2461 alu.last = 1;
2462
2463 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2464 return r;
2465 break;
2466 default:
2467 assert(0);
2468 return -1;
2469 }
2470
2471 memset(&alu, 0, sizeof(alu));
2472 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2473 alu.src[0].sel = ctx->ar_reg;
2474 alu.last = 1;
2475
2476 r = r600_bc_add_alu(ctx->bc, &alu);
2477 if (r)
2478 return r;
2479 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2480 return 0;
2481 }
2482
2483 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2484 {
2485 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2486 struct r600_bc_alu alu;
2487 int i, r = 0;
2488
2489 for (i = 0; i < 4; i++) {
2490 memset(&alu, 0, sizeof(struct r600_bc_alu));
2491
2492 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2493 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2494
2495 if (i == 0 || i == 3) {
2496 alu.src[0].sel = V_SQ_ALU_SRC_1;
2497 } else {
2498 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2499 }
2500
2501 if (i == 0 || i == 2) {
2502 alu.src[1].sel = V_SQ_ALU_SRC_1;
2503 } else {
2504 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2505 }
2506 if (i == 3)
2507 alu.last = 1;
2508 r = r600_bc_add_alu(ctx->bc, &alu);
2509 if (r)
2510 return r;
2511 }
2512 return 0;
2513 }
2514
2515 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2516 {
2517 struct r600_bc_alu alu;
2518 int r;
2519
2520 memset(&alu, 0, sizeof(struct r600_bc_alu));
2521 alu.inst = opcode;
2522 alu.predicate = 1;
2523
2524 alu.dst.sel = ctx->temp_reg;
2525 alu.dst.write = 1;
2526 alu.dst.chan = 0;
2527
2528 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2529 alu.src[1].sel = V_SQ_ALU_SRC_0;
2530 alu.src[1].chan = 0;
2531
2532 alu.last = 1;
2533
2534 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2535 if (r)
2536 return r;
2537 return 0;
2538 }
2539
2540 static int pops(struct r600_shader_ctx *ctx, int pops)
2541 {
2542 int alu_pop = 3;
2543 if (ctx->bc->cf_last) {
2544 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2545 alu_pop = 0;
2546 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2547 alu_pop = 1;
2548 }
2549 alu_pop += pops;
2550 if (alu_pop == 1) {
2551 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2552 ctx->bc->force_add_cf = 1;
2553 } else if (alu_pop == 2) {
2554 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2555 ctx->bc->force_add_cf = 1;
2556 } else {
2557 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2558 ctx->bc->cf_last->pop_count = pops;
2559 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2560 }
2561 return 0;
2562 }
2563
2564 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2565 {
2566 switch(reason) {
2567 case FC_PUSH_VPM:
2568 ctx->bc->callstack[ctx->bc->call_sp].current--;
2569 break;
2570 case FC_PUSH_WQM:
2571 case FC_LOOP:
2572 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2573 break;
2574 case FC_REP:
2575 /* TOODO : for 16 vp asic should -= 2; */
2576 ctx->bc->callstack[ctx->bc->call_sp].current --;
2577 break;
2578 }
2579 }
2580
2581 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2582 {
2583 if (check_max_only) {
2584 int diff;
2585 switch (reason) {
2586 case FC_PUSH_VPM:
2587 diff = 1;
2588 break;
2589 case FC_PUSH_WQM:
2590 diff = 4;
2591 break;
2592 default:
2593 assert(0);
2594 diff = 0;
2595 }
2596 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2597 ctx->bc->callstack[ctx->bc->call_sp].max) {
2598 ctx->bc->callstack[ctx->bc->call_sp].max =
2599 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2600 }
2601 return;
2602 }
2603 switch (reason) {
2604 case FC_PUSH_VPM:
2605 ctx->bc->callstack[ctx->bc->call_sp].current++;
2606 break;
2607 case FC_PUSH_WQM:
2608 case FC_LOOP:
2609 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2610 break;
2611 case FC_REP:
2612 ctx->bc->callstack[ctx->bc->call_sp].current++;
2613 break;
2614 }
2615
2616 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2617 ctx->bc->callstack[ctx->bc->call_sp].max) {
2618 ctx->bc->callstack[ctx->bc->call_sp].max =
2619 ctx->bc->callstack[ctx->bc->call_sp].current;
2620 }
2621 }
2622
2623 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2624 {
2625 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2626
2627 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2628 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2629 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2630 sp->num_mid++;
2631 }
2632
2633 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2634 {
2635 ctx->bc->fc_sp++;
2636 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2637 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2638 }
2639
2640 static void fc_poplevel(struct r600_shader_ctx *ctx)
2641 {
2642 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2643 if (sp->mid) {
2644 free(sp->mid);
2645 sp->mid = NULL;
2646 }
2647 sp->num_mid = 0;
2648 sp->start = NULL;
2649 sp->type = 0;
2650 ctx->bc->fc_sp--;
2651 }
2652
/*
 * Disabled code: everything up to the matching #endif is compiled out.
 *
 * It holds unfinished helpers for RETURN / flag-based loop exit:
 * emit_return, emit_jump_to_offset (offset still unresolved, see its
 * TODO), emit_setret_in_loop_flag (stub that only returns 0),
 * emit_testflag (empty stub), emit_return_on_flag and break_loop_on_flag.
 */
2653 #if 0
2654 static int emit_return(struct r600_shader_ctx *ctx)
2655 {
2656 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
2657 return 0;
2658 }
2659
2660 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
2661 {
2662
2663 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2664 ctx->bc->cf_last->pop_count = pops;
2665 /* TODO work out offset */
2666 return 0;
2667 }
2668
2669 static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
2670 {
2671 return 0;
2672 }
2673
2674 static void emit_testflag(struct r600_shader_ctx *ctx)
2675 {
2676
2677 }
2678
2679 static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
2680 {
2681 emit_testflag(ctx);
2682 emit_jump_to_offset(ctx, 1, 4);
2683 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
2684 pops(ctx, ifidx + 1);
2685 emit_return(ctx);
2686 }
2687
2688 static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
2689 {
2690 emit_testflag(ctx);
2691
2692 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2693 ctx->bc->cf_last->pop_count = 1;
2694
2695 fc_set_mid(ctx, fc_sp);
2696
2697 pops(ctx, 1);
2698 }
2699 #endif
2700
2701 static int tgsi_if(struct r600_shader_ctx *ctx)
2702 {
2703 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2704
2705 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2706
2707 fc_pushlevel(ctx, FC_IF);
2708
2709 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2710 return 0;
2711 }
2712
2713 static int tgsi_else(struct r600_shader_ctx *ctx)
2714 {
2715 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2716 ctx->bc->cf_last->pop_count = 1;
2717
2718 fc_set_mid(ctx, ctx->bc->fc_sp);
2719 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2720 return 0;
2721 }
2722
2723 static int tgsi_endif(struct r600_shader_ctx *ctx)
2724 {
2725 pops(ctx, 1);
2726 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2727 R600_ERR("if/endif unbalanced in shader\n");
2728 return -1;
2729 }
2730
2731 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2732 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2733 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2734 } else {
2735 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2736 }
2737 fc_poplevel(ctx);
2738
2739 callstack_decrease_current(ctx, FC_PUSH_VPM);
2740 return 0;
2741 }
2742
2743 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2744 {
2745 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2746
2747 fc_pushlevel(ctx, FC_LOOP);
2748
2749 /* check stack depth */
2750 callstack_check_depth(ctx, FC_LOOP, 0);
2751 return 0;
2752 }
2753
2754 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2755 {
2756 int i;
2757
2758 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2759
2760 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2761 R600_ERR("loop/endloop in shader code are not paired.\n");
2762 return -EINVAL;
2763 }
2764
2765 /* fixup loop pointers - from r600isa
2766 LOOP END points to CF after LOOP START,
2767 LOOP START point to CF after LOOP END
2768 BRK/CONT point to LOOP END CF
2769 */
2770 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2771
2772 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2773
2774 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2775 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2776 }
2777 /* TODO add LOOPRET support */
2778 fc_poplevel(ctx);
2779 callstack_decrease_current(ctx, FC_LOOP);
2780 return 0;
2781 }
2782
2783 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2784 {
2785 unsigned int fscp;
2786
2787 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2788 {
2789 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2790 break;
2791 }
2792
2793 if (fscp == 0) {
2794 R600_ERR("Break not inside loop/endloop pair\n");
2795 return -EINVAL;
2796 }
2797
2798 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2799 ctx->bc->cf_last->pop_count = 1;
2800
2801 fc_set_mid(ctx, fscp);
2802
2803 pops(ctx, 1);
2804 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2805 return 0;
2806 }
2807
/*
 * TGSI opcode translation table used by the tgsi_r600_arl variant of the
 * handlers (the V_SQ_* opcode names, without the EG_ prefix).
 *
 * Each entry lists, in order: the TGSI opcode (a bare number where the
 * opcode space has a gap), a flag that is 1 only for the MULADD entry
 * (presumably "is a three-operand op" -- confirm against the struct
 * declaration), the hardware ALU/TEX/CF opcode, and the handler function.
 * Opcodes with no implementation use tgsi_unsupported.
 *
 * NOTE(review): entries are listed in ascending opcode order with explicit
 * gap fillers, which suggests the table is indexed directly by TGSI
 * opcode -- confirm at the lookup site before reordering anything.
 */
2808 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2809 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2810 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2811 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2812
2813 /* FIXME:
2814 * For state trackers other than OpenGL, we'll want to use
2815 * _RECIP_IEEE instead.
2816 */
2817 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2818
2819 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2820 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2821 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2822 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2823 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2824 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2825 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2826 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2827 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2828 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2829 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2830 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2831 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2832 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2833 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2834 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835 /* gap */
2836 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838 /* gap */
2839 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2842 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2844 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2846 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2847 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2848 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2849 /* gap */
2850 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2852 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2854 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2855 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2856 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2857 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2858 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2859 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2864 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2866 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2867 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2868 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2869 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2871 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2873 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2875 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2876 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2880 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2884 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2885 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2886 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2887 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2890 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2891 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2892 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2893 /* gap */
2894 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2897 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2898 /* gap */
2899 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2907 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908 /* gap */
2909 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2918 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2921 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2923 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924 /* gap */
2925 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930 /* gap */
2931 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2940 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2941 /* gap */
2942 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2947 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2949 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2950 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2951 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2952 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2954 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2955 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2956 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2957 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2958 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2959 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2960 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2961 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2962 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2963 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2964 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2965 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2966 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2967 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2968 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2969 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2970 };
2971
2972 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2973 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2974 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2975 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2976 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2977 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2978 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2979 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2980 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2981 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2982 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2983 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2984 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2985 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2986 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2987 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2988 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2989 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2990 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2991 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2992 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2993 /* gap */
2994 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2995 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2996 /* gap */
2997 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2998 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2999 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3000 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3001 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3002 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3003 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3004 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3005 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3006 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3007 /* gap */
3008 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3009 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3010 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3011 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3012 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3013 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3014 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3015 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3016 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3017 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3018 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3020 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3021 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3022 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3023 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3024 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3025 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3026 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3027 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3028 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3029 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3030 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3031 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3032 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3034 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3035 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3036 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3037 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3038 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3039 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3040 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3041 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3042 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3043 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3044 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3045 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3048 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3049 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3050 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3051 /* gap */
3052 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3055 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3056 /* gap */
3057 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3060 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3065 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066 /* gap */
3067 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3076 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3079 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3081 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082 /* gap */
3083 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088 /* gap */
3089 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3098 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3099 /* gap */
3100 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3102 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3117 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3119 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3126 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128 };