Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_formats.h"
32 #include "r600_opcodes.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
38 {
39 struct r600_pipe_state *rstate = &shader->rstate;
40 struct r600_shader *rshader = &shader->shader;
41 unsigned spi_vs_out_id[10];
42 unsigned i, tmp;
43
44 /* clear previous register */
45 rstate->nregs = 0;
46
47 /* so far never got proper semantic id from tgsi */
48 /* FIXME better to move this in config things so they get emited
49 * only one time per cs
50 */
51 for (i = 0; i < 10; i++) {
52 spi_vs_out_id[i] = 0;
53 }
54 for (i = 0; i < 32; i++) {
55 tmp = i << ((i & 3) * 8);
56 spi_vs_out_id[i / 4] |= tmp;
57 }
58 for (i = 0; i < 10; i++) {
59 r600_pipe_state_add_reg(rstate,
60 R_028614_SPI_VS_OUT_ID_0 + i * 4,
61 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
62 }
63
64 r600_pipe_state_add_reg(rstate,
65 R_0286C4_SPI_VS_OUT_CONFIG,
66 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
67 0xFFFFFFFF, NULL);
68 r600_pipe_state_add_reg(rstate,
69 R_028868_SQ_PGM_RESOURCES_VS,
70 S_028868_NUM_GPRS(rshader->bc.ngpr) |
71 S_028868_STACK_SIZE(rshader->bc.nstack),
72 0xFFFFFFFF, NULL);
73 r600_pipe_state_add_reg(rstate,
74 R_0288D0_SQ_PGM_CF_OFFSET_VS,
75 0x00000000, 0xFFFFFFFF, NULL);
76 r600_pipe_state_add_reg(rstate,
77 R_028858_SQ_PGM_START_VS,
78 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
79
80 r600_pipe_state_add_reg(rstate,
81 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
82 0xFFFFFFFF, NULL);
83
84 }
85
86 int r600_find_vs_semantic_index(struct r600_shader *vs,
87 struct r600_shader *ps, int id)
88 {
89 struct r600_shader_io *input = &ps->input[id];
90
91 for (int i = 0; i < vs->noutput; i++) {
92 if (input->name == vs->output[i].name &&
93 input->sid == vs->output[i].sid) {
94 return i - 1;
95 }
96 }
97 return 0;
98 }
99
100 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
101 {
102 struct r600_pipe_state *rstate = &shader->rstate;
103 struct r600_shader *rshader = &shader->shader;
104 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
105 int pos_index = -1, face_index = -1;
106
107 rstate->nregs = 0;
108
109 for (i = 0; i < rshader->ninput; i++) {
110 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
111 pos_index = i;
112 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
113 face_index = i;
114 }
115
116 for (i = 0; i < rshader->noutput; i++) {
117 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
118 r600_pipe_state_add_reg(rstate,
119 R_02880C_DB_SHADER_CONTROL,
120 S_02880C_Z_EXPORT_ENABLE(1),
121 S_02880C_Z_EXPORT_ENABLE(1), NULL);
122 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
123 r600_pipe_state_add_reg(rstate,
124 R_02880C_DB_SHADER_CONTROL,
125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
126 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
127 }
128
129 exports_ps = 0;
130 num_cout = 0;
131 for (i = 0; i < rshader->noutput; i++) {
132 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
133 exports_ps |= 1;
134 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
135 num_cout++;
136 }
137 }
138 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
139 if (!exports_ps) {
140 /* always at least export 1 component per pixel */
141 exports_ps = 2;
142 }
143
144 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
145 S_0286CC_PERSP_GRADIENT_ENA(1);
146 spi_input_z = 0;
147 if (pos_index != -1) {
148 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
149 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
150 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
151 S_0286CC_BARYC_SAMPLE_CNTL(1));
152 spi_input_z |= 1;
153 }
154
155 spi_ps_in_control_1 = 0;
156 if (face_index != -1) {
157 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
158 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
159 }
160
161 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
162 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
163 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
164 r600_pipe_state_add_reg(rstate,
165 R_028840_SQ_PGM_START_PS,
166 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
167 r600_pipe_state_add_reg(rstate,
168 R_028850_SQ_PGM_RESOURCES_PS,
169 S_028868_NUM_GPRS(rshader->bc.ngpr) |
170 S_028868_STACK_SIZE(rshader->bc.nstack),
171 0xFFFFFFFF, NULL);
172 r600_pipe_state_add_reg(rstate,
173 R_028854_SQ_PGM_EXPORTS_PS,
174 exports_ps, 0xFFFFFFFF, NULL);
175 r600_pipe_state_add_reg(rstate,
176 R_0288CC_SQ_PGM_CF_OFFSET_PS,
177 0x00000000, 0xFFFFFFFF, NULL);
178
179 if (rshader->fs_write_all) {
180 r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
181 S_028808_MULTIWRITE_ENABLE(1),
182 S_028808_MULTIWRITE_ENABLE(1),
183 NULL);
184 }
185
186 if (rshader->uses_kill) {
187 /* only set some bits here, the other bits are set in the dsa state */
188 r600_pipe_state_add_reg(rstate,
189 R_02880C_DB_SHADER_CONTROL,
190 S_02880C_KILL_ENABLE(1),
191 S_02880C_KILL_ENABLE(1), NULL);
192 }
193 r600_pipe_state_add_reg(rstate,
194 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
195 0xFFFFFFFF, NULL);
196 }
197
198 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
199 {
200 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
201 struct r600_shader *rshader = &shader->shader;
202 void *ptr;
203
204 /* copy new shader */
205 if (shader->bo == NULL) {
206 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
207 if (shader->bo == NULL) {
208 return -ENOMEM;
209 }
210 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
211 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
212 r600_bo_unmap(rctx->radeon, shader->bo);
213 }
214 /* build state */
215 switch (rshader->processor_type) {
216 case TGSI_PROCESSOR_VERTEX:
217 if (rshader->family >= CHIP_CEDAR) {
218 evergreen_pipe_shader_vs(ctx, shader);
219 } else {
220 r600_pipe_shader_vs(ctx, shader);
221 }
222 break;
223 case TGSI_PROCESSOR_FRAGMENT:
224 if (rshader->family >= CHIP_CEDAR) {
225 evergreen_pipe_shader_ps(ctx, shader);
226 } else {
227 r600_pipe_shader_ps(ctx, shader);
228 }
229 break;
230 default:
231 return -EINVAL;
232 }
233 return 0;
234 }
235
236 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
237
238 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
239 {
240 static int dump_shaders = -1;
241 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
242 int r;
243
244 /* Would like some magic "get_bool_option_once" routine.
245 */
246 if (dump_shaders == -1)
247 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
248
249 if (dump_shaders) {
250 fprintf(stderr, "--------------------------------------------------------------\n");
251 tgsi_dump(tokens, 0);
252 }
253 shader->shader.family = r600_get_family(rctx->radeon);
254 r = r600_shader_from_tgsi(tokens, &shader->shader);
255 if (r) {
256 R600_ERR("translation from TGSI failed !\n");
257 return r;
258 }
259 r = r600_bc_build(&shader->shader.bc);
260 if (r) {
261 R600_ERR("building bytecode failed !\n");
262 return r;
263 }
264 if (dump_shaders) {
265 r600_bc_dump(&shader->shader.bc);
266 fprintf(stderr, "______________________________________________________________\n");
267 }
268 return r600_pipe_shader(ctx, shader);
269 }
270
271 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
272 {
273 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
274
275 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
276 r600_bc_clear(&shader->shader.bc);
277 }
278
279 /*
280 * tgsi -> r600 shader
281 */
282 struct r600_shader_tgsi_instruction;
283
/* One instruction source operand, decoded from TGSI by tgsi_src(). */
struct r600_shader_src {
	unsigned sel;		/* source select: register index or special selector */
	unsigned swizzle[4];	/* per-channel source component */
	unsigned neg, abs, rel;	/* negate / absolute-value / relative-addressing flags */
	uint32_t value[4];	/* literal payload, used when sel is a literal */
};
292
293 struct r600_shader_ctx {
294 struct tgsi_shader_info info;
295 struct tgsi_parse_context parse;
296 const struct tgsi_token *tokens;
297 unsigned type;
298 unsigned file_offset[TGSI_FILE_COUNT];
299 unsigned temp_reg;
300 unsigned ar_reg;
301 struct r600_shader_tgsi_instruction *inst_info;
302 struct r600_bc *bc;
303 struct r600_shader *shader;
304 struct r600_shader_src src[3];
305 u32 *literals;
306 u32 nliterals;
307 u32 max_driver_temp_used;
308 /* needed for evergreen interpolation */
309 boolean input_centroid;
310 boolean input_linear;
311 boolean input_perspective;
312 int num_interp_gpr;
313 };
314
/* Translation table entry: how one TGSI opcode is lowered. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode, is_op3, r600_opcode;
	/* emitter for this opcode; returns 0 or a negative errno value */
	int (*process)(struct r600_shader_ctx *ctx);
};
321
322 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
323 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
324
325 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
326 {
327 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
328 int j;
329
330 if (i->Instruction.NumDstRegs > 1) {
331 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
332 return -EINVAL;
333 }
334 if (i->Instruction.Predicate) {
335 R600_ERR("predicate unsupported\n");
336 return -EINVAL;
337 }
338 #if 0
339 if (i->Instruction.Label) {
340 R600_ERR("label unsupported\n");
341 return -EINVAL;
342 }
343 #endif
344 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
345 if (i->Src[j].Register.Dimension) {
346 R600_ERR("unsupported src %d (dimension %d)\n", j,
347 i->Src[j].Register.Dimension);
348 return -EINVAL;
349 }
350 }
351 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
352 if (i->Dst[j].Register.Dimension) {
353 R600_ERR("unsupported dst (dimension)\n");
354 return -EINVAL;
355 }
356 }
357 return 0;
358 }
359
360 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
361 {
362 int i, r;
363 struct r600_bc_alu alu;
364 int gpr = 0, base_chan = 0;
365 int ij_index = 0;
366
367 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
368 ij_index = 0;
369 if (ctx->shader->input[input].centroid)
370 ij_index++;
371 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
372 ij_index = 0;
373 /* if we have perspective add one */
374 if (ctx->input_perspective) {
375 ij_index++;
376 /* if we have perspective centroid */
377 if (ctx->input_centroid)
378 ij_index++;
379 }
380 if (ctx->shader->input[input].centroid)
381 ij_index++;
382 }
383
384 /* work out gpr and base_chan from index */
385 gpr = ij_index / 2;
386 base_chan = (2 * (ij_index % 2)) + 1;
387
388 for (i = 0; i < 8; i++) {
389 memset(&alu, 0, sizeof(struct r600_bc_alu));
390
391 if (i < 4)
392 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
393 else
394 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
395
396 if ((i > 1) && (i < 6)) {
397 alu.dst.sel = ctx->shader->input[input].gpr;
398 alu.dst.write = 1;
399 }
400
401 alu.dst.chan = i % 4;
402
403 alu.src[0].sel = gpr;
404 alu.src[0].chan = (base_chan - (i % 2));
405
406 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
407
408 alu.bank_swizzle_force = SQ_ALU_VEC_210;
409 if ((i % 4) == 3)
410 alu.last = 1;
411 r = r600_bc_add_alu(ctx->bc, &alu);
412 if (r)
413 return r;
414 }
415 return 0;
416 }
417
418
419 static int tgsi_declaration(struct r600_shader_ctx *ctx)
420 {
421 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
422 unsigned i;
423 int r;
424
425 switch (d->Declaration.File) {
426 case TGSI_FILE_INPUT:
427 i = ctx->shader->ninput++;
428 ctx->shader->input[i].name = d->Semantic.Name;
429 ctx->shader->input[i].sid = d->Semantic.Index;
430 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
431 ctx->shader->input[i].centroid = d->Declaration.Centroid;
432 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
433 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
434 /* turn input into interpolate on EG */
435 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
436 if (ctx->shader->input[i].interpolate > 0) {
437 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
438 evergreen_interp_alu(ctx, i);
439 }
440 }
441 }
442 break;
443 case TGSI_FILE_OUTPUT:
444 i = ctx->shader->noutput++;
445 ctx->shader->output[i].name = d->Semantic.Name;
446 ctx->shader->output[i].sid = d->Semantic.Index;
447 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
448 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
449 break;
450 case TGSI_FILE_CONSTANT:
451 case TGSI_FILE_TEMPORARY:
452 case TGSI_FILE_SAMPLER:
453 case TGSI_FILE_ADDRESS:
454 break;
455
456 case TGSI_FILE_SYSTEM_VALUE:
457 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
458 struct r600_bc_alu alu;
459 memset(&alu, 0, sizeof(struct r600_bc_alu));
460
461 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
462 alu.src[0].sel = 0;
463 alu.src[0].chan = 3;
464
465 alu.dst.sel = 0;
466 alu.dst.chan = 3;
467 alu.dst.write = 1;
468 alu.last = 1;
469
470 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
471 return r;
472 break;
473 }
474
475 default:
476 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
477 return -EINVAL;
478 }
479 return 0;
480 }
481
482 static int r600_get_temp(struct r600_shader_ctx *ctx)
483 {
484 return ctx->temp_reg + ctx->max_driver_temp_used++;
485 }
486
487 /*
488 * for evergreen we need to scan the shader to find the number of GPRs we need to
489 * reserve for interpolation.
490 *
491 * we need to know if we are going to emit
492 * any centroid inputs
493 * if perspective and linear are required
494 */
495 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
496 {
497 int i;
498 int num_baryc;
499
500 ctx->input_linear = FALSE;
501 ctx->input_perspective = FALSE;
502 ctx->input_centroid = FALSE;
503 ctx->num_interp_gpr = 1;
504
505 /* any centroid inputs */
506 for (i = 0; i < ctx->info.num_inputs; i++) {
507 /* skip position/face */
508 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
509 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
510 continue;
511 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
512 ctx->input_linear = TRUE;
513 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
514 ctx->input_perspective = TRUE;
515 if (ctx->info.input_centroid[i])
516 ctx->input_centroid = TRUE;
517 }
518
519 num_baryc = 0;
520 /* ignoring sample for now */
521 if (ctx->input_perspective)
522 num_baryc++;
523 if (ctx->input_linear)
524 num_baryc++;
525 if (ctx->input_centroid)
526 num_baryc *= 2;
527
528 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
529
530 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
531 return ctx->num_interp_gpr;
532 }
533
534 static void tgsi_src(struct r600_shader_ctx *ctx,
535 const struct tgsi_full_src_register *tgsi_src,
536 struct r600_shader_src *r600_src)
537 {
538 memset(r600_src, 0, sizeof(*r600_src));
539 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
540 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
541 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
542 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
543 r600_src->neg = tgsi_src->Register.Negate;
544 r600_src->abs = tgsi_src->Register.Absolute;
545
546 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
547 int index;
548 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
549 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
550 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
551
552 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
553 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
554 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
555 return;
556 }
557 index = tgsi_src->Register.Index;
558 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
559 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
560 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
561 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
562 r600_src->swizzle[0] = 3;
563 r600_src->swizzle[1] = 3;
564 r600_src->swizzle[2] = 3;
565 r600_src->swizzle[3] = 3;
566 r600_src->sel = 0;
567 } else {
568 if (tgsi_src->Register.Indirect)
569 r600_src->rel = V_SQ_REL_RELATIVE;
570 r600_src->sel = tgsi_src->Register.Index;
571 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
572 }
573 }
574
575 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
576 {
577 struct r600_bc_vtx vtx;
578 unsigned int ar_reg;
579 int r;
580
581 if (offset) {
582 struct r600_bc_alu alu;
583
584 memset(&alu, 0, sizeof(alu));
585
586 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
587 alu.src[0].sel = ctx->ar_reg;
588
589 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
590 alu.src[1].value = offset;
591
592 alu.dst.sel = dst_reg;
593 alu.dst.write = 1;
594 alu.last = 1;
595
596 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
597 return r;
598
599 ar_reg = dst_reg;
600 } else {
601 ar_reg = ctx->ar_reg;
602 }
603
604 memset(&vtx, 0, sizeof(vtx));
605 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
606 vtx.src_gpr = ar_reg;
607 vtx.mega_fetch_count = 16;
608 vtx.dst_gpr = dst_reg;
609 vtx.dst_sel_x = 0; /* SEL_X */
610 vtx.dst_sel_y = 1; /* SEL_Y */
611 vtx.dst_sel_z = 2; /* SEL_Z */
612 vtx.dst_sel_w = 3; /* SEL_W */
613 vtx.data_format = FMT_32_32_32_32_FLOAT;
614 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
615 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
616 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
617
618 if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
619 return r;
620
621 return 0;
622 }
623
624 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
625 {
626 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
627 struct r600_bc_alu alu;
628 int i, j, k, nconst, r;
629
630 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
631 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
632 nconst++;
633 }
634 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
635 }
636 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
637 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
638 continue;
639 }
640
641 if (ctx->src[i].rel) {
642 int treg = r600_get_temp(ctx);
643 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
644 return r;
645
646 ctx->src[i].sel = treg;
647 ctx->src[i].rel = 0;
648 j--;
649 } else if (j > 0) {
650 int treg = r600_get_temp(ctx);
651 for (k = 0; k < 4; k++) {
652 memset(&alu, 0, sizeof(struct r600_bc_alu));
653 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
654 alu.src[0].sel = ctx->src[i].sel;
655 alu.src[0].chan = k;
656 alu.src[0].rel = ctx->src[i].rel;
657 alu.dst.sel = treg;
658 alu.dst.chan = k;
659 alu.dst.write = 1;
660 if (k == 3)
661 alu.last = 1;
662 r = r600_bc_add_alu(ctx->bc, &alu);
663 if (r)
664 return r;
665 }
666 ctx->src[i].sel = treg;
667 ctx->src[i].rel =0;
668 j--;
669 }
670 }
671 return 0;
672 }
673
674 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
675 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
676 {
677 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
678 struct r600_bc_alu alu;
679 int i, j, k, nliteral, r;
680
681 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
682 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
683 nliteral++;
684 }
685 }
686 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
687 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
688 int treg = r600_get_temp(ctx);
689 for (k = 0; k < 4; k++) {
690 memset(&alu, 0, sizeof(struct r600_bc_alu));
691 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
692 alu.src[0].sel = ctx->src[i].sel;
693 alu.src[0].chan = k;
694 alu.src[0].value = ctx->src[i].value[k];
695 alu.dst.sel = treg;
696 alu.dst.chan = k;
697 alu.dst.write = 1;
698 if (k == 3)
699 alu.last = 1;
700 r = r600_bc_add_alu(ctx->bc, &alu);
701 if (r)
702 return r;
703 }
704 ctx->src[i].sel = treg;
705 j--;
706 }
707 }
708 return 0;
709 }
710
711 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
712 {
713 struct tgsi_full_immediate *immediate;
714 struct tgsi_full_property *property;
715 struct r600_shader_ctx ctx;
716 struct r600_bc_output output[32];
717 unsigned noutput;
718 unsigned opcode;
719 int i, r = 0, pos0;
720
721 ctx.bc = &shader->bc;
722 ctx.shader = shader;
723 r = r600_bc_init(ctx.bc, shader->family);
724 if (r)
725 return r;
726 ctx.tokens = tokens;
727 tgsi_scan_shader(tokens, &ctx.info);
728 tgsi_parse_init(&ctx.parse, tokens);
729 ctx.type = ctx.parse.FullHeader.Processor.Processor;
730 shader->processor_type = ctx.type;
731 ctx.bc->type = shader->processor_type;
732
733 /* register allocations */
734 /* Values [0,127] correspond to GPR[0..127].
735 * Values [128,159] correspond to constant buffer bank 0
736 * Values [160,191] correspond to constant buffer bank 1
737 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
738 * Values [256,287] correspond to constant buffer bank 2 (EG)
739 * Values [288,319] correspond to constant buffer bank 3 (EG)
740 * Other special values are shown in the list below.
741 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
742 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
743 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
744 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
745 * 248 SQ_ALU_SRC_0: special constant 0.0.
746 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
747 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
748 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
749 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
750 * 253 SQ_ALU_SRC_LITERAL: literal constant.
751 * 254 SQ_ALU_SRC_PV: previous vector result.
752 * 255 SQ_ALU_SRC_PS: previous scalar result.
753 */
754 for (i = 0; i < TGSI_FILE_COUNT; i++) {
755 ctx.file_offset[i] = 0;
756 }
757 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
758 ctx.file_offset[TGSI_FILE_INPUT] = 1;
759 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
760 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
761 } else {
762 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
763 }
764 }
765 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
766 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
767 }
768 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
769 ctx.info.file_count[TGSI_FILE_INPUT];
770 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
771 ctx.info.file_count[TGSI_FILE_OUTPUT];
772
773 /* Outside the GPR range. This will be translated to one of the
774 * kcache banks later. */
775 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
776
777 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
778 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
779 ctx.info.file_count[TGSI_FILE_TEMPORARY];
780 ctx.temp_reg = ctx.ar_reg + 1;
781
782 ctx.nliterals = 0;
783 ctx.literals = NULL;
784 shader->fs_write_all = FALSE;
785 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
786 tgsi_parse_token(&ctx.parse);
787 switch (ctx.parse.FullToken.Token.Type) {
788 case TGSI_TOKEN_TYPE_IMMEDIATE:
789 immediate = &ctx.parse.FullToken.FullImmediate;
790 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
791 if(ctx.literals == NULL) {
792 r = -ENOMEM;
793 goto out_err;
794 }
795 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
796 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
797 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
798 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
799 ctx.nliterals++;
800 break;
801 case TGSI_TOKEN_TYPE_DECLARATION:
802 r = tgsi_declaration(&ctx);
803 if (r)
804 goto out_err;
805 break;
806 case TGSI_TOKEN_TYPE_INSTRUCTION:
807 r = tgsi_is_supported(&ctx);
808 if (r)
809 goto out_err;
810 ctx.max_driver_temp_used = 0;
811 /* reserve first tmp for everyone */
812 r600_get_temp(&ctx);
813
814 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
815 if ((r = tgsi_split_constant(&ctx)))
816 goto out_err;
817 if ((r = tgsi_split_literal_constant(&ctx)))
818 goto out_err;
819 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
820 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
821 else
822 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
823 r = ctx.inst_info->process(&ctx);
824 if (r)
825 goto out_err;
826 break;
827 case TGSI_TOKEN_TYPE_PROPERTY:
828 property = &ctx.parse.FullToken.FullProperty;
829 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
830 if (property->u[0].Data == 1)
831 shader->fs_write_all = TRUE;
832 }
833 break;
834 default:
835 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
836 r = -EINVAL;
837 goto out_err;
838 }
839 }
840 /* export output */
841 noutput = shader->noutput;
842 for (i = 0, pos0 = 0; i < noutput; i++) {
843 memset(&output[i], 0, sizeof(struct r600_bc_output));
844 output[i].gpr = shader->output[i].gpr;
845 output[i].elem_size = 3;
846 output[i].swizzle_x = 0;
847 output[i].swizzle_y = 1;
848 output[i].swizzle_z = 2;
849 output[i].swizzle_w = 3;
850 output[i].burst_count = 1;
851 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
852 output[i].array_base = i - pos0;
853 switch (ctx.type) {
854 case TGSI_PROCESSOR_VERTEX:
855 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
856 output[i].array_base = 60;
857 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
858 /* position doesn't count in array_base */
859 pos0++;
860 }
861 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
862 output[i].array_base = 61;
863 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
864 /* position doesn't count in array_base */
865 pos0++;
866 }
867 break;
868 case TGSI_PROCESSOR_FRAGMENT:
869 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
870 output[i].array_base = shader->output[i].sid;
871 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
872 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
873 output[i].array_base = 61;
874 output[i].swizzle_x = 2;
875 output[i].swizzle_y = 7;
876 output[i].swizzle_z = output[i].swizzle_w = 7;
877 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
878 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
879 output[i].array_base = 61;
880 output[i].swizzle_x = 7;
881 output[i].swizzle_y = 1;
882 output[i].swizzle_z = output[i].swizzle_w = 7;
883 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
884 } else {
885 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
886 r = -EINVAL;
887 goto out_err;
888 }
889 break;
890 default:
891 R600_ERR("unsupported processor type %d\n", ctx.type);
892 r = -EINVAL;
893 goto out_err;
894 }
895 }
896 /* add fake param output for vertex shader if no param is exported */
897 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
898 for (i = 0, pos0 = 0; i < noutput; i++) {
899 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
900 pos0 = 1;
901 break;
902 }
903 }
904 if (!pos0) {
905 memset(&output[i], 0, sizeof(struct r600_bc_output));
906 output[i].gpr = 0;
907 output[i].elem_size = 3;
908 output[i].swizzle_x = 0;
909 output[i].swizzle_y = 1;
910 output[i].swizzle_z = 2;
911 output[i].swizzle_w = 3;
912 output[i].burst_count = 1;
913 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
914 output[i].array_base = 0;
915 noutput++;
916 }
917 }
918 /* add fake pixel export */
919 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
920 memset(&output[0], 0, sizeof(struct r600_bc_output));
921 output[0].gpr = 0;
922 output[0].elem_size = 3;
923 output[0].swizzle_x = 7;
924 output[0].swizzle_y = 7;
925 output[0].swizzle_z = 7;
926 output[0].swizzle_w = 7;
927 output[0].burst_count = 1;
928 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
929 output[0].array_base = 0;
930 noutput++;
931 }
932 /* add output to bytecode */
933 for (i = 0; i < noutput; i++) {
934 r = r600_bc_add_output(ctx.bc, &output[i]);
935 if (r)
936 goto out_err;
937 }
938 free(ctx.literals);
939 tgsi_parse_free(&ctx.parse);
940 return 0;
941 out_err:
942 free(ctx.literals);
943 tgsi_parse_free(&ctx.parse);
944 return r;
945 }
946
947 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
948 {
949 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
950 return -EINVAL;
951 }
952
/* Handler for opcodes that emit nothing; always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* intentionally unused */
	return 0;
}
957
958 static void r600_bc_src(struct r600_bc_alu_src *bc_src,
959 const struct r600_shader_src *shader_src,
960 unsigned chan)
961 {
962 bc_src->sel = shader_src->sel;
963 bc_src->chan = shader_src->swizzle[chan];
964 bc_src->neg = shader_src->neg;
965 bc_src->abs = shader_src->abs;
966 bc_src->rel = shader_src->rel;
967 bc_src->value = shader_src->value[bc_src->chan];
968 }
969
970 static void tgsi_dst(struct r600_shader_ctx *ctx,
971 const struct tgsi_full_dst_register *tgsi_dst,
972 unsigned swizzle,
973 struct r600_bc_alu_dst *r600_dst)
974 {
975 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
976
977 r600_dst->sel = tgsi_dst->Register.Index;
978 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
979 r600_dst->chan = swizzle;
980 r600_dst->write = 1;
981 if (tgsi_dst->Register.Indirect)
982 r600_dst->rel = V_SQ_REL_RELATIVE;
983 if (inst->Instruction.Saturate) {
984 r600_dst->clamp = 1;
985 }
986 }
987
/*
 * Return the index (0..3) of the highest channel enabled in 'writemask'.
 * Only the low four bits are looked at; 0 is returned when none is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to y; x (or nothing) falls through to 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
999
1000 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1001 {
1002 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1003 struct r600_bc_alu alu;
1004 int i, j, r;
1005 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1006
1007 for (i = 0; i < lasti + 1; i++) {
1008 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1009 continue;
1010
1011 memset(&alu, 0, sizeof(struct r600_bc_alu));
1012 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1013
1014 alu.inst = ctx->inst_info->r600_opcode;
1015 if (!swap) {
1016 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1017 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1018 }
1019 } else {
1020 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1021 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1022 }
1023 /* handle some special cases */
1024 switch (ctx->inst_info->tgsi_opcode) {
1025 case TGSI_OPCODE_SUB:
1026 alu.src[1].neg = 1;
1027 break;
1028 case TGSI_OPCODE_ABS:
1029 alu.src[0].abs = 1;
1030 break;
1031 default:
1032 break;
1033 }
1034 if (i == lasti) {
1035 alu.last = 1;
1036 }
1037 r = r600_bc_add_alu(ctx->bc, &alu);
1038 if (r)
1039 return r;
1040 }
1041 return 0;
1042 }
1043
/* Two-operand ALU translation with sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1048
/* Two-operand ALU translation with the two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1053
1054 /*
1055 * r600 - trunc to -PI..PI range
1056 * r700 - normalize by dividing by 2PI
1057 * see fdo bug 27901
1058 */
1059 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1060 {
1061 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1062 static float double_pi = 3.1415926535 * 2;
1063 static float neg_pi = -3.1415926535;
1064
1065 int r;
1066 struct r600_bc_alu alu;
1067
1068 memset(&alu, 0, sizeof(struct r600_bc_alu));
1069 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1070 alu.is_op3 = 1;
1071
1072 alu.dst.chan = 0;
1073 alu.dst.sel = ctx->temp_reg;
1074 alu.dst.write = 1;
1075
1076 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1077
1078 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1079 alu.src[1].chan = 0;
1080 alu.src[1].value = *(uint32_t *)&half_inv_pi;
1081 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1082 alu.src[2].chan = 0;
1083 alu.last = 1;
1084 r = r600_bc_add_alu(ctx->bc, &alu);
1085 if (r)
1086 return r;
1087
1088 memset(&alu, 0, sizeof(struct r600_bc_alu));
1089 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1090
1091 alu.dst.chan = 0;
1092 alu.dst.sel = ctx->temp_reg;
1093 alu.dst.write = 1;
1094
1095 alu.src[0].sel = ctx->temp_reg;
1096 alu.src[0].chan = 0;
1097 alu.last = 1;
1098 r = r600_bc_add_alu(ctx->bc, &alu);
1099 if (r)
1100 return r;
1101
1102 memset(&alu, 0, sizeof(struct r600_bc_alu));
1103 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1104 alu.is_op3 = 1;
1105
1106 alu.dst.chan = 0;
1107 alu.dst.sel = ctx->temp_reg;
1108 alu.dst.write = 1;
1109
1110 alu.src[0].sel = ctx->temp_reg;
1111 alu.src[0].chan = 0;
1112
1113 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1114 alu.src[1].chan = 0;
1115 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1116 alu.src[2].chan = 0;
1117
1118 if (ctx->bc->chiprev == CHIPREV_R600) {
1119 alu.src[1].value = *(uint32_t *)&double_pi;
1120 alu.src[2].value = *(uint32_t *)&neg_pi;
1121 } else {
1122 alu.src[1].sel = V_SQ_ALU_SRC_1;
1123 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1124 alu.src[2].neg = 1;
1125 }
1126
1127 alu.last = 1;
1128 r = r600_bc_add_alu(ctx->bc, &alu);
1129 if (r)
1130 return r;
1131 return 0;
1132 }
1133
1134 static int tgsi_trig(struct r600_shader_ctx *ctx)
1135 {
1136 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1137 struct r600_bc_alu alu;
1138 int i, r;
1139 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1140
1141 r = tgsi_setup_trig(ctx);
1142 if (r)
1143 return r;
1144
1145 memset(&alu, 0, sizeof(struct r600_bc_alu));
1146 alu.inst = ctx->inst_info->r600_opcode;
1147 alu.dst.chan = 0;
1148 alu.dst.sel = ctx->temp_reg;
1149 alu.dst.write = 1;
1150
1151 alu.src[0].sel = ctx->temp_reg;
1152 alu.src[0].chan = 0;
1153 alu.last = 1;
1154 r = r600_bc_add_alu(ctx->bc, &alu);
1155 if (r)
1156 return r;
1157
1158 /* replicate result */
1159 for (i = 0; i < lasti + 1; i++) {
1160 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1161 continue;
1162
1163 memset(&alu, 0, sizeof(struct r600_bc_alu));
1164 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1165
1166 alu.src[0].sel = ctx->temp_reg;
1167 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1168 if (i == lasti)
1169 alu.last = 1;
1170 r = r600_bc_add_alu(ctx->bc, &alu);
1171 if (r)
1172 return r;
1173 }
1174 return 0;
1175 }
1176
1177 static int tgsi_scs(struct r600_shader_ctx *ctx)
1178 {
1179 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1180 struct r600_bc_alu alu;
1181 int r;
1182
1183 /* We'll only need the trig stuff if we are going to write to the
1184 * X or Y components of the destination vector.
1185 */
1186 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1187 r = tgsi_setup_trig(ctx);
1188 if (r)
1189 return r;
1190 }
1191
1192 /* dst.x = COS */
1193 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1194 memset(&alu, 0, sizeof(struct r600_bc_alu));
1195 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1196 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1197
1198 alu.src[0].sel = ctx->temp_reg;
1199 alu.src[0].chan = 0;
1200 alu.last = 1;
1201 r = r600_bc_add_alu(ctx->bc, &alu);
1202 if (r)
1203 return r;
1204 }
1205
1206 /* dst.y = SIN */
1207 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1208 memset(&alu, 0, sizeof(struct r600_bc_alu));
1209 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1210 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1211
1212 alu.src[0].sel = ctx->temp_reg;
1213 alu.src[0].chan = 0;
1214 alu.last = 1;
1215 r = r600_bc_add_alu(ctx->bc, &alu);
1216 if (r)
1217 return r;
1218 }
1219
1220 /* dst.z = 0.0; */
1221 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1222 memset(&alu, 0, sizeof(struct r600_bc_alu));
1223
1224 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1225
1226 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1227
1228 alu.src[0].sel = V_SQ_ALU_SRC_0;
1229 alu.src[0].chan = 0;
1230
1231 alu.last = 1;
1232
1233 r = r600_bc_add_alu(ctx->bc, &alu);
1234 if (r)
1235 return r;
1236 }
1237
1238 /* dst.w = 1.0; */
1239 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1240 memset(&alu, 0, sizeof(struct r600_bc_alu));
1241
1242 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1243
1244 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1245
1246 alu.src[0].sel = V_SQ_ALU_SRC_1;
1247 alu.src[0].chan = 0;
1248
1249 alu.last = 1;
1250
1251 r = r600_bc_add_alu(ctx->bc, &alu);
1252 if (r)
1253 return r;
1254 }
1255
1256 return 0;
1257 }
1258
1259 static int tgsi_kill(struct r600_shader_ctx *ctx)
1260 {
1261 struct r600_bc_alu alu;
1262 int i, r;
1263
1264 for (i = 0; i < 4; i++) {
1265 memset(&alu, 0, sizeof(struct r600_bc_alu));
1266 alu.inst = ctx->inst_info->r600_opcode;
1267
1268 alu.dst.chan = i;
1269
1270 alu.src[0].sel = V_SQ_ALU_SRC_0;
1271
1272 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1273 alu.src[1].sel = V_SQ_ALU_SRC_1;
1274 alu.src[1].neg = 1;
1275 } else {
1276 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1277 }
1278 if (i == 3) {
1279 alu.last = 1;
1280 }
1281 r = r600_bc_add_alu(ctx->bc, &alu);
1282 if (r)
1283 return r;
1284 }
1285
1286 /* kill must be last in ALU */
1287 ctx->bc->force_add_cf = 1;
1288 ctx->shader->uses_kill = TRUE;
1289 return 0;
1290 }
1291
1292 static int tgsi_lit(struct r600_shader_ctx *ctx)
1293 {
1294 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1295 struct r600_bc_alu alu;
1296 int r;
1297
1298 /* dst.x, <- 1.0 */
1299 memset(&alu, 0, sizeof(struct r600_bc_alu));
1300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1301 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1302 alu.src[0].chan = 0;
1303 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1304 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1305 r = r600_bc_add_alu(ctx->bc, &alu);
1306 if (r)
1307 return r;
1308
1309 /* dst.y = max(src.x, 0.0) */
1310 memset(&alu, 0, sizeof(struct r600_bc_alu));
1311 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1312 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1313 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1314 alu.src[1].chan = 0;
1315 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1316 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1317 r = r600_bc_add_alu(ctx->bc, &alu);
1318 if (r)
1319 return r;
1320
1321 /* dst.w, <- 1.0 */
1322 memset(&alu, 0, sizeof(struct r600_bc_alu));
1323 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1324 alu.src[0].sel = V_SQ_ALU_SRC_1;
1325 alu.src[0].chan = 0;
1326 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1327 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1328 alu.last = 1;
1329 r = r600_bc_add_alu(ctx->bc, &alu);
1330 if (r)
1331 return r;
1332
1333 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1334 {
1335 int chan;
1336 int sel;
1337
1338 /* dst.z = log(src.y) */
1339 memset(&alu, 0, sizeof(struct r600_bc_alu));
1340 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1341 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1342 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1343 alu.last = 1;
1344 r = r600_bc_add_alu(ctx->bc, &alu);
1345 if (r)
1346 return r;
1347
1348 chan = alu.dst.chan;
1349 sel = alu.dst.sel;
1350
1351 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1352 memset(&alu, 0, sizeof(struct r600_bc_alu));
1353 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1354 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1355 alu.src[1].sel = sel;
1356 alu.src[1].chan = chan;
1357
1358 r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1359 alu.dst.sel = ctx->temp_reg;
1360 alu.dst.chan = 0;
1361 alu.dst.write = 1;
1362 alu.is_op3 = 1;
1363 alu.last = 1;
1364 r = r600_bc_add_alu(ctx->bc, &alu);
1365 if (r)
1366 return r;
1367
1368 /* dst.z = exp(tmp.x) */
1369 memset(&alu, 0, sizeof(struct r600_bc_alu));
1370 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1371 alu.src[0].sel = ctx->temp_reg;
1372 alu.src[0].chan = 0;
1373 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1374 alu.last = 1;
1375 r = r600_bc_add_alu(ctx->bc, &alu);
1376 if (r)
1377 return r;
1378 }
1379 return 0;
1380 }
1381
1382 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1383 {
1384 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1385 struct r600_bc_alu alu;
1386 int i, r;
1387
1388 memset(&alu, 0, sizeof(struct r600_bc_alu));
1389
1390 /* FIXME:
1391 * For state trackers other than OpenGL, we'll want to use
1392 * _RECIPSQRT_IEEE instead.
1393 */
1394 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1395
1396 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1397 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1398 alu.src[i].abs = 1;
1399 }
1400 alu.dst.sel = ctx->temp_reg;
1401 alu.dst.write = 1;
1402 alu.last = 1;
1403 r = r600_bc_add_alu(ctx->bc, &alu);
1404 if (r)
1405 return r;
1406 /* replicate result */
1407 return tgsi_helper_tempx_replicate(ctx);
1408 }
1409
1410 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1411 {
1412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1413 struct r600_bc_alu alu;
1414 int i, r;
1415
1416 for (i = 0; i < 4; i++) {
1417 memset(&alu, 0, sizeof(struct r600_bc_alu));
1418 alu.src[0].sel = ctx->temp_reg;
1419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1420 alu.dst.chan = i;
1421 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1422 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1423 if (i == 3)
1424 alu.last = 1;
1425 r = r600_bc_add_alu(ctx->bc, &alu);
1426 if (r)
1427 return r;
1428 }
1429 return 0;
1430 }
1431
1432 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1433 {
1434 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1435 struct r600_bc_alu alu;
1436 int i, r;
1437
1438 memset(&alu, 0, sizeof(struct r600_bc_alu));
1439 alu.inst = ctx->inst_info->r600_opcode;
1440 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1441 r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1442 }
1443 alu.dst.sel = ctx->temp_reg;
1444 alu.dst.write = 1;
1445 alu.last = 1;
1446 r = r600_bc_add_alu(ctx->bc, &alu);
1447 if (r)
1448 return r;
1449 /* replicate result */
1450 return tgsi_helper_tempx_replicate(ctx);
1451 }
1452
1453 static int tgsi_pow(struct r600_shader_ctx *ctx)
1454 {
1455 struct r600_bc_alu alu;
1456 int r;
1457
1458 /* LOG2(a) */
1459 memset(&alu, 0, sizeof(struct r600_bc_alu));
1460 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1461 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1462 alu.dst.sel = ctx->temp_reg;
1463 alu.dst.write = 1;
1464 alu.last = 1;
1465 r = r600_bc_add_alu(ctx->bc, &alu);
1466 if (r)
1467 return r;
1468 /* b * LOG2(a) */
1469 memset(&alu, 0, sizeof(struct r600_bc_alu));
1470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1471 r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1472 alu.src[1].sel = ctx->temp_reg;
1473 alu.dst.sel = ctx->temp_reg;
1474 alu.dst.write = 1;
1475 alu.last = 1;
1476 r = r600_bc_add_alu(ctx->bc, &alu);
1477 if (r)
1478 return r;
1479 /* POW(a,b) = EXP2(b * LOG2(a))*/
1480 memset(&alu, 0, sizeof(struct r600_bc_alu));
1481 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1482 alu.src[0].sel = ctx->temp_reg;
1483 alu.dst.sel = ctx->temp_reg;
1484 alu.dst.write = 1;
1485 alu.last = 1;
1486 r = r600_bc_add_alu(ctx->bc, &alu);
1487 if (r)
1488 return r;
1489 return tgsi_helper_tempx_replicate(ctx);
1490 }
1491
1492 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1493 {
1494 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1495 struct r600_bc_alu alu;
1496 int i, r;
1497
1498 /* tmp = (src > 0 ? 1 : src) */
1499 for (i = 0; i < 4; i++) {
1500 memset(&alu, 0, sizeof(struct r600_bc_alu));
1501 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1502 alu.is_op3 = 1;
1503
1504 alu.dst.sel = ctx->temp_reg;
1505 alu.dst.chan = i;
1506
1507 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1508 alu.src[1].sel = V_SQ_ALU_SRC_1;
1509 r600_bc_src(&alu.src[2], &ctx->src[0], i);
1510
1511 if (i == 3)
1512 alu.last = 1;
1513 r = r600_bc_add_alu(ctx->bc, &alu);
1514 if (r)
1515 return r;
1516 }
1517
1518 /* dst = (-tmp > 0 ? -1 : tmp) */
1519 for (i = 0; i < 4; i++) {
1520 memset(&alu, 0, sizeof(struct r600_bc_alu));
1521 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1522 alu.is_op3 = 1;
1523 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1524
1525 alu.src[0].sel = ctx->temp_reg;
1526 alu.src[0].chan = i;
1527 alu.src[0].neg = 1;
1528
1529 alu.src[1].sel = V_SQ_ALU_SRC_1;
1530 alu.src[1].neg = 1;
1531
1532 alu.src[2].sel = ctx->temp_reg;
1533 alu.src[2].chan = i;
1534
1535 if (i == 3)
1536 alu.last = 1;
1537 r = r600_bc_add_alu(ctx->bc, &alu);
1538 if (r)
1539 return r;
1540 }
1541 return 0;
1542 }
1543
1544 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1545 {
1546 struct r600_bc_alu alu;
1547 int i, r;
1548
1549 for (i = 0; i < 4; i++) {
1550 memset(&alu, 0, sizeof(struct r600_bc_alu));
1551 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1552 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1553 alu.dst.chan = i;
1554 } else {
1555 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1556 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1557 alu.src[0].sel = ctx->temp_reg;
1558 alu.src[0].chan = i;
1559 }
1560 if (i == 3) {
1561 alu.last = 1;
1562 }
1563 r = r600_bc_add_alu(ctx->bc, &alu);
1564 if (r)
1565 return r;
1566 }
1567 return 0;
1568 }
1569
1570 static int tgsi_op3(struct r600_shader_ctx *ctx)
1571 {
1572 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1573 struct r600_bc_alu alu;
1574 int i, j, r;
1575 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1576
1577 for (i = 0; i < lasti + 1; i++) {
1578 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1579 continue;
1580
1581 memset(&alu, 0, sizeof(struct r600_bc_alu));
1582 alu.inst = ctx->inst_info->r600_opcode;
1583 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1584 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1585 }
1586
1587 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1588 alu.dst.chan = i;
1589 alu.dst.write = 1;
1590 alu.is_op3 = 1;
1591 if (i == lasti) {
1592 alu.last = 1;
1593 }
1594 r = r600_bc_add_alu(ctx->bc, &alu);
1595 if (r)
1596 return r;
1597 }
1598 return 0;
1599 }
1600
1601 static int tgsi_dp(struct r600_shader_ctx *ctx)
1602 {
1603 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1604 struct r600_bc_alu alu;
1605 int i, j, r;
1606
1607 for (i = 0; i < 4; i++) {
1608 memset(&alu, 0, sizeof(struct r600_bc_alu));
1609 alu.inst = ctx->inst_info->r600_opcode;
1610 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1611 r600_bc_src(&alu.src[j], &ctx->src[j], i);
1612 }
1613
1614 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1615 alu.dst.chan = i;
1616 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1617 /* handle some special cases */
1618 switch (ctx->inst_info->tgsi_opcode) {
1619 case TGSI_OPCODE_DP2:
1620 if (i > 1) {
1621 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1622 alu.src[0].chan = alu.src[1].chan = 0;
1623 }
1624 break;
1625 case TGSI_OPCODE_DP3:
1626 if (i > 2) {
1627 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1628 alu.src[0].chan = alu.src[1].chan = 0;
1629 }
1630 break;
1631 case TGSI_OPCODE_DPH:
1632 if (i == 3) {
1633 alu.src[0].sel = V_SQ_ALU_SRC_1;
1634 alu.src[0].chan = 0;
1635 alu.src[0].neg = 0;
1636 }
1637 break;
1638 default:
1639 break;
1640 }
1641 if (i == 3) {
1642 alu.last = 1;
1643 }
1644 r = r600_bc_add_alu(ctx->bc, &alu);
1645 if (r)
1646 return r;
1647 }
1648 return 0;
1649 }
1650
1651 static int tgsi_tex(struct r600_shader_ctx *ctx)
1652 {
1653 static float one_point_five = 1.5f;
1654 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1655 struct r600_bc_tex tex;
1656 struct r600_bc_alu alu;
1657 unsigned src_gpr;
1658 int r, i;
1659 int opcode;
1660 boolean src_not_temp =
1661 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1662 inst->Src[0].Register.File != TGSI_FILE_INPUT;
1663
1664 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1665
1666 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1667 /* Add perspective divide */
1668 memset(&alu, 0, sizeof(struct r600_bc_alu));
1669 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1670 r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1671
1672 alu.dst.sel = ctx->temp_reg;
1673 alu.dst.chan = 3;
1674 alu.last = 1;
1675 alu.dst.write = 1;
1676 r = r600_bc_add_alu(ctx->bc, &alu);
1677 if (r)
1678 return r;
1679
1680 for (i = 0; i < 3; i++) {
1681 memset(&alu, 0, sizeof(struct r600_bc_alu));
1682 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1683 alu.src[0].sel = ctx->temp_reg;
1684 alu.src[0].chan = 3;
1685 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1686 alu.dst.sel = ctx->temp_reg;
1687 alu.dst.chan = i;
1688 alu.dst.write = 1;
1689 r = r600_bc_add_alu(ctx->bc, &alu);
1690 if (r)
1691 return r;
1692 }
1693 memset(&alu, 0, sizeof(struct r600_bc_alu));
1694 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1695 alu.src[0].sel = V_SQ_ALU_SRC_1;
1696 alu.src[0].chan = 0;
1697 alu.dst.sel = ctx->temp_reg;
1698 alu.dst.chan = 3;
1699 alu.last = 1;
1700 alu.dst.write = 1;
1701 r = r600_bc_add_alu(ctx->bc, &alu);
1702 if (r)
1703 return r;
1704 src_not_temp = FALSE;
1705 src_gpr = ctx->temp_reg;
1706 }
1707
1708 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1709 int src_chan, src2_chan;
1710
1711 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1712 for (i = 0; i < 4; i++) {
1713 memset(&alu, 0, sizeof(struct r600_bc_alu));
1714 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1715 switch (i) {
1716 case 0:
1717 src_chan = 2;
1718 src2_chan = 1;
1719 break;
1720 case 1:
1721 src_chan = 2;
1722 src2_chan = 0;
1723 break;
1724 case 2:
1725 src_chan = 0;
1726 src2_chan = 2;
1727 break;
1728 case 3:
1729 src_chan = 1;
1730 src2_chan = 2;
1731 break;
1732 default:
1733 assert(0);
1734 src_chan = 0;
1735 src2_chan = 0;
1736 break;
1737 }
1738 r600_bc_src(&alu.src[0], &ctx->src[0], src_chan);
1739 r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan);
1740 alu.dst.sel = ctx->temp_reg;
1741 alu.dst.chan = i;
1742 if (i == 3)
1743 alu.last = 1;
1744 alu.dst.write = 1;
1745 r = r600_bc_add_alu(ctx->bc, &alu);
1746 if (r)
1747 return r;
1748 }
1749
1750 /* tmp1.z = RCP_e(|tmp1.z|) */
1751 memset(&alu, 0, sizeof(struct r600_bc_alu));
1752 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1753 alu.src[0].sel = ctx->temp_reg;
1754 alu.src[0].chan = 2;
1755 alu.src[0].abs = 1;
1756 alu.dst.sel = ctx->temp_reg;
1757 alu.dst.chan = 2;
1758 alu.dst.write = 1;
1759 alu.last = 1;
1760 r = r600_bc_add_alu(ctx->bc, &alu);
1761 if (r)
1762 return r;
1763
1764 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1765 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1766 * muladd has no writemask, have to use another temp
1767 */
1768 memset(&alu, 0, sizeof(struct r600_bc_alu));
1769 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1770 alu.is_op3 = 1;
1771
1772 alu.src[0].sel = ctx->temp_reg;
1773 alu.src[0].chan = 0;
1774 alu.src[1].sel = ctx->temp_reg;
1775 alu.src[1].chan = 2;
1776
1777 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1778 alu.src[2].chan = 0;
1779 alu.src[2].value = *(uint32_t *)&one_point_five;
1780
1781 alu.dst.sel = ctx->temp_reg;
1782 alu.dst.chan = 0;
1783 alu.dst.write = 1;
1784
1785 r = r600_bc_add_alu(ctx->bc, &alu);
1786 if (r)
1787 return r;
1788
1789 memset(&alu, 0, sizeof(struct r600_bc_alu));
1790 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1791 alu.is_op3 = 1;
1792
1793 alu.src[0].sel = ctx->temp_reg;
1794 alu.src[0].chan = 1;
1795 alu.src[1].sel = ctx->temp_reg;
1796 alu.src[1].chan = 2;
1797
1798 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1799 alu.src[2].chan = 0;
1800 alu.src[2].value = *(uint32_t *)&one_point_five;
1801
1802 alu.dst.sel = ctx->temp_reg;
1803 alu.dst.chan = 1;
1804 alu.dst.write = 1;
1805
1806 alu.last = 1;
1807 r = r600_bc_add_alu(ctx->bc, &alu);
1808 if (r)
1809 return r;
1810
1811 src_not_temp = FALSE;
1812 src_gpr = ctx->temp_reg;
1813 }
1814
1815 if (src_not_temp) {
1816 for (i = 0; i < 4; i++) {
1817 memset(&alu, 0, sizeof(struct r600_bc_alu));
1818 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1819 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1820 alu.dst.sel = ctx->temp_reg;
1821 alu.dst.chan = i;
1822 if (i == 3)
1823 alu.last = 1;
1824 alu.dst.write = 1;
1825 r = r600_bc_add_alu(ctx->bc, &alu);
1826 if (r)
1827 return r;
1828 }
1829 src_gpr = ctx->temp_reg;
1830 }
1831
1832 opcode = ctx->inst_info->r600_opcode;
1833 if (opcode == SQ_TEX_INST_SAMPLE &&
1834 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1835 opcode = SQ_TEX_INST_SAMPLE_C;
1836
1837 memset(&tex, 0, sizeof(struct r600_bc_tex));
1838 tex.inst = opcode;
1839 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1840 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1841 tex.src_gpr = src_gpr;
1842 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1843 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1844 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1845 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1846 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1847 tex.src_sel_x = 0;
1848 tex.src_sel_y = 1;
1849 tex.src_sel_z = 2;
1850 tex.src_sel_w = 3;
1851
1852 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1853 tex.src_sel_x = 1;
1854 tex.src_sel_y = 0;
1855 tex.src_sel_z = 3;
1856 tex.src_sel_w = 1;
1857 }
1858
1859 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1860 tex.coord_type_x = 1;
1861 tex.coord_type_y = 1;
1862 tex.coord_type_z = 1;
1863 tex.coord_type_w = 1;
1864 }
1865
1866 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1867 tex.coord_type_z = 0;
1868 tex.src_sel_z = 1;
1869 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1870 tex.coord_type_z = 0;
1871
1872 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1873 tex.src_sel_w = 2;
1874
1875 r = r600_bc_add_tex(ctx->bc, &tex);
1876 if (r)
1877 return r;
1878
1879 /* add shadow ambient support - gallium doesn't do it yet */
1880 return 0;
1881 }
1882
1883 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1884 {
1885 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1886 struct r600_bc_alu alu;
1887 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1888 unsigned i;
1889 int r;
1890
1891 /* optimize if it's just an equal balance */
1892 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1893 for (i = 0; i < lasti + 1; i++) {
1894 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1895 continue;
1896
1897 memset(&alu, 0, sizeof(struct r600_bc_alu));
1898 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1899 r600_bc_src(&alu.src[0], &ctx->src[1], i);
1900 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1901 alu.omod = 3;
1902 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1903 alu.dst.chan = i;
1904 if (i == lasti) {
1905 alu.last = 1;
1906 }
1907 r = r600_bc_add_alu(ctx->bc, &alu);
1908 if (r)
1909 return r;
1910 }
1911 return 0;
1912 }
1913
1914 /* 1 - src0 */
1915 for (i = 0; i < lasti + 1; i++) {
1916 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1917 continue;
1918
1919 memset(&alu, 0, sizeof(struct r600_bc_alu));
1920 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1921 alu.src[0].sel = V_SQ_ALU_SRC_1;
1922 alu.src[0].chan = 0;
1923 r600_bc_src(&alu.src[1], &ctx->src[0], i);
1924 alu.src[1].neg = 1;
1925 alu.dst.sel = ctx->temp_reg;
1926 alu.dst.chan = i;
1927 if (i == lasti) {
1928 alu.last = 1;
1929 }
1930 alu.dst.write = 1;
1931 r = r600_bc_add_alu(ctx->bc, &alu);
1932 if (r)
1933 return r;
1934 }
1935
1936 /* (1 - src0) * src2 */
1937 for (i = 0; i < lasti + 1; i++) {
1938 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1939 continue;
1940
1941 memset(&alu, 0, sizeof(struct r600_bc_alu));
1942 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1943 alu.src[0].sel = ctx->temp_reg;
1944 alu.src[0].chan = i;
1945 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1946 alu.dst.sel = ctx->temp_reg;
1947 alu.dst.chan = i;
1948 if (i == lasti) {
1949 alu.last = 1;
1950 }
1951 alu.dst.write = 1;
1952 r = r600_bc_add_alu(ctx->bc, &alu);
1953 if (r)
1954 return r;
1955 }
1956
1957 /* src0 * src1 + (1 - src0) * src2 */
1958 for (i = 0; i < lasti + 1; i++) {
1959 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1960 continue;
1961
1962 memset(&alu, 0, sizeof(struct r600_bc_alu));
1963 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1964 alu.is_op3 = 1;
1965 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1966 r600_bc_src(&alu.src[1], &ctx->src[1], i);
1967 alu.src[2].sel = ctx->temp_reg;
1968 alu.src[2].chan = i;
1969
1970 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1971 alu.dst.chan = i;
1972 if (i == lasti) {
1973 alu.last = 1;
1974 }
1975 r = r600_bc_add_alu(ctx->bc, &alu);
1976 if (r)
1977 return r;
1978 }
1979 return 0;
1980 }
1981
1982 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1983 {
1984 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1985 struct r600_bc_alu alu;
1986 int i, r;
1987 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1988
1989 for (i = 0; i < lasti + 1; i++) {
1990 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1991 continue;
1992
1993 memset(&alu, 0, sizeof(struct r600_bc_alu));
1994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1995 r600_bc_src(&alu.src[0], &ctx->src[0], i);
1996 r600_bc_src(&alu.src[1], &ctx->src[2], i);
1997 r600_bc_src(&alu.src[2], &ctx->src[1], i);
1998 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1999 alu.dst.chan = i;
2000 alu.dst.write = 1;
2001 alu.is_op3 = 1;
2002 if (i == lasti)
2003 alu.last = 1;
2004 r = r600_bc_add_alu(ctx->bc, &alu);
2005 if (r)
2006 return r;
2007 }
2008 return 0;
2009 }
2010
2011 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2012 {
2013 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2014 struct r600_bc_alu alu;
2015 uint32_t use_temp = 0;
2016 int i, r;
2017
2018 if (inst->Dst[0].Register.WriteMask != 0xf)
2019 use_temp = 1;
2020
2021 for (i = 0; i < 4; i++) {
2022 memset(&alu, 0, sizeof(struct r600_bc_alu));
2023 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2024
2025 switch (i) {
2026 case 0:
2027 r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2028 break;
2029 case 1:
2030 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2031 break;
2032 case 2:
2033 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2034 break;
2035 case 3:
2036 alu.src[0].sel = V_SQ_ALU_SRC_0;
2037 alu.src[0].chan = i;
2038 }
2039
2040 switch (i) {
2041 case 0:
2042 r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2043 break;
2044 case 1:
2045 r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2046 break;
2047 case 2:
2048 r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2049 break;
2050 case 3:
2051 alu.src[1].sel = V_SQ_ALU_SRC_0;
2052 alu.src[1].chan = i;
2053 }
2054
2055 alu.dst.sel = ctx->temp_reg;
2056 alu.dst.chan = i;
2057 alu.dst.write = 1;
2058
2059 if (i == 3)
2060 alu.last = 1;
2061 r = r600_bc_add_alu(ctx->bc, &alu);
2062 if (r)
2063 return r;
2064 }
2065
2066 for (i = 0; i < 4; i++) {
2067 memset(&alu, 0, sizeof(struct r600_bc_alu));
2068 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2069
2070 switch (i) {
2071 case 0:
2072 r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2073 break;
2074 case 1:
2075 r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2076 break;
2077 case 2:
2078 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2079 break;
2080 case 3:
2081 alu.src[0].sel = V_SQ_ALU_SRC_0;
2082 alu.src[0].chan = i;
2083 }
2084
2085 switch (i) {
2086 case 0:
2087 r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2088 break;
2089 case 1:
2090 r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2091 break;
2092 case 2:
2093 r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2094 break;
2095 case 3:
2096 alu.src[1].sel = V_SQ_ALU_SRC_0;
2097 alu.src[1].chan = i;
2098 }
2099
2100 alu.src[2].sel = ctx->temp_reg;
2101 alu.src[2].neg = 1;
2102 alu.src[2].chan = i;
2103
2104 if (use_temp)
2105 alu.dst.sel = ctx->temp_reg;
2106 else
2107 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2108 alu.dst.chan = i;
2109 alu.dst.write = 1;
2110 alu.is_op3 = 1;
2111 if (i == 3)
2112 alu.last = 1;
2113 r = r600_bc_add_alu(ctx->bc, &alu);
2114 if (r)
2115 return r;
2116 }
2117 if (use_temp)
2118 return tgsi_helper_copy(ctx, inst);
2119 return 0;
2120 }
2121
2122 static int tgsi_exp(struct r600_shader_ctx *ctx)
2123 {
2124 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2125 struct r600_bc_alu alu;
2126 int r;
2127
2128 /* result.x = 2^floor(src); */
2129 if (inst->Dst[0].Register.WriteMask & 1) {
2130 memset(&alu, 0, sizeof(struct r600_bc_alu));
2131
2132 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2133 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2134
2135 alu.dst.sel = ctx->temp_reg;
2136 alu.dst.chan = 0;
2137 alu.dst.write = 1;
2138 alu.last = 1;
2139 r = r600_bc_add_alu(ctx->bc, &alu);
2140 if (r)
2141 return r;
2142
2143 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2144 alu.src[0].sel = ctx->temp_reg;
2145 alu.src[0].chan = 0;
2146
2147 alu.dst.sel = ctx->temp_reg;
2148 alu.dst.chan = 0;
2149 alu.dst.write = 1;
2150 alu.last = 1;
2151 r = r600_bc_add_alu(ctx->bc, &alu);
2152 if (r)
2153 return r;
2154 }
2155
2156 /* result.y = tmp - floor(tmp); */
2157 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2158 memset(&alu, 0, sizeof(struct r600_bc_alu));
2159
2160 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2161 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2162
2163 alu.dst.sel = ctx->temp_reg;
2164 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2165 // if (r)
2166 // return r;
2167 alu.dst.write = 1;
2168 alu.dst.chan = 1;
2169
2170 alu.last = 1;
2171
2172 r = r600_bc_add_alu(ctx->bc, &alu);
2173 if (r)
2174 return r;
2175 }
2176
2177 /* result.z = RoughApprox2ToX(tmp);*/
2178 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2179 memset(&alu, 0, sizeof(struct r600_bc_alu));
2180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2181 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2182
2183 alu.dst.sel = ctx->temp_reg;
2184 alu.dst.write = 1;
2185 alu.dst.chan = 2;
2186
2187 alu.last = 1;
2188
2189 r = r600_bc_add_alu(ctx->bc, &alu);
2190 if (r)
2191 return r;
2192 }
2193
2194 /* result.w = 1.0;*/
2195 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2196 memset(&alu, 0, sizeof(struct r600_bc_alu));
2197
2198 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2199 alu.src[0].sel = V_SQ_ALU_SRC_1;
2200 alu.src[0].chan = 0;
2201
2202 alu.dst.sel = ctx->temp_reg;
2203 alu.dst.chan = 3;
2204 alu.dst.write = 1;
2205 alu.last = 1;
2206 r = r600_bc_add_alu(ctx->bc, &alu);
2207 if (r)
2208 return r;
2209 }
2210 return tgsi_helper_copy(ctx, inst);
2211 }
2212
2213 static int tgsi_log(struct r600_shader_ctx *ctx)
2214 {
2215 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2216 struct r600_bc_alu alu;
2217 int r;
2218
2219 /* result.x = floor(log2(src)); */
2220 if (inst->Dst[0].Register.WriteMask & 1) {
2221 memset(&alu, 0, sizeof(struct r600_bc_alu));
2222
2223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2224 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2225
2226 alu.dst.sel = ctx->temp_reg;
2227 alu.dst.chan = 0;
2228 alu.dst.write = 1;
2229 alu.last = 1;
2230 r = r600_bc_add_alu(ctx->bc, &alu);
2231 if (r)
2232 return r;
2233
2234 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2235 alu.src[0].sel = ctx->temp_reg;
2236 alu.src[0].chan = 0;
2237
2238 alu.dst.sel = ctx->temp_reg;
2239 alu.dst.chan = 0;
2240 alu.dst.write = 1;
2241 alu.last = 1;
2242
2243 r = r600_bc_add_alu(ctx->bc, &alu);
2244 if (r)
2245 return r;
2246 }
2247
2248 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2249 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2250 memset(&alu, 0, sizeof(struct r600_bc_alu));
2251
2252 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2253 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2254
2255 alu.dst.sel = ctx->temp_reg;
2256 alu.dst.chan = 1;
2257 alu.dst.write = 1;
2258 alu.last = 1;
2259
2260 r = r600_bc_add_alu(ctx->bc, &alu);
2261 if (r)
2262 return r;
2263
2264 memset(&alu, 0, sizeof(struct r600_bc_alu));
2265
2266 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2267 alu.src[0].sel = ctx->temp_reg;
2268 alu.src[0].chan = 1;
2269
2270 alu.dst.sel = ctx->temp_reg;
2271 alu.dst.chan = 1;
2272 alu.dst.write = 1;
2273 alu.last = 1;
2274
2275 r = r600_bc_add_alu(ctx->bc, &alu);
2276 if (r)
2277 return r;
2278
2279 memset(&alu, 0, sizeof(struct r600_bc_alu));
2280
2281 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2282 alu.src[0].sel = ctx->temp_reg;
2283 alu.src[0].chan = 1;
2284
2285 alu.dst.sel = ctx->temp_reg;
2286 alu.dst.chan = 1;
2287 alu.dst.write = 1;
2288 alu.last = 1;
2289
2290 r = r600_bc_add_alu(ctx->bc, &alu);
2291 if (r)
2292 return r;
2293
2294 memset(&alu, 0, sizeof(struct r600_bc_alu));
2295
2296 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2297 alu.src[0].sel = ctx->temp_reg;
2298 alu.src[0].chan = 1;
2299
2300 alu.dst.sel = ctx->temp_reg;
2301 alu.dst.chan = 1;
2302 alu.dst.write = 1;
2303 alu.last = 1;
2304
2305 r = r600_bc_add_alu(ctx->bc, &alu);
2306 if (r)
2307 return r;
2308
2309 memset(&alu, 0, sizeof(struct r600_bc_alu));
2310
2311 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2312
2313 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2314
2315 alu.src[1].sel = ctx->temp_reg;
2316 alu.src[1].chan = 1;
2317
2318 alu.dst.sel = ctx->temp_reg;
2319 alu.dst.chan = 1;
2320 alu.dst.write = 1;
2321 alu.last = 1;
2322
2323 r = r600_bc_add_alu(ctx->bc, &alu);
2324 if (r)
2325 return r;
2326 }
2327
2328 /* result.z = log2(src);*/
2329 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2330 memset(&alu, 0, sizeof(struct r600_bc_alu));
2331
2332 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2333 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2334
2335 alu.dst.sel = ctx->temp_reg;
2336 alu.dst.write = 1;
2337 alu.dst.chan = 2;
2338 alu.last = 1;
2339
2340 r = r600_bc_add_alu(ctx->bc, &alu);
2341 if (r)
2342 return r;
2343 }
2344
2345 /* result.w = 1.0; */
2346 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2347 memset(&alu, 0, sizeof(struct r600_bc_alu));
2348
2349 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2350 alu.src[0].sel = V_SQ_ALU_SRC_1;
2351 alu.src[0].chan = 0;
2352
2353 alu.dst.sel = ctx->temp_reg;
2354 alu.dst.chan = 3;
2355 alu.dst.write = 1;
2356 alu.last = 1;
2357
2358 r = r600_bc_add_alu(ctx->bc, &alu);
2359 if (r)
2360 return r;
2361 }
2362
2363 return tgsi_helper_copy(ctx, inst);
2364 }
2365
2366 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2367 {
2368 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2369 struct r600_bc_alu alu;
2370 int r;
2371
2372 memset(&alu, 0, sizeof(struct r600_bc_alu));
2373
2374 switch (inst->Instruction.Opcode) {
2375 case TGSI_OPCODE_ARL:
2376 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2377 break;
2378 case TGSI_OPCODE_ARR:
2379 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2380 break;
2381 default:
2382 assert(0);
2383 return -1;
2384 }
2385
2386 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2387 alu.last = 1;
2388 alu.dst.sel = ctx->ar_reg;
2389 alu.dst.write = 1;
2390 r = r600_bc_add_alu(ctx->bc, &alu);
2391 if (r)
2392 return r;
2393
2394 /* TODO: Note that the MOVA can be avoided if we never use AR for
2395 * indexing non-CB registers in the current ALU clause. Similarly, we
2396 * need to load AR from ar_reg again if we started a new clause
2397 * between ARL and AR usage. The easy way to do that is to remove
2398 * the MOVA here, and load it for the first AR access after ar_reg
2399 * has been modified in each clause. */
2400 memset(&alu, 0, sizeof(struct r600_bc_alu));
2401 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2402 alu.src[0].sel = ctx->ar_reg;
2403 alu.src[0].chan = 0;
2404 alu.last = 1;
2405 r = r600_bc_add_alu(ctx->bc, &alu);
2406 if (r)
2407 return r;
2408 return 0;
2409 }
2410 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2411 {
2412 /* TODO from r600c, ar values don't persist between clauses */
2413 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2414 struct r600_bc_alu alu;
2415 int r;
2416
2417 switch (inst->Instruction.Opcode) {
2418 case TGSI_OPCODE_ARL:
2419 memset(&alu, 0, sizeof(alu));
2420 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2421 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2422 alu.dst.sel = ctx->ar_reg;
2423 alu.dst.write = 1;
2424 alu.last = 1;
2425
2426 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2427 return r;
2428
2429 memset(&alu, 0, sizeof(alu));
2430 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2431 alu.src[0].sel = ctx->ar_reg;
2432 alu.dst.sel = ctx->ar_reg;
2433 alu.dst.write = 1;
2434 alu.last = 1;
2435
2436 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2437 return r;
2438 break;
2439 case TGSI_OPCODE_ARR:
2440 memset(&alu, 0, sizeof(alu));
2441 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2442 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2443 alu.dst.sel = ctx->ar_reg;
2444 alu.dst.write = 1;
2445 alu.last = 1;
2446
2447 if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2448 return r;
2449 break;
2450 default:
2451 assert(0);
2452 return -1;
2453 }
2454
2455 memset(&alu, 0, sizeof(alu));
2456 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2457 alu.src[0].sel = ctx->ar_reg;
2458 alu.last = 1;
2459
2460 r = r600_bc_add_alu(ctx->bc, &alu);
2461 if (r)
2462 return r;
2463 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2464 return 0;
2465 }
2466
2467 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2468 {
2469 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2470 struct r600_bc_alu alu;
2471 int i, r = 0;
2472
2473 for (i = 0; i < 4; i++) {
2474 memset(&alu, 0, sizeof(struct r600_bc_alu));
2475
2476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2477 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2478
2479 if (i == 0 || i == 3) {
2480 alu.src[0].sel = V_SQ_ALU_SRC_1;
2481 } else {
2482 r600_bc_src(&alu.src[0], &ctx->src[0], i);
2483 }
2484
2485 if (i == 0 || i == 2) {
2486 alu.src[1].sel = V_SQ_ALU_SRC_1;
2487 } else {
2488 r600_bc_src(&alu.src[1], &ctx->src[1], i);
2489 }
2490 if (i == 3)
2491 alu.last = 1;
2492 r = r600_bc_add_alu(ctx->bc, &alu);
2493 if (r)
2494 return r;
2495 }
2496 return 0;
2497 }
2498
2499 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2500 {
2501 struct r600_bc_alu alu;
2502 int r;
2503
2504 memset(&alu, 0, sizeof(struct r600_bc_alu));
2505 alu.inst = opcode;
2506 alu.predicate = 1;
2507
2508 alu.dst.sel = ctx->temp_reg;
2509 alu.dst.write = 1;
2510 alu.dst.chan = 0;
2511
2512 r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2513 alu.src[1].sel = V_SQ_ALU_SRC_0;
2514 alu.src[1].chan = 0;
2515
2516 alu.last = 1;
2517
2518 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2519 if (r)
2520 return r;
2521 return 0;
2522 }
2523
2524 static int pops(struct r600_shader_ctx *ctx, int pops)
2525 {
2526 int alu_pop = 3;
2527 if (ctx->bc->cf_last) {
2528 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2529 alu_pop = 0;
2530 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2531 alu_pop = 1;
2532 }
2533 alu_pop += pops;
2534 if (alu_pop == 1) {
2535 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2536 ctx->bc->force_add_cf = 1;
2537 } else if (alu_pop == 2) {
2538 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2539 ctx->bc->force_add_cf = 1;
2540 } else {
2541 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2542 ctx->bc->cf_last->pop_count = pops;
2543 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2544 }
2545 return 0;
2546 }
2547
2548 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2549 {
2550 switch(reason) {
2551 case FC_PUSH_VPM:
2552 ctx->bc->callstack[ctx->bc->call_sp].current--;
2553 break;
2554 case FC_PUSH_WQM:
2555 case FC_LOOP:
2556 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2557 break;
2558 case FC_REP:
2559 /* TOODO : for 16 vp asic should -= 2; */
2560 ctx->bc->callstack[ctx->bc->call_sp].current --;
2561 break;
2562 }
2563 }
2564
2565 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2566 {
2567 if (check_max_only) {
2568 int diff;
2569 switch (reason) {
2570 case FC_PUSH_VPM:
2571 diff = 1;
2572 break;
2573 case FC_PUSH_WQM:
2574 diff = 4;
2575 break;
2576 default:
2577 assert(0);
2578 diff = 0;
2579 }
2580 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2581 ctx->bc->callstack[ctx->bc->call_sp].max) {
2582 ctx->bc->callstack[ctx->bc->call_sp].max =
2583 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2584 }
2585 return;
2586 }
2587 switch (reason) {
2588 case FC_PUSH_VPM:
2589 ctx->bc->callstack[ctx->bc->call_sp].current++;
2590 break;
2591 case FC_PUSH_WQM:
2592 case FC_LOOP:
2593 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2594 break;
2595 case FC_REP:
2596 ctx->bc->callstack[ctx->bc->call_sp].current++;
2597 break;
2598 }
2599
2600 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2601 ctx->bc->callstack[ctx->bc->call_sp].max) {
2602 ctx->bc->callstack[ctx->bc->call_sp].max =
2603 ctx->bc->callstack[ctx->bc->call_sp].current;
2604 }
2605 }
2606
2607 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2608 {
2609 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2610
2611 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2612 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2613 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2614 sp->num_mid++;
2615 }
2616
2617 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2618 {
2619 ctx->bc->fc_sp++;
2620 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2621 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2622 }
2623
2624 static void fc_poplevel(struct r600_shader_ctx *ctx)
2625 {
2626 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2627 if (sp->mid) {
2628 free(sp->mid);
2629 sp->mid = NULL;
2630 }
2631 sp->num_mid = 0;
2632 sp->start = NULL;
2633 sp->type = 0;
2634 ctx->bc->fc_sp--;
2635 }
2636
#if 0
/*
 * Disabled, unfinished helpers for RETURN handling inside loops. They are
 * kept compiled out; several of them are still empty stubs.
 */

/* Emit a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Emit a JUMP CF instruction popping `pops` levels; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: would record the "return inside loop" flag. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: would test the "return inside loop" flag. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, and return from the shader when it is set. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag, and break out of the loop at level fc_sp when it is set. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2684
2685 static int tgsi_if(struct r600_shader_ctx *ctx)
2686 {
2687 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2688
2689 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2690
2691 fc_pushlevel(ctx, FC_IF);
2692
2693 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2694 return 0;
2695 }
2696
2697 static int tgsi_else(struct r600_shader_ctx *ctx)
2698 {
2699 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2700 ctx->bc->cf_last->pop_count = 1;
2701
2702 fc_set_mid(ctx, ctx->bc->fc_sp);
2703 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2704 return 0;
2705 }
2706
2707 static int tgsi_endif(struct r600_shader_ctx *ctx)
2708 {
2709 pops(ctx, 1);
2710 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2711 R600_ERR("if/endif unbalanced in shader\n");
2712 return -1;
2713 }
2714
2715 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2716 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2717 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2718 } else {
2719 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2720 }
2721 fc_poplevel(ctx);
2722
2723 callstack_decrease_current(ctx, FC_PUSH_VPM);
2724 return 0;
2725 }
2726
2727 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2728 {
2729 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2730
2731 fc_pushlevel(ctx, FC_LOOP);
2732
2733 /* check stack depth */
2734 callstack_check_depth(ctx, FC_LOOP, 0);
2735 return 0;
2736 }
2737
2738 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2739 {
2740 int i;
2741
2742 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2743
2744 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2745 R600_ERR("loop/endloop in shader code are not paired.\n");
2746 return -EINVAL;
2747 }
2748
2749 /* fixup loop pointers - from r600isa
2750 LOOP END points to CF after LOOP START,
2751 LOOP START point to CF after LOOP END
2752 BRK/CONT point to LOOP END CF
2753 */
2754 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2755
2756 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2757
2758 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2759 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2760 }
2761 /* TODO add LOOPRET support */
2762 fc_poplevel(ctx);
2763 callstack_decrease_current(ctx, FC_LOOP);
2764 return 0;
2765 }
2766
2767 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2768 {
2769 unsigned int fscp;
2770
2771 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2772 {
2773 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2774 break;
2775 }
2776
2777 if (fscp == 0) {
2778 R600_ERR("Break not inside loop/endloop pair\n");
2779 return -EINVAL;
2780 }
2781
2782 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2783 ctx->bc->cf_last->pop_count = 1;
2784
2785 fc_set_mid(ctx, fscp);
2786
2787 pops(ctx, 1);
2788 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2789 return 0;
2790 }
2791
2792 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2793 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2794 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2795 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2796
2797 /* FIXME:
2798 * For state trackers other than OpenGL, we'll want to use
2799 * _RECIP_IEEE instead.
2800 */
2801 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2802
2803 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2804 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2805 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2806 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2807 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2808 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2809 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2810 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2811 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2812 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2813 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2814 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2815 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2816 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2817 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2818 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2819 /* gap */
2820 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2821 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2822 /* gap */
2823 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2825 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2826 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2827 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2828 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2829 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2830 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2831 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2832 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2833 /* gap */
2834 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2836 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2838 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2839 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2840 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2841 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2842 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2844 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2846 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2847 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2848 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2850 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2851 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2852 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2853 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2855 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2857 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2858 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2859 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2864 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2868 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2869 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2870 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2871 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2874 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2875 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2876 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2877 /* gap */
2878 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2880 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2881 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2882 /* gap */
2883 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2891 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892 /* gap */
2893 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2902 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2905 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2907 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908 /* gap */
2909 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914 /* gap */
2915 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2918 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2924 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2925 /* gap */
2926 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2947 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2949 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2950 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2951 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2952 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2954 };
2955
2956 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2957 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2958 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2959 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2960 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2961 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2962 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2963 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2964 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2965 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2966 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2967 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2968 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2969 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2970 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2971 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2972 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2973 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2974 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2975 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2976 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2977 /* gap */
2978 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2979 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2980 /* gap */
2981 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2982 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2983 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2984 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2985 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2986 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2987 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2988 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2989 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2990 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2991 /* gap */
2992 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2993 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2994 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2995 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2996 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2997 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2998 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2999 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3000 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3001 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3002 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3003 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3004 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3005 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3006 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3007 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3008 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3009 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3010 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3011 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3012 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3013 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3014 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3015 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3016 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3017 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3018 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3020 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3021 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3022 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3023 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3024 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3025 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3026 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3027 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3028 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3029 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3030 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3031 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3032 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3033 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3034 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3035 /* gap */
3036 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3037 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3038 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3039 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3040 /* gap */
3041 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3042 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3043 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3048 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3049 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3050 /* gap */
3051 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3052 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3060 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3063 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3065 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066 /* gap */
3067 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072 /* gap */
3073 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3079 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3082 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3083 /* gap */
3084 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3102 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112 };