r600g: tgsi_dst() can't fail.
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_opcodes.h"
32 #include "r600d.h"
33 #include <stdio.h>
34 #include <errno.h>
35
36 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37 {
38 struct r600_pipe_state *rstate = &shader->rstate;
39 struct r600_shader *rshader = &shader->shader;
40 unsigned spi_vs_out_id[10];
41 unsigned i, tmp;
42
43 /* clear previous register */
44 rstate->nregs = 0;
45
46 /* so far never got proper semantic id from tgsi */
47 /* FIXME better to move this in config things so they get emited
48 * only one time per cs
49 */
50 for (i = 0; i < 10; i++) {
51 spi_vs_out_id[i] = 0;
52 }
53 for (i = 0; i < 32; i++) {
54 tmp = i << ((i & 3) * 8);
55 spi_vs_out_id[i / 4] |= tmp;
56 }
57 for (i = 0; i < 10; i++) {
58 r600_pipe_state_add_reg(rstate,
59 R_028614_SPI_VS_OUT_ID_0 + i * 4,
60 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
61 }
62
63 r600_pipe_state_add_reg(rstate,
64 R_0286C4_SPI_VS_OUT_CONFIG,
65 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
66 0xFFFFFFFF, NULL);
67 r600_pipe_state_add_reg(rstate,
68 R_028868_SQ_PGM_RESOURCES_VS,
69 S_028868_NUM_GPRS(rshader->bc.ngpr) |
70 S_028868_STACK_SIZE(rshader->bc.nstack),
71 0xFFFFFFFF, NULL);
72 r600_pipe_state_add_reg(rstate,
73 R_0288D0_SQ_PGM_CF_OFFSET_VS,
74 0x00000000, 0xFFFFFFFF, NULL);
75 r600_pipe_state_add_reg(rstate,
76 R_028858_SQ_PGM_START_VS,
77 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
78
79 r600_pipe_state_add_reg(rstate,
80 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
81 0xFFFFFFFF, NULL);
82
83 }
84
85 int r600_find_vs_semantic_index(struct r600_shader *vs,
86 struct r600_shader *ps, int id)
87 {
88 struct r600_shader_io *input = &ps->input[id];
89
90 for (int i = 0; i < vs->noutput; i++) {
91 if (input->name == vs->output[i].name &&
92 input->sid == vs->output[i].sid) {
93 return i - 1;
94 }
95 }
96 return 0;
97 }
98
99 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
100 {
101 struct r600_pipe_state *rstate = &shader->rstate;
102 struct r600_shader *rshader = &shader->shader;
103 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
104 int pos_index = -1, face_index = -1;
105
106 rstate->nregs = 0;
107
108 for (i = 0; i < rshader->ninput; i++) {
109 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
110 pos_index = i;
111 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
112 face_index = i;
113 }
114
115 for (i = 0; i < rshader->noutput; i++) {
116 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
117 r600_pipe_state_add_reg(rstate,
118 R_02880C_DB_SHADER_CONTROL,
119 S_02880C_Z_EXPORT_ENABLE(1),
120 S_02880C_Z_EXPORT_ENABLE(1), NULL);
121 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
122 r600_pipe_state_add_reg(rstate,
123 R_02880C_DB_SHADER_CONTROL,
124 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
126 }
127
128 exports_ps = 0;
129 num_cout = 0;
130 for (i = 0; i < rshader->noutput; i++) {
131 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
132 exports_ps |= 1;
133 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
134 num_cout++;
135 }
136 }
137 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
138 if (!exports_ps) {
139 /* always at least export 1 component per pixel */
140 exports_ps = 2;
141 }
142
143 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
144 S_0286CC_PERSP_GRADIENT_ENA(1);
145 spi_input_z = 0;
146 if (pos_index != -1) {
147 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
148 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
149 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
150 S_0286CC_BARYC_SAMPLE_CNTL(1));
151 spi_input_z |= 1;
152 }
153
154 spi_ps_in_control_1 = 0;
155 if (face_index != -1) {
156 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
157 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
158 }
159
160 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
161 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
162 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
163 r600_pipe_state_add_reg(rstate,
164 R_028840_SQ_PGM_START_PS,
165 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
166 r600_pipe_state_add_reg(rstate,
167 R_028850_SQ_PGM_RESOURCES_PS,
168 S_028868_NUM_GPRS(rshader->bc.ngpr) |
169 S_028868_STACK_SIZE(rshader->bc.nstack),
170 0xFFFFFFFF, NULL);
171 r600_pipe_state_add_reg(rstate,
172 R_028854_SQ_PGM_EXPORTS_PS,
173 exports_ps, 0xFFFFFFFF, NULL);
174 r600_pipe_state_add_reg(rstate,
175 R_0288CC_SQ_PGM_CF_OFFSET_PS,
176 0x00000000, 0xFFFFFFFF, NULL);
177
178 if (rshader->fs_write_all) {
179 r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
180 S_028808_MULTIWRITE_ENABLE(1),
181 S_028808_MULTIWRITE_ENABLE(1),
182 NULL);
183 }
184
185 if (rshader->uses_kill) {
186 /* only set some bits here, the other bits are set in the dsa state */
187 r600_pipe_state_add_reg(rstate,
188 R_02880C_DB_SHADER_CONTROL,
189 S_02880C_KILL_ENABLE(1),
190 S_02880C_KILL_ENABLE(1), NULL);
191 }
192 r600_pipe_state_add_reg(rstate,
193 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
194 0xFFFFFFFF, NULL);
195 }
196
197 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
198 {
199 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
200 struct r600_shader *rshader = &shader->shader;
201 void *ptr;
202
203 /* copy new shader */
204 if (shader->bo == NULL) {
205 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
206 if (shader->bo == NULL) {
207 return -ENOMEM;
208 }
209 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
210 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
211 r600_bo_unmap(rctx->radeon, shader->bo);
212 }
213 /* build state */
214 switch (rshader->processor_type) {
215 case TGSI_PROCESSOR_VERTEX:
216 if (rshader->family >= CHIP_CEDAR) {
217 evergreen_pipe_shader_vs(ctx, shader);
218 } else {
219 r600_pipe_shader_vs(ctx, shader);
220 }
221 break;
222 case TGSI_PROCESSOR_FRAGMENT:
223 if (rshader->family >= CHIP_CEDAR) {
224 evergreen_pipe_shader_ps(ctx, shader);
225 } else {
226 r600_pipe_shader_ps(ctx, shader);
227 }
228 break;
229 default:
230 return -EINVAL;
231 }
232 return 0;
233 }
234
235 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
236 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
237 {
238 static int dump_shaders = -1;
239 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
240 u32 *literals;
241 int r;
242
243 /* Would like some magic "get_bool_option_once" routine.
244 */
245 if (dump_shaders == -1)
246 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
247
248 if (dump_shaders) {
249 fprintf(stderr, "--------------------------------------------------------------\n");
250 tgsi_dump(tokens, 0);
251 }
252 shader->shader.family = r600_get_family(rctx->radeon);
253 r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
254 if (r) {
255 R600_ERR("translation from TGSI failed !\n");
256 return r;
257 }
258 r = r600_bc_build(&shader->shader.bc);
259 free(literals);
260 if (r) {
261 R600_ERR("building bytecode failed !\n");
262 return r;
263 }
264 if (dump_shaders) {
265 r600_bc_dump(&shader->shader.bc);
266 fprintf(stderr, "______________________________________________________________\n");
267 }
268 return r600_pipe_shader(ctx, shader);
269 }
270
271 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
272 {
273 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
274
275 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
276 r600_bc_clear(&shader->shader.bc);
277 }
278
279 /*
280 * tgsi -> r600 shader
281 */
282 struct r600_shader_tgsi_instruction;
283
284 struct r600_shader_ctx {
285 struct tgsi_shader_info info;
286 struct tgsi_parse_context parse;
287 const struct tgsi_token *tokens;
288 unsigned type;
289 unsigned file_offset[TGSI_FILE_COUNT];
290 unsigned temp_reg;
291 struct r600_shader_tgsi_instruction *inst_info;
292 struct r600_bc *bc;
293 struct r600_shader *shader;
294 u32 *literals;
295 u32 nliterals;
296 u32 max_driver_temp_used;
297 /* needed for evergreen interpolation */
298 boolean input_centroid;
299 boolean input_linear;
300 boolean input_perspective;
301 int num_interp_gpr;
302 };
303
/* One opcode table entry: how a single TGSI opcode is translated. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	unsigned	is_op3;
	/* hardware ALU opcode the handler emits */
	unsigned	r600_opcode;
	/* handler translating the current instruction; returns 0 on success */
	int		(*process)(struct r600_shader_ctx *ctx);
};
310
311 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
312 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
313
314 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
315 {
316 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
317 int j;
318
319 if (i->Instruction.NumDstRegs > 1) {
320 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
321 return -EINVAL;
322 }
323 if (i->Instruction.Predicate) {
324 R600_ERR("predicate unsupported\n");
325 return -EINVAL;
326 }
327 #if 0
328 if (i->Instruction.Label) {
329 R600_ERR("label unsupported\n");
330 return -EINVAL;
331 }
332 #endif
333 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
334 if (i->Src[j].Register.Dimension) {
335 R600_ERR("unsupported src %d (dimension %d)\n", j,
336 i->Src[j].Register.Dimension);
337 return -EINVAL;
338 }
339 }
340 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
341 if (i->Dst[j].Register.Dimension) {
342 R600_ERR("unsupported dst (dimension)\n");
343 return -EINVAL;
344 }
345 }
346 return 0;
347 }
348
349 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
350 {
351 int i, r;
352 struct r600_bc_alu alu;
353 int gpr = 0, base_chan = 0;
354 int ij_index = 0;
355
356 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
357 ij_index = 0;
358 if (ctx->shader->input[input].centroid)
359 ij_index++;
360 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
361 ij_index = 0;
362 /* if we have perspective add one */
363 if (ctx->input_perspective) {
364 ij_index++;
365 /* if we have perspective centroid */
366 if (ctx->input_centroid)
367 ij_index++;
368 }
369 if (ctx->shader->input[input].centroid)
370 ij_index++;
371 }
372
373 /* work out gpr and base_chan from index */
374 gpr = ij_index / 2;
375 base_chan = (2 * (ij_index % 2)) + 1;
376
377 for (i = 0; i < 8; i++) {
378 memset(&alu, 0, sizeof(struct r600_bc_alu));
379
380 if (i < 4)
381 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
382 else
383 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
384
385 if ((i > 1) && (i < 6)) {
386 alu.dst.sel = ctx->shader->input[input].gpr;
387 alu.dst.write = 1;
388 }
389
390 alu.dst.chan = i % 4;
391
392 alu.src[0].sel = gpr;
393 alu.src[0].chan = (base_chan - (i % 2));
394
395 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
396
397 alu.bank_swizzle_force = SQ_ALU_VEC_210;
398 if ((i % 4) == 3)
399 alu.last = 1;
400 r = r600_bc_add_alu(ctx->bc, &alu);
401 if (r)
402 return r;
403 }
404 return 0;
405 }
406
407
408 static int tgsi_declaration(struct r600_shader_ctx *ctx)
409 {
410 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
411 unsigned i;
412
413 switch (d->Declaration.File) {
414 case TGSI_FILE_INPUT:
415 i = ctx->shader->ninput++;
416 ctx->shader->input[i].name = d->Semantic.Name;
417 ctx->shader->input[i].sid = d->Semantic.Index;
418 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
419 ctx->shader->input[i].centroid = d->Declaration.Centroid;
420 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
421 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
422 /* turn input into interpolate on EG */
423 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
424 if (ctx->shader->input[i].interpolate > 0) {
425 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
426 evergreen_interp_alu(ctx, i);
427 }
428 }
429 }
430 break;
431 case TGSI_FILE_OUTPUT:
432 i = ctx->shader->noutput++;
433 ctx->shader->output[i].name = d->Semantic.Name;
434 ctx->shader->output[i].sid = d->Semantic.Index;
435 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
436 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
437 break;
438 case TGSI_FILE_CONSTANT:
439 case TGSI_FILE_TEMPORARY:
440 case TGSI_FILE_SAMPLER:
441 case TGSI_FILE_ADDRESS:
442 break;
443 default:
444 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
445 return -EINVAL;
446 }
447 return 0;
448 }
449
450 static int r600_get_temp(struct r600_shader_ctx *ctx)
451 {
452 return ctx->temp_reg + ctx->max_driver_temp_used++;
453 }
454
455 /*
456 * for evergreen we need to scan the shader to find the number of GPRs we need to
457 * reserve for interpolation.
458 *
459 * we need to know if we are going to emit
460 * any centroid inputs
461 * if perspective and linear are required
462 */
463 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
464 {
465 int i;
466 int num_baryc;
467
468 ctx->input_linear = FALSE;
469 ctx->input_perspective = FALSE;
470 ctx->input_centroid = FALSE;
471 ctx->num_interp_gpr = 1;
472
473 /* any centroid inputs */
474 for (i = 0; i < ctx->info.num_inputs; i++) {
475 /* skip position/face */
476 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
477 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
478 continue;
479 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
480 ctx->input_linear = TRUE;
481 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
482 ctx->input_perspective = TRUE;
483 if (ctx->info.input_centroid[i])
484 ctx->input_centroid = TRUE;
485 }
486
487 num_baryc = 0;
488 /* ignoring sample for now */
489 if (ctx->input_perspective)
490 num_baryc++;
491 if (ctx->input_linear)
492 num_baryc++;
493 if (ctx->input_centroid)
494 num_baryc *= 2;
495
496 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
497
498 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
499 return ctx->num_interp_gpr;
500 }
501
502 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
503 {
504 struct tgsi_full_immediate *immediate;
505 struct tgsi_full_property *property;
506 struct r600_shader_ctx ctx;
507 struct r600_bc_output output[32];
508 unsigned output_done, noutput;
509 unsigned opcode;
510 int i, r = 0, pos0;
511
512 ctx.bc = &shader->bc;
513 ctx.shader = shader;
514 r = r600_bc_init(ctx.bc, shader->family);
515 if (r)
516 return r;
517 ctx.tokens = tokens;
518 tgsi_scan_shader(tokens, &ctx.info);
519 tgsi_parse_init(&ctx.parse, tokens);
520 ctx.type = ctx.parse.FullHeader.Processor.Processor;
521 shader->processor_type = ctx.type;
522 ctx.bc->type = shader->processor_type;
523
524 /* register allocations */
525 /* Values [0,127] correspond to GPR[0..127].
526 * Values [128,159] correspond to constant buffer bank 0
527 * Values [160,191] correspond to constant buffer bank 1
528 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
529 * Values [256,287] correspond to constant buffer bank 2 (EG)
530 * Values [288,319] correspond to constant buffer bank 3 (EG)
531 * Other special values are shown in the list below.
532 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
533 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
534 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
535 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
536 * 248 SQ_ALU_SRC_0: special constant 0.0.
537 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
538 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
539 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
540 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
541 * 253 SQ_ALU_SRC_LITERAL: literal constant.
542 * 254 SQ_ALU_SRC_PV: previous vector result.
543 * 255 SQ_ALU_SRC_PS: previous scalar result.
544 */
545 for (i = 0; i < TGSI_FILE_COUNT; i++) {
546 ctx.file_offset[i] = 0;
547 }
548 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
549 ctx.file_offset[TGSI_FILE_INPUT] = 1;
550 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
551 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
552 } else {
553 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
554 }
555 }
556 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
557 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
558 }
559 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
560 ctx.info.file_count[TGSI_FILE_INPUT];
561 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
562 ctx.info.file_count[TGSI_FILE_OUTPUT];
563
564 /* Outside the GPR range. This will be translated to one of the
565 * kcache banks later. */
566 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
567
568 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
569 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
570 ctx.info.file_count[TGSI_FILE_TEMPORARY];
571
572 ctx.nliterals = 0;
573 ctx.literals = NULL;
574 shader->fs_write_all = FALSE;
575 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
576 tgsi_parse_token(&ctx.parse);
577 switch (ctx.parse.FullToken.Token.Type) {
578 case TGSI_TOKEN_TYPE_IMMEDIATE:
579 immediate = &ctx.parse.FullToken.FullImmediate;
580 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
581 if(ctx.literals == NULL) {
582 r = -ENOMEM;
583 goto out_err;
584 }
585 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
586 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
587 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
588 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
589 ctx.nliterals++;
590 break;
591 case TGSI_TOKEN_TYPE_DECLARATION:
592 r = tgsi_declaration(&ctx);
593 if (r)
594 goto out_err;
595 break;
596 case TGSI_TOKEN_TYPE_INSTRUCTION:
597 r = tgsi_is_supported(&ctx);
598 if (r)
599 goto out_err;
600 ctx.max_driver_temp_used = 0;
601 /* reserve first tmp for everyone */
602 r600_get_temp(&ctx);
603 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
604 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
605 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
606 else
607 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
608 r = ctx.inst_info->process(&ctx);
609 if (r)
610 goto out_err;
611 break;
612 case TGSI_TOKEN_TYPE_PROPERTY:
613 property = &ctx.parse.FullToken.FullProperty;
614 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
615 if (property->u[0].Data == 1)
616 shader->fs_write_all = TRUE;
617 }
618 break;
619 default:
620 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
621 r = -EINVAL;
622 goto out_err;
623 }
624 }
625 /* export output */
626 noutput = shader->noutput;
627 for (i = 0, pos0 = 0; i < noutput; i++) {
628 memset(&output[i], 0, sizeof(struct r600_bc_output));
629 output[i].gpr = shader->output[i].gpr;
630 output[i].elem_size = 3;
631 output[i].swizzle_x = 0;
632 output[i].swizzle_y = 1;
633 output[i].swizzle_z = 2;
634 output[i].swizzle_w = 3;
635 output[i].burst_count = 1;
636 output[i].barrier = 1;
637 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
638 output[i].array_base = i - pos0;
639 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
640 switch (ctx.type) {
641 case TGSI_PROCESSOR_VERTEX:
642 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
643 output[i].array_base = 60;
644 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
645 /* position doesn't count in array_base */
646 pos0++;
647 }
648 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
649 output[i].array_base = 61;
650 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
651 /* position doesn't count in array_base */
652 pos0++;
653 }
654 break;
655 case TGSI_PROCESSOR_FRAGMENT:
656 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
657 output[i].array_base = shader->output[i].sid;
658 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
659 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
660 output[i].array_base = 61;
661 output[i].swizzle_x = 2;
662 output[i].swizzle_y = 7;
663 output[i].swizzle_z = output[i].swizzle_w = 7;
664 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
665 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
666 output[i].array_base = 61;
667 output[i].swizzle_x = 7;
668 output[i].swizzle_y = 1;
669 output[i].swizzle_z = output[i].swizzle_w = 7;
670 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
671 } else {
672 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
673 r = -EINVAL;
674 goto out_err;
675 }
676 break;
677 default:
678 R600_ERR("unsupported processor type %d\n", ctx.type);
679 r = -EINVAL;
680 goto out_err;
681 }
682 }
683 /* add fake param output for vertex shader if no param is exported */
684 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
685 for (i = 0, pos0 = 0; i < noutput; i++) {
686 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
687 pos0 = 1;
688 break;
689 }
690 }
691 if (!pos0) {
692 memset(&output[i], 0, sizeof(struct r600_bc_output));
693 output[i].gpr = 0;
694 output[i].elem_size = 3;
695 output[i].swizzle_x = 0;
696 output[i].swizzle_y = 1;
697 output[i].swizzle_z = 2;
698 output[i].swizzle_w = 3;
699 output[i].burst_count = 1;
700 output[i].barrier = 1;
701 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
702 output[i].array_base = 0;
703 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
704 noutput++;
705 }
706 }
707 /* add fake pixel export */
708 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
709 memset(&output[0], 0, sizeof(struct r600_bc_output));
710 output[0].gpr = 0;
711 output[0].elem_size = 3;
712 output[0].swizzle_x = 7;
713 output[0].swizzle_y = 7;
714 output[0].swizzle_z = 7;
715 output[0].swizzle_w = 7;
716 output[0].burst_count = 1;
717 output[0].barrier = 1;
718 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
719 output[0].array_base = 0;
720 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
721 noutput++;
722 }
723 /* set export done on last export of each type */
724 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
725 if (i == (noutput - 1)) {
726 output[i].end_of_program = 1;
727 }
728 if (!(output_done & (1 << output[i].type))) {
729 output_done |= (1 << output[i].type);
730 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
731 }
732 }
733 /* add output to bytecode */
734 for (i = 0; i < noutput; i++) {
735 r = r600_bc_add_output(ctx.bc, &output[i]);
736 if (r)
737 goto out_err;
738 }
739 *literals = ctx.literals;
740 tgsi_parse_free(&ctx.parse);
741 return 0;
742 out_err:
743 free(ctx.literals);
744 tgsi_parse_free(&ctx.parse);
745 return r;
746 }
747
748 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
749 {
750 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
751 return -EINVAL;
752 }
753
/* Handler that emits nothing and always succeeds; the context is unused. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
758
759 static void tgsi_src(struct r600_shader_ctx *ctx,
760 const struct tgsi_full_src_register *tgsi_src,
761 struct r600_bc_alu_src *r600_src)
762 {
763 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
764 r600_src->neg = tgsi_src->Register.Negate;
765 r600_src->abs = tgsi_src->Register.Absolute;
766 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
767 int index;
768 if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
769 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
770 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
771
772 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
773 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
774 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
775 return;
776 }
777 index = tgsi_src->Register.Index;
778 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
779 r600_src->value = ctx->literals + index * 4;
780 } else {
781 if (tgsi_src->Register.Indirect)
782 r600_src->rel = V_SQ_REL_RELATIVE;
783 r600_src->sel = tgsi_src->Register.Index;
784 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
785 }
786 }
787
788 static void tgsi_dst(struct r600_shader_ctx *ctx,
789 const struct tgsi_full_dst_register *tgsi_dst,
790 unsigned swizzle,
791 struct r600_bc_alu_dst *r600_dst)
792 {
793 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
794
795 r600_dst->sel = tgsi_dst->Register.Index;
796 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
797 r600_dst->chan = swizzle;
798 r600_dst->write = 1;
799 if (tgsi_dst->Register.Indirect)
800 r600_dst->rel = V_SQ_REL_RELATIVE;
801 if (inst->Instruction.Saturate) {
802 r600_dst->clamp = 1;
803 }
804 }
805
806 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
807 {
808 switch (swizzle) {
809 case 0:
810 return tgsi_src->Register.SwizzleX;
811 case 1:
812 return tgsi_src->Register.SwizzleY;
813 case 2:
814 return tgsi_src->Register.SwizzleZ;
815 case 3:
816 return tgsi_src->Register.SwizzleW;
817 default:
818 return 0;
819 }
820 }
821
822 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
823 {
824 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
825 struct r600_bc_alu alu;
826 int i, j, k, nconst, r;
827
828 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
829 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
830 nconst++;
831 }
832 tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
833 }
834 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
835 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
836 int treg = r600_get_temp(ctx);
837 for (k = 0; k < 4; k++) {
838 memset(&alu, 0, sizeof(struct r600_bc_alu));
839 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
840 alu.src[0].sel = r600_src[i].sel;
841 alu.src[0].chan = k;
842 alu.src[0].rel = r600_src[i].rel;
843 alu.dst.sel = treg;
844 alu.dst.chan = k;
845 alu.dst.write = 1;
846 if (k == 3)
847 alu.last = 1;
848 r = r600_bc_add_alu(ctx->bc, &alu);
849 if (r)
850 return r;
851 }
852 r600_src[i].sel = treg;
853 r600_src[i].rel =0;
854 j--;
855 }
856 }
857 return 0;
858 }
859
860 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
861 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
862 {
863 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
864 struct r600_bc_alu alu;
865 int i, j, k, nliteral, r;
866
867 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
868 if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
869 nliteral++;
870 }
871 }
872 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
873 if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
874 int treg = r600_get_temp(ctx);
875 for (k = 0; k < 4; k++) {
876 memset(&alu, 0, sizeof(struct r600_bc_alu));
877 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
878 alu.src[0].sel = r600_src[i].sel;
879 alu.src[0].chan = k;
880 alu.src[0].value = r600_src[i].value;
881 alu.dst.sel = treg;
882 alu.dst.chan = k;
883 alu.dst.write = 1;
884 if (k == 3)
885 alu.last = 1;
886 r = r600_bc_add_alu(ctx->bc, &alu);
887 if (r)
888 return r;
889 }
890 r600_src[i].sel = treg;
891 j--;
892 }
893 }
894 return 0;
895 }
896
/*
 * Return the highest channel (0..3) enabled in 'writemask', or 0 when none
 * of the four low bits is set.  Bits above bit 3 are ignored.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to X; the first hit is the highest channel */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1)
			return chan;
	}
	return 0;
}
908
909 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
910 {
911 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
912 struct r600_bc_alu_src r600_src[3];
913 struct r600_bc_alu alu;
914 int i, j, r;
915 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
916
917 r = tgsi_split_constant(ctx, r600_src);
918 if (r)
919 return r;
920 r = tgsi_split_literal_constant(ctx, r600_src);
921 if (r)
922 return r;
923 for (i = 0; i < lasti + 1; i++) {
924 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
925 continue;
926
927 memset(&alu, 0, sizeof(struct r600_bc_alu));
928 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
929
930 alu.inst = ctx->inst_info->r600_opcode;
931 if (!swap) {
932 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
933 alu.src[j] = r600_src[j];
934 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
935 }
936 } else {
937 alu.src[0] = r600_src[1];
938 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
939
940 alu.src[1] = r600_src[0];
941 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
942 }
943 /* handle some special cases */
944 switch (ctx->inst_info->tgsi_opcode) {
945 case TGSI_OPCODE_SUB:
946 alu.src[1].neg = 1;
947 break;
948 case TGSI_OPCODE_ABS:
949 alu.src[0].abs = 1;
950 break;
951 default:
952 break;
953 }
954 if (i == lasti) {
955 alu.last = 1;
956 }
957 r = r600_bc_add_alu(ctx->bc, &alu);
958 if (r)
959 return r;
960 }
961 return 0;
962 }
963
/* Per-channel ALU translation with the sources in their TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
968
/* Per-channel ALU translation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
973
974 /*
975 * r600 - trunc to -PI..PI range
976 * r700 - normalize by dividing by 2PI
977 * see fdo bug 27901
978 */
979 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
980 struct r600_bc_alu_src r600_src[3])
981 {
982 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
983 static float double_pi = 3.1415926535 * 2;
984 static float neg_pi = -3.1415926535;
985
986 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
987 int r;
988 struct r600_bc_alu alu;
989
990 r = tgsi_split_constant(ctx, r600_src);
991 if (r)
992 return r;
993 r = tgsi_split_literal_constant(ctx, r600_src);
994 if (r)
995 return r;
996
997 memset(&alu, 0, sizeof(struct r600_bc_alu));
998 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
999 alu.is_op3 = 1;
1000
1001 alu.dst.chan = 0;
1002 alu.dst.sel = ctx->temp_reg;
1003 alu.dst.write = 1;
1004
1005 alu.src[0] = r600_src[0];
1006 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1007
1008 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1009 alu.src[1].chan = 0;
1010 alu.src[1].value = (uint32_t *)&half_inv_pi;
1011 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1012 alu.src[2].chan = 0;
1013 alu.last = 1;
1014 r = r600_bc_add_alu(ctx->bc, &alu);
1015 if (r)
1016 return r;
1017
1018 memset(&alu, 0, sizeof(struct r600_bc_alu));
1019 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1020
1021 alu.dst.chan = 0;
1022 alu.dst.sel = ctx->temp_reg;
1023 alu.dst.write = 1;
1024
1025 alu.src[0].sel = ctx->temp_reg;
1026 alu.src[0].chan = 0;
1027 alu.last = 1;
1028 r = r600_bc_add_alu(ctx->bc, &alu);
1029 if (r)
1030 return r;
1031
1032 memset(&alu, 0, sizeof(struct r600_bc_alu));
1033 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1034 alu.is_op3 = 1;
1035
1036 alu.dst.chan = 0;
1037 alu.dst.sel = ctx->temp_reg;
1038 alu.dst.write = 1;
1039
1040 alu.src[0].sel = ctx->temp_reg;
1041 alu.src[0].chan = 0;
1042
1043 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1044 alu.src[1].chan = 0;
1045 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1046 alu.src[2].chan = 0;
1047
1048 if (ctx->bc->chiprev == CHIPREV_R600) {
1049 alu.src[1].value = (uint32_t *)&double_pi;
1050 alu.src[2].value = (uint32_t *)&neg_pi;
1051 } else {
1052 alu.src[1].sel = V_SQ_ALU_SRC_1;
1053 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1054 alu.src[2].neg = 1;
1055 }
1056
1057 alu.last = 1;
1058 r = r600_bc_add_alu(ctx->bc, &alu);
1059 if (r)
1060 return r;
1061 return 0;
1062 }
1063
1064 static int tgsi_trig(struct r600_shader_ctx *ctx)
1065 {
1066 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1067 struct r600_bc_alu_src r600_src[3];
1068 struct r600_bc_alu alu;
1069 int i, r;
1070 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1071
1072 r = tgsi_setup_trig(ctx, r600_src);
1073 if (r)
1074 return r;
1075
1076 memset(&alu, 0, sizeof(struct r600_bc_alu));
1077 alu.inst = ctx->inst_info->r600_opcode;
1078 alu.dst.chan = 0;
1079 alu.dst.sel = ctx->temp_reg;
1080 alu.dst.write = 1;
1081
1082 alu.src[0].sel = ctx->temp_reg;
1083 alu.src[0].chan = 0;
1084 alu.last = 1;
1085 r = r600_bc_add_alu(ctx->bc, &alu);
1086 if (r)
1087 return r;
1088
1089 /* replicate result */
1090 for (i = 0; i < lasti + 1; i++) {
1091 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1092 continue;
1093
1094 memset(&alu, 0, sizeof(struct r600_bc_alu));
1095 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1096
1097 alu.src[0].sel = ctx->temp_reg;
1098 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1099 if (i == lasti)
1100 alu.last = 1;
1101 r = r600_bc_add_alu(ctx->bc, &alu);
1102 if (r)
1103 return r;
1104 }
1105 return 0;
1106 }
1107
1108 static int tgsi_scs(struct r600_shader_ctx *ctx)
1109 {
1110 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1111 struct r600_bc_alu_src r600_src[3];
1112 struct r600_bc_alu alu;
1113 int r;
1114
1115 /* We'll only need the trig stuff if we are going to write to the
1116 * X or Y components of the destination vector.
1117 */
1118 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1119 r = tgsi_setup_trig(ctx, r600_src);
1120 if (r)
1121 return r;
1122 }
1123
1124 /* dst.x = COS */
1125 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1126 memset(&alu, 0, sizeof(struct r600_bc_alu));
1127 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1128 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1129
1130 alu.src[0].sel = ctx->temp_reg;
1131 alu.src[0].chan = 0;
1132 alu.last = 1;
1133 r = r600_bc_add_alu(ctx->bc, &alu);
1134 if (r)
1135 return r;
1136 }
1137
1138 /* dst.y = SIN */
1139 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1140 memset(&alu, 0, sizeof(struct r600_bc_alu));
1141 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1142 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1143
1144 alu.src[0].sel = ctx->temp_reg;
1145 alu.src[0].chan = 0;
1146 alu.last = 1;
1147 r = r600_bc_add_alu(ctx->bc, &alu);
1148 if (r)
1149 return r;
1150 }
1151
1152 /* dst.z = 0.0; */
1153 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1154 memset(&alu, 0, sizeof(struct r600_bc_alu));
1155
1156 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1157
1158 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1159
1160 alu.src[0].sel = V_SQ_ALU_SRC_0;
1161 alu.src[0].chan = 0;
1162
1163 alu.last = 1;
1164
1165 r = r600_bc_add_alu(ctx->bc, &alu);
1166 if (r)
1167 return r;
1168 }
1169
1170 /* dst.w = 1.0; */
1171 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1172 memset(&alu, 0, sizeof(struct r600_bc_alu));
1173
1174 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1175
1176 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1177
1178 alu.src[0].sel = V_SQ_ALU_SRC_1;
1179 alu.src[0].chan = 0;
1180
1181 alu.last = 1;
1182
1183 r = r600_bc_add_alu(ctx->bc, &alu);
1184 if (r)
1185 return r;
1186 }
1187
1188 return 0;
1189 }
1190
1191 static int tgsi_kill(struct r600_shader_ctx *ctx)
1192 {
1193 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1194 struct r600_bc_alu alu;
1195 int i, r;
1196
1197 for (i = 0; i < 4; i++) {
1198 memset(&alu, 0, sizeof(struct r600_bc_alu));
1199 alu.inst = ctx->inst_info->r600_opcode;
1200
1201 alu.dst.chan = i;
1202
1203 alu.src[0].sel = V_SQ_ALU_SRC_0;
1204
1205 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1206 alu.src[1].sel = V_SQ_ALU_SRC_1;
1207 alu.src[1].neg = 1;
1208 } else {
1209 tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1210 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1211 }
1212 if (i == 3) {
1213 alu.last = 1;
1214 }
1215 r = r600_bc_add_alu(ctx->bc, &alu);
1216 if (r)
1217 return r;
1218 }
1219
1220 /* kill must be last in ALU */
1221 ctx->bc->force_add_cf = 1;
1222 ctx->shader->uses_kill = TRUE;
1223 return 0;
1224 }
1225
1226 static int tgsi_lit(struct r600_shader_ctx *ctx)
1227 {
1228 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1229 struct r600_bc_alu alu;
1230 struct r600_bc_alu_src r600_src[3];
1231 int r;
1232
1233 r = tgsi_split_constant(ctx, r600_src);
1234 if (r)
1235 return r;
1236 r = tgsi_split_literal_constant(ctx, r600_src);
1237 if (r)
1238 return r;
1239
1240 /* dst.x, <- 1.0 */
1241 memset(&alu, 0, sizeof(struct r600_bc_alu));
1242 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1243 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1244 alu.src[0].chan = 0;
1245 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1246 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1247 r = r600_bc_add_alu(ctx->bc, &alu);
1248 if (r)
1249 return r;
1250
1251 /* dst.y = max(src.x, 0.0) */
1252 memset(&alu, 0, sizeof(struct r600_bc_alu));
1253 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1254 alu.src[0] = r600_src[0];
1255 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1256 alu.src[1].chan = 0;
1257 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1258 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1259 r = r600_bc_add_alu(ctx->bc, &alu);
1260 if (r)
1261 return r;
1262
1263 /* dst.w, <- 1.0 */
1264 memset(&alu, 0, sizeof(struct r600_bc_alu));
1265 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1266 alu.src[0].sel = V_SQ_ALU_SRC_1;
1267 alu.src[0].chan = 0;
1268 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1269 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1270 alu.last = 1;
1271 r = r600_bc_add_alu(ctx->bc, &alu);
1272 if (r)
1273 return r;
1274
1275 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1276 {
1277 int chan;
1278 int sel;
1279
1280 /* dst.z = log(src.y) */
1281 memset(&alu, 0, sizeof(struct r600_bc_alu));
1282 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1283 alu.src[0] = r600_src[0];
1284 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1285 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1286 alu.last = 1;
1287 r = r600_bc_add_alu(ctx->bc, &alu);
1288 if (r)
1289 return r;
1290
1291 chan = alu.dst.chan;
1292 sel = alu.dst.sel;
1293
1294 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1295 memset(&alu, 0, sizeof(struct r600_bc_alu));
1296 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1297 alu.src[0] = r600_src[0];
1298 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1299 alu.src[1].sel = sel;
1300 alu.src[1].chan = chan;
1301
1302 alu.src[2] = r600_src[0];
1303 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1304 alu.dst.sel = ctx->temp_reg;
1305 alu.dst.chan = 0;
1306 alu.dst.write = 1;
1307 alu.is_op3 = 1;
1308 alu.last = 1;
1309 r = r600_bc_add_alu(ctx->bc, &alu);
1310 if (r)
1311 return r;
1312
1313 /* dst.z = exp(tmp.x) */
1314 memset(&alu, 0, sizeof(struct r600_bc_alu));
1315 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1316 alu.src[0].sel = ctx->temp_reg;
1317 alu.src[0].chan = 0;
1318 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1319 alu.last = 1;
1320 r = r600_bc_add_alu(ctx->bc, &alu);
1321 if (r)
1322 return r;
1323 }
1324 return 0;
1325 }
1326
1327 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1328 {
1329 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1330 struct r600_bc_alu alu;
1331 int i, r;
1332
1333 memset(&alu, 0, sizeof(struct r600_bc_alu));
1334
1335 /* FIXME:
1336 * For state trackers other than OpenGL, we'll want to use
1337 * _RECIPSQRT_IEEE instead.
1338 */
1339 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1340
1341 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1342 tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1343 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1344 alu.src[i].abs = 1;
1345 }
1346 alu.dst.sel = ctx->temp_reg;
1347 alu.dst.write = 1;
1348 alu.last = 1;
1349 r = r600_bc_add_alu(ctx->bc, &alu);
1350 if (r)
1351 return r;
1352 /* replicate result */
1353 return tgsi_helper_tempx_replicate(ctx);
1354 }
1355
1356 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1357 {
1358 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1359 struct r600_bc_alu alu;
1360 int i, r;
1361
1362 for (i = 0; i < 4; i++) {
1363 memset(&alu, 0, sizeof(struct r600_bc_alu));
1364 alu.src[0].sel = ctx->temp_reg;
1365 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1366 alu.dst.chan = i;
1367 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1368 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1369 if (i == 3)
1370 alu.last = 1;
1371 r = r600_bc_add_alu(ctx->bc, &alu);
1372 if (r)
1373 return r;
1374 }
1375 return 0;
1376 }
1377
1378 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1379 {
1380 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1381 struct r600_bc_alu alu;
1382 int i, r;
1383
1384 memset(&alu, 0, sizeof(struct r600_bc_alu));
1385 alu.inst = ctx->inst_info->r600_opcode;
1386 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1387 tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1388 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1389 }
1390 alu.dst.sel = ctx->temp_reg;
1391 alu.dst.write = 1;
1392 alu.last = 1;
1393 r = r600_bc_add_alu(ctx->bc, &alu);
1394 if (r)
1395 return r;
1396 /* replicate result */
1397 return tgsi_helper_tempx_replicate(ctx);
1398 }
1399
1400 static int tgsi_pow(struct r600_shader_ctx *ctx)
1401 {
1402 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1403 struct r600_bc_alu alu;
1404 int r;
1405
1406 /* LOG2(a) */
1407 memset(&alu, 0, sizeof(struct r600_bc_alu));
1408 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1409 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1410 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1411 alu.dst.sel = ctx->temp_reg;
1412 alu.dst.write = 1;
1413 alu.last = 1;
1414 r = r600_bc_add_alu(ctx->bc, &alu);
1415 if (r)
1416 return r;
1417 /* b * LOG2(a) */
1418 memset(&alu, 0, sizeof(struct r600_bc_alu));
1419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1420 tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1421 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1422 alu.src[1].sel = ctx->temp_reg;
1423 alu.dst.sel = ctx->temp_reg;
1424 alu.dst.write = 1;
1425 alu.last = 1;
1426 r = r600_bc_add_alu(ctx->bc, &alu);
1427 if (r)
1428 return r;
1429 /* POW(a,b) = EXP2(b * LOG2(a))*/
1430 memset(&alu, 0, sizeof(struct r600_bc_alu));
1431 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1432 alu.src[0].sel = ctx->temp_reg;
1433 alu.dst.sel = ctx->temp_reg;
1434 alu.dst.write = 1;
1435 alu.last = 1;
1436 r = r600_bc_add_alu(ctx->bc, &alu);
1437 if (r)
1438 return r;
1439 return tgsi_helper_tempx_replicate(ctx);
1440 }
1441
1442 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1443 {
1444 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1445 struct r600_bc_alu alu;
1446 struct r600_bc_alu_src r600_src[3];
1447 int i, r;
1448
1449 r = tgsi_split_constant(ctx, r600_src);
1450 if (r)
1451 return r;
1452 r = tgsi_split_literal_constant(ctx, r600_src);
1453 if (r)
1454 return r;
1455
1456 /* tmp = (src > 0 ? 1 : src) */
1457 for (i = 0; i < 4; i++) {
1458 memset(&alu, 0, sizeof(struct r600_bc_alu));
1459 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1460 alu.is_op3 = 1;
1461
1462 alu.dst.sel = ctx->temp_reg;
1463 alu.dst.chan = i;
1464
1465 alu.src[0] = r600_src[0];
1466 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1467
1468 alu.src[1].sel = V_SQ_ALU_SRC_1;
1469
1470 alu.src[2] = r600_src[0];
1471 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1472 if (i == 3)
1473 alu.last = 1;
1474 r = r600_bc_add_alu(ctx->bc, &alu);
1475 if (r)
1476 return r;
1477 }
1478
1479 /* dst = (-tmp > 0 ? -1 : tmp) */
1480 for (i = 0; i < 4; i++) {
1481 memset(&alu, 0, sizeof(struct r600_bc_alu));
1482 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1483 alu.is_op3 = 1;
1484 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1485
1486 alu.src[0].sel = ctx->temp_reg;
1487 alu.src[0].chan = i;
1488 alu.src[0].neg = 1;
1489
1490 alu.src[1].sel = V_SQ_ALU_SRC_1;
1491 alu.src[1].neg = 1;
1492
1493 alu.src[2].sel = ctx->temp_reg;
1494 alu.src[2].chan = i;
1495
1496 if (i == 3)
1497 alu.last = 1;
1498 r = r600_bc_add_alu(ctx->bc, &alu);
1499 if (r)
1500 return r;
1501 }
1502 return 0;
1503 }
1504
1505 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1506 {
1507 struct r600_bc_alu alu;
1508 int i, r;
1509
1510 for (i = 0; i < 4; i++) {
1511 memset(&alu, 0, sizeof(struct r600_bc_alu));
1512 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1513 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1514 alu.dst.chan = i;
1515 } else {
1516 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1517 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1518 alu.src[0].sel = ctx->temp_reg;
1519 alu.src[0].chan = i;
1520 }
1521 if (i == 3) {
1522 alu.last = 1;
1523 }
1524 r = r600_bc_add_alu(ctx->bc, &alu);
1525 if (r)
1526 return r;
1527 }
1528 return 0;
1529 }
1530
1531 static int tgsi_op3(struct r600_shader_ctx *ctx)
1532 {
1533 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1534 struct r600_bc_alu_src r600_src[3];
1535 struct r600_bc_alu alu;
1536 int i, j, r;
1537 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1538
1539 r = tgsi_split_constant(ctx, r600_src);
1540 if (r)
1541 return r;
1542 r = tgsi_split_literal_constant(ctx, r600_src);
1543 if (r)
1544 return r;
1545 for (i = 0; i < lasti + 1; i++) {
1546 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1547 continue;
1548
1549 memset(&alu, 0, sizeof(struct r600_bc_alu));
1550 alu.inst = ctx->inst_info->r600_opcode;
1551 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1552 alu.src[j] = r600_src[j];
1553 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1554 }
1555
1556 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1557 alu.dst.chan = i;
1558 alu.dst.write = 1;
1559 alu.is_op3 = 1;
1560 if (i == lasti) {
1561 alu.last = 1;
1562 }
1563 r = r600_bc_add_alu(ctx->bc, &alu);
1564 if (r)
1565 return r;
1566 }
1567 return 0;
1568 }
1569
1570 static int tgsi_dp(struct r600_shader_ctx *ctx)
1571 {
1572 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1573 struct r600_bc_alu_src r600_src[3];
1574 struct r600_bc_alu alu;
1575 int i, j, r;
1576
1577 r = tgsi_split_constant(ctx, r600_src);
1578 if (r)
1579 return r;
1580 r = tgsi_split_literal_constant(ctx, r600_src);
1581 if (r)
1582 return r;
1583 for (i = 0; i < 4; i++) {
1584 memset(&alu, 0, sizeof(struct r600_bc_alu));
1585 alu.inst = ctx->inst_info->r600_opcode;
1586 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1587 alu.src[j] = r600_src[j];
1588 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1589 }
1590
1591 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1592 alu.dst.chan = i;
1593 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1594 /* handle some special cases */
1595 switch (ctx->inst_info->tgsi_opcode) {
1596 case TGSI_OPCODE_DP2:
1597 if (i > 1) {
1598 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1599 alu.src[0].chan = alu.src[1].chan = 0;
1600 }
1601 break;
1602 case TGSI_OPCODE_DP3:
1603 if (i > 2) {
1604 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1605 alu.src[0].chan = alu.src[1].chan = 0;
1606 }
1607 break;
1608 case TGSI_OPCODE_DPH:
1609 if (i == 3) {
1610 alu.src[0].sel = V_SQ_ALU_SRC_1;
1611 alu.src[0].chan = 0;
1612 alu.src[0].neg = 0;
1613 }
1614 break;
1615 default:
1616 break;
1617 }
1618 if (i == 3) {
1619 alu.last = 1;
1620 }
1621 r = r600_bc_add_alu(ctx->bc, &alu);
1622 if (r)
1623 return r;
1624 }
1625 return 0;
1626 }
1627
1628 static int tgsi_tex(struct r600_shader_ctx *ctx)
1629 {
1630 static float one_point_five = 1.5f;
1631 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1632 struct r600_bc_tex tex;
1633 struct r600_bc_alu alu;
1634 unsigned src_gpr;
1635 int r, i;
1636 int opcode;
1637 boolean src_not_temp =
1638 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1639 inst->Src[0].Register.File != TGSI_FILE_INPUT;
1640
1641 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1642
1643 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1644 /* Add perspective divide */
1645 memset(&alu, 0, sizeof(struct r600_bc_alu));
1646 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1647 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1648
1649 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1650 alu.dst.sel = ctx->temp_reg;
1651 alu.dst.chan = 3;
1652 alu.last = 1;
1653 alu.dst.write = 1;
1654 r = r600_bc_add_alu(ctx->bc, &alu);
1655 if (r)
1656 return r;
1657
1658 for (i = 0; i < 3; i++) {
1659 memset(&alu, 0, sizeof(struct r600_bc_alu));
1660 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1661 alu.src[0].sel = ctx->temp_reg;
1662 alu.src[0].chan = 3;
1663 tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1664 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1665 alu.dst.sel = ctx->temp_reg;
1666 alu.dst.chan = i;
1667 alu.dst.write = 1;
1668 r = r600_bc_add_alu(ctx->bc, &alu);
1669 if (r)
1670 return r;
1671 }
1672 memset(&alu, 0, sizeof(struct r600_bc_alu));
1673 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1674 alu.src[0].sel = V_SQ_ALU_SRC_1;
1675 alu.src[0].chan = 0;
1676 alu.dst.sel = ctx->temp_reg;
1677 alu.dst.chan = 3;
1678 alu.last = 1;
1679 alu.dst.write = 1;
1680 r = r600_bc_add_alu(ctx->bc, &alu);
1681 if (r)
1682 return r;
1683 src_not_temp = FALSE;
1684 src_gpr = ctx->temp_reg;
1685 }
1686
1687 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1688 int src_chan, src2_chan;
1689
1690 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1691 for (i = 0; i < 4; i++) {
1692 memset(&alu, 0, sizeof(struct r600_bc_alu));
1693 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1694 switch (i) {
1695 case 0:
1696 src_chan = 2;
1697 src2_chan = 1;
1698 break;
1699 case 1:
1700 src_chan = 2;
1701 src2_chan = 0;
1702 break;
1703 case 2:
1704 src_chan = 0;
1705 src2_chan = 2;
1706 break;
1707 case 3:
1708 src_chan = 1;
1709 src2_chan = 2;
1710 break;
1711 default:
1712 assert(0);
1713 src_chan = 0;
1714 src2_chan = 0;
1715 break;
1716 }
1717 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1718 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1719 tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1720 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1721 alu.dst.sel = ctx->temp_reg;
1722 alu.dst.chan = i;
1723 if (i == 3)
1724 alu.last = 1;
1725 alu.dst.write = 1;
1726 r = r600_bc_add_alu(ctx->bc, &alu);
1727 if (r)
1728 return r;
1729 }
1730
1731 /* tmp1.z = RCP_e(|tmp1.z|) */
1732 memset(&alu, 0, sizeof(struct r600_bc_alu));
1733 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1734 alu.src[0].sel = ctx->temp_reg;
1735 alu.src[0].chan = 2;
1736 alu.src[0].abs = 1;
1737 alu.dst.sel = ctx->temp_reg;
1738 alu.dst.chan = 2;
1739 alu.dst.write = 1;
1740 alu.last = 1;
1741 r = r600_bc_add_alu(ctx->bc, &alu);
1742 if (r)
1743 return r;
1744
1745 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1746 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1747 * muladd has no writemask, have to use another temp
1748 */
1749 memset(&alu, 0, sizeof(struct r600_bc_alu));
1750 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1751 alu.is_op3 = 1;
1752
1753 alu.src[0].sel = ctx->temp_reg;
1754 alu.src[0].chan = 0;
1755 alu.src[1].sel = ctx->temp_reg;
1756 alu.src[1].chan = 2;
1757
1758 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1759 alu.src[2].chan = 0;
1760 alu.src[2].value = (u32*)&one_point_five;
1761
1762 alu.dst.sel = ctx->temp_reg;
1763 alu.dst.chan = 0;
1764 alu.dst.write = 1;
1765
1766 r = r600_bc_add_alu(ctx->bc, &alu);
1767 if (r)
1768 return r;
1769
1770 memset(&alu, 0, sizeof(struct r600_bc_alu));
1771 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1772 alu.is_op3 = 1;
1773
1774 alu.src[0].sel = ctx->temp_reg;
1775 alu.src[0].chan = 1;
1776 alu.src[1].sel = ctx->temp_reg;
1777 alu.src[1].chan = 2;
1778
1779 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1780 alu.src[2].chan = 0;
1781 alu.src[2].value = (u32*)&one_point_five;
1782
1783 alu.dst.sel = ctx->temp_reg;
1784 alu.dst.chan = 1;
1785 alu.dst.write = 1;
1786
1787 alu.last = 1;
1788 r = r600_bc_add_alu(ctx->bc, &alu);
1789 if (r)
1790 return r;
1791
1792 src_not_temp = FALSE;
1793 src_gpr = ctx->temp_reg;
1794 }
1795
1796 if (src_not_temp) {
1797 for (i = 0; i < 4; i++) {
1798 memset(&alu, 0, sizeof(struct r600_bc_alu));
1799 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1800 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1801 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1802 alu.dst.sel = ctx->temp_reg;
1803 alu.dst.chan = i;
1804 if (i == 3)
1805 alu.last = 1;
1806 alu.dst.write = 1;
1807 r = r600_bc_add_alu(ctx->bc, &alu);
1808 if (r)
1809 return r;
1810 }
1811 src_gpr = ctx->temp_reg;
1812 }
1813
1814 opcode = ctx->inst_info->r600_opcode;
1815 if (opcode == SQ_TEX_INST_SAMPLE &&
1816 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1817 opcode = SQ_TEX_INST_SAMPLE_C;
1818
1819 memset(&tex, 0, sizeof(struct r600_bc_tex));
1820 tex.inst = opcode;
1821 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1822 tex.resource_id = tex.sampler_id;
1823 tex.src_gpr = src_gpr;
1824 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1825 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1826 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1827 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1828 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1829 tex.src_sel_x = 0;
1830 tex.src_sel_y = 1;
1831 tex.src_sel_z = 2;
1832 tex.src_sel_w = 3;
1833
1834 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1835 tex.src_sel_x = 1;
1836 tex.src_sel_y = 0;
1837 tex.src_sel_z = 3;
1838 tex.src_sel_w = 1;
1839 }
1840
1841 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1842 tex.coord_type_x = 1;
1843 tex.coord_type_y = 1;
1844 tex.coord_type_z = 1;
1845 tex.coord_type_w = 1;
1846 }
1847
1848 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1849 tex.src_sel_w = 2;
1850
1851 r = r600_bc_add_tex(ctx->bc, &tex);
1852 if (r)
1853 return r;
1854
1855 /* add shadow ambient support - gallium doesn't do it yet */
1856 return 0;
1857 }
1858
1859 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1860 {
1861 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1862 struct r600_bc_alu_src r600_src[3];
1863 struct r600_bc_alu alu;
1864 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1865 unsigned i;
1866 int r;
1867
1868 r = tgsi_split_constant(ctx, r600_src);
1869 if (r)
1870 return r;
1871 r = tgsi_split_literal_constant(ctx, r600_src);
1872 if (r)
1873 return r;
1874
1875 /* optimize if it's just an equal balance */
1876 if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
1877 for (i = 0; i < lasti + 1; i++) {
1878 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1879 continue;
1880
1881 memset(&alu, 0, sizeof(struct r600_bc_alu));
1882 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1883 alu.src[0] = r600_src[1];
1884 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1885 alu.src[1] = r600_src[2];
1886 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1887 alu.omod = 3;
1888 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1889 alu.dst.chan = i;
1890 if (i == lasti) {
1891 alu.last = 1;
1892 }
1893 r = r600_bc_add_alu(ctx->bc, &alu);
1894 if (r)
1895 return r;
1896 }
1897 return 0;
1898 }
1899
1900 /* 1 - src0 */
1901 for (i = 0; i < lasti + 1; i++) {
1902 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1903 continue;
1904
1905 memset(&alu, 0, sizeof(struct r600_bc_alu));
1906 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1907 alu.src[0].sel = V_SQ_ALU_SRC_1;
1908 alu.src[0].chan = 0;
1909 alu.src[1] = r600_src[0];
1910 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1911 alu.src[1].neg = 1;
1912 alu.dst.sel = ctx->temp_reg;
1913 alu.dst.chan = i;
1914 if (i == lasti) {
1915 alu.last = 1;
1916 }
1917 alu.dst.write = 1;
1918 r = r600_bc_add_alu(ctx->bc, &alu);
1919 if (r)
1920 return r;
1921 }
1922
1923 /* (1 - src0) * src2 */
1924 for (i = 0; i < lasti + 1; i++) {
1925 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1926 continue;
1927
1928 memset(&alu, 0, sizeof(struct r600_bc_alu));
1929 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1930 alu.src[0].sel = ctx->temp_reg;
1931 alu.src[0].chan = i;
1932 alu.src[1] = r600_src[2];
1933 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1934 alu.dst.sel = ctx->temp_reg;
1935 alu.dst.chan = i;
1936 if (i == lasti) {
1937 alu.last = 1;
1938 }
1939 alu.dst.write = 1;
1940 r = r600_bc_add_alu(ctx->bc, &alu);
1941 if (r)
1942 return r;
1943 }
1944
1945 /* src0 * src1 + (1 - src0) * src2 */
1946 for (i = 0; i < lasti + 1; i++) {
1947 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1948 continue;
1949
1950 memset(&alu, 0, sizeof(struct r600_bc_alu));
1951 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1952 alu.is_op3 = 1;
1953 alu.src[0] = r600_src[0];
1954 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1955 alu.src[1] = r600_src[1];
1956 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1957 alu.src[2].sel = ctx->temp_reg;
1958 alu.src[2].chan = i;
1959
1960 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1961 alu.dst.chan = i;
1962 if (i == lasti) {
1963 alu.last = 1;
1964 }
1965 r = r600_bc_add_alu(ctx->bc, &alu);
1966 if (r)
1967 return r;
1968 }
1969 return 0;
1970 }
1971
1972 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1973 {
1974 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1975 struct r600_bc_alu_src r600_src[3];
1976 struct r600_bc_alu alu;
1977 int i, r;
1978 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1979
1980 r = tgsi_split_constant(ctx, r600_src);
1981 if (r)
1982 return r;
1983 r = tgsi_split_literal_constant(ctx, r600_src);
1984 if (r)
1985 return r;
1986
1987 for (i = 0; i < lasti + 1; i++) {
1988 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1989 continue;
1990
1991 memset(&alu, 0, sizeof(struct r600_bc_alu));
1992 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1993 alu.src[0] = r600_src[0];
1994 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1995
1996 alu.src[1] = r600_src[2];
1997 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1998
1999 alu.src[2] = r600_src[1];
2000 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2001
2002 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2003 alu.dst.chan = i;
2004 alu.dst.write = 1;
2005 alu.is_op3 = 1;
2006 if (i == lasti)
2007 alu.last = 1;
2008 r = r600_bc_add_alu(ctx->bc, &alu);
2009 if (r)
2010 return r;
2011 }
2012 return 0;
2013 }
2014
2015 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2016 {
2017 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2018 struct r600_bc_alu_src r600_src[3];
2019 struct r600_bc_alu alu;
2020 uint32_t use_temp = 0;
2021 int i, r;
2022
2023 if (inst->Dst[0].Register.WriteMask != 0xf)
2024 use_temp = 1;
2025
2026 r = tgsi_split_constant(ctx, r600_src);
2027 if (r)
2028 return r;
2029 r = tgsi_split_literal_constant(ctx, r600_src);
2030 if (r)
2031 return r;
2032
2033 for (i = 0; i < 4; i++) {
2034 memset(&alu, 0, sizeof(struct r600_bc_alu));
2035 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2036
2037 alu.src[0] = r600_src[0];
2038 switch (i) {
2039 case 0:
2040 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2041 break;
2042 case 1:
2043 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2044 break;
2045 case 2:
2046 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2047 break;
2048 case 3:
2049 alu.src[0].sel = V_SQ_ALU_SRC_0;
2050 alu.src[0].chan = i;
2051 }
2052
2053 alu.src[1] = r600_src[1];
2054 switch (i) {
2055 case 0:
2056 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2057 break;
2058 case 1:
2059 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2060 break;
2061 case 2:
2062 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2063 break;
2064 case 3:
2065 alu.src[1].sel = V_SQ_ALU_SRC_0;
2066 alu.src[1].chan = i;
2067 }
2068
2069 alu.dst.sel = ctx->temp_reg;
2070 alu.dst.chan = i;
2071 alu.dst.write = 1;
2072
2073 if (i == 3)
2074 alu.last = 1;
2075 r = r600_bc_add_alu(ctx->bc, &alu);
2076 if (r)
2077 return r;
2078 }
2079
2080 for (i = 0; i < 4; i++) {
2081 memset(&alu, 0, sizeof(struct r600_bc_alu));
2082 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2083
2084 alu.src[0] = r600_src[0];
2085 switch (i) {
2086 case 0:
2087 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2088 break;
2089 case 1:
2090 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2091 break;
2092 case 2:
2093 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2094 break;
2095 case 3:
2096 alu.src[0].sel = V_SQ_ALU_SRC_0;
2097 alu.src[0].chan = i;
2098 }
2099
2100 alu.src[1] = r600_src[1];
2101 switch (i) {
2102 case 0:
2103 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2104 break;
2105 case 1:
2106 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2107 break;
2108 case 2:
2109 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2110 break;
2111 case 3:
2112 alu.src[1].sel = V_SQ_ALU_SRC_0;
2113 alu.src[1].chan = i;
2114 }
2115
2116 alu.src[2].sel = ctx->temp_reg;
2117 alu.src[2].neg = 1;
2118 alu.src[2].chan = i;
2119
2120 if (use_temp)
2121 alu.dst.sel = ctx->temp_reg;
2122 else
2123 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2124 alu.dst.chan = i;
2125 alu.dst.write = 1;
2126 alu.is_op3 = 1;
2127 if (i == 3)
2128 alu.last = 1;
2129 r = r600_bc_add_alu(ctx->bc, &alu);
2130 if (r)
2131 return r;
2132 }
2133 if (use_temp)
2134 return tgsi_helper_copy(ctx, inst);
2135 return 0;
2136 }
2137
2138 static int tgsi_exp(struct r600_shader_ctx *ctx)
2139 {
2140 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2141 struct r600_bc_alu_src r600_src[3] = { { 0 } };
2142 struct r600_bc_alu alu;
2143 int r;
2144
2145 /* result.x = 2^floor(src); */
2146 if (inst->Dst[0].Register.WriteMask & 1) {
2147 memset(&alu, 0, sizeof(struct r600_bc_alu));
2148
2149 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2150 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2151
2152 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2153
2154 alu.dst.sel = ctx->temp_reg;
2155 alu.dst.chan = 0;
2156 alu.dst.write = 1;
2157 alu.last = 1;
2158 r = r600_bc_add_alu(ctx->bc, &alu);
2159 if (r)
2160 return r;
2161
2162 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2163 alu.src[0].sel = ctx->temp_reg;
2164 alu.src[0].chan = 0;
2165
2166 alu.dst.sel = ctx->temp_reg;
2167 alu.dst.chan = 0;
2168 alu.dst.write = 1;
2169 alu.last = 1;
2170 r = r600_bc_add_alu(ctx->bc, &alu);
2171 if (r)
2172 return r;
2173 }
2174
2175 /* result.y = tmp - floor(tmp); */
2176 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2177 memset(&alu, 0, sizeof(struct r600_bc_alu));
2178
2179 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2180 alu.src[0] = r600_src[0];
2181 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2182 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2183
2184 alu.dst.sel = ctx->temp_reg;
2185 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2186 // if (r)
2187 // return r;
2188 alu.dst.write = 1;
2189 alu.dst.chan = 1;
2190
2191 alu.last = 1;
2192
2193 r = r600_bc_add_alu(ctx->bc, &alu);
2194 if (r)
2195 return r;
2196 }
2197
2198 /* result.z = RoughApprox2ToX(tmp);*/
2199 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2200 memset(&alu, 0, sizeof(struct r600_bc_alu));
2201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2202 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2203 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2204
2205 alu.dst.sel = ctx->temp_reg;
2206 alu.dst.write = 1;
2207 alu.dst.chan = 2;
2208
2209 alu.last = 1;
2210
2211 r = r600_bc_add_alu(ctx->bc, &alu);
2212 if (r)
2213 return r;
2214 }
2215
2216 /* result.w = 1.0;*/
2217 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2218 memset(&alu, 0, sizeof(struct r600_bc_alu));
2219
2220 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2221 alu.src[0].sel = V_SQ_ALU_SRC_1;
2222 alu.src[0].chan = 0;
2223
2224 alu.dst.sel = ctx->temp_reg;
2225 alu.dst.chan = 3;
2226 alu.dst.write = 1;
2227 alu.last = 1;
2228 r = r600_bc_add_alu(ctx->bc, &alu);
2229 if (r)
2230 return r;
2231 }
2232 return tgsi_helper_copy(ctx, inst);
2233 }
2234
2235 static int tgsi_log(struct r600_shader_ctx *ctx)
2236 {
2237 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2238 struct r600_bc_alu alu;
2239 int r;
2240
2241 /* result.x = floor(log2(src)); */
2242 if (inst->Dst[0].Register.WriteMask & 1) {
2243 memset(&alu, 0, sizeof(struct r600_bc_alu));
2244
2245 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2246 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2247
2248 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2249
2250 alu.dst.sel = ctx->temp_reg;
2251 alu.dst.chan = 0;
2252 alu.dst.write = 1;
2253 alu.last = 1;
2254 r = r600_bc_add_alu(ctx->bc, &alu);
2255 if (r)
2256 return r;
2257
2258 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2259 alu.src[0].sel = ctx->temp_reg;
2260 alu.src[0].chan = 0;
2261
2262 alu.dst.sel = ctx->temp_reg;
2263 alu.dst.chan = 0;
2264 alu.dst.write = 1;
2265 alu.last = 1;
2266
2267 r = r600_bc_add_alu(ctx->bc, &alu);
2268 if (r)
2269 return r;
2270 }
2271
2272 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2273 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2274 memset(&alu, 0, sizeof(struct r600_bc_alu));
2275
2276 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2277 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2278
2279 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2280
2281 alu.dst.sel = ctx->temp_reg;
2282 alu.dst.chan = 1;
2283 alu.dst.write = 1;
2284 alu.last = 1;
2285
2286 r = r600_bc_add_alu(ctx->bc, &alu);
2287 if (r)
2288 return r;
2289
2290 memset(&alu, 0, sizeof(struct r600_bc_alu));
2291
2292 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2293 alu.src[0].sel = ctx->temp_reg;
2294 alu.src[0].chan = 1;
2295
2296 alu.dst.sel = ctx->temp_reg;
2297 alu.dst.chan = 1;
2298 alu.dst.write = 1;
2299 alu.last = 1;
2300
2301 r = r600_bc_add_alu(ctx->bc, &alu);
2302 if (r)
2303 return r;
2304
2305 memset(&alu, 0, sizeof(struct r600_bc_alu));
2306
2307 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2308 alu.src[0].sel = ctx->temp_reg;
2309 alu.src[0].chan = 1;
2310
2311 alu.dst.sel = ctx->temp_reg;
2312 alu.dst.chan = 1;
2313 alu.dst.write = 1;
2314 alu.last = 1;
2315
2316 r = r600_bc_add_alu(ctx->bc, &alu);
2317 if (r)
2318 return r;
2319
2320 memset(&alu, 0, sizeof(struct r600_bc_alu));
2321
2322 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2323 alu.src[0].sel = ctx->temp_reg;
2324 alu.src[0].chan = 1;
2325
2326 alu.dst.sel = ctx->temp_reg;
2327 alu.dst.chan = 1;
2328 alu.dst.write = 1;
2329 alu.last = 1;
2330
2331 r = r600_bc_add_alu(ctx->bc, &alu);
2332 if (r)
2333 return r;
2334
2335 memset(&alu, 0, sizeof(struct r600_bc_alu));
2336
2337 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2338
2339 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2340 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2341
2342 alu.src[1].sel = ctx->temp_reg;
2343 alu.src[1].chan = 1;
2344
2345 alu.dst.sel = ctx->temp_reg;
2346 alu.dst.chan = 1;
2347 alu.dst.write = 1;
2348 alu.last = 1;
2349
2350 r = r600_bc_add_alu(ctx->bc, &alu);
2351 if (r)
2352 return r;
2353 }
2354
2355 /* result.z = log2(src);*/
2356 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2357 memset(&alu, 0, sizeof(struct r600_bc_alu));
2358
2359 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2360 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2361 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2362
2363 alu.dst.sel = ctx->temp_reg;
2364 alu.dst.write = 1;
2365 alu.dst.chan = 2;
2366 alu.last = 1;
2367
2368 r = r600_bc_add_alu(ctx->bc, &alu);
2369 if (r)
2370 return r;
2371 }
2372
2373 /* result.w = 1.0; */
2374 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2375 memset(&alu, 0, sizeof(struct r600_bc_alu));
2376
2377 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2378 alu.src[0].sel = V_SQ_ALU_SRC_1;
2379 alu.src[0].chan = 0;
2380
2381 alu.dst.sel = ctx->temp_reg;
2382 alu.dst.chan = 3;
2383 alu.dst.write = 1;
2384 alu.last = 1;
2385
2386 r = r600_bc_add_alu(ctx->bc, &alu);
2387 if (r)
2388 return r;
2389 }
2390
2391 return tgsi_helper_copy(ctx, inst);
2392 }
2393
2394 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2395 {
2396 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2397 struct r600_bc_alu alu;
2398 int r;
2399 memset(&alu, 0, sizeof(struct r600_bc_alu));
2400
2401 switch (inst->Instruction.Opcode) {
2402 case TGSI_OPCODE_ARL:
2403 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2404 break;
2405 case TGSI_OPCODE_ARR:
2406 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2407 break;
2408 default:
2409 assert(0);
2410 return -1;
2411 }
2412
2413 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2414 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2415 alu.last = 1;
2416 alu.dst.chan = 0;
2417 alu.dst.sel = ctx->temp_reg;
2418 alu.dst.write = 1;
2419 r = r600_bc_add_alu(ctx->bc, &alu);
2420 if (r)
2421 return r;
2422 memset(&alu, 0, sizeof(struct r600_bc_alu));
2423 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2424 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2425 alu.src[0].sel = ctx->temp_reg;
2426 alu.src[0].chan = 0;
2427 alu.last = 1;
2428 r = r600_bc_add_alu(ctx->bc, &alu);
2429 if (r)
2430 return r;
2431 return 0;
2432 }
2433 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2434 {
2435 /* TODO from r600c, ar values don't persist between clauses */
2436 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2437 struct r600_bc_alu alu;
2438 int r;
2439 memset(&alu, 0, sizeof(struct r600_bc_alu));
2440
2441 switch (inst->Instruction.Opcode) {
2442 case TGSI_OPCODE_ARL:
2443 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2444 break;
2445 case TGSI_OPCODE_ARR:
2446 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2447 break;
2448 default:
2449 assert(0);
2450 return -1;
2451 }
2452
2453
2454 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2455 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2456
2457 alu.last = 1;
2458
2459 r = r600_bc_add_alu(ctx->bc, &alu);
2460 if (r)
2461 return r;
2462 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2463 return 0;
2464 }
2465
2466 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2467 {
2468 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2469 struct r600_bc_alu alu;
2470 int i, r = 0;
2471
2472 for (i = 0; i < 4; i++) {
2473 memset(&alu, 0, sizeof(struct r600_bc_alu));
2474
2475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2476 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2477
2478 if (i == 0 || i == 3) {
2479 alu.src[0].sel = V_SQ_ALU_SRC_1;
2480 } else {
2481 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2482 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2483 }
2484
2485 if (i == 0 || i == 2) {
2486 alu.src[1].sel = V_SQ_ALU_SRC_1;
2487 } else {
2488 tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2489 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2490 }
2491 if (i == 3)
2492 alu.last = 1;
2493 r = r600_bc_add_alu(ctx->bc, &alu);
2494 if (r)
2495 return r;
2496 }
2497 return 0;
2498 }
2499
2500 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2501 {
2502 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2503 struct r600_bc_alu alu;
2504 int r;
2505
2506 memset(&alu, 0, sizeof(struct r600_bc_alu));
2507 alu.inst = opcode;
2508 alu.predicate = 1;
2509
2510 alu.dst.sel = ctx->temp_reg;
2511 alu.dst.write = 1;
2512 alu.dst.chan = 0;
2513
2514 tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2515 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2516 alu.src[1].sel = V_SQ_ALU_SRC_0;
2517 alu.src[1].chan = 0;
2518
2519 alu.last = 1;
2520
2521 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2522 if (r)
2523 return r;
2524 return 0;
2525 }
2526
2527 static int pops(struct r600_shader_ctx *ctx, int pops)
2528 {
2529 int alu_pop = 3;
2530 if (ctx->bc->cf_last) {
2531 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2532 alu_pop = 0;
2533 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2534 alu_pop = 1;
2535 }
2536 alu_pop += pops;
2537 if (alu_pop == 1) {
2538 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2539 ctx->bc->force_add_cf = 1;
2540 } else if (alu_pop == 2) {
2541 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2542 ctx->bc->force_add_cf = 1;
2543 } else {
2544 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2545 ctx->bc->cf_last->pop_count = pops;
2546 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2547 }
2548 return 0;
2549 }
2550
2551 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2552 {
2553 switch(reason) {
2554 case FC_PUSH_VPM:
2555 ctx->bc->callstack[ctx->bc->call_sp].current--;
2556 break;
2557 case FC_PUSH_WQM:
2558 case FC_LOOP:
2559 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2560 break;
2561 case FC_REP:
2562 /* TOODO : for 16 vp asic should -= 2; */
2563 ctx->bc->callstack[ctx->bc->call_sp].current --;
2564 break;
2565 }
2566 }
2567
2568 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2569 {
2570 if (check_max_only) {
2571 int diff;
2572 switch (reason) {
2573 case FC_PUSH_VPM:
2574 diff = 1;
2575 break;
2576 case FC_PUSH_WQM:
2577 diff = 4;
2578 break;
2579 default:
2580 assert(0);
2581 diff = 0;
2582 }
2583 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2584 ctx->bc->callstack[ctx->bc->call_sp].max) {
2585 ctx->bc->callstack[ctx->bc->call_sp].max =
2586 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2587 }
2588 return;
2589 }
2590 switch (reason) {
2591 case FC_PUSH_VPM:
2592 ctx->bc->callstack[ctx->bc->call_sp].current++;
2593 break;
2594 case FC_PUSH_WQM:
2595 case FC_LOOP:
2596 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2597 break;
2598 case FC_REP:
2599 ctx->bc->callstack[ctx->bc->call_sp].current++;
2600 break;
2601 }
2602
2603 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2604 ctx->bc->callstack[ctx->bc->call_sp].max) {
2605 ctx->bc->callstack[ctx->bc->call_sp].max =
2606 ctx->bc->callstack[ctx->bc->call_sp].current;
2607 }
2608 }
2609
2610 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2611 {
2612 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2613
2614 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2615 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2616 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2617 sp->num_mid++;
2618 }
2619
2620 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2621 {
2622 ctx->bc->fc_sp++;
2623 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2624 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2625 }
2626
2627 static void fc_poplevel(struct r600_shader_ctx *ctx)
2628 {
2629 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2630 if (sp->mid) {
2631 free(sp->mid);
2632 sp->mid = NULL;
2633 }
2634 sp->num_mid = 0;
2635 sp->start = NULL;
2636 sp->type = 0;
2637 ctx->bc->fc_sp--;
2638 }
2639
#if 0
/*
 * Disabled, unfinished helpers for RETURN / return-inside-loop handling.
 * Nothing in this region is compiled.
 */

/* Add a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Add a JUMP CF instruction popping "pops" entries; "offset" is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, then jump, reset the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, then add the current instruction's CF opcode with one
 * pop, record it in the mid list of level fc_sp and pop once more.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2687
2688 static int tgsi_if(struct r600_shader_ctx *ctx)
2689 {
2690 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2691
2692 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2693
2694 fc_pushlevel(ctx, FC_IF);
2695
2696 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2697 return 0;
2698 }
2699
2700 static int tgsi_else(struct r600_shader_ctx *ctx)
2701 {
2702 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2703 ctx->bc->cf_last->pop_count = 1;
2704
2705 fc_set_mid(ctx, ctx->bc->fc_sp);
2706 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2707 return 0;
2708 }
2709
2710 static int tgsi_endif(struct r600_shader_ctx *ctx)
2711 {
2712 pops(ctx, 1);
2713 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2714 R600_ERR("if/endif unbalanced in shader\n");
2715 return -1;
2716 }
2717
2718 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2719 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2720 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2721 } else {
2722 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2723 }
2724 fc_poplevel(ctx);
2725
2726 callstack_decrease_current(ctx, FC_PUSH_VPM);
2727 return 0;
2728 }
2729
2730 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2731 {
2732 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2733
2734 fc_pushlevel(ctx, FC_LOOP);
2735
2736 /* check stack depth */
2737 callstack_check_depth(ctx, FC_LOOP, 0);
2738 return 0;
2739 }
2740
2741 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2742 {
2743 int i;
2744
2745 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2746
2747 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2748 R600_ERR("loop/endloop in shader code are not paired.\n");
2749 return -EINVAL;
2750 }
2751
2752 /* fixup loop pointers - from r600isa
2753 LOOP END points to CF after LOOP START,
2754 LOOP START point to CF after LOOP END
2755 BRK/CONT point to LOOP END CF
2756 */
2757 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2758
2759 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2760
2761 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2762 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2763 }
2764 /* TODO add LOOPRET support */
2765 fc_poplevel(ctx);
2766 callstack_decrease_current(ctx, FC_LOOP);
2767 return 0;
2768 }
2769
2770 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2771 {
2772 unsigned int fscp;
2773
2774 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2775 {
2776 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2777 break;
2778 }
2779
2780 if (fscp == 0) {
2781 R600_ERR("Break not inside loop/endloop pair\n");
2782 return -EINVAL;
2783 }
2784
2785 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2786 ctx->bc->cf_last->pop_count = 1;
2787
2788 fc_set_mid(ctx, fscp);
2789
2790 pops(ctx, 1);
2791 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2792 return 0;
2793 }
2794
2795 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2796 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2797 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2798 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2799
2800 /* FIXME:
2801 * For state trackers other than OpenGL, we'll want to use
2802 * _RECIP_IEEE instead.
2803 */
2804 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2805
2806 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2807 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2808 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2809 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2810 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2811 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2812 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2813 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2814 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2815 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2816 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2817 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2818 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2819 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2820 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2821 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2822 /* gap */
2823 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2825 /* gap */
2826 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2827 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2828 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2829 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2830 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2831 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2832 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2833 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2834 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2835 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2836 /* gap */
2837 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2839 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2841 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2842 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2843 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2844 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2845 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2846 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2847 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2851 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2853 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2854 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2855 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2856 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2857 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2858 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2859 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2860 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2867 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2868 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2871 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2872 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2873 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2874 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2875 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2876 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2877 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2878 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2879 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2880 /* gap */
2881 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2884 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2885 /* gap */
2886 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2894 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895 /* gap */
2896 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2905 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2908 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2910 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911 /* gap */
2912 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917 /* gap */
2918 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2927 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2928 /* gap */
2929 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2947 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2949 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2950 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2951 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2952 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2954 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2955 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2956 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2957 };
2958
2959 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2960 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2961 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2962 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2963 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2964 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2965 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2966 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2967 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2968 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2969 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2970 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2971 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2972 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2973 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2974 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2975 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2976 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2977 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2978 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2979 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2980 /* gap */
2981 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2982 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2983 /* gap */
2984 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2985 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2986 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2987 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2988 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2989 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2990 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2991 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2992 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2993 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2994 /* gap */
2995 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2996 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2997 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2998 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2999 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3000 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3001 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3002 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3003 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3004 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3005 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3006 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3007 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3008 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3009 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3010 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3011 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3012 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3013 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3014 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3015 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3016 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3017 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3018 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3020 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3021 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3022 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3023 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3024 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3025 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3026 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3028 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3029 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3030 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3031 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3032 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3034 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3035 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3036 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3037 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3038 /* gap */
3039 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3040 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3041 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3042 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3043 /* gap */
3044 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3048 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3049 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3050 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3051 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3052 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053 /* gap */
3054 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3060 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3063 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3065 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3066 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3067 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3068 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069 /* gap */
3070 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075 /* gap */
3076 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3079 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3085 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3086 /* gap */
3087 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3102 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115 };