Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_opcodes.h"
32 #include "r600d.h"
33 #include <stdio.h>
34 #include <errno.h>
35
36 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37 {
38 struct r600_pipe_state *rstate = &shader->rstate;
39 struct r600_shader *rshader = &shader->shader;
40 unsigned spi_vs_out_id[10];
41 unsigned i, tmp;
42
43 /* clear previous register */
44 rstate->nregs = 0;
45
46 /* so far never got proper semantic id from tgsi */
47 /* FIXME better to move this in config things so they get emited
48 * only one time per cs
49 */
50 for (i = 0; i < 10; i++) {
51 spi_vs_out_id[i] = 0;
52 }
53 for (i = 0; i < 32; i++) {
54 tmp = i << ((i & 3) * 8);
55 spi_vs_out_id[i / 4] |= tmp;
56 }
57 for (i = 0; i < 10; i++) {
58 r600_pipe_state_add_reg(rstate,
59 R_028614_SPI_VS_OUT_ID_0 + i * 4,
60 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
61 }
62
63 r600_pipe_state_add_reg(rstate,
64 R_0286C4_SPI_VS_OUT_CONFIG,
65 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
66 0xFFFFFFFF, NULL);
67 r600_pipe_state_add_reg(rstate,
68 R_028868_SQ_PGM_RESOURCES_VS,
69 S_028868_NUM_GPRS(rshader->bc.ngpr) |
70 S_028868_STACK_SIZE(rshader->bc.nstack),
71 0xFFFFFFFF, NULL);
72 r600_pipe_state_add_reg(rstate,
73 R_0288D0_SQ_PGM_CF_OFFSET_VS,
74 0x00000000, 0xFFFFFFFF, NULL);
75 r600_pipe_state_add_reg(rstate,
76 R_028858_SQ_PGM_START_VS,
77 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
78
79 r600_pipe_state_add_reg(rstate,
80 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
81 0xFFFFFFFF, NULL);
82
83 }
84
85 int r600_find_vs_semantic_index(struct r600_shader *vs,
86 struct r600_shader *ps, int id)
87 {
88 struct r600_shader_io *input = &ps->input[id];
89
90 for (int i = 0; i < vs->noutput; i++) {
91 if (input->name == vs->output[i].name &&
92 input->sid == vs->output[i].sid) {
93 return i - 1;
94 }
95 }
96 return 0;
97 }
98
99 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
100 {
101 struct r600_pipe_state *rstate = &shader->rstate;
102 struct r600_shader *rshader = &shader->shader;
103 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
104 int pos_index = -1, face_index = -1;
105
106 rstate->nregs = 0;
107
108 for (i = 0; i < rshader->ninput; i++) {
109 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
110 pos_index = i;
111 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
112 face_index = i;
113 }
114
115 for (i = 0; i < rshader->noutput; i++) {
116 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
117 r600_pipe_state_add_reg(rstate,
118 R_02880C_DB_SHADER_CONTROL,
119 S_02880C_Z_EXPORT_ENABLE(1),
120 S_02880C_Z_EXPORT_ENABLE(1), NULL);
121 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
122 r600_pipe_state_add_reg(rstate,
123 R_02880C_DB_SHADER_CONTROL,
124 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
126 }
127
128 exports_ps = 0;
129 num_cout = 0;
130 for (i = 0; i < rshader->noutput; i++) {
131 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
132 exports_ps |= 1;
133 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
134 num_cout++;
135 }
136 }
137 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
138 if (!exports_ps) {
139 /* always at least export 1 component per pixel */
140 exports_ps = 2;
141 }
142
143 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
144 S_0286CC_PERSP_GRADIENT_ENA(1);
145 spi_input_z = 0;
146 if (pos_index != -1) {
147 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
148 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
149 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
150 S_0286CC_BARYC_SAMPLE_CNTL(1));
151 spi_input_z |= 1;
152 }
153
154 spi_ps_in_control_1 = 0;
155 if (face_index != -1) {
156 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
157 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
158 }
159
160 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
161 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
162 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
163 r600_pipe_state_add_reg(rstate,
164 R_028840_SQ_PGM_START_PS,
165 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
166 r600_pipe_state_add_reg(rstate,
167 R_028850_SQ_PGM_RESOURCES_PS,
168 S_028868_NUM_GPRS(rshader->bc.ngpr) |
169 S_028868_STACK_SIZE(rshader->bc.nstack),
170 0xFFFFFFFF, NULL);
171 r600_pipe_state_add_reg(rstate,
172 R_028854_SQ_PGM_EXPORTS_PS,
173 exports_ps, 0xFFFFFFFF, NULL);
174 r600_pipe_state_add_reg(rstate,
175 R_0288CC_SQ_PGM_CF_OFFSET_PS,
176 0x00000000, 0xFFFFFFFF, NULL);
177
178 if (rshader->uses_kill) {
179 /* only set some bits here, the other bits are set in the dsa state */
180 r600_pipe_state_add_reg(rstate,
181 R_02880C_DB_SHADER_CONTROL,
182 S_02880C_KILL_ENABLE(1),
183 S_02880C_KILL_ENABLE(1), NULL);
184 }
185 r600_pipe_state_add_reg(rstate,
186 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
187 0xFFFFFFFF, NULL);
188 }
189
190 int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
191 {
192 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
193 struct r600_shader *rshader = &shader->shader;
194 void *ptr;
195
196 /* copy new shader */
197 if (shader->bo == NULL) {
198 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
199 if (shader->bo == NULL) {
200 return -ENOMEM;
201 }
202 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
203 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
204 r600_bo_unmap(rctx->radeon, shader->bo);
205 }
206 /* build state */
207 switch (rshader->processor_type) {
208 case TGSI_PROCESSOR_VERTEX:
209 if (rshader->family >= CHIP_CEDAR) {
210 evergreen_pipe_shader_vs(ctx, shader);
211 } else {
212 r600_pipe_shader_vs(ctx, shader);
213 }
214 break;
215 case TGSI_PROCESSOR_FRAGMENT:
216 if (rshader->family >= CHIP_CEDAR) {
217 evergreen_pipe_shader_ps(ctx, shader);
218 } else {
219 r600_pipe_shader_ps(ctx, shader);
220 }
221 break;
222 default:
223 return -EINVAL;
224 }
225 return 0;
226 }
227
228 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
229 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
230 {
231 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
232 u32 *literals;
233 int r;
234
235 //fprintf(stderr, "--------------------------------------------------------------\n");
236 //tgsi_dump(tokens, 0);
237 shader->shader.family = r600_get_family(rctx->radeon);
238 r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
239 if (r) {
240 R600_ERR("translation from TGSI failed !\n");
241 return r;
242 }
243 r = r600_bc_build(&shader->shader.bc);
244 free(literals);
245 if (r) {
246 R600_ERR("building bytecode failed !\n");
247 return r;
248 }
249 //r600_bc_dump(&shader->shader.bc);
250 //fprintf(stderr, "______________________________________________________________\n");
251 return r600_pipe_shader(ctx, shader);
252 }
253
254 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
255 {
256 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
257
258 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
259 r600_bc_clear(&shader->shader.bc);
260 }
261
262 /*
263 * tgsi -> r600 shader
264 */
265 struct r600_shader_tgsi_instruction;
266
/* Translation state shared by r600_shader_from_tgsi() and the tgsi_* handlers. */
267 struct r600_shader_ctx {
268 struct tgsi_shader_info info; /* filled by tgsi_scan_shader() */
269 struct tgsi_parse_context parse; /* current token; handlers read FullToken from it */
270 const struct tgsi_token *tokens; /* token stream being translated */
271 unsigned type; /* processor type taken from the parse header */
272 unsigned file_offset[TGSI_FILE_COUNT]; /* per register file base added to a TGSI register index */
273 unsigned temp_reg; /* first register past the TGSI temporaries, base for driver temps */
274 struct r600_shader_tgsi_instruction *inst_info; /* table entry of the instruction being processed */
275 struct r600_bc *bc; /* bytecode under construction (points at shader->bc) */
276 struct r600_shader *shader; /* shader being filled */
277 u32 *literals; /* immediates, four dwords per immediate */
278 u32 nliterals; /* number of immediates stored in literals */
279 u32 max_driver_temp_used; /* reset per instruction, bumped by r600_get_temp() */
280 /* needed for evergreen interpolation */
281 boolean input_centroid; /* set by evergreen_gpr_count() */
282 boolean input_linear; /* set by evergreen_gpr_count() */
283 boolean input_perspective; /* set by evergreen_gpr_count() */
284 int num_interp_gpr; /* GPR count computed by evergreen_gpr_count() */
285 };
286
/* One opcode table entry; r600_shader_from_tgsi() indexes the tables by TGSI opcode. */
287 struct r600_shader_tgsi_instruction {
288 unsigned tgsi_opcode; /* TGSI opcode of this entry (printed by tgsi_unsupported()) */
289 unsigned is_op3;
290 unsigned r600_opcode; /* hardware opcode, copied into alu.inst by the generic handlers */
291 int (*process)(struct r600_shader_ctx *ctx); /* handler; returns 0 or a negative errno value */
292 };
293
/* Opcode tables (r600 and evergreen flavours) and a helper, declared ahead of their use. */
294 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
295 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
296
297 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
298 {
299 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
300 int j;
301
302 if (i->Instruction.NumDstRegs > 1) {
303 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
304 return -EINVAL;
305 }
306 if (i->Instruction.Predicate) {
307 R600_ERR("predicate unsupported\n");
308 return -EINVAL;
309 }
310 #if 0
311 if (i->Instruction.Label) {
312 R600_ERR("label unsupported\n");
313 return -EINVAL;
314 }
315 #endif
316 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
317 if (i->Src[j].Register.Dimension) {
318 R600_ERR("unsupported src %d (dimension %d)\n", j,
319 i->Src[j].Register.Dimension);
320 return -EINVAL;
321 }
322 }
323 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
324 if (i->Dst[j].Register.Dimension) {
325 R600_ERR("unsupported dst (dimension)\n");
326 return -EINVAL;
327 }
328 }
329 return 0;
330 }
331
332 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
333 {
334 int i, r;
335 struct r600_bc_alu alu;
336 int gpr = 0, base_chan = 0;
337 int ij_index = 0;
338
339 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
340 ij_index = 0;
341 if (ctx->shader->input[input].centroid)
342 ij_index++;
343 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
344 ij_index = 0;
345 /* if we have perspective add one */
346 if (ctx->input_perspective) {
347 ij_index++;
348 /* if we have perspective centroid */
349 if (ctx->input_centroid)
350 ij_index++;
351 }
352 if (ctx->shader->input[input].centroid)
353 ij_index++;
354 }
355
356 /* work out gpr and base_chan from index */
357 gpr = ij_index / 2;
358 base_chan = (2 * (ij_index % 2)) + 1;
359
360 for (i = 0; i < 8; i++) {
361 memset(&alu, 0, sizeof(struct r600_bc_alu));
362
363 if (i < 4)
364 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
365 else
366 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
367
368 if ((i > 1) && (i < 6)) {
369 alu.dst.sel = ctx->shader->input[input].gpr;
370 alu.dst.write = 1;
371 }
372
373 alu.dst.chan = i % 4;
374
375 alu.src[0].sel = gpr;
376 alu.src[0].chan = (base_chan - (i % 2));
377
378 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
379
380 alu.bank_swizzle_force = SQ_ALU_VEC_210;
381 if ((i % 4) == 3)
382 alu.last = 1;
383 r = r600_bc_add_alu(ctx->bc, &alu);
384 if (r)
385 return r;
386 }
387 return 0;
388 }
389
390
391 static int tgsi_declaration(struct r600_shader_ctx *ctx)
392 {
393 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
394 unsigned i;
395
396 switch (d->Declaration.File) {
397 case TGSI_FILE_INPUT:
398 i = ctx->shader->ninput++;
399 ctx->shader->input[i].name = d->Semantic.Name;
400 ctx->shader->input[i].sid = d->Semantic.Index;
401 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
402 ctx->shader->input[i].centroid = d->Declaration.Centroid;
403 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
404 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
405 /* turn input into interpolate on EG */
406 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
407 if (ctx->shader->input[i].interpolate > 0) {
408 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
409 evergreen_interp_alu(ctx, i);
410 }
411 }
412 }
413 break;
414 case TGSI_FILE_OUTPUT:
415 i = ctx->shader->noutput++;
416 ctx->shader->output[i].name = d->Semantic.Name;
417 ctx->shader->output[i].sid = d->Semantic.Index;
418 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
419 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
420 break;
421 case TGSI_FILE_CONSTANT:
422 case TGSI_FILE_TEMPORARY:
423 case TGSI_FILE_SAMPLER:
424 case TGSI_FILE_ADDRESS:
425 break;
426 default:
427 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
428 return -EINVAL;
429 }
430 return 0;
431 }
432
433 static int r600_get_temp(struct r600_shader_ctx *ctx)
434 {
435 return ctx->temp_reg + ctx->max_driver_temp_used++;
436 }
437
438 /*
439 * for evergreen we need to scan the shader to find the number of GPRs we need to
440 * reserve for interpolation.
441 *
442 * we need to know if we are going to emit
443 * any centroid inputs
444 * if perspective and linear are required
445 */
446 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
447 {
448 int i;
449 int num_baryc;
450
451 ctx->input_linear = FALSE;
452 ctx->input_perspective = FALSE;
453 ctx->input_centroid = FALSE;
454 ctx->num_interp_gpr = 1;
455
456 /* any centroid inputs */
457 for (i = 0; i < ctx->info.num_inputs; i++) {
458 /* skip position/face */
459 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
460 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
461 continue;
462 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
463 ctx->input_linear = TRUE;
464 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
465 ctx->input_perspective = TRUE;
466 if (ctx->info.input_centroid[i])
467 ctx->input_centroid = TRUE;
468 }
469
470 num_baryc = 0;
471 /* ignoring sample for now */
472 if (ctx->input_perspective)
473 num_baryc++;
474 if (ctx->input_linear)
475 num_baryc++;
476 if (ctx->input_centroid)
477 num_baryc *= 2;
478
479 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
480
481 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
482 return ctx->num_interp_gpr;
483 }
484
485 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
486 {
487 struct tgsi_full_immediate *immediate;
488 struct r600_shader_ctx ctx;
489 struct r600_bc_output output[32];
490 unsigned noutput;
491 unsigned opcode;
492 int i, r = 0, pos0;
493
494 ctx.bc = &shader->bc;
495 ctx.shader = shader;
496 r = r600_bc_init(ctx.bc, shader->family);
497 if (r)
498 return r;
499 ctx.tokens = tokens;
500 tgsi_scan_shader(tokens, &ctx.info);
501 tgsi_parse_init(&ctx.parse, tokens);
502 ctx.type = ctx.parse.FullHeader.Processor.Processor;
503 shader->processor_type = ctx.type;
504 ctx.bc->type = shader->processor_type;
505
506 /* register allocations */
507 /* Values [0,127] correspond to GPR[0..127].
508 * Values [128,159] correspond to constant buffer bank 0
509 * Values [160,191] correspond to constant buffer bank 1
510 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
511 * Values [256,287] correspond to constant buffer bank 2 (EG)
512 * Values [288,319] correspond to constant buffer bank 3 (EG)
513 * Other special values are shown in the list below.
514 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
515 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
516 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
517 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
518 * 248 SQ_ALU_SRC_0: special constant 0.0.
519 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
520 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
521 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
522 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
523 * 253 SQ_ALU_SRC_LITERAL: literal constant.
524 * 254 SQ_ALU_SRC_PV: previous vector result.
525 * 255 SQ_ALU_SRC_PS: previous scalar result.
526 */
527 for (i = 0; i < TGSI_FILE_COUNT; i++) {
528 ctx.file_offset[i] = 0;
529 }
530 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
531 ctx.file_offset[TGSI_FILE_INPUT] = 1;
532 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
533 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
534 } else {
535 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
536 }
537 }
538 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
539 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
540 }
541 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
542 ctx.info.file_count[TGSI_FILE_INPUT];
543 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
544 ctx.info.file_count[TGSI_FILE_OUTPUT];
545
546 /* Outside the GPR range. This will be translated to one of the
547 * kcache banks later. */
548 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
549
550 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
551 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
552 ctx.info.file_count[TGSI_FILE_TEMPORARY];
553
554 ctx.nliterals = 0;
555 ctx.literals = NULL;
556
557 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
558 tgsi_parse_token(&ctx.parse);
559 switch (ctx.parse.FullToken.Token.Type) {
560 case TGSI_TOKEN_TYPE_IMMEDIATE:
561 immediate = &ctx.parse.FullToken.FullImmediate;
562 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
563 if(ctx.literals == NULL) {
564 r = -ENOMEM;
565 goto out_err;
566 }
567 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
568 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
569 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
570 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
571 ctx.nliterals++;
572 break;
573 case TGSI_TOKEN_TYPE_DECLARATION:
574 r = tgsi_declaration(&ctx);
575 if (r)
576 goto out_err;
577 break;
578 case TGSI_TOKEN_TYPE_INSTRUCTION:
579 r = tgsi_is_supported(&ctx);
580 if (r)
581 goto out_err;
582 ctx.max_driver_temp_used = 0;
583 /* reserve first tmp for everyone */
584 r600_get_temp(&ctx);
585 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
586 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
587 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
588 else
589 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
590 r = ctx.inst_info->process(&ctx);
591 if (r)
592 goto out_err;
593 break;
594 case TGSI_TOKEN_TYPE_PROPERTY:
595 break;
596 default:
597 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
598 r = -EINVAL;
599 goto out_err;
600 }
601 }
602 /* export output */
603 noutput = shader->noutput;
604 for (i = 0, pos0 = 0; i < noutput; i++) {
605 memset(&output[i], 0, sizeof(struct r600_bc_output));
606 output[i].gpr = shader->output[i].gpr;
607 output[i].elem_size = 3;
608 output[i].swizzle_x = 0;
609 output[i].swizzle_y = 1;
610 output[i].swizzle_z = 2;
611 output[i].swizzle_w = 3;
612 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
613 output[i].array_base = i - pos0;
614 switch (ctx.type) {
615 case TGSI_PROCESSOR_VERTEX:
616 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
617 output[i].array_base = 60;
618 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
619 /* position doesn't count in array_base */
620 pos0++;
621 }
622 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
623 output[i].array_base = 61;
624 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
625 /* position doesn't count in array_base */
626 pos0++;
627 }
628 break;
629 case TGSI_PROCESSOR_FRAGMENT:
630 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
631 output[i].array_base = shader->output[i].sid;
632 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
633 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
634 output[i].array_base = 61;
635 output[i].swizzle_x = 2;
636 output[i].swizzle_y = 7;
637 output[i].swizzle_z = output[i].swizzle_w = 7;
638 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
639 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
640 output[i].array_base = 61;
641 output[i].swizzle_x = 7;
642 output[i].swizzle_y = 1;
643 output[i].swizzle_z = output[i].swizzle_w = 7;
644 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
645 } else {
646 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
647 r = -EINVAL;
648 goto out_err;
649 }
650 break;
651 default:
652 R600_ERR("unsupported processor type %d\n", ctx.type);
653 r = -EINVAL;
654 goto out_err;
655 }
656 }
657 /* add fake param output for vertex shader if no param is exported */
658 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
659 for (i = 0, pos0 = 0; i < noutput; i++) {
660 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
661 pos0 = 1;
662 break;
663 }
664 }
665 if (!pos0) {
666 memset(&output[i], 0, sizeof(struct r600_bc_output));
667 output[i].gpr = 0;
668 output[i].elem_size = 3;
669 output[i].swizzle_x = 0;
670 output[i].swizzle_y = 1;
671 output[i].swizzle_z = 2;
672 output[i].swizzle_w = 3;
673 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
674 output[i].array_base = 0;
675 noutput++;
676 }
677 }
678 /* add fake pixel export */
679 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
680 memset(&output[0], 0, sizeof(struct r600_bc_output));
681 output[0].gpr = 0;
682 output[0].elem_size = 3;
683 output[0].swizzle_x = 7;
684 output[0].swizzle_y = 7;
685 output[0].swizzle_z = 7;
686 output[0].swizzle_w = 7;
687 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
688 output[0].array_base = 0;
689 noutput++;
690 }
691 /* add output to bytecode */
692 for (i = 0; i < noutput; i++) {
693 r = r600_bc_add_output(ctx.bc, &output[i]);
694 if (r)
695 goto out_err;
696 }
697 *literals = ctx.literals;
698 tgsi_parse_free(&ctx.parse);
699 return 0;
700 out_err:
701 free(ctx.literals);
702 tgsi_parse_free(&ctx.parse);
703 return r;
704 }
705
706 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
707 {
708 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
709 return -EINVAL;
710 }
711
/*
 * Handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* the context is not needed */
	return 0;
}
716
717 static int tgsi_src(struct r600_shader_ctx *ctx,
718 const struct tgsi_full_src_register *tgsi_src,
719 struct r600_bc_alu_src *r600_src)
720 {
721 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
722 r600_src->neg = tgsi_src->Register.Negate;
723 r600_src->abs = tgsi_src->Register.Absolute;
724 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
725 int index;
726 if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
727 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
728 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
729
730 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
731 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
732 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
733 return 0;
734 }
735 index = tgsi_src->Register.Index;
736 r600_src->sel = V_SQ_ALU_SRC_LITERAL;
737 r600_src->value = ctx->literals + index * 4;
738 } else {
739 if (tgsi_src->Register.Indirect)
740 r600_src->rel = V_SQ_REL_RELATIVE;
741 r600_src->sel = tgsi_src->Register.Index;
742 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
743 }
744 return 0;
745 }
746
747 static int tgsi_dst(struct r600_shader_ctx *ctx,
748 const struct tgsi_full_dst_register *tgsi_dst,
749 unsigned swizzle,
750 struct r600_bc_alu_dst *r600_dst)
751 {
752 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
753
754 r600_dst->sel = tgsi_dst->Register.Index;
755 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
756 r600_dst->chan = swizzle;
757 r600_dst->write = 1;
758 if (tgsi_dst->Register.Indirect)
759 r600_dst->rel = V_SQ_REL_RELATIVE;
760 if (inst->Instruction.Saturate) {
761 r600_dst->clamp = 1;
762 }
763 return 0;
764 }
765
766 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
767 {
768 switch (swizzle) {
769 case 0:
770 return tgsi_src->Register.SwizzleX;
771 case 1:
772 return tgsi_src->Register.SwizzleY;
773 case 2:
774 return tgsi_src->Register.SwizzleZ;
775 case 3:
776 return tgsi_src->Register.SwizzleW;
777 default:
778 return 0;
779 }
780 }
781
782 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
783 {
784 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
785 struct r600_bc_alu alu;
786 int i, j, k, nconst, r;
787
788 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
789 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
790 nconst++;
791 }
792 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
793 if (r) {
794 return r;
795 }
796 }
797 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
798 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
799 int treg = r600_get_temp(ctx);
800 for (k = 0; k < 4; k++) {
801 memset(&alu, 0, sizeof(struct r600_bc_alu));
802 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
803 alu.src[0].sel = r600_src[i].sel;
804 alu.src[0].chan = k;
805 alu.src[0].rel = r600_src[i].rel;
806 alu.dst.sel = treg;
807 alu.dst.chan = k;
808 alu.dst.write = 1;
809 if (k == 3)
810 alu.last = 1;
811 r = r600_bc_add_alu(ctx->bc, &alu);
812 if (r)
813 return r;
814 }
815 r600_src[i].sel = treg;
816 r600_src[i].rel =0;
817 j--;
818 }
819 }
820 return 0;
821 }
822
823 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
824 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
825 {
826 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
827 struct r600_bc_alu alu;
828 int i, j, k, nliteral, r;
829
830 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
831 if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
832 nliteral++;
833 }
834 }
835 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
836 if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
837 int treg = r600_get_temp(ctx);
838 for (k = 0; k < 4; k++) {
839 memset(&alu, 0, sizeof(struct r600_bc_alu));
840 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
841 alu.src[0].sel = r600_src[i].sel;
842 alu.src[0].chan = k;
843 alu.src[0].value = r600_src[i].value;
844 alu.dst.sel = treg;
845 alu.dst.chan = k;
846 alu.dst.write = 1;
847 if (k == 3)
848 alu.last = 1;
849 r = r600_bc_add_alu(ctx->bc, &alu);
850 if (r)
851 return r;
852 }
853 r600_src[i].sel = treg;
854 j--;
855 }
856 }
857 return 0;
858 }
859
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of `writemask`, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X and "nothing set" both yield 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
871
872 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
873 {
874 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
875 struct r600_bc_alu_src r600_src[3];
876 struct r600_bc_alu alu;
877 int i, j, r;
878 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
879
880 r = tgsi_split_constant(ctx, r600_src);
881 if (r)
882 return r;
883 r = tgsi_split_literal_constant(ctx, r600_src);
884 if (r)
885 return r;
886 for (i = 0; i < lasti + 1; i++) {
887 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
888 continue;
889
890 memset(&alu, 0, sizeof(struct r600_bc_alu));
891 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
892 if (r)
893 return r;
894
895 alu.inst = ctx->inst_info->r600_opcode;
896 if (!swap) {
897 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
898 alu.src[j] = r600_src[j];
899 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
900 }
901 } else {
902 alu.src[0] = r600_src[1];
903 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
904
905 alu.src[1] = r600_src[0];
906 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
907 }
908 /* handle some special cases */
909 switch (ctx->inst_info->tgsi_opcode) {
910 case TGSI_OPCODE_SUB:
911 alu.src[1].neg = 1;
912 break;
913 case TGSI_OPCODE_ABS:
914 alu.src[0].abs = 1;
915 break;
916 default:
917 break;
918 }
919 if (i == lasti) {
920 alu.last = 1;
921 }
922 r = r600_bc_add_alu(ctx->bc, &alu);
923 if (r)
924 return r;
925 }
926 return 0;
927 }
928
/*
 * Per-channel translation with the source operands in TGSI order.
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
933
/*
 * Per-channel translation with the first two source operands exchanged.
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
938
939 /*
940 * r600 - trunc to -PI..PI range
941 * r700 - normalize by dividing by 2PI
942 * see fdo bug 27901
943 */
944 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
945 struct r600_bc_alu_src r600_src[3])
946 {
947 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
948 static float double_pi = 3.1415926535 * 2;
949 static float neg_pi = -3.1415926535;
950
951 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
952 int r;
953 struct r600_bc_alu alu;
954
955 r = tgsi_split_constant(ctx, r600_src);
956 if (r)
957 return r;
958 r = tgsi_split_literal_constant(ctx, r600_src);
959 if (r)
960 return r;
961
962 memset(&alu, 0, sizeof(struct r600_bc_alu));
963 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
964 alu.is_op3 = 1;
965
966 alu.dst.chan = 0;
967 alu.dst.sel = ctx->temp_reg;
968 alu.dst.write = 1;
969
970 alu.src[0] = r600_src[0];
971 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
972
973 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
974 alu.src[1].chan = 0;
975 alu.src[1].value = (uint32_t *)&half_inv_pi;
976 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
977 alu.src[2].chan = 1;
978 alu.last = 1;
979 r = r600_bc_add_alu(ctx->bc, &alu);
980 if (r)
981 return r;
982
983 memset(&alu, 0, sizeof(struct r600_bc_alu));
984 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
985
986 alu.dst.chan = 0;
987 alu.dst.sel = ctx->temp_reg;
988 alu.dst.write = 1;
989
990 alu.src[0].sel = ctx->temp_reg;
991 alu.src[0].chan = 0;
992 alu.last = 1;
993 r = r600_bc_add_alu(ctx->bc, &alu);
994 if (r)
995 return r;
996
997 memset(&alu, 0, sizeof(struct r600_bc_alu));
998 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
999 alu.is_op3 = 1;
1000
1001 alu.dst.chan = 0;
1002 alu.dst.sel = ctx->temp_reg;
1003 alu.dst.write = 1;
1004
1005 alu.src[0].sel = ctx->temp_reg;
1006 alu.src[0].chan = 0;
1007
1008 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1009 alu.src[1].chan = 0;
1010 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1011 alu.src[2].chan = 1;
1012
1013 if (ctx->bc->chiprev == CHIPREV_R600) {
1014 alu.src[1].value = (uint32_t *)&double_pi;
1015 alu.src[2].value = (uint32_t *)&neg_pi;
1016 } else {
1017 alu.src[1].sel = V_SQ_ALU_SRC_1;
1018 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1019 alu.src[2].neg = 1;
1020 }
1021
1022 alu.last = 1;
1023 r = r600_bc_add_alu(ctx->bc, &alu);
1024 if (r)
1025 return r;
1026 return 0;
1027 }
1028
1029 static int tgsi_trig(struct r600_shader_ctx *ctx)
1030 {
1031 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1032 struct r600_bc_alu_src r600_src[3];
1033 struct r600_bc_alu alu;
1034 int i, r;
1035 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1036
1037 r = tgsi_setup_trig(ctx, r600_src);
1038 if (r)
1039 return r;
1040
1041 memset(&alu, 0, sizeof(struct r600_bc_alu));
1042 alu.inst = ctx->inst_info->r600_opcode;
1043 alu.dst.chan = 0;
1044 alu.dst.sel = ctx->temp_reg;
1045 alu.dst.write = 1;
1046
1047 alu.src[0].sel = ctx->temp_reg;
1048 alu.src[0].chan = 0;
1049 alu.last = 1;
1050 r = r600_bc_add_alu(ctx->bc, &alu);
1051 if (r)
1052 return r;
1053
1054 /* replicate result */
1055 for (i = 0; i < lasti + 1; i++) {
1056 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1057 continue;
1058
1059 memset(&alu, 0, sizeof(struct r600_bc_alu));
1060 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1061
1062 alu.src[0].sel = ctx->temp_reg;
1063 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1064 if (r)
1065 return r;
1066 if (i == lasti)
1067 alu.last = 1;
1068 r = r600_bc_add_alu(ctx->bc, &alu);
1069 if (r)
1070 return r;
1071 }
1072 return 0;
1073 }
1074
1075 static int tgsi_scs(struct r600_shader_ctx *ctx)
1076 {
1077 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1078 struct r600_bc_alu_src r600_src[3];
1079 struct r600_bc_alu alu;
1080 int r;
1081
1082 /* We'll only need the trig stuff if we are going to write to the
1083 * X or Y components of the destination vector.
1084 */
1085 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1086 r = tgsi_setup_trig(ctx, r600_src);
1087 if (r)
1088 return r;
1089 }
1090
1091 /* dst.x = COS */
1092 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1093 memset(&alu, 0, sizeof(struct r600_bc_alu));
1094 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1095 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1096 if (r)
1097 return r;
1098
1099 alu.src[0].sel = ctx->temp_reg;
1100 alu.src[0].chan = 0;
1101 alu.last = 1;
1102 r = r600_bc_add_alu(ctx->bc, &alu);
1103 if (r)
1104 return r;
1105 }
1106
1107 /* dst.y = SIN */
1108 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1109 memset(&alu, 0, sizeof(struct r600_bc_alu));
1110 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1111 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1112 if (r)
1113 return r;
1114
1115 alu.src[0].sel = ctx->temp_reg;
1116 alu.src[0].chan = 0;
1117 alu.last = 1;
1118 r = r600_bc_add_alu(ctx->bc, &alu);
1119 if (r)
1120 return r;
1121 }
1122
1123 /* dst.z = 0.0; */
1124 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1125 memset(&alu, 0, sizeof(struct r600_bc_alu));
1126
1127 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1128
1129 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1130 if (r)
1131 return r;
1132
1133 alu.src[0].sel = V_SQ_ALU_SRC_0;
1134 alu.src[0].chan = 0;
1135
1136 alu.last = 1;
1137
1138 r = r600_bc_add_alu(ctx->bc, &alu);
1139 if (r)
1140 return r;
1141 }
1142
1143 /* dst.w = 1.0; */
1144 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1145 memset(&alu, 0, sizeof(struct r600_bc_alu));
1146
1147 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1148
1149 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1150 if (r)
1151 return r;
1152
1153 alu.src[0].sel = V_SQ_ALU_SRC_1;
1154 alu.src[0].chan = 0;
1155
1156 alu.last = 1;
1157
1158 r = r600_bc_add_alu(ctx->bc, &alu);
1159 if (r)
1160 return r;
1161 }
1162
1163 return 0;
1164 }
1165
1166 static int tgsi_kill(struct r600_shader_ctx *ctx)
1167 {
1168 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1169 struct r600_bc_alu alu;
1170 int i, r;
1171
1172 for (i = 0; i < 4; i++) {
1173 memset(&alu, 0, sizeof(struct r600_bc_alu));
1174 alu.inst = ctx->inst_info->r600_opcode;
1175
1176 alu.dst.chan = i;
1177
1178 alu.src[0].sel = V_SQ_ALU_SRC_0;
1179
1180 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1181 alu.src[1].sel = V_SQ_ALU_SRC_1;
1182 alu.src[1].neg = 1;
1183 } else {
1184 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1185 if (r)
1186 return r;
1187 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1188 }
1189 if (i == 3) {
1190 alu.last = 1;
1191 }
1192 r = r600_bc_add_alu(ctx->bc, &alu);
1193 if (r)
1194 return r;
1195 }
1196
1197 /* kill must be last in ALU */
1198 ctx->bc->force_add_cf = 1;
1199 ctx->shader->uses_kill = TRUE;
1200 return 0;
1201 }
1202
1203 static int tgsi_lit(struct r600_shader_ctx *ctx)
1204 {
1205 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1206 struct r600_bc_alu alu;
1207 struct r600_bc_alu_src r600_src[3];
1208 int r;
1209
1210 r = tgsi_split_constant(ctx, r600_src);
1211 if (r)
1212 return r;
1213 r = tgsi_split_literal_constant(ctx, r600_src);
1214 if (r)
1215 return r;
1216
1217 /* dst.x, <- 1.0 */
1218 memset(&alu, 0, sizeof(struct r600_bc_alu));
1219 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1220 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1221 alu.src[0].chan = 0;
1222 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1223 if (r)
1224 return r;
1225 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1226 r = r600_bc_add_alu(ctx->bc, &alu);
1227 if (r)
1228 return r;
1229
1230 /* dst.y = max(src.x, 0.0) */
1231 memset(&alu, 0, sizeof(struct r600_bc_alu));
1232 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1233 alu.src[0] = r600_src[0];
1234 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1235 alu.src[1].chan = 0;
1236 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1237 if (r)
1238 return r;
1239 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1240 r = r600_bc_add_alu(ctx->bc, &alu);
1241 if (r)
1242 return r;
1243
1244 /* dst.w, <- 1.0 */
1245 memset(&alu, 0, sizeof(struct r600_bc_alu));
1246 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1247 alu.src[0].sel = V_SQ_ALU_SRC_1;
1248 alu.src[0].chan = 0;
1249 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1250 if (r)
1251 return r;
1252 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1253 alu.last = 1;
1254 r = r600_bc_add_alu(ctx->bc, &alu);
1255 if (r)
1256 return r;
1257
1258 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1259 {
1260 int chan;
1261 int sel;
1262
1263 /* dst.z = log(src.y) */
1264 memset(&alu, 0, sizeof(struct r600_bc_alu));
1265 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1266 alu.src[0] = r600_src[0];
1267 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1268 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1269 if (r)
1270 return r;
1271 alu.last = 1;
1272 r = r600_bc_add_alu(ctx->bc, &alu);
1273 if (r)
1274 return r;
1275
1276 chan = alu.dst.chan;
1277 sel = alu.dst.sel;
1278
1279 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1280 memset(&alu, 0, sizeof(struct r600_bc_alu));
1281 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1282 alu.src[0] = r600_src[0];
1283 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1284 alu.src[1].sel = sel;
1285 alu.src[1].chan = chan;
1286
1287 alu.src[2] = r600_src[0];
1288 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1289 alu.dst.sel = ctx->temp_reg;
1290 alu.dst.chan = 0;
1291 alu.dst.write = 1;
1292 alu.is_op3 = 1;
1293 alu.last = 1;
1294 r = r600_bc_add_alu(ctx->bc, &alu);
1295 if (r)
1296 return r;
1297
1298 /* dst.z = exp(tmp.x) */
1299 memset(&alu, 0, sizeof(struct r600_bc_alu));
1300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1301 alu.src[0].sel = ctx->temp_reg;
1302 alu.src[0].chan = 0;
1303 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1304 if (r)
1305 return r;
1306 alu.last = 1;
1307 r = r600_bc_add_alu(ctx->bc, &alu);
1308 if (r)
1309 return r;
1310 }
1311 return 0;
1312 }
1313
1314 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1315 {
1316 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1317 struct r600_bc_alu alu;
1318 int i, r;
1319
1320 memset(&alu, 0, sizeof(struct r600_bc_alu));
1321
1322 /* FIXME:
1323 * For state trackers other than OpenGL, we'll want to use
1324 * _RECIPSQRT_IEEE instead.
1325 */
1326 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1327
1328 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1329 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1330 if (r)
1331 return r;
1332 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1333 alu.src[i].abs = 1;
1334 }
1335 alu.dst.sel = ctx->temp_reg;
1336 alu.dst.write = 1;
1337 alu.last = 1;
1338 r = r600_bc_add_alu(ctx->bc, &alu);
1339 if (r)
1340 return r;
1341 /* replicate result */
1342 return tgsi_helper_tempx_replicate(ctx);
1343 }
1344
1345 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1346 {
1347 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1348 struct r600_bc_alu alu;
1349 int i, r;
1350
1351 for (i = 0; i < 4; i++) {
1352 memset(&alu, 0, sizeof(struct r600_bc_alu));
1353 alu.src[0].sel = ctx->temp_reg;
1354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1355 alu.dst.chan = i;
1356 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1357 if (r)
1358 return r;
1359 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1360 if (i == 3)
1361 alu.last = 1;
1362 r = r600_bc_add_alu(ctx->bc, &alu);
1363 if (r)
1364 return r;
1365 }
1366 return 0;
1367 }
1368
1369 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1370 {
1371 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1372 struct r600_bc_alu alu;
1373 int i, r;
1374
1375 memset(&alu, 0, sizeof(struct r600_bc_alu));
1376 alu.inst = ctx->inst_info->r600_opcode;
1377 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1378 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1379 if (r)
1380 return r;
1381 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1382 }
1383 alu.dst.sel = ctx->temp_reg;
1384 alu.dst.write = 1;
1385 alu.last = 1;
1386 r = r600_bc_add_alu(ctx->bc, &alu);
1387 if (r)
1388 return r;
1389 /* replicate result */
1390 return tgsi_helper_tempx_replicate(ctx);
1391 }
1392
1393 static int tgsi_pow(struct r600_shader_ctx *ctx)
1394 {
1395 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1396 struct r600_bc_alu alu;
1397 int r;
1398
1399 /* LOG2(a) */
1400 memset(&alu, 0, sizeof(struct r600_bc_alu));
1401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1402 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1403 if (r)
1404 return r;
1405 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1406 alu.dst.sel = ctx->temp_reg;
1407 alu.dst.write = 1;
1408 alu.last = 1;
1409 r = r600_bc_add_alu(ctx->bc, &alu);
1410 if (r)
1411 return r;
1412 /* b * LOG2(a) */
1413 memset(&alu, 0, sizeof(struct r600_bc_alu));
1414 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1415 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1416 if (r)
1417 return r;
1418 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1419 alu.src[1].sel = ctx->temp_reg;
1420 alu.dst.sel = ctx->temp_reg;
1421 alu.dst.write = 1;
1422 alu.last = 1;
1423 r = r600_bc_add_alu(ctx->bc, &alu);
1424 if (r)
1425 return r;
1426 /* POW(a,b) = EXP2(b * LOG2(a))*/
1427 memset(&alu, 0, sizeof(struct r600_bc_alu));
1428 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1429 alu.src[0].sel = ctx->temp_reg;
1430 alu.dst.sel = ctx->temp_reg;
1431 alu.dst.write = 1;
1432 alu.last = 1;
1433 r = r600_bc_add_alu(ctx->bc, &alu);
1434 if (r)
1435 return r;
1436 return tgsi_helper_tempx_replicate(ctx);
1437 }
1438
1439 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1440 {
1441 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1442 struct r600_bc_alu alu;
1443 struct r600_bc_alu_src r600_src[3];
1444 int i, r;
1445
1446 r = tgsi_split_constant(ctx, r600_src);
1447 if (r)
1448 return r;
1449 r = tgsi_split_literal_constant(ctx, r600_src);
1450 if (r)
1451 return r;
1452
1453 /* tmp = (src > 0 ? 1 : src) */
1454 for (i = 0; i < 4; i++) {
1455 memset(&alu, 0, sizeof(struct r600_bc_alu));
1456 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1457 alu.is_op3 = 1;
1458
1459 alu.dst.sel = ctx->temp_reg;
1460 alu.dst.chan = i;
1461
1462 alu.src[0] = r600_src[0];
1463 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1464
1465 alu.src[1].sel = V_SQ_ALU_SRC_1;
1466
1467 alu.src[2] = r600_src[0];
1468 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1469 if (i == 3)
1470 alu.last = 1;
1471 r = r600_bc_add_alu(ctx->bc, &alu);
1472 if (r)
1473 return r;
1474 }
1475
1476 /* dst = (-tmp > 0 ? -1 : tmp) */
1477 for (i = 0; i < 4; i++) {
1478 memset(&alu, 0, sizeof(struct r600_bc_alu));
1479 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1480 alu.is_op3 = 1;
1481 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1482 if (r)
1483 return r;
1484
1485 alu.src[0].sel = ctx->temp_reg;
1486 alu.src[0].chan = i;
1487 alu.src[0].neg = 1;
1488
1489 alu.src[1].sel = V_SQ_ALU_SRC_1;
1490 alu.src[1].neg = 1;
1491
1492 alu.src[2].sel = ctx->temp_reg;
1493 alu.src[2].chan = i;
1494
1495 if (i == 3)
1496 alu.last = 1;
1497 r = r600_bc_add_alu(ctx->bc, &alu);
1498 if (r)
1499 return r;
1500 }
1501 return 0;
1502 }
1503
1504 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1505 {
1506 struct r600_bc_alu alu;
1507 int i, r;
1508
1509 for (i = 0; i < 4; i++) {
1510 memset(&alu, 0, sizeof(struct r600_bc_alu));
1511 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1512 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1513 alu.dst.chan = i;
1514 } else {
1515 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1516 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1517 if (r)
1518 return r;
1519 alu.src[0].sel = ctx->temp_reg;
1520 alu.src[0].chan = i;
1521 }
1522 if (i == 3) {
1523 alu.last = 1;
1524 }
1525 r = r600_bc_add_alu(ctx->bc, &alu);
1526 if (r)
1527 return r;
1528 }
1529 return 0;
1530 }
1531
1532 static int tgsi_op3(struct r600_shader_ctx *ctx)
1533 {
1534 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1535 struct r600_bc_alu_src r600_src[3];
1536 struct r600_bc_alu alu;
1537 int i, j, r;
1538 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1539
1540 r = tgsi_split_constant(ctx, r600_src);
1541 if (r)
1542 return r;
1543 r = tgsi_split_literal_constant(ctx, r600_src);
1544 if (r)
1545 return r;
1546 for (i = 0; i < lasti + 1; i++) {
1547 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1548 continue;
1549
1550 memset(&alu, 0, sizeof(struct r600_bc_alu));
1551 alu.inst = ctx->inst_info->r600_opcode;
1552 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1553 alu.src[j] = r600_src[j];
1554 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1555 }
1556
1557 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1558 if (r)
1559 return r;
1560
1561 alu.dst.chan = i;
1562 alu.dst.write = 1;
1563 alu.is_op3 = 1;
1564 if (i == lasti) {
1565 alu.last = 1;
1566 }
1567 r = r600_bc_add_alu(ctx->bc, &alu);
1568 if (r)
1569 return r;
1570 }
1571 return 0;
1572 }
1573
1574 static int tgsi_dp(struct r600_shader_ctx *ctx)
1575 {
1576 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1577 struct r600_bc_alu_src r600_src[3];
1578 struct r600_bc_alu alu;
1579 int i, j, r;
1580
1581 r = tgsi_split_constant(ctx, r600_src);
1582 if (r)
1583 return r;
1584 r = tgsi_split_literal_constant(ctx, r600_src);
1585 if (r)
1586 return r;
1587 for (i = 0; i < 4; i++) {
1588 memset(&alu, 0, sizeof(struct r600_bc_alu));
1589 alu.inst = ctx->inst_info->r600_opcode;
1590 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1591 alu.src[j] = r600_src[j];
1592 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1593 }
1594
1595 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1596 if (r)
1597 return r;
1598
1599 alu.dst.chan = i;
1600 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1601 /* handle some special cases */
1602 switch (ctx->inst_info->tgsi_opcode) {
1603 case TGSI_OPCODE_DP2:
1604 if (i > 1) {
1605 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1606 alu.src[0].chan = alu.src[1].chan = 0;
1607 }
1608 break;
1609 case TGSI_OPCODE_DP3:
1610 if (i > 2) {
1611 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1612 alu.src[0].chan = alu.src[1].chan = 0;
1613 }
1614 break;
1615 case TGSI_OPCODE_DPH:
1616 if (i == 3) {
1617 alu.src[0].sel = V_SQ_ALU_SRC_1;
1618 alu.src[0].chan = 0;
1619 alu.src[0].neg = 0;
1620 }
1621 break;
1622 default:
1623 break;
1624 }
1625 if (i == 3) {
1626 alu.last = 1;
1627 }
1628 r = r600_bc_add_alu(ctx->bc, &alu);
1629 if (r)
1630 return r;
1631 }
1632 return 0;
1633 }
1634
1635 static int tgsi_tex(struct r600_shader_ctx *ctx)
1636 {
1637 static float one_point_five = 1.5f;
1638 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1639 struct r600_bc_tex tex;
1640 struct r600_bc_alu alu;
1641 unsigned src_gpr;
1642 int r, i;
1643 int opcode;
1644 boolean src_not_temp =
1645 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1646 inst->Src[0].Register.File != TGSI_FILE_INPUT;
1647
1648 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1649
1650 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1651 /* Add perspective divide */
1652 memset(&alu, 0, sizeof(struct r600_bc_alu));
1653 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1654 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1655 if (r)
1656 return r;
1657
1658 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1659 alu.dst.sel = ctx->temp_reg;
1660 alu.dst.chan = 3;
1661 alu.last = 1;
1662 alu.dst.write = 1;
1663 r = r600_bc_add_alu(ctx->bc, &alu);
1664 if (r)
1665 return r;
1666
1667 for (i = 0; i < 3; i++) {
1668 memset(&alu, 0, sizeof(struct r600_bc_alu));
1669 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1670 alu.src[0].sel = ctx->temp_reg;
1671 alu.src[0].chan = 3;
1672 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1673 if (r)
1674 return r;
1675 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1676 alu.dst.sel = ctx->temp_reg;
1677 alu.dst.chan = i;
1678 alu.dst.write = 1;
1679 r = r600_bc_add_alu(ctx->bc, &alu);
1680 if (r)
1681 return r;
1682 }
1683 memset(&alu, 0, sizeof(struct r600_bc_alu));
1684 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1685 alu.src[0].sel = V_SQ_ALU_SRC_1;
1686 alu.src[0].chan = 0;
1687 alu.dst.sel = ctx->temp_reg;
1688 alu.dst.chan = 3;
1689 alu.last = 1;
1690 alu.dst.write = 1;
1691 r = r600_bc_add_alu(ctx->bc, &alu);
1692 if (r)
1693 return r;
1694 src_not_temp = FALSE;
1695 src_gpr = ctx->temp_reg;
1696 }
1697
1698 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1699 int src_chan, src2_chan;
1700
1701 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1702 for (i = 0; i < 4; i++) {
1703 memset(&alu, 0, sizeof(struct r600_bc_alu));
1704 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1705 switch (i) {
1706 case 0:
1707 src_chan = 2;
1708 src2_chan = 1;
1709 break;
1710 case 1:
1711 src_chan = 2;
1712 src2_chan = 0;
1713 break;
1714 case 2:
1715 src_chan = 0;
1716 src2_chan = 2;
1717 break;
1718 case 3:
1719 src_chan = 1;
1720 src2_chan = 2;
1721 break;
1722 default:
1723 assert(0);
1724 src_chan = 0;
1725 src2_chan = 0;
1726 break;
1727 }
1728 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1729 if (r)
1730 return r;
1731 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1732 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1733 if (r)
1734 return r;
1735 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1736 alu.dst.sel = ctx->temp_reg;
1737 alu.dst.chan = i;
1738 if (i == 3)
1739 alu.last = 1;
1740 alu.dst.write = 1;
1741 r = r600_bc_add_alu(ctx->bc, &alu);
1742 if (r)
1743 return r;
1744 }
1745
1746 /* tmp1.z = RCP_e(|tmp1.z|) */
1747 memset(&alu, 0, sizeof(struct r600_bc_alu));
1748 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1749 alu.src[0].sel = ctx->temp_reg;
1750 alu.src[0].chan = 2;
1751 alu.src[0].abs = 1;
1752 alu.dst.sel = ctx->temp_reg;
1753 alu.dst.chan = 2;
1754 alu.dst.write = 1;
1755 alu.last = 1;
1756 r = r600_bc_add_alu(ctx->bc, &alu);
1757 if (r)
1758 return r;
1759
1760 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1761 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1762 * muladd has no writemask, have to use another temp
1763 */
1764 memset(&alu, 0, sizeof(struct r600_bc_alu));
1765 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1766 alu.is_op3 = 1;
1767
1768 alu.src[0].sel = ctx->temp_reg;
1769 alu.src[0].chan = 0;
1770 alu.src[1].sel = ctx->temp_reg;
1771 alu.src[1].chan = 2;
1772
1773 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1774 alu.src[2].chan = 0;
1775
1776 alu.dst.sel = ctx->temp_reg;
1777 alu.dst.chan = 0;
1778 alu.dst.write = 1;
1779
1780 r = r600_bc_add_alu(ctx->bc, &alu);
1781 if (r)
1782 return r;
1783
1784 memset(&alu, 0, sizeof(struct r600_bc_alu));
1785 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1786 alu.is_op3 = 1;
1787
1788 alu.src[0].sel = ctx->temp_reg;
1789 alu.src[0].chan = 1;
1790 alu.src[1].sel = ctx->temp_reg;
1791 alu.src[1].chan = 2;
1792
1793 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1794 alu.src[2].chan = 0;
1795 alu.src[2].value = (u32*)&one_point_five;
1796
1797 alu.dst.sel = ctx->temp_reg;
1798 alu.dst.chan = 1;
1799 alu.dst.write = 1;
1800
1801 alu.last = 1;
1802 r = r600_bc_add_alu(ctx->bc, &alu);
1803 if (r)
1804 return r;
1805
1806 src_not_temp = FALSE;
1807 src_gpr = ctx->temp_reg;
1808 }
1809
1810 if (src_not_temp) {
1811 for (i = 0; i < 4; i++) {
1812 memset(&alu, 0, sizeof(struct r600_bc_alu));
1813 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1814 alu.src[0].sel = src_gpr;
1815 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1816 alu.dst.sel = ctx->temp_reg;
1817 alu.dst.chan = i;
1818 if (i == 3)
1819 alu.last = 1;
1820 alu.dst.write = 1;
1821 r = r600_bc_add_alu(ctx->bc, &alu);
1822 if (r)
1823 return r;
1824 }
1825 src_gpr = ctx->temp_reg;
1826 }
1827
1828 opcode = ctx->inst_info->r600_opcode;
1829 if (opcode == SQ_TEX_INST_SAMPLE &&
1830 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1831 opcode = SQ_TEX_INST_SAMPLE_C;
1832
1833 memset(&tex, 0, sizeof(struct r600_bc_tex));
1834 tex.inst = opcode;
1835 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1836 tex.resource_id = tex.sampler_id;
1837 tex.src_gpr = src_gpr;
1838 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1839 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1840 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1841 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1842 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1843 tex.src_sel_x = 0;
1844 tex.src_sel_y = 1;
1845 tex.src_sel_z = 2;
1846 tex.src_sel_w = 3;
1847
1848 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1849 tex.src_sel_x = 1;
1850 tex.src_sel_y = 0;
1851 tex.src_sel_z = 3;
1852 tex.src_sel_w = 1;
1853 }
1854
1855 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1856 tex.coord_type_x = 1;
1857 tex.coord_type_y = 1;
1858 tex.coord_type_z = 1;
1859 tex.coord_type_w = 1;
1860 }
1861
1862 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1863 tex.src_sel_w = 2;
1864
1865 r = r600_bc_add_tex(ctx->bc, &tex);
1866 if (r)
1867 return r;
1868
1869 /* add shadow ambient support - gallium doesn't do it yet */
1870 return 0;
1871 }
1872
1873 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1874 {
1875 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1876 struct r600_bc_alu_src r600_src[3];
1877 struct r600_bc_alu alu;
1878 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1879 unsigned i;
1880 int r;
1881
1882 r = tgsi_split_constant(ctx, r600_src);
1883 if (r)
1884 return r;
1885 r = tgsi_split_literal_constant(ctx, r600_src);
1886 if (r)
1887 return r;
1888
1889 /* optimize if it's just an equal balance */
1890 if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
1891 for (i = 0; i < lasti + 1; i++) {
1892 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1893 continue;
1894
1895 memset(&alu, 0, sizeof(struct r600_bc_alu));
1896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1897 alu.src[0] = r600_src[1];
1898 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1899 alu.src[1] = r600_src[2];
1900 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1901 alu.omod = 3;
1902 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1903 if (r)
1904 return r;
1905
1906 alu.dst.chan = i;
1907 if (i == lasti) {
1908 alu.last = 1;
1909 }
1910 r = r600_bc_add_alu(ctx->bc, &alu);
1911 if (r)
1912 return r;
1913 }
1914 return 0;
1915 }
1916
1917 /* 1 - src0 */
1918 for (i = 0; i < lasti + 1; i++) {
1919 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1920 continue;
1921
1922 memset(&alu, 0, sizeof(struct r600_bc_alu));
1923 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1924 alu.src[0].sel = V_SQ_ALU_SRC_1;
1925 alu.src[0].chan = 0;
1926 alu.src[1] = r600_src[0];
1927 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1928 alu.src[1].neg = 1;
1929 alu.dst.sel = ctx->temp_reg;
1930 alu.dst.chan = i;
1931 if (i == lasti) {
1932 alu.last = 1;
1933 }
1934 alu.dst.write = 1;
1935 r = r600_bc_add_alu(ctx->bc, &alu);
1936 if (r)
1937 return r;
1938 }
1939
1940 /* (1 - src0) * src2 */
1941 for (i = 0; i < lasti + 1; i++) {
1942 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1943 continue;
1944
1945 memset(&alu, 0, sizeof(struct r600_bc_alu));
1946 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1947 alu.src[0].sel = ctx->temp_reg;
1948 alu.src[0].chan = i;
1949 alu.src[1] = r600_src[2];
1950 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1951 alu.dst.sel = ctx->temp_reg;
1952 alu.dst.chan = i;
1953 if (i == lasti) {
1954 alu.last = 1;
1955 }
1956 alu.dst.write = 1;
1957 r = r600_bc_add_alu(ctx->bc, &alu);
1958 if (r)
1959 return r;
1960 }
1961
1962 /* src0 * src1 + (1 - src0) * src2 */
1963 for (i = 0; i < lasti + 1; i++) {
1964 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1965 continue;
1966
1967 memset(&alu, 0, sizeof(struct r600_bc_alu));
1968 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1969 alu.is_op3 = 1;
1970 alu.src[0] = r600_src[0];
1971 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1972 alu.src[1] = r600_src[1];
1973 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1974 alu.src[2].sel = ctx->temp_reg;
1975 alu.src[2].chan = i;
1976
1977 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1978 if (r)
1979 return r;
1980
1981 alu.dst.chan = i;
1982 if (i == lasti) {
1983 alu.last = 1;
1984 }
1985 r = r600_bc_add_alu(ctx->bc, &alu);
1986 if (r)
1987 return r;
1988 }
1989 return 0;
1990 }
1991
1992 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1993 {
1994 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1995 struct r600_bc_alu_src r600_src[3];
1996 struct r600_bc_alu alu;
1997 int i, r;
1998 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1999
2000 r = tgsi_split_constant(ctx, r600_src);
2001 if (r)
2002 return r;
2003 r = tgsi_split_literal_constant(ctx, r600_src);
2004 if (r)
2005 return r;
2006
2007 for (i = 0; i < lasti + 1; i++) {
2008 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2009 continue;
2010
2011 memset(&alu, 0, sizeof(struct r600_bc_alu));
2012 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2013 alu.src[0] = r600_src[0];
2014 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2015
2016 alu.src[1] = r600_src[2];
2017 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2018
2019 alu.src[2] = r600_src[1];
2020 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2021
2022 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2023 if (r)
2024 return r;
2025
2026 alu.dst.chan = i;
2027 alu.dst.write = 1;
2028 alu.is_op3 = 1;
2029 if (i == lasti)
2030 alu.last = 1;
2031 r = r600_bc_add_alu(ctx->bc, &alu);
2032 if (r)
2033 return r;
2034 }
2035 return 0;
2036 }
2037
2038 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2039 {
2040 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2041 struct r600_bc_alu_src r600_src[3];
2042 struct r600_bc_alu alu;
2043 uint32_t use_temp = 0;
2044 int i, r;
2045
2046 if (inst->Dst[0].Register.WriteMask != 0xf)
2047 use_temp = 1;
2048
2049 r = tgsi_split_constant(ctx, r600_src);
2050 if (r)
2051 return r;
2052 r = tgsi_split_literal_constant(ctx, r600_src);
2053 if (r)
2054 return r;
2055
2056 for (i = 0; i < 4; i++) {
2057 memset(&alu, 0, sizeof(struct r600_bc_alu));
2058 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2059
2060 alu.src[0] = r600_src[0];
2061 switch (i) {
2062 case 0:
2063 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2064 break;
2065 case 1:
2066 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2067 break;
2068 case 2:
2069 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2070 break;
2071 case 3:
2072 alu.src[0].sel = V_SQ_ALU_SRC_0;
2073 alu.src[0].chan = i;
2074 }
2075
2076 alu.src[1] = r600_src[1];
2077 switch (i) {
2078 case 0:
2079 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2080 break;
2081 case 1:
2082 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2083 break;
2084 case 2:
2085 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2086 break;
2087 case 3:
2088 alu.src[1].sel = V_SQ_ALU_SRC_0;
2089 alu.src[1].chan = i;
2090 }
2091
2092 alu.dst.sel = ctx->temp_reg;
2093 alu.dst.chan = i;
2094 alu.dst.write = 1;
2095
2096 if (i == 3)
2097 alu.last = 1;
2098 r = r600_bc_add_alu(ctx->bc, &alu);
2099 if (r)
2100 return r;
2101 }
2102
2103 for (i = 0; i < 4; i++) {
2104 memset(&alu, 0, sizeof(struct r600_bc_alu));
2105 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2106
2107 alu.src[0] = r600_src[0];
2108 switch (i) {
2109 case 0:
2110 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2111 break;
2112 case 1:
2113 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2114 break;
2115 case 2:
2116 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2117 break;
2118 case 3:
2119 alu.src[0].sel = V_SQ_ALU_SRC_0;
2120 alu.src[0].chan = i;
2121 }
2122
2123 alu.src[1] = r600_src[1];
2124 switch (i) {
2125 case 0:
2126 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2127 break;
2128 case 1:
2129 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2130 break;
2131 case 2:
2132 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2133 break;
2134 case 3:
2135 alu.src[1].sel = V_SQ_ALU_SRC_0;
2136 alu.src[1].chan = i;
2137 }
2138
2139 alu.src[2].sel = ctx->temp_reg;
2140 alu.src[2].neg = 1;
2141 alu.src[2].chan = i;
2142
2143 if (use_temp)
2144 alu.dst.sel = ctx->temp_reg;
2145 else {
2146 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2147 if (r)
2148 return r;
2149 }
2150 alu.dst.chan = i;
2151 alu.dst.write = 1;
2152 alu.is_op3 = 1;
2153 if (i == 3)
2154 alu.last = 1;
2155 r = r600_bc_add_alu(ctx->bc, &alu);
2156 if (r)
2157 return r;
2158 }
2159 if (use_temp)
2160 return tgsi_helper_copy(ctx, inst);
2161 return 0;
2162 }
2163
2164 static int tgsi_exp(struct r600_shader_ctx *ctx)
2165 {
2166 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2167 struct r600_bc_alu_src r600_src[3] = { { 0 } };
2168 struct r600_bc_alu alu;
2169 int r;
2170
2171 /* result.x = 2^floor(src); */
2172 if (inst->Dst[0].Register.WriteMask & 1) {
2173 memset(&alu, 0, sizeof(struct r600_bc_alu));
2174
2175 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2176 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2177 if (r)
2178 return r;
2179
2180 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2181
2182 alu.dst.sel = ctx->temp_reg;
2183 alu.dst.chan = 0;
2184 alu.dst.write = 1;
2185 alu.last = 1;
2186 r = r600_bc_add_alu(ctx->bc, &alu);
2187 if (r)
2188 return r;
2189
2190 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2191 alu.src[0].sel = ctx->temp_reg;
2192 alu.src[0].chan = 0;
2193
2194 alu.dst.sel = ctx->temp_reg;
2195 alu.dst.chan = 0;
2196 alu.dst.write = 1;
2197 alu.last = 1;
2198 r = r600_bc_add_alu(ctx->bc, &alu);
2199 if (r)
2200 return r;
2201 }
2202
2203 /* result.y = tmp - floor(tmp); */
2204 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2205 memset(&alu, 0, sizeof(struct r600_bc_alu));
2206
2207 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2208 alu.src[0] = r600_src[0];
2209 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2210 if (r)
2211 return r;
2212 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2213
2214 alu.dst.sel = ctx->temp_reg;
2215 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2216 // if (r)
2217 // return r;
2218 alu.dst.write = 1;
2219 alu.dst.chan = 1;
2220
2221 alu.last = 1;
2222
2223 r = r600_bc_add_alu(ctx->bc, &alu);
2224 if (r)
2225 return r;
2226 }
2227
2228 /* result.z = RoughApprox2ToX(tmp);*/
2229 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2230 memset(&alu, 0, sizeof(struct r600_bc_alu));
2231 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2232 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2233 if (r)
2234 return r;
2235 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2236
2237 alu.dst.sel = ctx->temp_reg;
2238 alu.dst.write = 1;
2239 alu.dst.chan = 2;
2240
2241 alu.last = 1;
2242
2243 r = r600_bc_add_alu(ctx->bc, &alu);
2244 if (r)
2245 return r;
2246 }
2247
2248 /* result.w = 1.0;*/
2249 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2250 memset(&alu, 0, sizeof(struct r600_bc_alu));
2251
2252 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2253 alu.src[0].sel = V_SQ_ALU_SRC_1;
2254 alu.src[0].chan = 0;
2255
2256 alu.dst.sel = ctx->temp_reg;
2257 alu.dst.chan = 3;
2258 alu.dst.write = 1;
2259 alu.last = 1;
2260 r = r600_bc_add_alu(ctx->bc, &alu);
2261 if (r)
2262 return r;
2263 }
2264 return tgsi_helper_copy(ctx, inst);
2265 }
2266
2267 static int tgsi_log(struct r600_shader_ctx *ctx)
2268 {
2269 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2270 struct r600_bc_alu alu;
2271 int r;
2272
2273 /* result.x = floor(log2(src)); */
2274 if (inst->Dst[0].Register.WriteMask & 1) {
2275 memset(&alu, 0, sizeof(struct r600_bc_alu));
2276
2277 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2278 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2279 if (r)
2280 return r;
2281
2282 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2283
2284 alu.dst.sel = ctx->temp_reg;
2285 alu.dst.chan = 0;
2286 alu.dst.write = 1;
2287 alu.last = 1;
2288 r = r600_bc_add_alu(ctx->bc, &alu);
2289 if (r)
2290 return r;
2291
2292 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2293 alu.src[0].sel = ctx->temp_reg;
2294 alu.src[0].chan = 0;
2295
2296 alu.dst.sel = ctx->temp_reg;
2297 alu.dst.chan = 0;
2298 alu.dst.write = 1;
2299 alu.last = 1;
2300
2301 r = r600_bc_add_alu(ctx->bc, &alu);
2302 if (r)
2303 return r;
2304 }
2305
2306 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2307 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2308 memset(&alu, 0, sizeof(struct r600_bc_alu));
2309
2310 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2311 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2312 if (r)
2313 return r;
2314
2315 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2316
2317 alu.dst.sel = ctx->temp_reg;
2318 alu.dst.chan = 1;
2319 alu.dst.write = 1;
2320 alu.last = 1;
2321
2322 r = r600_bc_add_alu(ctx->bc, &alu);
2323 if (r)
2324 return r;
2325
2326 memset(&alu, 0, sizeof(struct r600_bc_alu));
2327
2328 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2329 alu.src[0].sel = ctx->temp_reg;
2330 alu.src[0].chan = 1;
2331
2332 alu.dst.sel = ctx->temp_reg;
2333 alu.dst.chan = 1;
2334 alu.dst.write = 1;
2335 alu.last = 1;
2336
2337 r = r600_bc_add_alu(ctx->bc, &alu);
2338 if (r)
2339 return r;
2340
2341 memset(&alu, 0, sizeof(struct r600_bc_alu));
2342
2343 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2344 alu.src[0].sel = ctx->temp_reg;
2345 alu.src[0].chan = 1;
2346
2347 alu.dst.sel = ctx->temp_reg;
2348 alu.dst.chan = 1;
2349 alu.dst.write = 1;
2350 alu.last = 1;
2351
2352 r = r600_bc_add_alu(ctx->bc, &alu);
2353 if (r)
2354 return r;
2355
2356 memset(&alu, 0, sizeof(struct r600_bc_alu));
2357
2358 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2359 alu.src[0].sel = ctx->temp_reg;
2360 alu.src[0].chan = 1;
2361
2362 alu.dst.sel = ctx->temp_reg;
2363 alu.dst.chan = 1;
2364 alu.dst.write = 1;
2365 alu.last = 1;
2366
2367 r = r600_bc_add_alu(ctx->bc, &alu);
2368 if (r)
2369 return r;
2370
2371 memset(&alu, 0, sizeof(struct r600_bc_alu));
2372
2373 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2374
2375 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2376 if (r)
2377 return r;
2378
2379 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2380
2381 alu.src[1].sel = ctx->temp_reg;
2382 alu.src[1].chan = 1;
2383
2384 alu.dst.sel = ctx->temp_reg;
2385 alu.dst.chan = 1;
2386 alu.dst.write = 1;
2387 alu.last = 1;
2388
2389 r = r600_bc_add_alu(ctx->bc, &alu);
2390 if (r)
2391 return r;
2392 }
2393
2394 /* result.z = log2(src);*/
2395 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2396 memset(&alu, 0, sizeof(struct r600_bc_alu));
2397
2398 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2399 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2400 if (r)
2401 return r;
2402
2403 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2404
2405 alu.dst.sel = ctx->temp_reg;
2406 alu.dst.write = 1;
2407 alu.dst.chan = 2;
2408 alu.last = 1;
2409
2410 r = r600_bc_add_alu(ctx->bc, &alu);
2411 if (r)
2412 return r;
2413 }
2414
2415 /* result.w = 1.0; */
2416 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2417 memset(&alu, 0, sizeof(struct r600_bc_alu));
2418
2419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2420 alu.src[0].sel = V_SQ_ALU_SRC_1;
2421 alu.src[0].chan = 0;
2422
2423 alu.dst.sel = ctx->temp_reg;
2424 alu.dst.chan = 3;
2425 alu.dst.write = 1;
2426 alu.last = 1;
2427
2428 r = r600_bc_add_alu(ctx->bc, &alu);
2429 if (r)
2430 return r;
2431 }
2432
2433 return tgsi_helper_copy(ctx, inst);
2434 }
2435
2436 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2437 {
2438 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2439 struct r600_bc_alu alu;
2440 int r;
2441 memset(&alu, 0, sizeof(struct r600_bc_alu));
2442
2443 switch (inst->Instruction.Opcode) {
2444 case TGSI_OPCODE_ARL:
2445 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2446 break;
2447 case TGSI_OPCODE_ARR:
2448 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2449 break;
2450 default:
2451 assert(0);
2452 return -1;
2453 }
2454
2455 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2456 if (r)
2457 return r;
2458 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2459 alu.last = 1;
2460 alu.dst.chan = 0;
2461 alu.dst.sel = ctx->temp_reg;
2462 alu.dst.write = 1;
2463 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2464 if (r)
2465 return r;
2466 memset(&alu, 0, sizeof(struct r600_bc_alu));
2467 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2468 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2469 if (r)
2470 return r;
2471 alu.src[0].sel = ctx->temp_reg;
2472 alu.src[0].chan = 0;
2473 alu.last = 1;
2474 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2475 if (r)
2476 return r;
2477 return 0;
2478 }
2479 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2480 {
2481 /* TODO from r600c, ar values don't persist between clauses */
2482 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2483 struct r600_bc_alu alu;
2484 int r;
2485 memset(&alu, 0, sizeof(struct r600_bc_alu));
2486
2487 switch (inst->Instruction.Opcode) {
2488 case TGSI_OPCODE_ARL:
2489 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2490 break;
2491 case TGSI_OPCODE_ARR:
2492 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2493 break;
2494 default:
2495 assert(0);
2496 return -1;
2497 }
2498
2499
2500 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2501 if (r)
2502 return r;
2503 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2504
2505 alu.last = 1;
2506
2507 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2508 if (r)
2509 return r;
2510 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2511 return 0;
2512 }
2513
2514 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2515 {
2516 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2517 struct r600_bc_alu alu;
2518 int i, r = 0;
2519
2520 for (i = 0; i < 4; i++) {
2521 memset(&alu, 0, sizeof(struct r600_bc_alu));
2522
2523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2524 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2525 if (r)
2526 return r;
2527
2528 if (i == 0 || i == 3) {
2529 alu.src[0].sel = V_SQ_ALU_SRC_1;
2530 } else {
2531 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2532 if (r)
2533 return r;
2534 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2535 }
2536
2537 if (i == 0 || i == 2) {
2538 alu.src[1].sel = V_SQ_ALU_SRC_1;
2539 } else {
2540 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2541 if (r)
2542 return r;
2543 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2544 }
2545 if (i == 3)
2546 alu.last = 1;
2547 r = r600_bc_add_alu(ctx->bc, &alu);
2548 if (r)
2549 return r;
2550 }
2551 return 0;
2552 }
2553
2554 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2555 {
2556 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2557 struct r600_bc_alu alu;
2558 int r;
2559
2560 memset(&alu, 0, sizeof(struct r600_bc_alu));
2561 alu.inst = opcode;
2562 alu.predicate = 1;
2563
2564 alu.dst.sel = ctx->temp_reg;
2565 alu.dst.write = 1;
2566 alu.dst.chan = 0;
2567
2568 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2569 if (r)
2570 return r;
2571 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2572 alu.src[1].sel = V_SQ_ALU_SRC_0;
2573 alu.src[1].chan = 0;
2574
2575 alu.last = 1;
2576
2577 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2578 if (r)
2579 return r;
2580 return 0;
2581 }
2582
2583 static int pops(struct r600_shader_ctx *ctx, int pops)
2584 {
2585 int alu_pop = 3;
2586 if (ctx->bc->cf_last) {
2587 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2588 alu_pop = 0;
2589 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2590 alu_pop = 1;
2591 }
2592 alu_pop += pops;
2593 if (alu_pop == 1) {
2594 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2595 ctx->bc->force_add_cf = 1;
2596 } else if (alu_pop == 2) {
2597 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2598 ctx->bc->force_add_cf = 1;
2599 } else {
2600 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2601 ctx->bc->cf_last->pop_count = pops;
2602 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2603 }
2604 return 0;
2605 }
2606
2607 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2608 {
2609 switch(reason) {
2610 case FC_PUSH_VPM:
2611 ctx->bc->callstack[ctx->bc->call_sp].current--;
2612 break;
2613 case FC_PUSH_WQM:
2614 case FC_LOOP:
2615 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2616 break;
2617 case FC_REP:
2618 /* TOODO : for 16 vp asic should -= 2; */
2619 ctx->bc->callstack[ctx->bc->call_sp].current --;
2620 break;
2621 }
2622 }
2623
2624 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2625 {
2626 if (check_max_only) {
2627 int diff;
2628 switch (reason) {
2629 case FC_PUSH_VPM:
2630 diff = 1;
2631 break;
2632 case FC_PUSH_WQM:
2633 diff = 4;
2634 break;
2635 default:
2636 assert(0);
2637 diff = 0;
2638 }
2639 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2640 ctx->bc->callstack[ctx->bc->call_sp].max) {
2641 ctx->bc->callstack[ctx->bc->call_sp].max =
2642 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2643 }
2644 return;
2645 }
2646 switch (reason) {
2647 case FC_PUSH_VPM:
2648 ctx->bc->callstack[ctx->bc->call_sp].current++;
2649 break;
2650 case FC_PUSH_WQM:
2651 case FC_LOOP:
2652 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2653 break;
2654 case FC_REP:
2655 ctx->bc->callstack[ctx->bc->call_sp].current++;
2656 break;
2657 }
2658
2659 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2660 ctx->bc->callstack[ctx->bc->call_sp].max) {
2661 ctx->bc->callstack[ctx->bc->call_sp].max =
2662 ctx->bc->callstack[ctx->bc->call_sp].current;
2663 }
2664 }
2665
2666 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2667 {
2668 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2669
2670 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2671 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2672 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2673 sp->num_mid++;
2674 }
2675
2676 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2677 {
2678 ctx->bc->fc_sp++;
2679 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2680 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2681 }
2682
2683 static void fc_poplevel(struct r600_shader_ctx *ctx)
2684 {
2685 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2686 if (sp->mid) {
2687 free(sp->mid);
2688 sp->mid = NULL;
2689 }
2690 sp->num_mid = 0;
2691 sp->start = NULL;
2692 sp->type = 0;
2693 ctx->bc->fc_sp--;
2694 }
2695
/*
 * Everything up to the matching #endif is compiled out.  It sketches
 * RETURN / jump / "return inside loop" support; several of the helpers
 * are still empty stubs.
 */
#if 0
/* Append a RETURN CF instruction. Always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Append a JUMP CF instruction with the given pop count.  The `offset`
 * argument is not used yet (see the TODO below). Always returns 0.
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Sequence: test flag, JUMP (pop count 1, offset argument 4), set the
 * flag helper with V_SQ_ALU_SRC_0, pop ifidx + 1 entries, then RETURN.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Sequence: test flag, append the CF instruction named by
 * ctx->inst_info->r600_opcode with pop count 1, register it in the mid
 * list of flow-control entry fc_sp, then pop one entry.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2743
2744 static int tgsi_if(struct r600_shader_ctx *ctx)
2745 {
2746 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2747
2748 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2749
2750 fc_pushlevel(ctx, FC_IF);
2751
2752 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2753 return 0;
2754 }
2755
2756 static int tgsi_else(struct r600_shader_ctx *ctx)
2757 {
2758 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2759 ctx->bc->cf_last->pop_count = 1;
2760
2761 fc_set_mid(ctx, ctx->bc->fc_sp);
2762 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2763 return 0;
2764 }
2765
2766 static int tgsi_endif(struct r600_shader_ctx *ctx)
2767 {
2768 pops(ctx, 1);
2769 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2770 R600_ERR("if/endif unbalanced in shader\n");
2771 return -1;
2772 }
2773
2774 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2775 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2776 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2777 } else {
2778 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2779 }
2780 fc_poplevel(ctx);
2781
2782 callstack_decrease_current(ctx, FC_PUSH_VPM);
2783 return 0;
2784 }
2785
2786 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2787 {
2788 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2789
2790 fc_pushlevel(ctx, FC_LOOP);
2791
2792 /* check stack depth */
2793 callstack_check_depth(ctx, FC_LOOP, 0);
2794 return 0;
2795 }
2796
2797 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2798 {
2799 int i;
2800
2801 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2802
2803 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2804 R600_ERR("loop/endloop in shader code are not paired.\n");
2805 return -EINVAL;
2806 }
2807
2808 /* fixup loop pointers - from r600isa
2809 LOOP END points to CF after LOOP START,
2810 LOOP START point to CF after LOOP END
2811 BRK/CONT point to LOOP END CF
2812 */
2813 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2814
2815 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2816
2817 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2818 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2819 }
2820 /* TODO add LOOPRET support */
2821 fc_poplevel(ctx);
2822 callstack_decrease_current(ctx, FC_LOOP);
2823 return 0;
2824 }
2825
2826 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2827 {
2828 unsigned int fscp;
2829
2830 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2831 {
2832 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2833 break;
2834 }
2835
2836 if (fscp == 0) {
2837 R600_ERR("Break not inside loop/endloop pair\n");
2838 return -EINVAL;
2839 }
2840
2841 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2842 ctx->bc->cf_last->pop_count = 1;
2843
2844 fc_set_mid(ctx, fscp);
2845
2846 pops(ctx, 1);
2847 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2848 return 0;
2849 }
2850
/*
 * TGSI opcode translation table using the non-Evergreen
 * (V_SQ_* / SQ_TEX_*) opcode values.
 *
 * Each entry holds, in order: the TGSI opcode, a flag (1 only for the
 * OP3 MULADD entry -- presumably "is_op3", confirm against the struct
 * declaration), the hardware opcode handed to the handler, and the
 * handler function.  Entries whose handler is tgsi_unsupported are not
 * translated.
 *
 * Entries appear in increasing TGSI opcode order, with numeric
 * placeholders filling the unassigned values marked "gap".
 * NOTE(review): this suggests the table is indexed directly by opcode --
 * confirm at the lookup site before reordering or removing entries.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
	{TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
	{TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
	/* gap */
	{118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
3014
3015 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3016 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3017 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3018 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3019 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3020 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3021 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3022 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3023 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3024 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3025 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3026 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3027 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3028 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3029 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3030 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3031 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3032 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3033 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3034 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3035 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3036 /* gap */
3037 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3038 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3039 /* gap */
3040 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3041 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3042 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3043 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3045 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3047 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3048 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3049 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3050 /* gap */
3051 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3052 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3053 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3055 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3056 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3057 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3058 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3059 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3060 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3065 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3067 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3068 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3069 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3070 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3072 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3074 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3079 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3081 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3085 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3086 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3087 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3088 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3091 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3092 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3093 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3094 /* gap */
3095 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3098 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3099 /* gap */
3100 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3102 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3108 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109 /* gap */
3110 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3117 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3119 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3122 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3124 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125 /* gap */
3126 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131 /* gap */
3132 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3140 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3141 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3142 /* gap */
3143 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3144 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3164 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3165 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3171 };