r600g: avoid useless shader rebuild at draw call
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_opcodes.h"
32 #include "r600d.h"
33 #include <stdio.h>
34 #include <errno.h>
35
36 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37 {
38 struct r600_pipe_state *rstate = &shader->rstate;
39 struct r600_shader *rshader = &shader->shader;
40 unsigned spi_vs_out_id[10];
41 unsigned i, tmp;
42
43 /* clear previous register */
44 rstate->nregs = 0;
45
46 /* so far never got proper semantic id from tgsi */
47 /* FIXME better to move this in config things so they get emited
48 * only one time per cs
49 */
50 for (i = 0; i < 10; i++) {
51 spi_vs_out_id[i] = 0;
52 }
53 for (i = 0; i < 32; i++) {
54 tmp = i << ((i & 3) * 8);
55 spi_vs_out_id[i / 4] |= tmp;
56 }
57 for (i = 0; i < 10; i++) {
58 r600_pipe_state_add_reg(rstate,
59 R_028614_SPI_VS_OUT_ID_0 + i * 4,
60 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
61 }
62
63 r600_pipe_state_add_reg(rstate,
64 R_0286C4_SPI_VS_OUT_CONFIG,
65 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
66 0xFFFFFFFF, NULL);
67 r600_pipe_state_add_reg(rstate,
68 R_028868_SQ_PGM_RESOURCES_VS,
69 S_028868_NUM_GPRS(rshader->bc.ngpr) |
70 S_028868_STACK_SIZE(rshader->bc.nstack),
71 0xFFFFFFFF, NULL);
72 r600_pipe_state_add_reg(rstate,
73 R_0288D0_SQ_PGM_CF_OFFSET_VS,
74 0x00000000, 0xFFFFFFFF, NULL);
75 r600_pipe_state_add_reg(rstate,
76 R_028858_SQ_PGM_START_VS,
77 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
78
79 #if 0
80 r600_pipe_state_add_reg(rstate,
81 R_0288A4_SQ_PGM_RESOURCES_FS,
82 0x00000000, 0xFFFFFFFF, NULL);
83 r600_pipe_state_add_reg(rstate,
84 R_0288DC_SQ_PGM_CF_OFFSET_FS,
85 0x00000000, 0xFFFFFFFF, NULL);
86 r600_pipe_state_add_reg(rstate,
87 R_028894_SQ_PGM_START_FS,
88 r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
89 #endif
90 r600_pipe_state_add_reg(rstate,
91 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
92 0xFFFFFFFF, NULL);
93
94 }
95
96 int r600_find_vs_semantic_index(struct r600_shader *vs,
97 struct r600_shader *ps, int id)
98 {
99 struct r600_shader_io *input = &ps->input[id];
100
101 for (int i = 0; i < vs->noutput; i++) {
102 if (input->name == vs->output[i].name &&
103 input->sid == vs->output[i].sid) {
104 return i - 1;
105 }
106 }
107 return 0;
108 }
109
110 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
111 {
112 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
113 struct r600_pipe_state *rstate = &shader->rstate;
114 struct r600_shader *rshader = &shader->shader;
115 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
116 int pos_index = -1, face_index = -1;
117
118 rstate->nregs = 0;
119
120 for (i = 0; i < rshader->ninput; i++) {
121 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
122 pos_index = i;
123 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
124 face_index = i;
125 }
126
127 for (i = 0; i < rshader->noutput; i++) {
128 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
129 r600_pipe_state_add_reg(rstate,
130 R_02880C_DB_SHADER_CONTROL,
131 S_02880C_Z_EXPORT_ENABLE(1),
132 S_02880C_Z_EXPORT_ENABLE(1), NULL);
133 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
134 r600_pipe_state_add_reg(rstate,
135 R_02880C_DB_SHADER_CONTROL,
136 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
137 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
138 }
139
140 exports_ps = 0;
141 num_cout = 0;
142 for (i = 0; i < rshader->noutput; i++) {
143 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
144 exports_ps |= 1;
145 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
146 num_cout++;
147 }
148 }
149 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
150 if (!exports_ps) {
151 /* always at least export 1 component per pixel */
152 exports_ps = 2;
153 }
154
155 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
156 S_0286CC_PERSP_GRADIENT_ENA(1);
157 spi_input_z = 0;
158 if (pos_index != -1) {
159 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
160 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
161 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
162 S_0286CC_BARYC_SAMPLE_CNTL(1));
163 spi_input_z |= 1;
164 }
165
166 spi_ps_in_control_1 = 0;
167 if (face_index != -1) {
168 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
169 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
170 }
171
172 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
173 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
174 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
175 r600_pipe_state_add_reg(rstate,
176 R_028840_SQ_PGM_START_PS,
177 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
178 r600_pipe_state_add_reg(rstate,
179 R_028850_SQ_PGM_RESOURCES_PS,
180 S_028868_NUM_GPRS(rshader->bc.ngpr) |
181 S_028868_STACK_SIZE(rshader->bc.nstack),
182 0xFFFFFFFF, NULL);
183 r600_pipe_state_add_reg(rstate,
184 R_028854_SQ_PGM_EXPORTS_PS,
185 exports_ps, 0xFFFFFFFF, NULL);
186 r600_pipe_state_add_reg(rstate,
187 R_0288CC_SQ_PGM_CF_OFFSET_PS,
188 0x00000000, 0xFFFFFFFF, NULL);
189
190 if (rshader->uses_kill) {
191 /* only set some bits here, the other bits are set in the dsa state */
192 r600_pipe_state_add_reg(rstate,
193 R_02880C_DB_SHADER_CONTROL,
194 S_02880C_KILL_ENABLE(1),
195 S_02880C_KILL_ENABLE(1), NULL);
196 }
197 r600_pipe_state_add_reg(rstate,
198 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
199 0xFFFFFFFF, NULL);
200 }
201
202 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
203 {
204 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
205 struct r600_shader *rshader = &shader->shader;
206 void *ptr;
207
208 /* copy new shader */
209 if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
210 shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
211 if (shader->bo_fetch == NULL) {
212 return -ENOMEM;
213 }
214 ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
215 memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
216 r600_bo_unmap(rctx->radeon, shader->bo_fetch);
217 }
218 if (shader->bo == NULL) {
219 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
220 if (shader->bo == NULL) {
221 return -ENOMEM;
222 }
223 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
224 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
225 r600_bo_unmap(rctx->radeon, shader->bo);
226 }
227 /* build state */
228 switch (rshader->processor_type) {
229 case TGSI_PROCESSOR_VERTEX:
230 if (rshader->family >= CHIP_CEDAR) {
231 evergreen_pipe_shader_vs(ctx, shader);
232 } else {
233 r600_pipe_shader_vs(ctx, shader);
234 }
235 break;
236 case TGSI_PROCESSOR_FRAGMENT:
237 if (rshader->family >= CHIP_CEDAR) {
238 evergreen_pipe_shader_ps(ctx, shader);
239 } else {
240 r600_pipe_shader_ps(ctx, shader);
241 }
242 break;
243 default:
244 return -EINVAL;
245 }
246 return 0;
247 }
248
/*
 * Hook for re-specialising a vertex shader's fetch bytecode against the
 * currently bound vertex elements.
 *
 * The implementation is compiled out; the function currently does nothing
 * and always returns 0.  The disabled code is kept for reference.
 */
static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
{
#if 0
	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
	struct r600_shader *shader = &rshader->shader;
	const struct util_format_description *desc;
	enum pipe_format resource_format[160];
	unsigned i, nresources = 0;
	struct r600_bc *bc = &shader->bc_fetch;
	struct r600_bc_cf *cf;
	struct r600_bc_vtx *vtx;

	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
		return 0;
	/* doing a full memcmp fell over the refcount */
	if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
	    (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
		     rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
		return 0;
	}
	rshader->vertex_elements = *rctx->vertex_elements;
	for (i = 0; i < rctx->vertex_elements->count; i++) {
		resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
	}
	/* drop the old fetch buffer so that it gets uploaded again */
	r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		switch (cf->inst) {
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
			/* patch the destination swizzle of every fetch */
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				desc = util_format_description(resource_format[vtx->buffer_id]);
				if (desc == NULL) {
					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
					return -EINVAL;
				}
				vtx->dst_sel_x = desc->swizzle[0];
				vtx->dst_sel_y = desc->swizzle[1];
				vtx->dst_sel_z = desc->swizzle[2];
				vtx->dst_sel_w = desc->swizzle[3];
			}
			break;
		default:
			break;
		}
	}
	return r600_bc_build(&shader->bc_fetch);
#else
	/* nothing to update */
	return 0;
#endif
}
299
/*
 * Refresh the hardware state of an already created shader.
 *
 * This is a thin wrapper around r600_pipe_shader() and returns whatever
 * that function returns (0 on success, a negative errno value otherwise).
 */
int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	return r600_pipe_shader(ctx, shader);
}
307
308 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
309 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
310 {
311 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
312 int r;
313
314 //fprintf(stderr, "--------------------------------------------------------------\n");
315 //tgsi_dump(tokens, 0);
316 shader->shader.family = r600_get_family(rctx->radeon);
317 r = r600_shader_from_tgsi(tokens, &shader->shader);
318 if (r) {
319 R600_ERR("translation from TGSI failed !\n");
320 return r;
321 }
322 r = r600_bc_build(&shader->shader.bc);
323 if (r) {
324 R600_ERR("building bytecode failed !\n");
325 return r;
326 }
327 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
328 r = r600_bc_build(&shader->shader.bc_fetch);
329 if (r) {
330 R600_ERR("building bytecode failed !\n");
331 return r;
332 }
333 }
334 //r600_bc_dump(&shader->shader.bc);
335 //fprintf(stderr, "______________________________________________________________\n");
336 return r600_pipe_shader(ctx, shader);
337 }
338
339 void
340 r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
341 {
342 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
343
344 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
345 r600_bo_reference(rctx->radeon, &shader->bo_fetch, NULL);
346 r600_bc_clear(&shader->shader.bc_fetch);
347 }
348
349 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
350
351 r600_bc_clear(&shader->shader.bc);
352
353 /* FIXME: is there more stuff to free? */
354 }
355
356 /*
357 * tgsi -> r600 shader
358 */
359 struct r600_shader_tgsi_instruction;
360
361 struct r600_shader_ctx {
362 struct tgsi_shader_info info;
363 struct tgsi_parse_context parse;
364 const struct tgsi_token *tokens;
365 unsigned type;
366 unsigned file_offset[TGSI_FILE_COUNT];
367 unsigned temp_reg;
368 struct r600_shader_tgsi_instruction *inst_info;
369 struct r600_bc *bc;
370 struct r600_bc *bc_fetch;
371 struct r600_shader *shader;
372 u32 value[4];
373 u32 *literals;
374 u32 nliterals;
375 u32 max_driver_temp_used;
376 /* needed for evergreen interpolation */
377 boolean input_centroid;
378 boolean input_linear;
379 boolean input_perspective;
380 int num_interp_gpr;
381 };
382
/* One entry of the TGSI opcode translation tables. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	unsigned	is_op3;
	/* hardware opcode used by the handler */
	unsigned	r600_opcode;
	/* handler emitting the bytecode; returns 0 or a negative errno value */
	int		(*process)(struct r600_shader_ctx *ctx);
};
389
390 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
391 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
392
393 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
394 {
395 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
396 int j;
397
398 if (i->Instruction.NumDstRegs > 1) {
399 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
400 return -EINVAL;
401 }
402 if (i->Instruction.Predicate) {
403 R600_ERR("predicate unsupported\n");
404 return -EINVAL;
405 }
406 #if 0
407 if (i->Instruction.Label) {
408 R600_ERR("label unsupported\n");
409 return -EINVAL;
410 }
411 #endif
412 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
413 if (i->Src[j].Register.Dimension) {
414 R600_ERR("unsupported src %d (dimension %d)\n", j,
415 i->Src[j].Register.Dimension);
416 return -EINVAL;
417 }
418 }
419 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
420 if (i->Dst[j].Register.Dimension) {
421 R600_ERR("unsupported dst (dimension)\n");
422 return -EINVAL;
423 }
424 }
425 return 0;
426 }
427
428 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
429 {
430 int i, r;
431 struct r600_bc_alu alu;
432 int gpr = 0, base_chan = 0;
433 int ij_index = 0;
434
435 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
436 ij_index = 0;
437 if (ctx->shader->input[input].centroid)
438 ij_index++;
439 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
440 ij_index = 0;
441 /* if we have perspective add one */
442 if (ctx->input_perspective) {
443 ij_index++;
444 /* if we have perspective centroid */
445 if (ctx->input_centroid)
446 ij_index++;
447 }
448 if (ctx->shader->input[input].centroid)
449 ij_index++;
450 }
451
452 /* work out gpr and base_chan from index */
453 gpr = ij_index / 2;
454 base_chan = (2 * (ij_index % 2)) + 1;
455
456 for (i = 0; i < 8; i++) {
457 memset(&alu, 0, sizeof(struct r600_bc_alu));
458
459 if (i < 4)
460 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
461 else
462 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
463
464 if ((i > 1) && (i < 6)) {
465 alu.dst.sel = ctx->shader->input[input].gpr;
466 alu.dst.write = 1;
467 }
468
469 alu.dst.chan = i % 4;
470
471 alu.src[0].sel = gpr;
472 alu.src[0].chan = (base_chan - (i % 2));
473
474 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
475
476 alu.bank_swizzle_force = SQ_ALU_VEC_210;
477 if ((i % 4) == 3)
478 alu.last = 1;
479 r = r600_bc_add_alu(ctx->bc, &alu);
480 if (r)
481 return r;
482 }
483 return 0;
484 }
485
486
487 static int tgsi_declaration(struct r600_shader_ctx *ctx)
488 {
489 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
490 struct r600_bc_vtx vtx;
491 unsigned i;
492 int r;
493
494 switch (d->Declaration.File) {
495 case TGSI_FILE_INPUT:
496 i = ctx->shader->ninput++;
497 ctx->shader->input[i].name = d->Semantic.Name;
498 ctx->shader->input[i].sid = d->Semantic.Index;
499 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
500 ctx->shader->input[i].centroid = d->Declaration.Centroid;
501 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
502 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
503 /* turn input into fetch */
504 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
505 vtx.inst = 0;
506 vtx.fetch_type = 0;
507 vtx.buffer_id = i;
508 /* register containing the index into the buffer */
509 vtx.src_gpr = 0;
510 vtx.src_sel_x = 0;
511 vtx.mega_fetch_count = 0x1F;
512 vtx.dst_gpr = ctx->shader->input[i].gpr;
513 vtx.dst_sel_x = 0;
514 vtx.dst_sel_y = 1;
515 vtx.dst_sel_z = 2;
516 vtx.dst_sel_w = 3;
517 vtx.use_const_fields = 1;
518 r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
519 if (r)
520 return r;
521 }
522 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
523 /* turn input into interpolate on EG */
524 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
525 if (ctx->shader->input[i].interpolate > 0) {
526 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
527 evergreen_interp_alu(ctx, i);
528 }
529 }
530 }
531 break;
532 case TGSI_FILE_OUTPUT:
533 i = ctx->shader->noutput++;
534 ctx->shader->output[i].name = d->Semantic.Name;
535 ctx->shader->output[i].sid = d->Semantic.Index;
536 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
537 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
538 break;
539 case TGSI_FILE_CONSTANT:
540 case TGSI_FILE_TEMPORARY:
541 case TGSI_FILE_SAMPLER:
542 case TGSI_FILE_ADDRESS:
543 break;
544 default:
545 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
546 return -EINVAL;
547 }
548 return 0;
549 }
550
551 static int r600_get_temp(struct r600_shader_ctx *ctx)
552 {
553 return ctx->temp_reg + ctx->max_driver_temp_used++;
554 }
555
556 /*
557 * for evergreen we need to scan the shader to find the number of GPRs we need to
558 * reserve for interpolation.
559 *
560 * we need to know if we are going to emit
561 * any centroid inputs
562 * if perspective and linear are required
563 */
564 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
565 {
566 int i;
567 int num_baryc;
568
569 ctx->input_linear = FALSE;
570 ctx->input_perspective = FALSE;
571 ctx->input_centroid = FALSE;
572 ctx->num_interp_gpr = 1;
573
574 /* any centroid inputs */
575 for (i = 0; i < ctx->info.num_inputs; i++) {
576 /* skip position/face */
577 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
578 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
579 continue;
580 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
581 ctx->input_linear = TRUE;
582 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
583 ctx->input_perspective = TRUE;
584 if (ctx->info.input_centroid[i])
585 ctx->input_centroid = TRUE;
586 }
587
588 num_baryc = 0;
589 /* ignoring sample for now */
590 if (ctx->input_perspective)
591 num_baryc++;
592 if (ctx->input_linear)
593 num_baryc++;
594 if (ctx->input_centroid)
595 num_baryc *= 2;
596
597 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
598
599 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
600 return ctx->num_interp_gpr;
601 }
602
603 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
604 {
605 struct tgsi_full_immediate *immediate;
606 struct r600_shader_ctx ctx;
607 struct r600_bc_output output[32];
608 unsigned output_done, noutput;
609 unsigned opcode;
610 int i, r = 0, pos0;
611
612 ctx.bc = &shader->bc;
613 ctx.bc_fetch = &shader->bc_fetch;
614 ctx.shader = shader;
615 r = r600_bc_init(ctx.bc, shader->family);
616 if (r)
617 return r;
618 ctx.tokens = tokens;
619 tgsi_scan_shader(tokens, &ctx.info);
620 tgsi_parse_init(&ctx.parse, tokens);
621 ctx.type = ctx.parse.FullHeader.Processor.Processor;
622 shader->processor_type = ctx.type;
623 if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
624 r = r600_bc_init(ctx.bc_fetch, shader->family);
625 if (r)
626 return r;
627 ctx.bc_fetch->type = -1;
628 }
629 ctx.bc->type = shader->processor_type;
630
631 /* register allocations */
632 /* Values [0,127] correspond to GPR[0..127].
633 * Values [128,159] correspond to constant buffer bank 0
634 * Values [160,191] correspond to constant buffer bank 1
635 * Values [256,511] correspond to cfile constants c[0..255].
636 * Other special values are shown in the list below.
637 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
638 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
639 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
640 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
641 * 248 SQ_ALU_SRC_0: special constant 0.0.
642 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
643 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
644 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
645 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
646 * 253 SQ_ALU_SRC_LITERAL: literal constant.
647 * 254 SQ_ALU_SRC_PV: previous vector result.
648 * 255 SQ_ALU_SRC_PS: previous scalar result.
649 */
650 for (i = 0; i < TGSI_FILE_COUNT; i++) {
651 ctx.file_offset[i] = 0;
652 }
653 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
654 ctx.file_offset[TGSI_FILE_INPUT] = 1;
655 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
656 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
657 } else {
658 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
659 }
660 }
661 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
662 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
663 }
664 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
665 ctx.info.file_count[TGSI_FILE_INPUT];
666 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
667 ctx.info.file_count[TGSI_FILE_OUTPUT];
668
669 ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
670
671 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
672 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
673 ctx.info.file_count[TGSI_FILE_TEMPORARY];
674
675 ctx.nliterals = 0;
676 ctx.literals = NULL;
677
678 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
679 tgsi_parse_token(&ctx.parse);
680 switch (ctx.parse.FullToken.Token.Type) {
681 case TGSI_TOKEN_TYPE_IMMEDIATE:
682 immediate = &ctx.parse.FullToken.FullImmediate;
683 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
684 if(ctx.literals == NULL) {
685 r = -ENOMEM;
686 goto out_err;
687 }
688 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
689 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
690 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
691 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
692 ctx.nliterals++;
693 break;
694 case TGSI_TOKEN_TYPE_DECLARATION:
695 r = tgsi_declaration(&ctx);
696 if (r)
697 goto out_err;
698 break;
699 case TGSI_TOKEN_TYPE_INSTRUCTION:
700 r = tgsi_is_supported(&ctx);
701 if (r)
702 goto out_err;
703 ctx.max_driver_temp_used = 0;
704 /* reserve first tmp for everyone */
705 r600_get_temp(&ctx);
706 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
707 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
708 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
709 else
710 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
711 r = ctx.inst_info->process(&ctx);
712 if (r)
713 goto out_err;
714 r = r600_bc_add_literal(ctx.bc, ctx.value);
715 if (r)
716 goto out_err;
717 break;
718 default:
719 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
720 r = -EINVAL;
721 goto out_err;
722 }
723 }
724 /* export output */
725 noutput = shader->noutput;
726 for (i = 0, pos0 = 0; i < noutput; i++) {
727 memset(&output[i], 0, sizeof(struct r600_bc_output));
728 output[i].gpr = shader->output[i].gpr;
729 output[i].elem_size = 3;
730 output[i].swizzle_x = 0;
731 output[i].swizzle_y = 1;
732 output[i].swizzle_z = 2;
733 output[i].swizzle_w = 3;
734 output[i].barrier = 1;
735 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
736 output[i].array_base = i - pos0;
737 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
738 switch (ctx.type) {
739 case TGSI_PROCESSOR_VERTEX:
740 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
741 output[i].array_base = 60;
742 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
743 /* position doesn't count in array_base */
744 pos0++;
745 }
746 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
747 output[i].array_base = 61;
748 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
749 /* position doesn't count in array_base */
750 pos0++;
751 }
752 break;
753 case TGSI_PROCESSOR_FRAGMENT:
754 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
755 output[i].array_base = shader->output[i].sid;
756 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
757 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
758 output[i].array_base = 61;
759 output[i].swizzle_x = 2;
760 output[i].swizzle_y = 7;
761 output[i].swizzle_z = output[i].swizzle_w = 7;
762 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
763 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
764 output[i].array_base = 61;
765 output[i].swizzle_x = 7;
766 output[i].swizzle_y = 1;
767 output[i].swizzle_z = output[i].swizzle_w = 7;
768 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
769 } else {
770 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
771 r = -EINVAL;
772 goto out_err;
773 }
774 break;
775 default:
776 R600_ERR("unsupported processor type %d\n", ctx.type);
777 r = -EINVAL;
778 goto out_err;
779 }
780 }
781 /* add fake param output for vertex shader if no param is exported */
782 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
783 for (i = 0, pos0 = 0; i < noutput; i++) {
784 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
785 pos0 = 1;
786 break;
787 }
788 }
789 if (!pos0) {
790 memset(&output[i], 0, sizeof(struct r600_bc_output));
791 output[i].gpr = 0;
792 output[i].elem_size = 3;
793 output[i].swizzle_x = 0;
794 output[i].swizzle_y = 1;
795 output[i].swizzle_z = 2;
796 output[i].swizzle_w = 3;
797 output[i].barrier = 1;
798 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
799 output[i].array_base = 0;
800 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
801 noutput++;
802 }
803 }
804 /* add fake pixel export */
805 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
806 memset(&output[0], 0, sizeof(struct r600_bc_output));
807 output[0].gpr = 0;
808 output[0].elem_size = 3;
809 output[0].swizzle_x = 7;
810 output[0].swizzle_y = 7;
811 output[0].swizzle_z = 7;
812 output[0].swizzle_w = 7;
813 output[0].barrier = 1;
814 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
815 output[0].array_base = 0;
816 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
817 noutput++;
818 }
819 /* set export done on last export of each type */
820 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
821 if (i == (noutput - 1)) {
822 output[i].end_of_program = 1;
823 }
824 if (!(output_done & (1 << output[i].type))) {
825 output_done |= (1 << output[i].type);
826 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
827 }
828 }
829 /* add return to fetch shader */
830 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
831 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
832 r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
833 } else {
834 r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
835 }
836 }
837 /* add output to bytecode */
838 for (i = 0; i < noutput; i++) {
839 r = r600_bc_add_output(ctx.bc, &output[i]);
840 if (r)
841 goto out_err;
842 }
843 free(ctx.literals);
844 tgsi_parse_free(&ctx.parse);
845 return 0;
846 out_err:
847 free(ctx.literals);
848 tgsi_parse_free(&ctx.parse);
849 return r;
850 }
851
852 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
853 {
854 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
855 return -EINVAL;
856 }
857
/* Handler that emits nothing and always succeeds; ctx is not used. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
862
863 static int tgsi_src(struct r600_shader_ctx *ctx,
864 const struct tgsi_full_src_register *tgsi_src,
865 struct r600_bc_alu_src *r600_src)
866 {
867 int index;
868 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
869 r600_src->sel = tgsi_src->Register.Index;
870 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
871 r600_src->sel = 0;
872 index = tgsi_src->Register.Index;
873 ctx->value[0] = ctx->literals[index * 4 + 0];
874 ctx->value[1] = ctx->literals[index * 4 + 1];
875 ctx->value[2] = ctx->literals[index * 4 + 2];
876 ctx->value[3] = ctx->literals[index * 4 + 3];
877 }
878 if (tgsi_src->Register.Indirect)
879 r600_src->rel = V_SQ_REL_RELATIVE;
880 r600_src->neg = tgsi_src->Register.Negate;
881 r600_src->abs = tgsi_src->Register.Absolute;
882 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
883 return 0;
884 }
885
886 static int tgsi_dst(struct r600_shader_ctx *ctx,
887 const struct tgsi_full_dst_register *tgsi_dst,
888 unsigned swizzle,
889 struct r600_bc_alu_dst *r600_dst)
890 {
891 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
892
893 r600_dst->sel = tgsi_dst->Register.Index;
894 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
895 r600_dst->chan = swizzle;
896 r600_dst->write = 1;
897 if (tgsi_dst->Register.Indirect)
898 r600_dst->rel = V_SQ_REL_RELATIVE;
899 if (inst->Instruction.Saturate) {
900 r600_dst->clamp = 1;
901 }
902 return 0;
903 }
904
905 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
906 {
907 switch (swizzle) {
908 case 0:
909 return tgsi_src->Register.SwizzleX;
910 case 1:
911 return tgsi_src->Register.SwizzleY;
912 case 2:
913 return tgsi_src->Register.SwizzleZ;
914 case 3:
915 return tgsi_src->Register.SwizzleW;
916 default:
917 return 0;
918 }
919 }
920
921 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
922 {
923 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
924 struct r600_bc_alu alu;
925 int i, j, k, nconst, r;
926
927 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
928 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
929 nconst++;
930 }
931 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
932 if (r) {
933 return r;
934 }
935 }
936 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
937 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
938 int treg = r600_get_temp(ctx);
939 for (k = 0; k < 4; k++) {
940 memset(&alu, 0, sizeof(struct r600_bc_alu));
941 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
942 alu.src[0].sel = r600_src[i].sel;
943 alu.src[0].chan = k;
944 alu.src[0].rel = r600_src[i].rel;
945 alu.dst.sel = treg;
946 alu.dst.chan = k;
947 alu.dst.write = 1;
948 if (k == 3)
949 alu.last = 1;
950 r = r600_bc_add_alu(ctx->bc, &alu);
951 if (r)
952 return r;
953 }
954 r600_src[i].sel = treg;
955 r600_src[i].rel =0;
956 j--;
957 }
958 }
959 return 0;
960 }
961
962 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
963 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
964 {
965 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
966 struct r600_bc_alu alu;
967 int i, j, k, nliteral, r;
968
969 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
970 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
971 nliteral++;
972 }
973 }
974 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
975 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
976 int treg = r600_get_temp(ctx);
977 for (k = 0; k < 4; k++) {
978 memset(&alu, 0, sizeof(struct r600_bc_alu));
979 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
980 alu.src[0].sel = r600_src[i].sel;
981 alu.src[0].chan = k;
982 alu.dst.sel = treg;
983 alu.dst.chan = k;
984 alu.dst.write = 1;
985 if (k == 3)
986 alu.last = 1;
987 r = r600_bc_add_alu(ctx->bc, &alu);
988 if (r)
989 return r;
990 }
991 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
992 if (r)
993 return r;
994 r600_src[i].sel = treg;
995 j--;
996 }
997 }
998 return 0;
999 }
1000
1001 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1002 {
1003 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1004 struct r600_bc_alu_src r600_src[3];
1005 struct r600_bc_alu alu;
1006 int i, j, r;
1007 int lasti = 0;
1008
1009 for (i = 0; i < 4; i++) {
1010 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1011 lasti = i;
1012 }
1013 }
1014
1015 r = tgsi_split_constant(ctx, r600_src);
1016 if (r)
1017 return r;
1018 r = tgsi_split_literal_constant(ctx, r600_src);
1019 if (r)
1020 return r;
1021 for (i = 0; i < lasti + 1; i++) {
1022 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1023 continue;
1024
1025 memset(&alu, 0, sizeof(struct r600_bc_alu));
1026 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1027 if (r)
1028 return r;
1029
1030 alu.inst = ctx->inst_info->r600_opcode;
1031 if (!swap) {
1032 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1033 alu.src[j] = r600_src[j];
1034 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1035 }
1036 } else {
1037 alu.src[0] = r600_src[1];
1038 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1039
1040 alu.src[1] = r600_src[0];
1041 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1042 }
1043 /* handle some special cases */
1044 switch (ctx->inst_info->tgsi_opcode) {
1045 case TGSI_OPCODE_SUB:
1046 alu.src[1].neg = 1;
1047 break;
1048 case TGSI_OPCODE_ABS:
1049 alu.src[0].abs = 1;
1050 break;
1051 default:
1052 break;
1053 }
1054 if (i == lasti) {
1055 alu.last = 1;
1056 }
1057 r = r600_bc_add_alu(ctx->bc, &alu);
1058 if (r)
1059 return r;
1060 }
1061 return 0;
1062 }
1063
/* Two-operand instruction with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;

	return tgsi_op2_s(ctx, swap);
}
1068
/* Two-operand instruction with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;

	return tgsi_op2_s(ctx, swap);
}
1073
1074 /*
1075 * r600 - trunc to -PI..PI range
1076 * r700 - normalize by dividing by 2PI
1077 * see fdo bug 27901
1078 */
1079 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
1080 struct r600_bc_alu_src r600_src[3])
1081 {
1082 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1083 int r;
1084 uint32_t lit_vals[4];
1085 struct r600_bc_alu alu;
1086
1087 memset(lit_vals, 0, 4*4);
1088 r = tgsi_split_constant(ctx, r600_src);
1089 if (r)
1090 return r;
1091 r = tgsi_split_literal_constant(ctx, r600_src);
1092 if (r)
1093 return r;
1094
1095 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
1096 lit_vals[1] = fui(0.5f);
1097
1098 memset(&alu, 0, sizeof(struct r600_bc_alu));
1099 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1100 alu.is_op3 = 1;
1101
1102 alu.dst.chan = 0;
1103 alu.dst.sel = ctx->temp_reg;
1104 alu.dst.write = 1;
1105
1106 alu.src[0] = r600_src[0];
1107 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1108
1109 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1110 alu.src[1].chan = 0;
1111 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1112 alu.src[2].chan = 1;
1113 alu.last = 1;
1114 r = r600_bc_add_alu(ctx->bc, &alu);
1115 if (r)
1116 return r;
1117 r = r600_bc_add_literal(ctx->bc, lit_vals);
1118 if (r)
1119 return r;
1120
1121 memset(&alu, 0, sizeof(struct r600_bc_alu));
1122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1123
1124 alu.dst.chan = 0;
1125 alu.dst.sel = ctx->temp_reg;
1126 alu.dst.write = 1;
1127
1128 alu.src[0].sel = ctx->temp_reg;
1129 alu.src[0].chan = 0;
1130 alu.last = 1;
1131 r = r600_bc_add_alu(ctx->bc, &alu);
1132 if (r)
1133 return r;
1134
1135 if (ctx->bc->chiprev == CHIPREV_R600) {
1136 lit_vals[0] = fui(3.1415926535897f * 2.0f);
1137 lit_vals[1] = fui(-3.1415926535897f);
1138 } else {
1139 lit_vals[0] = fui(1.0f);
1140 lit_vals[1] = fui(-0.5f);
1141 }
1142
1143 memset(&alu, 0, sizeof(struct r600_bc_alu));
1144 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1145 alu.is_op3 = 1;
1146
1147 alu.dst.chan = 0;
1148 alu.dst.sel = ctx->temp_reg;
1149 alu.dst.write = 1;
1150
1151 alu.src[0].sel = ctx->temp_reg;
1152 alu.src[0].chan = 0;
1153
1154 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1155 alu.src[1].chan = 0;
1156 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1157 alu.src[2].chan = 1;
1158 alu.last = 1;
1159 r = r600_bc_add_alu(ctx->bc, &alu);
1160 if (r)
1161 return r;
1162 r = r600_bc_add_literal(ctx->bc, lit_vals);
1163 if (r)
1164 return r;
1165 return 0;
1166 }
1167
1168 static int tgsi_trig(struct r600_shader_ctx *ctx)
1169 {
1170 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1171 struct r600_bc_alu_src r600_src[3];
1172 struct r600_bc_alu alu;
1173 int i, r;
1174 int lasti = 0;
1175
1176 r = tgsi_setup_trig(ctx, r600_src);
1177 if (r)
1178 return r;
1179
1180 memset(&alu, 0, sizeof(struct r600_bc_alu));
1181 alu.inst = ctx->inst_info->r600_opcode;
1182 alu.dst.chan = 0;
1183 alu.dst.sel = ctx->temp_reg;
1184 alu.dst.write = 1;
1185
1186 alu.src[0].sel = ctx->temp_reg;
1187 alu.src[0].chan = 0;
1188 alu.last = 1;
1189 r = r600_bc_add_alu(ctx->bc, &alu);
1190 if (r)
1191 return r;
1192
1193 /* replicate result */
1194 for (i = 0; i < 4; i++) {
1195 if (inst->Dst[0].Register.WriteMask & (1 << i))
1196 lasti = i;
1197 }
1198 for (i = 0; i < lasti + 1; i++) {
1199 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1200 continue;
1201
1202 memset(&alu, 0, sizeof(struct r600_bc_alu));
1203 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1204
1205 alu.src[0].sel = ctx->temp_reg;
1206 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1207 if (r)
1208 return r;
1209 if (i == lasti)
1210 alu.last = 1;
1211 r = r600_bc_add_alu(ctx->bc, &alu);
1212 if (r)
1213 return r;
1214 }
1215 return 0;
1216 }
1217
1218 static int tgsi_scs(struct r600_shader_ctx *ctx)
1219 {
1220 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1221 struct r600_bc_alu_src r600_src[3];
1222 struct r600_bc_alu alu;
1223 int r;
1224
1225 /* We'll only need the trig stuff if we are going to write to the
1226 * X or Y components of the destination vector.
1227 */
1228 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1229 r = tgsi_setup_trig(ctx, r600_src);
1230 if (r)
1231 return r;
1232 }
1233
1234 /* dst.x = COS */
1235 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1236 memset(&alu, 0, sizeof(struct r600_bc_alu));
1237 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1238 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1239 if (r)
1240 return r;
1241
1242 alu.src[0].sel = ctx->temp_reg;
1243 alu.src[0].chan = 0;
1244 alu.last = 1;
1245 r = r600_bc_add_alu(ctx->bc, &alu);
1246 if (r)
1247 return r;
1248 }
1249
1250 /* dst.y = SIN */
1251 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1252 memset(&alu, 0, sizeof(struct r600_bc_alu));
1253 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1254 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1255 if (r)
1256 return r;
1257
1258 alu.src[0].sel = ctx->temp_reg;
1259 alu.src[0].chan = 0;
1260 alu.last = 1;
1261 r = r600_bc_add_alu(ctx->bc, &alu);
1262 if (r)
1263 return r;
1264 }
1265
1266 /* dst.z = 0.0; */
1267 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1268 memset(&alu, 0, sizeof(struct r600_bc_alu));
1269
1270 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1271
1272 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1273 if (r)
1274 return r;
1275
1276 alu.src[0].sel = V_SQ_ALU_SRC_0;
1277 alu.src[0].chan = 0;
1278
1279 alu.last = 1;
1280
1281 r = r600_bc_add_alu(ctx->bc, &alu);
1282 if (r)
1283 return r;
1284
1285 r = r600_bc_add_literal(ctx->bc, ctx->value);
1286 if (r)
1287 return r;
1288 }
1289
1290 /* dst.w = 1.0; */
1291 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1292 memset(&alu, 0, sizeof(struct r600_bc_alu));
1293
1294 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1295
1296 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1297 if (r)
1298 return r;
1299
1300 alu.src[0].sel = V_SQ_ALU_SRC_1;
1301 alu.src[0].chan = 0;
1302
1303 alu.last = 1;
1304
1305 r = r600_bc_add_alu(ctx->bc, &alu);
1306 if (r)
1307 return r;
1308
1309 r = r600_bc_add_literal(ctx->bc, ctx->value);
1310 if (r)
1311 return r;
1312 }
1313
1314 return 0;
1315 }
1316
1317 static int tgsi_kill(struct r600_shader_ctx *ctx)
1318 {
1319 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1320 struct r600_bc_alu alu;
1321 int i, r;
1322
1323 for (i = 0; i < 4; i++) {
1324 memset(&alu, 0, sizeof(struct r600_bc_alu));
1325 alu.inst = ctx->inst_info->r600_opcode;
1326
1327 alu.dst.chan = i;
1328
1329 alu.src[0].sel = V_SQ_ALU_SRC_0;
1330
1331 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1332 alu.src[1].sel = V_SQ_ALU_SRC_1;
1333 alu.src[1].neg = 1;
1334 } else {
1335 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1336 if (r)
1337 return r;
1338 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1339 }
1340 if (i == 3) {
1341 alu.last = 1;
1342 }
1343 r = r600_bc_add_alu(ctx->bc, &alu);
1344 if (r)
1345 return r;
1346 }
1347 r = r600_bc_add_literal(ctx->bc, ctx->value);
1348 if (r)
1349 return r;
1350
1351 /* kill must be last in ALU */
1352 ctx->bc->force_add_cf = 1;
1353 ctx->shader->uses_kill = TRUE;
1354 return 0;
1355 }
1356
1357 static int tgsi_lit(struct r600_shader_ctx *ctx)
1358 {
1359 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1360 struct r600_bc_alu alu;
1361 struct r600_bc_alu_src r600_src[3];
1362 int r;
1363
1364 r = tgsi_split_constant(ctx, r600_src);
1365 if (r)
1366 return r;
1367 r = tgsi_split_literal_constant(ctx, r600_src);
1368 if (r)
1369 return r;
1370
1371 /* dst.x, <- 1.0 */
1372 memset(&alu, 0, sizeof(struct r600_bc_alu));
1373 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1374 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1375 alu.src[0].chan = 0;
1376 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1377 if (r)
1378 return r;
1379 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1380 r = r600_bc_add_alu(ctx->bc, &alu);
1381 if (r)
1382 return r;
1383
1384 /* dst.y = max(src.x, 0.0) */
1385 memset(&alu, 0, sizeof(struct r600_bc_alu));
1386 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1387 alu.src[0] = r600_src[0];
1388 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1389 alu.src[1].chan = 0;
1390 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1391 if (r)
1392 return r;
1393 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1394 r = r600_bc_add_alu(ctx->bc, &alu);
1395 if (r)
1396 return r;
1397
1398 /* dst.w, <- 1.0 */
1399 memset(&alu, 0, sizeof(struct r600_bc_alu));
1400 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1401 alu.src[0].sel = V_SQ_ALU_SRC_1;
1402 alu.src[0].chan = 0;
1403 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1404 if (r)
1405 return r;
1406 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1407 alu.last = 1;
1408 r = r600_bc_add_alu(ctx->bc, &alu);
1409 if (r)
1410 return r;
1411
1412 r = r600_bc_add_literal(ctx->bc, ctx->value);
1413 if (r)
1414 return r;
1415
1416 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1417 {
1418 int chan;
1419 int sel;
1420
1421 /* dst.z = log(src.y) */
1422 memset(&alu, 0, sizeof(struct r600_bc_alu));
1423 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1424 alu.src[0] = r600_src[0];
1425 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1426 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1427 if (r)
1428 return r;
1429 alu.last = 1;
1430 r = r600_bc_add_alu(ctx->bc, &alu);
1431 if (r)
1432 return r;
1433
1434 r = r600_bc_add_literal(ctx->bc, ctx->value);
1435 if (r)
1436 return r;
1437
1438 chan = alu.dst.chan;
1439 sel = alu.dst.sel;
1440
1441 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1442 memset(&alu, 0, sizeof(struct r600_bc_alu));
1443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1444 alu.src[0] = r600_src[0];
1445 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1446 alu.src[1].sel = sel;
1447 alu.src[1].chan = chan;
1448
1449 alu.src[2] = r600_src[0];
1450 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1451 alu.dst.sel = ctx->temp_reg;
1452 alu.dst.chan = 0;
1453 alu.dst.write = 1;
1454 alu.is_op3 = 1;
1455 alu.last = 1;
1456 r = r600_bc_add_alu(ctx->bc, &alu);
1457 if (r)
1458 return r;
1459
1460 r = r600_bc_add_literal(ctx->bc, ctx->value);
1461 if (r)
1462 return r;
1463 /* dst.z = exp(tmp.x) */
1464 memset(&alu, 0, sizeof(struct r600_bc_alu));
1465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1466 alu.src[0].sel = ctx->temp_reg;
1467 alu.src[0].chan = 0;
1468 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1469 if (r)
1470 return r;
1471 alu.last = 1;
1472 r = r600_bc_add_alu(ctx->bc, &alu);
1473 if (r)
1474 return r;
1475 }
1476 return 0;
1477 }
1478
1479 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1480 {
1481 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1482 struct r600_bc_alu alu;
1483 int i, r;
1484
1485 memset(&alu, 0, sizeof(struct r600_bc_alu));
1486
1487 /* FIXME:
1488 * For state trackers other than OpenGL, we'll want to use
1489 * _RECIPSQRT_IEEE instead.
1490 */
1491 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1492
1493 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1494 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1495 if (r)
1496 return r;
1497 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1498 alu.src[i].abs = 1;
1499 }
1500 alu.dst.sel = ctx->temp_reg;
1501 alu.dst.write = 1;
1502 alu.last = 1;
1503 r = r600_bc_add_alu(ctx->bc, &alu);
1504 if (r)
1505 return r;
1506 r = r600_bc_add_literal(ctx->bc, ctx->value);
1507 if (r)
1508 return r;
1509 /* replicate result */
1510 return tgsi_helper_tempx_replicate(ctx);
1511 }
1512
1513 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1514 {
1515 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1516 struct r600_bc_alu alu;
1517 int i, r;
1518
1519 for (i = 0; i < 4; i++) {
1520 memset(&alu, 0, sizeof(struct r600_bc_alu));
1521 alu.src[0].sel = ctx->temp_reg;
1522 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1523 alu.dst.chan = i;
1524 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1525 if (r)
1526 return r;
1527 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1528 if (i == 3)
1529 alu.last = 1;
1530 r = r600_bc_add_alu(ctx->bc, &alu);
1531 if (r)
1532 return r;
1533 }
1534 return 0;
1535 }
1536
1537 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1538 {
1539 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1540 struct r600_bc_alu alu;
1541 int i, r;
1542
1543 memset(&alu, 0, sizeof(struct r600_bc_alu));
1544 alu.inst = ctx->inst_info->r600_opcode;
1545 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1546 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1547 if (r)
1548 return r;
1549 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1550 }
1551 alu.dst.sel = ctx->temp_reg;
1552 alu.dst.write = 1;
1553 alu.last = 1;
1554 r = r600_bc_add_alu(ctx->bc, &alu);
1555 if (r)
1556 return r;
1557 r = r600_bc_add_literal(ctx->bc, ctx->value);
1558 if (r)
1559 return r;
1560 /* replicate result */
1561 return tgsi_helper_tempx_replicate(ctx);
1562 }
1563
1564 static int tgsi_pow(struct r600_shader_ctx *ctx)
1565 {
1566 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1567 struct r600_bc_alu alu;
1568 int r;
1569
1570 /* LOG2(a) */
1571 memset(&alu, 0, sizeof(struct r600_bc_alu));
1572 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1573 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1574 if (r)
1575 return r;
1576 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1577 alu.dst.sel = ctx->temp_reg;
1578 alu.dst.write = 1;
1579 alu.last = 1;
1580 r = r600_bc_add_alu(ctx->bc, &alu);
1581 if (r)
1582 return r;
1583 r = r600_bc_add_literal(ctx->bc,ctx->value);
1584 if (r)
1585 return r;
1586 /* b * LOG2(a) */
1587 memset(&alu, 0, sizeof(struct r600_bc_alu));
1588 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1589 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1590 if (r)
1591 return r;
1592 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1593 alu.src[1].sel = ctx->temp_reg;
1594 alu.dst.sel = ctx->temp_reg;
1595 alu.dst.write = 1;
1596 alu.last = 1;
1597 r = r600_bc_add_alu(ctx->bc, &alu);
1598 if (r)
1599 return r;
1600 r = r600_bc_add_literal(ctx->bc,ctx->value);
1601 if (r)
1602 return r;
1603 /* POW(a,b) = EXP2(b * LOG2(a))*/
1604 memset(&alu, 0, sizeof(struct r600_bc_alu));
1605 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1606 alu.src[0].sel = ctx->temp_reg;
1607 alu.dst.sel = ctx->temp_reg;
1608 alu.dst.write = 1;
1609 alu.last = 1;
1610 r = r600_bc_add_alu(ctx->bc, &alu);
1611 if (r)
1612 return r;
1613 r = r600_bc_add_literal(ctx->bc,ctx->value);
1614 if (r)
1615 return r;
1616 return tgsi_helper_tempx_replicate(ctx);
1617 }
1618
1619 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1620 {
1621 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1622 struct r600_bc_alu alu;
1623 struct r600_bc_alu_src r600_src[3];
1624 int i, r;
1625
1626 r = tgsi_split_constant(ctx, r600_src);
1627 if (r)
1628 return r;
1629 r = tgsi_split_literal_constant(ctx, r600_src);
1630 if (r)
1631 return r;
1632
1633 /* tmp = (src > 0 ? 1 : src) */
1634 for (i = 0; i < 4; i++) {
1635 memset(&alu, 0, sizeof(struct r600_bc_alu));
1636 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1637 alu.is_op3 = 1;
1638
1639 alu.dst.sel = ctx->temp_reg;
1640 alu.dst.chan = i;
1641
1642 alu.src[0] = r600_src[0];
1643 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1644
1645 alu.src[1].sel = V_SQ_ALU_SRC_1;
1646
1647 alu.src[2] = r600_src[0];
1648 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1649 if (i == 3)
1650 alu.last = 1;
1651 r = r600_bc_add_alu(ctx->bc, &alu);
1652 if (r)
1653 return r;
1654 }
1655 r = r600_bc_add_literal(ctx->bc, ctx->value);
1656 if (r)
1657 return r;
1658
1659 /* dst = (-tmp > 0 ? -1 : tmp) */
1660 for (i = 0; i < 4; i++) {
1661 memset(&alu, 0, sizeof(struct r600_bc_alu));
1662 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1663 alu.is_op3 = 1;
1664 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1665 if (r)
1666 return r;
1667
1668 alu.src[0].sel = ctx->temp_reg;
1669 alu.src[0].chan = i;
1670 alu.src[0].neg = 1;
1671
1672 alu.src[1].sel = V_SQ_ALU_SRC_1;
1673 alu.src[1].neg = 1;
1674
1675 alu.src[2].sel = ctx->temp_reg;
1676 alu.src[2].chan = i;
1677
1678 if (i == 3)
1679 alu.last = 1;
1680 r = r600_bc_add_alu(ctx->bc, &alu);
1681 if (r)
1682 return r;
1683 }
1684 return 0;
1685 }
1686
1687 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1688 {
1689 struct r600_bc_alu alu;
1690 int i, r;
1691
1692 r = r600_bc_add_literal(ctx->bc, ctx->value);
1693 if (r)
1694 return r;
1695 for (i = 0; i < 4; i++) {
1696 memset(&alu, 0, sizeof(struct r600_bc_alu));
1697 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1698 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1699 alu.dst.chan = i;
1700 } else {
1701 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1702 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1703 if (r)
1704 return r;
1705 alu.src[0].sel = ctx->temp_reg;
1706 alu.src[0].chan = i;
1707 }
1708 if (i == 3) {
1709 alu.last = 1;
1710 }
1711 r = r600_bc_add_alu(ctx->bc, &alu);
1712 if (r)
1713 return r;
1714 }
1715 return 0;
1716 }
1717
1718 static int tgsi_op3(struct r600_shader_ctx *ctx)
1719 {
1720 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1721 struct r600_bc_alu_src r600_src[3];
1722 struct r600_bc_alu alu;
1723 int i, j, r;
1724
1725 r = tgsi_split_constant(ctx, r600_src);
1726 if (r)
1727 return r;
1728 r = tgsi_split_literal_constant(ctx, r600_src);
1729 if (r)
1730 return r;
1731 /* do it in 2 step as op3 doesn't support writemask */
1732 for (i = 0; i < 4; i++) {
1733 memset(&alu, 0, sizeof(struct r600_bc_alu));
1734 alu.inst = ctx->inst_info->r600_opcode;
1735 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1736 alu.src[j] = r600_src[j];
1737 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1738 }
1739 alu.dst.sel = ctx->temp_reg;
1740 alu.dst.chan = i;
1741 alu.dst.write = 1;
1742 alu.is_op3 = 1;
1743 if (i == 3) {
1744 alu.last = 1;
1745 }
1746 r = r600_bc_add_alu(ctx->bc, &alu);
1747 if (r)
1748 return r;
1749 }
1750 return tgsi_helper_copy(ctx, inst);
1751 }
1752
1753 static int tgsi_dp(struct r600_shader_ctx *ctx)
1754 {
1755 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1756 struct r600_bc_alu_src r600_src[3];
1757 struct r600_bc_alu alu;
1758 int i, j, r;
1759
1760 r = tgsi_split_constant(ctx, r600_src);
1761 if (r)
1762 return r;
1763 r = tgsi_split_literal_constant(ctx, r600_src);
1764 if (r)
1765 return r;
1766 for (i = 0; i < 4; i++) {
1767 memset(&alu, 0, sizeof(struct r600_bc_alu));
1768 alu.inst = ctx->inst_info->r600_opcode;
1769 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1770 alu.src[j] = r600_src[j];
1771 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1772 }
1773 alu.dst.sel = ctx->temp_reg;
1774 alu.dst.chan = i;
1775 alu.dst.write = 1;
1776 /* handle some special cases */
1777 switch (ctx->inst_info->tgsi_opcode) {
1778 case TGSI_OPCODE_DP2:
1779 if (i > 1) {
1780 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1781 alu.src[0].chan = alu.src[1].chan = 0;
1782 }
1783 break;
1784 case TGSI_OPCODE_DP3:
1785 if (i > 2) {
1786 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1787 alu.src[0].chan = alu.src[1].chan = 0;
1788 }
1789 break;
1790 case TGSI_OPCODE_DPH:
1791 if (i == 3) {
1792 alu.src[0].sel = V_SQ_ALU_SRC_1;
1793 alu.src[0].chan = 0;
1794 alu.src[0].neg = 0;
1795 }
1796 break;
1797 default:
1798 break;
1799 }
1800 if (i == 3) {
1801 alu.last = 1;
1802 }
1803 r = r600_bc_add_alu(ctx->bc, &alu);
1804 if (r)
1805 return r;
1806 }
1807 return tgsi_helper_copy(ctx, inst);
1808 }
1809
1810 static int tgsi_tex(struct r600_shader_ctx *ctx)
1811 {
1812 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1813 struct r600_bc_tex tex;
1814 struct r600_bc_alu alu;
1815 unsigned src_gpr;
1816 int r, i;
1817 int opcode;
1818 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1819 uint32_t lit_vals[4];
1820
1821 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1822
1823 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1824 /* Add perspective divide */
1825 memset(&alu, 0, sizeof(struct r600_bc_alu));
1826 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1827 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1828 if (r)
1829 return r;
1830
1831 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1832 alu.dst.sel = ctx->temp_reg;
1833 alu.dst.chan = 3;
1834 alu.last = 1;
1835 alu.dst.write = 1;
1836 r = r600_bc_add_alu(ctx->bc, &alu);
1837 if (r)
1838 return r;
1839
1840 for (i = 0; i < 3; i++) {
1841 memset(&alu, 0, sizeof(struct r600_bc_alu));
1842 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1843 alu.src[0].sel = ctx->temp_reg;
1844 alu.src[0].chan = 3;
1845 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1846 if (r)
1847 return r;
1848 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1849 alu.dst.sel = ctx->temp_reg;
1850 alu.dst.chan = i;
1851 alu.dst.write = 1;
1852 r = r600_bc_add_alu(ctx->bc, &alu);
1853 if (r)
1854 return r;
1855 }
1856 memset(&alu, 0, sizeof(struct r600_bc_alu));
1857 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1858 alu.src[0].sel = V_SQ_ALU_SRC_1;
1859 alu.src[0].chan = 0;
1860 alu.dst.sel = ctx->temp_reg;
1861 alu.dst.chan = 3;
1862 alu.last = 1;
1863 alu.dst.write = 1;
1864 r = r600_bc_add_alu(ctx->bc, &alu);
1865 if (r)
1866 return r;
1867 src_not_temp = FALSE;
1868 src_gpr = ctx->temp_reg;
1869 }
1870
1871 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1872 int src_chan, src2_chan;
1873
1874 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1875 for (i = 0; i < 4; i++) {
1876 memset(&alu, 0, sizeof(struct r600_bc_alu));
1877 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1878 switch (i) {
1879 case 0:
1880 src_chan = 2;
1881 src2_chan = 1;
1882 break;
1883 case 1:
1884 src_chan = 2;
1885 src2_chan = 0;
1886 break;
1887 case 2:
1888 src_chan = 0;
1889 src2_chan = 2;
1890 break;
1891 case 3:
1892 src_chan = 1;
1893 src2_chan = 2;
1894 break;
1895 default:
1896 assert(0);
1897 src_chan = 0;
1898 src2_chan = 0;
1899 break;
1900 }
1901 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1902 if (r)
1903 return r;
1904 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1905 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1906 if (r)
1907 return r;
1908 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1909 alu.dst.sel = ctx->temp_reg;
1910 alu.dst.chan = i;
1911 if (i == 3)
1912 alu.last = 1;
1913 alu.dst.write = 1;
1914 r = r600_bc_add_alu(ctx->bc, &alu);
1915 if (r)
1916 return r;
1917 }
1918
1919 /* tmp1.z = RCP_e(|tmp1.z|) */
1920 memset(&alu, 0, sizeof(struct r600_bc_alu));
1921 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1922 alu.src[0].sel = ctx->temp_reg;
1923 alu.src[0].chan = 2;
1924 alu.src[0].abs = 1;
1925 alu.dst.sel = ctx->temp_reg;
1926 alu.dst.chan = 2;
1927 alu.dst.write = 1;
1928 alu.last = 1;
1929 r = r600_bc_add_alu(ctx->bc, &alu);
1930 if (r)
1931 return r;
1932
1933 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1934 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1935 * muladd has no writemask, have to use another temp
1936 */
1937 memset(&alu, 0, sizeof(struct r600_bc_alu));
1938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1939 alu.is_op3 = 1;
1940
1941 alu.src[0].sel = ctx->temp_reg;
1942 alu.src[0].chan = 0;
1943 alu.src[1].sel = ctx->temp_reg;
1944 alu.src[1].chan = 2;
1945
1946 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1947 alu.src[2].chan = 0;
1948
1949 alu.dst.sel = ctx->temp_reg;
1950 alu.dst.chan = 0;
1951 alu.dst.write = 1;
1952
1953 r = r600_bc_add_alu(ctx->bc, &alu);
1954 if (r)
1955 return r;
1956
1957 memset(&alu, 0, sizeof(struct r600_bc_alu));
1958 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1959 alu.is_op3 = 1;
1960
1961 alu.src[0].sel = ctx->temp_reg;
1962 alu.src[0].chan = 1;
1963 alu.src[1].sel = ctx->temp_reg;
1964 alu.src[1].chan = 2;
1965
1966 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1967 alu.src[2].chan = 0;
1968
1969 alu.dst.sel = ctx->temp_reg;
1970 alu.dst.chan = 1;
1971 alu.dst.write = 1;
1972
1973 alu.last = 1;
1974 r = r600_bc_add_alu(ctx->bc, &alu);
1975 if (r)
1976 return r;
1977
1978 lit_vals[0] = fui(1.5f);
1979
1980 r = r600_bc_add_literal(ctx->bc, lit_vals);
1981 if (r)
1982 return r;
1983 src_not_temp = FALSE;
1984 src_gpr = ctx->temp_reg;
1985 }
1986
1987 if (src_not_temp) {
1988 for (i = 0; i < 4; i++) {
1989 memset(&alu, 0, sizeof(struct r600_bc_alu));
1990 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1991 alu.src[0].sel = src_gpr;
1992 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1993 alu.dst.sel = ctx->temp_reg;
1994 alu.dst.chan = i;
1995 if (i == 3)
1996 alu.last = 1;
1997 alu.dst.write = 1;
1998 r = r600_bc_add_alu(ctx->bc, &alu);
1999 if (r)
2000 return r;
2001 }
2002 src_gpr = ctx->temp_reg;
2003 }
2004
2005 opcode = ctx->inst_info->r600_opcode;
2006 if (opcode == SQ_TEX_INST_SAMPLE &&
2007 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
2008 opcode = SQ_TEX_INST_SAMPLE_C;
2009
2010 memset(&tex, 0, sizeof(struct r600_bc_tex));
2011 tex.inst = opcode;
2012 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
2013 tex.resource_id = tex.sampler_id;
2014 tex.src_gpr = src_gpr;
2015 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2016 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2017 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2018 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2019 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2020 tex.src_sel_x = 0;
2021 tex.src_sel_y = 1;
2022 tex.src_sel_z = 2;
2023 tex.src_sel_w = 3;
2024
2025 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2026 tex.src_sel_x = 1;
2027 tex.src_sel_y = 0;
2028 tex.src_sel_z = 3;
2029 tex.src_sel_w = 1;
2030 }
2031
2032 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2033 tex.coord_type_x = 1;
2034 tex.coord_type_y = 1;
2035 tex.coord_type_z = 1;
2036 tex.coord_type_w = 1;
2037 }
2038
2039 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2040 tex.src_sel_w = 2;
2041
2042 r = r600_bc_add_tex(ctx->bc, &tex);
2043 if (r)
2044 return r;
2045
2046 /* add shadow ambient support - gallium doesn't do it yet */
2047 return 0;
2048 }
2049
2050 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2051 {
2052 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2053 struct r600_bc_alu_src r600_src[3];
2054 struct r600_bc_alu alu;
2055 unsigned i;
2056 int r;
2057
2058 r = tgsi_split_constant(ctx, r600_src);
2059 if (r)
2060 return r;
2061 r = tgsi_split_literal_constant(ctx, r600_src);
2062 if (r)
2063 return r;
2064 /* 1 - src0 */
2065 for (i = 0; i < 4; i++) {
2066 memset(&alu, 0, sizeof(struct r600_bc_alu));
2067 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2068 alu.src[0].sel = V_SQ_ALU_SRC_1;
2069 alu.src[0].chan = 0;
2070 alu.src[1] = r600_src[0];
2071 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
2072 alu.src[1].neg = 1;
2073 alu.dst.sel = ctx->temp_reg;
2074 alu.dst.chan = i;
2075 if (i == 3) {
2076 alu.last = 1;
2077 }
2078 alu.dst.write = 1;
2079 r = r600_bc_add_alu(ctx->bc, &alu);
2080 if (r)
2081 return r;
2082 }
2083 r = r600_bc_add_literal(ctx->bc, ctx->value);
2084 if (r)
2085 return r;
2086
2087 /* (1 - src0) * src2 */
2088 for (i = 0; i < 4; i++) {
2089 memset(&alu, 0, sizeof(struct r600_bc_alu));
2090 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2091 alu.src[0].sel = ctx->temp_reg;
2092 alu.src[0].chan = i;
2093 alu.src[1] = r600_src[2];
2094 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2095 alu.dst.sel = ctx->temp_reg;
2096 alu.dst.chan = i;
2097 if (i == 3) {
2098 alu.last = 1;
2099 }
2100 alu.dst.write = 1;
2101 r = r600_bc_add_alu(ctx->bc, &alu);
2102 if (r)
2103 return r;
2104 }
2105 r = r600_bc_add_literal(ctx->bc, ctx->value);
2106 if (r)
2107 return r;
2108
2109 /* src0 * src1 + (1 - src0) * src2 */
2110 for (i = 0; i < 4; i++) {
2111 memset(&alu, 0, sizeof(struct r600_bc_alu));
2112 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2113 alu.is_op3 = 1;
2114 alu.src[0] = r600_src[0];
2115 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2116 alu.src[1] = r600_src[1];
2117 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2118 alu.src[2].sel = ctx->temp_reg;
2119 alu.src[2].chan = i;
2120 alu.dst.sel = ctx->temp_reg;
2121 alu.dst.chan = i;
2122 if (i == 3) {
2123 alu.last = 1;
2124 }
2125 r = r600_bc_add_alu(ctx->bc, &alu);
2126 if (r)
2127 return r;
2128 }
2129 return tgsi_helper_copy(ctx, inst);
2130 }
2131
2132 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2133 {
2134 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2135 struct r600_bc_alu_src r600_src[3];
2136 struct r600_bc_alu alu;
2137 int use_temp = 0;
2138 int i, r;
2139
2140 r = tgsi_split_constant(ctx, r600_src);
2141 if (r)
2142 return r;
2143 r = tgsi_split_literal_constant(ctx, r600_src);
2144 if (r)
2145 return r;
2146
2147 if (inst->Dst[0].Register.WriteMask != 0xf)
2148 use_temp = 1;
2149
2150 for (i = 0; i < 4; i++) {
2151 memset(&alu, 0, sizeof(struct r600_bc_alu));
2152 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2153 alu.src[0] = r600_src[0];
2154 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2155
2156 alu.src[1] = r600_src[2];
2157 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2158
2159 alu.src[2] = r600_src[1];
2160 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2161
2162 if (use_temp)
2163 alu.dst.sel = ctx->temp_reg;
2164 else {
2165 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2166 if (r)
2167 return r;
2168 }
2169 alu.dst.chan = i;
2170 alu.dst.write = 1;
2171 alu.is_op3 = 1;
2172 if (i == 3)
2173 alu.last = 1;
2174 r = r600_bc_add_alu(ctx->bc, &alu);
2175 if (r)
2176 return r;
2177 }
2178 if (use_temp)
2179 return tgsi_helper_copy(ctx, inst);
2180 return 0;
2181 }
2182
2183 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2184 {
2185 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2186 struct r600_bc_alu_src r600_src[3];
2187 struct r600_bc_alu alu;
2188 uint32_t use_temp = 0;
2189 int i, r;
2190
2191 if (inst->Dst[0].Register.WriteMask != 0xf)
2192 use_temp = 1;
2193
2194 r = tgsi_split_constant(ctx, r600_src);
2195 if (r)
2196 return r;
2197 r = tgsi_split_literal_constant(ctx, r600_src);
2198 if (r)
2199 return r;
2200
2201 for (i = 0; i < 4; i++) {
2202 memset(&alu, 0, sizeof(struct r600_bc_alu));
2203 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2204
2205 alu.src[0] = r600_src[0];
2206 switch (i) {
2207 case 0:
2208 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2209 break;
2210 case 1:
2211 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2212 break;
2213 case 2:
2214 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2215 break;
2216 case 3:
2217 alu.src[0].sel = V_SQ_ALU_SRC_0;
2218 alu.src[0].chan = i;
2219 }
2220
2221 alu.src[1] = r600_src[1];
2222 switch (i) {
2223 case 0:
2224 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2225 break;
2226 case 1:
2227 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2228 break;
2229 case 2:
2230 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2231 break;
2232 case 3:
2233 alu.src[1].sel = V_SQ_ALU_SRC_0;
2234 alu.src[1].chan = i;
2235 }
2236
2237 alu.dst.sel = ctx->temp_reg;
2238 alu.dst.chan = i;
2239 alu.dst.write = 1;
2240
2241 if (i == 3)
2242 alu.last = 1;
2243 r = r600_bc_add_alu(ctx->bc, &alu);
2244 if (r)
2245 return r;
2246
2247 r = r600_bc_add_literal(ctx->bc, ctx->value);
2248 if (r)
2249 return r;
2250 }
2251
2252 for (i = 0; i < 4; i++) {
2253 memset(&alu, 0, sizeof(struct r600_bc_alu));
2254 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2255
2256 alu.src[0] = r600_src[0];
2257 switch (i) {
2258 case 0:
2259 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2260 break;
2261 case 1:
2262 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2263 break;
2264 case 2:
2265 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2266 break;
2267 case 3:
2268 alu.src[0].sel = V_SQ_ALU_SRC_0;
2269 alu.src[0].chan = i;
2270 }
2271
2272 alu.src[1] = r600_src[1];
2273 switch (i) {
2274 case 0:
2275 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2276 break;
2277 case 1:
2278 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2279 break;
2280 case 2:
2281 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2282 break;
2283 case 3:
2284 alu.src[1].sel = V_SQ_ALU_SRC_0;
2285 alu.src[1].chan = i;
2286 }
2287
2288 alu.src[2].sel = ctx->temp_reg;
2289 alu.src[2].neg = 1;
2290 alu.src[2].chan = i;
2291
2292 if (use_temp)
2293 alu.dst.sel = ctx->temp_reg;
2294 else {
2295 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2296 if (r)
2297 return r;
2298 }
2299 alu.dst.chan = i;
2300 alu.dst.write = 1;
2301 alu.is_op3 = 1;
2302 if (i == 3)
2303 alu.last = 1;
2304 r = r600_bc_add_alu(ctx->bc, &alu);
2305 if (r)
2306 return r;
2307
2308 r = r600_bc_add_literal(ctx->bc, ctx->value);
2309 if (r)
2310 return r;
2311 }
2312 if (use_temp)
2313 return tgsi_helper_copy(ctx, inst);
2314 return 0;
2315 }
2316
2317 static int tgsi_exp(struct r600_shader_ctx *ctx)
2318 {
2319 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2320 struct r600_bc_alu_src r600_src[3] = { { 0 } };
2321 struct r600_bc_alu alu;
2322 int r;
2323
2324 /* result.x = 2^floor(src); */
2325 if (inst->Dst[0].Register.WriteMask & 1) {
2326 memset(&alu, 0, sizeof(struct r600_bc_alu));
2327
2328 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2329 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2330 if (r)
2331 return r;
2332
2333 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2334
2335 alu.dst.sel = ctx->temp_reg;
2336 alu.dst.chan = 0;
2337 alu.dst.write = 1;
2338 alu.last = 1;
2339 r = r600_bc_add_alu(ctx->bc, &alu);
2340 if (r)
2341 return r;
2342
2343 r = r600_bc_add_literal(ctx->bc, ctx->value);
2344 if (r)
2345 return r;
2346
2347 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2348 alu.src[0].sel = ctx->temp_reg;
2349 alu.src[0].chan = 0;
2350
2351 alu.dst.sel = ctx->temp_reg;
2352 alu.dst.chan = 0;
2353 alu.dst.write = 1;
2354 alu.last = 1;
2355 r = r600_bc_add_alu(ctx->bc, &alu);
2356 if (r)
2357 return r;
2358
2359 r = r600_bc_add_literal(ctx->bc, ctx->value);
2360 if (r)
2361 return r;
2362 }
2363
2364 /* result.y = tmp - floor(tmp); */
2365 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2366 memset(&alu, 0, sizeof(struct r600_bc_alu));
2367
2368 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2369 alu.src[0] = r600_src[0];
2370 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2371 if (r)
2372 return r;
2373 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2374
2375 alu.dst.sel = ctx->temp_reg;
2376 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2377 // if (r)
2378 // return r;
2379 alu.dst.write = 1;
2380 alu.dst.chan = 1;
2381
2382 alu.last = 1;
2383
2384 r = r600_bc_add_alu(ctx->bc, &alu);
2385 if (r)
2386 return r;
2387 r = r600_bc_add_literal(ctx->bc, ctx->value);
2388 if (r)
2389 return r;
2390 }
2391
2392 /* result.z = RoughApprox2ToX(tmp);*/
2393 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2394 memset(&alu, 0, sizeof(struct r600_bc_alu));
2395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2396 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2397 if (r)
2398 return r;
2399 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2400
2401 alu.dst.sel = ctx->temp_reg;
2402 alu.dst.write = 1;
2403 alu.dst.chan = 2;
2404
2405 alu.last = 1;
2406
2407 r = r600_bc_add_alu(ctx->bc, &alu);
2408 if (r)
2409 return r;
2410 r = r600_bc_add_literal(ctx->bc, ctx->value);
2411 if (r)
2412 return r;
2413 }
2414
2415 /* result.w = 1.0;*/
2416 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2417 memset(&alu, 0, sizeof(struct r600_bc_alu));
2418
2419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2420 alu.src[0].sel = V_SQ_ALU_SRC_1;
2421 alu.src[0].chan = 0;
2422
2423 alu.dst.sel = ctx->temp_reg;
2424 alu.dst.chan = 3;
2425 alu.dst.write = 1;
2426 alu.last = 1;
2427 r = r600_bc_add_alu(ctx->bc, &alu);
2428 if (r)
2429 return r;
2430 r = r600_bc_add_literal(ctx->bc, ctx->value);
2431 if (r)
2432 return r;
2433 }
2434 return tgsi_helper_copy(ctx, inst);
2435 }
2436
2437 static int tgsi_log(struct r600_shader_ctx *ctx)
2438 {
2439 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2440 struct r600_bc_alu alu;
2441 int r;
2442
2443 /* result.x = floor(log2(src)); */
2444 if (inst->Dst[0].Register.WriteMask & 1) {
2445 memset(&alu, 0, sizeof(struct r600_bc_alu));
2446
2447 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2448 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2449 if (r)
2450 return r;
2451
2452 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2453
2454 alu.dst.sel = ctx->temp_reg;
2455 alu.dst.chan = 0;
2456 alu.dst.write = 1;
2457 alu.last = 1;
2458 r = r600_bc_add_alu(ctx->bc, &alu);
2459 if (r)
2460 return r;
2461
2462 r = r600_bc_add_literal(ctx->bc, ctx->value);
2463 if (r)
2464 return r;
2465
2466 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2467 alu.src[0].sel = ctx->temp_reg;
2468 alu.src[0].chan = 0;
2469
2470 alu.dst.sel = ctx->temp_reg;
2471 alu.dst.chan = 0;
2472 alu.dst.write = 1;
2473 alu.last = 1;
2474
2475 r = r600_bc_add_alu(ctx->bc, &alu);
2476 if (r)
2477 return r;
2478
2479 r = r600_bc_add_literal(ctx->bc, ctx->value);
2480 if (r)
2481 return r;
2482 }
2483
2484 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2485 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2486 memset(&alu, 0, sizeof(struct r600_bc_alu));
2487
2488 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2489 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2490 if (r)
2491 return r;
2492
2493 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2494
2495 alu.dst.sel = ctx->temp_reg;
2496 alu.dst.chan = 1;
2497 alu.dst.write = 1;
2498 alu.last = 1;
2499
2500 r = r600_bc_add_alu(ctx->bc, &alu);
2501 if (r)
2502 return r;
2503
2504 r = r600_bc_add_literal(ctx->bc, ctx->value);
2505 if (r)
2506 return r;
2507
2508 memset(&alu, 0, sizeof(struct r600_bc_alu));
2509
2510 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2511 alu.src[0].sel = ctx->temp_reg;
2512 alu.src[0].chan = 1;
2513
2514 alu.dst.sel = ctx->temp_reg;
2515 alu.dst.chan = 1;
2516 alu.dst.write = 1;
2517 alu.last = 1;
2518
2519 r = r600_bc_add_alu(ctx->bc, &alu);
2520 if (r)
2521 return r;
2522
2523 r = r600_bc_add_literal(ctx->bc, ctx->value);
2524 if (r)
2525 return r;
2526
2527 memset(&alu, 0, sizeof(struct r600_bc_alu));
2528
2529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2530 alu.src[0].sel = ctx->temp_reg;
2531 alu.src[0].chan = 1;
2532
2533 alu.dst.sel = ctx->temp_reg;
2534 alu.dst.chan = 1;
2535 alu.dst.write = 1;
2536 alu.last = 1;
2537
2538 r = r600_bc_add_alu(ctx->bc, &alu);
2539 if (r)
2540 return r;
2541
2542 r = r600_bc_add_literal(ctx->bc, ctx->value);
2543 if (r)
2544 return r;
2545
2546 memset(&alu, 0, sizeof(struct r600_bc_alu));
2547
2548 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2549 alu.src[0].sel = ctx->temp_reg;
2550 alu.src[0].chan = 1;
2551
2552 alu.dst.sel = ctx->temp_reg;
2553 alu.dst.chan = 1;
2554 alu.dst.write = 1;
2555 alu.last = 1;
2556
2557 r = r600_bc_add_alu(ctx->bc, &alu);
2558 if (r)
2559 return r;
2560
2561 r = r600_bc_add_literal(ctx->bc, ctx->value);
2562 if (r)
2563 return r;
2564
2565 memset(&alu, 0, sizeof(struct r600_bc_alu));
2566
2567 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2568
2569 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2570 if (r)
2571 return r;
2572
2573 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2574
2575 alu.src[1].sel = ctx->temp_reg;
2576 alu.src[1].chan = 1;
2577
2578 alu.dst.sel = ctx->temp_reg;
2579 alu.dst.chan = 1;
2580 alu.dst.write = 1;
2581 alu.last = 1;
2582
2583 r = r600_bc_add_alu(ctx->bc, &alu);
2584 if (r)
2585 return r;
2586
2587 r = r600_bc_add_literal(ctx->bc, ctx->value);
2588 if (r)
2589 return r;
2590 }
2591
2592 /* result.z = log2(src);*/
2593 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2594 memset(&alu, 0, sizeof(struct r600_bc_alu));
2595
2596 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2597 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2598 if (r)
2599 return r;
2600
2601 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2602
2603 alu.dst.sel = ctx->temp_reg;
2604 alu.dst.write = 1;
2605 alu.dst.chan = 2;
2606 alu.last = 1;
2607
2608 r = r600_bc_add_alu(ctx->bc, &alu);
2609 if (r)
2610 return r;
2611
2612 r = r600_bc_add_literal(ctx->bc, ctx->value);
2613 if (r)
2614 return r;
2615 }
2616
2617 /* result.w = 1.0; */
2618 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2619 memset(&alu, 0, sizeof(struct r600_bc_alu));
2620
2621 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2622 alu.src[0].sel = V_SQ_ALU_SRC_1;
2623 alu.src[0].chan = 0;
2624
2625 alu.dst.sel = ctx->temp_reg;
2626 alu.dst.chan = 3;
2627 alu.dst.write = 1;
2628 alu.last = 1;
2629
2630 r = r600_bc_add_alu(ctx->bc, &alu);
2631 if (r)
2632 return r;
2633
2634 r = r600_bc_add_literal(ctx->bc, ctx->value);
2635 if (r)
2636 return r;
2637 }
2638
2639 return tgsi_helper_copy(ctx, inst);
2640 }
2641
2642 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2643 {
2644 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2645 struct r600_bc_alu alu;
2646 int r;
2647 memset(&alu, 0, sizeof(struct r600_bc_alu));
2648
2649 switch (inst->Instruction.Opcode) {
2650 case TGSI_OPCODE_ARL:
2651 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2652 break;
2653 case TGSI_OPCODE_ARR:
2654 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2655 break;
2656 default:
2657 assert(0);
2658 return -1;
2659 }
2660
2661 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2662 if (r)
2663 return r;
2664 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2665 alu.last = 1;
2666 alu.dst.chan = 0;
2667 alu.dst.sel = ctx->temp_reg;
2668 alu.dst.write = 1;
2669 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2670 if (r)
2671 return r;
2672 memset(&alu, 0, sizeof(struct r600_bc_alu));
2673 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2674 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2675 if (r)
2676 return r;
2677 alu.src[0].sel = ctx->temp_reg;
2678 alu.src[0].chan = 0;
2679 alu.last = 1;
2680 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2681 if (r)
2682 return r;
2683 return 0;
2684 }
2685 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2686 {
2687 /* TODO from r600c, ar values don't persist between clauses */
2688 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2689 struct r600_bc_alu alu;
2690 int r;
2691 memset(&alu, 0, sizeof(struct r600_bc_alu));
2692
2693 switch (inst->Instruction.Opcode) {
2694 case TGSI_OPCODE_ARL:
2695 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2696 break;
2697 case TGSI_OPCODE_ARR:
2698 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2699 break;
2700 default:
2701 assert(0);
2702 return -1;
2703 }
2704
2705
2706 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2707 if (r)
2708 return r;
2709 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2710
2711 alu.last = 1;
2712
2713 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2714 if (r)
2715 return r;
2716 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2717 return 0;
2718 }
2719
2720 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2721 {
2722 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2723 struct r600_bc_alu alu;
2724 int i, r = 0;
2725
2726 for (i = 0; i < 4; i++) {
2727 memset(&alu, 0, sizeof(struct r600_bc_alu));
2728
2729 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2730 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2731 if (r)
2732 return r;
2733
2734 if (i == 0 || i == 3) {
2735 alu.src[0].sel = V_SQ_ALU_SRC_1;
2736 } else {
2737 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2738 if (r)
2739 return r;
2740 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2741 }
2742
2743 if (i == 0 || i == 2) {
2744 alu.src[1].sel = V_SQ_ALU_SRC_1;
2745 } else {
2746 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2747 if (r)
2748 return r;
2749 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2750 }
2751 if (i == 3)
2752 alu.last = 1;
2753 r = r600_bc_add_alu(ctx->bc, &alu);
2754 if (r)
2755 return r;
2756 }
2757 return 0;
2758 }
2759
2760 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2761 {
2762 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2763 struct r600_bc_alu alu;
2764 int r;
2765
2766 memset(&alu, 0, sizeof(struct r600_bc_alu));
2767 alu.inst = opcode;
2768 alu.predicate = 1;
2769
2770 alu.dst.sel = ctx->temp_reg;
2771 alu.dst.write = 1;
2772 alu.dst.chan = 0;
2773
2774 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2775 if (r)
2776 return r;
2777 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2778 alu.src[1].sel = V_SQ_ALU_SRC_0;
2779 alu.src[1].chan = 0;
2780
2781 alu.last = 1;
2782
2783 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2784 if (r)
2785 return r;
2786 return 0;
2787 }
2788
2789 static int pops(struct r600_shader_ctx *ctx, int pops)
2790 {
2791 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2792 ctx->bc->cf_last->pop_count = pops;
2793 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2794 return 0;
2795 }
2796
2797 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2798 {
2799 switch(reason) {
2800 case FC_PUSH_VPM:
2801 ctx->bc->callstack[ctx->bc->call_sp].current--;
2802 break;
2803 case FC_PUSH_WQM:
2804 case FC_LOOP:
2805 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2806 break;
2807 case FC_REP:
2808 /* TOODO : for 16 vp asic should -= 2; */
2809 ctx->bc->callstack[ctx->bc->call_sp].current --;
2810 break;
2811 }
2812 }
2813
2814 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2815 {
2816 if (check_max_only) {
2817 int diff;
2818 switch (reason) {
2819 case FC_PUSH_VPM:
2820 diff = 1;
2821 break;
2822 case FC_PUSH_WQM:
2823 diff = 4;
2824 break;
2825 default:
2826 assert(0);
2827 diff = 0;
2828 }
2829 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2830 ctx->bc->callstack[ctx->bc->call_sp].max) {
2831 ctx->bc->callstack[ctx->bc->call_sp].max =
2832 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2833 }
2834 return;
2835 }
2836 switch (reason) {
2837 case FC_PUSH_VPM:
2838 ctx->bc->callstack[ctx->bc->call_sp].current++;
2839 break;
2840 case FC_PUSH_WQM:
2841 case FC_LOOP:
2842 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2843 break;
2844 case FC_REP:
2845 ctx->bc->callstack[ctx->bc->call_sp].current++;
2846 break;
2847 }
2848
2849 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2850 ctx->bc->callstack[ctx->bc->call_sp].max) {
2851 ctx->bc->callstack[ctx->bc->call_sp].max =
2852 ctx->bc->callstack[ctx->bc->call_sp].current;
2853 }
2854 }
2855
2856 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2857 {
2858 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2859
2860 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2861 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2862 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2863 sp->num_mid++;
2864 }
2865
2866 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2867 {
2868 ctx->bc->fc_sp++;
2869 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2870 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2871 }
2872
2873 static void fc_poplevel(struct r600_shader_ctx *ctx)
2874 {
2875 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2876 if (sp->mid) {
2877 free(sp->mid);
2878 sp->mid = NULL;
2879 }
2880 sp->num_mid = 0;
2881 sp->start = NULL;
2882 sp->type = 0;
2883 ctx->bc->fc_sp--;
2884 }
2885
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It holds unfinished helpers for RETURN / flag-based loop exits; several
 * of them are empty stubs.
 */
#if 0
/* Append a RETURN control-flow instruction; always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Append a JUMP with the given pop count. `offset` is not used yet, so the
 * jump target is never filled in.
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Sequence: test flag, JUMP (pop 1, offset 4), set flag to V_SQ_ALU_SRC_0,
 * POP ifidx + 1 levels, RETURN.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Sequence: test flag, emit the current instruction's r600_opcode as a CF
 * instruction with pop_count 1, record it in the mid list of level `fc_sp`,
 * then POP one level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2933
2934 static int tgsi_if(struct r600_shader_ctx *ctx)
2935 {
2936 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2937
2938 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2939
2940 fc_pushlevel(ctx, FC_IF);
2941
2942 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2943 return 0;
2944 }
2945
2946 static int tgsi_else(struct r600_shader_ctx *ctx)
2947 {
2948 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2949 ctx->bc->cf_last->pop_count = 1;
2950
2951 fc_set_mid(ctx, ctx->bc->fc_sp);
2952 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2953 return 0;
2954 }
2955
2956 static int tgsi_endif(struct r600_shader_ctx *ctx)
2957 {
2958 pops(ctx, 1);
2959 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2960 R600_ERR("if/endif unbalanced in shader\n");
2961 return -1;
2962 }
2963
2964 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2965 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2966 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2967 } else {
2968 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2969 }
2970 fc_poplevel(ctx);
2971
2972 callstack_decrease_current(ctx, FC_PUSH_VPM);
2973 return 0;
2974 }
2975
2976 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2977 {
2978 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2979
2980 fc_pushlevel(ctx, FC_LOOP);
2981
2982 /* check stack depth */
2983 callstack_check_depth(ctx, FC_LOOP, 0);
2984 return 0;
2985 }
2986
2987 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2988 {
2989 int i;
2990
2991 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2992
2993 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2994 R600_ERR("loop/endloop in shader code are not paired.\n");
2995 return -EINVAL;
2996 }
2997
2998 /* fixup loop pointers - from r600isa
2999 LOOP END points to CF after LOOP START,
3000 LOOP START point to CF after LOOP END
3001 BRK/CONT point to LOOP END CF
3002 */
3003 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3004
3005 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3006
3007 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3008 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3009 }
3010 /* TODO add LOOPRET support */
3011 fc_poplevel(ctx);
3012 callstack_decrease_current(ctx, FC_LOOP);
3013 return 0;
3014 }
3015
3016 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3017 {
3018 unsigned int fscp;
3019
3020 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3021 {
3022 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3023 break;
3024 }
3025
3026 if (fscp == 0) {
3027 R600_ERR("Break not inside loop/endloop pair\n");
3028 return -EINVAL;
3029 }
3030
3031 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3032 ctx->bc->cf_last->pop_count = 1;
3033
3034 fc_set_mid(ctx, fscp);
3035
3036 pops(ctx, 1);
3037 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3038 return 0;
3039 }
3040
3041 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3042 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3043 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3044 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3045
3046 /* FIXME:
3047 * For state trackers other than OpenGL, we'll want to use
3048 * _RECIP_IEEE instead.
3049 */
3050 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3051
3052 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3053 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3054 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3055 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3056 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3057 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3058 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3059 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3060 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3061 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3062 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3063 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3064 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3065 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3066 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3067 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068 /* gap */
3069 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071 /* gap */
3072 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3075 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3077 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3079 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3080 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3081 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3082 /* gap */
3083 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3085 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3087 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3088 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3089 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3090 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3091 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3097 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3099 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3100 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3101 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3102 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3104 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3106 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3113 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3117 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3118 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3119 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3120 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3123 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3124 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3125 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3126 /* gap */
3127 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3130 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3131 /* gap */
3132 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3140 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141 /* gap */
3142 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3143 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3144 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3151 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3154 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3156 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157 /* gap */
3158 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163 /* gap */
3164 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3165 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3171 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3172 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3173 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3174 /* gap */
3175 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3176 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3177 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3178 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3181 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3182 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3183 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3184 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3186 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3187 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3188 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3190 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3193 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3194 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3195 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3199 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203 };
3204
/*
 * TGSI translation table for the "eg" code path (presumably Evergreen,
 * going by the EG_ prefix on most constants -- confirm).
 *
 * Each initializer lists four values, in order:
 *   1. the TGSI opcode, or a raw number where no TGSI_OPCODE_* name is used;
 *   2. a flag that is 1 only for MAD (the one entry paired with an OP3
 *      hardware instruction) and 0 everywhere else;
 *   3. the hardware instruction constant associated with the opcode -- ALU
 *      (EG_V_SQ_ALU_*), control flow (EG_V_SQ_CF_*) or texture (SQ_TEX_INST_*);
 *   4. the translation callback for the opcode.
 * The struct's field names are defined outside this table.
 *
 * Opcodes routed to tgsi_unsupported all carry the NOP ALU constant.
 *
 * The entries appear to be in ascending opcode order, with "gap" placeholder
 * entries for raw numbers, so the table is presumably indexed directly by
 * TGSI opcode value.
 * NOTE(review): confirm against the lookup site before reordering, inserting
 * or removing entries.
 */
3205 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3206 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3207 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3208 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3209 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3210 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3211 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3212 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3214 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3215 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3216 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3217 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3218 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3219 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3220 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT via the _swap handler; presumably operands are exchanged -- confirm */
3221 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3222 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry with flag 1 and an OP3 constant */
3223 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* shares the ADD constant; presumably tgsi_op2 negates the second source -- confirm */
3224 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3225 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3226 /* gap */
3227 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3228 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3229 /* gap */
3230 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3231 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3232 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3233 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3234 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3235 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3236 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3237 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3238 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3239 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3240 /* gap */
3241 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* shares the MOV constant; presumably tgsi_op2 applies an abs source modifier -- confirm */
3243 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3244 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3245 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3246 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3247 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3248 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3249 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3250 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3251 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3254 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3255 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3257 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3258 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, /* SETGE via the _swap handler; presumably operands are exchanged -- confirm */
3259 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3260 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3262 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3264 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3268 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3269 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, /* same handler as ARL */
3271 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3272 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3273 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3274 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3275 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3276 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3277 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, /* NOTE(review): same constant as TXL below; confirm TXB does not need a distinct sample instruction */
3278 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3281 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3282 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3283 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3284 /* gap */
3285 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3288 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3289 /* gap */
3290 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3291 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3292 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3294 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3295 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3296 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3297 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3298 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3299 /* gap */
3300 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3301 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3302 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3303 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3304 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3305 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3309 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3312 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3314 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315 /* gap */
3316 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321 /* gap */
3322 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3323 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3325 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3326 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3327 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3328 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3329 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3330 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3331 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3332 /* gap */
3333 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3334 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3335 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3336 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3341 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3342 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3343 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3344 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3345 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3346 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3347 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3348 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3349 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3359 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* final entry of the table */
3361 };