r600g: Cleanup fetch shader resources in r600_pipe_shader_destroy().
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_opcodes.h"
32 #include "r600d.h"
33 #include <stdio.h>
34 #include <errno.h>
35
36 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37 {
38 struct r600_pipe_state *rstate = &shader->rstate;
39 struct r600_shader *rshader = &shader->shader;
40 unsigned spi_vs_out_id[10];
41 unsigned i, tmp;
42
43 /* clear previous register */
44 rstate->nregs = 0;
45
46 /* so far never got proper semantic id from tgsi */
47 for (i = 0; i < 10; i++) {
48 spi_vs_out_id[i] = 0;
49 }
50 for (i = 0; i < 32; i++) {
51 tmp = i << ((i & 3) * 8);
52 spi_vs_out_id[i / 4] |= tmp;
53 }
54 for (i = 0; i < 10; i++) {
55 r600_pipe_state_add_reg(rstate,
56 R_028614_SPI_VS_OUT_ID_0 + i * 4,
57 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58 }
59
60 r600_pipe_state_add_reg(rstate,
61 R_0286C4_SPI_VS_OUT_CONFIG,
62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63 0xFFFFFFFF, NULL);
64 r600_pipe_state_add_reg(rstate,
65 R_028868_SQ_PGM_RESOURCES_VS,
66 S_028868_NUM_GPRS(rshader->bc.ngpr) |
67 S_028868_STACK_SIZE(rshader->bc.nstack),
68 0xFFFFFFFF, NULL);
69 r600_pipe_state_add_reg(rstate,
70 R_0288A4_SQ_PGM_RESOURCES_FS,
71 0x00000000, 0xFFFFFFFF, NULL);
72 r600_pipe_state_add_reg(rstate,
73 R_0288D0_SQ_PGM_CF_OFFSET_VS,
74 0x00000000, 0xFFFFFFFF, NULL);
75 r600_pipe_state_add_reg(rstate,
76 R_0288DC_SQ_PGM_CF_OFFSET_FS,
77 0x00000000, 0xFFFFFFFF, NULL);
78 r600_pipe_state_add_reg(rstate,
79 R_028858_SQ_PGM_START_VS,
80 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
81 r600_pipe_state_add_reg(rstate,
82 R_028894_SQ_PGM_START_FS,
83 r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
84
85 r600_pipe_state_add_reg(rstate,
86 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
87 0xFFFFFFFF, NULL);
88
89 }
90
91 int r600_find_vs_semantic_index(struct r600_shader *vs,
92 struct r600_shader *ps, int id)
93 {
94 struct r600_shader_io *input = &ps->input[id];
95
96 for (int i = 0; i < vs->noutput; i++) {
97 if (input->name == vs->output[i].name &&
98 input->sid == vs->output[i].sid) {
99 return i - 1;
100 }
101 }
102 return 0;
103 }
104
105 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
106 {
107 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
108 struct r600_pipe_state *rstate = &shader->rstate;
109 struct r600_shader *rshader = &shader->shader;
110 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
111 int pos_index = -1, face_index = -1;
112
113 /* clear previous register */
114 rstate->nregs = 0;
115
116 for (i = 0; i < rshader->ninput; i++) {
117 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
118 if (rshader->input[i].centroid)
119 tmp |= S_028644_SEL_CENTROID(1);
120 if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
121 tmp |= S_028644_SEL_LINEAR(1);
122
123 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
124 pos_index = i;
125 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
126 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
127 rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
128 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
129 }
130 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
131 face_index = i;
132 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
133 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
134 tmp |= S_028644_PT_SPRITE_TEX(1);
135 }
136 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
137 }
138 for (i = 0; i < rshader->noutput; i++) {
139 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
140 r600_pipe_state_add_reg(rstate,
141 R_02880C_DB_SHADER_CONTROL,
142 S_02880C_Z_EXPORT_ENABLE(1),
143 S_02880C_Z_EXPORT_ENABLE(1), NULL);
144 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
145 r600_pipe_state_add_reg(rstate,
146 R_02880C_DB_SHADER_CONTROL,
147 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
148 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
149 }
150
151 exports_ps = 0;
152 num_cout = 0;
153 for (i = 0; i < rshader->noutput; i++) {
154 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
155 exports_ps |= 1;
156 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
157 num_cout++;
158 }
159 }
160 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
161 if (!exports_ps) {
162 /* always at least export 1 component per pixel */
163 exports_ps = 2;
164 }
165
166 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
167 S_0286CC_PERSP_GRADIENT_ENA(1);
168 spi_input_z = 0;
169 if (pos_index != -1) {
170 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
171 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
172 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
173 S_0286CC_BARYC_SAMPLE_CNTL(1));
174 spi_input_z |= 1;
175 }
176
177 spi_ps_in_control_1 = 0;
178 if (face_index != -1) {
179 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
180 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
181 }
182
183 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
184 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
185 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
186 r600_pipe_state_add_reg(rstate,
187 R_028840_SQ_PGM_START_PS,
188 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
189 r600_pipe_state_add_reg(rstate,
190 R_028850_SQ_PGM_RESOURCES_PS,
191 S_028868_NUM_GPRS(rshader->bc.ngpr) |
192 S_028868_STACK_SIZE(rshader->bc.nstack),
193 0xFFFFFFFF, NULL);
194 r600_pipe_state_add_reg(rstate,
195 R_028854_SQ_PGM_EXPORTS_PS,
196 exports_ps, 0xFFFFFFFF, NULL);
197 r600_pipe_state_add_reg(rstate,
198 R_0288CC_SQ_PGM_CF_OFFSET_PS,
199 0x00000000, 0xFFFFFFFF, NULL);
200
201 if (rshader->uses_kill) {
202 /* only set some bits here, the other bits are set in the dsa state */
203 r600_pipe_state_add_reg(rstate,
204 R_02880C_DB_SHADER_CONTROL,
205 S_02880C_KILL_ENABLE(1),
206 S_02880C_KILL_ENABLE(1), NULL);
207 }
208 r600_pipe_state_add_reg(rstate,
209 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
210 0xFFFFFFFF, NULL);
211 }
212
213 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
214 {
215 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
216 struct r600_shader *rshader = &shader->shader;
217 void *ptr;
218
219 /* copy new shader */
220 if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
221 shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
222 if (shader->bo_fetch == NULL) {
223 return -ENOMEM;
224 }
225 ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
226 memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
227 r600_bo_unmap(rctx->radeon, shader->bo_fetch);
228 }
229 if (shader->bo == NULL) {
230 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
231 if (shader->bo == NULL) {
232 return -ENOMEM;
233 }
234 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
235 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
236 r600_bo_unmap(rctx->radeon, shader->bo);
237 }
238 /* build state */
239 rshader->flat_shade = rctx->flatshade;
240 switch (rshader->processor_type) {
241 case TGSI_PROCESSOR_VERTEX:
242 if (rshader->family >= CHIP_CEDAR) {
243 evergreen_pipe_shader_vs(ctx, shader);
244 } else {
245 r600_pipe_shader_vs(ctx, shader);
246 }
247 break;
248 case TGSI_PROCESSOR_FRAGMENT:
249 if (rshader->family >= CHIP_CEDAR) {
250 evergreen_pipe_shader_ps(ctx, shader);
251 } else {
252 r600_pipe_shader_ps(ctx, shader);
253 }
254 break;
255 default:
256 return -EINVAL;
257 }
258 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
259 return 0;
260 }
261
262 static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
263 {
264 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
265 struct r600_shader *shader = &rshader->shader;
266 const struct util_format_description *desc;
267 enum pipe_format resource_format[160];
268 unsigned i, nresources = 0;
269 struct r600_bc *bc = &shader->bc_fetch;
270 struct r600_bc_cf *cf;
271 struct r600_bc_vtx *vtx;
272
273 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
274 return 0;
275 /* doing a full memcmp fell over the refcount */
276 if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
277 (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
278 rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
279 return 0;
280 }
281 rshader->vertex_elements = *rctx->vertex_elements;
282 for (i = 0; i < rctx->vertex_elements->count; i++) {
283 resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
284 }
285 r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
286 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
287 switch (cf->inst) {
288 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
289 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
290 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
291 desc = util_format_description(resource_format[vtx->buffer_id]);
292 if (desc == NULL) {
293 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
294 return -EINVAL;
295 }
296 vtx->dst_sel_x = desc->swizzle[0];
297 vtx->dst_sel_y = desc->swizzle[1];
298 vtx->dst_sel_z = desc->swizzle[2];
299 vtx->dst_sel_w = desc->swizzle[3];
300 }
301 break;
302 default:
303 break;
304 }
305 }
306 return r600_bc_build(&shader->bc_fetch);
307 }
308
309 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
310 {
311 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
312 int r;
313
314 if (shader == NULL)
315 return -EINVAL;
316 /* there should be enough input */
317 if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
318 R600_ERR("%d resources provided, expecting %d\n",
319 rctx->vertex_elements->count, shader->shader.bc.nresource);
320 return -EINVAL;
321 }
322 r = r600_shader_update(ctx, shader);
323 if (r)
324 return r;
325 return r600_pipe_shader(ctx, shader);
326 }
327
328 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
329 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
330 {
331 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
332 int r;
333
334 //fprintf(stderr, "--------------------------------------------------------------\n");
335 //tgsi_dump(tokens, 0);
336 shader->shader.family = r600_get_family(rctx->radeon);
337 r = r600_shader_from_tgsi(tokens, &shader->shader);
338 if (r) {
339 R600_ERR("translation from TGSI failed !\n");
340 return r;
341 }
342 r = r600_bc_build(&shader->shader.bc);
343 if (r) {
344 R600_ERR("building bytecode failed !\n");
345 return r;
346 }
347 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
348 r = r600_bc_build(&shader->shader.bc_fetch);
349 if (r) {
350 R600_ERR("building bytecode failed !\n");
351 return r;
352 }
353 }
354 //r600_bc_dump(&shader->shader.bc);
355 //fprintf(stderr, "______________________________________________________________\n");
356 return 0;
357 }
358
359 void
360 r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
361 {
362 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
363
364 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
365 r600_bo_reference(rctx->radeon, &shader->bo_fetch, NULL);
366 r600_bc_clear(&shader->shader.bc_fetch);
367 }
368
369 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
370
371 r600_bc_clear(&shader->shader.bc);
372
373 /* FIXME: is there more stuff to free? */
374 }
375
376 /*
377 * tgsi -> r600 shader
378 */
379 struct r600_shader_tgsi_instruction;
380
381 struct r600_shader_ctx {
382 struct tgsi_shader_info info;
383 struct tgsi_parse_context parse;
384 const struct tgsi_token *tokens;
385 unsigned type;
386 unsigned file_offset[TGSI_FILE_COUNT];
387 unsigned temp_reg;
388 struct r600_shader_tgsi_instruction *inst_info;
389 struct r600_bc *bc;
390 struct r600_bc *bc_fetch;
391 struct r600_shader *shader;
392 u32 value[4];
393 u32 *literals;
394 u32 nliterals;
395 u32 max_driver_temp_used;
396 /* needed for evergreen interpolation */
397 boolean input_centroid;
398 boolean input_linear;
399 boolean input_perspective;
400 int num_interp_gpr;
401 };
402
/*
 * One row of the opcode tables, indexed by TGSI opcode in
 * r600_shader_from_tgsi().
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode of this row; reported by tgsi_unsupported() */
	unsigned	tgsi_opcode;
	/* presumably marks three-operand hardware instructions -- TODO confirm */
	unsigned	is_op3;
	/* presumably the hardware opcode the handler emits -- TODO confirm */
	unsigned	r600_opcode;
	/* handler run for the instruction; non-zero return aborts translation */
	int		(*process)(struct r600_shader_ctx *ctx);
};

/* opcode tables, selected by chip revision in r600_shader_from_tgsi() */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[];

static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
412
413 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
414 {
415 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
416 int j;
417
418 if (i->Instruction.NumDstRegs > 1) {
419 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
420 return -EINVAL;
421 }
422 if (i->Instruction.Predicate) {
423 R600_ERR("predicate unsupported\n");
424 return -EINVAL;
425 }
426 #if 0
427 if (i->Instruction.Label) {
428 R600_ERR("label unsupported\n");
429 return -EINVAL;
430 }
431 #endif
432 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
433 if (i->Src[j].Register.Dimension) {
434 R600_ERR("unsupported src %d (dimension %d)\n", j,
435 i->Src[j].Register.Dimension);
436 return -EINVAL;
437 }
438 }
439 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
440 if (i->Dst[j].Register.Dimension) {
441 R600_ERR("unsupported dst (dimension)\n");
442 return -EINVAL;
443 }
444 }
445 return 0;
446 }
447
448 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
449 {
450 int i, r;
451 struct r600_bc_alu alu;
452 int gpr = 0, base_chan = 0;
453 int ij_index = 0;
454
455 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
456 ij_index = 0;
457 if (ctx->shader->input[input].centroid)
458 ij_index++;
459 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
460 ij_index = 0;
461 /* if we have perspective add one */
462 if (ctx->input_perspective) {
463 ij_index++;
464 /* if we have perspective centroid */
465 if (ctx->input_centroid)
466 ij_index++;
467 }
468 if (ctx->shader->input[input].centroid)
469 ij_index++;
470 }
471
472 /* work out gpr and base_chan from index */
473 gpr = ij_index / 2;
474 base_chan = (2 * (ij_index % 2)) + 1;
475
476 for (i = 0; i < 8; i++) {
477 memset(&alu, 0, sizeof(struct r600_bc_alu));
478
479 if (i < 4)
480 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
481 else
482 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
483
484 if ((i > 1) && (i < 6)) {
485 alu.dst.sel = ctx->shader->input[input].gpr;
486 alu.dst.write = 1;
487 }
488
489 alu.dst.chan = i % 4;
490
491 alu.src[0].sel = gpr;
492 alu.src[0].chan = (base_chan - (i % 2));
493
494 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
495
496 alu.bank_swizzle_force = SQ_ALU_VEC_210;
497 if ((i % 4) == 3)
498 alu.last = 1;
499 r = r600_bc_add_alu(ctx->bc, &alu);
500 if (r)
501 return r;
502 }
503 return 0;
504 }
505
506
507 static int tgsi_declaration(struct r600_shader_ctx *ctx)
508 {
509 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
510 struct r600_bc_vtx vtx;
511 unsigned i;
512 int r;
513
514 switch (d->Declaration.File) {
515 case TGSI_FILE_INPUT:
516 i = ctx->shader->ninput++;
517 ctx->shader->input[i].name = d->Semantic.Name;
518 ctx->shader->input[i].sid = d->Semantic.Index;
519 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
520 ctx->shader->input[i].centroid = d->Declaration.Centroid;
521 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
522 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
523 /* turn input into fetch */
524 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
525 vtx.inst = 0;
526 vtx.fetch_type = 0;
527 vtx.buffer_id = i;
528 /* register containing the index into the buffer */
529 vtx.src_gpr = 0;
530 vtx.src_sel_x = 0;
531 vtx.mega_fetch_count = 0x1F;
532 vtx.dst_gpr = ctx->shader->input[i].gpr;
533 vtx.dst_sel_x = 0;
534 vtx.dst_sel_y = 1;
535 vtx.dst_sel_z = 2;
536 vtx.dst_sel_w = 3;
537 vtx.use_const_fields = 1;
538 r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
539 if (r)
540 return r;
541 }
542 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
543 /* turn input into interpolate on EG */
544 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
545 if (ctx->shader->input[i].interpolate > 0) {
546 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
547 evergreen_interp_alu(ctx, i);
548 }
549 }
550 }
551 break;
552 case TGSI_FILE_OUTPUT:
553 i = ctx->shader->noutput++;
554 ctx->shader->output[i].name = d->Semantic.Name;
555 ctx->shader->output[i].sid = d->Semantic.Index;
556 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
557 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
558 break;
559 case TGSI_FILE_CONSTANT:
560 case TGSI_FILE_TEMPORARY:
561 case TGSI_FILE_SAMPLER:
562 case TGSI_FILE_ADDRESS:
563 break;
564 default:
565 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
566 return -EINVAL;
567 }
568 return 0;
569 }
570
571 static int r600_get_temp(struct r600_shader_ctx *ctx)
572 {
573 return ctx->temp_reg + ctx->max_driver_temp_used++;
574 }
575
576 /*
577 * for evergreen we need to scan the shader to find the number of GPRs we need to
578 * reserve for interpolation.
579 *
580 * we need to know if we are going to emit
581 * any centroid inputs
582 * if perspective and linear are required
583 */
584 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
585 {
586 int i;
587 int num_baryc;
588
589 ctx->input_linear = FALSE;
590 ctx->input_perspective = FALSE;
591 ctx->input_centroid = FALSE;
592 ctx->num_interp_gpr = 1;
593
594 /* any centroid inputs */
595 for (i = 0; i < ctx->info.num_inputs; i++) {
596 /* skip position/face */
597 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
598 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
599 continue;
600 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
601 ctx->input_linear = TRUE;
602 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
603 ctx->input_perspective = TRUE;
604 if (ctx->info.input_centroid[i])
605 ctx->input_centroid = TRUE;
606 }
607
608 num_baryc = 0;
609 /* ignoring sample for now */
610 if (ctx->input_perspective)
611 num_baryc++;
612 if (ctx->input_linear)
613 num_baryc++;
614 if (ctx->input_centroid)
615 num_baryc *= 2;
616
617 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
618
619 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
620 return ctx->num_interp_gpr;
621 }
622
623 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
624 {
625 struct tgsi_full_immediate *immediate;
626 struct r600_shader_ctx ctx;
627 struct r600_bc_output output[32];
628 unsigned output_done, noutput;
629 unsigned opcode;
630 int i, r = 0, pos0;
631
632 ctx.bc = &shader->bc;
633 ctx.bc_fetch = &shader->bc_fetch;
634 ctx.shader = shader;
635 r = r600_bc_init(ctx.bc, shader->family);
636 if (r)
637 return r;
638 ctx.tokens = tokens;
639 tgsi_scan_shader(tokens, &ctx.info);
640 tgsi_parse_init(&ctx.parse, tokens);
641 ctx.type = ctx.parse.FullHeader.Processor.Processor;
642 shader->processor_type = ctx.type;
643 if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
644 r = r600_bc_init(ctx.bc_fetch, shader->family);
645 if (r)
646 return r;
647 ctx.bc_fetch->type = -1;
648 }
649 ctx.bc->type = shader->processor_type;
650
651 /* register allocations */
652 /* Values [0,127] correspond to GPR[0..127].
653 * Values [128,159] correspond to constant buffer bank 0
654 * Values [160,191] correspond to constant buffer bank 1
655 * Values [256,511] correspond to cfile constants c[0..255].
656 * Other special values are shown in the list below.
657 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
658 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
659 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
660 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
661 * 248 SQ_ALU_SRC_0: special constant 0.0.
662 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
663 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
664 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
665 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
666 * 253 SQ_ALU_SRC_LITERAL: literal constant.
667 * 254 SQ_ALU_SRC_PV: previous vector result.
668 * 255 SQ_ALU_SRC_PS: previous scalar result.
669 */
670 for (i = 0; i < TGSI_FILE_COUNT; i++) {
671 ctx.file_offset[i] = 0;
672 }
673 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
674 ctx.file_offset[TGSI_FILE_INPUT] = 1;
675 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
676 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
677 } else {
678 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
679 }
680 }
681 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
682 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
683 }
684 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
685 ctx.info.file_count[TGSI_FILE_INPUT];
686 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
687 ctx.info.file_count[TGSI_FILE_OUTPUT];
688
689 ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
690
691 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
692 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
693 ctx.info.file_count[TGSI_FILE_TEMPORARY];
694
695 ctx.nliterals = 0;
696 ctx.literals = NULL;
697
698 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
699 tgsi_parse_token(&ctx.parse);
700 switch (ctx.parse.FullToken.Token.Type) {
701 case TGSI_TOKEN_TYPE_IMMEDIATE:
702 immediate = &ctx.parse.FullToken.FullImmediate;
703 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
704 if(ctx.literals == NULL) {
705 r = -ENOMEM;
706 goto out_err;
707 }
708 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
709 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
710 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
711 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
712 ctx.nliterals++;
713 break;
714 case TGSI_TOKEN_TYPE_DECLARATION:
715 r = tgsi_declaration(&ctx);
716 if (r)
717 goto out_err;
718 break;
719 case TGSI_TOKEN_TYPE_INSTRUCTION:
720 r = tgsi_is_supported(&ctx);
721 if (r)
722 goto out_err;
723 ctx.max_driver_temp_used = 0;
724 /* reserve first tmp for everyone */
725 r600_get_temp(&ctx);
726 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
727 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
728 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
729 else
730 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
731 r = ctx.inst_info->process(&ctx);
732 if (r)
733 goto out_err;
734 r = r600_bc_add_literal(ctx.bc, ctx.value);
735 if (r)
736 goto out_err;
737 break;
738 default:
739 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
740 r = -EINVAL;
741 goto out_err;
742 }
743 }
744 /* export output */
745 noutput = shader->noutput;
746 for (i = 0, pos0 = 0; i < noutput; i++) {
747 memset(&output[i], 0, sizeof(struct r600_bc_output));
748 output[i].gpr = shader->output[i].gpr;
749 output[i].elem_size = 3;
750 output[i].swizzle_x = 0;
751 output[i].swizzle_y = 1;
752 output[i].swizzle_z = 2;
753 output[i].swizzle_w = 3;
754 output[i].barrier = 1;
755 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
756 output[i].array_base = i - pos0;
757 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
758 switch (ctx.type) {
759 case TGSI_PROCESSOR_VERTEX:
760 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
761 output[i].array_base = 60;
762 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
763 /* position doesn't count in array_base */
764 pos0++;
765 }
766 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
767 output[i].array_base = 61;
768 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
769 /* position doesn't count in array_base */
770 pos0++;
771 }
772 break;
773 case TGSI_PROCESSOR_FRAGMENT:
774 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
775 output[i].array_base = shader->output[i].sid;
776 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
777 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
778 output[i].array_base = 61;
779 output[i].swizzle_x = 2;
780 output[i].swizzle_y = 7;
781 output[i].swizzle_z = output[i].swizzle_w = 7;
782 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
783 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
784 output[i].array_base = 61;
785 output[i].swizzle_x = 7;
786 output[i].swizzle_y = 1;
787 output[i].swizzle_z = output[i].swizzle_w = 7;
788 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
789 } else {
790 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
791 r = -EINVAL;
792 goto out_err;
793 }
794 break;
795 default:
796 R600_ERR("unsupported processor type %d\n", ctx.type);
797 r = -EINVAL;
798 goto out_err;
799 }
800 }
801 /* add fake param output for vertex shader if no param is exported */
802 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
803 for (i = 0, pos0 = 0; i < noutput; i++) {
804 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
805 pos0 = 1;
806 break;
807 }
808 }
809 if (!pos0) {
810 memset(&output[i], 0, sizeof(struct r600_bc_output));
811 output[i].gpr = 0;
812 output[i].elem_size = 3;
813 output[i].swizzle_x = 0;
814 output[i].swizzle_y = 1;
815 output[i].swizzle_z = 2;
816 output[i].swizzle_w = 3;
817 output[i].barrier = 1;
818 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
819 output[i].array_base = 0;
820 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
821 noutput++;
822 }
823 }
824 /* add fake pixel export */
825 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
826 memset(&output[0], 0, sizeof(struct r600_bc_output));
827 output[0].gpr = 0;
828 output[0].elem_size = 3;
829 output[0].swizzle_x = 7;
830 output[0].swizzle_y = 7;
831 output[0].swizzle_z = 7;
832 output[0].swizzle_w = 7;
833 output[0].barrier = 1;
834 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
835 output[0].array_base = 0;
836 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
837 noutput++;
838 }
839 /* set export done on last export of each type */
840 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
841 if (i == (noutput - 1)) {
842 output[i].end_of_program = 1;
843 }
844 if (!(output_done & (1 << output[i].type))) {
845 output_done |= (1 << output[i].type);
846 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
847 }
848 }
849 /* add return to fetch shader */
850 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
851 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
852 r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
853 } else {
854 r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
855 }
856 }
857 /* add output to bytecode */
858 for (i = 0; i < noutput; i++) {
859 r = r600_bc_add_output(ctx.bc, &output[i]);
860 if (r)
861 goto out_err;
862 }
863 free(ctx.literals);
864 tgsi_parse_free(&ctx.parse);
865 return 0;
866 out_err:
867 free(ctx.literals);
868 tgsi_parse_free(&ctx.parse);
869 return r;
870 }
871
872 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
873 {
874 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
875 return -EINVAL;
876 }
877
/*
 * Opcode handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* no state is needed */
	return 0;
}
882
883 static int tgsi_src(struct r600_shader_ctx *ctx,
884 const struct tgsi_full_src_register *tgsi_src,
885 struct r600_bc_alu_src *r600_src)
886 {
887 int index;
888 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
889 r600_src->sel = tgsi_src->Register.Index;
890 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
891 r600_src->sel = 0;
892 index = tgsi_src->Register.Index;
893 ctx->value[0] = ctx->literals[index * 4 + 0];
894 ctx->value[1] = ctx->literals[index * 4 + 1];
895 ctx->value[2] = ctx->literals[index * 4 + 2];
896 ctx->value[3] = ctx->literals[index * 4 + 3];
897 }
898 if (tgsi_src->Register.Indirect)
899 r600_src->rel = V_SQ_REL_RELATIVE;
900 r600_src->neg = tgsi_src->Register.Negate;
901 r600_src->abs = tgsi_src->Register.Absolute;
902 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
903 return 0;
904 }
905
906 static int tgsi_dst(struct r600_shader_ctx *ctx,
907 const struct tgsi_full_dst_register *tgsi_dst,
908 unsigned swizzle,
909 struct r600_bc_alu_dst *r600_dst)
910 {
911 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
912
913 r600_dst->sel = tgsi_dst->Register.Index;
914 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
915 r600_dst->chan = swizzle;
916 r600_dst->write = 1;
917 if (tgsi_dst->Register.Indirect)
918 r600_dst->rel = V_SQ_REL_RELATIVE;
919 if (inst->Instruction.Saturate) {
920 r600_dst->clamp = 1;
921 }
922 return 0;
923 }
924
925 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
926 {
927 switch (swizzle) {
928 case 0:
929 return tgsi_src->Register.SwizzleX;
930 case 1:
931 return tgsi_src->Register.SwizzleY;
932 case 2:
933 return tgsi_src->Register.SwizzleZ;
934 case 3:
935 return tgsi_src->Register.SwizzleW;
936 default:
937 return 0;
938 }
939 }
940
941 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
942 {
943 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
944 struct r600_bc_alu alu;
945 int i, j, k, nconst, r;
946
947 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
948 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
949 nconst++;
950 }
951 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
952 if (r) {
953 return r;
954 }
955 }
956 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
957 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
958 int treg = r600_get_temp(ctx);
959 for (k = 0; k < 4; k++) {
960 memset(&alu, 0, sizeof(struct r600_bc_alu));
961 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
962 alu.src[0].sel = r600_src[i].sel;
963 alu.src[0].chan = k;
964 alu.src[0].rel = r600_src[i].rel;
965 alu.dst.sel = treg;
966 alu.dst.chan = k;
967 alu.dst.write = 1;
968 if (k == 3)
969 alu.last = 1;
970 r = r600_bc_add_alu(ctx->bc, &alu);
971 if (r)
972 return r;
973 }
974 r600_src[i].sel = treg;
975 r600_src[i].rel =0;
976 j--;
977 }
978 }
979 return 0;
980 }
981
982 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
983 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
984 {
985 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
986 struct r600_bc_alu alu;
987 int i, j, k, nliteral, r;
988
989 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
990 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
991 nliteral++;
992 }
993 }
994 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
995 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
996 int treg = r600_get_temp(ctx);
997 for (k = 0; k < 4; k++) {
998 memset(&alu, 0, sizeof(struct r600_bc_alu));
999 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1000 alu.src[0].sel = r600_src[i].sel;
1001 alu.src[0].chan = k;
1002 alu.dst.sel = treg;
1003 alu.dst.chan = k;
1004 alu.dst.write = 1;
1005 if (k == 3)
1006 alu.last = 1;
1007 r = r600_bc_add_alu(ctx->bc, &alu);
1008 if (r)
1009 return r;
1010 }
1011 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
1012 if (r)
1013 return r;
1014 r600_src[i].sel = treg;
1015 j--;
1016 }
1017 }
1018 return 0;
1019 }
1020
1021 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1022 {
1023 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1024 struct r600_bc_alu_src r600_src[3];
1025 struct r600_bc_alu alu;
1026 int i, j, r;
1027 int lasti = 0;
1028
1029 for (i = 0; i < 4; i++) {
1030 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1031 lasti = i;
1032 }
1033 }
1034
1035 r = tgsi_split_constant(ctx, r600_src);
1036 if (r)
1037 return r;
1038 r = tgsi_split_literal_constant(ctx, r600_src);
1039 if (r)
1040 return r;
1041 for (i = 0; i < lasti + 1; i++) {
1042 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1043 continue;
1044
1045 memset(&alu, 0, sizeof(struct r600_bc_alu));
1046 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1047 if (r)
1048 return r;
1049
1050 alu.inst = ctx->inst_info->r600_opcode;
1051 if (!swap) {
1052 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1053 alu.src[j] = r600_src[j];
1054 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1055 }
1056 } else {
1057 alu.src[0] = r600_src[1];
1058 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1059
1060 alu.src[1] = r600_src[0];
1061 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1062 }
1063 /* handle some special cases */
1064 switch (ctx->inst_info->tgsi_opcode) {
1065 case TGSI_OPCODE_SUB:
1066 alu.src[1].neg = 1;
1067 break;
1068 case TGSI_OPCODE_ABS:
1069 alu.src[0].abs = 1;
1070 break;
1071 default:
1072 break;
1073 }
1074 if (i == lasti) {
1075 alu.last = 1;
1076 }
1077 r = r600_bc_add_alu(ctx->bc, &alu);
1078 if (r)
1079 return r;
1080 }
1081 return 0;
1082 }
1083
/* Per-channel ALU op with the TGSI sources in their natural order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1088
/* Per-channel ALU op with TGSI sources 0 and 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1093
1094 /*
1095 * r600 - trunc to -PI..PI range
1096 * r700 - normalize by dividing by 2PI
1097 * see fdo bug 27901
1098 */
1099 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
1100 struct r600_bc_alu_src r600_src[3])
1101 {
1102 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1103 int r;
1104 uint32_t lit_vals[4];
1105 struct r600_bc_alu alu;
1106
1107 memset(lit_vals, 0, 4*4);
1108 r = tgsi_split_constant(ctx, r600_src);
1109 if (r)
1110 return r;
1111 r = tgsi_split_literal_constant(ctx, r600_src);
1112 if (r)
1113 return r;
1114
1115 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
1116 lit_vals[1] = fui(0.5f);
1117
1118 memset(&alu, 0, sizeof(struct r600_bc_alu));
1119 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1120 alu.is_op3 = 1;
1121
1122 alu.dst.chan = 0;
1123 alu.dst.sel = ctx->temp_reg;
1124 alu.dst.write = 1;
1125
1126 alu.src[0] = r600_src[0];
1127 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1128
1129 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1130 alu.src[1].chan = 0;
1131 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1132 alu.src[2].chan = 1;
1133 alu.last = 1;
1134 r = r600_bc_add_alu(ctx->bc, &alu);
1135 if (r)
1136 return r;
1137 r = r600_bc_add_literal(ctx->bc, lit_vals);
1138 if (r)
1139 return r;
1140
1141 memset(&alu, 0, sizeof(struct r600_bc_alu));
1142 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1143
1144 alu.dst.chan = 0;
1145 alu.dst.sel = ctx->temp_reg;
1146 alu.dst.write = 1;
1147
1148 alu.src[0].sel = ctx->temp_reg;
1149 alu.src[0].chan = 0;
1150 alu.last = 1;
1151 r = r600_bc_add_alu(ctx->bc, &alu);
1152 if (r)
1153 return r;
1154
1155 if (ctx->bc->chiprev == CHIPREV_R600) {
1156 lit_vals[0] = fui(3.1415926535897f * 2.0f);
1157 lit_vals[1] = fui(-3.1415926535897f);
1158 } else {
1159 lit_vals[0] = fui(1.0f);
1160 lit_vals[1] = fui(-0.5f);
1161 }
1162
1163 memset(&alu, 0, sizeof(struct r600_bc_alu));
1164 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1165 alu.is_op3 = 1;
1166
1167 alu.dst.chan = 0;
1168 alu.dst.sel = ctx->temp_reg;
1169 alu.dst.write = 1;
1170
1171 alu.src[0].sel = ctx->temp_reg;
1172 alu.src[0].chan = 0;
1173
1174 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1175 alu.src[1].chan = 0;
1176 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1177 alu.src[2].chan = 1;
1178 alu.last = 1;
1179 r = r600_bc_add_alu(ctx->bc, &alu);
1180 if (r)
1181 return r;
1182 r = r600_bc_add_literal(ctx->bc, lit_vals);
1183 if (r)
1184 return r;
1185 return 0;
1186 }
1187
1188 static int tgsi_trig(struct r600_shader_ctx *ctx)
1189 {
1190 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1191 struct r600_bc_alu_src r600_src[3];
1192 struct r600_bc_alu alu;
1193 int i, r;
1194 int lasti = 0;
1195
1196 r = tgsi_setup_trig(ctx, r600_src);
1197 if (r)
1198 return r;
1199
1200 memset(&alu, 0, sizeof(struct r600_bc_alu));
1201 alu.inst = ctx->inst_info->r600_opcode;
1202 alu.dst.chan = 0;
1203 alu.dst.sel = ctx->temp_reg;
1204 alu.dst.write = 1;
1205
1206 alu.src[0].sel = ctx->temp_reg;
1207 alu.src[0].chan = 0;
1208 alu.last = 1;
1209 r = r600_bc_add_alu(ctx->bc, &alu);
1210 if (r)
1211 return r;
1212
1213 /* replicate result */
1214 for (i = 0; i < 4; i++) {
1215 if (inst->Dst[0].Register.WriteMask & (1 << i))
1216 lasti = i;
1217 }
1218 for (i = 0; i < lasti + 1; i++) {
1219 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1220 continue;
1221
1222 memset(&alu, 0, sizeof(struct r600_bc_alu));
1223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1224
1225 alu.src[0].sel = ctx->temp_reg;
1226 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1227 if (r)
1228 return r;
1229 if (i == lasti)
1230 alu.last = 1;
1231 r = r600_bc_add_alu(ctx->bc, &alu);
1232 if (r)
1233 return r;
1234 }
1235 return 0;
1236 }
1237
1238 static int tgsi_scs(struct r600_shader_ctx *ctx)
1239 {
1240 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1241 struct r600_bc_alu_src r600_src[3];
1242 struct r600_bc_alu alu;
1243 int r;
1244
1245 /* We'll only need the trig stuff if we are going to write to the
1246 * X or Y components of the destination vector.
1247 */
1248 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1249 r = tgsi_setup_trig(ctx, r600_src);
1250 if (r)
1251 return r;
1252 }
1253
1254 /* dst.x = COS */
1255 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1256 memset(&alu, 0, sizeof(struct r600_bc_alu));
1257 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1258 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1259 if (r)
1260 return r;
1261
1262 alu.src[0].sel = ctx->temp_reg;
1263 alu.src[0].chan = 0;
1264 alu.last = 1;
1265 r = r600_bc_add_alu(ctx->bc, &alu);
1266 if (r)
1267 return r;
1268 }
1269
1270 /* dst.y = SIN */
1271 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1272 memset(&alu, 0, sizeof(struct r600_bc_alu));
1273 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1274 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1275 if (r)
1276 return r;
1277
1278 alu.src[0].sel = ctx->temp_reg;
1279 alu.src[0].chan = 0;
1280 alu.last = 1;
1281 r = r600_bc_add_alu(ctx->bc, &alu);
1282 if (r)
1283 return r;
1284 }
1285
1286 /* dst.z = 0.0; */
1287 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1288 memset(&alu, 0, sizeof(struct r600_bc_alu));
1289
1290 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1291
1292 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1293 if (r)
1294 return r;
1295
1296 alu.src[0].sel = V_SQ_ALU_SRC_0;
1297 alu.src[0].chan = 0;
1298
1299 alu.last = 1;
1300
1301 r = r600_bc_add_alu(ctx->bc, &alu);
1302 if (r)
1303 return r;
1304
1305 r = r600_bc_add_literal(ctx->bc, ctx->value);
1306 if (r)
1307 return r;
1308 }
1309
1310 /* dst.w = 1.0; */
1311 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1312 memset(&alu, 0, sizeof(struct r600_bc_alu));
1313
1314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1315
1316 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1317 if (r)
1318 return r;
1319
1320 alu.src[0].sel = V_SQ_ALU_SRC_1;
1321 alu.src[0].chan = 0;
1322
1323 alu.last = 1;
1324
1325 r = r600_bc_add_alu(ctx->bc, &alu);
1326 if (r)
1327 return r;
1328
1329 r = r600_bc_add_literal(ctx->bc, ctx->value);
1330 if (r)
1331 return r;
1332 }
1333
1334 return 0;
1335 }
1336
1337 static int tgsi_kill(struct r600_shader_ctx *ctx)
1338 {
1339 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1340 struct r600_bc_alu alu;
1341 int i, r;
1342
1343 for (i = 0; i < 4; i++) {
1344 memset(&alu, 0, sizeof(struct r600_bc_alu));
1345 alu.inst = ctx->inst_info->r600_opcode;
1346
1347 alu.dst.chan = i;
1348
1349 alu.src[0].sel = V_SQ_ALU_SRC_0;
1350
1351 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1352 alu.src[1].sel = V_SQ_ALU_SRC_1;
1353 alu.src[1].neg = 1;
1354 } else {
1355 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1356 if (r)
1357 return r;
1358 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1359 }
1360 if (i == 3) {
1361 alu.last = 1;
1362 }
1363 r = r600_bc_add_alu(ctx->bc, &alu);
1364 if (r)
1365 return r;
1366 }
1367 r = r600_bc_add_literal(ctx->bc, ctx->value);
1368 if (r)
1369 return r;
1370
1371 /* kill must be last in ALU */
1372 ctx->bc->force_add_cf = 1;
1373 ctx->shader->uses_kill = TRUE;
1374 return 0;
1375 }
1376
1377 static int tgsi_lit(struct r600_shader_ctx *ctx)
1378 {
1379 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1380 struct r600_bc_alu alu;
1381 struct r600_bc_alu_src r600_src[3];
1382 int r;
1383
1384 r = tgsi_split_constant(ctx, r600_src);
1385 if (r)
1386 return r;
1387 r = tgsi_split_literal_constant(ctx, r600_src);
1388 if (r)
1389 return r;
1390
1391 /* dst.x, <- 1.0 */
1392 memset(&alu, 0, sizeof(struct r600_bc_alu));
1393 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1394 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1395 alu.src[0].chan = 0;
1396 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1397 if (r)
1398 return r;
1399 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1400 r = r600_bc_add_alu(ctx->bc, &alu);
1401 if (r)
1402 return r;
1403
1404 /* dst.y = max(src.x, 0.0) */
1405 memset(&alu, 0, sizeof(struct r600_bc_alu));
1406 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1407 alu.src[0] = r600_src[0];
1408 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1409 alu.src[1].chan = 0;
1410 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1411 if (r)
1412 return r;
1413 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1414 r = r600_bc_add_alu(ctx->bc, &alu);
1415 if (r)
1416 return r;
1417
1418 /* dst.w, <- 1.0 */
1419 memset(&alu, 0, sizeof(struct r600_bc_alu));
1420 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1421 alu.src[0].sel = V_SQ_ALU_SRC_1;
1422 alu.src[0].chan = 0;
1423 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1424 if (r)
1425 return r;
1426 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1427 alu.last = 1;
1428 r = r600_bc_add_alu(ctx->bc, &alu);
1429 if (r)
1430 return r;
1431
1432 r = r600_bc_add_literal(ctx->bc, ctx->value);
1433 if (r)
1434 return r;
1435
1436 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1437 {
1438 int chan;
1439 int sel;
1440
1441 /* dst.z = log(src.y) */
1442 memset(&alu, 0, sizeof(struct r600_bc_alu));
1443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1444 alu.src[0] = r600_src[0];
1445 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1446 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1447 if (r)
1448 return r;
1449 alu.last = 1;
1450 r = r600_bc_add_alu(ctx->bc, &alu);
1451 if (r)
1452 return r;
1453
1454 r = r600_bc_add_literal(ctx->bc, ctx->value);
1455 if (r)
1456 return r;
1457
1458 chan = alu.dst.chan;
1459 sel = alu.dst.sel;
1460
1461 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1462 memset(&alu, 0, sizeof(struct r600_bc_alu));
1463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1464 alu.src[0] = r600_src[0];
1465 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1466 alu.src[1].sel = sel;
1467 alu.src[1].chan = chan;
1468
1469 alu.src[2] = r600_src[0];
1470 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1471 alu.dst.sel = ctx->temp_reg;
1472 alu.dst.chan = 0;
1473 alu.dst.write = 1;
1474 alu.is_op3 = 1;
1475 alu.last = 1;
1476 r = r600_bc_add_alu(ctx->bc, &alu);
1477 if (r)
1478 return r;
1479
1480 r = r600_bc_add_literal(ctx->bc, ctx->value);
1481 if (r)
1482 return r;
1483 /* dst.z = exp(tmp.x) */
1484 memset(&alu, 0, sizeof(struct r600_bc_alu));
1485 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1486 alu.src[0].sel = ctx->temp_reg;
1487 alu.src[0].chan = 0;
1488 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1489 if (r)
1490 return r;
1491 alu.last = 1;
1492 r = r600_bc_add_alu(ctx->bc, &alu);
1493 if (r)
1494 return r;
1495 }
1496 return 0;
1497 }
1498
1499 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1500 {
1501 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1502 struct r600_bc_alu alu;
1503 int i, r;
1504
1505 memset(&alu, 0, sizeof(struct r600_bc_alu));
1506
1507 /* FIXME:
1508 * For state trackers other than OpenGL, we'll want to use
1509 * _RECIPSQRT_IEEE instead.
1510 */
1511 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1512
1513 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1514 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1515 if (r)
1516 return r;
1517 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1518 alu.src[i].abs = 1;
1519 }
1520 alu.dst.sel = ctx->temp_reg;
1521 alu.dst.write = 1;
1522 alu.last = 1;
1523 r = r600_bc_add_alu(ctx->bc, &alu);
1524 if (r)
1525 return r;
1526 r = r600_bc_add_literal(ctx->bc, ctx->value);
1527 if (r)
1528 return r;
1529 /* replicate result */
1530 return tgsi_helper_tempx_replicate(ctx);
1531 }
1532
1533 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1534 {
1535 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1536 struct r600_bc_alu alu;
1537 int i, r;
1538
1539 for (i = 0; i < 4; i++) {
1540 memset(&alu, 0, sizeof(struct r600_bc_alu));
1541 alu.src[0].sel = ctx->temp_reg;
1542 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1543 alu.dst.chan = i;
1544 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1545 if (r)
1546 return r;
1547 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1548 if (i == 3)
1549 alu.last = 1;
1550 r = r600_bc_add_alu(ctx->bc, &alu);
1551 if (r)
1552 return r;
1553 }
1554 return 0;
1555 }
1556
1557 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1558 {
1559 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1560 struct r600_bc_alu alu;
1561 int i, r;
1562
1563 memset(&alu, 0, sizeof(struct r600_bc_alu));
1564 alu.inst = ctx->inst_info->r600_opcode;
1565 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1566 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1567 if (r)
1568 return r;
1569 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1570 }
1571 alu.dst.sel = ctx->temp_reg;
1572 alu.dst.write = 1;
1573 alu.last = 1;
1574 r = r600_bc_add_alu(ctx->bc, &alu);
1575 if (r)
1576 return r;
1577 r = r600_bc_add_literal(ctx->bc, ctx->value);
1578 if (r)
1579 return r;
1580 /* replicate result */
1581 return tgsi_helper_tempx_replicate(ctx);
1582 }
1583
1584 static int tgsi_pow(struct r600_shader_ctx *ctx)
1585 {
1586 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1587 struct r600_bc_alu alu;
1588 int r;
1589
1590 /* LOG2(a) */
1591 memset(&alu, 0, sizeof(struct r600_bc_alu));
1592 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1593 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1594 if (r)
1595 return r;
1596 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1597 alu.dst.sel = ctx->temp_reg;
1598 alu.dst.write = 1;
1599 alu.last = 1;
1600 r = r600_bc_add_alu(ctx->bc, &alu);
1601 if (r)
1602 return r;
1603 r = r600_bc_add_literal(ctx->bc,ctx->value);
1604 if (r)
1605 return r;
1606 /* b * LOG2(a) */
1607 memset(&alu, 0, sizeof(struct r600_bc_alu));
1608 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1609 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1610 if (r)
1611 return r;
1612 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1613 alu.src[1].sel = ctx->temp_reg;
1614 alu.dst.sel = ctx->temp_reg;
1615 alu.dst.write = 1;
1616 alu.last = 1;
1617 r = r600_bc_add_alu(ctx->bc, &alu);
1618 if (r)
1619 return r;
1620 r = r600_bc_add_literal(ctx->bc,ctx->value);
1621 if (r)
1622 return r;
1623 /* POW(a,b) = EXP2(b * LOG2(a))*/
1624 memset(&alu, 0, sizeof(struct r600_bc_alu));
1625 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1626 alu.src[0].sel = ctx->temp_reg;
1627 alu.dst.sel = ctx->temp_reg;
1628 alu.dst.write = 1;
1629 alu.last = 1;
1630 r = r600_bc_add_alu(ctx->bc, &alu);
1631 if (r)
1632 return r;
1633 r = r600_bc_add_literal(ctx->bc,ctx->value);
1634 if (r)
1635 return r;
1636 return tgsi_helper_tempx_replicate(ctx);
1637 }
1638
1639 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1640 {
1641 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1642 struct r600_bc_alu alu;
1643 struct r600_bc_alu_src r600_src[3];
1644 int i, r;
1645
1646 r = tgsi_split_constant(ctx, r600_src);
1647 if (r)
1648 return r;
1649 r = tgsi_split_literal_constant(ctx, r600_src);
1650 if (r)
1651 return r;
1652
1653 /* tmp = (src > 0 ? 1 : src) */
1654 for (i = 0; i < 4; i++) {
1655 memset(&alu, 0, sizeof(struct r600_bc_alu));
1656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1657 alu.is_op3 = 1;
1658
1659 alu.dst.sel = ctx->temp_reg;
1660 alu.dst.chan = i;
1661
1662 alu.src[0] = r600_src[0];
1663 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1664
1665 alu.src[1].sel = V_SQ_ALU_SRC_1;
1666
1667 alu.src[2] = r600_src[0];
1668 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1669 if (i == 3)
1670 alu.last = 1;
1671 r = r600_bc_add_alu(ctx->bc, &alu);
1672 if (r)
1673 return r;
1674 }
1675 r = r600_bc_add_literal(ctx->bc, ctx->value);
1676 if (r)
1677 return r;
1678
1679 /* dst = (-tmp > 0 ? -1 : tmp) */
1680 for (i = 0; i < 4; i++) {
1681 memset(&alu, 0, sizeof(struct r600_bc_alu));
1682 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1683 alu.is_op3 = 1;
1684 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1685 if (r)
1686 return r;
1687
1688 alu.src[0].sel = ctx->temp_reg;
1689 alu.src[0].chan = i;
1690 alu.src[0].neg = 1;
1691
1692 alu.src[1].sel = V_SQ_ALU_SRC_1;
1693 alu.src[1].neg = 1;
1694
1695 alu.src[2].sel = ctx->temp_reg;
1696 alu.src[2].chan = i;
1697
1698 if (i == 3)
1699 alu.last = 1;
1700 r = r600_bc_add_alu(ctx->bc, &alu);
1701 if (r)
1702 return r;
1703 }
1704 return 0;
1705 }
1706
1707 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1708 {
1709 struct r600_bc_alu alu;
1710 int i, r;
1711
1712 r = r600_bc_add_literal(ctx->bc, ctx->value);
1713 if (r)
1714 return r;
1715 for (i = 0; i < 4; i++) {
1716 memset(&alu, 0, sizeof(struct r600_bc_alu));
1717 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1718 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1719 alu.dst.chan = i;
1720 } else {
1721 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1722 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1723 if (r)
1724 return r;
1725 alu.src[0].sel = ctx->temp_reg;
1726 alu.src[0].chan = i;
1727 }
1728 if (i == 3) {
1729 alu.last = 1;
1730 }
1731 r = r600_bc_add_alu(ctx->bc, &alu);
1732 if (r)
1733 return r;
1734 }
1735 return 0;
1736 }
1737
1738 static int tgsi_op3(struct r600_shader_ctx *ctx)
1739 {
1740 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1741 struct r600_bc_alu_src r600_src[3];
1742 struct r600_bc_alu alu;
1743 int i, j, r;
1744
1745 r = tgsi_split_constant(ctx, r600_src);
1746 if (r)
1747 return r;
1748 r = tgsi_split_literal_constant(ctx, r600_src);
1749 if (r)
1750 return r;
1751 /* do it in 2 step as op3 doesn't support writemask */
1752 for (i = 0; i < 4; i++) {
1753 memset(&alu, 0, sizeof(struct r600_bc_alu));
1754 alu.inst = ctx->inst_info->r600_opcode;
1755 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1756 alu.src[j] = r600_src[j];
1757 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1758 }
1759 alu.dst.sel = ctx->temp_reg;
1760 alu.dst.chan = i;
1761 alu.dst.write = 1;
1762 alu.is_op3 = 1;
1763 if (i == 3) {
1764 alu.last = 1;
1765 }
1766 r = r600_bc_add_alu(ctx->bc, &alu);
1767 if (r)
1768 return r;
1769 }
1770 return tgsi_helper_copy(ctx, inst);
1771 }
1772
1773 static int tgsi_dp(struct r600_shader_ctx *ctx)
1774 {
1775 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1776 struct r600_bc_alu_src r600_src[3];
1777 struct r600_bc_alu alu;
1778 int i, j, r;
1779
1780 r = tgsi_split_constant(ctx, r600_src);
1781 if (r)
1782 return r;
1783 r = tgsi_split_literal_constant(ctx, r600_src);
1784 if (r)
1785 return r;
1786 for (i = 0; i < 4; i++) {
1787 memset(&alu, 0, sizeof(struct r600_bc_alu));
1788 alu.inst = ctx->inst_info->r600_opcode;
1789 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1790 alu.src[j] = r600_src[j];
1791 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1792 }
1793 alu.dst.sel = ctx->temp_reg;
1794 alu.dst.chan = i;
1795 alu.dst.write = 1;
1796 /* handle some special cases */
1797 switch (ctx->inst_info->tgsi_opcode) {
1798 case TGSI_OPCODE_DP2:
1799 if (i > 1) {
1800 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1801 alu.src[0].chan = alu.src[1].chan = 0;
1802 }
1803 break;
1804 case TGSI_OPCODE_DP3:
1805 if (i > 2) {
1806 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1807 alu.src[0].chan = alu.src[1].chan = 0;
1808 }
1809 break;
1810 case TGSI_OPCODE_DPH:
1811 if (i == 3) {
1812 alu.src[0].sel = V_SQ_ALU_SRC_1;
1813 alu.src[0].chan = 0;
1814 alu.src[0].neg = 0;
1815 }
1816 break;
1817 default:
1818 break;
1819 }
1820 if (i == 3) {
1821 alu.last = 1;
1822 }
1823 r = r600_bc_add_alu(ctx->bc, &alu);
1824 if (r)
1825 return r;
1826 }
1827 return tgsi_helper_copy(ctx, inst);
1828 }
1829
1830 static int tgsi_tex(struct r600_shader_ctx *ctx)
1831 {
1832 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1833 struct r600_bc_tex tex;
1834 struct r600_bc_alu alu;
1835 unsigned src_gpr;
1836 int r, i;
1837 int opcode;
1838 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1839 uint32_t lit_vals[4];
1840
1841 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1842
1843 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1844 /* Add perspective divide */
1845 memset(&alu, 0, sizeof(struct r600_bc_alu));
1846 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1847 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1848 if (r)
1849 return r;
1850
1851 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1852 alu.dst.sel = ctx->temp_reg;
1853 alu.dst.chan = 3;
1854 alu.last = 1;
1855 alu.dst.write = 1;
1856 r = r600_bc_add_alu(ctx->bc, &alu);
1857 if (r)
1858 return r;
1859
1860 for (i = 0; i < 3; i++) {
1861 memset(&alu, 0, sizeof(struct r600_bc_alu));
1862 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1863 alu.src[0].sel = ctx->temp_reg;
1864 alu.src[0].chan = 3;
1865 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1866 if (r)
1867 return r;
1868 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1869 alu.dst.sel = ctx->temp_reg;
1870 alu.dst.chan = i;
1871 alu.dst.write = 1;
1872 r = r600_bc_add_alu(ctx->bc, &alu);
1873 if (r)
1874 return r;
1875 }
1876 memset(&alu, 0, sizeof(struct r600_bc_alu));
1877 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1878 alu.src[0].sel = V_SQ_ALU_SRC_1;
1879 alu.src[0].chan = 0;
1880 alu.dst.sel = ctx->temp_reg;
1881 alu.dst.chan = 3;
1882 alu.last = 1;
1883 alu.dst.write = 1;
1884 r = r600_bc_add_alu(ctx->bc, &alu);
1885 if (r)
1886 return r;
1887 src_not_temp = FALSE;
1888 src_gpr = ctx->temp_reg;
1889 }
1890
1891 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1892 int src_chan, src2_chan;
1893
1894 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1895 for (i = 0; i < 4; i++) {
1896 memset(&alu, 0, sizeof(struct r600_bc_alu));
1897 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1898 switch (i) {
1899 case 0:
1900 src_chan = 2;
1901 src2_chan = 1;
1902 break;
1903 case 1:
1904 src_chan = 2;
1905 src2_chan = 0;
1906 break;
1907 case 2:
1908 src_chan = 0;
1909 src2_chan = 2;
1910 break;
1911 case 3:
1912 src_chan = 1;
1913 src2_chan = 2;
1914 break;
1915 default:
1916 assert(0);
1917 src_chan = 0;
1918 src2_chan = 0;
1919 break;
1920 }
1921 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1922 if (r)
1923 return r;
1924 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1925 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1926 if (r)
1927 return r;
1928 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1929 alu.dst.sel = ctx->temp_reg;
1930 alu.dst.chan = i;
1931 if (i == 3)
1932 alu.last = 1;
1933 alu.dst.write = 1;
1934 r = r600_bc_add_alu(ctx->bc, &alu);
1935 if (r)
1936 return r;
1937 }
1938
1939 /* tmp1.z = RCP_e(|tmp1.z|) */
1940 memset(&alu, 0, sizeof(struct r600_bc_alu));
1941 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1942 alu.src[0].sel = ctx->temp_reg;
1943 alu.src[0].chan = 2;
1944 alu.src[0].abs = 1;
1945 alu.dst.sel = ctx->temp_reg;
1946 alu.dst.chan = 2;
1947 alu.dst.write = 1;
1948 alu.last = 1;
1949 r = r600_bc_add_alu(ctx->bc, &alu);
1950 if (r)
1951 return r;
1952
1953 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1954 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1955 * muladd has no writemask, have to use another temp
1956 */
1957 memset(&alu, 0, sizeof(struct r600_bc_alu));
1958 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1959 alu.is_op3 = 1;
1960
1961 alu.src[0].sel = ctx->temp_reg;
1962 alu.src[0].chan = 0;
1963 alu.src[1].sel = ctx->temp_reg;
1964 alu.src[1].chan = 2;
1965
1966 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1967 alu.src[2].chan = 0;
1968
1969 alu.dst.sel = ctx->temp_reg;
1970 alu.dst.chan = 0;
1971 alu.dst.write = 1;
1972
1973 r = r600_bc_add_alu(ctx->bc, &alu);
1974 if (r)
1975 return r;
1976
1977 memset(&alu, 0, sizeof(struct r600_bc_alu));
1978 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1979 alu.is_op3 = 1;
1980
1981 alu.src[0].sel = ctx->temp_reg;
1982 alu.src[0].chan = 1;
1983 alu.src[1].sel = ctx->temp_reg;
1984 alu.src[1].chan = 2;
1985
1986 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1987 alu.src[2].chan = 0;
1988
1989 alu.dst.sel = ctx->temp_reg;
1990 alu.dst.chan = 1;
1991 alu.dst.write = 1;
1992
1993 alu.last = 1;
1994 r = r600_bc_add_alu(ctx->bc, &alu);
1995 if (r)
1996 return r;
1997
1998 lit_vals[0] = fui(1.5f);
1999
2000 r = r600_bc_add_literal(ctx->bc, lit_vals);
2001 if (r)
2002 return r;
2003 src_not_temp = FALSE;
2004 src_gpr = ctx->temp_reg;
2005 }
2006
2007 if (src_not_temp) {
2008 for (i = 0; i < 4; i++) {
2009 memset(&alu, 0, sizeof(struct r600_bc_alu));
2010 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2011 alu.src[0].sel = src_gpr;
2012 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2013 alu.dst.sel = ctx->temp_reg;
2014 alu.dst.chan = i;
2015 if (i == 3)
2016 alu.last = 1;
2017 alu.dst.write = 1;
2018 r = r600_bc_add_alu(ctx->bc, &alu);
2019 if (r)
2020 return r;
2021 }
2022 src_gpr = ctx->temp_reg;
2023 }
2024
2025 opcode = ctx->inst_info->r600_opcode;
2026 if (opcode == SQ_TEX_INST_SAMPLE &&
2027 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
2028 opcode = SQ_TEX_INST_SAMPLE_C;
2029
2030 memset(&tex, 0, sizeof(struct r600_bc_tex));
2031 tex.inst = opcode;
2032 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
2033 tex.resource_id = tex.sampler_id;
2034 tex.src_gpr = src_gpr;
2035 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2036 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2037 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2038 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2039 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2040 tex.src_sel_x = 0;
2041 tex.src_sel_y = 1;
2042 tex.src_sel_z = 2;
2043 tex.src_sel_w = 3;
2044
2045 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2046 tex.src_sel_x = 1;
2047 tex.src_sel_y = 0;
2048 tex.src_sel_z = 3;
2049 tex.src_sel_w = 1;
2050 }
2051
2052 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2053 tex.coord_type_x = 1;
2054 tex.coord_type_y = 1;
2055 tex.coord_type_z = 1;
2056 tex.coord_type_w = 1;
2057 }
2058
2059 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2060 tex.src_sel_w = 2;
2061
2062 r = r600_bc_add_tex(ctx->bc, &tex);
2063 if (r)
2064 return r;
2065
2066 /* add shadow ambient support - gallium doesn't do it yet */
2067 return 0;
2068 }
2069
2070 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2071 {
2072 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2073 struct r600_bc_alu_src r600_src[3];
2074 struct r600_bc_alu alu;
2075 unsigned i;
2076 int r;
2077
2078 r = tgsi_split_constant(ctx, r600_src);
2079 if (r)
2080 return r;
2081 r = tgsi_split_literal_constant(ctx, r600_src);
2082 if (r)
2083 return r;
2084 /* 1 - src0 */
2085 for (i = 0; i < 4; i++) {
2086 memset(&alu, 0, sizeof(struct r600_bc_alu));
2087 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2088 alu.src[0].sel = V_SQ_ALU_SRC_1;
2089 alu.src[0].chan = 0;
2090 alu.src[1] = r600_src[0];
2091 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
2092 alu.src[1].neg = 1;
2093 alu.dst.sel = ctx->temp_reg;
2094 alu.dst.chan = i;
2095 if (i == 3) {
2096 alu.last = 1;
2097 }
2098 alu.dst.write = 1;
2099 r = r600_bc_add_alu(ctx->bc, &alu);
2100 if (r)
2101 return r;
2102 }
2103 r = r600_bc_add_literal(ctx->bc, ctx->value);
2104 if (r)
2105 return r;
2106
2107 /* (1 - src0) * src2 */
2108 for (i = 0; i < 4; i++) {
2109 memset(&alu, 0, sizeof(struct r600_bc_alu));
2110 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2111 alu.src[0].sel = ctx->temp_reg;
2112 alu.src[0].chan = i;
2113 alu.src[1] = r600_src[2];
2114 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2115 alu.dst.sel = ctx->temp_reg;
2116 alu.dst.chan = i;
2117 if (i == 3) {
2118 alu.last = 1;
2119 }
2120 alu.dst.write = 1;
2121 r = r600_bc_add_alu(ctx->bc, &alu);
2122 if (r)
2123 return r;
2124 }
2125 r = r600_bc_add_literal(ctx->bc, ctx->value);
2126 if (r)
2127 return r;
2128
2129 /* src0 * src1 + (1 - src0) * src2 */
2130 for (i = 0; i < 4; i++) {
2131 memset(&alu, 0, sizeof(struct r600_bc_alu));
2132 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2133 alu.is_op3 = 1;
2134 alu.src[0] = r600_src[0];
2135 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2136 alu.src[1] = r600_src[1];
2137 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2138 alu.src[2].sel = ctx->temp_reg;
2139 alu.src[2].chan = i;
2140 alu.dst.sel = ctx->temp_reg;
2141 alu.dst.chan = i;
2142 if (i == 3) {
2143 alu.last = 1;
2144 }
2145 r = r600_bc_add_alu(ctx->bc, &alu);
2146 if (r)
2147 return r;
2148 }
2149 return tgsi_helper_copy(ctx, inst);
2150 }
2151
2152 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2153 {
2154 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2155 struct r600_bc_alu_src r600_src[3];
2156 struct r600_bc_alu alu;
2157 int use_temp = 0;
2158 int i, r;
2159
2160 r = tgsi_split_constant(ctx, r600_src);
2161 if (r)
2162 return r;
2163 r = tgsi_split_literal_constant(ctx, r600_src);
2164 if (r)
2165 return r;
2166
2167 if (inst->Dst[0].Register.WriteMask != 0xf)
2168 use_temp = 1;
2169
2170 for (i = 0; i < 4; i++) {
2171 memset(&alu, 0, sizeof(struct r600_bc_alu));
2172 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2173 alu.src[0] = r600_src[0];
2174 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2175
2176 alu.src[1] = r600_src[2];
2177 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2178
2179 alu.src[2] = r600_src[1];
2180 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2181
2182 if (use_temp)
2183 alu.dst.sel = ctx->temp_reg;
2184 else {
2185 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2186 if (r)
2187 return r;
2188 }
2189 alu.dst.chan = i;
2190 alu.dst.write = 1;
2191 alu.is_op3 = 1;
2192 if (i == 3)
2193 alu.last = 1;
2194 r = r600_bc_add_alu(ctx->bc, &alu);
2195 if (r)
2196 return r;
2197 }
2198 if (use_temp)
2199 return tgsi_helper_copy(ctx, inst);
2200 return 0;
2201 }
2202
2203 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2204 {
2205 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2206 struct r600_bc_alu_src r600_src[3];
2207 struct r600_bc_alu alu;
2208 uint32_t use_temp = 0;
2209 int i, r;
2210
2211 if (inst->Dst[0].Register.WriteMask != 0xf)
2212 use_temp = 1;
2213
2214 r = tgsi_split_constant(ctx, r600_src);
2215 if (r)
2216 return r;
2217 r = tgsi_split_literal_constant(ctx, r600_src);
2218 if (r)
2219 return r;
2220
2221 for (i = 0; i < 4; i++) {
2222 memset(&alu, 0, sizeof(struct r600_bc_alu));
2223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2224
2225 alu.src[0] = r600_src[0];
2226 switch (i) {
2227 case 0:
2228 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2229 break;
2230 case 1:
2231 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2232 break;
2233 case 2:
2234 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2235 break;
2236 case 3:
2237 alu.src[0].sel = V_SQ_ALU_SRC_0;
2238 alu.src[0].chan = i;
2239 }
2240
2241 alu.src[1] = r600_src[1];
2242 switch (i) {
2243 case 0:
2244 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2245 break;
2246 case 1:
2247 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2248 break;
2249 case 2:
2250 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2251 break;
2252 case 3:
2253 alu.src[1].sel = V_SQ_ALU_SRC_0;
2254 alu.src[1].chan = i;
2255 }
2256
2257 alu.dst.sel = ctx->temp_reg;
2258 alu.dst.chan = i;
2259 alu.dst.write = 1;
2260
2261 if (i == 3)
2262 alu.last = 1;
2263 r = r600_bc_add_alu(ctx->bc, &alu);
2264 if (r)
2265 return r;
2266
2267 r = r600_bc_add_literal(ctx->bc, ctx->value);
2268 if (r)
2269 return r;
2270 }
2271
2272 for (i = 0; i < 4; i++) {
2273 memset(&alu, 0, sizeof(struct r600_bc_alu));
2274 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2275
2276 alu.src[0] = r600_src[0];
2277 switch (i) {
2278 case 0:
2279 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2280 break;
2281 case 1:
2282 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2283 break;
2284 case 2:
2285 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2286 break;
2287 case 3:
2288 alu.src[0].sel = V_SQ_ALU_SRC_0;
2289 alu.src[0].chan = i;
2290 }
2291
2292 alu.src[1] = r600_src[1];
2293 switch (i) {
2294 case 0:
2295 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2296 break;
2297 case 1:
2298 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2299 break;
2300 case 2:
2301 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2302 break;
2303 case 3:
2304 alu.src[1].sel = V_SQ_ALU_SRC_0;
2305 alu.src[1].chan = i;
2306 }
2307
2308 alu.src[2].sel = ctx->temp_reg;
2309 alu.src[2].neg = 1;
2310 alu.src[2].chan = i;
2311
2312 if (use_temp)
2313 alu.dst.sel = ctx->temp_reg;
2314 else {
2315 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2316 if (r)
2317 return r;
2318 }
2319 alu.dst.chan = i;
2320 alu.dst.write = 1;
2321 alu.is_op3 = 1;
2322 if (i == 3)
2323 alu.last = 1;
2324 r = r600_bc_add_alu(ctx->bc, &alu);
2325 if (r)
2326 return r;
2327
2328 r = r600_bc_add_literal(ctx->bc, ctx->value);
2329 if (r)
2330 return r;
2331 }
2332 if (use_temp)
2333 return tgsi_helper_copy(ctx, inst);
2334 return 0;
2335 }
2336
2337 static int tgsi_exp(struct r600_shader_ctx *ctx)
2338 {
2339 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2340 struct r600_bc_alu_src r600_src[3] = { { 0 } };
2341 struct r600_bc_alu alu;
2342 int r;
2343
2344 /* result.x = 2^floor(src); */
2345 if (inst->Dst[0].Register.WriteMask & 1) {
2346 memset(&alu, 0, sizeof(struct r600_bc_alu));
2347
2348 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2349 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2350 if (r)
2351 return r;
2352
2353 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2354
2355 alu.dst.sel = ctx->temp_reg;
2356 alu.dst.chan = 0;
2357 alu.dst.write = 1;
2358 alu.last = 1;
2359 r = r600_bc_add_alu(ctx->bc, &alu);
2360 if (r)
2361 return r;
2362
2363 r = r600_bc_add_literal(ctx->bc, ctx->value);
2364 if (r)
2365 return r;
2366
2367 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2368 alu.src[0].sel = ctx->temp_reg;
2369 alu.src[0].chan = 0;
2370
2371 alu.dst.sel = ctx->temp_reg;
2372 alu.dst.chan = 0;
2373 alu.dst.write = 1;
2374 alu.last = 1;
2375 r = r600_bc_add_alu(ctx->bc, &alu);
2376 if (r)
2377 return r;
2378
2379 r = r600_bc_add_literal(ctx->bc, ctx->value);
2380 if (r)
2381 return r;
2382 }
2383
2384 /* result.y = tmp - floor(tmp); */
2385 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2386 memset(&alu, 0, sizeof(struct r600_bc_alu));
2387
2388 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2389 alu.src[0] = r600_src[0];
2390 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2391 if (r)
2392 return r;
2393 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2394
2395 alu.dst.sel = ctx->temp_reg;
2396 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2397 // if (r)
2398 // return r;
2399 alu.dst.write = 1;
2400 alu.dst.chan = 1;
2401
2402 alu.last = 1;
2403
2404 r = r600_bc_add_alu(ctx->bc, &alu);
2405 if (r)
2406 return r;
2407 r = r600_bc_add_literal(ctx->bc, ctx->value);
2408 if (r)
2409 return r;
2410 }
2411
2412 /* result.z = RoughApprox2ToX(tmp);*/
2413 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2414 memset(&alu, 0, sizeof(struct r600_bc_alu));
2415 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2416 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2417 if (r)
2418 return r;
2419 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2420
2421 alu.dst.sel = ctx->temp_reg;
2422 alu.dst.write = 1;
2423 alu.dst.chan = 2;
2424
2425 alu.last = 1;
2426
2427 r = r600_bc_add_alu(ctx->bc, &alu);
2428 if (r)
2429 return r;
2430 r = r600_bc_add_literal(ctx->bc, ctx->value);
2431 if (r)
2432 return r;
2433 }
2434
2435 /* result.w = 1.0;*/
2436 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2437 memset(&alu, 0, sizeof(struct r600_bc_alu));
2438
2439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2440 alu.src[0].sel = V_SQ_ALU_SRC_1;
2441 alu.src[0].chan = 0;
2442
2443 alu.dst.sel = ctx->temp_reg;
2444 alu.dst.chan = 3;
2445 alu.dst.write = 1;
2446 alu.last = 1;
2447 r = r600_bc_add_alu(ctx->bc, &alu);
2448 if (r)
2449 return r;
2450 r = r600_bc_add_literal(ctx->bc, ctx->value);
2451 if (r)
2452 return r;
2453 }
2454 return tgsi_helper_copy(ctx, inst);
2455 }
2456
2457 static int tgsi_log(struct r600_shader_ctx *ctx)
2458 {
2459 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2460 struct r600_bc_alu alu;
2461 int r;
2462
2463 /* result.x = floor(log2(src)); */
2464 if (inst->Dst[0].Register.WriteMask & 1) {
2465 memset(&alu, 0, sizeof(struct r600_bc_alu));
2466
2467 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2468 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2469 if (r)
2470 return r;
2471
2472 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2473
2474 alu.dst.sel = ctx->temp_reg;
2475 alu.dst.chan = 0;
2476 alu.dst.write = 1;
2477 alu.last = 1;
2478 r = r600_bc_add_alu(ctx->bc, &alu);
2479 if (r)
2480 return r;
2481
2482 r = r600_bc_add_literal(ctx->bc, ctx->value);
2483 if (r)
2484 return r;
2485
2486 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2487 alu.src[0].sel = ctx->temp_reg;
2488 alu.src[0].chan = 0;
2489
2490 alu.dst.sel = ctx->temp_reg;
2491 alu.dst.chan = 0;
2492 alu.dst.write = 1;
2493 alu.last = 1;
2494
2495 r = r600_bc_add_alu(ctx->bc, &alu);
2496 if (r)
2497 return r;
2498
2499 r = r600_bc_add_literal(ctx->bc, ctx->value);
2500 if (r)
2501 return r;
2502 }
2503
2504 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2505 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2506 memset(&alu, 0, sizeof(struct r600_bc_alu));
2507
2508 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2509 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2510 if (r)
2511 return r;
2512
2513 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2514
2515 alu.dst.sel = ctx->temp_reg;
2516 alu.dst.chan = 1;
2517 alu.dst.write = 1;
2518 alu.last = 1;
2519
2520 r = r600_bc_add_alu(ctx->bc, &alu);
2521 if (r)
2522 return r;
2523
2524 r = r600_bc_add_literal(ctx->bc, ctx->value);
2525 if (r)
2526 return r;
2527
2528 memset(&alu, 0, sizeof(struct r600_bc_alu));
2529
2530 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2531 alu.src[0].sel = ctx->temp_reg;
2532 alu.src[0].chan = 1;
2533
2534 alu.dst.sel = ctx->temp_reg;
2535 alu.dst.chan = 1;
2536 alu.dst.write = 1;
2537 alu.last = 1;
2538
2539 r = r600_bc_add_alu(ctx->bc, &alu);
2540 if (r)
2541 return r;
2542
2543 r = r600_bc_add_literal(ctx->bc, ctx->value);
2544 if (r)
2545 return r;
2546
2547 memset(&alu, 0, sizeof(struct r600_bc_alu));
2548
2549 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2550 alu.src[0].sel = ctx->temp_reg;
2551 alu.src[0].chan = 1;
2552
2553 alu.dst.sel = ctx->temp_reg;
2554 alu.dst.chan = 1;
2555 alu.dst.write = 1;
2556 alu.last = 1;
2557
2558 r = r600_bc_add_alu(ctx->bc, &alu);
2559 if (r)
2560 return r;
2561
2562 r = r600_bc_add_literal(ctx->bc, ctx->value);
2563 if (r)
2564 return r;
2565
2566 memset(&alu, 0, sizeof(struct r600_bc_alu));
2567
2568 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2569 alu.src[0].sel = ctx->temp_reg;
2570 alu.src[0].chan = 1;
2571
2572 alu.dst.sel = ctx->temp_reg;
2573 alu.dst.chan = 1;
2574 alu.dst.write = 1;
2575 alu.last = 1;
2576
2577 r = r600_bc_add_alu(ctx->bc, &alu);
2578 if (r)
2579 return r;
2580
2581 r = r600_bc_add_literal(ctx->bc, ctx->value);
2582 if (r)
2583 return r;
2584
2585 memset(&alu, 0, sizeof(struct r600_bc_alu));
2586
2587 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2588
2589 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2590 if (r)
2591 return r;
2592
2593 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2594
2595 alu.src[1].sel = ctx->temp_reg;
2596 alu.src[1].chan = 1;
2597
2598 alu.dst.sel = ctx->temp_reg;
2599 alu.dst.chan = 1;
2600 alu.dst.write = 1;
2601 alu.last = 1;
2602
2603 r = r600_bc_add_alu(ctx->bc, &alu);
2604 if (r)
2605 return r;
2606
2607 r = r600_bc_add_literal(ctx->bc, ctx->value);
2608 if (r)
2609 return r;
2610 }
2611
2612 /* result.z = log2(src);*/
2613 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2614 memset(&alu, 0, sizeof(struct r600_bc_alu));
2615
2616 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2617 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2618 if (r)
2619 return r;
2620
2621 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2622
2623 alu.dst.sel = ctx->temp_reg;
2624 alu.dst.write = 1;
2625 alu.dst.chan = 2;
2626 alu.last = 1;
2627
2628 r = r600_bc_add_alu(ctx->bc, &alu);
2629 if (r)
2630 return r;
2631
2632 r = r600_bc_add_literal(ctx->bc, ctx->value);
2633 if (r)
2634 return r;
2635 }
2636
2637 /* result.w = 1.0; */
2638 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2639 memset(&alu, 0, sizeof(struct r600_bc_alu));
2640
2641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2642 alu.src[0].sel = V_SQ_ALU_SRC_1;
2643 alu.src[0].chan = 0;
2644
2645 alu.dst.sel = ctx->temp_reg;
2646 alu.dst.chan = 3;
2647 alu.dst.write = 1;
2648 alu.last = 1;
2649
2650 r = r600_bc_add_alu(ctx->bc, &alu);
2651 if (r)
2652 return r;
2653
2654 r = r600_bc_add_literal(ctx->bc, ctx->value);
2655 if (r)
2656 return r;
2657 }
2658
2659 return tgsi_helper_copy(ctx, inst);
2660 }
2661
2662 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2663 {
2664 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2665 struct r600_bc_alu alu;
2666 int r;
2667 memset(&alu, 0, sizeof(struct r600_bc_alu));
2668
2669 switch (inst->Instruction.Opcode) {
2670 case TGSI_OPCODE_ARL:
2671 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2672 break;
2673 case TGSI_OPCODE_ARR:
2674 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2675 break;
2676 default:
2677 assert(0);
2678 return -1;
2679 }
2680
2681 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2682 if (r)
2683 return r;
2684 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2685 alu.last = 1;
2686 alu.dst.chan = 0;
2687 alu.dst.sel = ctx->temp_reg;
2688 alu.dst.write = 1;
2689 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2690 if (r)
2691 return r;
2692 memset(&alu, 0, sizeof(struct r600_bc_alu));
2693 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2694 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2695 if (r)
2696 return r;
2697 alu.src[0].sel = ctx->temp_reg;
2698 alu.src[0].chan = 0;
2699 alu.last = 1;
2700 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2701 if (r)
2702 return r;
2703 return 0;
2704 }
2705 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2706 {
2707 /* TODO from r600c, ar values don't persist between clauses */
2708 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2709 struct r600_bc_alu alu;
2710 int r;
2711 memset(&alu, 0, sizeof(struct r600_bc_alu));
2712
2713 switch (inst->Instruction.Opcode) {
2714 case TGSI_OPCODE_ARL:
2715 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2716 break;
2717 case TGSI_OPCODE_ARR:
2718 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2719 break;
2720 default:
2721 assert(0);
2722 return -1;
2723 }
2724
2725
2726 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2727 if (r)
2728 return r;
2729 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2730
2731 alu.last = 1;
2732
2733 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2734 if (r)
2735 return r;
2736 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2737 return 0;
2738 }
2739
2740 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2741 {
2742 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2743 struct r600_bc_alu alu;
2744 int i, r = 0;
2745
2746 for (i = 0; i < 4; i++) {
2747 memset(&alu, 0, sizeof(struct r600_bc_alu));
2748
2749 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2750 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2751 if (r)
2752 return r;
2753
2754 if (i == 0 || i == 3) {
2755 alu.src[0].sel = V_SQ_ALU_SRC_1;
2756 } else {
2757 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2758 if (r)
2759 return r;
2760 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2761 }
2762
2763 if (i == 0 || i == 2) {
2764 alu.src[1].sel = V_SQ_ALU_SRC_1;
2765 } else {
2766 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2767 if (r)
2768 return r;
2769 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2770 }
2771 if (i == 3)
2772 alu.last = 1;
2773 r = r600_bc_add_alu(ctx->bc, &alu);
2774 if (r)
2775 return r;
2776 }
2777 return 0;
2778 }
2779
2780 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2781 {
2782 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2783 struct r600_bc_alu alu;
2784 int r;
2785
2786 memset(&alu, 0, sizeof(struct r600_bc_alu));
2787 alu.inst = opcode;
2788 alu.predicate = 1;
2789
2790 alu.dst.sel = ctx->temp_reg;
2791 alu.dst.write = 1;
2792 alu.dst.chan = 0;
2793
2794 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2795 if (r)
2796 return r;
2797 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2798 alu.src[1].sel = V_SQ_ALU_SRC_0;
2799 alu.src[1].chan = 0;
2800
2801 alu.last = 1;
2802
2803 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2804 if (r)
2805 return r;
2806 return 0;
2807 }
2808
2809 static int pops(struct r600_shader_ctx *ctx, int pops)
2810 {
2811 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2812 ctx->bc->cf_last->pop_count = pops;
2813 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2814 return 0;
2815 }
2816
2817 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2818 {
2819 switch(reason) {
2820 case FC_PUSH_VPM:
2821 ctx->bc->callstack[ctx->bc->call_sp].current--;
2822 break;
2823 case FC_PUSH_WQM:
2824 case FC_LOOP:
2825 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2826 break;
2827 case FC_REP:
2828 /* TOODO : for 16 vp asic should -= 2; */
2829 ctx->bc->callstack[ctx->bc->call_sp].current --;
2830 break;
2831 }
2832 }
2833
2834 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2835 {
2836 if (check_max_only) {
2837 int diff;
2838 switch (reason) {
2839 case FC_PUSH_VPM:
2840 diff = 1;
2841 break;
2842 case FC_PUSH_WQM:
2843 diff = 4;
2844 break;
2845 default:
2846 assert(0);
2847 diff = 0;
2848 }
2849 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2850 ctx->bc->callstack[ctx->bc->call_sp].max) {
2851 ctx->bc->callstack[ctx->bc->call_sp].max =
2852 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2853 }
2854 return;
2855 }
2856 switch (reason) {
2857 case FC_PUSH_VPM:
2858 ctx->bc->callstack[ctx->bc->call_sp].current++;
2859 break;
2860 case FC_PUSH_WQM:
2861 case FC_LOOP:
2862 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2863 break;
2864 case FC_REP:
2865 ctx->bc->callstack[ctx->bc->call_sp].current++;
2866 break;
2867 }
2868
2869 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2870 ctx->bc->callstack[ctx->bc->call_sp].max) {
2871 ctx->bc->callstack[ctx->bc->call_sp].max =
2872 ctx->bc->callstack[ctx->bc->call_sp].current;
2873 }
2874 }
2875
2876 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2877 {
2878 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2879
2880 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2881 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2882 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2883 sp->num_mid++;
2884 }
2885
2886 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2887 {
2888 ctx->bc->fc_sp++;
2889 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2890 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2891 }
2892
2893 static void fc_poplevel(struct r600_shader_ctx *ctx)
2894 {
2895 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2896 if (sp->mid) {
2897 free(sp->mid);
2898 sp->mid = NULL;
2899 }
2900 sp->num_mid = 0;
2901 sp->start = NULL;
2902 sp->type = 0;
2903 ctx->bc->fc_sp--;
2904 }
2905
#if 0
/*
 * Disabled helpers for subroutine return and flag-driven loop exit.
 * Several of them are still stubs; the region is excluded from the build.
 */

/* Append a RETURN control-flow instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a JUMP with the given pop count; the target offset is not set yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, then jump, reset the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, append the current instruction's CF opcode with a pop
 * count of one, record it as a mid point of level fc_sp and pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2953
2954 static int tgsi_if(struct r600_shader_ctx *ctx)
2955 {
2956 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2957
2958 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2959
2960 fc_pushlevel(ctx, FC_IF);
2961
2962 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2963 return 0;
2964 }
2965
2966 static int tgsi_else(struct r600_shader_ctx *ctx)
2967 {
2968 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2969 ctx->bc->cf_last->pop_count = 1;
2970
2971 fc_set_mid(ctx, ctx->bc->fc_sp);
2972 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2973 return 0;
2974 }
2975
2976 static int tgsi_endif(struct r600_shader_ctx *ctx)
2977 {
2978 pops(ctx, 1);
2979 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2980 R600_ERR("if/endif unbalanced in shader\n");
2981 return -1;
2982 }
2983
2984 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2985 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2986 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2987 } else {
2988 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2989 }
2990 fc_poplevel(ctx);
2991
2992 callstack_decrease_current(ctx, FC_PUSH_VPM);
2993 return 0;
2994 }
2995
2996 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2997 {
2998 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2999
3000 fc_pushlevel(ctx, FC_LOOP);
3001
3002 /* check stack depth */
3003 callstack_check_depth(ctx, FC_LOOP, 0);
3004 return 0;
3005 }
3006
3007 static int tgsi_endloop(struct r600_shader_ctx *ctx)
3008 {
3009 int i;
3010
3011 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3012
3013 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3014 R600_ERR("loop/endloop in shader code are not paired.\n");
3015 return -EINVAL;
3016 }
3017
3018 /* fixup loop pointers - from r600isa
3019 LOOP END points to CF after LOOP START,
3020 LOOP START point to CF after LOOP END
3021 BRK/CONT point to LOOP END CF
3022 */
3023 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3024
3025 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3026
3027 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3028 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3029 }
3030 /* TODO add LOOPRET support */
3031 fc_poplevel(ctx);
3032 callstack_decrease_current(ctx, FC_LOOP);
3033 return 0;
3034 }
3035
3036 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3037 {
3038 unsigned int fscp;
3039
3040 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3041 {
3042 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3043 break;
3044 }
3045
3046 if (fscp == 0) {
3047 R600_ERR("Break not inside loop/endloop pair\n");
3048 return -EINVAL;
3049 }
3050
3051 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3052 ctx->bc->cf_last->pop_count = 1;
3053
3054 fc_set_mid(ctx, fscp);
3055
3056 pops(ctx, 1);
3057 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3058 return 0;
3059 }
3060
3061 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3062 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3063 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3064 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3065
3066 /* FIXME:
3067 * For state trackers other than OpenGL, we'll want to use
3068 * _RECIP_IEEE instead.
3069 */
3070 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3071
3072 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3073 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3074 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3075 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3076 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3077 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3078 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3079 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3080 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3081 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3082 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3083 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3084 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3085 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3086 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3087 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088 /* gap */
3089 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091 /* gap */
3092 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3095 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3097 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3099 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3100 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3101 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3102 /* gap */
3103 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3105 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3107 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3108 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3109 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3110 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3111 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3117 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3119 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3120 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3121 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3122 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3124 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3126 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3132 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3133 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3137 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3138 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3139 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3140 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3142 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3143 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3144 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3145 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3146 /* gap */
3147 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3150 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3151 /* gap */
3152 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3160 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161 /* gap */
3162 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3164 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3165 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3171 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3172 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3174 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3175 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3176 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3177 /* gap */
3178 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3181 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3182 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3183 /* gap */
3184 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3186 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3187 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3188 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3190 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3193 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3194 /* gap */
3195 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3199 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3205 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3206 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3210 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3217 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3218 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3219 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3220 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3221 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3222 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3223 };
3224
/*
 * TGSI opcode translation table using the EG_-prefixed hardware opcode
 * constants.
 *
 * Each row holds four values, in order:
 *   1. a TGSI_OPCODE_* value, or a bare number on the rows marked "gap"
 *      where no TGSI_OPCODE_* name is used;
 *   2. a flag that is 0 everywhere except the MAD row, which is also the
 *      only row carrying an ..._OP3_... constant (presumably "uses the
 *      three-operand ALU encoding" -- confirm against the definition of
 *      struct r600_shader_tgsi_instruction);
 *   3. a hardware instruction constant (ALU, CF or SQ_TEX_INST_*). Many
 *      rows with a dedicated handler (tgsi_lit, tgsi_if, tgsi_cmp, ...)
 *      carry ..._INST_NOP here, so those handlers presumably choose the
 *      real instruction(s) themselves -- TODO confirm;
 *   4. the handler function for the opcode. Judging by its name,
 *      tgsi_unsupported is the placeholder for opcodes that have no
 *      translation yet.
 *
 * NOTE(review): every bare number on a "gap" row equals that row's
 * zero-based position (20, 22, 23, 32, ...), which suggests the table is
 * indexed directly by TGSI opcode value. If so, rows must not be
 * reordered, inserted or removed -- verify against the lookup site.
 */
3225 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3226 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3227 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3228 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3229 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3230 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3231 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3232 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3233 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3234 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3235 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, /* DP3, DP4, DPH and DP2 all use DOT4 with tgsi_dp */
3236 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3237 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3238 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3239 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3240 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, /* SETGT via tgsi_op2_swap; presumably a < b emitted as b > a -- confirm */
3241 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3242 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only row with flag 1 and an OP3 constant */
3243 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* same ADD constant as TGSI_OPCODE_ADD; negation presumably handled in tgsi_op2 -- confirm */
3244 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3245 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246 /* gap */
3247 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3249 /* gap */
3250 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3251 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3253 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3254 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3255 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3257 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3258 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3259 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3260 /* gap */
3261 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* same MOV constant as TGSI_OPCODE_MOV; abs presumably applied in tgsi_op2 -- confirm */
3263 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3265 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3266 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3267 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3268 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3269 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3271 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3272 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3273 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3274 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3275 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3276 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3277 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3278 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, /* SETGE via tgsi_op2_swap; presumably a <= b emitted as b >= a -- confirm */
3279 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3280 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3281 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3282 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, /* same SAMPLE constant as TEX */
3284 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3289 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, /* same handler as TGSI_OPCODE_ARL */
3291 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3292 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3294 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3295 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3296 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3297 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, /* NOTE(review): same SAMPLE_L constant as TXL below -- confirm this is intended for the bias variant */
3298 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3299 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3300 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3301 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3302 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, /* BRK and CONT carry CF constants, not ALU ones */
3303 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3304 /* gap */
3305 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3308 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3309 /* gap */
3310 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3312 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3314 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3318 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319 /* gap */
3320 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3322 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3323 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3325 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3326 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3327 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3328 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3329 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3330 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3331 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3332 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3333 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3334 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3335 /* gap */
3336 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* NOTE(review): TGSI NOP is routed to tgsi_unsupported, not to a no-op handler */
3341 /* gap */
3342 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3343 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3344 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3345 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3346 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3347 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3348 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3349 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3351 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3352 /* gap */
3353 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3359 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3361 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3362 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3363 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3364 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3365 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3366 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3367 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3368 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3369 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3370 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3371 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3372 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3373 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3374 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3375 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3376 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3377 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3378 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3379 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3380 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3381 };