77b180984dd10b5d3140692312d9146618dac72a
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_pipe.h"
29 #include "r600_asm.h"
30 #include "r600_sq.h"
31 #include "r600_opcodes.h"
32 #include "r600d.h"
33 #include <stdio.h>
34 #include <errno.h>
35
36 static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37 {
38 struct r600_pipe_state *rstate = &shader->rstate;
39 struct r600_shader *rshader = &shader->shader;
40 unsigned spi_vs_out_id[10];
41 unsigned i, tmp;
42
43 /* clear previous register */
44 rstate->nregs = 0;
45
46 /* so far never got proper semantic id from tgsi */
47 for (i = 0; i < 10; i++) {
48 spi_vs_out_id[i] = 0;
49 }
50 for (i = 0; i < 32; i++) {
51 tmp = i << ((i & 3) * 8);
52 spi_vs_out_id[i / 4] |= tmp;
53 }
54 for (i = 0; i < 10; i++) {
55 r600_pipe_state_add_reg(rstate,
56 R_028614_SPI_VS_OUT_ID_0 + i * 4,
57 spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58 }
59
60 r600_pipe_state_add_reg(rstate,
61 R_0286C4_SPI_VS_OUT_CONFIG,
62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63 0xFFFFFFFF, NULL);
64 r600_pipe_state_add_reg(rstate,
65 R_028868_SQ_PGM_RESOURCES_VS,
66 S_028868_NUM_GPRS(rshader->bc.ngpr) |
67 S_028868_STACK_SIZE(rshader->bc.nstack),
68 0xFFFFFFFF, NULL);
69 r600_pipe_state_add_reg(rstate,
70 R_0288A4_SQ_PGM_RESOURCES_FS,
71 0x00000000, 0xFFFFFFFF, NULL);
72 r600_pipe_state_add_reg(rstate,
73 R_0288D0_SQ_PGM_CF_OFFSET_VS,
74 0x00000000, 0xFFFFFFFF, NULL);
75 r600_pipe_state_add_reg(rstate,
76 R_0288DC_SQ_PGM_CF_OFFSET_FS,
77 0x00000000, 0xFFFFFFFF, NULL);
78 r600_pipe_state_add_reg(rstate,
79 R_028858_SQ_PGM_START_VS,
80 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
81 r600_pipe_state_add_reg(rstate,
82 R_028894_SQ_PGM_START_FS,
83 r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
84
85 r600_pipe_state_add_reg(rstate,
86 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
87 0xFFFFFFFF, NULL);
88
89 }
90
91 int r600_find_vs_semantic_index(struct r600_shader *vs,
92 struct r600_shader *ps, int id)
93 {
94 struct r600_shader_io *input = &ps->input[id];
95
96 for (int i = 0; i < vs->noutput; i++) {
97 if (input->name == vs->output[i].name &&
98 input->sid == vs->output[i].sid) {
99 return i - 1;
100 }
101 }
102 return 0;
103 }
104
105 static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
106 {
107 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
108 struct r600_pipe_state *rstate = &shader->rstate;
109 struct r600_shader *rshader = &shader->shader;
110 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
111 int pos_index = -1, face_index = -1;
112
113 /* clear previous register */
114 rstate->nregs = 0;
115
116 for (i = 0; i < rshader->ninput; i++) {
117 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
118 if (rshader->input[i].centroid)
119 tmp |= S_028644_SEL_CENTROID(1);
120 if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
121 tmp |= S_028644_SEL_LINEAR(1);
122
123 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
124 pos_index = i;
125 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
126 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
127 rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
128 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
129 }
130 if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
131 face_index = i;
132 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
133 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
134 tmp |= S_028644_PT_SPRITE_TEX(1);
135 }
136 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
137 }
138 for (i = 0; i < rshader->noutput; i++) {
139 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
140 r600_pipe_state_add_reg(rstate,
141 R_02880C_DB_SHADER_CONTROL,
142 S_02880C_Z_EXPORT_ENABLE(1),
143 S_02880C_Z_EXPORT_ENABLE(1), NULL);
144 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
145 r600_pipe_state_add_reg(rstate,
146 R_02880C_DB_SHADER_CONTROL,
147 S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
148 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
149 }
150
151 exports_ps = 0;
152 num_cout = 0;
153 for (i = 0; i < rshader->noutput; i++) {
154 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
155 exports_ps |= 1;
156 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
157 num_cout++;
158 }
159 }
160 exports_ps |= S_028854_EXPORT_COLORS(num_cout);
161 if (!exports_ps) {
162 /* always at least export 1 component per pixel */
163 exports_ps = 2;
164 }
165
166 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
167 S_0286CC_PERSP_GRADIENT_ENA(1);
168 spi_input_z = 0;
169 if (pos_index != -1) {
170 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
171 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
172 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
173 S_0286CC_BARYC_SAMPLE_CNTL(1));
174 spi_input_z |= 1;
175 }
176
177 spi_ps_in_control_1 = 0;
178 if (face_index != -1) {
179 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
180 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
181 }
182
183 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
184 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
185 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
186 r600_pipe_state_add_reg(rstate,
187 R_028840_SQ_PGM_START_PS,
188 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
189 r600_pipe_state_add_reg(rstate,
190 R_028850_SQ_PGM_RESOURCES_PS,
191 S_028868_NUM_GPRS(rshader->bc.ngpr) |
192 S_028868_STACK_SIZE(rshader->bc.nstack),
193 0xFFFFFFFF, NULL);
194 r600_pipe_state_add_reg(rstate,
195 R_028854_SQ_PGM_EXPORTS_PS,
196 exports_ps, 0xFFFFFFFF, NULL);
197 r600_pipe_state_add_reg(rstate,
198 R_0288CC_SQ_PGM_CF_OFFSET_PS,
199 0x00000000, 0xFFFFFFFF, NULL);
200
201 if (rshader->uses_kill) {
202 /* only set some bits here, the other bits are set in the dsa state */
203 r600_pipe_state_add_reg(rstate,
204 R_02880C_DB_SHADER_CONTROL,
205 S_02880C_KILL_ENABLE(1),
206 S_02880C_KILL_ENABLE(1), NULL);
207 }
208 r600_pipe_state_add_reg(rstate,
209 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
210 0xFFFFFFFF, NULL);
211 }
212
213 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
214 {
215 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
216 struct r600_shader *rshader = &shader->shader;
217 void *ptr;
218
219 /* copy new shader */
220 if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
221 shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
222 if (shader->bo_fetch == NULL) {
223 return -ENOMEM;
224 }
225 ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
226 memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
227 r600_bo_unmap(rctx->radeon, shader->bo_fetch);
228 }
229 if (shader->bo == NULL) {
230 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
231 if (shader->bo == NULL) {
232 return -ENOMEM;
233 }
234 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
235 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
236 r600_bo_unmap(rctx->radeon, shader->bo);
237 }
238 /* build state */
239 rshader->flat_shade = rctx->flatshade;
240 switch (rshader->processor_type) {
241 case TGSI_PROCESSOR_VERTEX:
242 if (rshader->family >= CHIP_CEDAR) {
243 evergreen_pipe_shader_vs(ctx, shader);
244 } else {
245 r600_pipe_shader_vs(ctx, shader);
246 }
247 break;
248 case TGSI_PROCESSOR_FRAGMENT:
249 if (rshader->family >= CHIP_CEDAR) {
250 evergreen_pipe_shader_ps(ctx, shader);
251 } else {
252 r600_pipe_shader_ps(ctx, shader);
253 }
254 break;
255 default:
256 return -EINVAL;
257 }
258 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
259 return 0;
260 }
261
262 static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
263 {
264 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
265 struct r600_shader *shader = &rshader->shader;
266 const struct util_format_description *desc;
267 enum pipe_format resource_format[160];
268 unsigned i, nresources = 0;
269 struct r600_bc *bc = &shader->bc_fetch;
270 struct r600_bc_cf *cf;
271 struct r600_bc_vtx *vtx;
272
273 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
274 return 0;
275 /* doing a full memcmp fell over the refcount */
276 if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
277 (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
278 rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
279 return 0;
280 }
281 rshader->vertex_elements = *rctx->vertex_elements;
282 for (i = 0; i < rctx->vertex_elements->count; i++) {
283 resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
284 }
285 r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
286 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
287 switch (cf->inst) {
288 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
289 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
290 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
291 desc = util_format_description(resource_format[vtx->buffer_id]);
292 if (desc == NULL) {
293 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
294 return -EINVAL;
295 }
296 vtx->dst_sel_x = desc->swizzle[0];
297 vtx->dst_sel_y = desc->swizzle[1];
298 vtx->dst_sel_z = desc->swizzle[2];
299 vtx->dst_sel_w = desc->swizzle[3];
300 }
301 break;
302 default:
303 break;
304 }
305 }
306 return r600_bc_build(&shader->bc_fetch);
307 }
308
309 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
310 {
311 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
312 int r;
313
314 if (shader == NULL)
315 return -EINVAL;
316 /* there should be enough input */
317 if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
318 R600_ERR("%d resources provided, expecting %d\n",
319 rctx->vertex_elements->count, shader->shader.bc.nresource);
320 return -EINVAL;
321 }
322 r = r600_shader_update(ctx, shader);
323 if (r)
324 return r;
325 return r600_pipe_shader(ctx, shader);
326 }
327
328 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
329 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
330 {
331 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
332 int r;
333
334 //fprintf(stderr, "--------------------------------------------------------------\n");
335 //tgsi_dump(tokens, 0);
336 shader->shader.family = r600_get_family(rctx->radeon);
337 r = r600_shader_from_tgsi(tokens, &shader->shader);
338 if (r) {
339 R600_ERR("translation from TGSI failed !\n");
340 return r;
341 }
342 r = r600_bc_build(&shader->shader.bc);
343 if (r) {
344 R600_ERR("building bytecode failed !\n");
345 return r;
346 }
347 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
348 r = r600_bc_build(&shader->shader.bc_fetch);
349 if (r) {
350 R600_ERR("building bytecode failed !\n");
351 return r;
352 }
353 }
354 //fprintf(stderr, "______________________________________________________________\n");
355 return 0;
356 }
357
358 void
359 r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
360 {
361 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
362
363 r600_bo_reference(rctx->radeon, &shader->bo, NULL);
364
365 r600_bc_clear(&shader->shader.bc);
366
367 /* FIXME: is there more stuff to free? */
368 }
369
370 /*
371 * tgsi -> r600 shader
372 */
struct r600_shader_tgsi_instruction;

/* Working state used while translating one TGSI shader into r600 bytecode. */
struct r600_shader_ctx {
	/* filled by tgsi_scan_shader() before translation starts */
	struct tgsi_shader_info info;
	/* token iterator, set up by tgsi_parse_init() */
	struct tgsi_parse_context parse;
	const struct tgsi_token *tokens;
	/* TGSI processor type taken from the token header */
	unsigned type;
	/* per register file base that is added to a TGSI register index */
	unsigned file_offset[TGSI_FILE_COUNT];
	/* first register available for driver temporaries, see r600_get_temp() */
	unsigned temp_reg;
	/* translation table entry of the instruction being processed */
	struct r600_shader_tgsi_instruction *inst_info;
	/* main bytecode and fetch shader bytecode being generated */
	struct r600_bc *bc;
	struct r600_bc *bc_fetch;
	struct r600_shader *shader;
	/* the four words of the immediate most recently read by tgsi_src() */
	u32 value[4];
	/* all immediates seen so far, four words each */
	u32 *literals;
	u32 nliterals;
	/* driver temporaries handed out for the current instruction */
	u32 max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean input_centroid;
	boolean input_linear;
	boolean input_perspective;
	int num_interp_gpr;
};
396
/*
 * One entry of the TGSI opcode translation tables; the tables are indexed
 * by TGSI opcode when an instruction token is translated.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode of this entry (printed by tgsi_unsupported()) */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-operand ALU opcodes - confirm against the tables */
	unsigned is_op3;
	/* NOTE(review): presumably the hardware opcode emitted by the handler - confirm */
	unsigned r600_opcode;
	/* translation handler; returns 0 on success or a negative errno value */
	int (*process)(struct r600_shader_ctx *ctx);
};
403
/*
 * Forward declarations: the r600 and evergreen opcode translation tables
 * and a helper are referenced before their definitions later in the file.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
406
407 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
408 {
409 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
410 int j;
411
412 if (i->Instruction.NumDstRegs > 1) {
413 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
414 return -EINVAL;
415 }
416 if (i->Instruction.Predicate) {
417 R600_ERR("predicate unsupported\n");
418 return -EINVAL;
419 }
420 #if 0
421 if (i->Instruction.Label) {
422 R600_ERR("label unsupported\n");
423 return -EINVAL;
424 }
425 #endif
426 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
427 if (i->Src[j].Register.Dimension) {
428 R600_ERR("unsupported src %d (dimension %d)\n", j,
429 i->Src[j].Register.Dimension);
430 return -EINVAL;
431 }
432 }
433 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
434 if (i->Dst[j].Register.Dimension) {
435 R600_ERR("unsupported dst (dimension)\n");
436 return -EINVAL;
437 }
438 }
439 return 0;
440 }
441
442 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
443 {
444 int i, r;
445 struct r600_bc_alu alu;
446 int gpr = 0, base_chan = 0;
447 int ij_index = 0;
448
449 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
450 ij_index = 0;
451 if (ctx->shader->input[input].centroid)
452 ij_index++;
453 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
454 ij_index = 0;
455 /* if we have perspective add one */
456 if (ctx->input_perspective) {
457 ij_index++;
458 /* if we have perspective centroid */
459 if (ctx->input_centroid)
460 ij_index++;
461 }
462 if (ctx->shader->input[input].centroid)
463 ij_index++;
464 }
465
466 /* work out gpr and base_chan from index */
467 gpr = ij_index / 2;
468 base_chan = (2 * (ij_index % 2)) + 1;
469
470 for (i = 0; i < 8; i++) {
471 memset(&alu, 0, sizeof(struct r600_bc_alu));
472
473 if (i < 4)
474 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
475 else
476 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
477
478 if ((i > 1) && (i < 6)) {
479 alu.dst.sel = ctx->shader->input[input].gpr;
480 alu.dst.write = 1;
481 }
482
483 alu.dst.chan = i % 4;
484
485 alu.src[0].sel = gpr;
486 alu.src[0].chan = (base_chan - (i % 2));
487
488 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
489
490 alu.bank_swizzle_force = SQ_ALU_VEC_210;
491 if ((i % 4) == 3)
492 alu.last = 1;
493 r = r600_bc_add_alu(ctx->bc, &alu);
494 if (r)
495 return r;
496 }
497 return 0;
498 }
499
500
501 static int tgsi_declaration(struct r600_shader_ctx *ctx)
502 {
503 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
504 struct r600_bc_vtx vtx;
505 unsigned i;
506 int r;
507
508 switch (d->Declaration.File) {
509 case TGSI_FILE_INPUT:
510 i = ctx->shader->ninput++;
511 ctx->shader->input[i].name = d->Semantic.Name;
512 ctx->shader->input[i].sid = d->Semantic.Index;
513 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
514 ctx->shader->input[i].centroid = d->Declaration.Centroid;
515 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
516 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
517 /* turn input into fetch */
518 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
519 vtx.inst = 0;
520 vtx.fetch_type = 0;
521 vtx.buffer_id = i;
522 /* register containing the index into the buffer */
523 vtx.src_gpr = 0;
524 vtx.src_sel_x = 0;
525 vtx.mega_fetch_count = 0x1F;
526 vtx.dst_gpr = ctx->shader->input[i].gpr;
527 vtx.dst_sel_x = 0;
528 vtx.dst_sel_y = 1;
529 vtx.dst_sel_z = 2;
530 vtx.dst_sel_w = 3;
531 vtx.use_const_fields = 1;
532 r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
533 if (r)
534 return r;
535 }
536 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
537 /* turn input into interpolate on EG */
538 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
539 if (ctx->shader->input[i].interpolate > 0) {
540 ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
541 evergreen_interp_alu(ctx, i);
542 }
543 }
544 }
545 break;
546 case TGSI_FILE_OUTPUT:
547 i = ctx->shader->noutput++;
548 ctx->shader->output[i].name = d->Semantic.Name;
549 ctx->shader->output[i].sid = d->Semantic.Index;
550 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
551 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
552 break;
553 case TGSI_FILE_CONSTANT:
554 case TGSI_FILE_TEMPORARY:
555 case TGSI_FILE_SAMPLER:
556 case TGSI_FILE_ADDRESS:
557 break;
558 default:
559 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
560 return -EINVAL;
561 }
562 return 0;
563 }
564
565 static int r600_get_temp(struct r600_shader_ctx *ctx)
566 {
567 return ctx->temp_reg + ctx->max_driver_temp_used++;
568 }
569
570 /*
571 * for evergreen we need to scan the shader to find the number of GPRs we need to
572 * reserve for interpolation.
573 *
574 * we need to know if we are going to emit
575 * any centroid inputs
576 * if perspective and linear are required
577 */
578 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
579 {
580 int i;
581 int num_baryc;
582
583 ctx->input_linear = FALSE;
584 ctx->input_perspective = FALSE;
585 ctx->input_centroid = FALSE;
586 ctx->num_interp_gpr = 1;
587
588 /* any centroid inputs */
589 for (i = 0; i < ctx->info.num_inputs; i++) {
590 /* skip position/face */
591 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
592 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
593 continue;
594 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
595 ctx->input_linear = TRUE;
596 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
597 ctx->input_perspective = TRUE;
598 if (ctx->info.input_centroid[i])
599 ctx->input_centroid = TRUE;
600 }
601
602 num_baryc = 0;
603 /* ignoring sample for now */
604 if (ctx->input_perspective)
605 num_baryc++;
606 if (ctx->input_linear)
607 num_baryc++;
608 if (ctx->input_centroid)
609 num_baryc *= 2;
610
611 ctx->num_interp_gpr += (num_baryc + 1) >> 1;
612
613 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
614 return ctx->num_interp_gpr;
615 }
616
617 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
618 {
619 struct tgsi_full_immediate *immediate;
620 struct r600_shader_ctx ctx;
621 struct r600_bc_output output[32];
622 unsigned output_done, noutput;
623 unsigned opcode;
624 int i, r = 0, pos0;
625
626 ctx.bc = &shader->bc;
627 ctx.bc_fetch = &shader->bc_fetch;
628 ctx.shader = shader;
629 r = r600_bc_init(ctx.bc, shader->family);
630 if (r)
631 return r;
632 ctx.tokens = tokens;
633 tgsi_scan_shader(tokens, &ctx.info);
634 tgsi_parse_init(&ctx.parse, tokens);
635 ctx.type = ctx.parse.FullHeader.Processor.Processor;
636 shader->processor_type = ctx.type;
637 if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
638 r = r600_bc_init(ctx.bc_fetch, shader->family);
639 if (r)
640 return r;
641 ctx.bc_fetch->type = -1;
642 }
643 ctx.bc->type = shader->processor_type;
644
645 /* register allocations */
646 /* Values [0,127] correspond to GPR[0..127].
647 * Values [128,159] correspond to constant buffer bank 0
648 * Values [160,191] correspond to constant buffer bank 1
649 * Values [256,511] correspond to cfile constants c[0..255].
650 * Other special values are shown in the list below.
651 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
652 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
653 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
654 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
655 * 248 SQ_ALU_SRC_0: special constant 0.0.
656 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
657 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
658 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
659 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
660 * 253 SQ_ALU_SRC_LITERAL: literal constant.
661 * 254 SQ_ALU_SRC_PV: previous vector result.
662 * 255 SQ_ALU_SRC_PS: previous scalar result.
663 */
664 for (i = 0; i < TGSI_FILE_COUNT; i++) {
665 ctx.file_offset[i] = 0;
666 }
667 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
668 ctx.file_offset[TGSI_FILE_INPUT] = 1;
669 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
670 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
671 } else {
672 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
673 }
674 }
675 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
676 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
677 }
678 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
679 ctx.info.file_count[TGSI_FILE_INPUT];
680 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
681 ctx.info.file_count[TGSI_FILE_OUTPUT];
682
683 ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
684
685 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
686 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
687 ctx.info.file_count[TGSI_FILE_TEMPORARY];
688
689 ctx.nliterals = 0;
690 ctx.literals = NULL;
691
692 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
693 tgsi_parse_token(&ctx.parse);
694 switch (ctx.parse.FullToken.Token.Type) {
695 case TGSI_TOKEN_TYPE_IMMEDIATE:
696 immediate = &ctx.parse.FullToken.FullImmediate;
697 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
698 if(ctx.literals == NULL) {
699 r = -ENOMEM;
700 goto out_err;
701 }
702 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
703 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
704 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
705 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
706 ctx.nliterals++;
707 break;
708 case TGSI_TOKEN_TYPE_DECLARATION:
709 r = tgsi_declaration(&ctx);
710 if (r)
711 goto out_err;
712 break;
713 case TGSI_TOKEN_TYPE_INSTRUCTION:
714 r = tgsi_is_supported(&ctx);
715 if (r)
716 goto out_err;
717 ctx.max_driver_temp_used = 0;
718 /* reserve first tmp for everyone */
719 r600_get_temp(&ctx);
720 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
721 if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
722 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
723 else
724 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
725 r = ctx.inst_info->process(&ctx);
726 if (r)
727 goto out_err;
728 r = r600_bc_add_literal(ctx.bc, ctx.value);
729 if (r)
730 goto out_err;
731 break;
732 default:
733 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
734 r = -EINVAL;
735 goto out_err;
736 }
737 }
738 /* export output */
739 noutput = shader->noutput;
740 for (i = 0, pos0 = 0; i < noutput; i++) {
741 memset(&output[i], 0, sizeof(struct r600_bc_output));
742 output[i].gpr = shader->output[i].gpr;
743 output[i].elem_size = 3;
744 output[i].swizzle_x = 0;
745 output[i].swizzle_y = 1;
746 output[i].swizzle_z = 2;
747 output[i].swizzle_w = 3;
748 output[i].barrier = 1;
749 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
750 output[i].array_base = i - pos0;
751 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
752 switch (ctx.type) {
753 case TGSI_PROCESSOR_VERTEX:
754 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
755 output[i].array_base = 60;
756 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
757 /* position doesn't count in array_base */
758 pos0++;
759 }
760 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
761 output[i].array_base = 61;
762 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
763 /* position doesn't count in array_base */
764 pos0++;
765 }
766 break;
767 case TGSI_PROCESSOR_FRAGMENT:
768 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
769 output[i].array_base = shader->output[i].sid;
770 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
771 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
772 output[i].array_base = 61;
773 output[i].swizzle_x = 2;
774 output[i].swizzle_y = 7;
775 output[i].swizzle_z = output[i].swizzle_w = 7;
776 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
777 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
778 output[i].array_base = 61;
779 output[i].swizzle_x = 7;
780 output[i].swizzle_y = 1;
781 output[i].swizzle_z = output[i].swizzle_w = 7;
782 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
783 } else {
784 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
785 r = -EINVAL;
786 goto out_err;
787 }
788 break;
789 default:
790 R600_ERR("unsupported processor type %d\n", ctx.type);
791 r = -EINVAL;
792 goto out_err;
793 }
794 }
795 /* add fake param output for vertex shader if no param is exported */
796 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
797 for (i = 0, pos0 = 0; i < noutput; i++) {
798 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
799 pos0 = 1;
800 break;
801 }
802 }
803 if (!pos0) {
804 memset(&output[i], 0, sizeof(struct r600_bc_output));
805 output[i].gpr = 0;
806 output[i].elem_size = 3;
807 output[i].swizzle_x = 0;
808 output[i].swizzle_y = 1;
809 output[i].swizzle_z = 2;
810 output[i].swizzle_w = 3;
811 output[i].barrier = 1;
812 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
813 output[i].array_base = 0;
814 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
815 noutput++;
816 }
817 }
818 /* add fake pixel export */
819 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
820 memset(&output[0], 0, sizeof(struct r600_bc_output));
821 output[0].gpr = 0;
822 output[0].elem_size = 3;
823 output[0].swizzle_x = 7;
824 output[0].swizzle_y = 7;
825 output[0].swizzle_z = 7;
826 output[0].swizzle_w = 7;
827 output[0].barrier = 1;
828 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
829 output[0].array_base = 0;
830 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
831 noutput++;
832 }
833 /* set export done on last export of each type */
834 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
835 if (i == (noutput - 1)) {
836 output[i].end_of_program = 1;
837 }
838 if (!(output_done & (1 << output[i].type))) {
839 output_done |= (1 << output[i].type);
840 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
841 }
842 }
843 /* add return to fetch shader */
844 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
845 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
846 r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
847 } else {
848 r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
849 }
850 }
851 /* add output to bytecode */
852 for (i = 0; i < noutput; i++) {
853 r = r600_bc_add_output(ctx.bc, &output[i]);
854 if (r)
855 goto out_err;
856 }
857 free(ctx.literals);
858 tgsi_parse_free(&ctx.parse);
859 return 0;
860 out_err:
861 free(ctx.literals);
862 tgsi_parse_free(&ctx.parse);
863 return r;
864 }
865
866 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
867 {
868 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
869 return -EINVAL;
870 }
871
/* Handler that emits nothing and always reports success. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
876
877 static int tgsi_src(struct r600_shader_ctx *ctx,
878 const struct tgsi_full_src_register *tgsi_src,
879 struct r600_bc_alu_src *r600_src)
880 {
881 int index;
882 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
883 r600_src->sel = tgsi_src->Register.Index;
884 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
885 r600_src->sel = 0;
886 index = tgsi_src->Register.Index;
887 ctx->value[0] = ctx->literals[index * 4 + 0];
888 ctx->value[1] = ctx->literals[index * 4 + 1];
889 ctx->value[2] = ctx->literals[index * 4 + 2];
890 ctx->value[3] = ctx->literals[index * 4 + 3];
891 }
892 if (tgsi_src->Register.Indirect)
893 r600_src->rel = V_SQ_REL_RELATIVE;
894 r600_src->neg = tgsi_src->Register.Negate;
895 r600_src->abs = tgsi_src->Register.Absolute;
896 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
897 return 0;
898 }
899
900 static int tgsi_dst(struct r600_shader_ctx *ctx,
901 const struct tgsi_full_dst_register *tgsi_dst,
902 unsigned swizzle,
903 struct r600_bc_alu_dst *r600_dst)
904 {
905 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
906
907 r600_dst->sel = tgsi_dst->Register.Index;
908 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
909 r600_dst->chan = swizzle;
910 r600_dst->write = 1;
911 if (tgsi_dst->Register.Indirect)
912 r600_dst->rel = V_SQ_REL_RELATIVE;
913 if (inst->Instruction.Saturate) {
914 r600_dst->clamp = 1;
915 }
916 return 0;
917 }
918
919 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
920 {
921 switch (swizzle) {
922 case 0:
923 return tgsi_src->Register.SwizzleX;
924 case 1:
925 return tgsi_src->Register.SwizzleY;
926 case 2:
927 return tgsi_src->Register.SwizzleZ;
928 case 3:
929 return tgsi_src->Register.SwizzleW;
930 default:
931 return 0;
932 }
933 }
934
935 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
936 {
937 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
938 struct r600_bc_alu alu;
939 int i, j, k, nconst, r;
940
941 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
942 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
943 nconst++;
944 }
945 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
946 if (r) {
947 return r;
948 }
949 }
950 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
951 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
952 int treg = r600_get_temp(ctx);
953 for (k = 0; k < 4; k++) {
954 memset(&alu, 0, sizeof(struct r600_bc_alu));
955 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
956 alu.src[0].sel = r600_src[i].sel;
957 alu.src[0].chan = k;
958 alu.src[0].rel = r600_src[i].rel;
959 alu.dst.sel = treg;
960 alu.dst.chan = k;
961 alu.dst.write = 1;
962 if (k == 3)
963 alu.last = 1;
964 r = r600_bc_add_alu(ctx->bc, &alu);
965 if (r)
966 return r;
967 }
968 r600_src[i].sel = treg;
969 r600_src[i].rel =0;
970 j--;
971 }
972 }
973 return 0;
974 }
975
976 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
977 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
978 {
979 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
980 struct r600_bc_alu alu;
981 int i, j, k, nliteral, r;
982
983 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
984 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
985 nliteral++;
986 }
987 }
988 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
989 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
990 int treg = r600_get_temp(ctx);
991 for (k = 0; k < 4; k++) {
992 memset(&alu, 0, sizeof(struct r600_bc_alu));
993 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
994 alu.src[0].sel = r600_src[i].sel;
995 alu.src[0].chan = k;
996 alu.dst.sel = treg;
997 alu.dst.chan = k;
998 alu.dst.write = 1;
999 if (k == 3)
1000 alu.last = 1;
1001 r = r600_bc_add_alu(ctx->bc, &alu);
1002 if (r)
1003 return r;
1004 }
1005 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
1006 if (r)
1007 return r;
1008 r600_src[i].sel = treg;
1009 j--;
1010 }
1011 }
1012 return 0;
1013 }
1014
1015 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1016 {
1017 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1018 struct r600_bc_alu_src r600_src[3];
1019 struct r600_bc_alu alu;
1020 int i, j, r;
1021 int lasti = 0;
1022
1023 for (i = 0; i < 4; i++) {
1024 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1025 lasti = i;
1026 }
1027 }
1028
1029 r = tgsi_split_constant(ctx, r600_src);
1030 if (r)
1031 return r;
1032 r = tgsi_split_literal_constant(ctx, r600_src);
1033 if (r)
1034 return r;
1035 for (i = 0; i < lasti + 1; i++) {
1036 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1037 continue;
1038
1039 memset(&alu, 0, sizeof(struct r600_bc_alu));
1040 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1041 if (r)
1042 return r;
1043
1044 alu.inst = ctx->inst_info->r600_opcode;
1045 if (!swap) {
1046 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1047 alu.src[j] = r600_src[j];
1048 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1049 }
1050 } else {
1051 alu.src[0] = r600_src[1];
1052 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1053
1054 alu.src[1] = r600_src[0];
1055 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1056 }
1057 /* handle some special cases */
1058 switch (ctx->inst_info->tgsi_opcode) {
1059 case TGSI_OPCODE_SUB:
1060 alu.src[1].neg = 1;
1061 break;
1062 case TGSI_OPCODE_ABS:
1063 alu.src[0].abs = 1;
1064 break;
1065 default:
1066 break;
1067 }
1068 if (i == lasti) {
1069 alu.last = 1;
1070 }
1071 r = r600_bc_add_alu(ctx->bc, &alu);
1072 if (r)
1073 return r;
1074 }
1075 return 0;
1076 }
1077
/* Per-channel ALU op with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;

	return tgsi_op2_s(ctx, swap);
}
1082
/* Per-channel ALU op with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;

	return tgsi_op2_s(ctx, swap);
}
1087
1088 /*
1089 * r600 - trunc to -PI..PI range
1090 * r700 - normalize by dividing by 2PI
1091 * see fdo bug 27901
1092 */
1093 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
1094 struct r600_bc_alu_src r600_src[3])
1095 {
1096 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1097 int r;
1098 uint32_t lit_vals[4];
1099 struct r600_bc_alu alu;
1100
1101 memset(lit_vals, 0, 4*4);
1102 r = tgsi_split_constant(ctx, r600_src);
1103 if (r)
1104 return r;
1105 r = tgsi_split_literal_constant(ctx, r600_src);
1106 if (r)
1107 return r;
1108
1109 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
1110 lit_vals[1] = fui(0.5f);
1111
1112 memset(&alu, 0, sizeof(struct r600_bc_alu));
1113 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1114 alu.is_op3 = 1;
1115
1116 alu.dst.chan = 0;
1117 alu.dst.sel = ctx->temp_reg;
1118 alu.dst.write = 1;
1119
1120 alu.src[0] = r600_src[0];
1121 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1122
1123 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1124 alu.src[1].chan = 0;
1125 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1126 alu.src[2].chan = 1;
1127 alu.last = 1;
1128 r = r600_bc_add_alu(ctx->bc, &alu);
1129 if (r)
1130 return r;
1131 r = r600_bc_add_literal(ctx->bc, lit_vals);
1132 if (r)
1133 return r;
1134
1135 memset(&alu, 0, sizeof(struct r600_bc_alu));
1136 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1137
1138 alu.dst.chan = 0;
1139 alu.dst.sel = ctx->temp_reg;
1140 alu.dst.write = 1;
1141
1142 alu.src[0].sel = ctx->temp_reg;
1143 alu.src[0].chan = 0;
1144 alu.last = 1;
1145 r = r600_bc_add_alu(ctx->bc, &alu);
1146 if (r)
1147 return r;
1148
1149 if (ctx->bc->chiprev == CHIPREV_R600) {
1150 lit_vals[0] = fui(3.1415926535897f * 2.0f);
1151 lit_vals[1] = fui(-3.1415926535897f);
1152 } else {
1153 lit_vals[0] = fui(1.0f);
1154 lit_vals[1] = fui(-0.5f);
1155 }
1156
1157 memset(&alu, 0, sizeof(struct r600_bc_alu));
1158 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1159 alu.is_op3 = 1;
1160
1161 alu.dst.chan = 0;
1162 alu.dst.sel = ctx->temp_reg;
1163 alu.dst.write = 1;
1164
1165 alu.src[0].sel = ctx->temp_reg;
1166 alu.src[0].chan = 0;
1167
1168 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1169 alu.src[1].chan = 0;
1170 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1171 alu.src[2].chan = 1;
1172 alu.last = 1;
1173 r = r600_bc_add_alu(ctx->bc, &alu);
1174 if (r)
1175 return r;
1176 r = r600_bc_add_literal(ctx->bc, lit_vals);
1177 if (r)
1178 return r;
1179 return 0;
1180 }
1181
1182 static int tgsi_trig(struct r600_shader_ctx *ctx)
1183 {
1184 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1185 struct r600_bc_alu_src r600_src[3];
1186 struct r600_bc_alu alu;
1187 int i, r;
1188 int lasti = 0;
1189
1190 r = tgsi_setup_trig(ctx, r600_src);
1191 if (r)
1192 return r;
1193
1194 memset(&alu, 0, sizeof(struct r600_bc_alu));
1195 alu.inst = ctx->inst_info->r600_opcode;
1196 alu.dst.chan = 0;
1197 alu.dst.sel = ctx->temp_reg;
1198 alu.dst.write = 1;
1199
1200 alu.src[0].sel = ctx->temp_reg;
1201 alu.src[0].chan = 0;
1202 alu.last = 1;
1203 r = r600_bc_add_alu(ctx->bc, &alu);
1204 if (r)
1205 return r;
1206
1207 /* replicate result */
1208 for (i = 0; i < 4; i++) {
1209 if (inst->Dst[0].Register.WriteMask & (1 << i))
1210 lasti = i;
1211 }
1212 for (i = 0; i < lasti + 1; i++) {
1213 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1214 continue;
1215
1216 memset(&alu, 0, sizeof(struct r600_bc_alu));
1217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1218
1219 alu.src[0].sel = ctx->temp_reg;
1220 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1221 if (r)
1222 return r;
1223 if (i == lasti)
1224 alu.last = 1;
1225 r = r600_bc_add_alu(ctx->bc, &alu);
1226 if (r)
1227 return r;
1228 }
1229 return 0;
1230 }
1231
1232 static int tgsi_scs(struct r600_shader_ctx *ctx)
1233 {
1234 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1235 struct r600_bc_alu_src r600_src[3];
1236 struct r600_bc_alu alu;
1237 int r;
1238
1239 /* We'll only need the trig stuff if we are going to write to the
1240 * X or Y components of the destination vector.
1241 */
1242 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1243 r = tgsi_setup_trig(ctx, r600_src);
1244 if (r)
1245 return r;
1246 }
1247
1248 /* dst.x = COS */
1249 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1250 memset(&alu, 0, sizeof(struct r600_bc_alu));
1251 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1252 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1253 if (r)
1254 return r;
1255
1256 alu.src[0].sel = ctx->temp_reg;
1257 alu.src[0].chan = 0;
1258 alu.last = 1;
1259 r = r600_bc_add_alu(ctx->bc, &alu);
1260 if (r)
1261 return r;
1262 }
1263
1264 /* dst.y = SIN */
1265 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1266 memset(&alu, 0, sizeof(struct r600_bc_alu));
1267 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1268 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1269 if (r)
1270 return r;
1271
1272 alu.src[0].sel = ctx->temp_reg;
1273 alu.src[0].chan = 0;
1274 alu.last = 1;
1275 r = r600_bc_add_alu(ctx->bc, &alu);
1276 if (r)
1277 return r;
1278 }
1279
1280 /* dst.z = 0.0; */
1281 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1282 memset(&alu, 0, sizeof(struct r600_bc_alu));
1283
1284 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1285
1286 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1287 if (r)
1288 return r;
1289
1290 alu.src[0].sel = V_SQ_ALU_SRC_0;
1291 alu.src[0].chan = 0;
1292
1293 alu.last = 1;
1294
1295 r = r600_bc_add_alu(ctx->bc, &alu);
1296 if (r)
1297 return r;
1298
1299 r = r600_bc_add_literal(ctx->bc, ctx->value);
1300 if (r)
1301 return r;
1302 }
1303
1304 /* dst.w = 1.0; */
1305 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1306 memset(&alu, 0, sizeof(struct r600_bc_alu));
1307
1308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1309
1310 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1311 if (r)
1312 return r;
1313
1314 alu.src[0].sel = V_SQ_ALU_SRC_1;
1315 alu.src[0].chan = 0;
1316
1317 alu.last = 1;
1318
1319 r = r600_bc_add_alu(ctx->bc, &alu);
1320 if (r)
1321 return r;
1322
1323 r = r600_bc_add_literal(ctx->bc, ctx->value);
1324 if (r)
1325 return r;
1326 }
1327
1328 return 0;
1329 }
1330
1331 static int tgsi_kill(struct r600_shader_ctx *ctx)
1332 {
1333 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1334 struct r600_bc_alu alu;
1335 int i, r;
1336
1337 for (i = 0; i < 4; i++) {
1338 memset(&alu, 0, sizeof(struct r600_bc_alu));
1339 alu.inst = ctx->inst_info->r600_opcode;
1340
1341 alu.dst.chan = i;
1342
1343 alu.src[0].sel = V_SQ_ALU_SRC_0;
1344
1345 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1346 alu.src[1].sel = V_SQ_ALU_SRC_1;
1347 alu.src[1].neg = 1;
1348 } else {
1349 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1350 if (r)
1351 return r;
1352 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1353 }
1354 if (i == 3) {
1355 alu.last = 1;
1356 }
1357 r = r600_bc_add_alu(ctx->bc, &alu);
1358 if (r)
1359 return r;
1360 }
1361 r = r600_bc_add_literal(ctx->bc, ctx->value);
1362 if (r)
1363 return r;
1364
1365 /* kill must be last in ALU */
1366 ctx->bc->force_add_cf = 1;
1367 ctx->shader->uses_kill = TRUE;
1368 return 0;
1369 }
1370
1371 static int tgsi_lit(struct r600_shader_ctx *ctx)
1372 {
1373 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1374 struct r600_bc_alu alu;
1375 struct r600_bc_alu_src r600_src[3];
1376 int r;
1377
1378 r = tgsi_split_constant(ctx, r600_src);
1379 if (r)
1380 return r;
1381 r = tgsi_split_literal_constant(ctx, r600_src);
1382 if (r)
1383 return r;
1384
1385 /* dst.x, <- 1.0 */
1386 memset(&alu, 0, sizeof(struct r600_bc_alu));
1387 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1388 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1389 alu.src[0].chan = 0;
1390 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1391 if (r)
1392 return r;
1393 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1394 r = r600_bc_add_alu(ctx->bc, &alu);
1395 if (r)
1396 return r;
1397
1398 /* dst.y = max(src.x, 0.0) */
1399 memset(&alu, 0, sizeof(struct r600_bc_alu));
1400 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1401 alu.src[0] = r600_src[0];
1402 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1403 alu.src[1].chan = 0;
1404 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1405 if (r)
1406 return r;
1407 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1408 r = r600_bc_add_alu(ctx->bc, &alu);
1409 if (r)
1410 return r;
1411
1412 /* dst.w, <- 1.0 */
1413 memset(&alu, 0, sizeof(struct r600_bc_alu));
1414 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1415 alu.src[0].sel = V_SQ_ALU_SRC_1;
1416 alu.src[0].chan = 0;
1417 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1418 if (r)
1419 return r;
1420 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1421 alu.last = 1;
1422 r = r600_bc_add_alu(ctx->bc, &alu);
1423 if (r)
1424 return r;
1425
1426 r = r600_bc_add_literal(ctx->bc, ctx->value);
1427 if (r)
1428 return r;
1429
1430 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1431 {
1432 int chan;
1433 int sel;
1434
1435 /* dst.z = log(src.y) */
1436 memset(&alu, 0, sizeof(struct r600_bc_alu));
1437 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1438 alu.src[0] = r600_src[0];
1439 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1440 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1441 if (r)
1442 return r;
1443 alu.last = 1;
1444 r = r600_bc_add_alu(ctx->bc, &alu);
1445 if (r)
1446 return r;
1447
1448 r = r600_bc_add_literal(ctx->bc, ctx->value);
1449 if (r)
1450 return r;
1451
1452 chan = alu.dst.chan;
1453 sel = alu.dst.sel;
1454
1455 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1456 memset(&alu, 0, sizeof(struct r600_bc_alu));
1457 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1458 alu.src[0] = r600_src[0];
1459 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1460 alu.src[1].sel = sel;
1461 alu.src[1].chan = chan;
1462
1463 alu.src[2] = r600_src[0];
1464 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1465 alu.dst.sel = ctx->temp_reg;
1466 alu.dst.chan = 0;
1467 alu.dst.write = 1;
1468 alu.is_op3 = 1;
1469 alu.last = 1;
1470 r = r600_bc_add_alu(ctx->bc, &alu);
1471 if (r)
1472 return r;
1473
1474 r = r600_bc_add_literal(ctx->bc, ctx->value);
1475 if (r)
1476 return r;
1477 /* dst.z = exp(tmp.x) */
1478 memset(&alu, 0, sizeof(struct r600_bc_alu));
1479 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1480 alu.src[0].sel = ctx->temp_reg;
1481 alu.src[0].chan = 0;
1482 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1483 if (r)
1484 return r;
1485 alu.last = 1;
1486 r = r600_bc_add_alu(ctx->bc, &alu);
1487 if (r)
1488 return r;
1489 }
1490 return 0;
1491 }
1492
1493 static int tgsi_rsq(struct r600_shader_ctx *ctx)
1494 {
1495 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1496 struct r600_bc_alu alu;
1497 int i, r;
1498
1499 memset(&alu, 0, sizeof(struct r600_bc_alu));
1500
1501 /* FIXME:
1502 * For state trackers other than OpenGL, we'll want to use
1503 * _RECIPSQRT_IEEE instead.
1504 */
1505 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1506
1507 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1508 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1509 if (r)
1510 return r;
1511 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1512 alu.src[i].abs = 1;
1513 }
1514 alu.dst.sel = ctx->temp_reg;
1515 alu.dst.write = 1;
1516 alu.last = 1;
1517 r = r600_bc_add_alu(ctx->bc, &alu);
1518 if (r)
1519 return r;
1520 r = r600_bc_add_literal(ctx->bc, ctx->value);
1521 if (r)
1522 return r;
1523 /* replicate result */
1524 return tgsi_helper_tempx_replicate(ctx);
1525 }
1526
1527 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1528 {
1529 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1530 struct r600_bc_alu alu;
1531 int i, r;
1532
1533 for (i = 0; i < 4; i++) {
1534 memset(&alu, 0, sizeof(struct r600_bc_alu));
1535 alu.src[0].sel = ctx->temp_reg;
1536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1537 alu.dst.chan = i;
1538 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1539 if (r)
1540 return r;
1541 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1542 if (i == 3)
1543 alu.last = 1;
1544 r = r600_bc_add_alu(ctx->bc, &alu);
1545 if (r)
1546 return r;
1547 }
1548 return 0;
1549 }
1550
1551 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1552 {
1553 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1554 struct r600_bc_alu alu;
1555 int i, r;
1556
1557 memset(&alu, 0, sizeof(struct r600_bc_alu));
1558 alu.inst = ctx->inst_info->r600_opcode;
1559 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1560 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1561 if (r)
1562 return r;
1563 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1564 }
1565 alu.dst.sel = ctx->temp_reg;
1566 alu.dst.write = 1;
1567 alu.last = 1;
1568 r = r600_bc_add_alu(ctx->bc, &alu);
1569 if (r)
1570 return r;
1571 r = r600_bc_add_literal(ctx->bc, ctx->value);
1572 if (r)
1573 return r;
1574 /* replicate result */
1575 return tgsi_helper_tempx_replicate(ctx);
1576 }
1577
1578 static int tgsi_pow(struct r600_shader_ctx *ctx)
1579 {
1580 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1581 struct r600_bc_alu alu;
1582 int r;
1583
1584 /* LOG2(a) */
1585 memset(&alu, 0, sizeof(struct r600_bc_alu));
1586 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1587 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1588 if (r)
1589 return r;
1590 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1591 alu.dst.sel = ctx->temp_reg;
1592 alu.dst.write = 1;
1593 alu.last = 1;
1594 r = r600_bc_add_alu(ctx->bc, &alu);
1595 if (r)
1596 return r;
1597 r = r600_bc_add_literal(ctx->bc,ctx->value);
1598 if (r)
1599 return r;
1600 /* b * LOG2(a) */
1601 memset(&alu, 0, sizeof(struct r600_bc_alu));
1602 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1603 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1604 if (r)
1605 return r;
1606 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1607 alu.src[1].sel = ctx->temp_reg;
1608 alu.dst.sel = ctx->temp_reg;
1609 alu.dst.write = 1;
1610 alu.last = 1;
1611 r = r600_bc_add_alu(ctx->bc, &alu);
1612 if (r)
1613 return r;
1614 r = r600_bc_add_literal(ctx->bc,ctx->value);
1615 if (r)
1616 return r;
1617 /* POW(a,b) = EXP2(b * LOG2(a))*/
1618 memset(&alu, 0, sizeof(struct r600_bc_alu));
1619 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1620 alu.src[0].sel = ctx->temp_reg;
1621 alu.dst.sel = ctx->temp_reg;
1622 alu.dst.write = 1;
1623 alu.last = 1;
1624 r = r600_bc_add_alu(ctx->bc, &alu);
1625 if (r)
1626 return r;
1627 r = r600_bc_add_literal(ctx->bc,ctx->value);
1628 if (r)
1629 return r;
1630 return tgsi_helper_tempx_replicate(ctx);
1631 }
1632
1633 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1634 {
1635 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1636 struct r600_bc_alu alu;
1637 struct r600_bc_alu_src r600_src[3];
1638 int i, r;
1639
1640 r = tgsi_split_constant(ctx, r600_src);
1641 if (r)
1642 return r;
1643 r = tgsi_split_literal_constant(ctx, r600_src);
1644 if (r)
1645 return r;
1646
1647 /* tmp = (src > 0 ? 1 : src) */
1648 for (i = 0; i < 4; i++) {
1649 memset(&alu, 0, sizeof(struct r600_bc_alu));
1650 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1651 alu.is_op3 = 1;
1652
1653 alu.dst.sel = ctx->temp_reg;
1654 alu.dst.chan = i;
1655
1656 alu.src[0] = r600_src[0];
1657 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1658
1659 alu.src[1].sel = V_SQ_ALU_SRC_1;
1660
1661 alu.src[2] = r600_src[0];
1662 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1663 if (i == 3)
1664 alu.last = 1;
1665 r = r600_bc_add_alu(ctx->bc, &alu);
1666 if (r)
1667 return r;
1668 }
1669 r = r600_bc_add_literal(ctx->bc, ctx->value);
1670 if (r)
1671 return r;
1672
1673 /* dst = (-tmp > 0 ? -1 : tmp) */
1674 for (i = 0; i < 4; i++) {
1675 memset(&alu, 0, sizeof(struct r600_bc_alu));
1676 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1677 alu.is_op3 = 1;
1678 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1679 if (r)
1680 return r;
1681
1682 alu.src[0].sel = ctx->temp_reg;
1683 alu.src[0].chan = i;
1684 alu.src[0].neg = 1;
1685
1686 alu.src[1].sel = V_SQ_ALU_SRC_1;
1687 alu.src[1].neg = 1;
1688
1689 alu.src[2].sel = ctx->temp_reg;
1690 alu.src[2].chan = i;
1691
1692 if (i == 3)
1693 alu.last = 1;
1694 r = r600_bc_add_alu(ctx->bc, &alu);
1695 if (r)
1696 return r;
1697 }
1698 return 0;
1699 }
1700
1701 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1702 {
1703 struct r600_bc_alu alu;
1704 int i, r;
1705
1706 r = r600_bc_add_literal(ctx->bc, ctx->value);
1707 if (r)
1708 return r;
1709 for (i = 0; i < 4; i++) {
1710 memset(&alu, 0, sizeof(struct r600_bc_alu));
1711 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1712 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1713 alu.dst.chan = i;
1714 } else {
1715 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1716 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1717 if (r)
1718 return r;
1719 alu.src[0].sel = ctx->temp_reg;
1720 alu.src[0].chan = i;
1721 }
1722 if (i == 3) {
1723 alu.last = 1;
1724 }
1725 r = r600_bc_add_alu(ctx->bc, &alu);
1726 if (r)
1727 return r;
1728 }
1729 return 0;
1730 }
1731
1732 static int tgsi_op3(struct r600_shader_ctx *ctx)
1733 {
1734 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1735 struct r600_bc_alu_src r600_src[3];
1736 struct r600_bc_alu alu;
1737 int i, j, r;
1738
1739 r = tgsi_split_constant(ctx, r600_src);
1740 if (r)
1741 return r;
1742 r = tgsi_split_literal_constant(ctx, r600_src);
1743 if (r)
1744 return r;
1745 /* do it in 2 step as op3 doesn't support writemask */
1746 for (i = 0; i < 4; i++) {
1747 memset(&alu, 0, sizeof(struct r600_bc_alu));
1748 alu.inst = ctx->inst_info->r600_opcode;
1749 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1750 alu.src[j] = r600_src[j];
1751 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1752 }
1753 alu.dst.sel = ctx->temp_reg;
1754 alu.dst.chan = i;
1755 alu.dst.write = 1;
1756 alu.is_op3 = 1;
1757 if (i == 3) {
1758 alu.last = 1;
1759 }
1760 r = r600_bc_add_alu(ctx->bc, &alu);
1761 if (r)
1762 return r;
1763 }
1764 return tgsi_helper_copy(ctx, inst);
1765 }
1766
1767 static int tgsi_dp(struct r600_shader_ctx *ctx)
1768 {
1769 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1770 struct r600_bc_alu_src r600_src[3];
1771 struct r600_bc_alu alu;
1772 int i, j, r;
1773
1774 r = tgsi_split_constant(ctx, r600_src);
1775 if (r)
1776 return r;
1777 r = tgsi_split_literal_constant(ctx, r600_src);
1778 if (r)
1779 return r;
1780 for (i = 0; i < 4; i++) {
1781 memset(&alu, 0, sizeof(struct r600_bc_alu));
1782 alu.inst = ctx->inst_info->r600_opcode;
1783 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1784 alu.src[j] = r600_src[j];
1785 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1786 }
1787 alu.dst.sel = ctx->temp_reg;
1788 alu.dst.chan = i;
1789 alu.dst.write = 1;
1790 /* handle some special cases */
1791 switch (ctx->inst_info->tgsi_opcode) {
1792 case TGSI_OPCODE_DP2:
1793 if (i > 1) {
1794 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1795 alu.src[0].chan = alu.src[1].chan = 0;
1796 }
1797 break;
1798 case TGSI_OPCODE_DP3:
1799 if (i > 2) {
1800 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1801 alu.src[0].chan = alu.src[1].chan = 0;
1802 }
1803 break;
1804 case TGSI_OPCODE_DPH:
1805 if (i == 3) {
1806 alu.src[0].sel = V_SQ_ALU_SRC_1;
1807 alu.src[0].chan = 0;
1808 alu.src[0].neg = 0;
1809 }
1810 break;
1811 default:
1812 break;
1813 }
1814 if (i == 3) {
1815 alu.last = 1;
1816 }
1817 r = r600_bc_add_alu(ctx->bc, &alu);
1818 if (r)
1819 return r;
1820 }
1821 return tgsi_helper_copy(ctx, inst);
1822 }
1823
1824 static int tgsi_tex(struct r600_shader_ctx *ctx)
1825 {
1826 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1827 struct r600_bc_tex tex;
1828 struct r600_bc_alu alu;
1829 unsigned src_gpr;
1830 int r, i;
1831 int opcode;
1832 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1833 uint32_t lit_vals[4];
1834
1835 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1836
1837 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1838 /* Add perspective divide */
1839 memset(&alu, 0, sizeof(struct r600_bc_alu));
1840 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1841 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1842 if (r)
1843 return r;
1844
1845 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1846 alu.dst.sel = ctx->temp_reg;
1847 alu.dst.chan = 3;
1848 alu.last = 1;
1849 alu.dst.write = 1;
1850 r = r600_bc_add_alu(ctx->bc, &alu);
1851 if (r)
1852 return r;
1853
1854 for (i = 0; i < 3; i++) {
1855 memset(&alu, 0, sizeof(struct r600_bc_alu));
1856 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1857 alu.src[0].sel = ctx->temp_reg;
1858 alu.src[0].chan = 3;
1859 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1860 if (r)
1861 return r;
1862 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1863 alu.dst.sel = ctx->temp_reg;
1864 alu.dst.chan = i;
1865 alu.dst.write = 1;
1866 r = r600_bc_add_alu(ctx->bc, &alu);
1867 if (r)
1868 return r;
1869 }
1870 memset(&alu, 0, sizeof(struct r600_bc_alu));
1871 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1872 alu.src[0].sel = V_SQ_ALU_SRC_1;
1873 alu.src[0].chan = 0;
1874 alu.dst.sel = ctx->temp_reg;
1875 alu.dst.chan = 3;
1876 alu.last = 1;
1877 alu.dst.write = 1;
1878 r = r600_bc_add_alu(ctx->bc, &alu);
1879 if (r)
1880 return r;
1881 src_not_temp = FALSE;
1882 src_gpr = ctx->temp_reg;
1883 }
1884
1885 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1886 int src_chan, src2_chan;
1887
1888 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1889 for (i = 0; i < 4; i++) {
1890 memset(&alu, 0, sizeof(struct r600_bc_alu));
1891 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1892 switch (i) {
1893 case 0:
1894 src_chan = 2;
1895 src2_chan = 1;
1896 break;
1897 case 1:
1898 src_chan = 2;
1899 src2_chan = 0;
1900 break;
1901 case 2:
1902 src_chan = 0;
1903 src2_chan = 2;
1904 break;
1905 case 3:
1906 src_chan = 1;
1907 src2_chan = 2;
1908 break;
1909 default:
1910 assert(0);
1911 src_chan = 0;
1912 src2_chan = 0;
1913 break;
1914 }
1915 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1916 if (r)
1917 return r;
1918 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1919 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1920 if (r)
1921 return r;
1922 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1923 alu.dst.sel = ctx->temp_reg;
1924 alu.dst.chan = i;
1925 if (i == 3)
1926 alu.last = 1;
1927 alu.dst.write = 1;
1928 r = r600_bc_add_alu(ctx->bc, &alu);
1929 if (r)
1930 return r;
1931 }
1932
1933 /* tmp1.z = RCP_e(|tmp1.z|) */
1934 memset(&alu, 0, sizeof(struct r600_bc_alu));
1935 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1936 alu.src[0].sel = ctx->temp_reg;
1937 alu.src[0].chan = 2;
1938 alu.src[0].abs = 1;
1939 alu.dst.sel = ctx->temp_reg;
1940 alu.dst.chan = 2;
1941 alu.dst.write = 1;
1942 alu.last = 1;
1943 r = r600_bc_add_alu(ctx->bc, &alu);
1944 if (r)
1945 return r;
1946
1947 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1948 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1949 * muladd has no writemask, have to use another temp
1950 */
1951 memset(&alu, 0, sizeof(struct r600_bc_alu));
1952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1953 alu.is_op3 = 1;
1954
1955 alu.src[0].sel = ctx->temp_reg;
1956 alu.src[0].chan = 0;
1957 alu.src[1].sel = ctx->temp_reg;
1958 alu.src[1].chan = 2;
1959
1960 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1961 alu.src[2].chan = 0;
1962
1963 alu.dst.sel = ctx->temp_reg;
1964 alu.dst.chan = 0;
1965 alu.dst.write = 1;
1966
1967 r = r600_bc_add_alu(ctx->bc, &alu);
1968 if (r)
1969 return r;
1970
1971 memset(&alu, 0, sizeof(struct r600_bc_alu));
1972 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1973 alu.is_op3 = 1;
1974
1975 alu.src[0].sel = ctx->temp_reg;
1976 alu.src[0].chan = 1;
1977 alu.src[1].sel = ctx->temp_reg;
1978 alu.src[1].chan = 2;
1979
1980 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1981 alu.src[2].chan = 0;
1982
1983 alu.dst.sel = ctx->temp_reg;
1984 alu.dst.chan = 1;
1985 alu.dst.write = 1;
1986
1987 alu.last = 1;
1988 r = r600_bc_add_alu(ctx->bc, &alu);
1989 if (r)
1990 return r;
1991
1992 lit_vals[0] = fui(1.5f);
1993
1994 r = r600_bc_add_literal(ctx->bc, lit_vals);
1995 if (r)
1996 return r;
1997 src_not_temp = FALSE;
1998 src_gpr = ctx->temp_reg;
1999 }
2000
2001 if (src_not_temp) {
2002 for (i = 0; i < 4; i++) {
2003 memset(&alu, 0, sizeof(struct r600_bc_alu));
2004 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2005 alu.src[0].sel = src_gpr;
2006 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2007 alu.dst.sel = ctx->temp_reg;
2008 alu.dst.chan = i;
2009 if (i == 3)
2010 alu.last = 1;
2011 alu.dst.write = 1;
2012 r = r600_bc_add_alu(ctx->bc, &alu);
2013 if (r)
2014 return r;
2015 }
2016 src_gpr = ctx->temp_reg;
2017 }
2018
2019 opcode = ctx->inst_info->r600_opcode;
2020 if (opcode == SQ_TEX_INST_SAMPLE &&
2021 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
2022 opcode = SQ_TEX_INST_SAMPLE_C;
2023
2024 memset(&tex, 0, sizeof(struct r600_bc_tex));
2025 tex.inst = opcode;
2026 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
2027 tex.resource_id = tex.sampler_id;
2028 tex.src_gpr = src_gpr;
2029 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2030 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2031 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2032 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2033 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2034 tex.src_sel_x = 0;
2035 tex.src_sel_y = 1;
2036 tex.src_sel_z = 2;
2037 tex.src_sel_w = 3;
2038
2039 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2040 tex.src_sel_x = 1;
2041 tex.src_sel_y = 0;
2042 tex.src_sel_z = 3;
2043 tex.src_sel_w = 1;
2044 }
2045
2046 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2047 tex.coord_type_x = 1;
2048 tex.coord_type_y = 1;
2049 tex.coord_type_z = 1;
2050 tex.coord_type_w = 1;
2051 }
2052
2053 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2054 tex.src_sel_w = 2;
2055
2056 r = r600_bc_add_tex(ctx->bc, &tex);
2057 if (r)
2058 return r;
2059
2060 /* add shadow ambient support - gallium doesn't do it yet */
2061 return 0;
2062 }
2063
2064 static int tgsi_lrp(struct r600_shader_ctx *ctx)
2065 {
2066 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2067 struct r600_bc_alu_src r600_src[3];
2068 struct r600_bc_alu alu;
2069 unsigned i;
2070 int r;
2071
2072 r = tgsi_split_constant(ctx, r600_src);
2073 if (r)
2074 return r;
2075 r = tgsi_split_literal_constant(ctx, r600_src);
2076 if (r)
2077 return r;
2078 /* 1 - src0 */
2079 for (i = 0; i < 4; i++) {
2080 memset(&alu, 0, sizeof(struct r600_bc_alu));
2081 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2082 alu.src[0].sel = V_SQ_ALU_SRC_1;
2083 alu.src[0].chan = 0;
2084 alu.src[1] = r600_src[0];
2085 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
2086 alu.src[1].neg = 1;
2087 alu.dst.sel = ctx->temp_reg;
2088 alu.dst.chan = i;
2089 if (i == 3) {
2090 alu.last = 1;
2091 }
2092 alu.dst.write = 1;
2093 r = r600_bc_add_alu(ctx->bc, &alu);
2094 if (r)
2095 return r;
2096 }
2097 r = r600_bc_add_literal(ctx->bc, ctx->value);
2098 if (r)
2099 return r;
2100
2101 /* (1 - src0) * src2 */
2102 for (i = 0; i < 4; i++) {
2103 memset(&alu, 0, sizeof(struct r600_bc_alu));
2104 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2105 alu.src[0].sel = ctx->temp_reg;
2106 alu.src[0].chan = i;
2107 alu.src[1] = r600_src[2];
2108 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2109 alu.dst.sel = ctx->temp_reg;
2110 alu.dst.chan = i;
2111 if (i == 3) {
2112 alu.last = 1;
2113 }
2114 alu.dst.write = 1;
2115 r = r600_bc_add_alu(ctx->bc, &alu);
2116 if (r)
2117 return r;
2118 }
2119 r = r600_bc_add_literal(ctx->bc, ctx->value);
2120 if (r)
2121 return r;
2122
2123 /* src0 * src1 + (1 - src0) * src2 */
2124 for (i = 0; i < 4; i++) {
2125 memset(&alu, 0, sizeof(struct r600_bc_alu));
2126 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2127 alu.is_op3 = 1;
2128 alu.src[0] = r600_src[0];
2129 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2130 alu.src[1] = r600_src[1];
2131 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2132 alu.src[2].sel = ctx->temp_reg;
2133 alu.src[2].chan = i;
2134 alu.dst.sel = ctx->temp_reg;
2135 alu.dst.chan = i;
2136 if (i == 3) {
2137 alu.last = 1;
2138 }
2139 r = r600_bc_add_alu(ctx->bc, &alu);
2140 if (r)
2141 return r;
2142 }
2143 return tgsi_helper_copy(ctx, inst);
2144 }
2145
2146 static int tgsi_cmp(struct r600_shader_ctx *ctx)
2147 {
2148 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2149 struct r600_bc_alu_src r600_src[3];
2150 struct r600_bc_alu alu;
2151 int use_temp = 0;
2152 int i, r;
2153
2154 r = tgsi_split_constant(ctx, r600_src);
2155 if (r)
2156 return r;
2157 r = tgsi_split_literal_constant(ctx, r600_src);
2158 if (r)
2159 return r;
2160
2161 if (inst->Dst[0].Register.WriteMask != 0xf)
2162 use_temp = 1;
2163
2164 for (i = 0; i < 4; i++) {
2165 memset(&alu, 0, sizeof(struct r600_bc_alu));
2166 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2167 alu.src[0] = r600_src[0];
2168 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2169
2170 alu.src[1] = r600_src[2];
2171 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2172
2173 alu.src[2] = r600_src[1];
2174 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2175
2176 if (use_temp)
2177 alu.dst.sel = ctx->temp_reg;
2178 else {
2179 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2180 if (r)
2181 return r;
2182 }
2183 alu.dst.chan = i;
2184 alu.dst.write = 1;
2185 alu.is_op3 = 1;
2186 if (i == 3)
2187 alu.last = 1;
2188 r = r600_bc_add_alu(ctx->bc, &alu);
2189 if (r)
2190 return r;
2191 }
2192 if (use_temp)
2193 return tgsi_helper_copy(ctx, inst);
2194 return 0;
2195 }
2196
2197 static int tgsi_xpd(struct r600_shader_ctx *ctx)
2198 {
2199 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2200 struct r600_bc_alu_src r600_src[3];
2201 struct r600_bc_alu alu;
2202 uint32_t use_temp = 0;
2203 int i, r;
2204
2205 if (inst->Dst[0].Register.WriteMask != 0xf)
2206 use_temp = 1;
2207
2208 r = tgsi_split_constant(ctx, r600_src);
2209 if (r)
2210 return r;
2211 r = tgsi_split_literal_constant(ctx, r600_src);
2212 if (r)
2213 return r;
2214
2215 for (i = 0; i < 4; i++) {
2216 memset(&alu, 0, sizeof(struct r600_bc_alu));
2217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2218
2219 alu.src[0] = r600_src[0];
2220 switch (i) {
2221 case 0:
2222 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2223 break;
2224 case 1:
2225 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2226 break;
2227 case 2:
2228 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2229 break;
2230 case 3:
2231 alu.src[0].sel = V_SQ_ALU_SRC_0;
2232 alu.src[0].chan = i;
2233 }
2234
2235 alu.src[1] = r600_src[1];
2236 switch (i) {
2237 case 0:
2238 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2239 break;
2240 case 1:
2241 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2242 break;
2243 case 2:
2244 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2245 break;
2246 case 3:
2247 alu.src[1].sel = V_SQ_ALU_SRC_0;
2248 alu.src[1].chan = i;
2249 }
2250
2251 alu.dst.sel = ctx->temp_reg;
2252 alu.dst.chan = i;
2253 alu.dst.write = 1;
2254
2255 if (i == 3)
2256 alu.last = 1;
2257 r = r600_bc_add_alu(ctx->bc, &alu);
2258 if (r)
2259 return r;
2260
2261 r = r600_bc_add_literal(ctx->bc, ctx->value);
2262 if (r)
2263 return r;
2264 }
2265
2266 for (i = 0; i < 4; i++) {
2267 memset(&alu, 0, sizeof(struct r600_bc_alu));
2268 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2269
2270 alu.src[0] = r600_src[0];
2271 switch (i) {
2272 case 0:
2273 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2274 break;
2275 case 1:
2276 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2277 break;
2278 case 2:
2279 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2280 break;
2281 case 3:
2282 alu.src[0].sel = V_SQ_ALU_SRC_0;
2283 alu.src[0].chan = i;
2284 }
2285
2286 alu.src[1] = r600_src[1];
2287 switch (i) {
2288 case 0:
2289 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2290 break;
2291 case 1:
2292 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2293 break;
2294 case 2:
2295 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2296 break;
2297 case 3:
2298 alu.src[1].sel = V_SQ_ALU_SRC_0;
2299 alu.src[1].chan = i;
2300 }
2301
2302 alu.src[2].sel = ctx->temp_reg;
2303 alu.src[2].neg = 1;
2304 alu.src[2].chan = i;
2305
2306 if (use_temp)
2307 alu.dst.sel = ctx->temp_reg;
2308 else {
2309 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2310 if (r)
2311 return r;
2312 }
2313 alu.dst.chan = i;
2314 alu.dst.write = 1;
2315 alu.is_op3 = 1;
2316 if (i == 3)
2317 alu.last = 1;
2318 r = r600_bc_add_alu(ctx->bc, &alu);
2319 if (r)
2320 return r;
2321
2322 r = r600_bc_add_literal(ctx->bc, ctx->value);
2323 if (r)
2324 return r;
2325 }
2326 if (use_temp)
2327 return tgsi_helper_copy(ctx, inst);
2328 return 0;
2329 }
2330
2331 static int tgsi_exp(struct r600_shader_ctx *ctx)
2332 {
2333 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2334 struct r600_bc_alu_src r600_src[3] = { { 0 } };
2335 struct r600_bc_alu alu;
2336 int r;
2337
2338 /* result.x = 2^floor(src); */
2339 if (inst->Dst[0].Register.WriteMask & 1) {
2340 memset(&alu, 0, sizeof(struct r600_bc_alu));
2341
2342 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2343 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2344 if (r)
2345 return r;
2346
2347 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2348
2349 alu.dst.sel = ctx->temp_reg;
2350 alu.dst.chan = 0;
2351 alu.dst.write = 1;
2352 alu.last = 1;
2353 r = r600_bc_add_alu(ctx->bc, &alu);
2354 if (r)
2355 return r;
2356
2357 r = r600_bc_add_literal(ctx->bc, ctx->value);
2358 if (r)
2359 return r;
2360
2361 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2362 alu.src[0].sel = ctx->temp_reg;
2363 alu.src[0].chan = 0;
2364
2365 alu.dst.sel = ctx->temp_reg;
2366 alu.dst.chan = 0;
2367 alu.dst.write = 1;
2368 alu.last = 1;
2369 r = r600_bc_add_alu(ctx->bc, &alu);
2370 if (r)
2371 return r;
2372
2373 r = r600_bc_add_literal(ctx->bc, ctx->value);
2374 if (r)
2375 return r;
2376 }
2377
2378 /* result.y = tmp - floor(tmp); */
2379 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2380 memset(&alu, 0, sizeof(struct r600_bc_alu));
2381
2382 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2383 alu.src[0] = r600_src[0];
2384 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2385 if (r)
2386 return r;
2387 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2388
2389 alu.dst.sel = ctx->temp_reg;
2390 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2391 // if (r)
2392 // return r;
2393 alu.dst.write = 1;
2394 alu.dst.chan = 1;
2395
2396 alu.last = 1;
2397
2398 r = r600_bc_add_alu(ctx->bc, &alu);
2399 if (r)
2400 return r;
2401 r = r600_bc_add_literal(ctx->bc, ctx->value);
2402 if (r)
2403 return r;
2404 }
2405
2406 /* result.z = RoughApprox2ToX(tmp);*/
2407 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2408 memset(&alu, 0, sizeof(struct r600_bc_alu));
2409 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2410 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2411 if (r)
2412 return r;
2413 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2414
2415 alu.dst.sel = ctx->temp_reg;
2416 alu.dst.write = 1;
2417 alu.dst.chan = 2;
2418
2419 alu.last = 1;
2420
2421 r = r600_bc_add_alu(ctx->bc, &alu);
2422 if (r)
2423 return r;
2424 r = r600_bc_add_literal(ctx->bc, ctx->value);
2425 if (r)
2426 return r;
2427 }
2428
2429 /* result.w = 1.0;*/
2430 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2431 memset(&alu, 0, sizeof(struct r600_bc_alu));
2432
2433 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2434 alu.src[0].sel = V_SQ_ALU_SRC_1;
2435 alu.src[0].chan = 0;
2436
2437 alu.dst.sel = ctx->temp_reg;
2438 alu.dst.chan = 3;
2439 alu.dst.write = 1;
2440 alu.last = 1;
2441 r = r600_bc_add_alu(ctx->bc, &alu);
2442 if (r)
2443 return r;
2444 r = r600_bc_add_literal(ctx->bc, ctx->value);
2445 if (r)
2446 return r;
2447 }
2448 return tgsi_helper_copy(ctx, inst);
2449 }
2450
2451 static int tgsi_log(struct r600_shader_ctx *ctx)
2452 {
2453 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2454 struct r600_bc_alu alu;
2455 int r;
2456
2457 /* result.x = floor(log2(src)); */
2458 if (inst->Dst[0].Register.WriteMask & 1) {
2459 memset(&alu, 0, sizeof(struct r600_bc_alu));
2460
2461 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2462 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2463 if (r)
2464 return r;
2465
2466 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2467
2468 alu.dst.sel = ctx->temp_reg;
2469 alu.dst.chan = 0;
2470 alu.dst.write = 1;
2471 alu.last = 1;
2472 r = r600_bc_add_alu(ctx->bc, &alu);
2473 if (r)
2474 return r;
2475
2476 r = r600_bc_add_literal(ctx->bc, ctx->value);
2477 if (r)
2478 return r;
2479
2480 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2481 alu.src[0].sel = ctx->temp_reg;
2482 alu.src[0].chan = 0;
2483
2484 alu.dst.sel = ctx->temp_reg;
2485 alu.dst.chan = 0;
2486 alu.dst.write = 1;
2487 alu.last = 1;
2488
2489 r = r600_bc_add_alu(ctx->bc, &alu);
2490 if (r)
2491 return r;
2492
2493 r = r600_bc_add_literal(ctx->bc, ctx->value);
2494 if (r)
2495 return r;
2496 }
2497
2498 /* result.y = src.x / (2 ^ floor(log2(src.x))); */
2499 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2500 memset(&alu, 0, sizeof(struct r600_bc_alu));
2501
2502 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2503 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2504 if (r)
2505 return r;
2506
2507 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2508
2509 alu.dst.sel = ctx->temp_reg;
2510 alu.dst.chan = 1;
2511 alu.dst.write = 1;
2512 alu.last = 1;
2513
2514 r = r600_bc_add_alu(ctx->bc, &alu);
2515 if (r)
2516 return r;
2517
2518 r = r600_bc_add_literal(ctx->bc, ctx->value);
2519 if (r)
2520 return r;
2521
2522 memset(&alu, 0, sizeof(struct r600_bc_alu));
2523
2524 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2525 alu.src[0].sel = ctx->temp_reg;
2526 alu.src[0].chan = 1;
2527
2528 alu.dst.sel = ctx->temp_reg;
2529 alu.dst.chan = 1;
2530 alu.dst.write = 1;
2531 alu.last = 1;
2532
2533 r = r600_bc_add_alu(ctx->bc, &alu);
2534 if (r)
2535 return r;
2536
2537 r = r600_bc_add_literal(ctx->bc, ctx->value);
2538 if (r)
2539 return r;
2540
2541 memset(&alu, 0, sizeof(struct r600_bc_alu));
2542
2543 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2544 alu.src[0].sel = ctx->temp_reg;
2545 alu.src[0].chan = 1;
2546
2547 alu.dst.sel = ctx->temp_reg;
2548 alu.dst.chan = 1;
2549 alu.dst.write = 1;
2550 alu.last = 1;
2551
2552 r = r600_bc_add_alu(ctx->bc, &alu);
2553 if (r)
2554 return r;
2555
2556 r = r600_bc_add_literal(ctx->bc, ctx->value);
2557 if (r)
2558 return r;
2559
2560 memset(&alu, 0, sizeof(struct r600_bc_alu));
2561
2562 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2563 alu.src[0].sel = ctx->temp_reg;
2564 alu.src[0].chan = 1;
2565
2566 alu.dst.sel = ctx->temp_reg;
2567 alu.dst.chan = 1;
2568 alu.dst.write = 1;
2569 alu.last = 1;
2570
2571 r = r600_bc_add_alu(ctx->bc, &alu);
2572 if (r)
2573 return r;
2574
2575 r = r600_bc_add_literal(ctx->bc, ctx->value);
2576 if (r)
2577 return r;
2578
2579 memset(&alu, 0, sizeof(struct r600_bc_alu));
2580
2581 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2582
2583 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2584 if (r)
2585 return r;
2586
2587 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2588
2589 alu.src[1].sel = ctx->temp_reg;
2590 alu.src[1].chan = 1;
2591
2592 alu.dst.sel = ctx->temp_reg;
2593 alu.dst.chan = 1;
2594 alu.dst.write = 1;
2595 alu.last = 1;
2596
2597 r = r600_bc_add_alu(ctx->bc, &alu);
2598 if (r)
2599 return r;
2600
2601 r = r600_bc_add_literal(ctx->bc, ctx->value);
2602 if (r)
2603 return r;
2604 }
2605
2606 /* result.z = log2(src);*/
2607 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2608 memset(&alu, 0, sizeof(struct r600_bc_alu));
2609
2610 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2611 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2612 if (r)
2613 return r;
2614
2615 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2616
2617 alu.dst.sel = ctx->temp_reg;
2618 alu.dst.write = 1;
2619 alu.dst.chan = 2;
2620 alu.last = 1;
2621
2622 r = r600_bc_add_alu(ctx->bc, &alu);
2623 if (r)
2624 return r;
2625
2626 r = r600_bc_add_literal(ctx->bc, ctx->value);
2627 if (r)
2628 return r;
2629 }
2630
2631 /* result.w = 1.0; */
2632 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2633 memset(&alu, 0, sizeof(struct r600_bc_alu));
2634
2635 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2636 alu.src[0].sel = V_SQ_ALU_SRC_1;
2637 alu.src[0].chan = 0;
2638
2639 alu.dst.sel = ctx->temp_reg;
2640 alu.dst.chan = 3;
2641 alu.dst.write = 1;
2642 alu.last = 1;
2643
2644 r = r600_bc_add_alu(ctx->bc, &alu);
2645 if (r)
2646 return r;
2647
2648 r = r600_bc_add_literal(ctx->bc, ctx->value);
2649 if (r)
2650 return r;
2651 }
2652
2653 return tgsi_helper_copy(ctx, inst);
2654 }
2655
2656 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2657 {
2658 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2659 struct r600_bc_alu alu;
2660 int r;
2661 memset(&alu, 0, sizeof(struct r600_bc_alu));
2662
2663 switch (inst->Instruction.Opcode) {
2664 case TGSI_OPCODE_ARL:
2665 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2666 break;
2667 case TGSI_OPCODE_ARR:
2668 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2669 break;
2670 default:
2671 assert(0);
2672 return -1;
2673 }
2674
2675 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2676 if (r)
2677 return r;
2678 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2679 alu.last = 1;
2680 alu.dst.chan = 0;
2681 alu.dst.sel = ctx->temp_reg;
2682 alu.dst.write = 1;
2683 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2684 if (r)
2685 return r;
2686 memset(&alu, 0, sizeof(struct r600_bc_alu));
2687 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2688 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2689 if (r)
2690 return r;
2691 alu.src[0].sel = ctx->temp_reg;
2692 alu.src[0].chan = 0;
2693 alu.last = 1;
2694 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2695 if (r)
2696 return r;
2697 return 0;
2698 }
2699 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2700 {
2701 /* TODO from r600c, ar values don't persist between clauses */
2702 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2703 struct r600_bc_alu alu;
2704 int r;
2705 memset(&alu, 0, sizeof(struct r600_bc_alu));
2706
2707 switch (inst->Instruction.Opcode) {
2708 case TGSI_OPCODE_ARL:
2709 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2710 break;
2711 case TGSI_OPCODE_ARR:
2712 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2713 break;
2714 default:
2715 assert(0);
2716 return -1;
2717 }
2718
2719
2720 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2721 if (r)
2722 return r;
2723 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2724
2725 alu.last = 1;
2726
2727 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2728 if (r)
2729 return r;
2730 ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2731 return 0;
2732 }
2733
2734 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2735 {
2736 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2737 struct r600_bc_alu alu;
2738 int i, r = 0;
2739
2740 for (i = 0; i < 4; i++) {
2741 memset(&alu, 0, sizeof(struct r600_bc_alu));
2742
2743 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2744 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2745 if (r)
2746 return r;
2747
2748 if (i == 0 || i == 3) {
2749 alu.src[0].sel = V_SQ_ALU_SRC_1;
2750 } else {
2751 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2752 if (r)
2753 return r;
2754 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2755 }
2756
2757 if (i == 0 || i == 2) {
2758 alu.src[1].sel = V_SQ_ALU_SRC_1;
2759 } else {
2760 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2761 if (r)
2762 return r;
2763 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2764 }
2765 if (i == 3)
2766 alu.last = 1;
2767 r = r600_bc_add_alu(ctx->bc, &alu);
2768 if (r)
2769 return r;
2770 }
2771 return 0;
2772 }
2773
2774 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2775 {
2776 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2777 struct r600_bc_alu alu;
2778 int r;
2779
2780 memset(&alu, 0, sizeof(struct r600_bc_alu));
2781 alu.inst = opcode;
2782 alu.predicate = 1;
2783
2784 alu.dst.sel = ctx->temp_reg;
2785 alu.dst.write = 1;
2786 alu.dst.chan = 0;
2787
2788 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2789 if (r)
2790 return r;
2791 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2792 alu.src[1].sel = V_SQ_ALU_SRC_0;
2793 alu.src[1].chan = 0;
2794
2795 alu.last = 1;
2796
2797 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2798 if (r)
2799 return r;
2800 return 0;
2801 }
2802
2803 static int pops(struct r600_shader_ctx *ctx, int pops)
2804 {
2805 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2806 ctx->bc->cf_last->pop_count = pops;
2807 return 0;
2808 }
2809
2810 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2811 {
2812 switch(reason) {
2813 case FC_PUSH_VPM:
2814 ctx->bc->callstack[ctx->bc->call_sp].current--;
2815 break;
2816 case FC_PUSH_WQM:
2817 case FC_LOOP:
2818 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2819 break;
2820 case FC_REP:
2821 /* TOODO : for 16 vp asic should -= 2; */
2822 ctx->bc->callstack[ctx->bc->call_sp].current --;
2823 break;
2824 }
2825 }
2826
2827 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2828 {
2829 if (check_max_only) {
2830 int diff;
2831 switch (reason) {
2832 case FC_PUSH_VPM:
2833 diff = 1;
2834 break;
2835 case FC_PUSH_WQM:
2836 diff = 4;
2837 break;
2838 default:
2839 assert(0);
2840 diff = 0;
2841 }
2842 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2843 ctx->bc->callstack[ctx->bc->call_sp].max) {
2844 ctx->bc->callstack[ctx->bc->call_sp].max =
2845 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2846 }
2847 return;
2848 }
2849 switch (reason) {
2850 case FC_PUSH_VPM:
2851 ctx->bc->callstack[ctx->bc->call_sp].current++;
2852 break;
2853 case FC_PUSH_WQM:
2854 case FC_LOOP:
2855 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2856 break;
2857 case FC_REP:
2858 ctx->bc->callstack[ctx->bc->call_sp].current++;
2859 break;
2860 }
2861
2862 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2863 ctx->bc->callstack[ctx->bc->call_sp].max) {
2864 ctx->bc->callstack[ctx->bc->call_sp].max =
2865 ctx->bc->callstack[ctx->bc->call_sp].current;
2866 }
2867 }
2868
2869 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2870 {
2871 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2872
2873 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2874 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2875 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2876 sp->num_mid++;
2877 }
2878
2879 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2880 {
2881 ctx->bc->fc_sp++;
2882 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2883 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2884 }
2885
2886 static void fc_poplevel(struct r600_shader_ctx *ctx)
2887 {
2888 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2889 if (sp->mid) {
2890 free(sp->mid);
2891 sp->mid = NULL;
2892 }
2893 sp->num_mid = 0;
2894 sp->start = NULL;
2895 sp->type = 0;
2896 ctx->bc->fc_sp--;
2897 }
2898
#if 0
/*
 * Disabled helpers: the preprocessor removes everything up to the
 * matching #endif, so none of this is compiled.
 */

/* Append a RETURN control-flow instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a JUMP with the given pop count; @offset is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, set the in-loop flag, pop @ifidx + 1, then return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with a pop
 * count of 1, register it as a mid of level @fc_sp, then pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2946
2947 static int tgsi_if(struct r600_shader_ctx *ctx)
2948 {
2949 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2950
2951 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2952
2953 fc_pushlevel(ctx, FC_IF);
2954
2955 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2956 return 0;
2957 }
2958
2959 static int tgsi_else(struct r600_shader_ctx *ctx)
2960 {
2961 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2962 ctx->bc->cf_last->pop_count = 1;
2963
2964 fc_set_mid(ctx, ctx->bc->fc_sp);
2965 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2966 return 0;
2967 }
2968
2969 static int tgsi_endif(struct r600_shader_ctx *ctx)
2970 {
2971 pops(ctx, 1);
2972 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2973 R600_ERR("if/endif unbalanced in shader\n");
2974 return -1;
2975 }
2976
2977 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2978 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2979 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2980 } else {
2981 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2982 }
2983 fc_poplevel(ctx);
2984
2985 callstack_decrease_current(ctx, FC_PUSH_VPM);
2986 return 0;
2987 }
2988
2989 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2990 {
2991 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2992
2993 fc_pushlevel(ctx, FC_LOOP);
2994
2995 /* check stack depth */
2996 callstack_check_depth(ctx, FC_LOOP, 0);
2997 return 0;
2998 }
2999
3000 static int tgsi_endloop(struct r600_shader_ctx *ctx)
3001 {
3002 int i;
3003
3004 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3005
3006 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3007 R600_ERR("loop/endloop in shader code are not paired.\n");
3008 return -EINVAL;
3009 }
3010
3011 /* fixup loop pointers - from r600isa
3012 LOOP END points to CF after LOOP START,
3013 LOOP START point to CF after LOOP END
3014 BRK/CONT point to LOOP END CF
3015 */
3016 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3017
3018 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3019
3020 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3021 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3022 }
3023 /* TODO add LOOPRET support */
3024 fc_poplevel(ctx);
3025 callstack_decrease_current(ctx, FC_LOOP);
3026 return 0;
3027 }
3028
3029 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3030 {
3031 unsigned int fscp;
3032
3033 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3034 {
3035 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3036 break;
3037 }
3038
3039 if (fscp == 0) {
3040 R600_ERR("Break not inside loop/endloop pair\n");
3041 return -EINVAL;
3042 }
3043
3044 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3045 ctx->bc->cf_last->pop_count = 1;
3046
3047 fc_set_mid(ctx, fscp);
3048
3049 pops(ctx, 1);
3050 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3051 return 0;
3052 }
3053
3054 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3055 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3056 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3057 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3058
3059 /* FIXME:
3060 * For state trackers other than OpenGL, we'll want to use
3061 * _RECIP_IEEE instead.
3062 */
3063 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3064
3065 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3066 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3067 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3068 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3069 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3070 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3071 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3072 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3073 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3074 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3075 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3076 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3077 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3078 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3079 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3080 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081 /* gap */
3082 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084 /* gap */
3085 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3088 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3090 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3092 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3093 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3094 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3095 /* gap */
3096 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3098 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3100 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3101 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3102 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3103 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3104 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3110 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3112 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3113 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3114 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3115 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3117 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3119 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3126 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3130 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3131 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3132 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3133 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3136 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3137 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3138 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3139 /* gap */
3140 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3142 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3143 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3144 /* gap */
3145 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3153 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154 /* gap */
3155 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3164 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3165 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3167 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3169 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170 /* gap */
3171 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3172 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3175 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3176 /* gap */
3177 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3178 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3181 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3182 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3183 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3184 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3186 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3187 /* gap */
3188 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3190 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3193 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3194 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3195 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3199 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3205 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3206 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3210 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216 };
3217
/*
 * TGSI opcode translation table for the "eg" code path.  Every ALU and CF
 * hardware opcode constant below carries the EG_ prefix; the SQ_TEX_INST_*
 * constants paired with tgsi_tex are the only unprefixed ones.
 *
 * Each initializer supplies four values, in this order:
 *   1. the TGSI opcode.  Bare numeric literals listed under a "gap" comment
 *      are placeholders for opcode numbers that have no TGSI_OPCODE_* name
 *      in this table; all of them use tgsi_unsupported.
 *   2. an integer flag.  It is 1 only for TGSI_OPCODE_MAD, which is also the
 *      only entry whose opcode constant is an ..._OP3_... value -- presumably
 *      it marks three-operand ALU instructions; confirm against the struct
 *      definition.
 *   3. the hardware opcode constant associated with the entry.  The NOP
 *      constant is used for every tgsi_unsupported entry and also for
 *      several entries with dedicated handlers (e.g. tgsi_lit, tgsi_if).
 *   4. the handler function for the opcode.
 *
 * Entries are listed in ascending opcode order with placeholders filling the
 * holes -- presumably so the table can be indexed directly by TGSI opcode;
 * TODO confirm against the lookup site before inserting or removing rows.
 *
 * NOTE(review): TGSI_OPCODE_TXB and TGSI_OPCODE_TXL both map to
 * SQ_TEX_INST_SAMPLE_L -- confirm that sharing one sample opcode is intended.
 * NOTE(review): TGSI_OPCODE_NOP is routed to tgsi_unsupported -- confirm that
 * rejecting NOP is intended.
 */
3218 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3219 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3220 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3221 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3222 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3223 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3224 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3225 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3226 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3227 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3228 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3229 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3230 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3231 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3232 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3233 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3234 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3235 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3236 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3237 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3238 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3239 /* gap */
3240 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3241 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242 /* gap */
3243 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3244 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3246 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3247 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3248 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3249 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3250 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3251 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3252 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3253 /* gap */
3254 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3255 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3256 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3257 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3258 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3259 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3260 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3261 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
3262 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3268 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3269 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3270 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3271 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3272 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3273 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3274 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3275 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3276 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3277 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3278 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3281 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3282 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3284 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3288 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3289 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3290 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3291 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3292 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3294 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3295 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3296 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3297 /* gap */
3298 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3299 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3300 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3301 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3302 /* gap */
3303 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3304 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3305 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3311 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3312 /* gap */
3313 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3314 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3322 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3323 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3325 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3326 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3327 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3328 /* gap */
3329 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3330 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3331 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3332 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3333 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3334 /* gap */
3335 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3336 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3341 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3342 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3343 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
3344 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
3345 /* gap */
3346 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3347 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3348 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3349 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3359 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3361 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3362 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3363 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3364 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3365 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3366 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3367 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3368 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3369 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3370 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3371 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3372 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3373 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3374 };