r600g: fix up depth write swizzles.
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 u32 *literals;
52 u32 nliterals;
53 u32 max_driver_temp_used;
54 };
55
56 struct r600_shader_tgsi_instruction {
57 unsigned tgsi_opcode;
58 unsigned is_op3;
59 unsigned r600_opcode;
60 int (*process)(struct r600_shader_ctx *ctx);
61 };
62
63 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
64 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
65
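/* Patch the destination swizzles of every vertex fetch with the component
 * swizzle of the format currently bound for that vertex element, then
 * rebuild the bytecode. Only does something for vertex shaders. */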
66 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
67 {
68 struct r600_context *rctx = r600_context(ctx);
69 const struct util_format_description *desc;
70 enum pipe_format resource_format[160];
71 unsigned i, nresources = 0;
72 struct r600_bc *bc = &shader->bc;
73 struct r600_bc_cf *cf;
74 struct r600_bc_vtx *vtx;
75
76 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
77 return 0;
78 for (i = 0; i < rctx->vertex_elements->count; i++) {
79 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
80 }
81 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
82 switch (cf->inst) {
83 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
84 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
85 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
86 desc = util_format_description(resource_format[vtx->buffer_id]);
87 if (desc == NULL) {
88 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
89 return -EINVAL;
90 }
91 vtx->dst_sel_x = desc->swizzle[0];
92 vtx->dst_sel_y = desc->swizzle[1];
93 vtx->dst_sel_z = desc->swizzle[2];
94 vtx->dst_sel_w = desc->swizzle[3];
95 }
96 break;
97 default:
98 break;
99 }
100 }
101 return r600_bc_build(&shader->bc);
102 }
103
104 int r600_pipe_shader_create(struct pipe_context *ctx,
105 struct r600_context_state *rpshader,
106 const struct tgsi_token *tokens)
107 {
108 struct r600_screen *rscreen = r600_screen(ctx->screen);
109 int r;
110
111 //fprintf(stderr, "--------------------------------------------------------------\n");
112 //tgsi_dump(tokens, 0);
113 if (rpshader == NULL)
114 return -ENOMEM;
115 rpshader->shader.family = radeon_get_family(rscreen->rw);
116 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
117 if (r) {
118 R600_ERR("translation from TGSI failed!\n");
119 return r;
120 }
121 r = r600_bc_build(&rpshader->shader.bc);
122 if (r) {
123 R600_ERR("building bytecode failed!\n");
124 return r;
125 }
126 //fprintf(stderr, "______________________________________________________________\n");
127 return 0;
128 }
129
130 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
131 {
132 struct r600_screen *rscreen = r600_screen(ctx->screen);
133 struct r600_shader *rshader = &rpshader->shader;
134 struct radeon_state *state;
135 unsigned i, tmp;
136
137 rpshader->rstate = radeon_state_decref(rpshader->rstate);
138 state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
139 if (state == NULL)
140 return -ENOMEM;
141 for (i = 0; i < 10; i++) {
142 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
143 }
144 /* TGSI has so far never provided proper semantic ids, so just use the output index: four 8-bit ids are packed into each SPI_VS_OUT_ID register */
145 for (i = 0; i < 32; i++) {
146 tmp = i << ((i & 3) * 8);
147 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
148 }
149 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
150 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
151 S_028868_STACK_SIZE(rshader->bc.nstack);
152 rpshader->rstate = state;
153 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
154 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
155 rpshader->rstate->nbo = 2;
156 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
157 rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
158 return radeon_state_pm4(state);
159 }
160
161 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
162 {
163 const struct pipe_rasterizer_state *rasterizer;
164 struct r600_screen *rscreen = r600_screen(ctx->screen);
165 struct r600_shader *rshader = &rpshader->shader;
166 struct r600_context *rctx = r600_context(ctx);
167 struct radeon_state *state;
168 unsigned i, tmp, exports_ps, num_cout;
169 boolean have_pos = FALSE;
170
171 rasterizer = &rctx->rasterizer->state.rasterizer;
172 rpshader->rstate = radeon_state_decref(rpshader->rstate);
173 state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
174 if (state == NULL)
175 return -ENOMEM;
176 for (i = 0; i < rshader->ninput; i++) {
177 tmp = S_028644_SEMANTIC(i);
178 tmp |= S_028644_SEL_CENTROID(1);
179 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
180 have_pos = TRUE;
181 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
182 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
183 rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
184 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
185 }
186 if (rasterizer->sprite_coord_enable & (1 << i)) {
187 tmp |= S_028644_PT_SPRITE_TEX(1);
188 }
189 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
190 }
191
192 exports_ps = 0;
193 num_cout = 0;
194 for (i = 0; i < rshader->noutput; i++) {
195 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
196 exports_ps |= 1;
197 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
198 exports_ps |= (1 << (num_cout+1));
199 num_cout++;
200 }
201 }
202 if (!exports_ps) {
203 /* always at least export 1 component per pixel */
204 exports_ps = 2;
205 }
206 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
207 S_0286CC_PERSP_GRADIENT_ENA(1);
208 if (have_pos) {
209 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) |
210 S_0286CC_BARYC_SAMPLE_CNTL(1);
211 state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1;
212 }
213 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
214 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
215 S_028868_STACK_SIZE(rshader->bc.nstack);
216 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
217 rpshader->rstate = state;
218 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
219 rpshader->rstate->nbo = 1;
220 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
221 return radeon_state_pm4(state);
222 }
223
224 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
225 {
226 struct r600_screen *rscreen = r600_screen(ctx->screen);
227 struct r600_context *rctx = r600_context(ctx);
228 struct r600_shader *rshader = &rpshader->shader;
229 int r;
230
231 /* copy new shader */
232 radeon_bo_decref(rscreen->rw, rpshader->bo);
233 rpshader->bo = NULL;
234 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
235 4096, NULL);
236 if (rpshader->bo == NULL) {
237 return -ENOMEM;
238 }
239 radeon_bo_map(rscreen->rw, rpshader->bo);
240 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
241 radeon_bo_unmap(rscreen->rw, rpshader->bo);
242 /* build state */
243 rshader->flat_shade = rctx->flat_shade;
244 switch (rshader->processor_type) {
245 case TGSI_PROCESSOR_VERTEX:
246 r = r600_pipe_shader_vs(ctx, rpshader);
247 break;
248 case TGSI_PROCESSOR_FRAGMENT:
249 r = r600_pipe_shader_ps(ctx, rpshader);
250 break;
251 default:
252 r = -EINVAL;
253 break;
254 }
255 return r;
256 }
257
258 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
259 {
260 struct r600_context *rctx = r600_context(ctx);
261 int r;
262
263 if (rpshader == NULL)
264 return -EINVAL;
265 /* make sure enough vertex elements are bound to feed every fetch resource of the shader */
266 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
267 R600_ERR("%d resources provided, expecting %d\n",
268 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
269 return -EINVAL;
270 }
271 r = r600_shader_update(ctx, &rpshader->shader);
272 if (r)
273 return r;
274 return r600_pipe_shader(ctx, rpshader);
275 }
276
277 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
278 {
279 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
280 int j;
281
282 if (i->Instruction.NumDstRegs > 1) {
283 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
284 return -EINVAL;
285 }
286 if (i->Instruction.Predicate) {
287 R600_ERR("predicate unsupported\n");
288 return -EINVAL;
289 }
290 #if 0
291 if (i->Instruction.Label) {
292 R600_ERR("label unsupported\n");
293 return -EINVAL;
294 }
295 #endif
296 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
297 if (i->Src[j].Register.Dimension ||
298 i->Src[j].Register.Absolute) {
299 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
300 i->Src[j].Register.Dimension,
301 i->Src[j].Register.Absolute);
302 return -EINVAL;
303 }
304 }
305 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
306 if (i->Dst[j].Register.Dimension) {
307 R600_ERR("unsupported dst (dimension)\n");
308 return -EINVAL;
309 }
310 }
311 return 0;
312 }
313
314 static int tgsi_declaration(struct r600_shader_ctx *ctx)
315 {
316 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
317 struct r600_bc_vtx vtx;
318 unsigned i;
319 int r;
320
321 switch (d->Declaration.File) {
322 case TGSI_FILE_INPUT:
323 i = ctx->shader->ninput++;
324 ctx->shader->input[i].name = d->Semantic.Name;
325 ctx->shader->input[i].sid = d->Semantic.Index;
326 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
327 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
328 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
329 /* turn input into fetch */
330 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
331 vtx.inst = 0;
332 vtx.fetch_type = 0;
333 vtx.buffer_id = i;
334 /* register containing the index into the buffer */
335 vtx.src_gpr = 0;
336 vtx.src_sel_x = 0;
337 vtx.mega_fetch_count = 0x1F;
338 vtx.dst_gpr = ctx->shader->input[i].gpr;
339 vtx.dst_sel_x = 0;
340 vtx.dst_sel_y = 1;
341 vtx.dst_sel_z = 2;
342 vtx.dst_sel_w = 3;
343 r = r600_bc_add_vtx(ctx->bc, &vtx);
344 if (r)
345 return r;
346 }
347 break;
348 case TGSI_FILE_OUTPUT:
349 i = ctx->shader->noutput++;
350 ctx->shader->output[i].name = d->Semantic.Name;
351 ctx->shader->output[i].sid = d->Semantic.Index;
352 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
353 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
354 break;
355 case TGSI_FILE_CONSTANT:
356 case TGSI_FILE_TEMPORARY:
357 case TGSI_FILE_SAMPLER:
358 case TGSI_FILE_ADDRESS:
359 break;
360 default:
361 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
362 return -EINVAL;
363 }
364 return 0;
365 }
366
367 static int r600_get_temp(struct r600_shader_ctx *ctx)
368 {
369 return ctx->temp_reg + ctx->max_driver_temp_used++;
370 }
371
372 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
373 {
374 struct tgsi_full_immediate *immediate;
375 struct r600_shader_ctx ctx;
376 struct r600_bc_output output[32];
377 unsigned output_done, noutput;
378 unsigned opcode;
379 int i, r = 0, pos0;
380
381 ctx.bc = &shader->bc;
382 ctx.shader = shader;
383 r = r600_bc_init(ctx.bc, shader->family);
384 if (r)
385 return r;
386 ctx.tokens = tokens;
387 tgsi_scan_shader(tokens, &ctx.info);
388 tgsi_parse_init(&ctx.parse, tokens);
389 ctx.type = ctx.parse.FullHeader.Processor.Processor;
390 shader->processor_type = ctx.type;
391
392 /* register allocations */
393 /* Values [0,127] correspond to GPR[0..127].
394 * Values [128,159] correspond to constant buffer bank 0
395 * Values [160,191] correspond to constant buffer bank 1
396 * Values [256,511] correspond to cfile constants c[0..255].
397 * Other special values are shown in the list below.
398 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
399 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
400 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
401 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
402 * 248 SQ_ALU_SRC_0: special constant 0.0.
403 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
404 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
405 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
406 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
407 * 253 SQ_ALU_SRC_LITERAL: literal constant.
408 * 254 SQ_ALU_SRC_PV: previous vector result.
409 * 255 SQ_ALU_SRC_PS: previous scalar result.
410 */
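/* file_offset[] below maps each TGSI register file into this address
 * space: inputs, outputs and temporaries get consecutive GPR ranges
 * (vertex shaders start inputs at GPR1, GPR0 carries the fetch index),
 * constants are addressed at 256 + index and immediates go through the
 * SQ_ALU_SRC_LITERAL (253) slot. */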
411 for (i = 0; i < TGSI_FILE_COUNT; i++) {
412 ctx.file_offset[i] = 0;
413 }
414 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
415 ctx.file_offset[TGSI_FILE_INPUT] = 1;
416 }
417 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
418 ctx.info.file_count[TGSI_FILE_INPUT];
419 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
420 ctx.info.file_count[TGSI_FILE_OUTPUT];
421 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
422 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
423 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
424 ctx.info.file_count[TGSI_FILE_TEMPORARY];
425
426 ctx.nliterals = 0;
427 ctx.literals = NULL;
428
429 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
430 tgsi_parse_token(&ctx.parse);
431 switch (ctx.parse.FullToken.Token.Type) {
432 case TGSI_TOKEN_TYPE_IMMEDIATE:
433 immediate = &ctx.parse.FullToken.FullImmediate;
434 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
435 if(ctx.literals == NULL) {
436 r = -ENOMEM;
437 goto out_err;
438 }
439 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
440 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
441 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
442 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
443 ctx.nliterals++;
444 break;
445 case TGSI_TOKEN_TYPE_DECLARATION:
446 r = tgsi_declaration(&ctx);
447 if (r)
448 goto out_err;
449 break;
450 case TGSI_TOKEN_TYPE_INSTRUCTION:
451 r = tgsi_is_supported(&ctx);
452 if (r)
453 goto out_err;
454 ctx.max_driver_temp_used = 0;
455 /* reserve the first driver temp so every instruction can use ctx.temp_reg */
456 r600_get_temp(&ctx);
457 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
458 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
459 r = ctx.inst_info->process(&ctx);
460 if (r)
461 goto out_err;
462 r = r600_bc_add_literal(ctx.bc, ctx.value);
463 if (r)
464 goto out_err;
465 break;
466 default:
467 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
468 r = -EINVAL;
469 goto out_err;
470 }
471 }
472 /* export output */
473 noutput = shader->noutput;
474 for (i = 0, pos0 = 0; i < noutput; i++) {
475 memset(&output[i], 0, sizeof(struct r600_bc_output));
476 output[i].gpr = shader->output[i].gpr;
477 output[i].elem_size = 3;
478 output[i].swizzle_x = 0;
479 output[i].swizzle_y = 1;
480 output[i].swizzle_z = 2;
481 output[i].swizzle_w = 3;
482 output[i].barrier = 1;
483 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
484 output[i].array_base = i - pos0;
485 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
486 switch (ctx.type) {
487 case TGSI_PROCESSOR_VERTEX:
488 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
489 output[i].array_base = 60;
490 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
491 /* position doesn't count in array_base */
492 pos0++;
493 }
494 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
495 output[i].array_base = 61;
496 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
497 /* position doesn't count in array_base */
498 pos0++;
499 }
500 break;
501 case TGSI_PROCESSOR_FRAGMENT:
502 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
503 output[i].array_base = shader->output[i].sid;
504 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
505 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
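/* depth is written through the Z export (array_base 61): take it from
 * the Z channel and mask out the other components (swizzle 7 = masked) */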
506 output[i].array_base = 61;
507 output[i].swizzle_x = 2;
508 output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
509 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
510 } else {
511 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
512 r = -EINVAL;
513 goto out_err;
514 }
515 break;
516 default:
517 R600_ERR("unsupported processor type %d\n", ctx.type);
518 r = -EINVAL;
519 goto out_err;
520 }
521 }
522 /* add fake param output for vertex shader if no param is exported */
523 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
524 for (i = 0, pos0 = 0; i < noutput; i++) {
525 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
526 pos0 = 1;
527 break;
528 }
529 }
530 if (!pos0) {
531 memset(&output[i], 0, sizeof(struct r600_bc_output));
532 output[i].gpr = 0;
533 output[i].elem_size = 3;
534 output[i].swizzle_x = 0;
535 output[i].swizzle_y = 1;
536 output[i].swizzle_z = 2;
537 output[i].swizzle_w = 3;
538 output[i].barrier = 1;
539 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
540 output[i].array_base = 0;
541 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
542 noutput++;
543 }
544 }
545 /* add fake pixel export */
546 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
547 memset(&output[0], 0, sizeof(struct r600_bc_output));
548 output[0].gpr = 0;
549 output[0].elem_size = 3;
550 output[0].swizzle_x = 7;
551 output[0].swizzle_y = 7;
552 output[0].swizzle_z = 7;
553 output[0].swizzle_w = 7;
554 output[0].barrier = 1;
555 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
556 output[0].array_base = 0;
557 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
558 noutput++;
559 }
560 /* set export done on last export of each type */
561 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
562 if (i == (noutput - 1)) {
563 output[i].end_of_program = 1;
564 }
565 if (!(output_done & (1 << output[i].type))) {
566 output_done |= (1 << output[i].type);
567 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
568 }
569 }
570 /* add output to bytecode */
571 for (i = 0; i < noutput; i++) {
572 r = r600_bc_add_output(ctx.bc, &output[i]);
573 if (r)
574 goto out_err;
575 }
576 free(ctx.literals);
577 tgsi_parse_free(&ctx.parse);
578 return 0;
579 out_err:
580 free(ctx.literals);
581 tgsi_parse_free(&ctx.parse);
582 return r;
583 }
584
585 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
586 {
587 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
588 return -EINVAL;
589 }
590
591 static int tgsi_end(struct r600_shader_ctx *ctx)
592 {
593 return 0;
594 }
595
596 static int tgsi_src(struct r600_shader_ctx *ctx,
597 const struct tgsi_full_src_register *tgsi_src,
598 struct r600_bc_alu_src *r600_src)
599 {
600 int index;
601 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
602 r600_src->sel = tgsi_src->Register.Index;
603 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
604 r600_src->sel = 0;
605 index = tgsi_src->Register.Index;
606 ctx->value[0] = ctx->literals[index * 4 + 0];
607 ctx->value[1] = ctx->literals[index * 4 + 1];
608 ctx->value[2] = ctx->literals[index * 4 + 2];
609 ctx->value[3] = ctx->literals[index * 4 + 3];
610 }
611 if (tgsi_src->Register.Indirect)
612 r600_src->rel = V_SQ_REL_RELATIVE;
613 r600_src->neg = tgsi_src->Register.Negate;
614 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
615 return 0;
616 }
617
618 static int tgsi_dst(struct r600_shader_ctx *ctx,
619 const struct tgsi_full_dst_register *tgsi_dst,
620 unsigned swizzle,
621 struct r600_bc_alu_dst *r600_dst)
622 {
623 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
624
625 r600_dst->sel = tgsi_dst->Register.Index;
626 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
627 r600_dst->chan = swizzle;
628 r600_dst->write = 1;
629 if (tgsi_dst->Register.Indirect)
630 r600_dst->rel = V_SQ_REL_RELATIVE;
631 if (inst->Instruction.Saturate) {
632 r600_dst->clamp = 1;
633 }
634 return 0;
635 }
636
637 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
638 {
639 switch (swizzle) {
640 case 0:
641 return tgsi_src->Register.SwizzleX;
642 case 1:
643 return tgsi_src->Register.SwizzleY;
644 case 2:
645 return tgsi_src->Register.SwizzleZ;
646 case 3:
647 return tgsi_src->Register.SwizzleW;
648 default:
649 return 0;
650 }
651 }
652
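/* Copy every constant-file source except the first one into a temporary
 * GPR so that at most one constant operand remains on the instruction
 * being built. */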
653 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
654 {
655 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
656 struct r600_bc_alu alu;
657 int i, j, k, nconst, r;
658
659 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
660 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
661 nconst++;
662 }
663 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
664 if (r) {
665 return r;
666 }
667 }
668 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
669 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
670 int treg = r600_get_temp(ctx);
671 for (k = 0; k < 4; k++) {
672 memset(&alu, 0, sizeof(struct r600_bc_alu));
673 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
674 alu.src[0].sel = r600_src[j].sel;
675 alu.src[0].chan = k;
676 alu.dst.sel = treg;
677 alu.dst.chan = k;
678 alu.dst.write = 1;
679 if (k == 3)
680 alu.last = 1;
681 r = r600_bc_add_alu(ctx->bc, &alu);
682 if (r)
683 return r;
684 }
685 r600_src[j].sel = treg;
686 j--;
687 }
688 }
689 return 0;
690 }
691
692 /* move any immediate operand into a temp - needed for the trig functions, which use the literal slot for their PI constants */
693 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
694 {
695 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
696 struct r600_bc_alu alu;
697 int i, j, k, nliteral, r;
698
699 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
700 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
701 nliteral++;
702 }
703 }
704 for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
705 if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
706 int treg = r600_get_temp(ctx);
707 for (k = 0; k < 4; k++) {
708 memset(&alu, 0, sizeof(struct r600_bc_alu));
709 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
710 alu.src[0].sel = r600_src[j].sel;
711 alu.src[0].chan = k;
712 alu.dst.sel = treg;
713 alu.dst.chan = k;
714 alu.dst.write = 1;
715 if (k == 3)
716 alu.last = 1;
717 r = r600_bc_add_alu(ctx->bc, &alu);
718 if (r)
719 return r;
720 }
721 r = r600_bc_add_literal(ctx->bc, ctx->value);
722 if (r)
723 return r;
724 r600_src[j].sel = treg;
725 j++;
726 }
727 }
728 return 0;
729 }
730
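/* Emit the two-source ALU opcode once per channel enabled in the
 * destination write mask; when 'swap' is set the two TGSI sources are
 * exchanged before being emitted. */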
731 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
732 {
733 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
734 struct r600_bc_alu_src r600_src[3];
735 struct r600_bc_alu alu;
736 int i, j, r;
737 int lasti = 0;
738
739 for (i = 0; i < 4; i++) {
740 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
741 lasti = i;
742 }
743 }
744
745 r = tgsi_split_constant(ctx, r600_src);
746 if (r)
747 return r;
748 for (i = 0; i < lasti + 1; i++) {
749 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
750 continue;
751
752 memset(&alu, 0, sizeof(struct r600_bc_alu));
753 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
754 if (r)
755 return r;
756
757 alu.inst = ctx->inst_info->r600_opcode;
758 if (!swap) {
759 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
760 alu.src[j] = r600_src[j];
761 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
762 }
763 } else {
764 alu.src[0] = r600_src[1];
765 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
766
767 alu.src[1] = r600_src[0];
768 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
769 }
770 /* handle some special cases */
771 switch (ctx->inst_info->tgsi_opcode) {
772 case TGSI_OPCODE_SUB:
773 alu.src[1].neg = 1;
774 break;
775 case TGSI_OPCODE_ABS:
776 alu.src[0].abs = 1;
777 break;
778 default:
779 break;
780 }
781 if (i == lasti) {
782 alu.last = 1;
783 }
784 r = r600_bc_add_alu(ctx->bc, &alu);
785 if (r)
786 return r;
787 }
788 return 0;
789 }
790
791 static int tgsi_op2(struct r600_shader_ctx *ctx)
792 {
793 return tgsi_op2_s(ctx, 0);
794 }
795
796 static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
797 {
798 return tgsi_op2_s(ctx, 1);
799 }
800
801 /*
802 * r600 - trunc to -PI..PI range
803 * r700 - normalize by dividing by 2PI
804 * see fdo bug 27901
805 */
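/* Concretely, for an input angle x the code below computes
 *   tmp = fract(x * 1/(2*PI) + 0.5)
 * and then remaps tmp with a second MULADD:
 *   r600: result = tmp * 2*PI - PI     (back to the -PI..PI range)
 *   r700: result = tmp * 1.0 - 0.5     (kept normalized around 0)
 */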
806 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
807 struct r600_bc_alu_src r600_src[3])
808 {
809 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
810 int r;
811 uint32_t lit_vals[4];
812 struct r600_bc_alu alu;
813
814 memset(lit_vals, 0, 4*4);
815 r = tgsi_split_constant(ctx, r600_src);
816 if (r)
817 return r;
818
819 r = tgsi_split_literal_constant(ctx, r600_src);
820 if (r)
821 return r;
822
823 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
824 lit_vals[1] = fui(0.5f);
825
826 memset(&alu, 0, sizeof(struct r600_bc_alu));
827 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
828 alu.is_op3 = 1;
829
830 alu.dst.chan = 0;
831 alu.dst.sel = ctx->temp_reg;
832 alu.dst.write = 1;
833
834 alu.src[0] = r600_src[0];
835 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
836
837 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
838 alu.src[1].chan = 0;
839 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
840 alu.src[2].chan = 1;
841 alu.last = 1;
842 r = r600_bc_add_alu(ctx->bc, &alu);
843 if (r)
844 return r;
845 r = r600_bc_add_literal(ctx->bc, lit_vals);
846 if (r)
847 return r;
848
849 memset(&alu, 0, sizeof(struct r600_bc_alu));
850 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
851
852 alu.dst.chan = 0;
853 alu.dst.sel = ctx->temp_reg;
854 alu.dst.write = 1;
855
856 alu.src[0].sel = ctx->temp_reg;
857 alu.src[0].chan = 0;
858 alu.last = 1;
859 r = r600_bc_add_alu(ctx->bc, &alu);
860 if (r)
861 return r;
862
863 if (ctx->bc->chiprev == 0) {
864 lit_vals[0] = fui(3.1415926535897f * 2.0f);
865 lit_vals[1] = fui(-3.1415926535897f);
866 } else {
867 lit_vals[0] = fui(1.0f);
868 lit_vals[1] = fui(-0.5f);
869 }
870
871 memset(&alu, 0, sizeof(struct r600_bc_alu));
872 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
873 alu.is_op3 = 1;
874
875 alu.dst.chan = 0;
876 alu.dst.sel = ctx->temp_reg;
877 alu.dst.write = 1;
878
879 alu.src[0].sel = ctx->temp_reg;
880 alu.src[0].chan = 0;
881
882 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
883 alu.src[1].chan = 0;
884 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
885 alu.src[2].chan = 1;
886 alu.last = 1;
887 r = r600_bc_add_alu(ctx->bc, &alu);
888 if (r)
889 return r;
890 r = r600_bc_add_literal(ctx->bc, lit_vals);
891 if (r)
892 return r;
893 return 0;
894 }
895
896 static int tgsi_trig(struct r600_shader_ctx *ctx)
897 {
898 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
899 struct r600_bc_alu_src r600_src[3];
900 struct r600_bc_alu alu;
901 int i, r;
902 int lasti = 0;
903
904 r = tgsi_setup_trig(ctx, r600_src);
905 if (r)
906 return r;
907
908 memset(&alu, 0, sizeof(struct r600_bc_alu));
909 alu.inst = ctx->inst_info->r600_opcode;
910 alu.dst.chan = 0;
911 alu.dst.sel = ctx->temp_reg;
912 alu.dst.write = 1;
913
914 alu.src[0].sel = ctx->temp_reg;
915 alu.src[0].chan = 0;
916 alu.last = 1;
917 r = r600_bc_add_alu(ctx->bc, &alu);
918 if (r)
919 return r;
920
921 /* replicate result */
922 for (i = 0; i < 4; i++) {
923 if (inst->Dst[0].Register.WriteMask & (1 << i))
924 lasti = i;
925 }
926 for (i = 0; i < lasti + 1; i++) {
927 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
928 continue;
929
930 memset(&alu, 0, sizeof(struct r600_bc_alu));
931 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
932
933 alu.src[0].sel = ctx->temp_reg;
934 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
935 if (r)
936 return r;
937 if (i == lasti)
938 alu.last = 1;
939 r = r600_bc_add_alu(ctx->bc, &alu);
940 if (r)
941 return r;
942 }
943 return 0;
944 }
945
946 static int tgsi_scs(struct r600_shader_ctx *ctx)
947 {
948 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
949 struct r600_bc_alu_src r600_src[3];
950 struct r600_bc_alu alu;
951 int r;
952
953 r = tgsi_setup_trig(ctx, r600_src);
954 if (r)
955 return r;
956
957
958 /* dst.x = COS */
959 memset(&alu, 0, sizeof(struct r600_bc_alu));
960 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS;
961 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
962 if (r)
963 return r;
964
965 alu.src[0].sel = ctx->temp_reg;
966 alu.src[0].chan = 0;
967 alu.last = 1;
968 r = r600_bc_add_alu(ctx->bc, &alu);
969 if (r)
970 return r;
971
972 /* dst.y = SIN */
973 memset(&alu, 0, sizeof(struct r600_bc_alu));
974 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN;
975 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
976 if (r)
977 return r;
978
979 alu.src[0].sel = ctx->temp_reg;
980 alu.src[0].chan = 0;
981 alu.last = 1;
982 r = r600_bc_add_alu(ctx->bc, &alu);
983 if (r)
984 return r;
985 return 0;
986 }
987
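/* Emit one kill compare per channel: for KILP the operands are the
 * constants 0 and -1 so the condition always holds, for conditional KIL
 * the second operand is taken from the TGSI source. */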
988 static int tgsi_kill(struct r600_shader_ctx *ctx)
989 {
990 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
991 struct r600_bc_alu alu;
992 int i, r;
993
994 for (i = 0; i < 4; i++) {
995 memset(&alu, 0, sizeof(struct r600_bc_alu));
996 alu.inst = ctx->inst_info->r600_opcode;
997
998 alu.dst.chan = i;
999
1000 alu.src[0].sel = V_SQ_ALU_SRC_0;
1001
1002 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1003 alu.src[1].sel = V_SQ_ALU_SRC_1;
1004 alu.src[1].neg = 1;
1005 } else {
1006 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1007 if (r)
1008 return r;
1009 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1010 }
1011 if (i == 3) {
1012 alu.last = 1;
1013 }
1014 r = r600_bc_add_alu(ctx->bc, &alu);
1015 if (r)
1016 return r;
1017 }
1018 r = r600_bc_add_literal(ctx->bc, ctx->value);
1019 if (r)
1020 return r;
1021
1022 /* kill must be last in ALU */
1023 ctx->bc->force_add_cf = 1;
1024 ctx->shader->uses_kill = TRUE;
1025 return 0;
1026 }
1027
1028 static int tgsi_lit(struct r600_shader_ctx *ctx)
1029 {
1030 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1031 struct r600_bc_alu alu;
1032 struct r600_bc_alu_src r600_src[3];
1033 int r;
1034
1035 r = tgsi_split_constant(ctx, r600_src);
1036 if (r)
1037 return r;
1038 r = tgsi_split_literal_constant(ctx, r600_src);
1039 if (r)
1040 return r;
1041
1042 /* dst.x <- 1.0 */
1043 memset(&alu, 0, sizeof(struct r600_bc_alu));
1044 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1045 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
1046 alu.src[0].chan = 0;
1047 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1048 if (r)
1049 return r;
1050 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1051 r = r600_bc_add_alu(ctx->bc, &alu);
1052 if (r)
1053 return r;
1054
1055 /* dst.y = max(src.x, 0.0) */
1056 memset(&alu, 0, sizeof(struct r600_bc_alu));
1057 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
1058 alu.src[0] = r600_src[0];
1059 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
1060 alu.src[1].chan = 0;
1061 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1062 if (r)
1063 return r;
1064 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1065 r = r600_bc_add_alu(ctx->bc, &alu);
1066 if (r)
1067 return r;
1068
1069 /* dst.w <- 1.0 */
1070 memset(&alu, 0, sizeof(struct r600_bc_alu));
1071 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1072 alu.src[0].sel = V_SQ_ALU_SRC_1;
1073 alu.src[0].chan = 0;
1074 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1075 if (r)
1076 return r;
1077 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1078 alu.last = 1;
1079 r = r600_bc_add_alu(ctx->bc, &alu);
1080 if (r)
1081 return r;
1082
1083 r = r600_bc_add_literal(ctx->bc, ctx->value);
1084 if (r)
1085 return r;
1086
1087 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1088 {
1089 int chan;
1090 int sel;
1091
1092 /* dst.z = log(src.y) */
1093 memset(&alu, 0, sizeof(struct r600_bc_alu));
1094 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
1095 alu.src[0] = r600_src[0];
1096 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1097 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1098 if (r)
1099 return r;
1100 alu.last = 1;
1101 r = r600_bc_add_alu(ctx->bc, &alu);
1102 if (r)
1103 return r;
1104
1105 r = r600_bc_add_literal(ctx->bc, ctx->value);
1106 if (r)
1107 return r;
1108
1109 chan = alu.dst.chan;
1110 sel = alu.dst.sel;
1111
1112 /* tmp.x = MUL_LIT(src.w, dst.z, src.x) */
1113 memset(&alu, 0, sizeof(struct r600_bc_alu));
1114 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
1115 alu.src[0] = r600_src[0];
1116 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1117 alu.src[1].sel = sel;
1118 alu.src[1].chan = chan;
1119
1120 alu.src[2] = r600_src[0];
1121 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1122 alu.dst.sel = ctx->temp_reg;
1123 alu.dst.chan = 0;
1124 alu.dst.write = 1;
1125 alu.is_op3 = 1;
1126 alu.last = 1;
1127 r = r600_bc_add_alu(ctx->bc, &alu);
1128 if (r)
1129 return r;
1130
1131 r = r600_bc_add_literal(ctx->bc, ctx->value);
1132 if (r)
1133 return r;
1134 /* dst.z = exp(tmp.x) */
1135 memset(&alu, 0, sizeof(struct r600_bc_alu));
1136 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1137 alu.src[0].sel = ctx->temp_reg;
1138 alu.src[0].chan = 0;
1139 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1140 if (r)
1141 return r;
1142 alu.last = 1;
1143 r = r600_bc_add_alu(ctx->bc, &alu);
1144 if (r)
1145 return r;
1146 }
1147 return 0;
1148 }
1149
1150 static int tgsi_trans(struct r600_shader_ctx *ctx)
1151 {
1152 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1153 struct r600_bc_alu alu;
1154 int i, j, r;
1155
1156 for (i = 0; i < 4; i++) {
1157 memset(&alu, 0, sizeof(struct r600_bc_alu));
1158 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1159 alu.inst = ctx->inst_info->r600_opcode;
1160 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1161 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1162 if (r)
1163 return r;
1164 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1165 }
1166 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1167 if (r)
1168 return r;
1169 alu.last = 1;
1170 r = r600_bc_add_alu(ctx->bc, &alu);
1171 if (r)
1172 return r;
1173 }
1174 }
1175 return 0;
1176 }
1177
1178 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1179 {
1180 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1181 struct r600_bc_alu alu;
1182 int i, r;
1183
1184 for (i = 0; i < 4; i++) {
1185 memset(&alu, 0, sizeof(struct r600_bc_alu));
1186 alu.src[0].sel = ctx->temp_reg;
1187 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1188 alu.dst.chan = i;
1189 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1190 if (r)
1191 return r;
1192 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1193 if (i == 3)
1194 alu.last = 1;
1195 r = r600_bc_add_alu(ctx->bc, &alu);
1196 if (r)
1197 return r;
1198 }
1199 return 0;
1200 }
1201
1202 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1203 {
1204 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1205 struct r600_bc_alu alu;
1206 int i, r;
1207
1208 memset(&alu, 0, sizeof(struct r600_bc_alu));
1209 alu.inst = ctx->inst_info->r600_opcode;
1210 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1211 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1212 if (r)
1213 return r;
1214 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1215 }
1216 alu.dst.sel = ctx->temp_reg;
1217 alu.dst.write = 1;
1218 alu.last = 1;
1219 r = r600_bc_add_alu(ctx->bc, &alu);
1220 if (r)
1221 return r;
1222 r = r600_bc_add_literal(ctx->bc, ctx->value);
1223 if (r)
1224 return r;
1225 /* replicate result */
1226 return tgsi_helper_tempx_replicate(ctx);
1227 }
1228
1229 static int tgsi_pow(struct r600_shader_ctx *ctx)
1230 {
1231 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1232 struct r600_bc_alu alu;
1233 int r;
1234
1235 /* LOG2(a) */
1236 memset(&alu, 0, sizeof(struct r600_bc_alu));
1237 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1238 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1239 if (r)
1240 return r;
1241 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1242 alu.dst.sel = ctx->temp_reg;
1243 alu.dst.write = 1;
1244 alu.last = 1;
1245 r = r600_bc_add_alu(ctx->bc, &alu);
1246 if (r)
1247 return r;
1248 r = r600_bc_add_literal(ctx->bc,ctx->value);
1249 if (r)
1250 return r;
1251 /* b * LOG2(a) */
1252 memset(&alu, 0, sizeof(struct r600_bc_alu));
1253 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1254 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1255 if (r)
1256 return r;
1257 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1258 alu.src[1].sel = ctx->temp_reg;
1259 alu.dst.sel = ctx->temp_reg;
1260 alu.dst.write = 1;
1261 alu.last = 1;
1262 r = r600_bc_add_alu(ctx->bc, &alu);
1263 if (r)
1264 return r;
1265 r = r600_bc_add_literal(ctx->bc,ctx->value);
1266 if (r)
1267 return r;
1268 /* POW(a,b) = EXP2(b * LOG2(a))*/
1269 memset(&alu, 0, sizeof(struct r600_bc_alu));
1270 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1271 alu.src[0].sel = ctx->temp_reg;
1272 alu.dst.sel = ctx->temp_reg;
1273 alu.dst.write = 1;
1274 alu.last = 1;
1275 r = r600_bc_add_alu(ctx->bc, &alu);
1276 if (r)
1277 return r;
1278 r = r600_bc_add_literal(ctx->bc,ctx->value);
1279 if (r)
1280 return r;
1281 return tgsi_helper_tempx_replicate(ctx);
1282 }
1283
1284 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1285 {
1286 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1287 struct r600_bc_alu alu;
1288 struct r600_bc_alu_src r600_src[3];
1289 int i, r;
1290
1291 r = tgsi_split_constant(ctx, r600_src);
1292 if (r)
1293 return r;
1294
1295 /* tmp = (src > 0 ? 1 : src) */
1296 for (i = 0; i < 4; i++) {
1297 memset(&alu, 0, sizeof(struct r600_bc_alu));
1298 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1299 alu.is_op3 = 1;
1300
1301 alu.dst.sel = ctx->temp_reg;
1302 alu.dst.chan = i;
1303
1304 alu.src[0] = r600_src[0];
1305 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1306
1307 alu.src[1].sel = V_SQ_ALU_SRC_1;
1308
1309 alu.src[2] = r600_src[0];
1310 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1311 if (i == 3)
1312 alu.last = 1;
1313 r = r600_bc_add_alu(ctx->bc, &alu);
1314 if (r)
1315 return r;
1316 }
1317 r = r600_bc_add_literal(ctx->bc, ctx->value);
1318 if (r)
1319 return r;
1320
1321 /* dst = (-tmp > 0 ? -1 : tmp) */
1322 for (i = 0; i < 4; i++) {
1323 memset(&alu, 0, sizeof(struct r600_bc_alu));
1324 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1325 alu.is_op3 = 1;
1326 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1327 if (r)
1328 return r;
1329
1330 alu.src[0].sel = ctx->temp_reg;
1331 alu.src[0].chan = i;
1332 alu.src[0].neg = 1;
1333
1334 alu.src[1].sel = V_SQ_ALU_SRC_1;
1335 alu.src[1].neg = 1;
1336
1337 alu.src[2].sel = ctx->temp_reg;
1338 alu.src[2].chan = i;
1339
1340 if (i == 3)
1341 alu.last = 1;
1342 r = r600_bc_add_alu(ctx->bc, &alu);
1343 if (r)
1344 return r;
1345 }
1346 return 0;
1347 }
1348
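/* Copy a result computed into ctx->temp_reg over to the real destination,
 * emitting a NOP in the slot of every channel that the write mask leaves
 * disabled. */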
1349 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1350 {
1351 struct r600_bc_alu alu;
1352 int i, r;
1353
1354 r = r600_bc_add_literal(ctx->bc, ctx->value);
1355 if (r)
1356 return r;
1357 for (i = 0; i < 4; i++) {
1358 memset(&alu, 0, sizeof(struct r600_bc_alu));
1359 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1360 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1361 alu.dst.chan = i;
1362 } else {
1363 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1364 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1365 if (r)
1366 return r;
1367 alu.src[0].sel = ctx->temp_reg;
1368 alu.src[0].chan = i;
1369 }
1370 if (i == 3) {
1371 alu.last = 1;
1372 }
1373 r = r600_bc_add_alu(ctx->bc, &alu);
1374 if (r)
1375 return r;
1376 }
1377 return 0;
1378 }
1379
1380 static int tgsi_op3(struct r600_shader_ctx *ctx)
1381 {
1382 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1383 struct r600_bc_alu_src r600_src[3];
1384 struct r600_bc_alu alu;
1385 int i, j, r;
1386
1387 r = tgsi_split_constant(ctx, r600_src);
1388 if (r)
1389 return r;
1390 /* do it in 2 steps as op3 doesn't support a write mask */
1391 for (i = 0; i < 4; i++) {
1392 memset(&alu, 0, sizeof(struct r600_bc_alu));
1393 alu.inst = ctx->inst_info->r600_opcode;
1394 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1395 alu.src[j] = r600_src[j];
1396 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1397 }
1398 alu.dst.sel = ctx->temp_reg;
1399 alu.dst.chan = i;
1400 alu.dst.write = 1;
1401 alu.is_op3 = 1;
1402 if (i == 3) {
1403 alu.last = 1;
1404 }
1405 r = r600_bc_add_alu(ctx->bc, &alu);
1406 if (r)
1407 return r;
1408 }
1409 return tgsi_helper_copy(ctx, inst);
1410 }
1411
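/* Dot products are emitted as a full four-channel reduction; for DP2 and
 * DP3 the extra source channels are forced to 0, and for DPH the fourth
 * channel of the first source is forced to 1. */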
1412 static int tgsi_dp(struct r600_shader_ctx *ctx)
1413 {
1414 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1415 struct r600_bc_alu_src r600_src[3];
1416 struct r600_bc_alu alu;
1417 int i, j, r;
1418
1419 r = tgsi_split_constant(ctx, r600_src);
1420 if (r)
1421 return r;
1422 for (i = 0; i < 4; i++) {
1423 memset(&alu, 0, sizeof(struct r600_bc_alu));
1424 alu.inst = ctx->inst_info->r600_opcode;
1425 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1426 alu.src[j] = r600_src[j];
1427 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1428 }
1429 alu.dst.sel = ctx->temp_reg;
1430 alu.dst.chan = i;
1431 alu.dst.write = 1;
1432 /* handle some special cases */
1433 switch (ctx->inst_info->tgsi_opcode) {
1434 case TGSI_OPCODE_DP2:
1435 if (i > 1) {
1436 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1437 alu.src[0].chan = alu.src[1].chan = 0;
1438 }
1439 break;
1440 case TGSI_OPCODE_DP3:
1441 if (i > 2) {
1442 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1443 alu.src[0].chan = alu.src[1].chan = 0;
1444 }
1445 break;
1446 case TGSI_OPCODE_DPH:
1447 if (i == 3) {
1448 alu.src[0].sel = V_SQ_ALU_SRC_1;
1449 alu.src[0].chan = 0;
1450 alu.src[0].neg = 0;
1451 }
1452 break;
1453 default:
1454 break;
1455 }
1456 if (i == 3) {
1457 alu.last = 1;
1458 }
1459 r = r600_bc_add_alu(ctx->bc, &alu);
1460 if (r)
1461 return r;
1462 }
1463 return tgsi_helper_copy(ctx, inst);
1464 }
1465
1466 static int tgsi_tex(struct r600_shader_ctx *ctx)
1467 {
1468 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1469 struct r600_bc_tex tex;
1470 struct r600_bc_alu alu;
1471 unsigned src_gpr;
1472 int r, i;
1473
1474 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1475
1476 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1477 /* Add perspective divide */
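/* i.e. compute 1/w into temp.w, multiply x/y/z by it and force the
 * temp register's w back to 1.0 before the fetch */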
1478 memset(&alu, 0, sizeof(struct r600_bc_alu));
1479 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1480 alu.src[0].sel = src_gpr;
1481 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1482 alu.dst.sel = ctx->temp_reg;
1483 alu.dst.chan = 3;
1484 alu.last = 1;
1485 alu.dst.write = 1;
1486 r = r600_bc_add_alu(ctx->bc, &alu);
1487 if (r)
1488 return r;
1489
1490 for (i = 0; i < 3; i++) {
1491 memset(&alu, 0, sizeof(struct r600_bc_alu));
1492 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1493 alu.src[0].sel = ctx->temp_reg;
1494 alu.src[0].chan = 3;
1495 alu.src[1].sel = src_gpr;
1496 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1497 alu.dst.sel = ctx->temp_reg;
1498 alu.dst.chan = i;
1499 alu.dst.write = 1;
1500 r = r600_bc_add_alu(ctx->bc, &alu);
1501 if (r)
1502 return r;
1503 }
1504 memset(&alu, 0, sizeof(struct r600_bc_alu));
1505 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1506 alu.src[0].sel = V_SQ_ALU_SRC_1;
1507 alu.src[0].chan = 0;
1508 alu.dst.sel = ctx->temp_reg;
1509 alu.dst.chan = 3;
1510 alu.last = 1;
1511 alu.dst.write = 1;
1512 r = r600_bc_add_alu(ctx->bc, &alu);
1513 if (r)
1514 return r;
1515 src_gpr = ctx->temp_reg;
1516 } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
1517 for (i = 0; i < 4; i++) {
1518 memset(&alu, 0, sizeof(struct r600_bc_alu));
1519 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1520 alu.src[0].sel = src_gpr;
1521 alu.src[0].chan = i;
1522 alu.dst.sel = ctx->temp_reg;
1523 alu.dst.chan = i;
1524 if (i == 3)
1525 alu.last = 1;
1526 alu.dst.write = 1;
1527 r = r600_bc_add_alu(ctx->bc, &alu);
1528 if (r)
1529 return r;
1530 }
1531 src_gpr = ctx->temp_reg;
1532 }
1533
1534 memset(&tex, 0, sizeof(struct r600_bc_tex));
1535 tex.inst = ctx->inst_info->r600_opcode;
1536 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1537 tex.sampler_id = tex.resource_id;
1538 tex.src_gpr = src_gpr;
1539 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1540 tex.dst_sel_x = 0;
1541 tex.dst_sel_y = 1;
1542 tex.dst_sel_z = 2;
1543 tex.dst_sel_w = 3;
1544 tex.src_sel_x = 0;
1545 tex.src_sel_y = 1;
1546 tex.src_sel_z = 2;
1547 tex.src_sel_w = 3;
1548
1549 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1550 tex.coord_type_x = 1;
1551 tex.coord_type_y = 1;
1552 tex.coord_type_z = 1;
1553 tex.coord_type_w = 1;
1554 }
1555 return r600_bc_add_tex(ctx->bc, &tex);
1556 }
1557
1558 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1559 {
1560 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1561 struct r600_bc_alu_src r600_src[3];
1562 struct r600_bc_alu alu;
1563 unsigned i;
1564 int r;
1565
1566 r = tgsi_split_constant(ctx, r600_src);
1567 if (r)
1568 return r;
1569 /* 1 - src0 */
1570 for (i = 0; i < 4; i++) {
1571 memset(&alu, 0, sizeof(struct r600_bc_alu));
1572 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1573 alu.src[0].sel = V_SQ_ALU_SRC_1;
1574 alu.src[0].chan = 0;
1575 alu.src[1] = r600_src[0];
1576 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1577 alu.src[1].neg = 1;
1578 alu.dst.sel = ctx->temp_reg;
1579 alu.dst.chan = i;
1580 if (i == 3) {
1581 alu.last = 1;
1582 }
1583 alu.dst.write = 1;
1584 r = r600_bc_add_alu(ctx->bc, &alu);
1585 if (r)
1586 return r;
1587 }
1588 r = r600_bc_add_literal(ctx->bc, ctx->value);
1589 if (r)
1590 return r;
1591
1592 /* (1 - src0) * src2 */
1593 for (i = 0; i < 4; i++) {
1594 memset(&alu, 0, sizeof(struct r600_bc_alu));
1595 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1596 alu.src[0].sel = ctx->temp_reg;
1597 alu.src[0].chan = i;
1598 alu.src[1] = r600_src[2];
1599 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1600 alu.dst.sel = ctx->temp_reg;
1601 alu.dst.chan = i;
1602 if (i == 3) {
1603 alu.last = 1;
1604 }
1605 alu.dst.write = 1;
1606 r = r600_bc_add_alu(ctx->bc, &alu);
1607 if (r)
1608 return r;
1609 }
1610 r = r600_bc_add_literal(ctx->bc, ctx->value);
1611 if (r)
1612 return r;
1613
1614 /* src0 * src1 + (1 - src0) * src2 */
1615 for (i = 0; i < 4; i++) {
1616 memset(&alu, 0, sizeof(struct r600_bc_alu));
1617 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1618 alu.is_op3 = 1;
1619 alu.src[0] = r600_src[0];
1620 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1621 alu.src[1] = r600_src[1];
1622 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1623 alu.src[2].sel = ctx->temp_reg;
1624 alu.src[2].chan = i;
1625 alu.dst.sel = ctx->temp_reg;
1626 alu.dst.chan = i;
1627 if (i == 3) {
1628 alu.last = 1;
1629 }
1630 r = r600_bc_add_alu(ctx->bc, &alu);
1631 if (r)
1632 return r;
1633 }
1634 return tgsi_helper_copy(ctx, inst);
1635 }
1636
1637 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1638 {
1639 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1640 struct r600_bc_alu_src r600_src[3];
1641 struct r600_bc_alu alu;
1642 int use_temp = 0;
1643 int i, r;
1644
1645 r = tgsi_split_constant(ctx, r600_src);
1646 if (r)
1647 return r;
1648
1649 if (inst->Dst[0].Register.WriteMask != 0xf)
1650 use_temp = 1;
1651
1652 for (i = 0; i < 4; i++) {
1653 memset(&alu, 0, sizeof(struct r600_bc_alu));
1654 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
1655 alu.src[0] = r600_src[0];
1656 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1657
1658 alu.src[1] = r600_src[2];
1659 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1660
1661 alu.src[2] = r600_src[1];
1662 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1663
1664 if (use_temp)
1665 alu.dst.sel = ctx->temp_reg;
1666 else {
1667 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1668 if (r)
1669 return r;
1670 }
1671 alu.dst.chan = i;
1672 alu.dst.write = 1;
1673 alu.is_op3 = 1;
1674 if (i == 3)
1675 alu.last = 1;
1676 r = r600_bc_add_alu(ctx->bc, &alu);
1677 if (r)
1678 return r;
1679 }
1680 if (use_temp)
1681 return tgsi_helper_copy(ctx, inst);
1682 return 0;
1683 }
1684
1685 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1686 {
1687 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1688 struct r600_bc_alu_src r600_src[3];
1689 struct r600_bc_alu alu;
1690 uint32_t use_temp = 0;
1691 int i, r;
1692
1693 if (inst->Dst[0].Register.WriteMask != 0xf)
1694 use_temp = 1;
1695
1696 r = tgsi_split_constant(ctx, r600_src);
1697 if (r)
1698 return r;
1699
1700 for (i = 0; i < 4; i++) {
1701 memset(&alu, 0, sizeof(struct r600_bc_alu));
1702 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1703
1704 alu.src[0] = r600_src[0];
1705 switch (i) {
1706 case 0:
1707 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1708 break;
1709 case 1:
1710 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1711 break;
1712 case 2:
1713 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1714 break;
1715 case 3:
1716 alu.src[0].sel = V_SQ_ALU_SRC_0;
1717 alu.src[0].chan = i;
1718 }
1719
1720 alu.src[1] = r600_src[1];
1721 switch (i) {
1722 case 0:
1723 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1724 break;
1725 case 1:
1726 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1727 break;
1728 case 2:
1729 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1730 break;
1731 case 3:
1732 alu.src[1].sel = V_SQ_ALU_SRC_0;
1733 alu.src[1].chan = i;
1734 }
1735
1736 alu.dst.sel = ctx->temp_reg;
1737 alu.dst.chan = i;
1738 alu.dst.write = 1;
1739
1740 if (i == 3)
1741 alu.last = 1;
1742 r = r600_bc_add_alu(ctx->bc, &alu);
1743 if (r)
1744 return r;
1745 }
1746
1747 for (i = 0; i < 4; i++) {
1748 memset(&alu, 0, sizeof(struct r600_bc_alu));
1749 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1750
1751 alu.src[0] = r600_src[0];
1752 switch (i) {
1753 case 0:
1754 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1755 break;
1756 case 1:
1757 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1758 break;
1759 case 2:
1760 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1761 break;
1762 case 3:
1763 alu.src[0].sel = V_SQ_ALU_SRC_0;
1764 alu.src[0].chan = i;
1765 }
1766
1767 alu.src[1] = r600_src[1];
1768 switch (i) {
1769 case 0:
1770 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1771 break;
1772 case 1:
1773 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1774 break;
1775 case 2:
1776 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1777 break;
1778 case 3:
1779 alu.src[1].sel = V_SQ_ALU_SRC_0;
1780 alu.src[1].chan = i;
1781 }
1782
1783 alu.src[2].sel = ctx->temp_reg;
1784 alu.src[2].neg = 1;
1785 alu.src[2].chan = i;
1786
1787 if (use_temp)
1788 alu.dst.sel = ctx->temp_reg;
1789 else {
1790 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1791 if (r)
1792 return r;
1793 }
1794 alu.dst.chan = i;
1795 alu.dst.write = 1;
1796 alu.is_op3 = 1;
1797 if (i == 3)
1798 alu.last = 1;
1799 r = r600_bc_add_alu(ctx->bc, &alu);
1800 if (r)
1801 return r;
1802 }
1803 if (use_temp)
1804 return tgsi_helper_copy(ctx, inst);
1805 return 0;
1806 }
1807
1808 static int tgsi_exp(struct r600_shader_ctx *ctx)
1809 {
1810 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1811 struct r600_bc_alu_src r600_src[3];
1812 struct r600_bc_alu alu;
1813 int r;
1814
1815 /* result.x = 2^floor(src); */
1816 if (inst->Dst[0].Register.WriteMask & 1) {
1817 memset(&alu, 0, sizeof(struct r600_bc_alu));
1818
1819 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
1820 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1821 if (r)
1822 return r;
1823
1824 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1825
1826 alu.dst.sel = ctx->temp_reg;
1827 alu.dst.chan = 0;
1828 alu.dst.write = 1;
1829 alu.last = 1;
1830 r = r600_bc_add_alu(ctx->bc, &alu);
1831 if (r)
1832 return r;
1833
1834 r = r600_bc_add_literal(ctx->bc, ctx->value);
1835 if (r)
1836 return r;
1837
1838 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1839 alu.src[0].sel = ctx->temp_reg;
1840 alu.src[0].chan = 0;
1841
1842 alu.dst.sel = ctx->temp_reg;
1843 alu.dst.chan = 0;
1844 alu.dst.write = 1;
1845 alu.last = 1;
1846 r = r600_bc_add_alu(ctx->bc, &alu);
1847 if (r)
1848 return r;
1849
1850 r = r600_bc_add_literal(ctx->bc, ctx->value);
1851 if (r)
1852 return r;
1853 }
1854
1855 /* result.y = tmp - floor(tmp); */
1856 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1857 memset(&alu, 0, sizeof(struct r600_bc_alu));
1858
1859 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
1860 alu.src[0] = r600_src[0];
1861 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1862 if (r)
1863 return r;
1864 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1865
1866 alu.dst.sel = ctx->temp_reg;
1867 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1868 // if (r)
1869 // return r;
1870 alu.dst.write = 1;
1871 alu.dst.chan = 1;
1872
1873 alu.last = 1;
1874
1875 r = r600_bc_add_alu(ctx->bc, &alu);
1876 if (r)
1877 return r;
1878 r = r600_bc_add_literal(ctx->bc, ctx->value);
1879 if (r)
1880 return r;
1881 }
1882
1883 /* result.z = RoughApprox2ToX(tmp); */
1884 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1885 memset(&alu, 0, sizeof(struct r600_bc_alu));
1886 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1887 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1888 if (r)
1889 return r;
1890 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1891
1892 alu.dst.sel = ctx->temp_reg;
1893 alu.dst.write = 1;
1894 alu.dst.chan = 2;
1895
1896 alu.last = 1;
1897
1898 r = r600_bc_add_alu(ctx->bc, &alu);
1899 if (r)
1900 return r;
1901 r = r600_bc_add_literal(ctx->bc, ctx->value);
1902 if (r)
1903 return r;
1904 }
1905
1906 /* result.w = 1.0; */
1907 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1908 memset(&alu, 0, sizeof(struct r600_bc_alu));
1909
1910 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1911 alu.src[0].sel = V_SQ_ALU_SRC_1;
1912 alu.src[0].chan = 0;
1913
1914 alu.dst.sel = ctx->temp_reg;
1915 alu.dst.chan = 3;
1916 alu.dst.write = 1;
1917 alu.last = 1;
1918 r = r600_bc_add_alu(ctx->bc, &alu);
1919 if (r)
1920 return r;
1921 r = r600_bc_add_literal(ctx->bc, ctx->value);
1922 if (r)
1923 return r;
1924 }
1925 return tgsi_helper_copy(ctx, inst);
1926 }
1927
1928 static int tgsi_arl(struct r600_shader_ctx *ctx)
1929 {
1930 /* TODO from r600c, ar values don't persist between clauses */
1931 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1932 struct r600_bc_alu alu;
1933 int r;
1934 memset(&alu, 0, sizeof(struct r600_bc_alu));
1935
1936 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
1937
1938 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1939 if (r)
1940 return r;
1941 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1942
1943 alu.last = 1;
1944
1945 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
1946 if (r)
1947 return r;
1948 return 0;
1949 }
1950
1951 static int tgsi_opdst(struct r600_shader_ctx *ctx)
1952 {
1953 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1954 struct r600_bc_alu alu;
1955 int i, r = 0;
1956
1957 for (i = 0; i < 4; i++) {
1958 memset(&alu, 0, sizeof(struct r600_bc_alu));
1959
1960 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1961 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1962 if (r)
1963 return r;
1964
1965 if (i == 0 || i == 3) {
1966 alu.src[0].sel = V_SQ_ALU_SRC_1;
1967 } else {
1968 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1969 if (r)
1970 return r;
1971 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1972 }
1973
1974 if (i == 0 || i == 2) {
1975 alu.src[1].sel = V_SQ_ALU_SRC_1;
1976 } else {
1977 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
1978 if (r)
1979 return r;
1980 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1981 }
1982 if (i == 3)
1983 alu.last = 1;
1984 r = r600_bc_add_alu(ctx->bc, &alu);
1985 if (r)
1986 return r;
1987 }
1988 return 0;
1989 }
1990
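/* Emit a predicate-setting compare of src0.x against 0.0f (opcode, e.g.
 * PRED_SETNE) into the temp register.  The op goes into an ALU_PUSH_BEFORE
 * clause; callers pair it with a JUMP and callstack_check_depth(). */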
1991 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
1992 {
1993 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1994 struct r600_bc_alu alu;
1995 int r;
1996
1997 memset(&alu, 0, sizeof(struct r600_bc_alu));
1998 alu.inst = opcode;
1999 alu.predicate = 1;
2000
2001 alu.dst.sel = ctx->temp_reg;
2002 alu.dst.write = 1;
2003 alu.dst.chan = 0;
2004
2005 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2006 if (r)
2007 return r;
2008 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2009 alu.src[1].sel = V_SQ_ALU_SRC_0;
2010 alu.src[1].chan = 0;
2011
2012 alu.last = 1;
2013
2014 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
2015 if (r)
2016 return r;
2017 return 0;
2018 }
2019
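/* Emit a POP CF instruction that pops 'pops' entries off the stack. */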
2020 static int pops(struct r600_shader_ctx *ctx, int pops)
2021 {
2022 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
2023 ctx->bc->cf_last->pop_count = pops;
2024 return 0;
2025 }
2026
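/* Undo the per-reason stack depth increment applied by callstack_check_depth()
 * when a control-flow construct is closed. */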
2027 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2028 {
2029 switch(reason) {
2030 case FC_PUSH_VPM:
2031 ctx->bc->callstack[ctx->bc->call_sp].current--;
2032 break;
2033 case FC_PUSH_WQM:
2034 case FC_LOOP:
2035 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2036 break;
2037 case FC_REP:
2038 		/* TODO: for a 16 vp asic this should be -= 2; */
2039 		ctx->bc->callstack[ctx->bc->call_sp].current--;
2040 break;
2041 }
2042 }
2043
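/* Track stack depth for the current call level: with check_max_only set, only
 * the recorded maximum is bumped if current + diff would exceed it; otherwise
 * the current depth is increased and the maximum updated.  The increments
 * mirror callstack_decrease_current(). */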
2044 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2045 {
2046 if (check_max_only) {
2047 int diff;
2048 switch (reason) {
2049 case FC_PUSH_VPM:
2050 diff = 1;
2051 break;
2052 case FC_PUSH_WQM:
2053 diff = 4;
2054 break;
2055 		default:
			/* only FC_PUSH_VPM/FC_PUSH_WQM reach this path; avoid reading diff uninitialized */
			diff = 0;
			break;
		}
2056 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2057 ctx->bc->callstack[ctx->bc->call_sp].max) {
2058 ctx->bc->callstack[ctx->bc->call_sp].max =
2059 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2060 }
2061 return;
2062 }
2063 switch (reason) {
2064 case FC_PUSH_VPM:
2065 ctx->bc->callstack[ctx->bc->call_sp].current++;
2066 break;
2067 case FC_PUSH_WQM:
2068 case FC_LOOP:
2069 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2070 break;
2071 case FC_REP:
2072 ctx->bc->callstack[ctx->bc->call_sp].current++;
2073 break;
2074 }
2075
2076 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2077 ctx->bc->callstack[ctx->bc->call_sp].max) {
2078 ctx->bc->callstack[ctx->bc->call_sp].max =
2079 ctx->bc->callstack[ctx->bc->call_sp].current;
2080 }
2081 }
2082
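/* Record the CF instruction just emitted (ELSE, LOOP_BREAK or LOOP_CONTINUE)
 * on the given flow-control stack entry so its branch target can be patched
 * when the enclosing construct is closed. */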
2083 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2084 {
2085 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2086
2087 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2088 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2089 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2090 sp->num_mid++;
2091 }
2092
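/* Open a new flow-control level, remembering its type and starting CF instruction. */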
2093 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2094 {
2095 ctx->bc->fc_sp++;
2096 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2097 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2098 }
2099
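/* Close the current flow-control level and free its list of mid CF instructions. */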
2100 static void fc_poplevel(struct r600_shader_ctx *ctx)
2101 {
2102 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2103 if (sp->mid) {
2104 free(sp->mid);
2105 sp->mid = NULL;
2106 }
2107 sp->num_mid = 0;
2108 sp->start = NULL;
2109 sp->type = 0;
2110 ctx->bc->fc_sp--;
2111 }
2112
2113 #if 0
2114 static int emit_return(struct r600_shader_ctx *ctx)
2115 {
2116 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
2117 return 0;
2118 }
2119
2120 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
2121 {
2122
2123 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2124 ctx->bc->cf_last->pop_count = pops;
2125 /* TODO work out offset */
2126 return 0;
2127 }
2128
2129 static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
2130 {
2131 return 0;
2132 }
2133
2134 static void emit_testflag(struct r600_shader_ctx *ctx)
2135 {
2136
2137 }
2138
2139 static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
2140 {
2141 emit_testflag(ctx);
2142 emit_jump_to_offset(ctx, 1, 4);
2143 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
2144 pops(ctx, ifidx + 1);
2145 emit_return(ctx);
2146 }
2147
2148 static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
2149 {
2150 emit_testflag(ctx);
2151
2152 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2153 ctx->bc->cf_last->pop_count = 1;
2154
2155 fc_set_mid(ctx, fc_sp);
2156
2157 pops(ctx, 1);
2158 }
2159 #endif
2160
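/* IF: set a predicate with PRED_SETNE on the condition, then emit a JUMP whose
 * target is patched later by tgsi_else()/tgsi_endif(). */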
2161 static int tgsi_if(struct r600_shader_ctx *ctx)
2162 {
2163 emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
2164
2165 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2166
2167 fc_pushlevel(ctx, FC_IF);
2168
2169 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2170 return 0;
2171 }
2172
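/* ELSE: emit the ELSE CF instruction, record it for the ENDIF fixup and make
 * the IF's JUMP branch to it. */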
2173 static int tgsi_else(struct r600_shader_ctx *ctx)
2174 {
2175 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
2176 ctx->bc->cf_last->pop_count = 1;
2177
2178 fc_set_mid(ctx, ctx->bc->fc_sp);
2179 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2180 return 0;
2181 }
2182
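/* ENDIF: emit a POP and patch the pending JUMP (or the recorded ELSE) to
 * branch past it. */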
2183 static int tgsi_endif(struct r600_shader_ctx *ctx)
2184 {
2185 pops(ctx, 1);
2186 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2187 R600_ERR("if/endif unbalanced in shader\n");
2188 		return -EINVAL;
2189 }
2190
2191 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2192 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2193 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2194 } else {
2195 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2196 }
2197 fc_poplevel(ctx);
2198
2199 callstack_decrease_current(ctx, FC_PUSH_VPM);
2200 return 0;
2201 }
2202
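/* BGNLOOP: emit LOOP_START_NO_AL and open a flow-control level for the loop. */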
2203 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2204 {
2205 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
2206
2207 fc_pushlevel(ctx, FC_LOOP);
2208
2209 /* check stack depth */
2210 callstack_check_depth(ctx, FC_LOOP, 0);
2211 return 0;
2212 }
2213
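/* ENDLOOP: emit LOOP_END, fix up the loop/break/continue branch targets (see
 * the comment below) and close the flow-control level. */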
2214 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2215 {
2216 int i;
2217
2218 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
2219
2220 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2221 		R600_ERR("loop/endloop unbalanced in shader\n");
2222 return -EINVAL;
2223 }
2224
2225 	/* fixup loop pointers - from r600isa
2226 	   LOOP END points to CF after LOOP START,
2227 	   LOOP START points to CF after LOOP END,
2228 	   BRK/CONT point to LOOP END CF
2229 	*/
2230 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2231
2232 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2233
2234 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2235 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2236 }
2237 /* TODO add LOOPRET support */
2238 fc_poplevel(ctx);
2239 callstack_decrease_current(ctx, FC_LOOP);
2240 return 0;
2241 }
2242
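/* BRK/CONT: check that an enclosing loop exists on the flow-control stack,
 * emit the corresponding LOOP_BREAK/LOOP_CONTINUE CF instruction (its target
 * is patched at ENDLOOP) and emit a POP. */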
2243 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2244 {
2245 unsigned int fscp;
2246
2247 	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) {
2249 		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2250 			break;
2251 	}
2252
2253 if (fscp == 0) {
2254 		R600_ERR("break/continue not inside loop/endloop pair\n");
2255 return -EINVAL;
2256 }
2257
2258 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2259 ctx->bc->cf_last->pop_count = 1;
2260
2261 fc_set_mid(ctx, fscp);
2262
2263 pops(ctx, 1);
2264 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2265 return 0;
2266 }
2267
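/* TGSI -> r600 translation table, kept indexable by TGSI opcode number (hence
 * the numbered gap entries).  Each entry carries the is_op3 flag, a hardware
 * opcode (NOP when the callback does all the work) and the emit callback;
 * anything not yet handled falls back to tgsi_unsupported(). */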
2268 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2269 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
2270 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2271 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2272 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2273 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2274 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2275 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2276 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2277 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2278 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2279 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2280 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2281 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2282 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2283 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2284 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2285 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2286 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2287 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2288 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2289 /* gap */
2290 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2291 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2292 /* gap */
2293 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2294 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2295 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2296 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2297 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2298 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2299 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2300 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2301 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2302 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2303 /* gap */
2304 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2305 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2306 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2307 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2308 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2309 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2310 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2311 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2312 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2313 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2314 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2315 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2316 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2317 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2318 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2319 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2320 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2321 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2322 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2323 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2324 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2325 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2326 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2327 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2328 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2329 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2330 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2331 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2332 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2333 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2334 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2335 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2336 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2337 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2338 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2339 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2340 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2341 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2342 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2343 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2344 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2345 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2346 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2347 /* gap */
2348 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2349 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2350 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2351 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2352 /* gap */
2353 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2354 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2355 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2356 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2357 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2358 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2359 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2360 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2361 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2362 /* gap */
2363 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2364 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2365 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2366 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2367 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2368 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2369 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2370 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2371 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2372 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2373 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2374 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2375 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2376 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2377 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2378 /* gap */
2379 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2380 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2381 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2382 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2383 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2384 /* gap */
2385 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2386 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2387 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2388 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2389 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2390 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2391 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2392 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2393 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2394 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2395 /* gap */
2396 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2397 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2398 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2399 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2400 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2401 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2402 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2403 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2404 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2405 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2406 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2407 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2408 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2409 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2410 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2411 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2412 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2413 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2414 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2415 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2416 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2417 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2418 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2419 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2420 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2421 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2422 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2423 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2424 };