bac96e8a30e5b4b301791f05262bba43d6c8d9db
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
/* Forward declaration: per-opcode translation entry; the actual
 * r600_shader_tgsi_instruction[] table is defined at the end of the file. */
struct r600_shader_tgsi_instruction;

/* Transient state used while translating one TGSI shader into r600 bytecode. */
struct r600_shader_ctx {
	struct tgsi_shader_info info;			/* result of tgsi_scan_shader() */
	struct tgsi_parse_context parse;		/* token stream iterator */
	const struct tgsi_token *tokens;		/* TGSI shader being translated */
	unsigned type;					/* TGSI_PROCESSOR_VERTEX / _FRAGMENT */
	unsigned file_offset[TGSI_FILE_COUNT];		/* base register selector per TGSI file */
	unsigned temp_reg;				/* first GPR reserved for driver temporaries */
	struct r600_shader_tgsi_instruction *inst_info;	/* table entry for the current opcode */
	struct r600_bc *bc;				/* bytecode being built (== &shader->bc) */
	struct r600_shader *shader;			/* shader being filled in */
	u32 value[4];					/* literal vec4 staged by tgsi_src() for the
							 * next r600_bc_add_literal() call */
	u32 *literals;					/* all immediates seen so far, 4 dwords each */
	u32 nliterals;					/* number of vec4 immediates in literals */
	u32 max_driver_temp_used;			/* driver temps handed out for the current
							 * instruction (reset per instruction) */
};

/* One row of the TGSI -> r600 opcode translation table. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;				/* TGSI_OPCODE_* */
	unsigned is_op3;				/* non-zero if r600_opcode is a 3-operand ALU op */
	unsigned r600_opcode;				/* matching V_SQ_ALU_* instruction */
	int (*process)(struct r600_shader_ctx *ctx);	/* emits bytecode for this opcode */
};

static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
65
/* Re-patch a vertex shader's fetch instructions for the currently bound
 * vertex elements and rebuild the bytecode.
 *
 * The destination swizzles of each vertex-fetch instruction depend on the
 * format of the bound vertex buffer, which is only known at draw time, so
 * they are rewritten here from the util_format description before the
 * bytecode is reassembled.  Fragment (and other) shaders need no patching.
 * Returns 0 on success or a negative errno.
 */
static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
{
	struct r600_context *rctx = r600_context(ctx);
	const struct util_format_description *desc;
	enum pipe_format resource_format[160];
	unsigned i, nresources = 0;
	struct r600_bc *bc = &shader->bc;
	struct r600_bc_cf *cf;
	struct r600_bc_vtx *vtx;

	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
		return 0;
	/* snapshot the format of every bound vertex element, indexed the same
	 * way vtx->buffer_id was assigned in tgsi_declaration() */
	for (i = 0; i < rctx->vertex_elements->count; i++) {
		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
	}
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		switch (cf->inst) {
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				desc = util_format_description(resource_format[vtx->buffer_id]);
				if (desc == NULL) {
					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
					return -EINVAL;
				}
				/* route fetched components according to the
				 * format's component ordering */
				vtx->dst_sel_x = desc->swizzle[0];
				vtx->dst_sel_y = desc->swizzle[1];
				vtx->dst_sel_z = desc->swizzle[2];
				vtx->dst_sel_w = desc->swizzle[3];
			}
			break;
		default:
			break;
		}
	}
	return r600_bc_build(&shader->bc);
}
103
104 int r600_pipe_shader_create(struct pipe_context *ctx,
105 struct r600_context_state *rpshader,
106 const struct tgsi_token *tokens)
107 {
108 struct r600_screen *rscreen = r600_screen(ctx->screen);
109 int r;
110
111 //fprintf(stderr, "--------------------------------------------------------------\n");
112 //tgsi_dump(tokens, 0);
113 if (rpshader == NULL)
114 return -ENOMEM;
115 rpshader->shader.family = radeon_get_family(rscreen->rw);
116 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
117 if (r) {
118 R600_ERR("translation from TGSI failed !\n");
119 return r;
120 }
121 r = r600_bc_build(&rpshader->shader.bc);
122 if (r) {
123 R600_ERR("building bytecode failed !\n");
124 return r;
125 }
126 //fprintf(stderr, "______________________________________________________________\n");
127 return 0;
128 }
129
/* Build the radeon state object programming a vertex shader into hardware
 * (SPI_VS_OUT_ID mapping, export count, GPR/stack resources, and the two
 * buffer-object relocations for the shader code).
 * Returns 0 on success or a negative errno.
 */
static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_shader *rshader = &rpshader->shader;
	struct radeon_state *state;
	unsigned i, tmp;

	rpshader->rstate = radeon_state_decref(rpshader->rstate);
	state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
	if (state == NULL)
		return -ENOMEM;
	for (i = 0; i < 10; i++) {
		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
	}
	/* so far never got proper semantic id from tgsi */
	/* pack semantic id i into byte (i & 3) of SPI_VS_OUT_ID_(i/4),
	 * i.e. identity-map output i to semantic i */
	for (i = 0; i < 32; i++) {
		tmp = i << ((i & 3) * 8);
		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
	}
	/* NOTE(review): noutput - 2 presumably excludes position/psize from the
	 * parameter export count — confirm against the export setup in
	 * r600_shader_from_tgsi (underflows if noutput < 2). */
	state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
		S_028868_STACK_SIZE(rshader->bc.nstack);
	rpshader->rstate = state;
	/* the same shader bo backs both PGM_START relocations */
	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->nbo = 2;
	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
	rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
	return radeon_state_pm4(state);
}
160
/* Build the radeon state object programming a fragment shader into hardware:
 * per-input SPI interpolation controls, the pixel-export mask, and GPR/stack
 * resources.  Returns 0 on success or a negative errno.
 */
static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	const struct pipe_rasterizer_state *rasterizer;
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_shader *rshader = &rpshader->shader;
	struct r600_context *rctx = r600_context(ctx);
	struct radeon_state *state;
	unsigned i, tmp, exports_ps, num_cout;

	rasterizer = &rctx->rasterizer->state.rasterizer;
	rpshader->rstate = radeon_state_decref(rpshader->rstate);
	state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
	if (state == NULL)
		return -ENOMEM;
	/* one SPI_PS_INPUT_CNTL per interpolated input */
	for (i = 0; i < rshader->ninput; i++) {
		tmp = S_028644_SEMANTIC(i);
		tmp |= S_028644_SEL_CENTROID(1);
		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
			/* colors follow the flat/smooth shading mode */
			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
		}
		if (rasterizer->sprite_coord_enable & (1 << i)) {
			tmp |= S_028644_PT_SPRITE_TEX(1);
		}
		state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
	}

	/* export mask: bit 0 = depth (position) export, bits 1.. = color
	 * buffer exports in declaration order */
	exports_ps = 0;
	num_cout = 0;
	for (i = 0; i < rshader->noutput; i++) {
		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
			exports_ps |= 1;
		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
			exports_ps |= (1 << (num_cout+1));
			num_cout++;
		}
	}
	if (!exports_ps) {
		/* always at least export 1 component per pixel */
		exports_ps = 2;
	}
	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
		S_0286CC_PERSP_GRADIENT_ENA(1);
	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
		S_028868_STACK_SIZE(rshader->bc.nstack);
	state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
	rpshader->rstate = state;
	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->nbo = 1;
	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
	return radeon_state_pm4(state);
}
214
/* Upload the assembled bytecode into a fresh buffer object and (re)build the
 * processor-specific hardware state for the shader.
 * Returns 0 on success or a negative errno.
 */
static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_context *rctx = r600_context(ctx);
	struct r600_shader *rshader = &rpshader->shader;
	int r;

	/* copy new shader */
	radeon_bo_decref(rscreen->rw, rpshader->bo);
	rpshader->bo = NULL;
	rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
				 4096, NULL);
	if (rpshader->bo == NULL) {
		return -ENOMEM;
	}
	/* NOTE(review): radeon_bo_map return value is not checked — confirm it
	 * cannot fail here. */
	radeon_bo_map(rscreen->rw, rpshader->bo);
	memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
	radeon_bo_unmap(rscreen->rw, rpshader->bo);
	/* build state */
	rshader->flat_shade = rctx->flat_shade;
	switch (rshader->processor_type) {
	case TGSI_PROCESSOR_VERTEX:
		r = r600_pipe_shader_vs(ctx, rpshader);
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		r = r600_pipe_shader_ps(ctx, rpshader);
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
248
/* Bring a shader up to date with the current context state (vertex element
 * formats, flat shading) and rebuild its hardware state.
 * Returns 0 on success or a negative errno.
 */
int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	struct r600_context *rctx = r600_context(ctx);
	int r;

	if (rpshader == NULL)
		return -EINVAL;
	/* there should be enough input */
	if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
		R600_ERR("%d resources provided, expecting %d\n",
			rctx->vertex_elements->count, rpshader->shader.bc.nresource);
		return -EINVAL;
	}
	/* patch vertex fetch swizzles for the bound formats, then re-upload */
	r = r600_shader_update(ctx, &rpshader->shader);
	if (r)
		return r;
	return r600_pipe_shader(ctx, rpshader);
}
267
268 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
269 {
270 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
271 int j;
272
273 if (i->Instruction.NumDstRegs > 1) {
274 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
275 return -EINVAL;
276 }
277 if (i->Instruction.Predicate) {
278 R600_ERR("predicate unsupported\n");
279 return -EINVAL;
280 }
281 #if 0
282 if (i->Instruction.Label) {
283 R600_ERR("label unsupported\n");
284 return -EINVAL;
285 }
286 #endif
287 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
288 if (i->Src[j].Register.Dimension ||
289 i->Src[j].Register.Absolute) {
290 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
291 i->Src[j].Register.Dimension,
292 i->Src[j].Register.Absolute);
293 return -EINVAL;
294 }
295 }
296 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
297 if (i->Dst[j].Register.Dimension) {
298 R600_ERR("unsupported dst (dimension)\n");
299 return -EINVAL;
300 }
301 }
302 return 0;
303 }
304
/* Process one TGSI declaration token: record input/output metadata, assign
 * GPRs, and — for vertex shader inputs — emit the vertex fetch instruction
 * that loads the attribute.  Returns 0 on success or a negative errno.
 */
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
	struct r600_bc_vtx vtx;
	unsigned i;
	int r;

	switch (d->Declaration.File) {
	case TGSI_FILE_INPUT:
		i = ctx->shader->ninput++;
		ctx->shader->input[i].name = d->Semantic.Name;
		ctx->shader->input[i].sid = d->Semantic.Index;
		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
			/* turn input into fetch */
			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
			vtx.inst = 0;
			vtx.fetch_type = 0;
			/* buffer_id matches the vertex element slot; its dst
			 * swizzles are patched later in r600_shader_update() */
			vtx.buffer_id = i;
			/* register containing the index into the buffer */
			vtx.src_gpr = 0;
			vtx.src_sel_x = 0;
			vtx.mega_fetch_count = 0x1F;
			vtx.dst_gpr = ctx->shader->input[i].gpr;
			vtx.dst_sel_x = 0;
			vtx.dst_sel_y = 1;
			vtx.dst_sel_z = 2;
			vtx.dst_sel_w = 3;
			r = r600_bc_add_vtx(ctx->bc, &vtx);
			if (r)
				return r;
		}
		break;
	case TGSI_FILE_OUTPUT:
		i = ctx->shader->noutput++;
		ctx->shader->output[i].name = d->Semantic.Name;
		ctx->shader->output[i].sid = d->Semantic.Index;
		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
		break;
	case TGSI_FILE_CONSTANT:
	case TGSI_FILE_TEMPORARY:
	case TGSI_FILE_SAMPLER:
	case TGSI_FILE_ADDRESS:
		/* nothing to do: these are addressed via file_offset[] */
		break;
	default:
		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
		return -EINVAL;
	}
	return 0;
}
357
358 static int r600_get_temp(struct r600_shader_ctx *ctx)
359 {
360 return ctx->temp_reg + ctx->max_driver_temp_used++;
361 }
362
363 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
364 {
365 struct tgsi_full_immediate *immediate;
366 struct r600_shader_ctx ctx;
367 struct r600_bc_output output[32];
368 unsigned output_done, noutput;
369 unsigned opcode;
370 int i, r = 0, pos0;
371
372 ctx.bc = &shader->bc;
373 ctx.shader = shader;
374 r = r600_bc_init(ctx.bc, shader->family);
375 if (r)
376 return r;
377 ctx.tokens = tokens;
378 tgsi_scan_shader(tokens, &ctx.info);
379 tgsi_parse_init(&ctx.parse, tokens);
380 ctx.type = ctx.parse.FullHeader.Processor.Processor;
381 shader->processor_type = ctx.type;
382
383 /* register allocations */
384 /* Values [0,127] correspond to GPR[0..127].
385 * Values [128,159] correspond to constant buffer bank 0
386 * Values [160,191] correspond to constant buffer bank 1
387 * Values [256,511] correspond to cfile constants c[0..255].
388 * Other special values are shown in the list below.
389 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
390 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
391 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
392 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
393 * 248 SQ_ALU_SRC_0: special constant 0.0.
394 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
395 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
396 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
397 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
398 * 253 SQ_ALU_SRC_LITERAL: literal constant.
399 * 254 SQ_ALU_SRC_PV: previous vector result.
400 * 255 SQ_ALU_SRC_PS: previous scalar result.
401 */
402 for (i = 0; i < TGSI_FILE_COUNT; i++) {
403 ctx.file_offset[i] = 0;
404 }
405 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
406 ctx.file_offset[TGSI_FILE_INPUT] = 1;
407 }
408 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
409 ctx.info.file_count[TGSI_FILE_INPUT];
410 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
411 ctx.info.file_count[TGSI_FILE_OUTPUT];
412 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
413 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
414 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
415 ctx.info.file_count[TGSI_FILE_TEMPORARY];
416
417 ctx.nliterals = 0;
418 ctx.literals = NULL;
419
420 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
421 tgsi_parse_token(&ctx.parse);
422 switch (ctx.parse.FullToken.Token.Type) {
423 case TGSI_TOKEN_TYPE_IMMEDIATE:
424 immediate = &ctx.parse.FullToken.FullImmediate;
425 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
426 if(ctx.literals == NULL) {
427 r = -ENOMEM;
428 goto out_err;
429 }
430 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
431 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
432 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
433 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
434 ctx.nliterals++;
435 break;
436 case TGSI_TOKEN_TYPE_DECLARATION:
437 r = tgsi_declaration(&ctx);
438 if (r)
439 goto out_err;
440 break;
441 case TGSI_TOKEN_TYPE_INSTRUCTION:
442 r = tgsi_is_supported(&ctx);
443 if (r)
444 goto out_err;
445 ctx.max_driver_temp_used = 0;
446 /* reserve first tmp for everyone */
447 r600_get_temp(&ctx);
448 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
449 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
450 r = ctx.inst_info->process(&ctx);
451 if (r)
452 goto out_err;
453 r = r600_bc_add_literal(ctx.bc, ctx.value);
454 if (r)
455 goto out_err;
456 break;
457 default:
458 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
459 r = -EINVAL;
460 goto out_err;
461 }
462 }
463 /* export output */
464 noutput = shader->noutput;
465 for (i = 0, pos0 = 0; i < noutput; i++) {
466 memset(&output[i], 0, sizeof(struct r600_bc_output));
467 output[i].gpr = shader->output[i].gpr;
468 output[i].elem_size = 3;
469 output[i].swizzle_x = 0;
470 output[i].swizzle_y = 1;
471 output[i].swizzle_z = 2;
472 output[i].swizzle_w = 3;
473 output[i].barrier = 1;
474 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
475 output[i].array_base = i - pos0;
476 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
477 switch (ctx.type) {
478 case TGSI_PROCESSOR_VERTEX:
479 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
480 output[i].array_base = 60;
481 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
482 /* position doesn't count in array_base */
483 pos0++;
484 }
485 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
486 output[i].array_base = 61;
487 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
488 /* position doesn't count in array_base */
489 pos0++;
490 }
491 break;
492 case TGSI_PROCESSOR_FRAGMENT:
493 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
494 output[i].array_base = shader->output[i].sid;
495 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
496 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
497 output[i].array_base = 61;
498 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
499 } else {
500 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
501 r = -EINVAL;
502 goto out_err;
503 }
504 break;
505 default:
506 R600_ERR("unsupported processor type %d\n", ctx.type);
507 r = -EINVAL;
508 goto out_err;
509 }
510 }
511 /* add fake param output for vertex shader if no param is exported */
512 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
513 for (i = 0, pos0 = 0; i < noutput; i++) {
514 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
515 pos0 = 1;
516 break;
517 }
518 }
519 if (!pos0) {
520 memset(&output[i], 0, sizeof(struct r600_bc_output));
521 output[i].gpr = 0;
522 output[i].elem_size = 3;
523 output[i].swizzle_x = 0;
524 output[i].swizzle_y = 1;
525 output[i].swizzle_z = 2;
526 output[i].swizzle_w = 3;
527 output[i].barrier = 1;
528 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
529 output[i].array_base = 0;
530 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
531 noutput++;
532 }
533 }
534 /* add fake pixel export */
535 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
536 memset(&output[0], 0, sizeof(struct r600_bc_output));
537 output[0].gpr = 0;
538 output[0].elem_size = 3;
539 output[0].swizzle_x = 7;
540 output[0].swizzle_y = 7;
541 output[0].swizzle_z = 7;
542 output[0].swizzle_w = 7;
543 output[0].barrier = 1;
544 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
545 output[0].array_base = 0;
546 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
547 noutput++;
548 }
549 /* set export done on last export of each type */
550 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
551 if (i == (noutput - 1)) {
552 output[i].end_of_program = 1;
553 }
554 if (!(output_done & (1 << output[i].type))) {
555 output_done |= (1 << output[i].type);
556 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
557 }
558 }
559 /* add output to bytecode */
560 for (i = 0; i < noutput; i++) {
561 r = r600_bc_add_output(ctx.bc, &output[i]);
562 if (r)
563 goto out_err;
564 }
565 free(ctx.literals);
566 tgsi_parse_free(&ctx.parse);
567 return 0;
568 out_err:
569 free(ctx.literals);
570 tgsi_parse_free(&ctx.parse);
571 return r;
572 }
573
/* Fallback handler for TGSI opcodes with no r600 translation yet. */
static int tgsi_unsupported(struct r600_shader_ctx *ctx)
{
	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
	return -EINVAL;
}
579
/* TGSI END: nothing to emit here — end_of_program is set on the last
 * export in r600_shader_from_tgsi(). */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	return 0;
}
584
585 static int tgsi_src(struct r600_shader_ctx *ctx,
586 const struct tgsi_full_src_register *tgsi_src,
587 struct r600_bc_alu_src *r600_src)
588 {
589 int index;
590 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
591 r600_src->sel = tgsi_src->Register.Index;
592 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
593 r600_src->sel = 0;
594 index = tgsi_src->Register.Index;
595 ctx->value[0] = ctx->literals[index * 4 + 0];
596 ctx->value[1] = ctx->literals[index * 4 + 1];
597 ctx->value[2] = ctx->literals[index * 4 + 2];
598 ctx->value[3] = ctx->literals[index * 4 + 3];
599 }
600 if (tgsi_src->Register.Indirect)
601 r600_src->rel = V_SQ_REL_RELATIVE;
602 r600_src->neg = tgsi_src->Register.Negate;
603 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
604 return 0;
605 }
606
607 static int tgsi_dst(struct r600_shader_ctx *ctx,
608 const struct tgsi_full_dst_register *tgsi_dst,
609 unsigned swizzle,
610 struct r600_bc_alu_dst *r600_dst)
611 {
612 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
613
614 r600_dst->sel = tgsi_dst->Register.Index;
615 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
616 r600_dst->chan = swizzle;
617 r600_dst->write = 1;
618 if (tgsi_dst->Register.Indirect)
619 r600_dst->rel = V_SQ_REL_RELATIVE;
620 if (inst->Instruction.Saturate) {
621 r600_dst->clamp = 1;
622 }
623 return 0;
624 }
625
626 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
627 {
628 switch (swizzle) {
629 case 0:
630 return tgsi_src->Register.SwizzleX;
631 case 1:
632 return tgsi_src->Register.SwizzleY;
633 case 2:
634 return tgsi_src->Register.SwizzleZ;
635 case 3:
636 return tgsi_src->Register.SwizzleW;
637 default:
638 return 0;
639 }
640 }
641
/* Translate all source operands of the current instruction (via tgsi_src)
 * and copy extra constant-file operands into temporaries.
 *
 * The r600 ALU can only read a limited number of constant operands per
 * instruction, so all but one constant source are MOVed (all 4 channels)
 * into a driver temp and the operand is redirected there.
 *
 * NOTE(review): the loop iterates i over all sources but indexes with j,
 * which starts at nconst-1 and only decrements while j > 0 — so source 0 is
 * never split and the i/j pairing looks intentional but fragile; confirm
 * against instructions with 3 constant sources.
 */
static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int i, j, k, nconst, r;

	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
			nconst++;
		}
		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
		if (r) {
			return r;
		}
	}
	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
			int treg = r600_get_temp(ctx);
			/* copy the whole vec4 so any swizzle still works */
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bc_alu));
				alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
				alu.src[0].sel = r600_src[j].sel;
				alu.src[0].chan = k;
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bc_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			r600_src[j].sel = treg;
			j--;
		}
	}
	return 0;
}
680
/* Copy any immediate (literal) source operands of the current instruction
 * into driver temporaries and redirect the operands there.  Needed by
 * instructions (e.g. the trig setup) that emit their own literals for PI
 * constants and would otherwise clash with the operand literal slot.
 *
 * NOTE(review): as in tgsi_split_constant, the loop iterates i but tests
 * and rewrites Src[j]/r600_src[j] (j advances only on immediates) — confirm
 * the pairing handles immediates at arbitrary operand positions.
 */
static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int i, j, k, nliteral, r;

	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
			nliteral++;
		}
	}
	for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
			int treg = r600_get_temp(ctx);
			/* MOV all 4 literal channels into the temp */
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bc_alu));
				alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
				alu.src[0].sel = r600_src[j].sel;
				alu.src[0].chan = k;
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bc_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			/* flush the literal staged by tgsi_src() for this operand */
			r = r600_bc_add_literal(ctx->bc, ctx->value);
			if (r)
				return r;
			r600_src[j].sel = treg;
			j++;
		}
	}
	return 0;
}
719
/* Generic translation of a simple TGSI ALU opcode: one r600 ALU instruction
 * per channel enabled in the write mask.
 * @swap: when non-zero, src0 and src1 are exchanged (used for opcodes whose
 * r600 equivalent takes its operands in the opposite order).
 */
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, j, r;
	int lasti = 0;

	/* highest written channel: that instruction carries the 'last' bit */
	for (i = 0; i < 4; i++) {
		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
			lasti = i;
		}
	}

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bc_alu));
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;

		alu.inst = ctx->inst_info->r600_opcode;
		if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				alu.src[j] = r600_src[j];
				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
			}
		} else {
			alu.src[0] = r600_src[1];
			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);

			alu.src[1] = r600_src[0];
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		}
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_SUB:
			/* SUB is emitted as ADD with src1 negated.
			 * NOTE(review): this sets (not toggles) the negate
			 * bit, clobbering a TGSI-level negate — confirm. */
			alu.src[1].neg = 1;
			break;
		case TGSI_OPCODE_ABS:
			alu.src[0].abs = 1;
			break;
		default:
			break;
		}
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
779
/* Standard two-operand opcode: operands in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0);
}
784
/* Two-operand opcode whose r600 instruction takes operands swapped. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1);
}
789
/*
 * Reduce the trig argument into the range the hardware SIN/COS expects:
 * r600 - trunc to -PI..PI range
 * r700 - normalize by dividing by 2PI
 * see fdo bug 27901
 *
 * Emits: tmp = FRACT(src * 1/2PI + 0.5), then rescales tmp into the
 * per-chip range.  Leaves the reduced angle in temp_reg channel 0.
 */
static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
			struct r600_bc_alu_src r600_src[3])
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int r;
	uint32_t lit_vals[4];
	struct r600_bc_alu alu;

	memset(lit_vals, 0, 4*4);
	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	/* immediates must be moved out of the literal slot: the MULADDs
	 * below need it for the PI constants */
	r = tgsi_split_literal_constant(ctx, r600_src);
	if (r)
		return r;

	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
	lit_vals[1] = fui(0.5f);

	/* tmp.x = src * 1/2PI + 0.5 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0] = r600_src[0];
	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;

	/* tmp.x = FRACT(tmp.x)  -> normalized phase in [0,1) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* chiprev 0 == r600: rescale to -PI..PI; r700 wants -0.5..0.5 */
	if (ctx->bc->chiprev == 0) {
		lit_vals[0] = fui(3.1415926535897f * 2.0f);
		lit_vals[1] = fui(-3.1415926535897f);
	} else {
		lit_vals[0] = fui(1.0f);
		lit_vals[1] = fui(-0.5f);
	}

	/* tmp.x = tmp.x * scale + bias */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;
	return 0;
}
884
/* Translate SIN/COS: reduce the argument with tgsi_setup_trig, run the
 * transcendental on temp_reg.x, then replicate the scalar result to every
 * channel enabled in the write mask.
 */
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, r;
	int lasti = 0;

	r = tgsi_setup_trig(ctx, r600_src);
	if (r)
		return r;

	/* tmp.x = SIN/COS(tmp.x) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = ctx->inst_info->r600_opcode;
	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* replicate result */
	for (i = 0; i < 4; i++) {
		if (inst->Dst[0].Register.WriteMask & (1 << i))
			lasti = i;
	}
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;

		alu.src[0].sel = ctx->temp_reg;
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;
		if (i == lasti)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
934
/* Translate SCS (sine/cosine): dst.x = COS(src.x), dst.y = SIN(src.x).
 *
 * NOTE(review): tgsi_split_constant is called here and again inside
 * tgsi_setup_trig — presumably harmless duplication, but confirm the second
 * split does not consume extra driver temps needlessly.  Also, TGSI SCS
 * defines dst.z = 0.0 and dst.w = 1.0, which are not written here — verify
 * whether callers rely on them.
 */
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	/* reduce the angle into hardware range; result in temp_reg.x */
	r = tgsi_setup_trig(ctx, r600_src);
	if (r)
		return r;


	/* dst.x = COS */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS;
	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	if (r)
		return r;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.y = SIN */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN;
	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
	if (r)
		return r;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}
980
/* Translate KIL/KILP: emit a kill-compare per channel.
 *
 * Each ALU op compares 0 against a second operand: for KILP that operand is
 * the constant -1.0 (so the compare always kills), for conditional KIL it
 * is the corresponding source channel.  The specific compare is the
 * r600_opcode from the dispatch table.
 */
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = ctx->inst_info->r600_opcode;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
			/* unconditional: compare 0.0 against -1.0 */
			alu.src[1].sel = V_SQ_ALU_SRC_1;
			alu.src[1].neg = 1;
		} else {
			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
			if (r)
				return r;
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* kill must be last in ALU */
	ctx->bc->force_add_cf = 1;
	ctx->shader->uses_kill = TRUE;
	return 0;
}
1020
/* Expand TGSI LIT (lighting coefficients):
 *   dst.x = 1.0
 *   dst.y = max(src.x, 0)
 *   dst.z = clamped-log/exp power term (only when the z write bit is set)
 *   dst.w = 1.0
 * The z channel uses the hardware MUL_LIT op3 to combine the exponent
 * with the clamped inputs. */
static int tgsi_lit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	struct r600_bc_alu_src r600_src[3];
	int r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	r = tgsi_split_literal_constant(ctx, r600_src);
	if (r)
		return r;

	/* dst.x, <- 1.0 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
	alu.src[0].chan = 0;
	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	if (r)
		return r;
	/* gate the write on the destination's x mask bit */
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.y = max(src.x, 0.0) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
	alu.src[0] = r600_src[0];
	alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
	alu.src[1].chan = 0;
	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.w, <- 1.0 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1;
	alu.src[0].chan = 0;
	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* the expensive z channel is only emitted when actually written */
	if (inst->Dst[0].Register.WriteMask & (1 << 2))
	{
		int chan;
		int sel;

		/* dst.z = log(src.y) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
		if (r)
			return r;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		r = r600_bc_add_literal(ctx->bc, ctx->value);
		if (r)
			return r;

		/* remember where the LOG result landed so MUL_LIT can read it */
		chan = alu.dst.chan;
		sel = alu.dst.sel;

		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
		alu.src[1].sel = sel;
		alu.src[1].chan = chan;

		alu.src[2] = r600_src[0];
		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		r = r600_bc_add_literal(ctx->bc, ctx->value);
		if (r)
			return r;
		/* dst.z = exp(tmp.x) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;
		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
		if (r)
			return r;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
1142
1143 static int tgsi_trans(struct r600_shader_ctx *ctx)
1144 {
1145 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1146 struct r600_bc_alu alu;
1147 int i, j, r;
1148
1149 for (i = 0; i < 4; i++) {
1150 memset(&alu, 0, sizeof(struct r600_bc_alu));
1151 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1152 alu.inst = ctx->inst_info->r600_opcode;
1153 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1154 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1155 if (r)
1156 return r;
1157 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1158 }
1159 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1160 if (r)
1161 return r;
1162 alu.last = 1;
1163 r = r600_bc_add_alu(ctx->bc, &alu);
1164 if (r)
1165 return r;
1166 }
1167 }
1168 return 0;
1169 }
1170
1171 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1172 {
1173 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1174 struct r600_bc_alu alu;
1175 int i, r;
1176
1177 for (i = 0; i < 4; i++) {
1178 memset(&alu, 0, sizeof(struct r600_bc_alu));
1179 alu.src[0].sel = ctx->temp_reg;
1180 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1181 alu.dst.chan = i;
1182 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1183 if (r)
1184 return r;
1185 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1186 if (i == 3)
1187 alu.last = 1;
1188 r = r600_bc_add_alu(ctx->bc, &alu);
1189 if (r)
1190 return r;
1191 }
1192 return 0;
1193 }
1194
1195 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1196 {
1197 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1198 struct r600_bc_alu alu;
1199 int i, r;
1200
1201 memset(&alu, 0, sizeof(struct r600_bc_alu));
1202 alu.inst = ctx->inst_info->r600_opcode;
1203 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1204 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1205 if (r)
1206 return r;
1207 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1208 }
1209 alu.dst.sel = ctx->temp_reg;
1210 alu.dst.write = 1;
1211 alu.last = 1;
1212 r = r600_bc_add_alu(ctx->bc, &alu);
1213 if (r)
1214 return r;
1215 r = r600_bc_add_literal(ctx->bc, ctx->value);
1216 if (r)
1217 return r;
1218 /* replicate result */
1219 return tgsi_helper_tempx_replicate(ctx);
1220 }
1221
1222 static int tgsi_pow(struct r600_shader_ctx *ctx)
1223 {
1224 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1225 struct r600_bc_alu alu;
1226 int r;
1227
1228 /* LOG2(a) */
1229 memset(&alu, 0, sizeof(struct r600_bc_alu));
1230 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1231 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1232 if (r)
1233 return r;
1234 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1235 alu.dst.sel = ctx->temp_reg;
1236 alu.dst.write = 1;
1237 alu.last = 1;
1238 r = r600_bc_add_alu(ctx->bc, &alu);
1239 if (r)
1240 return r;
1241 r = r600_bc_add_literal(ctx->bc,ctx->value);
1242 if (r)
1243 return r;
1244 /* b * LOG2(a) */
1245 memset(&alu, 0, sizeof(struct r600_bc_alu));
1246 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1247 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1248 if (r)
1249 return r;
1250 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1251 alu.src[1].sel = ctx->temp_reg;
1252 alu.dst.sel = ctx->temp_reg;
1253 alu.dst.write = 1;
1254 alu.last = 1;
1255 r = r600_bc_add_alu(ctx->bc, &alu);
1256 if (r)
1257 return r;
1258 r = r600_bc_add_literal(ctx->bc,ctx->value);
1259 if (r)
1260 return r;
1261 /* POW(a,b) = EXP2(b * LOG2(a))*/
1262 memset(&alu, 0, sizeof(struct r600_bc_alu));
1263 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1264 alu.src[0].sel = ctx->temp_reg;
1265 alu.dst.sel = ctx->temp_reg;
1266 alu.dst.write = 1;
1267 alu.last = 1;
1268 r = r600_bc_add_alu(ctx->bc, &alu);
1269 if (r)
1270 return r;
1271 r = r600_bc_add_literal(ctx->bc,ctx->value);
1272 if (r)
1273 return r;
1274 return tgsi_helper_tempx_replicate(ctx);
1275 }
1276
1277 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1278 {
1279 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1280 struct r600_bc_alu alu;
1281 struct r600_bc_alu_src r600_src[3];
1282 int i, r;
1283
1284 r = tgsi_split_constant(ctx, r600_src);
1285 if (r)
1286 return r;
1287
1288 /* tmp = (src > 0 ? 1 : src) */
1289 for (i = 0; i < 4; i++) {
1290 memset(&alu, 0, sizeof(struct r600_bc_alu));
1291 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1292 alu.is_op3 = 1;
1293
1294 alu.dst.sel = ctx->temp_reg;
1295 alu.dst.chan = i;
1296
1297 alu.src[0] = r600_src[0];
1298 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1299
1300 alu.src[1].sel = V_SQ_ALU_SRC_1;
1301
1302 alu.src[2] = r600_src[0];
1303 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1304 if (i == 3)
1305 alu.last = 1;
1306 r = r600_bc_add_alu(ctx->bc, &alu);
1307 if (r)
1308 return r;
1309 }
1310 r = r600_bc_add_literal(ctx->bc, ctx->value);
1311 if (r)
1312 return r;
1313
1314 /* dst = (-tmp > 0 ? -1 : tmp) */
1315 for (i = 0; i < 4; i++) {
1316 memset(&alu, 0, sizeof(struct r600_bc_alu));
1317 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1318 alu.is_op3 = 1;
1319 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1320 if (r)
1321 return r;
1322
1323 alu.src[0].sel = ctx->temp_reg;
1324 alu.src[0].chan = i;
1325 alu.src[0].neg = 1;
1326
1327 alu.src[1].sel = V_SQ_ALU_SRC_1;
1328 alu.src[1].neg = 1;
1329
1330 alu.src[2].sel = ctx->temp_reg;
1331 alu.src[2].chan = i;
1332
1333 if (i == 3)
1334 alu.last = 1;
1335 r = r600_bc_add_alu(ctx->bc, &alu);
1336 if (r)
1337 return r;
1338 }
1339 return 0;
1340 }
1341
1342 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1343 {
1344 struct r600_bc_alu alu;
1345 int i, r;
1346
1347 r = r600_bc_add_literal(ctx->bc, ctx->value);
1348 if (r)
1349 return r;
1350 for (i = 0; i < 4; i++) {
1351 memset(&alu, 0, sizeof(struct r600_bc_alu));
1352 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1353 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1354 alu.dst.chan = i;
1355 } else {
1356 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1357 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1358 if (r)
1359 return r;
1360 alu.src[0].sel = ctx->temp_reg;
1361 alu.src[0].chan = i;
1362 }
1363 if (i == 3) {
1364 alu.last = 1;
1365 }
1366 r = r600_bc_add_alu(ctx->bc, &alu);
1367 if (r)
1368 return r;
1369 }
1370 return 0;
1371 }
1372
1373 static int tgsi_op3(struct r600_shader_ctx *ctx)
1374 {
1375 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1376 struct r600_bc_alu_src r600_src[3];
1377 struct r600_bc_alu alu;
1378 int i, j, r;
1379
1380 r = tgsi_split_constant(ctx, r600_src);
1381 if (r)
1382 return r;
1383 /* do it in 2 step as op3 doesn't support writemask */
1384 for (i = 0; i < 4; i++) {
1385 memset(&alu, 0, sizeof(struct r600_bc_alu));
1386 alu.inst = ctx->inst_info->r600_opcode;
1387 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1388 alu.src[j] = r600_src[j];
1389 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1390 }
1391 alu.dst.sel = ctx->temp_reg;
1392 alu.dst.chan = i;
1393 alu.dst.write = 1;
1394 alu.is_op3 = 1;
1395 if (i == 3) {
1396 alu.last = 1;
1397 }
1398 r = r600_bc_add_alu(ctx->bc, &alu);
1399 if (r)
1400 return r;
1401 }
1402 return tgsi_helper_copy(ctx, inst);
1403 }
1404
/* Emit a dot product via the hardware DOT4 instruction.  DP2/DP3/DPH
 * are mapped onto DOT4 by patching the extra channels: unused channels
 * multiply 0*0, and DPH forces src0.w to 1.0.  Result goes through
 * temp_reg and is masked by tgsi_helper_copy(). */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, j, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = ctx->inst_info->r600_opcode;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			alu.src[j] = r600_src[j];
			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
		}
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_DP2:
			/* zero out channels z and w so they add nothing */
			if (i > 1) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			/* zero out channel w */
			if (i > 2) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			/* homogeneous dot: treat src0.w as 1.0 */
			if (i == 3) {
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
1458
/* Emit a texture fetch.  For TXP the coordinates are first divided by
 * the projective w (RECP + 3 MULs into temp_reg).  If the coordinate
 * lives in a non-temporary file it is copied into temp_reg, since the
 * TEX unit reads coordinates from a GPR. */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_tex tex;
	struct r600_bc_alu alu;
	unsigned src_gpr;
	int r, i;

	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;

	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		/* Add perspective divide */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
		alu.src[0].sel = src_gpr;
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.last = 1;
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* temp.xyz = coord.xyz * (1/w) */
		for (i = 0; i < 3; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 3;
			alu.src[1].sel = src_gpr;
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		/* temp.w = 1.0 after the divide */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.last = 1;
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
		src_gpr = ctx->temp_reg;
	} else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
		/* copy the coordinate into a GPR the TEX unit can read */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
			alu.src[0].sel = src_gpr;
			alu.src[0].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&tex, 0, sizeof(struct r600_bc_tex));
	tex.inst = ctx->inst_info->r600_opcode;
	/* NOTE(review): sampler id is assumed equal to resource id here —
	 * holds as long as samplers and resources are bound 1:1. */
	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
	tex.sampler_id = tex.resource_id;
	tex.src_gpr = src_gpr;
	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	tex.dst_sel_x = 0;
	tex.dst_sel_y = 1;
	tex.dst_sel_z = 2;
	tex.dst_sel_w = 3;
	tex.src_sel_x = 0;
	tex.src_sel_y = 1;
	tex.src_sel_z = 2;
	tex.src_sel_w = 3;

	/* RECT textures use unnormalized coordinates; everything else is
	 * normalized */
	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
		tex.coord_type_x = 1;
		tex.coord_type_y = 1;
		tex.coord_type_z = 1;
		tex.coord_type_w = 1;
	}
	return r600_bc_add_tex(ctx->bc, &tex);
}
1550
/* Expand TGSI LRP: dst = src0 * src1 + (1 - src0) * src2, computed in
 * three passes through temp_reg (ADD, MUL, MULADD) and finally masked
 * into the destination by tgsi_helper_copy(). */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	unsigned i;
	int r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	/* 1 - src0 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		alu.src[1] = r600_src[0];
		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		/* negate src0 so the ADD computes 1 + (-src0) */
		alu.src[1].neg = 1;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* (1 - src0) * src2 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[1] = r600_src[2];
		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* src0 * src1 + (1 - src0) * src2 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
		alu.is_op3 = 1;
		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
		alu.src[1] = r600_src[1];
		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
1629
1630 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1631 {
1632 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1633 struct r600_bc_alu_src r600_src[3];
1634 struct r600_bc_alu alu;
1635 int use_temp = 0;
1636 int i, r;
1637
1638 r = tgsi_split_constant(ctx, r600_src);
1639 if (r)
1640 return r;
1641
1642 if (inst->Dst[0].Register.WriteMask != 0xf)
1643 use_temp = 1;
1644
1645 for (i = 0; i < 4; i++) {
1646 memset(&alu, 0, sizeof(struct r600_bc_alu));
1647 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
1648 alu.src[0] = r600_src[0];
1649 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1650
1651 alu.src[1] = r600_src[2];
1652 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1653
1654 alu.src[2] = r600_src[1];
1655 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1656
1657 if (use_temp)
1658 alu.dst.sel = ctx->temp_reg;
1659 else {
1660 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1661 if (r)
1662 return r;
1663 }
1664 alu.dst.chan = i;
1665 alu.dst.write = 1;
1666 alu.is_op3 = 1;
1667 if (i == 3)
1668 alu.last = 1;
1669 r = r600_bc_add_alu(ctx->bc, &alu);
1670 if (r)
1671 return r;
1672 }
1673 if (use_temp)
1674 return tgsi_helper_copy(ctx, inst);
1675 return 0;
1676 }
1677
/* Expand TGSI XPD (cross product):
 *   tmp  = src0.zxy * src1.yzx          (first MUL pass)
 *   dst  = src0.yzx * src1.zxy - tmp    (MULADD with negated tmp)
 * Channel 3 multiplies 0*0 in both passes, so dst.w ends up 0 when
 * written.  NOTE(review): TGSI defines XPD.w as 1.0 — confirm callers
 * never write the w channel here. */
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	uint32_t use_temp = 0;
	int i, r;

	/* op3 has no write mask; use temp_reg + copy for partial masks */
	if (inst->Dst[0].Register.WriteMask != 0xf)
		use_temp = 1;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	/* first pass: tmp = src0.zxy * src1.yzx */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;

		alu.src[0] = r600_src[0];
		switch (i) {
		case 0:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
			break;
		case 1:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
			break;
		case 2:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
			break;
		case 3:
			/* w channel: multiply by constant zero */
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
		}

		alu.src[1] = r600_src[1];
		switch (i) {
		case 0:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
			break;
		case 1:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
			break;
		case 2:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
			break;
		case 3:
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* second pass: dst = src0.yzx * src1.zxy - tmp */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;

		alu.src[0] = r600_src[0];
		switch (i) {
		case 0:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
			break;
		case 1:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
			break;
		case 2:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
			break;
		case 3:
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
		}

		alu.src[1] = r600_src[1];
		switch (i) {
		case 0:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
			break;
		case 1:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
			break;
		case 2:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
			break;
		case 3:
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		/* subtract the first-pass product */
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].neg = 1;
		alu.src[2].chan = i;

		if (use_temp)
			alu.dst.sel = ctx->temp_reg;
		else {
			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (r)
				return r;
		}
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	if (use_temp)
		return tgsi_helper_copy(ctx, inst);
	return 0;
}
1800
1801 static int tgsi_exp(struct r600_shader_ctx *ctx)
1802 {
1803 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1804 struct r600_bc_alu_src r600_src[3];
1805 struct r600_bc_alu alu;
1806 int r;
1807
1808 /* result.x = 2^floor(src); */
1809 if (inst->Dst[0].Register.WriteMask & 1) {
1810 memset(&alu, 0, sizeof(struct r600_bc_alu));
1811
1812 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
1813 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1814 if (r)
1815 return r;
1816
1817 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1818
1819 alu.dst.sel = ctx->temp_reg;
1820 alu.dst.chan = 0;
1821 alu.dst.write = 1;
1822 alu.last = 1;
1823 r = r600_bc_add_alu(ctx->bc, &alu);
1824 if (r)
1825 return r;
1826
1827 r = r600_bc_add_literal(ctx->bc, ctx->value);
1828 if (r)
1829 return r;
1830
1831 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1832 alu.src[0].sel = ctx->temp_reg;
1833 alu.src[0].chan = 0;
1834
1835 alu.dst.sel = ctx->temp_reg;
1836 alu.dst.chan = 0;
1837 alu.dst.write = 1;
1838 alu.last = 1;
1839 r = r600_bc_add_alu(ctx->bc, &alu);
1840 if (r)
1841 return r;
1842
1843 r = r600_bc_add_literal(ctx->bc, ctx->value);
1844 if (r)
1845 return r;
1846 }
1847
1848 /* result.y = tmp - floor(tmp); */
1849 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1850 memset(&alu, 0, sizeof(struct r600_bc_alu));
1851
1852 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
1853 alu.src[0] = r600_src[0];
1854 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1855 if (r)
1856 return r;
1857 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1858
1859 alu.dst.sel = ctx->temp_reg;
1860 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1861 // if (r)
1862 // return r;
1863 alu.dst.write = 1;
1864 alu.dst.chan = 1;
1865
1866 alu.last = 1;
1867
1868 r = r600_bc_add_alu(ctx->bc, &alu);
1869 if (r)
1870 return r;
1871 r = r600_bc_add_literal(ctx->bc, ctx->value);
1872 if (r)
1873 return r;
1874 }
1875
1876 /* result.z = RoughApprox2ToX(tmp);*/
1877 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1878 memset(&alu, 0, sizeof(struct r600_bc_alu));
1879 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1880 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1881 if (r)
1882 return r;
1883 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1884
1885 alu.dst.sel = ctx->temp_reg;
1886 alu.dst.write = 1;
1887 alu.dst.chan = 2;
1888
1889 alu.last = 1;
1890
1891 r = r600_bc_add_alu(ctx->bc, &alu);
1892 if (r)
1893 return r;
1894 r = r600_bc_add_literal(ctx->bc, ctx->value);
1895 if (r)
1896 return r;
1897 }
1898
1899 /* result.w = 1.0;*/
1900 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1901 memset(&alu, 0, sizeof(struct r600_bc_alu));
1902
1903 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1904 alu.src[0].sel = V_SQ_ALU_SRC_1;
1905 alu.src[0].chan = 0;
1906
1907 alu.dst.sel = ctx->temp_reg;
1908 alu.dst.chan = 3;
1909 alu.dst.write = 1;
1910 alu.last = 1;
1911 r = r600_bc_add_alu(ctx->bc, &alu);
1912 if (r)
1913 return r;
1914 r = r600_bc_add_literal(ctx->bc, ctx->value);
1915 if (r)
1916 return r;
1917 }
1918 return tgsi_helper_copy(ctx, inst);
1919 }
1920
1921 static int tgsi_arl(struct r600_shader_ctx *ctx)
1922 {
1923 /* TODO from r600c, ar values don't persist between clauses */
1924 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1925 struct r600_bc_alu alu;
1926 int r;
1927 memset(&alu, 0, sizeof(struct r600_bc_alu));
1928
1929 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
1930
1931 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1932 if (r)
1933 return r;
1934 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1935
1936 alu.last = 1;
1937
1938 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
1939 if (r)
1940 return r;
1941 return 0;
1942 }
1943
1944 static int tgsi_opdst(struct r600_shader_ctx *ctx)
1945 {
1946 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1947 struct r600_bc_alu alu;
1948 int i, r = 0;
1949
1950 for (i = 0; i < 4; i++) {
1951 memset(&alu, 0, sizeof(struct r600_bc_alu));
1952
1953 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1954 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1955 if (r)
1956 return r;
1957
1958 if (i == 0 || i == 3) {
1959 alu.src[0].sel = V_SQ_ALU_SRC_1;
1960 } else {
1961 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1962 if (r)
1963 return r;
1964 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1965 }
1966
1967 if (i == 0 || i == 2) {
1968 alu.src[1].sel = V_SQ_ALU_SRC_1;
1969 } else {
1970 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
1971 if (r)
1972 return r;
1973 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1974 }
1975 if (i == 3)
1976 alu.last = 1;
1977 r = r600_bc_add_alu(ctx->bc, &alu);
1978 if (r)
1979 return r;
1980 }
1981 return 0;
1982 }
1983
1984 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
1985 {
1986 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1987 struct r600_bc_alu alu;
1988 int r;
1989
1990 memset(&alu, 0, sizeof(struct r600_bc_alu));
1991 alu.inst = opcode;
1992 alu.predicate = 1;
1993
1994 alu.dst.sel = ctx->temp_reg;
1995 alu.dst.write = 1;
1996 alu.dst.chan = 0;
1997
1998 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1999 if (r)
2000 return r;
2001 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2002 alu.src[1].sel = V_SQ_ALU_SRC_0;
2003 alu.src[1].chan = 0;
2004
2005 alu.last = 1;
2006
2007 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
2008 if (r)
2009 return r;
2010 return 0;
2011 }
2012
2013 static int pops(struct r600_shader_ctx *ctx, int pops)
2014 {
2015 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
2016 ctx->bc->cf_last->pop_count = pops;
2017 return 0;
2018 }
2019
2020 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2021 {
2022 switch(reason) {
2023 case FC_PUSH_VPM:
2024 ctx->bc->callstack[ctx->bc->call_sp].current--;
2025 break;
2026 case FC_PUSH_WQM:
2027 case FC_LOOP:
2028 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2029 break;
2030 case FC_REP:
2031 /* TOODO : for 16 vp asic should -= 2; */
2032 ctx->bc->callstack[ctx->bc->call_sp].current --;
2033 break;
2034 }
2035 }
2036
2037 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2038 {
2039 if (check_max_only) {
2040 int diff;
2041 switch (reason) {
2042 case FC_PUSH_VPM:
2043 diff = 1;
2044 break;
2045 case FC_PUSH_WQM:
2046 diff = 4;
2047 break;
2048 }
2049 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2050 ctx->bc->callstack[ctx->bc->call_sp].max) {
2051 ctx->bc->callstack[ctx->bc->call_sp].max =
2052 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2053 }
2054 return;
2055 }
2056 switch (reason) {
2057 case FC_PUSH_VPM:
2058 ctx->bc->callstack[ctx->bc->call_sp].current++;
2059 break;
2060 case FC_PUSH_WQM:
2061 case FC_LOOP:
2062 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2063 break;
2064 case FC_REP:
2065 ctx->bc->callstack[ctx->bc->call_sp].current++;
2066 break;
2067 }
2068
2069 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2070 ctx->bc->callstack[ctx->bc->call_sp].max) {
2071 ctx->bc->callstack[ctx->bc->call_sp].max =
2072 ctx->bc->callstack[ctx->bc->call_sp].current;
2073 }
2074 }
2075
/* Record the just-emitted CF instruction (ELSE, BREAK, CONTINUE, ...)
 * as a "mid" point of the flow-control block at fc_stack[fc_sp],
 * growing the mid array by one slot. */
static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
{
	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];

	/* NOTE(review): realloc result is unchecked — on OOM the original
	 * pointer is lost and the store below dereferences NULL. */
	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
	sp->mid[sp->num_mid] = ctx->bc->cf_last;
	sp->num_mid++;
}
2085
2086 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2087 {
2088 ctx->bc->fc_sp++;
2089 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2090 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2091 }
2092
2093 static void fc_poplevel(struct r600_shader_ctx *ctx)
2094 {
2095 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2096 if (sp->mid) {
2097 free(sp->mid);
2098 sp->mid = NULL;
2099 }
2100 sp->num_mid = 0;
2101 sp->start = NULL;
2102 sp->type = 0;
2103 ctx->bc->fc_sp--;
2104 }
2105
#if 0
/* Disabled scaffolding for RET/BRK-inside-loop (LOOPRET) handling.
 * Not compiled; kept for reference until the feature is implemented. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2153
2154 static int tgsi_if(struct r600_shader_ctx *ctx)
2155 {
2156 emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
2157
2158 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2159
2160 fc_pushlevel(ctx, FC_IF);
2161
2162 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2163 return 0;
2164 }
2165
2166 static int tgsi_else(struct r600_shader_ctx *ctx)
2167 {
2168 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
2169 ctx->bc->cf_last->pop_count = 1;
2170
2171 fc_set_mid(ctx, ctx->bc->fc_sp);
2172 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2173 return 0;
2174 }
2175
/* Close an IF block: emit the POP, then patch the pending jump target.
 * Without an ELSE the opening JUMP skips past the POP; with one, the
 * ELSE instruction (mid[0]) is patched instead. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		/* no ELSE: jump lands just after the POP (+2 CF words) */
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_decrease_current(ctx, FC_PUSH_VPM);
	return 0;
}
2195
2196 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2197 {
2198 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
2199
2200 fc_pushlevel(ctx, FC_LOOP);
2201
2202 /* check stack depth */
2203 callstack_check_depth(ctx, FC_LOOP, 0);
2204 return 0;
2205 }
2206
/* Close a loop: emit LOOP_END and patch all address fields of the
 * loop's CF instructions. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	*/
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	/* every BREAK/CONTINUE recorded as a mid point targets LOOP_END */
	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* TODO add LOOPRET support */
	fc_poplevel(ctx);
	callstack_decrease_current(ctx, FC_LOOP);
	return 0;
}
2235
2236 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2237 {
2238 unsigned int fscp;
2239
2240 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2241 {
2242 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2243 break;
2244 }
2245
2246 if (fscp == 0) {
2247 R600_ERR("Break not inside loop/endloop pair\n");
2248 return -EINVAL;
2249 }
2250
2251 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2252 ctx->bc->cf_last->pop_count = 1;
2253
2254 fc_set_mid(ctx, fscp);
2255
2256 pops(ctx, 1);
2257 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2258 return 0;
2259 }
2260
/* TGSI -> r600 opcode dispatch table, indexed positionally by TGSI opcode
 * value — the numeric "gap" entries (20, 22, 23, ...) keep unused opcode
 * slots aligned, so entries must never be reordered or removed.
 * Per entry: {tgsi_opcode, flag (appears to mark 3-source ALU ops — 1 only
 * on the OP3 MULADD entry; confirm against struct definition), r600 ALU/CF/
 * TEX opcode or NOP, translation callback}. Opcodes with a NOP r600 code are
 * either handled entirely inside their callback or are tgsi_unsupported. */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};