r600g: add support for constants in memory buffers.
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 u32 *literals;
52 u32 nliterals;
53 u32 max_driver_temp_used;
54 };
55
/*
 * One entry of the TGSI opcode translation table: which r600 opcode to emit
 * and which handler performs the translation.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-operand r600 opcodes - confirm against the table */
	unsigned	is_op3;
	/* r600 ALU opcode the handlers copy into the emitted instruction */
	unsigned	r600_opcode;
	/* handler invoked for each instruction with this opcode; 0 on success */
	int		(*process)(struct r600_shader_ctx *ctx);
};
62
63 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
64 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
65
66 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
67 {
68 struct r600_context *rctx = r600_context(ctx);
69 const struct util_format_description *desc;
70 enum pipe_format resource_format[160];
71 unsigned i, nresources = 0;
72 struct r600_bc *bc = &shader->bc;
73 struct r600_bc_cf *cf;
74 struct r600_bc_vtx *vtx;
75
76 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
77 return 0;
78 for (i = 0; i < rctx->vertex_elements->count; i++) {
79 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
80 }
81 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
82 switch (cf->inst) {
83 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
84 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
85 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
86 desc = util_format_description(resource_format[vtx->buffer_id]);
87 if (desc == NULL) {
88 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
89 return -EINVAL;
90 }
91 vtx->dst_sel_x = desc->swizzle[0];
92 vtx->dst_sel_y = desc->swizzle[1];
93 vtx->dst_sel_z = desc->swizzle[2];
94 vtx->dst_sel_w = desc->swizzle[3];
95 }
96 break;
97 default:
98 break;
99 }
100 }
101 return r600_bc_build(&shader->bc);
102 }
103
104 int r600_pipe_shader_create(struct pipe_context *ctx,
105 struct r600_context_state *rpshader,
106 const struct tgsi_token *tokens)
107 {
108 struct r600_screen *rscreen = r600_screen(ctx->screen);
109 int r;
110
111 //fprintf(stderr, "--------------------------------------------------------------\n");
112 //tgsi_dump(tokens, 0);
113 if (rpshader == NULL)
114 return -ENOMEM;
115 rpshader->shader.family = radeon_get_family(rscreen->rw);
116 rpshader->shader.use_mem_constant = rscreen->use_mem_constant;
117 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
118 if (r) {
119 R600_ERR("translation from TGSI failed !\n");
120 return r;
121 }
122 r = r600_bc_build(&rpshader->shader.bc);
123 if (r) {
124 R600_ERR("building bytecode failed !\n");
125 return r;
126 }
127 //fprintf(stderr, "______________________________________________________________\n");
128 return 0;
129 }
130
131 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
132 {
133 struct r600_context *rctx = r600_context(ctx);
134 struct radeon_state *state;
135
136 state = &rpshader->rstate[0];
137 radeon_state_fini(&rpshader->rstate[0]);
138
139 return rctx->vtbl->vs_shader(rctx, rpshader, state);
140 }
141
142 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
143 {
144 struct r600_context *rctx = r600_context(ctx);
145 struct radeon_state *state;
146
147 state = &rpshader->rstate[0];
148 radeon_state_fini(state);
149
150 return rctx->vtbl->ps_shader(rctx, rpshader, state);
151 }
152
153 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
154 {
155 struct r600_screen *rscreen = r600_screen(ctx->screen);
156 struct r600_context *rctx = r600_context(ctx);
157 struct r600_shader *rshader = &rpshader->shader;
158 int r;
159
160 /* copy new shader */
161 radeon_bo_decref(rscreen->rw, rpshader->bo);
162 rpshader->bo = NULL;
163 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
164 4096, NULL);
165 if (rpshader->bo == NULL) {
166 return -ENOMEM;
167 }
168 radeon_bo_map(rscreen->rw, rpshader->bo);
169 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
170 radeon_bo_unmap(rscreen->rw, rpshader->bo);
171 /* build state */
172 rshader->flat_shade = rctx->flat_shade;
173 switch (rshader->processor_type) {
174 case TGSI_PROCESSOR_VERTEX:
175 r = r600_pipe_shader_vs(ctx, rpshader);
176 break;
177 case TGSI_PROCESSOR_FRAGMENT:
178 r = r600_pipe_shader_ps(ctx, rpshader);
179 break;
180 default:
181 r = -EINVAL;
182 break;
183 }
184 return r;
185 }
186
187 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
188 {
189 struct r600_context *rctx = r600_context(ctx);
190 int r;
191
192 if (rpshader == NULL)
193 return -EINVAL;
194 /* there should be enough input */
195 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
196 R600_ERR("%d resources provided, expecting %d\n",
197 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
198 return -EINVAL;
199 }
200 r = r600_shader_update(ctx, &rpshader->shader);
201 if (r)
202 return r;
203 return r600_pipe_shader(ctx, rpshader);
204 }
205
206 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
207 {
208 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
209 int j;
210
211 if (i->Instruction.NumDstRegs > 1) {
212 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
213 return -EINVAL;
214 }
215 if (i->Instruction.Predicate) {
216 R600_ERR("predicate unsupported\n");
217 return -EINVAL;
218 }
219 #if 0
220 if (i->Instruction.Label) {
221 R600_ERR("label unsupported\n");
222 return -EINVAL;
223 }
224 #endif
225 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
226 if (i->Src[j].Register.Dimension ||
227 i->Src[j].Register.Absolute) {
228 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
229 i->Src[j].Register.Dimension,
230 i->Src[j].Register.Absolute);
231 return -EINVAL;
232 }
233 }
234 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
235 if (i->Dst[j].Register.Dimension) {
236 R600_ERR("unsupported dst (dimension)\n");
237 return -EINVAL;
238 }
239 }
240 return 0;
241 }
242
243 static int tgsi_declaration(struct r600_shader_ctx *ctx)
244 {
245 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
246 struct r600_bc_vtx vtx;
247 unsigned i;
248 int r;
249
250 switch (d->Declaration.File) {
251 case TGSI_FILE_INPUT:
252 i = ctx->shader->ninput++;
253 ctx->shader->input[i].name = d->Semantic.Name;
254 ctx->shader->input[i].sid = d->Semantic.Index;
255 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
256 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
257 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
258 /* turn input into fetch */
259 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
260 vtx.inst = 0;
261 vtx.fetch_type = 0;
262 vtx.buffer_id = i;
263 /* register containing the index into the buffer */
264 vtx.src_gpr = 0;
265 vtx.src_sel_x = 0;
266 vtx.mega_fetch_count = 0x1F;
267 vtx.dst_gpr = ctx->shader->input[i].gpr;
268 vtx.dst_sel_x = 0;
269 vtx.dst_sel_y = 1;
270 vtx.dst_sel_z = 2;
271 vtx.dst_sel_w = 3;
272 r = r600_bc_add_vtx(ctx->bc, &vtx);
273 if (r)
274 return r;
275 }
276 break;
277 case TGSI_FILE_OUTPUT:
278 i = ctx->shader->noutput++;
279 ctx->shader->output[i].name = d->Semantic.Name;
280 ctx->shader->output[i].sid = d->Semantic.Index;
281 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
282 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
283 break;
284 case TGSI_FILE_CONSTANT:
285 case TGSI_FILE_TEMPORARY:
286 case TGSI_FILE_SAMPLER:
287 case TGSI_FILE_ADDRESS:
288 break;
289 default:
290 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
291 return -EINVAL;
292 }
293 return 0;
294 }
295
296 static int r600_get_temp(struct r600_shader_ctx *ctx)
297 {
298 return ctx->temp_reg + ctx->max_driver_temp_used++;
299 }
300
301 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
302 {
303 struct tgsi_full_immediate *immediate;
304 struct r600_shader_ctx ctx;
305 struct r600_bc_output output[32];
306 unsigned output_done, noutput;
307 unsigned opcode;
308 int i, r = 0, pos0;
309
310 ctx.bc = &shader->bc;
311 ctx.shader = shader;
312 r = r600_bc_init(ctx.bc, shader->family);
313 if (r)
314 return r;
315 ctx.bc->use_mem_constant = shader->use_mem_constant;
316 ctx.tokens = tokens;
317 tgsi_scan_shader(tokens, &ctx.info);
318 tgsi_parse_init(&ctx.parse, tokens);
319 ctx.type = ctx.parse.FullHeader.Processor.Processor;
320 shader->processor_type = ctx.type;
321
322 /* register allocations */
323 /* Values [0,127] correspond to GPR[0..127].
324 * Values [128,159] correspond to constant buffer bank 0
325 * Values [160,191] correspond to constant buffer bank 1
326 * Values [256,511] correspond to cfile constants c[0..255].
327 * Other special values are shown in the list below.
328 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
329 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
330 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
331 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
332 * 248 SQ_ALU_SRC_0: special constant 0.0.
333 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
334 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
335 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
336 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
337 * 253 SQ_ALU_SRC_LITERAL: literal constant.
338 * 254 SQ_ALU_SRC_PV: previous vector result.
339 * 255 SQ_ALU_SRC_PS: previous scalar result.
340 */
341 for (i = 0; i < TGSI_FILE_COUNT; i++) {
342 ctx.file_offset[i] = 0;
343 }
344 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
345 ctx.file_offset[TGSI_FILE_INPUT] = 1;
346 }
347 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
348 ctx.info.file_count[TGSI_FILE_INPUT];
349 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
350 ctx.info.file_count[TGSI_FILE_OUTPUT];
351 if (ctx.shader->use_mem_constant)
352 ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
353 else
354 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
355
356 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
357 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
358 ctx.info.file_count[TGSI_FILE_TEMPORARY];
359
360 ctx.nliterals = 0;
361 ctx.literals = NULL;
362
363 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
364 tgsi_parse_token(&ctx.parse);
365 switch (ctx.parse.FullToken.Token.Type) {
366 case TGSI_TOKEN_TYPE_IMMEDIATE:
367 immediate = &ctx.parse.FullToken.FullImmediate;
368 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
369 if(ctx.literals == NULL) {
370 r = -ENOMEM;
371 goto out_err;
372 }
373 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
374 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
375 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
376 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
377 ctx.nliterals++;
378 break;
379 case TGSI_TOKEN_TYPE_DECLARATION:
380 r = tgsi_declaration(&ctx);
381 if (r)
382 goto out_err;
383 break;
384 case TGSI_TOKEN_TYPE_INSTRUCTION:
385 r = tgsi_is_supported(&ctx);
386 if (r)
387 goto out_err;
388 ctx.max_driver_temp_used = 0;
389 /* reserve first tmp for everyone */
390 r600_get_temp(&ctx);
391 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
392 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
393 r = ctx.inst_info->process(&ctx);
394 if (r)
395 goto out_err;
396 r = r600_bc_add_literal(ctx.bc, ctx.value);
397 if (r)
398 goto out_err;
399 break;
400 default:
401 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
402 r = -EINVAL;
403 goto out_err;
404 }
405 }
406 /* export output */
407 noutput = shader->noutput;
408 for (i = 0, pos0 = 0; i < noutput; i++) {
409 memset(&output[i], 0, sizeof(struct r600_bc_output));
410 output[i].gpr = shader->output[i].gpr;
411 output[i].elem_size = 3;
412 output[i].swizzle_x = 0;
413 output[i].swizzle_y = 1;
414 output[i].swizzle_z = 2;
415 output[i].swizzle_w = 3;
416 output[i].barrier = 1;
417 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
418 output[i].array_base = i - pos0;
419 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
420 switch (ctx.type) {
421 case TGSI_PROCESSOR_VERTEX:
422 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
423 output[i].array_base = 60;
424 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
425 /* position doesn't count in array_base */
426 pos0++;
427 }
428 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
429 output[i].array_base = 61;
430 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
431 /* position doesn't count in array_base */
432 pos0++;
433 }
434 break;
435 case TGSI_PROCESSOR_FRAGMENT:
436 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
437 output[i].array_base = shader->output[i].sid;
438 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
439 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
440 output[i].array_base = 61;
441 output[i].swizzle_x = 2;
442 output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
443 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
444 } else {
445 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
446 r = -EINVAL;
447 goto out_err;
448 }
449 break;
450 default:
451 R600_ERR("unsupported processor type %d\n", ctx.type);
452 r = -EINVAL;
453 goto out_err;
454 }
455 }
456 /* add fake param output for vertex shader if no param is exported */
457 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
458 for (i = 0, pos0 = 0; i < noutput; i++) {
459 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
460 pos0 = 1;
461 break;
462 }
463 }
464 if (!pos0) {
465 memset(&output[i], 0, sizeof(struct r600_bc_output));
466 output[i].gpr = 0;
467 output[i].elem_size = 3;
468 output[i].swizzle_x = 0;
469 output[i].swizzle_y = 1;
470 output[i].swizzle_z = 2;
471 output[i].swizzle_w = 3;
472 output[i].barrier = 1;
473 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
474 output[i].array_base = 0;
475 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
476 noutput++;
477 }
478 }
479 /* add fake pixel export */
480 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
481 memset(&output[0], 0, sizeof(struct r600_bc_output));
482 output[0].gpr = 0;
483 output[0].elem_size = 3;
484 output[0].swizzle_x = 7;
485 output[0].swizzle_y = 7;
486 output[0].swizzle_z = 7;
487 output[0].swizzle_w = 7;
488 output[0].barrier = 1;
489 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
490 output[0].array_base = 0;
491 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
492 noutput++;
493 }
494 /* set export done on last export of each type */
495 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
496 if (i == (noutput - 1)) {
497 output[i].end_of_program = 1;
498 }
499 if (!(output_done & (1 << output[i].type))) {
500 output_done |= (1 << output[i].type);
501 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
502 }
503 }
504 /* add output to bytecode */
505 for (i = 0; i < noutput; i++) {
506 r = r600_bc_add_output(ctx.bc, &output[i]);
507 if (r)
508 goto out_err;
509 }
510 free(ctx.literals);
511 tgsi_parse_free(&ctx.parse);
512 return 0;
513 out_err:
514 free(ctx.literals);
515 tgsi_parse_free(&ctx.parse);
516 return r;
517 }
518
519 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
520 {
521 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
522 return -EINVAL;
523 }
524
/*
 * Handler that emits nothing for its instruction and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
529
530 static int tgsi_src(struct r600_shader_ctx *ctx,
531 const struct tgsi_full_src_register *tgsi_src,
532 struct r600_bc_alu_src *r600_src)
533 {
534 int index;
535 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
536 r600_src->sel = tgsi_src->Register.Index;
537 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
538 r600_src->sel = 0;
539 index = tgsi_src->Register.Index;
540 ctx->value[0] = ctx->literals[index * 4 + 0];
541 ctx->value[1] = ctx->literals[index * 4 + 1];
542 ctx->value[2] = ctx->literals[index * 4 + 2];
543 ctx->value[3] = ctx->literals[index * 4 + 3];
544 }
545 if (tgsi_src->Register.Indirect)
546 r600_src->rel = V_SQ_REL_RELATIVE;
547 r600_src->neg = tgsi_src->Register.Negate;
548 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
549 return 0;
550 }
551
552 static int tgsi_dst(struct r600_shader_ctx *ctx,
553 const struct tgsi_full_dst_register *tgsi_dst,
554 unsigned swizzle,
555 struct r600_bc_alu_dst *r600_dst)
556 {
557 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
558
559 r600_dst->sel = tgsi_dst->Register.Index;
560 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
561 r600_dst->chan = swizzle;
562 r600_dst->write = 1;
563 if (tgsi_dst->Register.Indirect)
564 r600_dst->rel = V_SQ_REL_RELATIVE;
565 if (inst->Instruction.Saturate) {
566 r600_dst->clamp = 1;
567 }
568 return 0;
569 }
570
571 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
572 {
573 switch (swizzle) {
574 case 0:
575 return tgsi_src->Register.SwizzleX;
576 case 1:
577 return tgsi_src->Register.SwizzleY;
578 case 2:
579 return tgsi_src->Register.SwizzleZ;
580 case 3:
581 return tgsi_src->Register.SwizzleW;
582 default:
583 return 0;
584 }
585 }
586
587 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
588 {
589 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
590 struct r600_bc_alu alu;
591 int i, j, k, nconst, r;
592
593 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
594 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
595 nconst++;
596 }
597 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
598 if (r) {
599 return r;
600 }
601 }
602 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
603 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
604 int treg = r600_get_temp(ctx);
605 for (k = 0; k < 4; k++) {
606 memset(&alu, 0, sizeof(struct r600_bc_alu));
607 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
608 alu.src[0].sel = r600_src[j].sel;
609 alu.src[0].chan = k;
610 alu.dst.sel = treg;
611 alu.dst.chan = k;
612 alu.dst.write = 1;
613 if (k == 3)
614 alu.last = 1;
615 r = r600_bc_add_alu(ctx->bc, &alu);
616 if (r)
617 return r;
618 }
619 r600_src[j].sel = treg;
620 j--;
621 }
622 }
623 return 0;
624 }
625
626 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
627 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
628 {
629 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
630 struct r600_bc_alu alu;
631 int i, j, k, nliteral, r;
632
633 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
634 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
635 nliteral++;
636 }
637 }
638 for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
639 if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
640 int treg = r600_get_temp(ctx);
641 for (k = 0; k < 4; k++) {
642 memset(&alu, 0, sizeof(struct r600_bc_alu));
643 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
644 alu.src[0].sel = r600_src[j].sel;
645 alu.src[0].chan = k;
646 alu.dst.sel = treg;
647 alu.dst.chan = k;
648 alu.dst.write = 1;
649 if (k == 3)
650 alu.last = 1;
651 r = r600_bc_add_alu(ctx->bc, &alu);
652 if (r)
653 return r;
654 }
655 r = r600_bc_add_literal(ctx->bc, ctx->value);
656 if (r)
657 return r;
658 r600_src[j].sel = treg;
659 j++;
660 }
661 }
662 return 0;
663 }
664
665 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
666 {
667 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
668 struct r600_bc_alu_src r600_src[3];
669 struct r600_bc_alu alu;
670 int i, j, r;
671 int lasti = 0;
672
673 for (i = 0; i < 4; i++) {
674 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
675 lasti = i;
676 }
677 }
678
679 r = tgsi_split_constant(ctx, r600_src);
680 if (r)
681 return r;
682 for (i = 0; i < lasti + 1; i++) {
683 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
684 continue;
685
686 memset(&alu, 0, sizeof(struct r600_bc_alu));
687 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
688 if (r)
689 return r;
690
691 alu.inst = ctx->inst_info->r600_opcode;
692 if (!swap) {
693 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
694 alu.src[j] = r600_src[j];
695 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
696 }
697 } else {
698 alu.src[0] = r600_src[1];
699 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
700
701 alu.src[1] = r600_src[0];
702 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
703 }
704 /* handle some special cases */
705 switch (ctx->inst_info->tgsi_opcode) {
706 case TGSI_OPCODE_SUB:
707 alu.src[1].neg = 1;
708 break;
709 case TGSI_OPCODE_ABS:
710 alu.src[0].abs = 1;
711 break;
712 default:
713 break;
714 }
715 if (i == lasti) {
716 alu.last = 1;
717 }
718 r = r600_bc_add_alu(ctx->bc, &alu);
719 if (r)
720 return r;
721 }
722 return 0;
723 }
724
/*
 * Component-wise translation with the source operands in TGSI order.
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_operands = 0;

	return tgsi_op2_s(ctx, swap_operands);
}
729
/*
 * Component-wise translation with the first two source operands exchanged.
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_operands = 1;

	return tgsi_op2_s(ctx, swap_operands);
}
734
735 /*
736 * r600 - trunc to -PI..PI range
737 * r700 - normalize by dividing by 2PI
738 * see fdo bug 27901
739 */
740 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
741 struct r600_bc_alu_src r600_src[3])
742 {
743 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
744 int r;
745 uint32_t lit_vals[4];
746 struct r600_bc_alu alu;
747
748 memset(lit_vals, 0, 4*4);
749 r = tgsi_split_constant(ctx, r600_src);
750 if (r)
751 return r;
752
753 r = tgsi_split_literal_constant(ctx, r600_src);
754 if (r)
755 return r;
756
757 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
758 lit_vals[1] = fui(0.5f);
759
760 memset(&alu, 0, sizeof(struct r600_bc_alu));
761 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
762 alu.is_op3 = 1;
763
764 alu.dst.chan = 0;
765 alu.dst.sel = ctx->temp_reg;
766 alu.dst.write = 1;
767
768 alu.src[0] = r600_src[0];
769 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
770
771 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
772 alu.src[1].chan = 0;
773 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
774 alu.src[2].chan = 1;
775 alu.last = 1;
776 r = r600_bc_add_alu(ctx->bc, &alu);
777 if (r)
778 return r;
779 r = r600_bc_add_literal(ctx->bc, lit_vals);
780 if (r)
781 return r;
782
783 memset(&alu, 0, sizeof(struct r600_bc_alu));
784 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
785
786 alu.dst.chan = 0;
787 alu.dst.sel = ctx->temp_reg;
788 alu.dst.write = 1;
789
790 alu.src[0].sel = ctx->temp_reg;
791 alu.src[0].chan = 0;
792 alu.last = 1;
793 r = r600_bc_add_alu(ctx->bc, &alu);
794 if (r)
795 return r;
796
797 if (ctx->bc->chiprev == 0) {
798 lit_vals[0] = fui(3.1415926535897f * 2.0f);
799 lit_vals[1] = fui(-3.1415926535897f);
800 } else {
801 lit_vals[0] = fui(1.0f);
802 lit_vals[1] = fui(-0.5f);
803 }
804
805 memset(&alu, 0, sizeof(struct r600_bc_alu));
806 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
807 alu.is_op3 = 1;
808
809 alu.dst.chan = 0;
810 alu.dst.sel = ctx->temp_reg;
811 alu.dst.write = 1;
812
813 alu.src[0].sel = ctx->temp_reg;
814 alu.src[0].chan = 0;
815
816 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
817 alu.src[1].chan = 0;
818 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
819 alu.src[2].chan = 1;
820 alu.last = 1;
821 r = r600_bc_add_alu(ctx->bc, &alu);
822 if (r)
823 return r;
824 r = r600_bc_add_literal(ctx->bc, lit_vals);
825 if (r)
826 return r;
827 return 0;
828 }
829
830 static int tgsi_trig(struct r600_shader_ctx *ctx)
831 {
832 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
833 struct r600_bc_alu_src r600_src[3];
834 struct r600_bc_alu alu;
835 int i, r;
836 int lasti = 0;
837
838 r = tgsi_setup_trig(ctx, r600_src);
839 if (r)
840 return r;
841
842 memset(&alu, 0, sizeof(struct r600_bc_alu));
843 alu.inst = ctx->inst_info->r600_opcode;
844 alu.dst.chan = 0;
845 alu.dst.sel = ctx->temp_reg;
846 alu.dst.write = 1;
847
848 alu.src[0].sel = ctx->temp_reg;
849 alu.src[0].chan = 0;
850 alu.last = 1;
851 r = r600_bc_add_alu(ctx->bc, &alu);
852 if (r)
853 return r;
854
855 /* replicate result */
856 for (i = 0; i < 4; i++) {
857 if (inst->Dst[0].Register.WriteMask & (1 << i))
858 lasti = i;
859 }
860 for (i = 0; i < lasti + 1; i++) {
861 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
862 continue;
863
864 memset(&alu, 0, sizeof(struct r600_bc_alu));
865 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
866
867 alu.src[0].sel = ctx->temp_reg;
868 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
869 if (r)
870 return r;
871 if (i == lasti)
872 alu.last = 1;
873 r = r600_bc_add_alu(ctx->bc, &alu);
874 if (r)
875 return r;
876 }
877 return 0;
878 }
879
880 static int tgsi_scs(struct r600_shader_ctx *ctx)
881 {
882 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
883 struct r600_bc_alu_src r600_src[3];
884 struct r600_bc_alu alu;
885 int r;
886
887 r = tgsi_setup_trig(ctx, r600_src);
888 if (r)
889 return r;
890
891
892 /* dst.x = COS */
893 memset(&alu, 0, sizeof(struct r600_bc_alu));
894 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS;
895 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
896 if (r)
897 return r;
898
899 alu.src[0].sel = ctx->temp_reg;
900 alu.src[0].chan = 0;
901 alu.last = 1;
902 r = r600_bc_add_alu(ctx->bc, &alu);
903 if (r)
904 return r;
905
906 /* dst.y = SIN */
907 memset(&alu, 0, sizeof(struct r600_bc_alu));
908 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN;
909 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
910 if (r)
911 return r;
912
913 alu.src[0].sel = ctx->temp_reg;
914 alu.src[0].chan = 0;
915 alu.last = 1;
916 r = r600_bc_add_alu(ctx->bc, &alu);
917 if (r)
918 return r;
919 return 0;
920 }
921
922 static int tgsi_kill(struct r600_shader_ctx *ctx)
923 {
924 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
925 struct r600_bc_alu alu;
926 int i, r;
927
928 for (i = 0; i < 4; i++) {
929 memset(&alu, 0, sizeof(struct r600_bc_alu));
930 alu.inst = ctx->inst_info->r600_opcode;
931
932 alu.dst.chan = i;
933
934 alu.src[0].sel = V_SQ_ALU_SRC_0;
935
936 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
937 alu.src[1].sel = V_SQ_ALU_SRC_1;
938 alu.src[1].neg = 1;
939 } else {
940 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
941 if (r)
942 return r;
943 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
944 }
945 if (i == 3) {
946 alu.last = 1;
947 }
948 r = r600_bc_add_alu(ctx->bc, &alu);
949 if (r)
950 return r;
951 }
952 r = r600_bc_add_literal(ctx->bc, ctx->value);
953 if (r)
954 return r;
955
956 /* kill must be last in ALU */
957 ctx->bc->force_add_cf = 1;
958 ctx->shader->uses_kill = TRUE;
959 return 0;
960 }
961
962 static int tgsi_lit(struct r600_shader_ctx *ctx)
963 {
964 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
965 struct r600_bc_alu alu;
966 struct r600_bc_alu_src r600_src[3];
967 int r;
968
969 r = tgsi_split_constant(ctx, r600_src);
970 if (r)
971 return r;
972 r = tgsi_split_literal_constant(ctx, r600_src);
973 if (r)
974 return r;
975
976 /* dst.x, <- 1.0 */
977 memset(&alu, 0, sizeof(struct r600_bc_alu));
978 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
979 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
980 alu.src[0].chan = 0;
981 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
982 if (r)
983 return r;
984 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
985 r = r600_bc_add_alu(ctx->bc, &alu);
986 if (r)
987 return r;
988
989 /* dst.y = max(src.x, 0.0) */
990 memset(&alu, 0, sizeof(struct r600_bc_alu));
991 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
992 alu.src[0] = r600_src[0];
993 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
994 alu.src[1].chan = 0;
995 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
996 if (r)
997 return r;
998 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
999 r = r600_bc_add_alu(ctx->bc, &alu);
1000 if (r)
1001 return r;
1002
1003 /* dst.w, <- 1.0 */
1004 memset(&alu, 0, sizeof(struct r600_bc_alu));
1005 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1006 alu.src[0].sel = V_SQ_ALU_SRC_1;
1007 alu.src[0].chan = 0;
1008 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1009 if (r)
1010 return r;
1011 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1012 alu.last = 1;
1013 r = r600_bc_add_alu(ctx->bc, &alu);
1014 if (r)
1015 return r;
1016
1017 r = r600_bc_add_literal(ctx->bc, ctx->value);
1018 if (r)
1019 return r;
1020
1021 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1022 {
1023 int chan;
1024 int sel;
1025
1026 /* dst.z = log(src.y) */
1027 memset(&alu, 0, sizeof(struct r600_bc_alu));
1028 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
1029 alu.src[0] = r600_src[0];
1030 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1031 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1032 if (r)
1033 return r;
1034 alu.last = 1;
1035 r = r600_bc_add_alu(ctx->bc, &alu);
1036 if (r)
1037 return r;
1038
1039 r = r600_bc_add_literal(ctx->bc, ctx->value);
1040 if (r)
1041 return r;
1042
1043 chan = alu.dst.chan;
1044 sel = alu.dst.sel;
1045
1046 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1047 memset(&alu, 0, sizeof(struct r600_bc_alu));
1048 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
1049 alu.src[0] = r600_src[0];
1050 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1051 alu.src[1].sel = sel;
1052 alu.src[1].chan = chan;
1053
1054 alu.src[2] = r600_src[0];
1055 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1056 alu.dst.sel = ctx->temp_reg;
1057 alu.dst.chan = 0;
1058 alu.dst.write = 1;
1059 alu.is_op3 = 1;
1060 alu.last = 1;
1061 r = r600_bc_add_alu(ctx->bc, &alu);
1062 if (r)
1063 return r;
1064
1065 r = r600_bc_add_literal(ctx->bc, ctx->value);
1066 if (r)
1067 return r;
1068 /* dst.z = exp(tmp.x) */
1069 memset(&alu, 0, sizeof(struct r600_bc_alu));
1070 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1071 alu.src[0].sel = ctx->temp_reg;
1072 alu.src[0].chan = 0;
1073 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1074 if (r)
1075 return r;
1076 alu.last = 1;
1077 r = r600_bc_add_alu(ctx->bc, &alu);
1078 if (r)
1079 return r;
1080 }
1081 return 0;
1082 }
1083
1084 static int tgsi_trans(struct r600_shader_ctx *ctx)
1085 {
1086 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1087 struct r600_bc_alu alu;
1088 int i, j, r;
1089
1090 for (i = 0; i < 4; i++) {
1091 memset(&alu, 0, sizeof(struct r600_bc_alu));
1092 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1093 alu.inst = ctx->inst_info->r600_opcode;
1094 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1095 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1096 if (r)
1097 return r;
1098 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1099 }
1100 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1101 if (r)
1102 return r;
1103 alu.last = 1;
1104 r = r600_bc_add_alu(ctx->bc, &alu);
1105 if (r)
1106 return r;
1107 }
1108 }
1109 return 0;
1110 }
1111
1112 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1113 {
1114 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1115 struct r600_bc_alu alu;
1116 int i, r;
1117
1118 for (i = 0; i < 4; i++) {
1119 memset(&alu, 0, sizeof(struct r600_bc_alu));
1120 alu.src[0].sel = ctx->temp_reg;
1121 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1122 alu.dst.chan = i;
1123 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1124 if (r)
1125 return r;
1126 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1127 if (i == 3)
1128 alu.last = 1;
1129 r = r600_bc_add_alu(ctx->bc, &alu);
1130 if (r)
1131 return r;
1132 }
1133 return 0;
1134 }
1135
1136 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1137 {
1138 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1139 struct r600_bc_alu alu;
1140 int i, r;
1141
1142 memset(&alu, 0, sizeof(struct r600_bc_alu));
1143 alu.inst = ctx->inst_info->r600_opcode;
1144 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1145 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1146 if (r)
1147 return r;
1148 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1149 }
1150 alu.dst.sel = ctx->temp_reg;
1151 alu.dst.write = 1;
1152 alu.last = 1;
1153 r = r600_bc_add_alu(ctx->bc, &alu);
1154 if (r)
1155 return r;
1156 r = r600_bc_add_literal(ctx->bc, ctx->value);
1157 if (r)
1158 return r;
1159 /* replicate result */
1160 return tgsi_helper_tempx_replicate(ctx);
1161 }
1162
1163 static int tgsi_pow(struct r600_shader_ctx *ctx)
1164 {
1165 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1166 struct r600_bc_alu alu;
1167 int r;
1168
1169 /* LOG2(a) */
1170 memset(&alu, 0, sizeof(struct r600_bc_alu));
1171 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1172 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1173 if (r)
1174 return r;
1175 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1176 alu.dst.sel = ctx->temp_reg;
1177 alu.dst.write = 1;
1178 alu.last = 1;
1179 r = r600_bc_add_alu(ctx->bc, &alu);
1180 if (r)
1181 return r;
1182 r = r600_bc_add_literal(ctx->bc,ctx->value);
1183 if (r)
1184 return r;
1185 /* b * LOG2(a) */
1186 memset(&alu, 0, sizeof(struct r600_bc_alu));
1187 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1188 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1189 if (r)
1190 return r;
1191 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1192 alu.src[1].sel = ctx->temp_reg;
1193 alu.dst.sel = ctx->temp_reg;
1194 alu.dst.write = 1;
1195 alu.last = 1;
1196 r = r600_bc_add_alu(ctx->bc, &alu);
1197 if (r)
1198 return r;
1199 r = r600_bc_add_literal(ctx->bc,ctx->value);
1200 if (r)
1201 return r;
1202 /* POW(a,b) = EXP2(b * LOG2(a))*/
1203 memset(&alu, 0, sizeof(struct r600_bc_alu));
1204 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1205 alu.src[0].sel = ctx->temp_reg;
1206 alu.dst.sel = ctx->temp_reg;
1207 alu.dst.write = 1;
1208 alu.last = 1;
1209 r = r600_bc_add_alu(ctx->bc, &alu);
1210 if (r)
1211 return r;
1212 r = r600_bc_add_literal(ctx->bc,ctx->value);
1213 if (r)
1214 return r;
1215 return tgsi_helper_tempx_replicate(ctx);
1216 }
1217
1218 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1219 {
1220 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1221 struct r600_bc_alu alu;
1222 struct r600_bc_alu_src r600_src[3];
1223 int i, r;
1224
1225 r = tgsi_split_constant(ctx, r600_src);
1226 if (r)
1227 return r;
1228
1229 /* tmp = (src > 0 ? 1 : src) */
1230 for (i = 0; i < 4; i++) {
1231 memset(&alu, 0, sizeof(struct r600_bc_alu));
1232 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1233 alu.is_op3 = 1;
1234
1235 alu.dst.sel = ctx->temp_reg;
1236 alu.dst.chan = i;
1237
1238 alu.src[0] = r600_src[0];
1239 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1240
1241 alu.src[1].sel = V_SQ_ALU_SRC_1;
1242
1243 alu.src[2] = r600_src[0];
1244 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1245 if (i == 3)
1246 alu.last = 1;
1247 r = r600_bc_add_alu(ctx->bc, &alu);
1248 if (r)
1249 return r;
1250 }
1251 r = r600_bc_add_literal(ctx->bc, ctx->value);
1252 if (r)
1253 return r;
1254
1255 /* dst = (-tmp > 0 ? -1 : tmp) */
1256 for (i = 0; i < 4; i++) {
1257 memset(&alu, 0, sizeof(struct r600_bc_alu));
1258 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1259 alu.is_op3 = 1;
1260 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1261 if (r)
1262 return r;
1263
1264 alu.src[0].sel = ctx->temp_reg;
1265 alu.src[0].chan = i;
1266 alu.src[0].neg = 1;
1267
1268 alu.src[1].sel = V_SQ_ALU_SRC_1;
1269 alu.src[1].neg = 1;
1270
1271 alu.src[2].sel = ctx->temp_reg;
1272 alu.src[2].chan = i;
1273
1274 if (i == 3)
1275 alu.last = 1;
1276 r = r600_bc_add_alu(ctx->bc, &alu);
1277 if (r)
1278 return r;
1279 }
1280 return 0;
1281 }
1282
1283 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1284 {
1285 struct r600_bc_alu alu;
1286 int i, r;
1287
1288 r = r600_bc_add_literal(ctx->bc, ctx->value);
1289 if (r)
1290 return r;
1291 for (i = 0; i < 4; i++) {
1292 memset(&alu, 0, sizeof(struct r600_bc_alu));
1293 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1294 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1295 alu.dst.chan = i;
1296 } else {
1297 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1298 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1299 if (r)
1300 return r;
1301 alu.src[0].sel = ctx->temp_reg;
1302 alu.src[0].chan = i;
1303 }
1304 if (i == 3) {
1305 alu.last = 1;
1306 }
1307 r = r600_bc_add_alu(ctx->bc, &alu);
1308 if (r)
1309 return r;
1310 }
1311 return 0;
1312 }
1313
1314 static int tgsi_op3(struct r600_shader_ctx *ctx)
1315 {
1316 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1317 struct r600_bc_alu_src r600_src[3];
1318 struct r600_bc_alu alu;
1319 int i, j, r;
1320
1321 r = tgsi_split_constant(ctx, r600_src);
1322 if (r)
1323 return r;
1324 /* do it in 2 step as op3 doesn't support writemask */
1325 for (i = 0; i < 4; i++) {
1326 memset(&alu, 0, sizeof(struct r600_bc_alu));
1327 alu.inst = ctx->inst_info->r600_opcode;
1328 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1329 alu.src[j] = r600_src[j];
1330 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1331 }
1332 alu.dst.sel = ctx->temp_reg;
1333 alu.dst.chan = i;
1334 alu.dst.write = 1;
1335 alu.is_op3 = 1;
1336 if (i == 3) {
1337 alu.last = 1;
1338 }
1339 r = r600_bc_add_alu(ctx->bc, &alu);
1340 if (r)
1341 return r;
1342 }
1343 return tgsi_helper_copy(ctx, inst);
1344 }
1345
1346 static int tgsi_dp(struct r600_shader_ctx *ctx)
1347 {
1348 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1349 struct r600_bc_alu_src r600_src[3];
1350 struct r600_bc_alu alu;
1351 int i, j, r;
1352
1353 r = tgsi_split_constant(ctx, r600_src);
1354 if (r)
1355 return r;
1356 for (i = 0; i < 4; i++) {
1357 memset(&alu, 0, sizeof(struct r600_bc_alu));
1358 alu.inst = ctx->inst_info->r600_opcode;
1359 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1360 alu.src[j] = r600_src[j];
1361 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1362 }
1363 alu.dst.sel = ctx->temp_reg;
1364 alu.dst.chan = i;
1365 alu.dst.write = 1;
1366 /* handle some special cases */
1367 switch (ctx->inst_info->tgsi_opcode) {
1368 case TGSI_OPCODE_DP2:
1369 if (i > 1) {
1370 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1371 alu.src[0].chan = alu.src[1].chan = 0;
1372 }
1373 break;
1374 case TGSI_OPCODE_DP3:
1375 if (i > 2) {
1376 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1377 alu.src[0].chan = alu.src[1].chan = 0;
1378 }
1379 break;
1380 case TGSI_OPCODE_DPH:
1381 if (i == 3) {
1382 alu.src[0].sel = V_SQ_ALU_SRC_1;
1383 alu.src[0].chan = 0;
1384 alu.src[0].neg = 0;
1385 }
1386 break;
1387 default:
1388 break;
1389 }
1390 if (i == 3) {
1391 alu.last = 1;
1392 }
1393 r = r600_bc_add_alu(ctx->bc, &alu);
1394 if (r)
1395 return r;
1396 }
1397 return tgsi_helper_copy(ctx, inst);
1398 }
1399
1400 static int tgsi_tex(struct r600_shader_ctx *ctx)
1401 {
1402 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1403 struct r600_bc_tex tex;
1404 struct r600_bc_alu alu;
1405 unsigned src_gpr;
1406 int r, i;
1407 int opcode;
1408 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1409 uint32_t lit_vals[4];
1410
1411 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1412
1413 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1414 /* Add perspective divide */
1415 memset(&alu, 0, sizeof(struct r600_bc_alu));
1416 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1417 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1418 if (r)
1419 return r;
1420
1421 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1422 alu.dst.sel = ctx->temp_reg;
1423 alu.dst.chan = 3;
1424 alu.last = 1;
1425 alu.dst.write = 1;
1426 r = r600_bc_add_alu(ctx->bc, &alu);
1427 if (r)
1428 return r;
1429
1430 for (i = 0; i < 3; i++) {
1431 memset(&alu, 0, sizeof(struct r600_bc_alu));
1432 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1433 alu.src[0].sel = ctx->temp_reg;
1434 alu.src[0].chan = 3;
1435 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1436 if (r)
1437 return r;
1438 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1439 alu.dst.sel = ctx->temp_reg;
1440 alu.dst.chan = i;
1441 alu.dst.write = 1;
1442 r = r600_bc_add_alu(ctx->bc, &alu);
1443 if (r)
1444 return r;
1445 }
1446 memset(&alu, 0, sizeof(struct r600_bc_alu));
1447 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1448 alu.src[0].sel = V_SQ_ALU_SRC_1;
1449 alu.src[0].chan = 0;
1450 alu.dst.sel = ctx->temp_reg;
1451 alu.dst.chan = 3;
1452 alu.last = 1;
1453 alu.dst.write = 1;
1454 r = r600_bc_add_alu(ctx->bc, &alu);
1455 if (r)
1456 return r;
1457 src_not_temp = false;
1458 src_gpr = ctx->temp_reg;
1459 }
1460
1461 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1462 int src_chan, src2_chan;
1463
1464 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1465 for (i = 0; i < 4; i++) {
1466 memset(&alu, 0, sizeof(struct r600_bc_alu));
1467 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
1468 switch (i) {
1469 case 0:
1470 src_chan = 2;
1471 src2_chan = 1;
1472 break;
1473 case 1:
1474 src_chan = 2;
1475 src2_chan = 0;
1476 break;
1477 case 2:
1478 src_chan = 0;
1479 src2_chan = 2;
1480 break;
1481 case 3:
1482 src_chan = 1;
1483 src2_chan = 2;
1484 break;
1485 }
1486 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1487 if (r)
1488 return r;
1489 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1490 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1491 if (r)
1492 return r;
1493 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1494 alu.dst.sel = ctx->temp_reg;
1495 alu.dst.chan = i;
1496 if (i == 3)
1497 alu.last = 1;
1498 alu.dst.write = 1;
1499 r = r600_bc_add_alu(ctx->bc, &alu);
1500 if (r)
1501 return r;
1502 }
1503
1504 /* tmp1.z = RCP_e(|tmp1.z|) */
1505 memset(&alu, 0, sizeof(struct r600_bc_alu));
1506 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1507 alu.src[0].sel = ctx->temp_reg;
1508 alu.src[0].chan = 2;
1509 alu.src[0].abs = 1;
1510 alu.dst.sel = ctx->temp_reg;
1511 alu.dst.chan = 2;
1512 alu.dst.write = 1;
1513 alu.last = 1;
1514 r = r600_bc_add_alu(ctx->bc, &alu);
1515 if (r)
1516 return r;
1517
1518 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1519 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1520 * muladd has no writemask, have to use another temp
1521 */
1522 memset(&alu, 0, sizeof(struct r600_bc_alu));
1523 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1524 alu.is_op3 = 1;
1525
1526 alu.src[0].sel = ctx->temp_reg;
1527 alu.src[0].chan = 0;
1528 alu.src[1].sel = ctx->temp_reg;
1529 alu.src[1].chan = 2;
1530
1531 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1532 alu.src[2].chan = 0;
1533
1534 alu.dst.sel = ctx->temp_reg;
1535 alu.dst.chan = 0;
1536 alu.dst.write = 1;
1537
1538 r = r600_bc_add_alu(ctx->bc, &alu);
1539 if (r)
1540 return r;
1541
1542 memset(&alu, 0, sizeof(struct r600_bc_alu));
1543 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1544 alu.is_op3 = 1;
1545
1546 alu.src[0].sel = ctx->temp_reg;
1547 alu.src[0].chan = 1;
1548 alu.src[1].sel = ctx->temp_reg;
1549 alu.src[1].chan = 2;
1550
1551 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1552 alu.src[2].chan = 0;
1553
1554 alu.dst.sel = ctx->temp_reg;
1555 alu.dst.chan = 1;
1556 alu.dst.write = 1;
1557
1558 alu.last = 1;
1559 r = r600_bc_add_alu(ctx->bc, &alu);
1560 if (r)
1561 return r;
1562
1563 lit_vals[0] = fui(1.5f);
1564
1565 r = r600_bc_add_literal(ctx->bc, lit_vals);
1566 if (r)
1567 return r;
1568 src_not_temp = false;
1569 src_gpr = ctx->temp_reg;
1570 }
1571
1572 if (src_not_temp) {
1573 for (i = 0; i < 4; i++) {
1574 memset(&alu, 0, sizeof(struct r600_bc_alu));
1575 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1576 alu.src[0].sel = src_gpr;
1577 alu.src[0].chan = i;
1578 alu.dst.sel = ctx->temp_reg;
1579 alu.dst.chan = i;
1580 if (i == 3)
1581 alu.last = 1;
1582 alu.dst.write = 1;
1583 r = r600_bc_add_alu(ctx->bc, &alu);
1584 if (r)
1585 return r;
1586 }
1587 src_gpr = ctx->temp_reg;
1588 }
1589
1590 opcode = ctx->inst_info->r600_opcode;
1591 if (opcode == SQ_TEX_INST_SAMPLE &&
1592 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1593 opcode = SQ_TEX_INST_SAMPLE_C;
1594
1595 memset(&tex, 0, sizeof(struct r600_bc_tex));
1596 tex.inst = opcode;
1597 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1598 tex.sampler_id = tex.resource_id;
1599 tex.src_gpr = src_gpr;
1600 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1601 tex.dst_sel_x = 0;
1602 tex.dst_sel_y = 1;
1603 tex.dst_sel_z = 2;
1604 tex.dst_sel_w = 3;
1605 tex.src_sel_x = 0;
1606 tex.src_sel_y = 1;
1607 tex.src_sel_z = 2;
1608 tex.src_sel_w = 3;
1609
1610 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1611 tex.src_sel_x = 1;
1612 tex.src_sel_y = 0;
1613 tex.src_sel_z = 3;
1614 tex.src_sel_w = 1;
1615 }
1616
1617 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1618 tex.coord_type_x = 1;
1619 tex.coord_type_y = 1;
1620 tex.coord_type_z = 1;
1621 tex.coord_type_w = 1;
1622 }
1623
1624 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1625 tex.src_sel_w = 2;
1626
1627 r = r600_bc_add_tex(ctx->bc, &tex);
1628 if (r)
1629 return r;
1630
1631 /* add shadow ambient support - gallium doesn't do it yet */
1632 return 0;
1633
1634 }
1635
1636 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1637 {
1638 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1639 struct r600_bc_alu_src r600_src[3];
1640 struct r600_bc_alu alu;
1641 unsigned i;
1642 int r;
1643
1644 r = tgsi_split_constant(ctx, r600_src);
1645 if (r)
1646 return r;
1647 /* 1 - src0 */
1648 for (i = 0; i < 4; i++) {
1649 memset(&alu, 0, sizeof(struct r600_bc_alu));
1650 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1651 alu.src[0].sel = V_SQ_ALU_SRC_1;
1652 alu.src[0].chan = 0;
1653 alu.src[1] = r600_src[0];
1654 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1655 alu.src[1].neg = 1;
1656 alu.dst.sel = ctx->temp_reg;
1657 alu.dst.chan = i;
1658 if (i == 3) {
1659 alu.last = 1;
1660 }
1661 alu.dst.write = 1;
1662 r = r600_bc_add_alu(ctx->bc, &alu);
1663 if (r)
1664 return r;
1665 }
1666 r = r600_bc_add_literal(ctx->bc, ctx->value);
1667 if (r)
1668 return r;
1669
1670 /* (1 - src0) * src2 */
1671 for (i = 0; i < 4; i++) {
1672 memset(&alu, 0, sizeof(struct r600_bc_alu));
1673 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1674 alu.src[0].sel = ctx->temp_reg;
1675 alu.src[0].chan = i;
1676 alu.src[1] = r600_src[2];
1677 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1678 alu.dst.sel = ctx->temp_reg;
1679 alu.dst.chan = i;
1680 if (i == 3) {
1681 alu.last = 1;
1682 }
1683 alu.dst.write = 1;
1684 r = r600_bc_add_alu(ctx->bc, &alu);
1685 if (r)
1686 return r;
1687 }
1688 r = r600_bc_add_literal(ctx->bc, ctx->value);
1689 if (r)
1690 return r;
1691
1692 /* src0 * src1 + (1 - src0) * src2 */
1693 for (i = 0; i < 4; i++) {
1694 memset(&alu, 0, sizeof(struct r600_bc_alu));
1695 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1696 alu.is_op3 = 1;
1697 alu.src[0] = r600_src[0];
1698 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1699 alu.src[1] = r600_src[1];
1700 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1701 alu.src[2].sel = ctx->temp_reg;
1702 alu.src[2].chan = i;
1703 alu.dst.sel = ctx->temp_reg;
1704 alu.dst.chan = i;
1705 if (i == 3) {
1706 alu.last = 1;
1707 }
1708 r = r600_bc_add_alu(ctx->bc, &alu);
1709 if (r)
1710 return r;
1711 }
1712 return tgsi_helper_copy(ctx, inst);
1713 }
1714
1715 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1716 {
1717 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1718 struct r600_bc_alu_src r600_src[3];
1719 struct r600_bc_alu alu;
1720 int use_temp = 0;
1721 int i, r;
1722
1723 r = tgsi_split_constant(ctx, r600_src);
1724 if (r)
1725 return r;
1726
1727 if (inst->Dst[0].Register.WriteMask != 0xf)
1728 use_temp = 1;
1729
1730 for (i = 0; i < 4; i++) {
1731 memset(&alu, 0, sizeof(struct r600_bc_alu));
1732 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
1733 alu.src[0] = r600_src[0];
1734 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1735
1736 alu.src[1] = r600_src[2];
1737 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1738
1739 alu.src[2] = r600_src[1];
1740 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1741
1742 if (use_temp)
1743 alu.dst.sel = ctx->temp_reg;
1744 else {
1745 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1746 if (r)
1747 return r;
1748 }
1749 alu.dst.chan = i;
1750 alu.dst.write = 1;
1751 alu.is_op3 = 1;
1752 if (i == 3)
1753 alu.last = 1;
1754 r = r600_bc_add_alu(ctx->bc, &alu);
1755 if (r)
1756 return r;
1757 }
1758 if (use_temp)
1759 return tgsi_helper_copy(ctx, inst);
1760 return 0;
1761 }
1762
1763 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1764 {
1765 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1766 struct r600_bc_alu_src r600_src[3];
1767 struct r600_bc_alu alu;
1768 uint32_t use_temp = 0;
1769 int i, r;
1770
1771 if (inst->Dst[0].Register.WriteMask != 0xf)
1772 use_temp = 1;
1773
1774 r = tgsi_split_constant(ctx, r600_src);
1775 if (r)
1776 return r;
1777
1778 for (i = 0; i < 4; i++) {
1779 memset(&alu, 0, sizeof(struct r600_bc_alu));
1780 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1781
1782 alu.src[0] = r600_src[0];
1783 switch (i) {
1784 case 0:
1785 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1786 break;
1787 case 1:
1788 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1789 break;
1790 case 2:
1791 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1792 break;
1793 case 3:
1794 alu.src[0].sel = V_SQ_ALU_SRC_0;
1795 alu.src[0].chan = i;
1796 }
1797
1798 alu.src[1] = r600_src[1];
1799 switch (i) {
1800 case 0:
1801 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1802 break;
1803 case 1:
1804 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1805 break;
1806 case 2:
1807 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1808 break;
1809 case 3:
1810 alu.src[1].sel = V_SQ_ALU_SRC_0;
1811 alu.src[1].chan = i;
1812 }
1813
1814 alu.dst.sel = ctx->temp_reg;
1815 alu.dst.chan = i;
1816 alu.dst.write = 1;
1817
1818 if (i == 3)
1819 alu.last = 1;
1820 r = r600_bc_add_alu(ctx->bc, &alu);
1821 if (r)
1822 return r;
1823 }
1824
1825 for (i = 0; i < 4; i++) {
1826 memset(&alu, 0, sizeof(struct r600_bc_alu));
1827 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1828
1829 alu.src[0] = r600_src[0];
1830 switch (i) {
1831 case 0:
1832 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1833 break;
1834 case 1:
1835 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1836 break;
1837 case 2:
1838 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1839 break;
1840 case 3:
1841 alu.src[0].sel = V_SQ_ALU_SRC_0;
1842 alu.src[0].chan = i;
1843 }
1844
1845 alu.src[1] = r600_src[1];
1846 switch (i) {
1847 case 0:
1848 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1849 break;
1850 case 1:
1851 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1852 break;
1853 case 2:
1854 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1855 break;
1856 case 3:
1857 alu.src[1].sel = V_SQ_ALU_SRC_0;
1858 alu.src[1].chan = i;
1859 }
1860
1861 alu.src[2].sel = ctx->temp_reg;
1862 alu.src[2].neg = 1;
1863 alu.src[2].chan = i;
1864
1865 if (use_temp)
1866 alu.dst.sel = ctx->temp_reg;
1867 else {
1868 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1869 if (r)
1870 return r;
1871 }
1872 alu.dst.chan = i;
1873 alu.dst.write = 1;
1874 alu.is_op3 = 1;
1875 if (i == 3)
1876 alu.last = 1;
1877 r = r600_bc_add_alu(ctx->bc, &alu);
1878 if (r)
1879 return r;
1880 }
1881 if (use_temp)
1882 return tgsi_helper_copy(ctx, inst);
1883 return 0;
1884 }
1885
1886 static int tgsi_exp(struct r600_shader_ctx *ctx)
1887 {
1888 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1889 struct r600_bc_alu_src r600_src[3];
1890 struct r600_bc_alu alu;
1891 int r;
1892
1893 /* result.x = 2^floor(src); */
1894 if (inst->Dst[0].Register.WriteMask & 1) {
1895 memset(&alu, 0, sizeof(struct r600_bc_alu));
1896
1897 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
1898 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1899 if (r)
1900 return r;
1901
1902 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1903
1904 alu.dst.sel = ctx->temp_reg;
1905 alu.dst.chan = 0;
1906 alu.dst.write = 1;
1907 alu.last = 1;
1908 r = r600_bc_add_alu(ctx->bc, &alu);
1909 if (r)
1910 return r;
1911
1912 r = r600_bc_add_literal(ctx->bc, ctx->value);
1913 if (r)
1914 return r;
1915
1916 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1917 alu.src[0].sel = ctx->temp_reg;
1918 alu.src[0].chan = 0;
1919
1920 alu.dst.sel = ctx->temp_reg;
1921 alu.dst.chan = 0;
1922 alu.dst.write = 1;
1923 alu.last = 1;
1924 r = r600_bc_add_alu(ctx->bc, &alu);
1925 if (r)
1926 return r;
1927
1928 r = r600_bc_add_literal(ctx->bc, ctx->value);
1929 if (r)
1930 return r;
1931 }
1932
1933 /* result.y = tmp - floor(tmp); */
1934 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1935 memset(&alu, 0, sizeof(struct r600_bc_alu));
1936
1937 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
1938 alu.src[0] = r600_src[0];
1939 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1940 if (r)
1941 return r;
1942 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1943
1944 alu.dst.sel = ctx->temp_reg;
1945 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1946 // if (r)
1947 // return r;
1948 alu.dst.write = 1;
1949 alu.dst.chan = 1;
1950
1951 alu.last = 1;
1952
1953 r = r600_bc_add_alu(ctx->bc, &alu);
1954 if (r)
1955 return r;
1956 r = r600_bc_add_literal(ctx->bc, ctx->value);
1957 if (r)
1958 return r;
1959 }
1960
1961 /* result.z = RoughApprox2ToX(tmp);*/
1962 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1963 memset(&alu, 0, sizeof(struct r600_bc_alu));
1964 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1965 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1966 if (r)
1967 return r;
1968 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1969
1970 alu.dst.sel = ctx->temp_reg;
1971 alu.dst.write = 1;
1972 alu.dst.chan = 2;
1973
1974 alu.last = 1;
1975
1976 r = r600_bc_add_alu(ctx->bc, &alu);
1977 if (r)
1978 return r;
1979 r = r600_bc_add_literal(ctx->bc, ctx->value);
1980 if (r)
1981 return r;
1982 }
1983
1984 /* result.w = 1.0;*/
1985 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1986 memset(&alu, 0, sizeof(struct r600_bc_alu));
1987
1988 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1989 alu.src[0].sel = V_SQ_ALU_SRC_1;
1990 alu.src[0].chan = 0;
1991
1992 alu.dst.sel = ctx->temp_reg;
1993 alu.dst.chan = 3;
1994 alu.dst.write = 1;
1995 alu.last = 1;
1996 r = r600_bc_add_alu(ctx->bc, &alu);
1997 if (r)
1998 return r;
1999 r = r600_bc_add_literal(ctx->bc, ctx->value);
2000 if (r)
2001 return r;
2002 }
2003 return tgsi_helper_copy(ctx, inst);
2004 }
2005
2006 static int tgsi_arl(struct r600_shader_ctx *ctx)
2007 {
2008 /* TODO from r600c, ar values don't persist between clauses */
2009 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2010 struct r600_bc_alu alu;
2011 int r;
2012 memset(&alu, 0, sizeof(struct r600_bc_alu));
2013
2014 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2015
2016 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2017 if (r)
2018 return r;
2019 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2020
2021 alu.last = 1;
2022
2023 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
2024 if (r)
2025 return r;
2026 return 0;
2027 }
2028
2029 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2030 {
2031 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2032 struct r600_bc_alu alu;
2033 int i, r = 0;
2034
2035 for (i = 0; i < 4; i++) {
2036 memset(&alu, 0, sizeof(struct r600_bc_alu));
2037
2038 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
2039 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2040 if (r)
2041 return r;
2042
2043 if (i == 0 || i == 3) {
2044 alu.src[0].sel = V_SQ_ALU_SRC_1;
2045 } else {
2046 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2047 if (r)
2048 return r;
2049 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2050 }
2051
2052 if (i == 0 || i == 2) {
2053 alu.src[1].sel = V_SQ_ALU_SRC_1;
2054 } else {
2055 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2056 if (r)
2057 return r;
2058 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2059 }
2060 if (i == 3)
2061 alu.last = 1;
2062 r = r600_bc_add_alu(ctx->bc, &alu);
2063 if (r)
2064 return r;
2065 }
2066 return 0;
2067 }
2068
2069 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2070 {
2071 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2072 struct r600_bc_alu alu;
2073 int r;
2074
2075 memset(&alu, 0, sizeof(struct r600_bc_alu));
2076 alu.inst = opcode;
2077 alu.predicate = 1;
2078
2079 alu.dst.sel = ctx->temp_reg;
2080 alu.dst.write = 1;
2081 alu.dst.chan = 0;
2082
2083 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2084 if (r)
2085 return r;
2086 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2087 alu.src[1].sel = V_SQ_ALU_SRC_0;
2088 alu.src[1].chan = 0;
2089
2090 alu.last = 1;
2091
2092 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
2093 if (r)
2094 return r;
2095 return 0;
2096 }
2097
2098 static int pops(struct r600_shader_ctx *ctx, int pops)
2099 {
2100 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
2101 ctx->bc->cf_last->pop_count = pops;
2102 return 0;
2103 }
2104
2105 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2106 {
2107 switch(reason) {
2108 case FC_PUSH_VPM:
2109 ctx->bc->callstack[ctx->bc->call_sp].current--;
2110 break;
2111 case FC_PUSH_WQM:
2112 case FC_LOOP:
2113 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2114 break;
2115 case FC_REP:
2116 /* TOODO : for 16 vp asic should -= 2; */
2117 ctx->bc->callstack[ctx->bc->call_sp].current --;
2118 break;
2119 }
2120 }
2121
2122 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2123 {
2124 if (check_max_only) {
2125 int diff;
2126 switch (reason) {
2127 case FC_PUSH_VPM:
2128 diff = 1;
2129 break;
2130 case FC_PUSH_WQM:
2131 diff = 4;
2132 break;
2133 }
2134 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2135 ctx->bc->callstack[ctx->bc->call_sp].max) {
2136 ctx->bc->callstack[ctx->bc->call_sp].max =
2137 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2138 }
2139 return;
2140 }
2141 switch (reason) {
2142 case FC_PUSH_VPM:
2143 ctx->bc->callstack[ctx->bc->call_sp].current++;
2144 break;
2145 case FC_PUSH_WQM:
2146 case FC_LOOP:
2147 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2148 break;
2149 case FC_REP:
2150 ctx->bc->callstack[ctx->bc->call_sp].current++;
2151 break;
2152 }
2153
2154 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2155 ctx->bc->callstack[ctx->bc->call_sp].max) {
2156 ctx->bc->callstack[ctx->bc->call_sp].max =
2157 ctx->bc->callstack[ctx->bc->call_sp].current;
2158 }
2159 }
2160
2161 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2162 {
2163 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2164
2165 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2166 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2167 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2168 sp->num_mid++;
2169 }
2170
2171 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2172 {
2173 ctx->bc->fc_sp++;
2174 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2175 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2176 }
2177
2178 static void fc_poplevel(struct r600_shader_ctx *ctx)
2179 {
2180 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2181 if (sp->mid) {
2182 free(sp->mid);
2183 sp->mid = NULL;
2184 }
2185 sp->num_mid = 0;
2186 sp->start = NULL;
2187 sp->type = 0;
2188 ctx->bc->fc_sp--;
2189 }
2190
#if 0
/*
 * Disabled flow-control helpers.  They are compiled out and several of
 * them are still stubs; kept as a starting point for return / loop-flag
 * support.
 */

/* Append a RETURN control-flow instruction.  Always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Append a JUMP control-flow instruction with the given pop count.
 * `offset` is not used yet.  Always returns 0.
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	struct r600_bc *bc = ctx->bc;

	r600_bc_add_cfinst(bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/*
 * Sequence: test flag, jump, clear the "return in loop" flag, pop
 * ifidx + 1 levels, return.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Sequence: test flag, emit the current instruction's CF opcode with a pop
 * count of 1, record it in the `mid` list of level `fc_sp`, then pop one
 * level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	struct r600_bc *bc = ctx->bc;

	emit_testflag(ctx);

	r600_bc_add_cfinst(bc, ctx->inst_info->r600_opcode);
	bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2238
2239 static int tgsi_if(struct r600_shader_ctx *ctx)
2240 {
2241 emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
2242
2243 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2244
2245 fc_pushlevel(ctx, FC_IF);
2246
2247 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2248 return 0;
2249 }
2250
2251 static int tgsi_else(struct r600_shader_ctx *ctx)
2252 {
2253 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
2254 ctx->bc->cf_last->pop_count = 1;
2255
2256 fc_set_mid(ctx, ctx->bc->fc_sp);
2257 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2258 return 0;
2259 }
2260
2261 static int tgsi_endif(struct r600_shader_ctx *ctx)
2262 {
2263 pops(ctx, 1);
2264 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2265 R600_ERR("if/endif unbalanced in shader\n");
2266 return -1;
2267 }
2268
2269 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2270 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2271 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2272 } else {
2273 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2274 }
2275 fc_poplevel(ctx);
2276
2277 callstack_decrease_current(ctx, FC_PUSH_VPM);
2278 return 0;
2279 }
2280
2281 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2282 {
2283 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
2284
2285 fc_pushlevel(ctx, FC_LOOP);
2286
2287 /* check stack depth */
2288 callstack_check_depth(ctx, FC_LOOP, 0);
2289 return 0;
2290 }
2291
2292 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2293 {
2294 int i;
2295
2296 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
2297
2298 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2299 R600_ERR("loop/endloop in shader code are not paired.\n");
2300 return -EINVAL;
2301 }
2302
2303 /* fixup loop pointers - from r600isa
2304 LOOP END points to CF after LOOP START,
2305 LOOP START point to CF after LOOP END
2306 BRK/CONT point to LOOP END CF
2307 */
2308 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2309
2310 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2311
2312 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2313 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2314 }
2315 /* TODO add LOOPRET support */
2316 fc_poplevel(ctx);
2317 callstack_decrease_current(ctx, FC_LOOP);
2318 return 0;
2319 }
2320
2321 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2322 {
2323 unsigned int fscp;
2324
2325 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2326 {
2327 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2328 break;
2329 }
2330
2331 if (fscp == 0) {
2332 R600_ERR("Break not inside loop/endloop pair\n");
2333 return -EINVAL;
2334 }
2335
2336 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2337 ctx->bc->cf_last->pop_count = 1;
2338
2339 fc_set_mid(ctx, fscp);
2340
2341 pops(ctx, 1);
2342 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2343 return 0;
2344 }
2345
2346 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2347 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
2348 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2349 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2350 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2351 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2352 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2353 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2354 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2355 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2356 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2357 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2358 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2359 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2360 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2361 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2362 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2363 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2364 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2365 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2366 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2367 /* gap */
2368 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2369 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2370 /* gap */
2371 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2372 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2373 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2374 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2375 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2376 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2377 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2378 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2379 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2380 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2381 /* gap */
2382 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2383 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2384 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2385 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2386 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2387 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2388 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2389 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2390 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2391 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2392 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2393 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2394 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2395 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2396 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2397 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2398 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2399 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2400 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2401 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2402 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2403 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2404 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2405 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2406 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2407 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2408 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2409 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2410 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2411 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2412 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2413 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2414 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2415 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2416 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2417 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2418 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2419 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2420 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2421 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2422 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2423 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2424 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2425 /* gap */
2426 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2427 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2428 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2429 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2430 /* gap */
2431 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2432 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2433 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2434 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2435 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2436 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2437 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2438 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2439 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2440 /* gap */
2441 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2442 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2443 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2444 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2445 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2446 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2447 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2448 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2449 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2450 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2451 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2452 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2453 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2454 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2455 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2456 /* gap */
2457 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2458 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2459 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2460 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2461 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2462 /* gap */
2463 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2464 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2465 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2466 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2467 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2468 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2469 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2470 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2471 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2472 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2473 /* gap */
2474 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2475 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2476 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2477 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2478 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2479 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2480 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2481 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2482 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2483 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2484 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2485 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2486 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2487 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2488 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2489 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2490 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2491 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2492 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2493 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2494 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2495 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2496 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2497 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2498 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2499 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2500 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2501 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2502 };