r600g: split opcodes out and add wrapper around usage.
[mesa.git] / src/gallium/drivers/r600/r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600_opcodes.h"
34 #include "r600d.h"
35 #include <stdio.h>
36 #include <errno.h>
37
38
39 struct r600_shader_tgsi_instruction;
40
41 struct r600_shader_ctx {
42 struct tgsi_shader_info info;
43 struct tgsi_parse_context parse;
44 const struct tgsi_token *tokens;
45 unsigned type;
46 unsigned file_offset[TGSI_FILE_COUNT];
47 unsigned temp_reg;
48 struct r600_shader_tgsi_instruction *inst_info;
49 struct r600_bc *bc;
50 struct r600_shader *shader;
51 u32 value[4];
52 u32 *literals;
53 u32 nliterals;
54 u32 max_driver_temp_used;
55 };
56
57 struct r600_shader_tgsi_instruction {
58 unsigned tgsi_opcode;
59 unsigned is_op3;
60 unsigned r600_opcode;
61 int (*process)(struct r600_shader_ctx *ctx);
62 };
63
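/* Dispatch table mapping each TGSI opcode to its translation: the TGSI
 * opcode, whether the resulting r600 instruction takes three operands,
 * the r600 opcode to emit and the handler that emits it.  The table
 * itself is defined further down in the file. */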
64 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
65 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
66
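/* Patch the vertex-fetch clauses of a vertex shader so their destination
 * swizzles match the util_format swizzle of the currently bound vertex
 * element formats, then rebuild the bytecode.  Fragment shaders need no
 * such fixup and return immediately. */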
67 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
68 {
69 struct r600_context *rctx = r600_context(ctx);
70 const struct util_format_description *desc;
71 enum pipe_format resource_format[160];
72 unsigned i, nresources = 0;
73 struct r600_bc *bc = &shader->bc;
74 struct r600_bc_cf *cf;
75 struct r600_bc_vtx *vtx;
76
77 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
78 return 0;
79 for (i = 0; i < rctx->vertex_elements->count; i++) {
80 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
81 }
82 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
83 switch (cf->inst) {
84 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
85 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
86 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
87 desc = util_format_description(resource_format[vtx->buffer_id]);
88 if (desc == NULL) {
89 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
90 return -EINVAL;
91 }
92 vtx->dst_sel_x = desc->swizzle[0];
93 vtx->dst_sel_y = desc->swizzle[1];
94 vtx->dst_sel_z = desc->swizzle[2];
95 vtx->dst_sel_w = desc->swizzle[3];
96 }
97 break;
98 default:
99 break;
100 }
101 }
102 return r600_bc_build(&shader->bc);
103 }
104
105 int r600_pipe_shader_create(struct pipe_context *ctx,
106 struct r600_context_state *rpshader,
107 const struct tgsi_token *tokens)
108 {
109 struct r600_screen *rscreen = r600_screen(ctx->screen);
110 int r;
111
112 //fprintf(stderr, "--------------------------------------------------------------\n");
113 //tgsi_dump(tokens, 0);
114 if (rpshader == NULL)
115 return -ENOMEM;
116 rpshader->shader.family = radeon_get_family(rscreen->rw);
117 rpshader->shader.use_mem_constant = rscreen->use_mem_constant;
118 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
119 if (r) {
120 R600_ERR("translation from TGSI failed !\n");
121 return r;
122 }
123 r = r600_bc_build(&rpshader->shader.bc);
124 if (r) {
125 R600_ERR("building bytecode failed !\n");
126 return r;
127 }
128 //fprintf(stderr, "______________________________________________________________\n");
129 return 0;
130 }
131
132 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
133 {
134 struct r600_context *rctx = r600_context(ctx);
135 struct radeon_state *state;
136
137 state = &rpshader->rstate[0];
138 radeon_state_fini(&rpshader->rstate[0]);
139
140 return rctx->vtbl->vs_shader(rctx, rpshader, state);
141 }
142
143 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
144 {
145 struct r600_context *rctx = r600_context(ctx);
146 struct radeon_state *state;
147
148 state = &rpshader->rstate[0];
149 radeon_state_fini(state);
150
151 return rctx->vtbl->ps_shader(rctx, rpshader, state);
152 }
153
154 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
155 {
156 struct r600_screen *rscreen = r600_screen(ctx->screen);
157 struct r600_context *rctx = r600_context(ctx);
158 struct r600_shader *rshader = &rpshader->shader;
159 int r;
160
161 /* copy new shader */
162 radeon_bo_decref(rscreen->rw, rpshader->bo);
163 rpshader->bo = NULL;
164 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
165 4096, NULL);
166 if (rpshader->bo == NULL) {
167 return -ENOMEM;
168 }
169 radeon_bo_map(rscreen->rw, rpshader->bo);
170 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
171 radeon_bo_unmap(rscreen->rw, rpshader->bo);
172 /* build state */
173 rshader->flat_shade = rctx->flat_shade;
174 switch (rshader->processor_type) {
175 case TGSI_PROCESSOR_VERTEX:
176 r = r600_pipe_shader_vs(ctx, rpshader);
177 break;
178 case TGSI_PROCESSOR_FRAGMENT:
179 r = r600_pipe_shader_ps(ctx, rpshader);
180 break;
181 default:
182 r = -EINVAL;
183 break;
184 }
185 return r;
186 }
187
188 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
189 {
190 struct r600_context *rctx = r600_context(ctx);
191 int r;
192
193 if (rpshader == NULL)
194 return -EINVAL;
195 /* there should be enough input */
196 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
197 R600_ERR("%d resources provided, expecting %d\n",
198 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
199 return -EINVAL;
200 }
201 r = r600_shader_update(ctx, &rpshader->shader);
202 if (r)
203 return r;
204 return r600_pipe_shader(ctx, rpshader);
205 }
206
207 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
208 {
209 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
210 int j;
211
212 if (i->Instruction.NumDstRegs > 1) {
213 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
214 return -EINVAL;
215 }
216 if (i->Instruction.Predicate) {
217 R600_ERR("predicate unsupported\n");
218 return -EINVAL;
219 }
220 #if 0
221 if (i->Instruction.Label) {
222 R600_ERR("label unsupported\n");
223 return -EINVAL;
224 }
225 #endif
226 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
227 if (i->Src[j].Register.Dimension ||
228 i->Src[j].Register.Absolute) {
229 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
230 i->Src[j].Register.Dimension,
231 i->Src[j].Register.Absolute);
232 return -EINVAL;
233 }
234 }
235 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
236 if (i->Dst[j].Register.Dimension) {
237 R600_ERR("unsupported dst (dimension)\n");
238 return -EINVAL;
239 }
240 }
241 return 0;
242 }
243
244 static int tgsi_declaration(struct r600_shader_ctx *ctx)
245 {
246 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
247 struct r600_bc_vtx vtx;
248 unsigned i;
249 int r;
250
251 switch (d->Declaration.File) {
252 case TGSI_FILE_INPUT:
253 i = ctx->shader->ninput++;
254 ctx->shader->input[i].name = d->Semantic.Name;
255 ctx->shader->input[i].sid = d->Semantic.Index;
256 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
257 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
258 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
259 /* turn input into fetch */
260 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
261 vtx.inst = 0;
262 vtx.fetch_type = 0;
263 vtx.buffer_id = i;
264 /* register containing the index into the buffer */
265 vtx.src_gpr = 0;
266 vtx.src_sel_x = 0;
267 vtx.mega_fetch_count = 0x1F;
268 vtx.dst_gpr = ctx->shader->input[i].gpr;
269 vtx.dst_sel_x = 0;
270 vtx.dst_sel_y = 1;
271 vtx.dst_sel_z = 2;
272 vtx.dst_sel_w = 3;
273 r = r600_bc_add_vtx(ctx->bc, &vtx);
274 if (r)
275 return r;
276 }
277 break;
278 case TGSI_FILE_OUTPUT:
279 i = ctx->shader->noutput++;
280 ctx->shader->output[i].name = d->Semantic.Name;
281 ctx->shader->output[i].sid = d->Semantic.Index;
282 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
283 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
284 break;
285 case TGSI_FILE_CONSTANT:
286 case TGSI_FILE_TEMPORARY:
287 case TGSI_FILE_SAMPLER:
288 case TGSI_FILE_ADDRESS:
289 break;
290 default:
291 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
292 return -EINVAL;
293 }
294 return 0;
295 }
296
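/* Hand out the next driver-private temporary GPR.  max_driver_temp_used
 * is reset before each TGSI instruction is translated, so these
 * temporaries only live for the duration of a single instruction. */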
297 static int r600_get_temp(struct r600_shader_ctx *ctx)
298 {
299 return ctx->temp_reg + ctx->max_driver_temp_used++;
300 }
301
302 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
303 {
304 struct tgsi_full_immediate *immediate;
305 struct r600_shader_ctx ctx;
306 struct r600_bc_output output[32];
307 unsigned output_done, noutput;
308 unsigned opcode;
309 int i, r = 0, pos0;
310
311 ctx.bc = &shader->bc;
312 ctx.shader = shader;
313 r = r600_bc_init(ctx.bc, shader->family);
314 if (r)
315 return r;
316 ctx.bc->use_mem_constant = shader->use_mem_constant;
317 ctx.tokens = tokens;
318 tgsi_scan_shader(tokens, &ctx.info);
319 tgsi_parse_init(&ctx.parse, tokens);
320 ctx.type = ctx.parse.FullHeader.Processor.Processor;
321 shader->processor_type = ctx.type;
322
323 /* register allocations */
324 /* Values [0,127] correspond to GPR[0..127].
325 * Values [128,159] correspond to constant buffer bank 0
326 * Values [160,191] correspond to constant buffer bank 1
327 * Values [256,511] correspond to cfile constants c[0..255].
328 * Other special values are shown in the list below.
329 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
330 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
331 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
332 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
333 * 248 SQ_ALU_SRC_0: special constant 0.0.
334 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
335 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
336 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
337 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
338 * 253 SQ_ALU_SRC_LITERAL: literal constant.
339 * 254 SQ_ALU_SRC_PV: previous vector result.
340 * 255 SQ_ALU_SRC_PS: previous scalar result.
341 */
342 for (i = 0; i < TGSI_FILE_COUNT; i++) {
343 ctx.file_offset[i] = 0;
344 }
345 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
346 ctx.file_offset[TGSI_FILE_INPUT] = 1;
347 }
348 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
349 ctx.info.file_count[TGSI_FILE_INPUT];
350 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
351 ctx.info.file_count[TGSI_FILE_OUTPUT];
352 if (ctx.shader->use_mem_constant)
353 ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
354 else
355 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
356
357 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
358 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
359 ctx.info.file_count[TGSI_FILE_TEMPORARY];
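	/* Illustrative example (not part of the original source): a vertex
	 * shader with 2 inputs, 1 output and 3 temporaries ends up with
	 * file_offset[INPUT] = 1, file_offset[OUTPUT] = 3,
	 * file_offset[TEMPORARY] = 4 and temp_reg = 7, so driver-private
	 * temporaries returned by r600_get_temp() start at GPR 7. */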
360
361 ctx.nliterals = 0;
362 ctx.literals = NULL;
363
364 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
365 tgsi_parse_token(&ctx.parse);
366 switch (ctx.parse.FullToken.Token.Type) {
367 case TGSI_TOKEN_TYPE_IMMEDIATE:
368 immediate = &ctx.parse.FullToken.FullImmediate;
369 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
370 if(ctx.literals == NULL) {
371 r = -ENOMEM;
372 goto out_err;
373 }
374 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
375 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
376 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
377 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
378 ctx.nliterals++;
379 break;
380 case TGSI_TOKEN_TYPE_DECLARATION:
381 r = tgsi_declaration(&ctx);
382 if (r)
383 goto out_err;
384 break;
385 case TGSI_TOKEN_TYPE_INSTRUCTION:
386 r = tgsi_is_supported(&ctx);
387 if (r)
388 goto out_err;
389 ctx.max_driver_temp_used = 0;
390 /* reserve first tmp for everyone */
391 r600_get_temp(&ctx);
392 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
393 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
394 r = ctx.inst_info->process(&ctx);
395 if (r)
396 goto out_err;
397 r = r600_bc_add_literal(ctx.bc, ctx.value);
398 if (r)
399 goto out_err;
400 break;
401 default:
402 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
403 r = -EINVAL;
404 goto out_err;
405 }
406 }
407 /* export output */
408 noutput = shader->noutput;
409 for (i = 0, pos0 = 0; i < noutput; i++) {
410 memset(&output[i], 0, sizeof(struct r600_bc_output));
411 output[i].gpr = shader->output[i].gpr;
412 output[i].elem_size = 3;
413 output[i].swizzle_x = 0;
414 output[i].swizzle_y = 1;
415 output[i].swizzle_z = 2;
416 output[i].swizzle_w = 3;
417 output[i].barrier = 1;
418 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
419 output[i].array_base = i - pos0;
420 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
421 switch (ctx.type) {
422 case TGSI_PROCESSOR_VERTEX:
423 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
424 output[i].array_base = 60;
425 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
426 /* position doesn't count in array_base */
427 pos0++;
428 }
429 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
430 output[i].array_base = 61;
431 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
432 /* position doesn't count in array_base */
433 pos0++;
434 }
435 break;
436 case TGSI_PROCESSOR_FRAGMENT:
437 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
438 output[i].array_base = shader->output[i].sid;
439 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
440 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
441 output[i].array_base = 61;
442 output[i].swizzle_x = 2;
443 output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
444 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
445 } else {
446 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
447 r = -EINVAL;
448 goto out_err;
449 }
450 break;
451 default:
452 R600_ERR("unsupported processor type %d\n", ctx.type);
453 r = -EINVAL;
454 goto out_err;
455 }
456 }
457 /* add fake param output for vertex shader if no param is exported */
458 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
459 for (i = 0, pos0 = 0; i < noutput; i++) {
460 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
461 pos0 = 1;
462 break;
463 }
464 }
465 if (!pos0) {
466 memset(&output[i], 0, sizeof(struct r600_bc_output));
467 output[i].gpr = 0;
468 output[i].elem_size = 3;
469 output[i].swizzle_x = 0;
470 output[i].swizzle_y = 1;
471 output[i].swizzle_z = 2;
472 output[i].swizzle_w = 3;
473 output[i].barrier = 1;
474 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
475 output[i].array_base = 0;
476 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
477 noutput++;
478 }
479 }
480 /* add fake pixel export */
481 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
482 memset(&output[0], 0, sizeof(struct r600_bc_output));
483 output[0].gpr = 0;
484 output[0].elem_size = 3;
485 output[0].swizzle_x = 7;
486 output[0].swizzle_y = 7;
487 output[0].swizzle_z = 7;
488 output[0].swizzle_w = 7;
489 output[0].barrier = 1;
490 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
491 output[0].array_base = 0;
492 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
493 noutput++;
494 }
495 /* set export done on last export of each type */
496 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
497 if (i == (noutput - 1)) {
498 output[i].end_of_program = 1;
499 }
500 if (!(output_done & (1 << output[i].type))) {
501 output_done |= (1 << output[i].type);
502 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
503 }
504 }
505 /* add output to bytecode */
506 for (i = 0; i < noutput; i++) {
507 r = r600_bc_add_output(ctx.bc, &output[i]);
508 if (r)
509 goto out_err;
510 }
511 free(ctx.literals);
512 tgsi_parse_free(&ctx.parse);
513 return 0;
514 out_err:
515 free(ctx.literals);
516 tgsi_parse_free(&ctx.parse);
517 return r;
518 }
519
520 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
521 {
522 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
523 return -EINVAL;
524 }
525
526 static int tgsi_end(struct r600_shader_ctx *ctx)
527 {
528 return 0;
529 }
530
531 static int tgsi_src(struct r600_shader_ctx *ctx,
532 const struct tgsi_full_src_register *tgsi_src,
533 struct r600_bc_alu_src *r600_src)
534 {
535 int index;
536 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
537 r600_src->sel = tgsi_src->Register.Index;
538 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
539 r600_src->sel = 0;
540 index = tgsi_src->Register.Index;
541 ctx->value[0] = ctx->literals[index * 4 + 0];
542 ctx->value[1] = ctx->literals[index * 4 + 1];
543 ctx->value[2] = ctx->literals[index * 4 + 2];
544 ctx->value[3] = ctx->literals[index * 4 + 3];
545 }
546 if (tgsi_src->Register.Indirect)
547 r600_src->rel = V_SQ_REL_RELATIVE;
548 r600_src->neg = tgsi_src->Register.Negate;
549 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
550 return 0;
551 }
552
553 static int tgsi_dst(struct r600_shader_ctx *ctx,
554 const struct tgsi_full_dst_register *tgsi_dst,
555 unsigned swizzle,
556 struct r600_bc_alu_dst *r600_dst)
557 {
558 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
559
560 r600_dst->sel = tgsi_dst->Register.Index;
561 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
562 r600_dst->chan = swizzle;
563 r600_dst->write = 1;
564 if (tgsi_dst->Register.Indirect)
565 r600_dst->rel = V_SQ_REL_RELATIVE;
566 if (inst->Instruction.Saturate) {
567 r600_dst->clamp = 1;
568 }
569 return 0;
570 }
571
572 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
573 {
574 switch (swizzle) {
575 case 0:
576 return tgsi_src->Register.SwizzleX;
577 case 1:
578 return tgsi_src->Register.SwizzleY;
579 case 2:
580 return tgsi_src->Register.SwizzleZ;
581 case 3:
582 return tgsi_src->Register.SwizzleW;
583 default:
584 return 0;
585 }
586 }
587
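/* Leave at most one source operand reading the constant file directly:
 * each additional TGSI_FILE_CONSTANT source is copied into a fresh
 * driver temporary with four MOVs and the source is rewritten to point
 * at that GPR instead. */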
588 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
589 {
590 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
591 struct r600_bc_alu alu;
592 int i, j, k, nconst, r;
593
594 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
595 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
596 nconst++;
597 }
598 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
599 if (r) {
600 return r;
601 }
602 }
603 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
604 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
605 int treg = r600_get_temp(ctx);
606 for (k = 0; k < 4; k++) {
607 memset(&alu, 0, sizeof(struct r600_bc_alu));
608 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
609 alu.src[0].sel = r600_src[j].sel;
610 alu.src[0].chan = k;
611 alu.dst.sel = treg;
612 alu.dst.chan = k;
613 alu.dst.write = 1;
614 if (k == 3)
615 alu.last = 1;
616 r = r600_bc_add_alu(ctx->bc, &alu);
617 if (r)
618 return r;
619 }
620 r600_src[j].sel = treg;
621 j--;
622 }
623 }
624 return 0;
625 }
626
627 /* immediates need to be moved into a temp - the trig functions use the literal slots for their PI-related constants */
628 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
629 {
630 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
631 struct r600_bc_alu alu;
632 int i, j, k, nliteral, r;
633
634 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
635 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
636 nliteral++;
637 }
638 }
639 for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
640 if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
641 int treg = r600_get_temp(ctx);
642 for (k = 0; k < 4; k++) {
643 memset(&alu, 0, sizeof(struct r600_bc_alu));
644 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
645 alu.src[0].sel = r600_src[j].sel;
646 alu.src[0].chan = k;
647 alu.dst.sel = treg;
648 alu.dst.chan = k;
649 alu.dst.write = 1;
650 if (k == 3)
651 alu.last = 1;
652 r = r600_bc_add_alu(ctx->bc, &alu);
653 if (r)
654 return r;
655 }
656 r = r600_bc_add_literal(ctx->bc, ctx->value);
657 if (r)
658 return r;
659 r600_src[j].sel = treg;
660 j++;
661 }
662 }
663 return 0;
664 }
665
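/* Common path for simple per-channel two-operand opcodes: emit one ALU
 * instruction per enabled writemask channel, optionally swapping the two
 * sources, and special-case SUB (negate src1) and ABS (absolute src0). */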
666 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
667 {
668 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
669 struct r600_bc_alu_src r600_src[3];
670 struct r600_bc_alu alu;
671 int i, j, r;
672 int lasti = 0;
673
674 for (i = 0; i < 4; i++) {
675 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
676 lasti = i;
677 }
678 }
679
680 r = tgsi_split_constant(ctx, r600_src);
681 if (r)
682 return r;
683 for (i = 0; i < lasti + 1; i++) {
684 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
685 continue;
686
687 memset(&alu, 0, sizeof(struct r600_bc_alu));
688 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
689 if (r)
690 return r;
691
692 alu.inst = ctx->inst_info->r600_opcode;
693 if (!swap) {
694 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
695 alu.src[j] = r600_src[j];
696 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
697 }
698 } else {
699 alu.src[0] = r600_src[1];
700 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
701
702 alu.src[1] = r600_src[0];
703 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
704 }
705 /* handle some special cases */
706 switch (ctx->inst_info->tgsi_opcode) {
707 case TGSI_OPCODE_SUB:
708 alu.src[1].neg = 1;
709 break;
710 case TGSI_OPCODE_ABS:
711 alu.src[0].abs = 1;
712 break;
713 default:
714 break;
715 }
716 if (i == lasti) {
717 alu.last = 1;
718 }
719 r = r600_bc_add_alu(ctx->bc, &alu);
720 if (r)
721 return r;
722 }
723 return 0;
724 }
725
726 static int tgsi_op2(struct r600_shader_ctx *ctx)
727 {
728 return tgsi_op2_s(ctx, 0);
729 }
730
731 static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
732 {
733 return tgsi_op2_s(ctx, 1);
734 }
735
736 /*
737 * r600 - trunc to -PI..PI range
738 * r700 - normalize by dividing by 2PI
739 * see fdo bug 27901
740 */
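/*
 * Both paths first fold the angle into a single period with
 *   tmp = fract(src * 1/(2*PI) + 0.5)
 * r600 then expands that back to [-PI, PI) via tmp * 2*PI - PI, while
 * r700 only recenters it to [-0.5, 0.5) via tmp - 0.5, since its SIN/COS
 * expect a normalized angle.
 */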
741 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
742 struct r600_bc_alu_src r600_src[3])
743 {
744 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
745 int r;
746 uint32_t lit_vals[4];
747 struct r600_bc_alu alu;
748
749 memset(lit_vals, 0, 4*4);
750 r = tgsi_split_constant(ctx, r600_src);
751 if (r)
752 return r;
753
754 r = tgsi_split_literal_constant(ctx, r600_src);
755 if (r)
756 return r;
757
758 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
759 lit_vals[1] = fui(0.5f);
760
761 memset(&alu, 0, sizeof(struct r600_bc_alu));
762 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
763 alu.is_op3 = 1;
764
765 alu.dst.chan = 0;
766 alu.dst.sel = ctx->temp_reg;
767 alu.dst.write = 1;
768
769 alu.src[0] = r600_src[0];
770 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
771
772 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
773 alu.src[1].chan = 0;
774 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
775 alu.src[2].chan = 1;
776 alu.last = 1;
777 r = r600_bc_add_alu(ctx->bc, &alu);
778 if (r)
779 return r;
780 r = r600_bc_add_literal(ctx->bc, lit_vals);
781 if (r)
782 return r;
783
784 memset(&alu, 0, sizeof(struct r600_bc_alu));
785 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
786
787 alu.dst.chan = 0;
788 alu.dst.sel = ctx->temp_reg;
789 alu.dst.write = 1;
790
791 alu.src[0].sel = ctx->temp_reg;
792 alu.src[0].chan = 0;
793 alu.last = 1;
794 r = r600_bc_add_alu(ctx->bc, &alu);
795 if (r)
796 return r;
797
798 if (ctx->bc->chiprev == 0) {
799 lit_vals[0] = fui(3.1415926535897f * 2.0f);
800 lit_vals[1] = fui(-3.1415926535897f);
801 } else {
802 lit_vals[0] = fui(1.0f);
803 lit_vals[1] = fui(-0.5f);
804 }
805
806 memset(&alu, 0, sizeof(struct r600_bc_alu));
807 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
808 alu.is_op3 = 1;
809
810 alu.dst.chan = 0;
811 alu.dst.sel = ctx->temp_reg;
812 alu.dst.write = 1;
813
814 alu.src[0].sel = ctx->temp_reg;
815 alu.src[0].chan = 0;
816
817 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
818 alu.src[1].chan = 0;
819 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
820 alu.src[2].chan = 1;
821 alu.last = 1;
822 r = r600_bc_add_alu(ctx->bc, &alu);
823 if (r)
824 return r;
825 r = r600_bc_add_literal(ctx->bc, lit_vals);
826 if (r)
827 return r;
828 return 0;
829 }
830
831 static int tgsi_trig(struct r600_shader_ctx *ctx)
832 {
833 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
834 struct r600_bc_alu_src r600_src[3];
835 struct r600_bc_alu alu;
836 int i, r;
837 int lasti = 0;
838
839 r = tgsi_setup_trig(ctx, r600_src);
840 if (r)
841 return r;
842
843 memset(&alu, 0, sizeof(struct r600_bc_alu));
844 alu.inst = ctx->inst_info->r600_opcode;
845 alu.dst.chan = 0;
846 alu.dst.sel = ctx->temp_reg;
847 alu.dst.write = 1;
848
849 alu.src[0].sel = ctx->temp_reg;
850 alu.src[0].chan = 0;
851 alu.last = 1;
852 r = r600_bc_add_alu(ctx->bc, &alu);
853 if (r)
854 return r;
855
856 /* replicate result */
857 for (i = 0; i < 4; i++) {
858 if (inst->Dst[0].Register.WriteMask & (1 << i))
859 lasti = i;
860 }
861 for (i = 0; i < lasti + 1; i++) {
862 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
863 continue;
864
865 memset(&alu, 0, sizeof(struct r600_bc_alu));
866 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
867
868 alu.src[0].sel = ctx->temp_reg;
869 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
870 if (r)
871 return r;
872 if (i == lasti)
873 alu.last = 1;
874 r = r600_bc_add_alu(ctx->bc, &alu);
875 if (r)
876 return r;
877 }
878 return 0;
879 }
880
881 static int tgsi_scs(struct r600_shader_ctx *ctx)
882 {
883 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
884 struct r600_bc_alu_src r600_src[3];
885 struct r600_bc_alu alu;
886 int r;
887
888 r = tgsi_setup_trig(ctx, r600_src);
889 if (r)
890 return r;
891
892
893 /* dst.x = COS */
894 memset(&alu, 0, sizeof(struct r600_bc_alu));
895 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
896 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
897 if (r)
898 return r;
899
900 alu.src[0].sel = ctx->temp_reg;
901 alu.src[0].chan = 0;
902 alu.last = 1;
903 r = r600_bc_add_alu(ctx->bc, &alu);
904 if (r)
905 return r;
906
907 /* dst.y = SIN */
908 memset(&alu, 0, sizeof(struct r600_bc_alu));
909 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
910 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
911 if (r)
912 return r;
913
914 alu.src[0].sel = ctx->temp_reg;
915 alu.src[0].chan = 0;
916 alu.last = 1;
917 r = r600_bc_add_alu(ctx->bc, &alu);
918 if (r)
919 return r;
920 return 0;
921 }
922
923 static int tgsi_kill(struct r600_shader_ctx *ctx)
924 {
925 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
926 struct r600_bc_alu alu;
927 int i, r;
928
929 for (i = 0; i < 4; i++) {
930 memset(&alu, 0, sizeof(struct r600_bc_alu));
931 alu.inst = ctx->inst_info->r600_opcode;
932
933 alu.dst.chan = i;
934
935 alu.src[0].sel = V_SQ_ALU_SRC_0;
936
937 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
938 alu.src[1].sel = V_SQ_ALU_SRC_1;
939 alu.src[1].neg = 1;
940 } else {
941 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
942 if (r)
943 return r;
944 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
945 }
946 if (i == 3) {
947 alu.last = 1;
948 }
949 r = r600_bc_add_alu(ctx->bc, &alu);
950 if (r)
951 return r;
952 }
953 r = r600_bc_add_literal(ctx->bc, ctx->value);
954 if (r)
955 return r;
956
957 /* kill must be last in ALU */
958 ctx->bc->force_add_cf = 1;
959 ctx->shader->uses_kill = TRUE;
960 return 0;
961 }
962
963 static int tgsi_lit(struct r600_shader_ctx *ctx)
964 {
965 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
966 struct r600_bc_alu alu;
967 struct r600_bc_alu_src r600_src[3];
968 int r;
969
970 r = tgsi_split_constant(ctx, r600_src);
971 if (r)
972 return r;
973 r = tgsi_split_literal_constant(ctx, r600_src);
974 if (r)
975 return r;
976
977 /* dst.x <- 1.0 */
978 memset(&alu, 0, sizeof(struct r600_bc_alu));
979 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
980 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
981 alu.src[0].chan = 0;
982 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
983 if (r)
984 return r;
985 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
986 r = r600_bc_add_alu(ctx->bc, &alu);
987 if (r)
988 return r;
989
990 /* dst.y = max(src.x, 0.0) */
991 memset(&alu, 0, sizeof(struct r600_bc_alu));
992 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
993 alu.src[0] = r600_src[0];
994 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
995 alu.src[1].chan = 0;
996 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
997 if (r)
998 return r;
999 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1000 r = r600_bc_add_alu(ctx->bc, &alu);
1001 if (r)
1002 return r;
1003
1004 /* dst.w <- 1.0 */
1005 memset(&alu, 0, sizeof(struct r600_bc_alu));
1006 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1007 alu.src[0].sel = V_SQ_ALU_SRC_1;
1008 alu.src[0].chan = 0;
1009 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1010 if (r)
1011 return r;
1012 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1013 alu.last = 1;
1014 r = r600_bc_add_alu(ctx->bc, &alu);
1015 if (r)
1016 return r;
1017
1018 r = r600_bc_add_literal(ctx->bc, ctx->value);
1019 if (r)
1020 return r;
1021
1022 if (inst->Dst[0].Register.WriteMask & (1 << 2))
1023 {
1024 int chan;
1025 int sel;
1026
1027 /* dst.z = log(src.y) */
1028 memset(&alu, 0, sizeof(struct r600_bc_alu));
1029 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1030 alu.src[0] = r600_src[0];
1031 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1032 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1033 if (r)
1034 return r;
1035 alu.last = 1;
1036 r = r600_bc_add_alu(ctx->bc, &alu);
1037 if (r)
1038 return r;
1039
1040 r = r600_bc_add_literal(ctx->bc, ctx->value);
1041 if (r)
1042 return r;
1043
1044 chan = alu.dst.chan;
1045 sel = alu.dst.sel;
1046
1047 /* tmp.x = MUL_LIT(src.w, dst.z, src.x) */
1048 memset(&alu, 0, sizeof(struct r600_bc_alu));
1049 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1050 alu.src[0] = r600_src[0];
1051 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1052 alu.src[1].sel = sel;
1053 alu.src[1].chan = chan;
1054
1055 alu.src[2] = r600_src[0];
1056 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1057 alu.dst.sel = ctx->temp_reg;
1058 alu.dst.chan = 0;
1059 alu.dst.write = 1;
1060 alu.is_op3 = 1;
1061 alu.last = 1;
1062 r = r600_bc_add_alu(ctx->bc, &alu);
1063 if (r)
1064 return r;
1065
1066 r = r600_bc_add_literal(ctx->bc, ctx->value);
1067 if (r)
1068 return r;
1069 /* dst.z = exp(tmp.x) */
1070 memset(&alu, 0, sizeof(struct r600_bc_alu));
1071 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1072 alu.src[0].sel = ctx->temp_reg;
1073 alu.src[0].chan = 0;
1074 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1075 if (r)
1076 return r;
1077 alu.last = 1;
1078 r = r600_bc_add_alu(ctx->bc, &alu);
1079 if (r)
1080 return r;
1081 }
1082 return 0;
1083 }
1084
1085 static int tgsi_trans(struct r600_shader_ctx *ctx)
1086 {
1087 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1088 struct r600_bc_alu alu;
1089 int i, j, r;
1090
1091 for (i = 0; i < 4; i++) {
1092 memset(&alu, 0, sizeof(struct r600_bc_alu));
1093 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1094 alu.inst = ctx->inst_info->r600_opcode;
1095 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1096 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1097 if (r)
1098 return r;
1099 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1100 }
1101 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1102 if (r)
1103 return r;
1104 alu.last = 1;
1105 r = r600_bc_add_alu(ctx->bc, &alu);
1106 if (r)
1107 return r;
1108 }
1109 }
1110 return 0;
1111 }
1112
1113 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1114 {
1115 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1116 struct r600_bc_alu alu;
1117 int i, r;
1118
1119 for (i = 0; i < 4; i++) {
1120 memset(&alu, 0, sizeof(struct r600_bc_alu));
1121 alu.src[0].sel = ctx->temp_reg;
1122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1123 alu.dst.chan = i;
1124 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1125 if (r)
1126 return r;
1127 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1128 if (i == 3)
1129 alu.last = 1;
1130 r = r600_bc_add_alu(ctx->bc, &alu);
1131 if (r)
1132 return r;
1133 }
1134 return 0;
1135 }
1136
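/* Scalar transcendental helper: run the instruction once on the x
 * swizzle of every source, write the result to ctx->temp_reg and then
 * replicate it into each enabled destination channel with MOVs. */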
1137 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1138 {
1139 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1140 struct r600_bc_alu alu;
1141 int i, r;
1142
1143 memset(&alu, 0, sizeof(struct r600_bc_alu));
1144 alu.inst = ctx->inst_info->r600_opcode;
1145 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1146 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1147 if (r)
1148 return r;
1149 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1150 }
1151 alu.dst.sel = ctx->temp_reg;
1152 alu.dst.write = 1;
1153 alu.last = 1;
1154 r = r600_bc_add_alu(ctx->bc, &alu);
1155 if (r)
1156 return r;
1157 r = r600_bc_add_literal(ctx->bc, ctx->value);
1158 if (r)
1159 return r;
1160 /* replicate result */
1161 return tgsi_helper_tempx_replicate(ctx);
1162 }
1163
1164 static int tgsi_pow(struct r600_shader_ctx *ctx)
1165 {
1166 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1167 struct r600_bc_alu alu;
1168 int r;
1169
1170 /* LOG2(a) */
1171 memset(&alu, 0, sizeof(struct r600_bc_alu));
1172 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1173 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1174 if (r)
1175 return r;
1176 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1177 alu.dst.sel = ctx->temp_reg;
1178 alu.dst.write = 1;
1179 alu.last = 1;
1180 r = r600_bc_add_alu(ctx->bc, &alu);
1181 if (r)
1182 return r;
1183 r = r600_bc_add_literal(ctx->bc,ctx->value);
1184 if (r)
1185 return r;
1186 /* b * LOG2(a) */
1187 memset(&alu, 0, sizeof(struct r600_bc_alu));
1188 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1189 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1190 if (r)
1191 return r;
1192 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1193 alu.src[1].sel = ctx->temp_reg;
1194 alu.dst.sel = ctx->temp_reg;
1195 alu.dst.write = 1;
1196 alu.last = 1;
1197 r = r600_bc_add_alu(ctx->bc, &alu);
1198 if (r)
1199 return r;
1200 r = r600_bc_add_literal(ctx->bc,ctx->value);
1201 if (r)
1202 return r;
1203 /* POW(a,b) = EXP2(b * LOG2(a))*/
1204 memset(&alu, 0, sizeof(struct r600_bc_alu));
1205 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1206 alu.src[0].sel = ctx->temp_reg;
1207 alu.dst.sel = ctx->temp_reg;
1208 alu.dst.write = 1;
1209 alu.last = 1;
1210 r = r600_bc_add_alu(ctx->bc, &alu);
1211 if (r)
1212 return r;
1213 r = r600_bc_add_literal(ctx->bc,ctx->value);
1214 if (r)
1215 return r;
1216 return tgsi_helper_tempx_replicate(ctx);
1217 }
1218
1219 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1220 {
1221 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1222 struct r600_bc_alu alu;
1223 struct r600_bc_alu_src r600_src[3];
1224 int i, r;
1225
1226 r = tgsi_split_constant(ctx, r600_src);
1227 if (r)
1228 return r;
1229
1230 /* tmp = (src > 0 ? 1 : src) */
1231 for (i = 0; i < 4; i++) {
1232 memset(&alu, 0, sizeof(struct r600_bc_alu));
1233 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1234 alu.is_op3 = 1;
1235
1236 alu.dst.sel = ctx->temp_reg;
1237 alu.dst.chan = i;
1238
1239 alu.src[0] = r600_src[0];
1240 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1241
1242 alu.src[1].sel = V_SQ_ALU_SRC_1;
1243
1244 alu.src[2] = r600_src[0];
1245 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1246 if (i == 3)
1247 alu.last = 1;
1248 r = r600_bc_add_alu(ctx->bc, &alu);
1249 if (r)
1250 return r;
1251 }
1252 r = r600_bc_add_literal(ctx->bc, ctx->value);
1253 if (r)
1254 return r;
1255
1256 /* dst = (-tmp > 0 ? -1 : tmp) */
1257 for (i = 0; i < 4; i++) {
1258 memset(&alu, 0, sizeof(struct r600_bc_alu));
1259 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1260 alu.is_op3 = 1;
1261 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1262 if (r)
1263 return r;
1264
1265 alu.src[0].sel = ctx->temp_reg;
1266 alu.src[0].chan = i;
1267 alu.src[0].neg = 1;
1268
1269 alu.src[1].sel = V_SQ_ALU_SRC_1;
1270 alu.src[1].neg = 1;
1271
1272 alu.src[2].sel = ctx->temp_reg;
1273 alu.src[2].chan = i;
1274
1275 if (i == 3)
1276 alu.last = 1;
1277 r = r600_bc_add_alu(ctx->bc, &alu);
1278 if (r)
1279 return r;
1280 }
1281 return 0;
1282 }
1283
1284 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1285 {
1286 struct r600_bc_alu alu;
1287 int i, r;
1288
1289 r = r600_bc_add_literal(ctx->bc, ctx->value);
1290 if (r)
1291 return r;
1292 for (i = 0; i < 4; i++) {
1293 memset(&alu, 0, sizeof(struct r600_bc_alu));
1294 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1295 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1296 alu.dst.chan = i;
1297 } else {
1298 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1299 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1300 if (r)
1301 return r;
1302 alu.src[0].sel = ctx->temp_reg;
1303 alu.src[0].chan = i;
1304 }
1305 if (i == 3) {
1306 alu.last = 1;
1307 }
1308 r = r600_bc_add_alu(ctx->bc, &alu);
1309 if (r)
1310 return r;
1311 }
1312 return 0;
1313 }
1314
1315 static int tgsi_op3(struct r600_shader_ctx *ctx)
1316 {
1317 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1318 struct r600_bc_alu_src r600_src[3];
1319 struct r600_bc_alu alu;
1320 int i, j, r;
1321
1322 r = tgsi_split_constant(ctx, r600_src);
1323 if (r)
1324 return r;
1325 /* do it in 2 steps as op3 doesn't support writemask */
1326 for (i = 0; i < 4; i++) {
1327 memset(&alu, 0, sizeof(struct r600_bc_alu));
1328 alu.inst = ctx->inst_info->r600_opcode;
1329 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1330 alu.src[j] = r600_src[j];
1331 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1332 }
1333 alu.dst.sel = ctx->temp_reg;
1334 alu.dst.chan = i;
1335 alu.dst.write = 1;
1336 alu.is_op3 = 1;
1337 if (i == 3) {
1338 alu.last = 1;
1339 }
1340 r = r600_bc_add_alu(ctx->bc, &alu);
1341 if (r)
1342 return r;
1343 }
1344 return tgsi_helper_copy(ctx, inst);
1345 }
1346
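/* Dot products always emit all four channels of the underlying r600
 * instruction: DP2 feeds 0.0 into channels z and w, DP3 zeroes w, and
 * DPH replaces src0.w with 1.0 so it acts as a homogeneous dot product.
 * tgsi_helper_copy() then moves the result into the real destination
 * according to the writemask. */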
1347 static int tgsi_dp(struct r600_shader_ctx *ctx)
1348 {
1349 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1350 struct r600_bc_alu_src r600_src[3];
1351 struct r600_bc_alu alu;
1352 int i, j, r;
1353
1354 r = tgsi_split_constant(ctx, r600_src);
1355 if (r)
1356 return r;
1357 for (i = 0; i < 4; i++) {
1358 memset(&alu, 0, sizeof(struct r600_bc_alu));
1359 alu.inst = ctx->inst_info->r600_opcode;
1360 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1361 alu.src[j] = r600_src[j];
1362 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1363 }
1364 alu.dst.sel = ctx->temp_reg;
1365 alu.dst.chan = i;
1366 alu.dst.write = 1;
1367 /* handle some special cases */
1368 switch (ctx->inst_info->tgsi_opcode) {
1369 case TGSI_OPCODE_DP2:
1370 if (i > 1) {
1371 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1372 alu.src[0].chan = alu.src[1].chan = 0;
1373 }
1374 break;
1375 case TGSI_OPCODE_DP3:
1376 if (i > 2) {
1377 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1378 alu.src[0].chan = alu.src[1].chan = 0;
1379 }
1380 break;
1381 case TGSI_OPCODE_DPH:
1382 if (i == 3) {
1383 alu.src[0].sel = V_SQ_ALU_SRC_1;
1384 alu.src[0].chan = 0;
1385 alu.src[0].neg = 0;
1386 }
1387 break;
1388 default:
1389 break;
1390 }
1391 if (i == 3) {
1392 alu.last = 1;
1393 }
1394 r = r600_bc_add_alu(ctx->bc, &alu);
1395 if (r)
1396 return r;
1397 }
1398 return tgsi_helper_copy(ctx, inst);
1399 }
1400
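/* Texture sampling.  TXP first divides the coordinate by its w component,
 * cube maps go through CUBE plus a reciprocal and a MULADD with 1.5f to
 * land in face coordinates, shadow targets switch SAMPLE to SAMPLE_C,
 * and coordinates that are not already in a plain temporary are staged
 * in ctx->temp_reg before the fetch is emitted. */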
1401 static int tgsi_tex(struct r600_shader_ctx *ctx)
1402 {
1403 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1404 struct r600_bc_tex tex;
1405 struct r600_bc_alu alu;
1406 unsigned src_gpr;
1407 int r, i;
1408 int opcode;
1409 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1410 uint32_t lit_vals[4];
1411
1412 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1413
1414 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1415 /* Add perspective divide */
1416 memset(&alu, 0, sizeof(struct r600_bc_alu));
1417 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1418 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1419 if (r)
1420 return r;
1421
1422 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1423 alu.dst.sel = ctx->temp_reg;
1424 alu.dst.chan = 3;
1425 alu.last = 1;
1426 alu.dst.write = 1;
1427 r = r600_bc_add_alu(ctx->bc, &alu);
1428 if (r)
1429 return r;
1430
1431 for (i = 0; i < 3; i++) {
1432 memset(&alu, 0, sizeof(struct r600_bc_alu));
1433 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1434 alu.src[0].sel = ctx->temp_reg;
1435 alu.src[0].chan = 3;
1436 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1437 if (r)
1438 return r;
1439 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1440 alu.dst.sel = ctx->temp_reg;
1441 alu.dst.chan = i;
1442 alu.dst.write = 1;
1443 r = r600_bc_add_alu(ctx->bc, &alu);
1444 if (r)
1445 return r;
1446 }
1447 memset(&alu, 0, sizeof(struct r600_bc_alu));
1448 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1449 alu.src[0].sel = V_SQ_ALU_SRC_1;
1450 alu.src[0].chan = 0;
1451 alu.dst.sel = ctx->temp_reg;
1452 alu.dst.chan = 3;
1453 alu.last = 1;
1454 alu.dst.write = 1;
1455 r = r600_bc_add_alu(ctx->bc, &alu);
1456 if (r)
1457 return r;
1458 src_not_temp = false;
1459 src_gpr = ctx->temp_reg;
1460 }
1461
1462 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1463 int src_chan, src2_chan;
1464
1465 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1466 for (i = 0; i < 4; i++) {
1467 memset(&alu, 0, sizeof(struct r600_bc_alu));
1468 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1469 switch (i) {
1470 case 0:
1471 src_chan = 2;
1472 src2_chan = 1;
1473 break;
1474 case 1:
1475 src_chan = 2;
1476 src2_chan = 0;
1477 break;
1478 case 2:
1479 src_chan = 0;
1480 src2_chan = 2;
1481 break;
1482 case 3:
1483 src_chan = 1;
1484 src2_chan = 2;
1485 break;
1486 }
1487 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1488 if (r)
1489 return r;
1490 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1491 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1492 if (r)
1493 return r;
1494 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1495 alu.dst.sel = ctx->temp_reg;
1496 alu.dst.chan = i;
1497 if (i == 3)
1498 alu.last = 1;
1499 alu.dst.write = 1;
1500 r = r600_bc_add_alu(ctx->bc, &alu);
1501 if (r)
1502 return r;
1503 }
1504
1505 /* tmp1.z = RCP_e(|tmp1.z|) */
1506 memset(&alu, 0, sizeof(struct r600_bc_alu));
1507 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1508 alu.src[0].sel = ctx->temp_reg;
1509 alu.src[0].chan = 2;
1510 alu.src[0].abs = 1;
1511 alu.dst.sel = ctx->temp_reg;
1512 alu.dst.chan = 2;
1513 alu.dst.write = 1;
1514 alu.last = 1;
1515 r = r600_bc_add_alu(ctx->bc, &alu);
1516 if (r)
1517 return r;
1518
1519 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
1520 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
1521 * muladd has no writemask, have to use another temp
1522 */
1523 memset(&alu, 0, sizeof(struct r600_bc_alu));
1524 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1525 alu.is_op3 = 1;
1526
1527 alu.src[0].sel = ctx->temp_reg;
1528 alu.src[0].chan = 0;
1529 alu.src[1].sel = ctx->temp_reg;
1530 alu.src[1].chan = 2;
1531
1532 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1533 alu.src[2].chan = 0;
1534
1535 alu.dst.sel = ctx->temp_reg;
1536 alu.dst.chan = 0;
1537 alu.dst.write = 1;
1538
1539 r = r600_bc_add_alu(ctx->bc, &alu);
1540 if (r)
1541 return r;
1542
1543 memset(&alu, 0, sizeof(struct r600_bc_alu));
1544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1545 alu.is_op3 = 1;
1546
1547 alu.src[0].sel = ctx->temp_reg;
1548 alu.src[0].chan = 1;
1549 alu.src[1].sel = ctx->temp_reg;
1550 alu.src[1].chan = 2;
1551
1552 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1553 alu.src[2].chan = 0;
1554
1555 alu.dst.sel = ctx->temp_reg;
1556 alu.dst.chan = 1;
1557 alu.dst.write = 1;
1558
1559 alu.last = 1;
1560 r = r600_bc_add_alu(ctx->bc, &alu);
1561 if (r)
1562 return r;
1563
1564 lit_vals[0] = fui(1.5f);
1565
1566 r = r600_bc_add_literal(ctx->bc, lit_vals);
1567 if (r)
1568 return r;
1569 src_not_temp = false;
1570 src_gpr = ctx->temp_reg;
1571 }
1572
1573 if (src_not_temp) {
1574 for (i = 0; i < 4; i++) {
1575 memset(&alu, 0, sizeof(struct r600_bc_alu));
1576 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1577 alu.src[0].sel = src_gpr;
1578 alu.src[0].chan = i;
1579 alu.dst.sel = ctx->temp_reg;
1580 alu.dst.chan = i;
1581 if (i == 3)
1582 alu.last = 1;
1583 alu.dst.write = 1;
1584 r = r600_bc_add_alu(ctx->bc, &alu);
1585 if (r)
1586 return r;
1587 }
1588 src_gpr = ctx->temp_reg;
1589 }
1590
1591 opcode = ctx->inst_info->r600_opcode;
1592 if (opcode == SQ_TEX_INST_SAMPLE &&
1593 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1594 opcode = SQ_TEX_INST_SAMPLE_C;
1595
1596 memset(&tex, 0, sizeof(struct r600_bc_tex));
1597 tex.inst = opcode;
1598 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1599 tex.sampler_id = tex.resource_id;
1600 tex.src_gpr = src_gpr;
1601 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1602 tex.dst_sel_x = 0;
1603 tex.dst_sel_y = 1;
1604 tex.dst_sel_z = 2;
1605 tex.dst_sel_w = 3;
1606 tex.src_sel_x = 0;
1607 tex.src_sel_y = 1;
1608 tex.src_sel_z = 2;
1609 tex.src_sel_w = 3;
1610
1611 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1612 tex.src_sel_x = 1;
1613 tex.src_sel_y = 0;
1614 tex.src_sel_z = 3;
1615 tex.src_sel_w = 1;
1616 }
1617
1618 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1619 tex.coord_type_x = 1;
1620 tex.coord_type_y = 1;
1621 tex.coord_type_z = 1;
1622 tex.coord_type_w = 1;
1623 }
1624
1625 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1626 tex.src_sel_w = 2;
1627
1628 r = r600_bc_add_tex(ctx->bc, &tex);
1629 if (r)
1630 return r;
1631
1632 /* add shadow ambient support - gallium doesn't do it yet */
1633 return 0;
1634
1635 }
1636
1637 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1638 {
1639 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1640 struct r600_bc_alu_src r600_src[3];
1641 struct r600_bc_alu alu;
1642 unsigned i;
1643 int r;
1644
1645 r = tgsi_split_constant(ctx, r600_src);
1646 if (r)
1647 return r;
1648 /* 1 - src0 */
1649 for (i = 0; i < 4; i++) {
1650 memset(&alu, 0, sizeof(struct r600_bc_alu));
1651 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1652 alu.src[0].sel = V_SQ_ALU_SRC_1;
1653 alu.src[0].chan = 0;
1654 alu.src[1] = r600_src[0];
1655 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1656 alu.src[1].neg = 1;
1657 alu.dst.sel = ctx->temp_reg;
1658 alu.dst.chan = i;
1659 if (i == 3) {
1660 alu.last = 1;
1661 }
1662 alu.dst.write = 1;
1663 r = r600_bc_add_alu(ctx->bc, &alu);
1664 if (r)
1665 return r;
1666 }
1667 r = r600_bc_add_literal(ctx->bc, ctx->value);
1668 if (r)
1669 return r;
1670
1671 /* (1 - src0) * src2 */
1672 for (i = 0; i < 4; i++) {
1673 memset(&alu, 0, sizeof(struct r600_bc_alu));
1674 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1675 alu.src[0].sel = ctx->temp_reg;
1676 alu.src[0].chan = i;
1677 alu.src[1] = r600_src[2];
1678 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1679 alu.dst.sel = ctx->temp_reg;
1680 alu.dst.chan = i;
1681 if (i == 3) {
1682 alu.last = 1;
1683 }
1684 alu.dst.write = 1;
1685 r = r600_bc_add_alu(ctx->bc, &alu);
1686 if (r)
1687 return r;
1688 }
1689 r = r600_bc_add_literal(ctx->bc, ctx->value);
1690 if (r)
1691 return r;
1692
1693 /* src0 * src1 + (1 - src0) * src2 */
1694 for (i = 0; i < 4; i++) {
1695 memset(&alu, 0, sizeof(struct r600_bc_alu));
1696 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1697 alu.is_op3 = 1;
1698 alu.src[0] = r600_src[0];
1699 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1700 alu.src[1] = r600_src[1];
1701 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1702 alu.src[2].sel = ctx->temp_reg;
1703 alu.src[2].chan = i;
1704 alu.dst.sel = ctx->temp_reg;
1705 alu.dst.chan = i;
1706 if (i == 3) {
1707 alu.last = 1;
1708 }
1709 r = r600_bc_add_alu(ctx->bc, &alu);
1710 if (r)
1711 return r;
1712 }
1713 return tgsi_helper_copy(ctx, inst);
1714 }
1715
1716 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1717 {
1718 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1719 struct r600_bc_alu_src r600_src[3];
1720 struct r600_bc_alu alu;
1721 int use_temp = 0;
1722 int i, r;
1723
1724 r = tgsi_split_constant(ctx, r600_src);
1725 if (r)
1726 return r;
1727
1728 if (inst->Dst[0].Register.WriteMask != 0xf)
1729 use_temp = 1;
1730
1731 for (i = 0; i < 4; i++) {
1732 memset(&alu, 0, sizeof(struct r600_bc_alu));
1733 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1734 alu.src[0] = r600_src[0];
1735 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1736
1737 alu.src[1] = r600_src[2];
1738 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1739
1740 alu.src[2] = r600_src[1];
1741 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1742
1743 if (use_temp)
1744 alu.dst.sel = ctx->temp_reg;
1745 else {
1746 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1747 if (r)
1748 return r;
1749 }
1750 alu.dst.chan = i;
1751 alu.dst.write = 1;
1752 alu.is_op3 = 1;
1753 if (i == 3)
1754 alu.last = 1;
1755 r = r600_bc_add_alu(ctx->bc, &alu);
1756 if (r)
1757 return r;
1758 }
1759 if (use_temp)
1760 return tgsi_helper_copy(ctx, inst);
1761 return 0;
1762 }
1763
1764 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1765 {
1766 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1767 struct r600_bc_alu_src r600_src[3];
1768 struct r600_bc_alu alu;
1769 uint32_t use_temp = 0;
1770 int i, r;
1771
1772 if (inst->Dst[0].Register.WriteMask != 0xf)
1773 use_temp = 1;
1774
1775 r = tgsi_split_constant(ctx, r600_src);
1776 if (r)
1777 return r;
1778
1779 for (i = 0; i < 4; i++) {
1780 memset(&alu, 0, sizeof(struct r600_bc_alu));
1781 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1782
1783 alu.src[0] = r600_src[0];
1784 switch (i) {
1785 case 0:
1786 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1787 break;
1788 case 1:
1789 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1790 break;
1791 case 2:
1792 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1793 break;
1794 case 3:
1795 alu.src[0].sel = V_SQ_ALU_SRC_0;
1796 alu.src[0].chan = i;
1797 }
1798
1799 alu.src[1] = r600_src[1];
1800 switch (i) {
1801 case 0:
1802 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1803 break;
1804 case 1:
1805 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1806 break;
1807 case 2:
1808 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1809 break;
1810 case 3:
1811 alu.src[1].sel = V_SQ_ALU_SRC_0;
1812 alu.src[1].chan = i;
1813 }
1814
1815 alu.dst.sel = ctx->temp_reg;
1816 alu.dst.chan = i;
1817 alu.dst.write = 1;
1818
1819 if (i == 3)
1820 alu.last = 1;
1821 r = r600_bc_add_alu(ctx->bc, &alu);
1822 if (r)
1823 return r;
1824 }
1825
1826 for (i = 0; i < 4; i++) {
1827 memset(&alu, 0, sizeof(struct r600_bc_alu));
1828 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1829
1830 alu.src[0] = r600_src[0];
1831 switch (i) {
1832 case 0:
1833 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1834 break;
1835 case 1:
1836 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1837 break;
1838 case 2:
1839 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1840 break;
1841 case 3:
1842 alu.src[0].sel = V_SQ_ALU_SRC_0;
1843 alu.src[0].chan = i;
1844 }
1845
1846 alu.src[1] = r600_src[1];
1847 switch (i) {
1848 case 0:
1849 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1850 break;
1851 case 1:
1852 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1853 break;
1854 case 2:
1855 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1856 break;
1857 case 3:
1858 alu.src[1].sel = V_SQ_ALU_SRC_0;
1859 alu.src[1].chan = i;
1860 }
1861
1862 alu.src[2].sel = ctx->temp_reg;
1863 alu.src[2].neg = 1;
1864 alu.src[2].chan = i;
1865
1866 if (use_temp)
1867 alu.dst.sel = ctx->temp_reg;
1868 else {
1869 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1870 if (r)
1871 return r;
1872 }
1873 alu.dst.chan = i;
1874 alu.dst.write = 1;
1875 alu.is_op3 = 1;
1876 if (i == 3)
1877 alu.last = 1;
1878 r = r600_bc_add_alu(ctx->bc, &alu);
1879 if (r)
1880 return r;
1881 }
1882 if (use_temp)
1883 return tgsi_helper_copy(ctx, inst);
1884 return 0;
1885 }
1886
1887 static int tgsi_exp(struct r600_shader_ctx *ctx)
1888 {
1889 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1890 struct r600_bc_alu_src r600_src[3];
1891 struct r600_bc_alu alu;
1892 int r;
1893
1894 /* result.x = 2^floor(src); */
1895 if (inst->Dst[0].Register.WriteMask & 1) {
1896 memset(&alu, 0, sizeof(struct r600_bc_alu));
1897
1898 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1899 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1900 if (r)
1901 return r;
1902
1903 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1904
1905 alu.dst.sel = ctx->temp_reg;
1906 alu.dst.chan = 0;
1907 alu.dst.write = 1;
1908 alu.last = 1;
1909 r = r600_bc_add_alu(ctx->bc, &alu);
1910 if (r)
1911 return r;
1912
1913 r = r600_bc_add_literal(ctx->bc, ctx->value);
1914 if (r)
1915 return r;
1916
1917 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1918 alu.src[0].sel = ctx->temp_reg;
1919 alu.src[0].chan = 0;
1920
1921 alu.dst.sel = ctx->temp_reg;
1922 alu.dst.chan = 0;
1923 alu.dst.write = 1;
1924 alu.last = 1;
1925 r = r600_bc_add_alu(ctx->bc, &alu);
1926 if (r)
1927 return r;
1928
1929 r = r600_bc_add_literal(ctx->bc, ctx->value);
1930 if (r)
1931 return r;
1932 }
1933
1934 /* result.y = src.x - floor(src.x); */
1935 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1936 memset(&alu, 0, sizeof(struct r600_bc_alu));
1937
1938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1939 alu.src[0] = r600_src[0];
1940 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1941 if (r)
1942 return r;
1943 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1944
1945 alu.dst.sel = ctx->temp_reg;
1946 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1947 // if (r)
1948 // return r;
1949 alu.dst.write = 1;
1950 alu.dst.chan = 1;
1951
1952 alu.last = 1;
1953
1954 r = r600_bc_add_alu(ctx->bc, &alu);
1955 if (r)
1956 return r;
1957 r = r600_bc_add_literal(ctx->bc, ctx->value);
1958 if (r)
1959 return r;
1960 }
1961
1962 /* result.z = RoughApprox2ToX(src.x); */
1963 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1964 memset(&alu, 0, sizeof(struct r600_bc_alu));
1965 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1966 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1967 if (r)
1968 return r;
1969 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1970
1971 alu.dst.sel = ctx->temp_reg;
1972 alu.dst.write = 1;
1973 alu.dst.chan = 2;
1974
1975 alu.last = 1;
1976
1977 r = r600_bc_add_alu(ctx->bc, &alu);
1978 if (r)
1979 return r;
1980 r = r600_bc_add_literal(ctx->bc, ctx->value);
1981 if (r)
1982 return r;
1983 }
1984
1985 /* result.w = 1.0;*/
1986 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1987 memset(&alu, 0, sizeof(struct r600_bc_alu));
1988
1989 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1990 alu.src[0].sel = V_SQ_ALU_SRC_1;
1991 alu.src[0].chan = 0;
1992
1993 alu.dst.sel = ctx->temp_reg;
1994 alu.dst.chan = 3;
1995 alu.dst.write = 1;
1996 alu.last = 1;
1997 r = r600_bc_add_alu(ctx->bc, &alu);
1998 if (r)
1999 return r;
2000 r = r600_bc_add_literal(ctx->bc, ctx->value);
2001 if (r)
2002 return r;
2003 }
2004 return tgsi_helper_copy(ctx, inst);
2005 }
2006
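/* ARL: load the address register with floor(src.x) via MOVA_FLOOR. */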
2007 static int tgsi_arl(struct r600_shader_ctx *ctx)
2008 {
2009 /* TODO from r600c, ar values don't persist between clauses */
2010 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2011 struct r600_bc_alu alu;
2012 int r;
2013 memset(&alu, 0, sizeof(struct r600_bc_alu));
2014
2015 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2016
2017 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2018 if (r)
2019 return r;
2020 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2021
2022 alu.last = 1;
2023
2024 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2025 if (r)
2026 return r;
2027 return 0;
2028 }
2029
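/* DST: dst.x = 1.0, dst.y = src0.y * src1.y, dst.z = src0.z, dst.w = src1.w,
 * implemented as four MULs with the constant 1.0 substituted for the
 * operands that do not contribute to a given channel. */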
2030 static int tgsi_opdst(struct r600_shader_ctx *ctx)
2031 {
2032 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2033 struct r600_bc_alu alu;
2034 int i, r = 0;
2035
2036 for (i = 0; i < 4; i++) {
2037 memset(&alu, 0, sizeof(struct r600_bc_alu));
2038
2039 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2040 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2041 if (r)
2042 return r;
2043
2044 if (i == 0 || i == 3) {
2045 alu.src[0].sel = V_SQ_ALU_SRC_1;
2046 } else {
2047 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2048 if (r)
2049 return r;
2050 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2051 }
2052
2053 if (i == 0 || i == 2) {
2054 alu.src[1].sel = V_SQ_ALU_SRC_1;
2055 } else {
2056 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2057 if (r)
2058 return r;
2059 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2060 }
2061 if (i == 3)
2062 alu.last = 1;
2063 r = r600_bc_add_alu(ctx->bc, &alu);
2064 if (r)
2065 return r;
2066 }
2067 return 0;
2068 }
2069
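/* Emit a predicate-setting compare of src.x against 0.0 into the temp
 * register, using the ALU_PUSH_BEFORE CF type so a stack entry is pushed
 * for the branch (used by tgsi_if). */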
2070 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2071 {
2072 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2073 struct r600_bc_alu alu;
2074 int r;
2075
2076 memset(&alu, 0, sizeof(struct r600_bc_alu));
2077 alu.inst = opcode;
2078 alu.predicate = 1;
2079
2080 alu.dst.sel = ctx->temp_reg;
2081 alu.dst.write = 1;
2082 alu.dst.chan = 0;
2083
2084 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2085 if (r)
2086 return r;
2087 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2088 alu.src[1].sel = V_SQ_ALU_SRC_0;
2089 alu.src[1].chan = 0;
2090
2091 alu.last = 1;
2092
2093 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2094 if (r)
2095 return r;
2096 return 0;
2097 }
2098
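/* Emit a POP CF instruction popping the given number of stack entries. */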
2099 static int pops(struct r600_shader_ctx *ctx, int pops)
2100 {
2101 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2102 ctx->bc->cf_last->pop_count = pops;
2103 return 0;
2104 }
2105
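/* Call-stack bookkeeping: undo the depth that was added for the given
 * push reason when the corresponding block is closed. */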
2106 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2107 {
2108 switch(reason) {
2109 case FC_PUSH_VPM:
2110 ctx->bc->callstack[ctx->bc->call_sp].current--;
2111 break;
2112 case FC_PUSH_WQM:
2113 case FC_LOOP:
2114 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2115 break;
2116 case FC_REP:
2117 /* TODO: for 16 vp asics this should be -= 2 */
2118 ctx->bc->callstack[ctx->bc->call_sp].current--;
2119 break;
2120 }
2121 }
2122
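/* Call-stack bookkeeping: with check_max_only, only record the worst-case
 * depth; otherwise increase the current depth and update the maximum. */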
2123 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2124 {
2125 if (check_max_only) {
2126 int diff = 0; /* default; only FC_PUSH_VPM/FC_PUSH_WQM are expected here */
2127 switch (reason) {
2128 case FC_PUSH_VPM:
2129 diff = 1;
2130 break;
2131 case FC_PUSH_WQM:
2132 diff = 4;
2133 break;
2134 }
2135 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2136 ctx->bc->callstack[ctx->bc->call_sp].max) {
2137 ctx->bc->callstack[ctx->bc->call_sp].max =
2138 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2139 }
2140 return;
2141 }
2142 switch (reason) {
2143 case FC_PUSH_VPM:
2144 ctx->bc->callstack[ctx->bc->call_sp].current++;
2145 break;
2146 case FC_PUSH_WQM:
2147 case FC_LOOP:
2148 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2149 break;
2150 case FC_REP:
2151 ctx->bc->callstack[ctx->bc->call_sp].current++;
2152 break;
2153 }
2154
2155 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2156 ctx->bc->callstack[ctx->bc->call_sp].max) {
2157 ctx->bc->callstack[ctx->bc->call_sp].max =
2158 ctx->bc->callstack[ctx->bc->call_sp].current;
2159 }
2160 }
2161
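/* Record the last CF instruction as a "mid" entry (ELSE/BRK/CONT) of the
 * flow-control block at fc_sp so its address can be patched later. */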
2162 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2163 {
2164 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2165
2166 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2167 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2168 sp->mid[sp->num_mid] = ctx->bc->cf_last;
2169 sp->num_mid++;
2170 }
2171
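/* Open a new flow-control block (IF or LOOP) starting at the last CF
 * instruction. */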
2172 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2173 {
2174 ctx->bc->fc_sp++;
2175 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2176 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2177 }
2178
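/* Close the current flow-control block and free its "mid" list. */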
2179 static void fc_poplevel(struct r600_shader_ctx *ctx)
2180 {
2181 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2182 if (sp->mid) {
2183 free(sp->mid);
2184 sp->mid = NULL;
2185 }
2186 sp->num_mid = 0;
2187 sp->start = NULL;
2188 sp->type = 0;
2189 ctx->bc->fc_sp--;
2190 }
2191
2192 #if 0
2193 static int emit_return(struct r600_shader_ctx *ctx)
2194 {
2195 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
2196 return 0;
2197 }
2198
2199 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
2200 {
2201
2202 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2203 ctx->bc->cf_last->pop_count = pops;
2204 /* TODO work out offset */
2205 return 0;
2206 }
2207
2208 static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
2209 {
2210 return 0;
2211 }
2212
2213 static void emit_testflag(struct r600_shader_ctx *ctx)
2214 {
2215
2216 }
2217
2218 static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
2219 {
2220 emit_testflag(ctx);
2221 emit_jump_to_offset(ctx, 1, 4);
2222 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
2223 pops(ctx, ifidx + 1);
2224 emit_return(ctx);
2225 }
2226
2227 static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
2228 {
2229 emit_testflag(ctx);
2230
2231 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2232 ctx->bc->cf_last->pop_count = 1;
2233
2234 fc_set_mid(ctx, fc_sp);
2235
2236 pops(ctx, 1);
2237 }
2238 #endif
2239
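/* IF: set the predicate from src.x != 0.0, emit a JUMP whose target is
 * patched by tgsi_else()/tgsi_endif(), and push an IF block. */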
2240 static int tgsi_if(struct r600_shader_ctx *ctx)
2241 {
2242 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2243
2244 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2245
2246 fc_pushlevel(ctx, FC_IF);
2247
2248 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2249 return 0;
2250 }
2251
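/* ELSE: emit an ELSE CF instruction, record it as the mid point of the
 * current IF block and patch the IF's JUMP to point at it. */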
2252 static int tgsi_else(struct r600_shader_ctx *ctx)
2253 {
2254 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2255 ctx->bc->cf_last->pop_count = 1;
2256
2257 fc_set_mid(ctx, ctx->bc->fc_sp);
2258 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2259 return 0;
2260 }
2261
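/* ENDIF: emit a POP, patch the pending JUMP (or ELSE) to the CF address
 * just after it, and close the IF block. */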
2262 static int tgsi_endif(struct r600_shader_ctx *ctx)
2263 {
2264 pops(ctx, 1);
2265 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2266 R600_ERR("if/endif unbalanced in shader\n");
2267 return -EINVAL;
2268 }
2269
2270 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2271 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2272 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2273 } else {
2274 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2275 }
2276 fc_poplevel(ctx);
2277
2278 callstack_decrease_current(ctx, FC_PUSH_VPM);
2279 return 0;
2280 }
2281
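/* BGNLOOP: emit LOOP_START_NO_AL and open a LOOP block; the loop CF
 * addresses are fixed up in tgsi_endloop(). */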
2282 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2283 {
2284 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2285
2286 fc_pushlevel(ctx, FC_LOOP);
2287
2288 /* check stack depth */
2289 callstack_check_depth(ctx, FC_LOOP, 0);
2290 return 0;
2291 }
2292
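/* ENDLOOP: emit LOOP_END, fix up the loop/break/continue CF addresses
 * (see the comment below) and close the LOOP block. */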
2293 static int tgsi_endloop(struct r600_shader_ctx *ctx)
2294 {
2295 int i;
2296
2297 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2298
2299 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2300 R600_ERR("loop/endloop in shader code are not paired.\n");
2301 return -EINVAL;
2302 }
2303
2304 /* fixup loop pointers - from r600isa
2305 LOOP END points to CF after LOOP START,
2306 LOOP START points to CF after LOOP END
2307 BRK/CONT point to LOOP END CF
2308 */
2309 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2310
2311 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2312
2313 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2314 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2315 }
2316 /* TODO add LOOPRET support */
2317 fc_poplevel(ctx);
2318 callstack_decrease_current(ctx, FC_LOOP);
2319 return 0;
2320 }
2321
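/* BRK/CONT: find the innermost enclosing LOOP block on the flow-control
 * stack and emit LOOP_BREAK/LOOP_CONTINUE, recorded as a mid entry so
 * tgsi_endloop() can patch its target. */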
2322 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2323 {
2324 unsigned int fscp;
2325
2326 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2327 {
2328 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2329 break;
2330 }
2331
2332 if (fscp == 0) {
2333 R600_ERR("break/continue not inside loop/endloop pair\n");
2334 return -EINVAL;
2335 }
2336
2337 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2338 ctx->bc->cf_last->pop_count = 1;
2339
2340 fc_set_mid(ctx, fscp);
2341
2342 pops(ctx, 1);
2343 callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2344 return 0;
2345 }
2346
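/* TGSI -> r600 translation table, kept in TGSI opcode order (the bare
 * numeric entries fill unused opcode slots); entries using
 * tgsi_unsupported are not implemented yet. */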
2347 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2348 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
2349 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2350 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2351 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2352 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2353 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2354 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2355 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2356 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2357 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2358 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2359 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2360 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2361 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2362 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2363 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2364 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2365 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2366 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2367 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2368 /* gap */
2369 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2370 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2371 /* gap */
2372 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2373 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2374 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2375 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2376 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2377 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2378 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2379 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2380 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2381 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2382 /* gap */
2383 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2384 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2385 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2386 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2387 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2388 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2389 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2390 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
2391 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2392 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2393 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2394 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2395 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2396 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2397 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2398 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2399 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2400 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2401 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2402 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2403 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2404 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2405 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2406 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2407 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2408 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2409 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2410 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2411 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2412 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2413 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2414 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2415 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2416 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2417 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2418 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2419 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2420 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2421 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2422 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2423 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2424 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2425 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2426 /* gap */
2427 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2428 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2429 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2430 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2431 /* gap */
2432 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2433 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2434 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2435 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2436 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2437 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2438 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2439 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2440 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2441 /* gap */
2442 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2443 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2444 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2445 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2446 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2447 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2448 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2449 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2450 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2451 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2452 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2453 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2454 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2455 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2456 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2457 /* gap */
2458 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2459 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2460 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2461 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2462 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2463 /* gap */
2464 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2465 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2466 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2467 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2468 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2469 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2470 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2471 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2472 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2473 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2474 /* gap */
2475 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2476 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2477 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2478 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2479 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2480 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2481 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2482 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2483 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2484 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2485 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2486 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2487 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2488 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2489 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2490 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2491 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2492 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2493 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2494 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2495 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2496 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2497 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2498 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2499 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2500 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2501 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2502 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2503 };