r600g: really fix multi target support
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
/*
 * One entry of the opcode table; r600_shader_from_tgsi() indexes the
 * table with the TGSI opcode and calls ->process() on the entry.
 * Field order must not change: the table is presumably initialised
 * positionally (its definition is outside this part of the file).
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably flags a three-operand encoding - confirm */
	unsigned	is_op3;
	/* hardware opcode the handlers place in the emitted instruction */
	unsigned	r600_opcode;
	/* handler translating the current instruction; 0 or -errno */
	int		(*process)(struct r600_shader_ctx *ctx);
};

static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 int r600_pipe_shader_create(struct pipe_context *ctx,
102 struct r600_context_state *rpshader,
103 const struct tgsi_token *tokens)
104 {
105 struct r600_screen *rscreen = r600_screen(ctx->screen);
106 int r;
107
108 //fprintf(stderr, "--------------------------------------------------------------\n");
109 //tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return -ENOMEM;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 return r;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 return r;
122 }
123 //fprintf(stderr, "______________________________________________________________\n");
124 return 0;
125 }
126
127 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128 {
129 struct r600_screen *rscreen = r600_screen(ctx->screen);
130 struct r600_shader *rshader = &rpshader->shader;
131 struct radeon_state *state;
132 unsigned i, tmp;
133
134 rpshader->rstate = radeon_state_decref(rpshader->rstate);
135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136 if (state == NULL)
137 return -ENOMEM;
138 for (i = 0; i < 10; i++) {
139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140 }
141 /* so far never got proper semantic id from tgsi */
142 for (i = 0; i < 32; i++) {
143 tmp = i << ((i & 3) * 8);
144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145 }
146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148 rpshader->rstate = state;
149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151 rpshader->rstate->nbo = 2;
152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153 return radeon_state_pm4(state);
154 }
155
156 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
157 {
158 struct r600_screen *rscreen = r600_screen(ctx->screen);
159 struct r600_shader *rshader = &rpshader->shader;
160 struct radeon_state *state;
161 unsigned i, tmp, exports_ps, num_cout;
162
163 rpshader->rstate = radeon_state_decref(rpshader->rstate);
164 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
165 if (state == NULL)
166 return -ENOMEM;
167 for (i = 0; i < rshader->ninput; i++) {
168 tmp = S_028644_SEMANTIC(i);
169 tmp |= S_028644_SEL_CENTROID(1);
170 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
171 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
172 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
173 }
174 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
175 }
176
177 exports_ps = 0;
178 num_cout = 0;
179 for (i = 0; i < rshader->noutput; i++) {
180 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
181 exports_ps |= 1;
182 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
183 exports_ps |= (1 << (num_cout+1));
184 num_cout++;
185 }
186 }
187 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
188 S_0286CC_PERSP_GRADIENT_ENA(1);
189 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
190 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
191 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
192 rpshader->rstate = state;
193 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
194 rpshader->rstate->nbo = 1;
195 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
196 return radeon_state_pm4(state);
197 }
198
199 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
200 {
201 struct r600_screen *rscreen = r600_screen(ctx->screen);
202 struct r600_context *rctx = r600_context(ctx);
203 struct r600_shader *rshader = &rpshader->shader;
204 int r;
205
206 /* copy new shader */
207 radeon_bo_decref(rscreen->rw, rpshader->bo);
208 rpshader->bo = NULL;
209 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
210 4096, NULL);
211 if (rpshader->bo == NULL) {
212 return -ENOMEM;
213 }
214 radeon_bo_map(rscreen->rw, rpshader->bo);
215 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
216 radeon_bo_unmap(rscreen->rw, rpshader->bo);
217 /* build state */
218 rshader->flat_shade = rctx->flat_shade;
219 switch (rshader->processor_type) {
220 case TGSI_PROCESSOR_VERTEX:
221 r = r600_pipe_shader_vs(ctx, rpshader);
222 break;
223 case TGSI_PROCESSOR_FRAGMENT:
224 r = r600_pipe_shader_ps(ctx, rpshader);
225 break;
226 default:
227 r = -EINVAL;
228 break;
229 }
230 return r;
231 }
232
233 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
234 {
235 struct r600_context *rctx = r600_context(ctx);
236 int r;
237
238 if (rpshader == NULL)
239 return -EINVAL;
240 /* there should be enough input */
241 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
242 R600_ERR("%d resources provided, expecting %d\n",
243 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
244 return -EINVAL;
245 }
246 r = r600_shader_update(ctx, &rpshader->shader);
247 if (r)
248 return r;
249 return r600_pipe_shader(ctx, rpshader);
250 }
251
252 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
253 {
254 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
255 int j;
256
257 if (i->Instruction.NumDstRegs > 1) {
258 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
259 return -EINVAL;
260 }
261 if (i->Instruction.Predicate) {
262 R600_ERR("predicate unsupported\n");
263 return -EINVAL;
264 }
265 if (i->Instruction.Label) {
266 R600_ERR("label unsupported\n");
267 return -EINVAL;
268 }
269 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
270 if (i->Src[j].Register.Indirect ||
271 i->Src[j].Register.Dimension ||
272 i->Src[j].Register.Absolute) {
273 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
274 return -EINVAL;
275 }
276 }
277 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
278 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
279 R600_ERR("unsupported dst (indirect|dimension)\n");
280 return -EINVAL;
281 }
282 }
283 return 0;
284 }
285
286 static int tgsi_declaration(struct r600_shader_ctx *ctx)
287 {
288 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
289 struct r600_bc_vtx vtx;
290 unsigned i;
291 int r;
292
293 switch (d->Declaration.File) {
294 case TGSI_FILE_INPUT:
295 i = ctx->shader->ninput++;
296 ctx->shader->input[i].name = d->Semantic.Name;
297 ctx->shader->input[i].sid = d->Semantic.Index;
298 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
299 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
300 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
301 /* turn input into fetch */
302 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
303 vtx.inst = 0;
304 vtx.fetch_type = 0;
305 vtx.buffer_id = i;
306 /* register containing the index into the buffer */
307 vtx.src_gpr = 0;
308 vtx.src_sel_x = 0;
309 vtx.mega_fetch_count = 0x1F;
310 vtx.dst_gpr = ctx->shader->input[i].gpr;
311 vtx.dst_sel_x = 0;
312 vtx.dst_sel_y = 1;
313 vtx.dst_sel_z = 2;
314 vtx.dst_sel_w = 3;
315 r = r600_bc_add_vtx(ctx->bc, &vtx);
316 if (r)
317 return r;
318 }
319 break;
320 case TGSI_FILE_OUTPUT:
321 i = ctx->shader->noutput++;
322 ctx->shader->output[i].name = d->Semantic.Name;
323 ctx->shader->output[i].sid = d->Semantic.Index;
324 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
325 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
326 break;
327 case TGSI_FILE_CONSTANT:
328 case TGSI_FILE_TEMPORARY:
329 case TGSI_FILE_SAMPLER:
330 break;
331 default:
332 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
333 return -EINVAL;
334 }
335 return 0;
336 }
337
338 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
339 {
340 struct tgsi_full_immediate *immediate;
341 struct r600_shader_ctx ctx;
342 struct r600_bc_output output[32];
343 unsigned output_done;
344 unsigned opcode;
345 int i, r = 0, pos0;
346
347 ctx.bc = &shader->bc;
348 ctx.shader = shader;
349 r = r600_bc_init(ctx.bc, shader->family);
350 if (r)
351 return r;
352 ctx.tokens = tokens;
353 tgsi_scan_shader(tokens, &ctx.info);
354 tgsi_parse_init(&ctx.parse, tokens);
355 ctx.type = ctx.parse.FullHeader.Processor.Processor;
356 shader->processor_type = ctx.type;
357
358 /* register allocations */
359 /* Values [0,127] correspond to GPR[0..127].
360 * Values [256,511] correspond to cfile constants c[0..255].
361 * Other special values are shown in the list below.
362 * 248 SQ_ALU_SRC_0: special constant 0.0.
363 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
364 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
365 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
366 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
367 * 253 SQ_ALU_SRC_LITERAL: literal constant.
368 * 254 SQ_ALU_SRC_PV: previous vector result.
369 * 255 SQ_ALU_SRC_PS: previous scalar result.
370 */
371 for (i = 0; i < TGSI_FILE_COUNT; i++) {
372 ctx.file_offset[i] = 0;
373 }
374 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
375 ctx.file_offset[TGSI_FILE_INPUT] = 1;
376 }
377 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
378 ctx.info.file_count[TGSI_FILE_INPUT];
379 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
380 ctx.info.file_count[TGSI_FILE_OUTPUT];
381 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
382 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
383 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
384 ctx.info.file_count[TGSI_FILE_TEMPORARY];
385
386 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
387 tgsi_parse_token(&ctx.parse);
388 switch (ctx.parse.FullToken.Token.Type) {
389 case TGSI_TOKEN_TYPE_IMMEDIATE:
390 immediate = &ctx.parse.FullToken.FullImmediate;
391 ctx.value[0] = immediate->u[0].Uint;
392 ctx.value[1] = immediate->u[1].Uint;
393 ctx.value[2] = immediate->u[2].Uint;
394 ctx.value[3] = immediate->u[3].Uint;
395 break;
396 case TGSI_TOKEN_TYPE_DECLARATION:
397 r = tgsi_declaration(&ctx);
398 if (r)
399 goto out_err;
400 break;
401 case TGSI_TOKEN_TYPE_INSTRUCTION:
402 r = tgsi_is_supported(&ctx);
403 if (r)
404 goto out_err;
405 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
406 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
407 r = ctx.inst_info->process(&ctx);
408 if (r)
409 goto out_err;
410 r = r600_bc_add_literal(ctx.bc, ctx.value);
411 if (r)
412 goto out_err;
413 break;
414 default:
415 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
416 r = -EINVAL;
417 goto out_err;
418 }
419 }
420 /* export output */
421 for (i = 0, pos0 = 0; i < shader->noutput; i++) {
422 memset(&output[i], 0, sizeof(struct r600_bc_output));
423 output[i].gpr = shader->output[i].gpr;
424 output[i].elem_size = 3;
425 output[i].swizzle_x = 0;
426 output[i].swizzle_y = 1;
427 output[i].swizzle_z = 2;
428 output[i].swizzle_w = 3;
429 output[i].barrier = 1;
430 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
431 output[i].array_base = i - pos0;
432 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
433 switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
434 case TGSI_PROCESSOR_VERTEX:
435 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
436 output[i].array_base = 60;
437 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
438 /* position doesn't count in array_base */
439 pos0 = 1;
440 }
441 break;
442 case TGSI_PROCESSOR_FRAGMENT:
443 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
444 output[i].array_base = shader->output[i].sid;
445 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
446 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
447 output[i].array_base = 61;
448 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
449 } else {
450 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
451 r = -EINVAL;
452 goto out_err;
453 }
454 break;
455 default:
456 R600_ERR("unsupported processor type %d\n", ctx.type);
457 r = -EINVAL;
458 goto out_err;
459 }
460 if (i == (shader->noutput - 1)) {
461 output[i].end_of_program = 1;
462 }
463 }
464 for (i = shader->noutput - 1, output_done = 0; i >= 0; i--) {
465 if (!(output_done & (1 << output[i].type))) {
466 output_done |= (1 << output[i].type);
467 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
468 }
469 }
470 for (i = 0; i < shader->noutput; i++) {
471 r = r600_bc_add_output(ctx.bc, &output[i]);
472 if (r)
473 goto out_err;
474 }
475 tgsi_parse_free(&ctx.parse);
476 return 0;
477 out_err:
478 tgsi_parse_free(&ctx.parse);
479 return r;
480 }
481
482 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
483 {
484 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
485 return -EINVAL;
486 }
487
/*
 * Handler that emits nothing and always succeeds (named after the TGSI
 * END opcode).
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
492
493 static int tgsi_src(struct r600_shader_ctx *ctx,
494 const struct tgsi_full_src_register *tgsi_src,
495 struct r600_bc_alu_src *r600_src)
496 {
497 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
498 r600_src->sel = tgsi_src->Register.Index;
499 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
500 r600_src->sel = 0;
501 }
502 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
503 return 0;
504 }
505
506 static int tgsi_dst(struct r600_shader_ctx *ctx,
507 const struct tgsi_full_dst_register *tgsi_dst,
508 unsigned swizzle,
509 struct r600_bc_alu_dst *r600_dst)
510 {
511 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
512
513 r600_dst->sel = tgsi_dst->Register.Index;
514 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
515 r600_dst->chan = swizzle;
516 r600_dst->write = 1;
517 if (inst->Instruction.Saturate) {
518 r600_dst->clamp = 1;
519 }
520 return 0;
521 }
522
523 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
524 {
525 switch (swizzle) {
526 case 0:
527 return tgsi_src->Register.SwizzleX;
528 case 1:
529 return tgsi_src->Register.SwizzleY;
530 case 2:
531 return tgsi_src->Register.SwizzleZ;
532 case 3:
533 return tgsi_src->Register.SwizzleW;
534 default:
535 return 0;
536 }
537 }
538
539 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
540 {
541 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
542 struct r600_bc_alu alu;
543 int i, j, k, nconst, r;
544
545 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
546 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
547 nconst++;
548 }
549 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
550 if (r) {
551 return r;
552 }
553 }
554 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
555 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
556 for (k = 0; k < 4; k++) {
557 memset(&alu, 0, sizeof(struct r600_bc_alu));
558 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
559 alu.src[0].sel = r600_src[0].sel;
560 alu.src[0].chan = k;
561 alu.dst.sel = ctx->temp_reg + j;
562 alu.dst.chan = k;
563 alu.dst.write = 1;
564 if (k == 3)
565 alu.last = 1;
566 r = r600_bc_add_alu(ctx->bc, &alu);
567 if (r)
568 return r;
569 }
570 r600_src[0].sel = ctx->temp_reg + j;
571 j--;
572 }
573 }
574 return 0;
575 }
576
577 static int tgsi_op2(struct r600_shader_ctx *ctx)
578 {
579 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
580 struct r600_bc_alu_src r600_src[3];
581 struct r600_bc_alu alu;
582 int i, j, r;
583
584 r = tgsi_split_constant(ctx, r600_src);
585 if (r)
586 return r;
587 for (i = 0; i < 4; i++) {
588 memset(&alu, 0, sizeof(struct r600_bc_alu));
589 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
590 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
591 alu.dst.chan = i;
592 } else {
593 alu.inst = ctx->inst_info->r600_opcode;
594 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
595 alu.src[j] = r600_src[j];
596 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
597 }
598 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
599 if (r)
600 return r;
601 }
602 /* handle some special cases */
603 switch (ctx->inst_info->tgsi_opcode) {
604 case TGSI_OPCODE_SUB:
605 alu.src[1].neg = 1;
606 break;
607 case TGSI_OPCODE_ABS:
608 alu.src[0].abs = 1;
609 break;
610 default:
611 break;
612 }
613 if (i == 3) {
614 alu.last = 1;
615 }
616 r = r600_bc_add_alu(ctx->bc, &alu);
617 if (r)
618 return r;
619 }
620 return 0;
621 }
622
623 static int tgsi_kill(struct r600_shader_ctx *ctx)
624 {
625 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
626 struct r600_bc_alu alu;
627 int i, r;
628
629 for (i = 0; i < 4; i++) {
630 memset(&alu, 0, sizeof(struct r600_bc_alu));
631 alu.inst = ctx->inst_info->r600_opcode;
632 alu.dst.chan = i;
633 alu.src[0].sel = 248;
634 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
635 if (r)
636 return r;
637 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
638 if (i == 3) {
639 alu.last = 1;
640 }
641 r = r600_bc_add_alu(ctx->bc, &alu);
642 if (r)
643 return r;
644 }
645 return 0;
646 }
647
648 static int tgsi_slt(struct r600_shader_ctx *ctx)
649 {
650 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
651 struct r600_bc_alu_src r600_src[3];
652 struct r600_bc_alu alu;
653 int i, r;
654
655 r = tgsi_split_constant(ctx, r600_src);
656 if (r)
657 return r;
658 for (i = 0; i < 4; i++) {
659 memset(&alu, 0, sizeof(struct r600_bc_alu));
660 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
661 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
662 alu.dst.chan = i;
663 } else {
664 alu.inst = ctx->inst_info->r600_opcode;
665 alu.src[1] = r600_src[0];
666 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
667 alu.src[0] = r600_src[1];
668 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
669 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
670 if (r)
671 return r;
672 }
673 if (i == 3) {
674 alu.last = 1;
675 }
676 r = r600_bc_add_alu(ctx->bc, &alu);
677 if (r)
678 return r;
679 }
680 return 0;
681 }
682
683 static int tgsi_lit(struct r600_shader_ctx *ctx)
684 {
685 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
686 struct r600_bc_alu alu;
687 int r;
688
689 /* dst.x, <- 1.0 */
690 memset(&alu, 0, sizeof(struct r600_bc_alu));
691 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
692 alu.src[0].sel = 249; /*1.0*/
693 alu.src[0].chan = 0;
694 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
695 if (r)
696 return r;
697 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
698 r = r600_bc_add_alu(ctx->bc, &alu);
699 if (r)
700 return r;
701
702 /* dst.y = max(src.x, 0.0) */
703 memset(&alu, 0, sizeof(struct r600_bc_alu));
704 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
705 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
706 if (r)
707 return r;
708 alu.src[1].sel = 248; /*0.0*/
709 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
710 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
711 if (r)
712 return r;
713 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
714 r = r600_bc_add_alu(ctx->bc, &alu);
715 if (r)
716 return r;
717
718 /* dst.z = NOP - fill Z slot */
719 memset(&alu, 0, sizeof(struct r600_bc_alu));
720 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
721 alu.dst.chan = 2;
722 r = r600_bc_add_alu(ctx->bc, &alu);
723 if (r)
724 return r;
725
726 /* dst.w, <- 1.0 */
727 memset(&alu, 0, sizeof(struct r600_bc_alu));
728 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
729 alu.src[0].sel = 249;
730 alu.src[0].chan = 0;
731 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
732 if (r)
733 return r;
734 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
735 alu.last = 1;
736 r = r600_bc_add_alu(ctx->bc, &alu);
737 if (r)
738 return r;
739
740 if (inst->Dst[0].Register.WriteMask & (1 << 2))
741 {
742 int chan;
743 int sel;
744
745 /* dst.z = log(src.y) */
746 memset(&alu, 0, sizeof(struct r600_bc_alu));
747 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
748 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
749 if (r)
750 return r;
751 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
752 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
753 if (r)
754 return r;
755 alu.last = 1;
756 r = r600_bc_add_alu(ctx->bc, &alu);
757 if (r)
758 return r;
759
760 chan = alu.dst.chan;
761 sel = alu.dst.sel;
762
763 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
764 memset(&alu, 0, sizeof(struct r600_bc_alu));
765 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
766 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
767 if (r)
768 return r;
769 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
770 alu.src[1].sel = sel;
771 alu.src[1].chan = chan;
772 r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
773 if (r)
774 return r;
775 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
776 alu.dst.sel = ctx->temp_reg;
777 alu.dst.chan = 0;
778 alu.dst.write = 1;
779 alu.is_op3 = 1;
780 alu.last = 1;
781 r = r600_bc_add_alu(ctx->bc, &alu);
782 if (r)
783 return r;
784
785 /* dst.z = exp(tmp.x) */
786 memset(&alu, 0, sizeof(struct r600_bc_alu));
787 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
788 alu.src[0].sel = ctx->temp_reg;
789 alu.src[0].chan = 0;
790 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
791 if (r)
792 return r;
793 alu.last = 1;
794 r = r600_bc_add_alu(ctx->bc, &alu);
795 if (r)
796 return r;
797 }
798 return 0;
799 }
800
801 static int tgsi_trans(struct r600_shader_ctx *ctx)
802 {
803 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
804 struct r600_bc_alu alu;
805 int i, j, r;
806
807 for (i = 0; i < 4; i++) {
808 memset(&alu, 0, sizeof(struct r600_bc_alu));
809 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
810 alu.inst = ctx->inst_info->r600_opcode;
811 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
812 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
813 if (r)
814 return r;
815 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
816 }
817 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
818 if (r)
819 return r;
820 alu.last = 1;
821 r = r600_bc_add_alu(ctx->bc, &alu);
822 if (r)
823 return r;
824 }
825 }
826 return 0;
827 }
828
829 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
830 {
831 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
832 struct r600_bc_alu alu;
833 int i, j, r;
834
835 memset(&alu, 0, sizeof(struct r600_bc_alu));
836 alu.inst = ctx->inst_info->r600_opcode;
837 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
838 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
839 if (r)
840 return r;
841 alu.src[j].chan = tgsi_chan(&inst->Src[j], 0);
842 }
843 alu.dst.sel = ctx->temp_reg;
844 alu.dst.write = 1;
845 alu.last = 1;
846 r = r600_bc_add_alu(ctx->bc, &alu);
847 if (r)
848 return r;
849 /* replicate result */
850 for (i = 0; i < 4; i++) {
851 memset(&alu, 0, sizeof(struct r600_bc_alu));
852 alu.src[0].sel = ctx->temp_reg;
853 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
854 alu.dst.chan = i;
855 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
856 if (r)
857 return r;
858 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
859 if (i == 3)
860 alu.last = 1;
861 r = r600_bc_add_alu(ctx->bc, &alu);
862 if (r)
863 return r;
864 }
865 return 0;
866 }
867
868 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
869 {
870 struct r600_bc_alu alu;
871 int i, r;
872
873 r = r600_bc_add_literal(ctx->bc, ctx->value);
874 if (r)
875 return r;
876 for (i = 0; i < 4; i++) {
877 memset(&alu, 0, sizeof(struct r600_bc_alu));
878 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
879 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
880 alu.dst.chan = i;
881 } else {
882 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
883 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
884 if (r)
885 return r;
886 alu.src[0].sel = ctx->temp_reg;
887 alu.src[0].chan = i;
888 }
889 if (i == 3) {
890 alu.last = 1;
891 }
892 r = r600_bc_add_alu(ctx->bc, &alu);
893 if (r)
894 return r;
895 }
896 return 0;
897 }
898
899 static int tgsi_op3(struct r600_shader_ctx *ctx)
900 {
901 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
902 struct r600_bc_alu_src r600_src[3];
903 struct r600_bc_alu alu;
904 int i, j, r;
905
906 r = tgsi_split_constant(ctx, r600_src);
907 if (r)
908 return r;
909 /* do it in 2 step as op3 doesn't support writemask */
910 for (i = 0; i < 4; i++) {
911 memset(&alu, 0, sizeof(struct r600_bc_alu));
912 alu.inst = ctx->inst_info->r600_opcode;
913 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
914 alu.src[j] = r600_src[j];
915 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
916 }
917 alu.dst.sel = ctx->temp_reg;
918 alu.dst.chan = i;
919 alu.dst.write = 1;
920 alu.is_op3 = 1;
921 if (i == 3) {
922 alu.last = 1;
923 }
924 r = r600_bc_add_alu(ctx->bc, &alu);
925 if (r)
926 return r;
927 }
928 return tgsi_helper_copy(ctx, inst);
929 }
930
931 static int tgsi_dp(struct r600_shader_ctx *ctx)
932 {
933 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
934 struct r600_bc_alu_src r600_src[3];
935 struct r600_bc_alu alu;
936 int i, j, r;
937
938 r = tgsi_split_constant(ctx, r600_src);
939 if (r)
940 return r;
941 for (i = 0; i < 4; i++) {
942 memset(&alu, 0, sizeof(struct r600_bc_alu));
943 alu.inst = ctx->inst_info->r600_opcode;
944 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
945 alu.src[j] = r600_src[j];
946 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
947 }
948 alu.dst.sel = ctx->temp_reg;
949 alu.dst.chan = i;
950 alu.dst.write = 1;
951 /* handle some special cases */
952 switch (ctx->inst_info->tgsi_opcode) {
953 case TGSI_OPCODE_DP2:
954 if (i > 1) {
955 alu.src[0].sel = alu.src[1].sel = 248;
956 alu.src[0].chan = alu.src[1].chan = 0;
957 }
958 break;
959 case TGSI_OPCODE_DP3:
960 if (i > 2) {
961 alu.src[0].sel = alu.src[1].sel = 248;
962 alu.src[0].chan = alu.src[1].chan = 0;
963 }
964 break;
965 default:
966 break;
967 }
968 if (i == 3) {
969 alu.last = 1;
970 }
971 r = r600_bc_add_alu(ctx->bc, &alu);
972 if (r)
973 return r;
974 }
975 return tgsi_helper_copy(ctx, inst);
976 }
977
978 static int tgsi_tex(struct r600_shader_ctx *ctx)
979 {
980 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
981 struct r600_bc_tex tex;
982 struct r600_bc_alu alu;
983 unsigned src_gpr;
984 int r;
985
986 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
987
988 /* Add perspective divide */
989 memset(&alu, 0, sizeof(struct r600_bc_alu));
990 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
991 alu.src[0].sel = src_gpr;
992 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
993 alu.dst.sel = ctx->temp_reg;
994 alu.dst.chan = 3;
995 alu.last = 1;
996 alu.dst.write = 1;
997 r = r600_bc_add_alu(ctx->bc, &alu);
998 if (r)
999 return r;
1000
1001 memset(&alu, 0, sizeof(struct r600_bc_alu));
1002 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1003 alu.src[0].sel = ctx->temp_reg;
1004 alu.src[0].chan = 3;
1005 alu.src[1].sel = src_gpr;
1006 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
1007 alu.dst.sel = ctx->temp_reg;
1008 alu.dst.chan = 0;
1009 alu.dst.write = 1;
1010 r = r600_bc_add_alu(ctx->bc, &alu);
1011 if (r)
1012 return r;
1013 memset(&alu, 0, sizeof(struct r600_bc_alu));
1014 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1015 alu.src[0].sel = ctx->temp_reg;
1016 alu.src[0].chan = 3;
1017 alu.src[1].sel = src_gpr;
1018 alu.src[1].chan = tgsi_chan(&inst->Src[0], 1);
1019 alu.dst.sel = ctx->temp_reg;
1020 alu.dst.chan = 1;
1021 alu.dst.write = 1;
1022 r = r600_bc_add_alu(ctx->bc, &alu);
1023 if (r)
1024 return r;
1025 memset(&alu, 0, sizeof(struct r600_bc_alu));
1026 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1027 alu.src[0].sel = ctx->temp_reg;
1028 alu.src[0].chan = 3;
1029 alu.src[1].sel = src_gpr;
1030 alu.src[1].chan = tgsi_chan(&inst->Src[0], 2);
1031 alu.dst.sel = ctx->temp_reg;
1032 alu.dst.chan = 2;
1033 alu.dst.write = 1;
1034 r = r600_bc_add_alu(ctx->bc, &alu);
1035 if (r)
1036 return r;
1037 memset(&alu, 0, sizeof(struct r600_bc_alu));
1038 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1039 alu.src[0].sel = 249;
1040 alu.src[0].chan = 0;
1041 alu.dst.sel = ctx->temp_reg;
1042 alu.dst.chan = 3;
1043 alu.last = 1;
1044 alu.dst.write = 1;
1045 r = r600_bc_add_alu(ctx->bc, &alu);
1046 if (r)
1047 return r;
1048 src_gpr = ctx->temp_reg;
1049
1050 /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
1051 memset(&tex, 0, sizeof(struct r600_bc_tex));
1052 tex.inst = ctx->inst_info->r600_opcode;
1053 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1054 tex.sampler_id = tex.resource_id;
1055 tex.src_gpr = src_gpr;
1056 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1057 tex.dst_sel_x = 0;
1058 tex.dst_sel_y = 1;
1059 tex.dst_sel_z = 2;
1060 tex.dst_sel_w = 3;
1061 tex.src_sel_x = 0;
1062 tex.src_sel_y = 1;
1063 tex.src_sel_z = 2;
1064 tex.src_sel_w = 3;
1065
1066 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1067 tex.coord_type_x = 1;
1068 tex.coord_type_y = 1;
1069 tex.coord_type_z = 1;
1070 tex.coord_type_w = 1;
1071 }
1072 return r600_bc_add_tex(ctx->bc, &tex);
1073 }
1074
1075 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1076 {
1077 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1078 struct r600_bc_alu_src r600_src[3];
1079 struct r600_bc_alu alu;
1080 unsigned i;
1081 int r;
1082
1083 r = tgsi_split_constant(ctx, r600_src);
1084 if (r)
1085 return r;
1086 /* 1 - src0 */
1087 for (i = 0; i < 4; i++) {
1088 memset(&alu, 0, sizeof(struct r600_bc_alu));
1089 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1090 alu.src[0].sel = 249;
1091 alu.src[0].chan = 0;
1092 alu.src[1] = r600_src[0];
1093 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1094 alu.src[1].neg = 1;
1095 alu.dst.sel = ctx->temp_reg;
1096 alu.dst.chan = i;
1097 if (i == 3) {
1098 alu.last = 1;
1099 }
1100 alu.dst.write = 1;
1101 r = r600_bc_add_alu(ctx->bc, &alu);
1102 if (r)
1103 return r;
1104 }
1105 r = r600_bc_add_literal(ctx->bc, ctx->value);
1106 if (r)
1107 return r;
1108
1109 /* (1 - src0) * src2 */
1110 for (i = 0; i < 4; i++) {
1111 memset(&alu, 0, sizeof(struct r600_bc_alu));
1112 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1113 alu.src[0].sel = ctx->temp_reg;
1114 alu.src[0].chan = i;
1115 alu.src[1] = r600_src[2];
1116 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1117 alu.dst.sel = ctx->temp_reg;
1118 alu.dst.chan = i;
1119 if (i == 3) {
1120 alu.last = 1;
1121 }
1122 alu.dst.write = 1;
1123 r = r600_bc_add_alu(ctx->bc, &alu);
1124 if (r)
1125 return r;
1126 }
1127 r = r600_bc_add_literal(ctx->bc, ctx->value);
1128 if (r)
1129 return r;
1130
1131 /* src0 * src1 + (1 - src0) * src2 */
1132 for (i = 0; i < 4; i++) {
1133 memset(&alu, 0, sizeof(struct r600_bc_alu));
1134 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1135 alu.is_op3 = 1;
1136 alu.src[0] = r600_src[0];
1137 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1138 alu.src[1] = r600_src[1];
1139 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1140 alu.src[2].sel = ctx->temp_reg;
1141 alu.src[2].chan = i;
1142 alu.dst.sel = ctx->temp_reg;
1143 alu.dst.chan = i;
1144 if (i == 3) {
1145 alu.last = 1;
1146 }
1147 r = r600_bc_add_alu(ctx->bc, &alu);
1148 if (r)
1149 return r;
1150 }
1151 return tgsi_helper_copy(ctx, inst);
1152 }
1153
1154 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1155 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1156 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1157 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1158 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
1159 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
1160 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1161 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1162 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1163 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1164 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1165 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1166 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1167 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
1168 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1169 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1170 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1171 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1172 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1173 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1174 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1175 /* gap */
1176 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1177 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1178 /* gap */
1179 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1180 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1181 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1182 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1183 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1184 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1185 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
1186 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1187 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1188 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1189 /* gap */
1190 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1191 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1192 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1193 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1194 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1195 {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1196 {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1197 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1198 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1199 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1200 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1201 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1202 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1203 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1204 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1205 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1206 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1207 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1208 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1209 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1210 {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex},
1211 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1212 {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex},
1213 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1214 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1215 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1216 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1217 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1218 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1219 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1220 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1221 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1222 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1223 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
1224 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1225 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1226 {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1227 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1228 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1229 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1230 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1231 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1232 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1233 /* gap */
1234 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1235 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1236 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1237 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1238 /* gap */
1239 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1240 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1241 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1242 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1243 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1244 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1245 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1246 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1247 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1248 /* gap */
1249 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1250 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1251 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1252 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1253 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1254 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1255 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1256 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1257 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1258 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1259 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1260 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1261 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1262 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1263 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1264 /* gap */
1265 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1266 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1267 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1268 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1269 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1270 /* gap */
1271 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1272 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1273 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1274 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1275 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1276 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1277 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1278 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1279 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
1280 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1281 /* gap */
1282 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1283 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1284 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1285 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1286 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1287 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1288 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1289 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1290 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1291 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1292 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1293 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1294 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1295 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1296 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1297 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1298 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1299 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1300 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1301 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1302 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1303 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1304 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1305 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1306 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1307 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1308 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1309 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1310 };