7d304f5ae80a2e0161036e04dc5226458c85fcdf
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
/*
 * One entry of the TGSI opcode dispatch table.  The table is indexed by
 * TGSI opcode in r600_shader_from_tgsi(); field order is kept as-is so
 * positional initialisers of the table stay valid.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned int	tgsi_opcode;
	/* three-operand flag for the r600 instruction */
	unsigned int	is_op3;
	/* r600 opcode emitted by the handler */
	unsigned int	r600_opcode;
	/* handler; returns 0 on success or a negative errno value */
	int		(*process)(struct r600_shader_ctx *ctx);
};
59
60 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 int r600_pipe_shader_create(struct pipe_context *ctx,
102 struct r600_context_state *rpshader,
103 const struct tgsi_token *tokens)
104 {
105 struct r600_screen *rscreen = r600_screen(ctx->screen);
106 int r;
107
108 fprintf(stderr, "--------------------------------------------------------------\n");
109 tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return -ENOMEM;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 return r;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 return r;
122 }
123 fprintf(stderr, "______________________________________________________________\n");
124 return 0;
125 }
126
127 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128 {
129 struct r600_screen *rscreen = r600_screen(ctx->screen);
130 struct r600_shader *rshader = &rpshader->shader;
131 struct radeon_state *state;
132 unsigned i, tmp;
133
134 rpshader->rstate = radeon_state_decref(rpshader->rstate);
135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136 if (state == NULL)
137 return -ENOMEM;
138 for (i = 0; i < 10; i++) {
139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140 }
141 /* so far never got proper semantic id from tgsi */
142 for (i = 0; i < 32; i++) {
143 tmp = i << ((i & 3) * 8);
144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145 }
146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148 rpshader->rstate = state;
149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151 rpshader->rstate->nbo = 2;
152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153 return radeon_state_pm4(state);
154 }
155
156 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
157 {
158 struct r600_screen *rscreen = r600_screen(ctx->screen);
159 struct r600_shader *rshader = &rpshader->shader;
160 struct radeon_state *state;
161 unsigned i, tmp;
162
163 rpshader->rstate = radeon_state_decref(rpshader->rstate);
164 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
165 if (state == NULL)
166 return -ENOMEM;
167 for (i = 0; i < rshader->ninput; i++) {
168 tmp = S_028644_SEMANTIC(i);
169 tmp |= S_028644_SEL_CENTROID(1);
170 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
171 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
172 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
173 }
174 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
175 }
176 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
177 S_0286CC_PERSP_GRADIENT_ENA(1);
178 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
179 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
180 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
181 rpshader->rstate = state;
182 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
183 rpshader->rstate->nbo = 1;
184 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
185 return radeon_state_pm4(state);
186 }
187
188 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
189 {
190 struct r600_screen *rscreen = r600_screen(ctx->screen);
191 struct r600_context *rctx = r600_context(ctx);
192 struct r600_shader *rshader = &rpshader->shader;
193 int r;
194
195 /* copy new shader */
196 radeon_bo_decref(rscreen->rw, rpshader->bo);
197 rpshader->bo = NULL;
198 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
199 4096, NULL);
200 if (rpshader->bo == NULL) {
201 return -ENOMEM;
202 }
203 radeon_bo_map(rscreen->rw, rpshader->bo);
204 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
205 radeon_bo_unmap(rscreen->rw, rpshader->bo);
206 /* build state */
207 rshader->flat_shade = rctx->flat_shade;
208 switch (rshader->processor_type) {
209 case TGSI_PROCESSOR_VERTEX:
210 r = r600_pipe_shader_vs(ctx, rpshader);
211 break;
212 case TGSI_PROCESSOR_FRAGMENT:
213 r = r600_pipe_shader_ps(ctx, rpshader);
214 break;
215 default:
216 r = -EINVAL;
217 break;
218 }
219 return r;
220 }
221
222 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
223 {
224 struct r600_context *rctx = r600_context(ctx);
225 int r;
226
227 if (rpshader == NULL)
228 return -EINVAL;
229 /* there should be enough input */
230 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
231 R600_ERR("%d resources provided, expecting %d\n",
232 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
233 return -EINVAL;
234 }
235 r = r600_shader_update(ctx, &rpshader->shader);
236 if (r)
237 return r;
238 return r600_pipe_shader(ctx, rpshader);
239 }
240
241 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
242 {
243 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
244 int j;
245
246 if (i->Instruction.NumDstRegs > 1) {
247 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
248 return -EINVAL;
249 }
250 if (i->Instruction.Predicate) {
251 R600_ERR("predicate unsupported\n");
252 return -EINVAL;
253 }
254 if (i->Instruction.Label) {
255 R600_ERR("label unsupported\n");
256 return -EINVAL;
257 }
258 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
259 if (i->Src[j].Register.Indirect ||
260 i->Src[j].Register.Dimension ||
261 i->Src[j].Register.Absolute) {
262 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
263 return -EINVAL;
264 }
265 }
266 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
267 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
268 R600_ERR("unsupported dst (indirect|dimension)\n");
269 return -EINVAL;
270 }
271 }
272 return 0;
273 }
274
275 static int tgsi_declaration(struct r600_shader_ctx *ctx)
276 {
277 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
278 struct r600_bc_vtx vtx;
279 unsigned i;
280 int r;
281
282 switch (d->Declaration.File) {
283 case TGSI_FILE_INPUT:
284 i = ctx->shader->ninput++;
285 ctx->shader->input[i].name = d->Semantic.Name;
286 ctx->shader->input[i].sid = d->Semantic.Index;
287 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
288 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
289 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
290 /* turn input into fetch */
291 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
292 vtx.inst = 0;
293 vtx.fetch_type = 0;
294 vtx.buffer_id = i;
295 /* register containing the index into the buffer */
296 vtx.src_gpr = 0;
297 vtx.src_sel_x = 0;
298 vtx.mega_fetch_count = 0x1F;
299 vtx.dst_gpr = ctx->shader->input[i].gpr;
300 vtx.dst_sel_x = 0;
301 vtx.dst_sel_y = 1;
302 vtx.dst_sel_z = 2;
303 vtx.dst_sel_w = 3;
304 r = r600_bc_add_vtx(ctx->bc, &vtx);
305 if (r)
306 return r;
307 }
308 break;
309 case TGSI_FILE_OUTPUT:
310 i = ctx->shader->noutput++;
311 ctx->shader->output[i].name = d->Semantic.Name;
312 ctx->shader->output[i].sid = d->Semantic.Index;
313 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
314 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
315 break;
316 case TGSI_FILE_CONSTANT:
317 case TGSI_FILE_TEMPORARY:
318 case TGSI_FILE_SAMPLER:
319 break;
320 default:
321 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
322 return -EINVAL;
323 }
324 return 0;
325 }
326
327 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
328 {
329 struct tgsi_full_immediate *immediate;
330 struct r600_shader_ctx ctx;
331 struct r600_bc_output output;
332 unsigned opcode;
333 int i, r = 0, pos0;
334
335 ctx.bc = &shader->bc;
336 ctx.shader = shader;
337 r = r600_bc_init(ctx.bc, shader->family);
338 if (r)
339 return r;
340 ctx.tokens = tokens;
341 tgsi_scan_shader(tokens, &ctx.info);
342 tgsi_parse_init(&ctx.parse, tokens);
343 ctx.type = ctx.parse.FullHeader.Processor.Processor;
344 shader->processor_type = ctx.type;
345
346 /* register allocations */
347 /* Values [0,127] correspond to GPR[0..127].
348 * Values [256,511] correspond to cfile constants c[0..255].
349 * Other special values are shown in the list below.
350 * 248 SQ_ALU_SRC_0: special constant 0.0.
351 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
352 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
353 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
354 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
355 * 253 SQ_ALU_SRC_LITERAL: literal constant.
356 * 254 SQ_ALU_SRC_PV: previous vector result.
357 * 255 SQ_ALU_SRC_PS: previous scalar result.
358 */
359 for (i = 0; i < TGSI_FILE_COUNT; i++) {
360 ctx.file_offset[i] = 0;
361 }
362 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
363 ctx.file_offset[TGSI_FILE_INPUT] = 1;
364 }
365 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
366 ctx.info.file_count[TGSI_FILE_INPUT];
367 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
368 ctx.info.file_count[TGSI_FILE_OUTPUT];
369 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
370 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
371 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
372 ctx.info.file_count[TGSI_FILE_TEMPORARY];
373
374 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
375 tgsi_parse_token(&ctx.parse);
376 switch (ctx.parse.FullToken.Token.Type) {
377 case TGSI_TOKEN_TYPE_IMMEDIATE:
378 immediate = &ctx.parse.FullToken.FullImmediate;
379 ctx.value[0] = immediate->u[0].Uint;
380 ctx.value[1] = immediate->u[1].Uint;
381 ctx.value[2] = immediate->u[2].Uint;
382 ctx.value[3] = immediate->u[3].Uint;
383 break;
384 case TGSI_TOKEN_TYPE_DECLARATION:
385 r = tgsi_declaration(&ctx);
386 if (r)
387 goto out_err;
388 break;
389 case TGSI_TOKEN_TYPE_INSTRUCTION:
390 r = tgsi_is_supported(&ctx);
391 if (r)
392 goto out_err;
393 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
394 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
395 r = ctx.inst_info->process(&ctx);
396 if (r)
397 goto out_err;
398 r = r600_bc_add_literal(ctx.bc, ctx.value);
399 if (r)
400 goto out_err;
401 break;
402 default:
403 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
404 r = -EINVAL;
405 goto out_err;
406 }
407 }
408 /* export output */
409 for (i = 0, pos0 = 0; i < shader->noutput; i++) {
410 memset(&output, 0, sizeof(struct r600_bc_output));
411 output.gpr = shader->output[i].gpr;
412 output.elem_size = 3;
413 output.swizzle_x = 0;
414 output.swizzle_y = 1;
415 output.swizzle_z = 2;
416 output.swizzle_w = 3;
417 output.barrier = 1;
418 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
419 output.array_base = i - pos0;
420 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
421 switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
422 case TGSI_PROCESSOR_VERTEX:
423 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
424 output.array_base = 60;
425 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
426 /* position doesn't count in array_base */
427 pos0 = 1;
428 }
429 break;
430 case TGSI_PROCESSOR_FRAGMENT:
431 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
432 output.array_base = 0;
433 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
434 } else {
435 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
436 r = -EINVAL;
437 goto out_err;
438 }
439 break;
440 default:
441 R600_ERR("unsupported processor type %d\n", ctx.type);
442 r = -EINVAL;
443 goto out_err;
444 }
445 if (i == (shader->noutput - 1)) {
446 output.end_of_program = 1;
447 }
448 r = r600_bc_add_output(ctx.bc, &output);
449 if (r)
450 goto out_err;
451 }
452 tgsi_parse_free(&ctx.parse);
453 return 0;
454 out_err:
455 tgsi_parse_free(&ctx.parse);
456 return r;
457 }
458
459 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
460 {
461 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
462 return -EINVAL;
463 }
464
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* intentionally unused */
	return 0;
}
469
470 static int tgsi_src(struct r600_shader_ctx *ctx,
471 const struct tgsi_full_src_register *tgsi_src,
472 struct r600_bc_alu_src *r600_src)
473 {
474 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
475 r600_src->sel = tgsi_src->Register.Index;
476 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
477 r600_src->sel = 0;
478 }
479 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
480 return 0;
481 }
482
483 static int tgsi_dst(struct r600_shader_ctx *ctx,
484 const struct tgsi_full_dst_register *tgsi_dst,
485 unsigned swizzle,
486 struct r600_bc_alu_dst *r600_dst)
487 {
488 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
489
490 r600_dst->sel = tgsi_dst->Register.Index;
491 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
492 r600_dst->chan = swizzle;
493 r600_dst->write = 1;
494 if (inst->Instruction.Saturate) {
495 r600_dst->clamp = 1;
496 }
497 return 0;
498 }
499
500 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
501 {
502 switch (swizzle) {
503 case 0:
504 return tgsi_src->Register.SwizzleX;
505 case 1:
506 return tgsi_src->Register.SwizzleY;
507 case 2:
508 return tgsi_src->Register.SwizzleZ;
509 case 3:
510 return tgsi_src->Register.SwizzleW;
511 default:
512 return 0;
513 }
514 }
515
516 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
517 {
518 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
519 struct r600_bc_alu alu;
520 int i, j, k, nconst, r;
521
522 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
523 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
524 nconst++;
525 }
526 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
527 if (r) {
528 return r;
529 }
530 }
531 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
532 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
533 for (k = 0; k < 4; k++) {
534 memset(&alu, 0, sizeof(struct r600_bc_alu));
535 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
536 alu.src[0].sel = r600_src[0].sel;
537 alu.src[0].chan = k;
538 alu.dst.sel = ctx->temp_reg + j;
539 alu.dst.chan = k;
540 alu.dst.write = 1;
541 if (k == 3)
542 alu.last = 1;
543 r = r600_bc_add_alu(ctx->bc, &alu);
544 if (r)
545 return r;
546 }
547 r600_src[0].sel = ctx->temp_reg + j;
548 j--;
549 }
550 }
551 return 0;
552 }
553
554 static int tgsi_op2(struct r600_shader_ctx *ctx)
555 {
556 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
557 struct r600_bc_alu_src r600_src[3];
558 struct r600_bc_alu alu;
559 int i, j, r;
560
561 r = tgsi_split_constant(ctx, r600_src);
562 if (r)
563 return r;
564 for (i = 0; i < 4; i++) {
565 memset(&alu, 0, sizeof(struct r600_bc_alu));
566 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
567 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
568 alu.dst.chan = i;
569 } else {
570 alu.inst = ctx->inst_info->r600_opcode;
571 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
572 alu.src[j] = r600_src[j];
573 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
574 }
575 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
576 if (r)
577 return r;
578 }
579 /* handle some special cases */
580 switch (ctx->inst_info->tgsi_opcode) {
581 case TGSI_OPCODE_SUB:
582 alu.src[1].neg = 1;
583 break;
584 case TGSI_OPCODE_ABS:
585 alu.src[0].abs = 1;
586 break;
587 default:
588 break;
589 }
590 if (i == 3) {
591 alu.last = 1;
592 }
593 r = r600_bc_add_alu(ctx->bc, &alu);
594 if (r)
595 return r;
596 }
597 return 0;
598 }
599
600 static int tgsi_kill(struct r600_shader_ctx *ctx)
601 {
602 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
603 struct r600_bc_alu alu;
604 int i, r;
605
606 for (i = 0; i < 4; i++) {
607 memset(&alu, 0, sizeof(struct r600_bc_alu));
608 alu.inst = ctx->inst_info->r600_opcode;
609 alu.dst.chan = i;
610 alu.src[0].sel = 248;
611 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
612 if (r)
613 return r;
614 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
615 if (i == 3) {
616 alu.last = 1;
617 }
618 r = r600_bc_add_alu(ctx->bc, &alu);
619 if (r)
620 return r;
621 }
622 return 0;
623 }
624
625 static int tgsi_slt(struct r600_shader_ctx *ctx)
626 {
627 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
628 struct r600_bc_alu_src r600_src[3];
629 struct r600_bc_alu alu;
630 int i, r;
631
632 r = tgsi_split_constant(ctx, r600_src);
633 if (r)
634 return r;
635 for (i = 0; i < 4; i++) {
636 memset(&alu, 0, sizeof(struct r600_bc_alu));
637 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
638 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
639 alu.dst.chan = i;
640 } else {
641 alu.inst = ctx->inst_info->r600_opcode;
642 alu.src[1] = r600_src[0];
643 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
644 alu.src[0] = r600_src[1];
645 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
646 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
647 if (r)
648 return r;
649 }
650 if (i == 3) {
651 alu.last = 1;
652 }
653 r = r600_bc_add_alu(ctx->bc, &alu);
654 if (r)
655 return r;
656 }
657 return 0;
658 }
659
660 static int tgsi_lit(struct r600_shader_ctx *ctx)
661 {
662 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
663 struct r600_bc_alu alu;
664 int r;
665
666 /* dst.x, <- 1.0 */
667 memset(&alu, 0, sizeof(struct r600_bc_alu));
668 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
669 alu.src[0].sel = 249; /*1.0*/
670 alu.src[0].chan = 0;
671 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
672 if (r)
673 return r;
674 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
675 r = r600_bc_add_alu(ctx->bc, &alu);
676 if (r)
677 return r;
678
679 /* dst.y = max(src.x, 0.0) */
680 memset(&alu, 0, sizeof(struct r600_bc_alu));
681 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
682 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
683 if (r)
684 return r;
685 alu.src[1].sel = 248; /*0.0*/
686 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
687 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
688 if (r)
689 return r;
690 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
691 r = r600_bc_add_alu(ctx->bc, &alu);
692 if (r)
693 return r;
694
695 /* dst.z = NOP - fill Z slot */
696 memset(&alu, 0, sizeof(struct r600_bc_alu));
697 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
698 alu.dst.chan = 2;
699 r = r600_bc_add_alu(ctx->bc, &alu);
700 if (r)
701 return r;
702
703 /* dst.w, <- 1.0 */
704 memset(&alu, 0, sizeof(struct r600_bc_alu));
705 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
706 alu.src[0].sel = 249;
707 alu.src[0].chan = 0;
708 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
709 if (r)
710 return r;
711 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
712 alu.last = 1;
713 r = r600_bc_add_alu(ctx->bc, &alu);
714 if (r)
715 return r;
716
717 if (inst->Dst[0].Register.WriteMask & (1 << 2))
718 {
719 int chan;
720 int sel;
721
722 /* dst.z = log(src.y) */
723 memset(&alu, 0, sizeof(struct r600_bc_alu));
724 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
725 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
726 if (r)
727 return r;
728 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
729 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
730 if (r)
731 return r;
732 alu.last = 1;
733 r = r600_bc_add_alu(ctx->bc, &alu);
734 if (r)
735 return r;
736
737 chan = alu.dst.chan;
738 sel = alu.dst.sel;
739
740 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
741 memset(&alu, 0, sizeof(struct r600_bc_alu));
742 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
743 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
744 if (r)
745 return r;
746 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
747 alu.src[1].sel = sel;
748 alu.src[1].chan = chan;
749 r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
750 if (r)
751 return r;
752 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
753 alu.dst.sel = ctx->temp_reg;
754 alu.dst.chan = 0;
755 alu.dst.write = 1;
756 alu.is_op3 = 1;
757 alu.last = 1;
758 r = r600_bc_add_alu(ctx->bc, &alu);
759 if (r)
760 return r;
761
762 /* dst.z = exp(tmp.x) */
763 memset(&alu, 0, sizeof(struct r600_bc_alu));
764 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
765 alu.src[0].sel = ctx->temp_reg;
766 alu.src[0].chan = 0;
767 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
768 if (r)
769 return r;
770 alu.last = 1;
771 r = r600_bc_add_alu(ctx->bc, &alu);
772 if (r)
773 return r;
774 }
775 return 0;
776 }
777
778 static int tgsi_trans(struct r600_shader_ctx *ctx)
779 {
780 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
781 struct r600_bc_alu alu;
782 int i, j, r;
783
784 for (i = 0; i < 4; i++) {
785 memset(&alu, 0, sizeof(struct r600_bc_alu));
786 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
787 alu.inst = ctx->inst_info->r600_opcode;
788 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
789 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
790 if (r)
791 return r;
792 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
793 }
794 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
795 if (r)
796 return r;
797 alu.last = 1;
798 r = r600_bc_add_alu(ctx->bc, &alu);
799 if (r)
800 return r;
801 }
802 }
803 return 0;
804 }
805
806 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
807 {
808 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
809 struct r600_bc_alu alu;
810 int i, j, r;
811
812 memset(&alu, 0, sizeof(struct r600_bc_alu));
813 alu.inst = ctx->inst_info->r600_opcode;
814 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
815 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
816 if (r)
817 return r;
818 alu.src[j].chan = tgsi_chan(&inst->Src[j], 0);
819 }
820 alu.dst.sel = ctx->temp_reg;
821 alu.dst.write = 1;
822 alu.last = 1;
823 r = r600_bc_add_alu(ctx->bc, &alu);
824 if (r)
825 return r;
826 /* replicate result */
827 for (i = 0; i < 4; i++) {
828 memset(&alu, 0, sizeof(struct r600_bc_alu));
829 alu.src[0].sel = ctx->temp_reg;
830 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
831 alu.dst.chan = i;
832 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
833 if (r)
834 return r;
835 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
836 if (i == 3)
837 alu.last = 1;
838 r = r600_bc_add_alu(ctx->bc, &alu);
839 if (r)
840 return r;
841 }
842 return 0;
843 }
844
845 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
846 {
847 struct r600_bc_alu alu;
848 int i, r;
849
850 r = r600_bc_add_literal(ctx->bc, ctx->value);
851 if (r)
852 return r;
853 for (i = 0; i < 4; i++) {
854 memset(&alu, 0, sizeof(struct r600_bc_alu));
855 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
856 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
857 alu.dst.chan = i;
858 } else {
859 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
860 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
861 if (r)
862 return r;
863 alu.src[0].sel = ctx->temp_reg;
864 alu.src[0].chan = i;
865 }
866 if (i == 3) {
867 alu.last = 1;
868 }
869 r = r600_bc_add_alu(ctx->bc, &alu);
870 if (r)
871 return r;
872 }
873 return 0;
874 }
875
876 static int tgsi_op3(struct r600_shader_ctx *ctx)
877 {
878 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
879 struct r600_bc_alu_src r600_src[3];
880 struct r600_bc_alu alu;
881 int i, j, r;
882
883 r = tgsi_split_constant(ctx, r600_src);
884 if (r)
885 return r;
886 /* do it in 2 step as op3 doesn't support writemask */
887 for (i = 0; i < 4; i++) {
888 memset(&alu, 0, sizeof(struct r600_bc_alu));
889 alu.inst = ctx->inst_info->r600_opcode;
890 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
891 alu.src[j] = r600_src[j];
892 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
893 }
894 alu.dst.sel = ctx->temp_reg;
895 alu.dst.chan = i;
896 alu.dst.write = 1;
897 alu.is_op3 = 1;
898 if (i == 3) {
899 alu.last = 1;
900 }
901 r = r600_bc_add_alu(ctx->bc, &alu);
902 if (r)
903 return r;
904 }
905 return tgsi_helper_copy(ctx, inst);
906 }
907
908 static int tgsi_dp(struct r600_shader_ctx *ctx)
909 {
910 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
911 struct r600_bc_alu_src r600_src[3];
912 struct r600_bc_alu alu;
913 int i, j, r;
914
915 r = tgsi_split_constant(ctx, r600_src);
916 if (r)
917 return r;
918 for (i = 0; i < 4; i++) {
919 memset(&alu, 0, sizeof(struct r600_bc_alu));
920 alu.inst = ctx->inst_info->r600_opcode;
921 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
922 alu.src[j] = r600_src[j];
923 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
924 }
925 alu.dst.sel = ctx->temp_reg;
926 alu.dst.chan = i;
927 alu.dst.write = 1;
928 /* handle some special cases */
929 switch (ctx->inst_info->tgsi_opcode) {
930 case TGSI_OPCODE_DP2:
931 if (i > 1) {
932 alu.src[0].sel = alu.src[1].sel = 248;
933 alu.src[0].chan = alu.src[1].chan = 0;
934 }
935 break;
936 case TGSI_OPCODE_DP3:
937 if (i > 2) {
938 alu.src[0].sel = alu.src[1].sel = 248;
939 alu.src[0].chan = alu.src[1].chan = 0;
940 }
941 break;
942 default:
943 break;
944 }
945 if (i == 3) {
946 alu.last = 1;
947 }
948 r = r600_bc_add_alu(ctx->bc, &alu);
949 if (r)
950 return r;
951 }
952 return tgsi_helper_copy(ctx, inst);
953 }
954
955 static int tgsi_tex(struct r600_shader_ctx *ctx)
956 {
957 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
958 struct r600_bc_tex tex;
959 struct r600_bc_alu alu;
960 unsigned src_gpr;
961 int r;
962
963 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
964
965 /* Add perspective divide */
966 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) {
967 memset(&alu, 0, sizeof(struct r600_bc_alu));
968 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
969 alu.src[0].sel = src_gpr;
970 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
971 alu.dst.sel = ctx->temp_reg;
972 alu.dst.chan = 3;
973 alu.last = 1;
974 alu.dst.write = 1;
975 r = r600_bc_add_alu(ctx->bc, &alu);
976 if (r)
977 return r;
978
979 memset(&alu, 0, sizeof(struct r600_bc_alu));
980 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
981 alu.src[0].sel = ctx->temp_reg;
982 alu.src[0].chan = 3;
983 alu.src[1].sel = src_gpr;
984 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
985 alu.dst.sel = ctx->temp_reg;
986 alu.dst.chan = 0;
987 alu.dst.write = 1;
988 r = r600_bc_add_alu(ctx->bc, &alu);
989 if (r)
990 return r;
991 memset(&alu, 0, sizeof(struct r600_bc_alu));
992 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
993 alu.src[0].sel = ctx->temp_reg;
994 alu.src[0].chan = 3;
995 alu.src[1].sel = src_gpr;
996 alu.src[1].chan = tgsi_chan(&inst->Src[0], 1);
997 alu.dst.sel = ctx->temp_reg;
998 alu.dst.chan = 1;
999 alu.dst.write = 1;
1000 r = r600_bc_add_alu(ctx->bc, &alu);
1001 if (r)
1002 return r;
1003 memset(&alu, 0, sizeof(struct r600_bc_alu));
1004 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1005 alu.src[0].sel = ctx->temp_reg;
1006 alu.src[0].chan = 3;
1007 alu.src[1].sel = src_gpr;
1008 alu.src[1].chan = tgsi_chan(&inst->Src[0], 2);
1009 alu.dst.sel = ctx->temp_reg;
1010 alu.dst.chan = 2;
1011 alu.dst.write = 1;
1012 r = r600_bc_add_alu(ctx->bc, &alu);
1013 if (r)
1014 return r;
1015 memset(&alu, 0, sizeof(struct r600_bc_alu));
1016 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1017 alu.src[0].sel = 249;
1018 alu.src[0].chan = 0;
1019 alu.dst.sel = ctx->temp_reg;
1020 alu.dst.chan = 3;
1021 alu.last = 1;
1022 alu.dst.write = 1;
1023 r = r600_bc_add_alu(ctx->bc, &alu);
1024 if (r)
1025 return r;
1026 src_gpr = ctx->temp_reg;
1027 }
1028
1029 /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
1030 memset(&tex, 0, sizeof(struct r600_bc_tex));
1031 tex.inst = ctx->inst_info->r600_opcode;
1032 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1033 tex.sampler_id = tex.resource_id;
1034 tex.src_gpr = src_gpr;
1035 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1036 tex.dst_sel_x = 0;
1037 tex.dst_sel_y = 1;
1038 tex.dst_sel_z = 2;
1039 tex.dst_sel_w = 3;
1040 tex.src_sel_x = 0;
1041 tex.src_sel_y = 1;
1042 tex.src_sel_z = 2;
1043 tex.src_sel_w = 3;
1044 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1045 tex.coord_type_x = 1;
1046 tex.coord_type_y = 1;
1047 tex.coord_type_z = 1;
1048 tex.coord_type_w = 1;
1049 }
1050 return r600_bc_add_tex(ctx->bc, &tex);
1051 }
1052
1053 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1054 {
1055 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1056 struct r600_bc_alu_src r600_src[3];
1057 struct r600_bc_alu alu;
1058 unsigned i;
1059 int r;
1060
1061 r = tgsi_split_constant(ctx, r600_src);
1062 if (r)
1063 return r;
1064 /* 1 - src0 */
1065 for (i = 0; i < 4; i++) {
1066 memset(&alu, 0, sizeof(struct r600_bc_alu));
1067 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1068 alu.src[0].sel = 249;
1069 alu.src[0].chan = 0;
1070 alu.src[1] = r600_src[0];
1071 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1072 alu.src[1].neg = 1;
1073 alu.dst.sel = ctx->temp_reg;
1074 alu.dst.chan = i;
1075 if (i == 3) {
1076 alu.last = 1;
1077 }
1078 alu.dst.write = 1;
1079 r = r600_bc_add_alu(ctx->bc, &alu);
1080 if (r)
1081 return r;
1082 }
1083 r = r600_bc_add_literal(ctx->bc, ctx->value);
1084 if (r)
1085 return r;
1086
1087 /* (1 - src0) * src2 */
1088 for (i = 0; i < 4; i++) {
1089 memset(&alu, 0, sizeof(struct r600_bc_alu));
1090 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1091 alu.src[0].sel = ctx->temp_reg;
1092 alu.src[0].chan = i;
1093 alu.src[1] = r600_src[2];
1094 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1095 alu.dst.sel = ctx->temp_reg;
1096 alu.dst.chan = i;
1097 if (i == 3) {
1098 alu.last = 1;
1099 }
1100 alu.dst.write = 1;
1101 r = r600_bc_add_alu(ctx->bc, &alu);
1102 if (r)
1103 return r;
1104 }
1105 r = r600_bc_add_literal(ctx->bc, ctx->value);
1106 if (r)
1107 return r;
1108
1109 /* src0 * src1 + (1 - src0) * src2 */
1110 for (i = 0; i < 4; i++) {
1111 memset(&alu, 0, sizeof(struct r600_bc_alu));
1112 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1113 alu.is_op3 = 1;
1114 alu.src[0] = r600_src[0];
1115 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1116 alu.src[1] = r600_src[1];
1117 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1118 alu.src[2].sel = ctx->temp_reg;
1119 alu.src[2].chan = i;
1120 alu.dst.sel = ctx->temp_reg;
1121 alu.dst.chan = i;
1122 if (i == 3) {
1123 alu.last = 1;
1124 }
1125 r = r600_bc_add_alu(ctx->bc, &alu);
1126 if (r)
1127 return r;
1128 }
1129 return tgsi_helper_copy(ctx, inst);
1130 }
1131
1132 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1133 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1134 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1135 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1136 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
1137 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
1138 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1139 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1140 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1141 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1142 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1143 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1144 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1145 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1146 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1147 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1148 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1149 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1150 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1151 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1152 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1153 /* gap */
1154 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1155 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1156 /* gap */
1157 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1158 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1159 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1160 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1161 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1162 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1163 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
1164 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1165 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1166 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1167 /* gap */
1168 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1169 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1170 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1171 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1172 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1173 {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1174 {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1175 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1176 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1177 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1178 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1179 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1180 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1181 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1182 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1183 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1184 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1185 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1186 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1187 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1188 {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex},
1189 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1190 {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex},
1191 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1192 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1193 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1194 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1195 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1196 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1197 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1198 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1199 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1200 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1201 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
1202 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1203 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1204 {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1205 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1206 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1207 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1208 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1209 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1210 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1211 /* gap */
1212 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1213 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1214 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1215 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1216 /* gap */
1217 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1218 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1219 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1220 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1221 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1222 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1223 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1224 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1225 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1226 /* gap */
1227 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1228 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1229 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1230 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1231 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1232 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1233 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1234 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1235 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1236 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1237 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1238 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1239 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1240 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1241 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1242 /* gap */
1243 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1244 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1245 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1246 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1247 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1248 /* gap */
1249 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1250 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1251 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1252 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1253 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1254 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1255 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1256 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1257 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
1258 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1259 /* gap */
1260 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1261 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1262 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1263 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1264 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1265 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1266 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1267 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1268 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1269 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1270 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1271 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1272 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1273 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1274 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1275 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1276 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1277 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1278 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1279 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1280 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1281 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1282 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1283 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1284 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1285 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1286 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1287 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1288 };