3909c704e76d7b4aba30a6a2062786e4cff48498
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
/*
 * One row of the TGSI opcode translation table.  The table is indexed by
 * TGSI opcode and initialised positionally, so the member order below must
 * not change.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this row describes */
	unsigned	tgsi_opcode;
	/* non zero when the r600 instruction is a three operand (OP3) one */
	unsigned	is_op3;
	/* r600 ALU/TEX opcode emitted by the handler */
	unsigned	r600_opcode;
	/* handler emitting the bytecode; returns 0 or a negative errno */
	int		(*process)(struct r600_shader_ctx *ctx);
};
59
60 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx,
102 const struct tgsi_token *tokens)
103 {
104 struct r600_screen *rscreen = r600_screen(ctx->screen);
105 struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader);
106 int r;
107
108 fprintf(stderr, "--------------------------------------------------------------\n");
109 tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return NULL;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 goto out_err;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 goto out_err;
122 }
123 fprintf(stderr, "______________________________________________________________\n");
124 return rpshader;
125 out_err:
126 free(rpshader);
127 return NULL;
128 }
129
130 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
131 {
132 struct r600_screen *rscreen = r600_screen(ctx->screen);
133 struct r600_shader *rshader = &rpshader->shader;
134 struct radeon_state *state;
135 unsigned i, j, tmp;
136
137 rpshader->state = radeon_state_decref(rpshader->state);
138 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
139 if (state == NULL)
140 return -ENOMEM;
141 for (i = 0; i < 10; i++) {
142 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
143 }
144 for (i = 0, j = 0; i < rshader->noutput; i++) {
145 if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) {
146 tmp = rshader->output[i].sid << ((j & 3) * 8);
147 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp;
148 j++;
149 }
150 }
151 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
152 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
153 rpshader->state = state;
154 rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
155 rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
156 rpshader->state->nbo = 2;
157 rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT;
158 return radeon_state_pm4(state);
159 }
160
161 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
162 {
163 struct r600_screen *rscreen = r600_screen(ctx->screen);
164 struct r600_shader *rshader = &rpshader->shader;
165 struct radeon_state *state;
166 unsigned i, tmp;
167
168 rpshader->state = radeon_state_decref(rpshader->state);
169 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
170 if (state == NULL)
171 return -ENOMEM;
172 for (i = 0; i < rshader->ninput; i++) {
173 tmp = S_028644_SEMANTIC(rshader->input[i].sid);
174 tmp |= S_028644_SEL_CENTROID(1);
175 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
176 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
177 }
178 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
179 S_0286CC_PERSP_GRADIENT_ENA(1);
180 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
181 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
182 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
183 rpshader->state = state;
184 rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
185 rpshader->state->nbo = 1;
186 rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT;
187 return radeon_state_pm4(state);
188 }
189
190 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
191 {
192 struct r600_screen *rscreen = r600_screen(ctx->screen);
193 struct r600_context *rctx = r600_context(ctx);
194 struct r600_shader *rshader = &rpshader->shader;
195 int r;
196
197 /* copy new shader */
198 radeon_bo_decref(rscreen->rw, rpshader->bo);
199 rpshader->bo = NULL;
200 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
201 4096, NULL);
202 if (rpshader->bo == NULL) {
203 return -ENOMEM;
204 }
205 radeon_bo_map(rscreen->rw, rpshader->bo);
206 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
207 radeon_bo_unmap(rscreen->rw, rpshader->bo);
208 /* build state */
209 rshader->flat_shade = rctx->flat_shade;
210 switch (rshader->processor_type) {
211 case TGSI_PROCESSOR_VERTEX:
212 r = r600_pipe_shader_vs(ctx, rpshader);
213 break;
214 case TGSI_PROCESSOR_FRAGMENT:
215 r = r600_pipe_shader_ps(ctx, rpshader);
216 break;
217 default:
218 r = -EINVAL;
219 break;
220 }
221 return r;
222 }
223
224 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
225 {
226 struct r600_context *rctx = r600_context(ctx);
227 int r;
228
229 if (rpshader == NULL)
230 return -EINVAL;
231 /* there should be enough input */
232 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
233 R600_ERR("%d resources provided, expecting %d\n",
234 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
235 return -EINVAL;
236 }
237 r = r600_shader_update(ctx, &rpshader->shader);
238 if (r)
239 return r;
240 return r600_pipe_shader(ctx, rpshader);
241 }
242
243 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
244 {
245 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
246 int j;
247
248 if (i->Instruction.NumDstRegs > 1) {
249 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
250 return -EINVAL;
251 }
252 if (i->Instruction.Saturate) {
253 R600_ERR("staturate unsupported\n");
254 return -EINVAL;
255 }
256 if (i->Instruction.Predicate) {
257 R600_ERR("predicate unsupported\n");
258 return -EINVAL;
259 }
260 if (i->Instruction.Label) {
261 R600_ERR("label unsupported\n");
262 return -EINVAL;
263 }
264 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
265 if (i->Src[j].Register.Indirect ||
266 i->Src[j].Register.Dimension ||
267 i->Src[j].Register.Absolute) {
268 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
269 return -EINVAL;
270 }
271 }
272 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
273 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
274 R600_ERR("unsupported dst (indirect|dimension)\n");
275 return -EINVAL;
276 }
277 }
278 return 0;
279 }
280
281 static int tgsi_declaration(struct r600_shader_ctx *ctx)
282 {
283 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
284 struct r600_bc_vtx vtx;
285 unsigned i;
286 int r;
287
288 switch (d->Declaration.File) {
289 case TGSI_FILE_INPUT:
290 i = ctx->shader->ninput++;
291 ctx->shader->input[i].name = d->Semantic.Name;
292 ctx->shader->input[i].sid = d->Semantic.Index;
293 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
294 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
295 /* turn input into fetch */
296 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
297 vtx.inst = 0;
298 vtx.fetch_type = 0;
299 vtx.buffer_id = i;
300 /* register containing the index into the buffer */
301 vtx.src_gpr = 0;
302 vtx.src_sel_x = 0;
303 vtx.mega_fetch_count = 0x1F;
304 vtx.dst_gpr = ctx->shader->input[i].gpr;
305 vtx.dst_sel_x = 0;
306 vtx.dst_sel_y = 1;
307 vtx.dst_sel_z = 2;
308 vtx.dst_sel_w = 3;
309 r = r600_bc_add_vtx(ctx->bc, &vtx);
310 if (r)
311 return r;
312 }
313 break;
314 case TGSI_FILE_OUTPUT:
315 i = ctx->shader->noutput++;
316 ctx->shader->output[i].name = d->Semantic.Name;
317 ctx->shader->output[i].sid = d->Semantic.Index;
318 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
319 break;
320 case TGSI_FILE_CONSTANT:
321 case TGSI_FILE_TEMPORARY:
322 case TGSI_FILE_SAMPLER:
323 break;
324 default:
325 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
326 return -EINVAL;
327 }
328 return 0;
329 }
330
331 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
332 {
333 struct tgsi_full_immediate *immediate;
334 struct r600_shader_ctx ctx;
335 struct r600_bc_output output;
336 unsigned opcode;
337 int i, r = 0, pos0;
338
339 ctx.bc = &shader->bc;
340 ctx.shader = shader;
341 r = r600_bc_init(ctx.bc, shader->family);
342 if (r)
343 return r;
344 ctx.tokens = tokens;
345 tgsi_scan_shader(tokens, &ctx.info);
346 tgsi_parse_init(&ctx.parse, tokens);
347 ctx.type = ctx.parse.FullHeader.Processor.Processor;
348 shader->processor_type = ctx.type;
349
350 /* register allocations */
351 /* Values [0,127] correspond to GPR[0..127].
352 * Values [256,511] correspond to cfile constants c[0..255].
353 * Other special values are shown in the list below.
354 * 248 SQ_ALU_SRC_0: special constant 0.0.
355 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
356 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
357 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
358 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
359 * 253 SQ_ALU_SRC_LITERAL: literal constant.
360 * 254 SQ_ALU_SRC_PV: previous vector result.
361 * 255 SQ_ALU_SRC_PS: previous scalar result.
362 */
363 for (i = 0; i < TGSI_FILE_COUNT; i++) {
364 ctx.file_offset[i] = 0;
365 }
366 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
367 ctx.file_offset[TGSI_FILE_INPUT] = 1;
368 }
369 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
370 ctx.info.file_count[TGSI_FILE_INPUT];
371 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
372 ctx.info.file_count[TGSI_FILE_OUTPUT];
373 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
374 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
375 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
376 ctx.info.file_count[TGSI_FILE_TEMPORARY];
377
378 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
379 tgsi_parse_token(&ctx.parse);
380 switch (ctx.parse.FullToken.Token.Type) {
381 case TGSI_TOKEN_TYPE_IMMEDIATE:
382 immediate = &ctx.parse.FullToken.FullImmediate;
383 ctx.value[0] = immediate->u[0].Uint;
384 ctx.value[1] = immediate->u[1].Uint;
385 ctx.value[2] = immediate->u[2].Uint;
386 ctx.value[3] = immediate->u[3].Uint;
387 break;
388 case TGSI_TOKEN_TYPE_DECLARATION:
389 r = tgsi_declaration(&ctx);
390 if (r)
391 goto out_err;
392 break;
393 case TGSI_TOKEN_TYPE_INSTRUCTION:
394 r = tgsi_is_supported(&ctx);
395 if (r)
396 goto out_err;
397 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
398 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
399 r = ctx.inst_info->process(&ctx);
400 if (r)
401 goto out_err;
402 r = r600_bc_add_literal(ctx.bc, ctx.value);
403 if (r)
404 goto out_err;
405 break;
406 default:
407 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
408 r = -EINVAL;
409 goto out_err;
410 }
411 }
412 /* export output */
413 for (i = 0, pos0 = 0; i < shader->noutput; i++) {
414 memset(&output, 0, sizeof(struct r600_bc_output));
415 output.gpr = shader->output[i].gpr;
416 output.elem_size = 3;
417 output.swizzle_x = 0;
418 output.swizzle_y = 1;
419 output.swizzle_z = 2;
420 output.swizzle_w = 3;
421 output.barrier = 1;
422 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
423 output.array_base = i - pos0;
424 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
425 switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
426 case TGSI_PROCESSOR_VERTEX:
427 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
428 output.array_base = 60;
429 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
430 /* position doesn't count in array_base */
431 pos0 = 1;
432 }
433 break;
434 case TGSI_PROCESSOR_FRAGMENT:
435 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
436 output.array_base = 0;
437 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
438 } else {
439 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
440 r = -EINVAL;
441 goto out_err;
442 }
443 break;
444 default:
445 R600_ERR("unsupported processor type %d\n", ctx.type);
446 r = -EINVAL;
447 goto out_err;
448 }
449 if (i == (shader->noutput - 1)) {
450 output.end_of_program = 1;
451 }
452 r = r600_bc_add_output(ctx.bc, &output);
453 if (r)
454 goto out_err;
455 }
456 tgsi_parse_free(&ctx.parse);
457 return 0;
458 out_err:
459 tgsi_parse_free(&ctx.parse);
460 return r;
461 }
462
463 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
464 {
465 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
466 return -EINVAL;
467 }
468
/*
 * Handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
473
474 static int tgsi_src(struct r600_shader_ctx *ctx,
475 const struct tgsi_full_src_register *tgsi_src,
476 unsigned swizzle,
477 struct r600_bc_alu_src *r600_src)
478 {
479 r600_src->sel = tgsi_src->Register.Index;
480 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
481 r600_src->sel = 0;
482 }
483 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
484 switch (swizzle) {
485 case 0:
486 r600_src->chan = tgsi_src->Register.SwizzleX;
487 break;
488 case 1:
489 r600_src->chan = tgsi_src->Register.SwizzleY;
490 break;
491 case 2:
492 r600_src->chan = tgsi_src->Register.SwizzleZ;
493 break;
494 case 3:
495 r600_src->chan = tgsi_src->Register.SwizzleW;
496 break;
497 default:
498 return -EINVAL;
499 }
500 return 0;
501 }
502
503 static int tgsi_dst(struct r600_shader_ctx *ctx,
504 const struct tgsi_full_dst_register *tgsi_dst,
505 unsigned swizzle,
506 struct r600_bc_alu_dst *r600_dst)
507 {
508 r600_dst->sel = tgsi_dst->Register.Index;
509 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
510 r600_dst->chan = swizzle;
511 r600_dst->write = 1;
512 return 0;
513 }
514
515 static int tgsi_op2(struct r600_shader_ctx *ctx)
516 {
517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
518 struct r600_bc_alu alu;
519 int i, j, r;
520
521 for (i = 0; i < 4; i++) {
522 memset(&alu, 0, sizeof(struct r600_bc_alu));
523 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
524 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
525 } else {
526 alu.inst = ctx->inst_info->r600_opcode;
527 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
528 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
529 if (r)
530 return r;
531 }
532 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
533 if (r)
534 return r;
535 }
536 /* handle some special cases */
537 switch (ctx->inst_info->tgsi_opcode) {
538 case TGSI_OPCODE_SUB:
539 alu.src[1].neg = 1;
540 break;
541 default:
542 break;
543 }
544 if (i == 3) {
545 alu.last = 1;
546 }
547 r = r600_bc_add_alu(ctx->bc, &alu);
548 if (r)
549 return r;
550 }
551 return 0;
552 }
553
554 static int tgsi_slt(struct r600_shader_ctx *ctx)
555 {
556 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
557 struct r600_bc_alu alu;
558 int i, r;
559
560 for (i = 0; i < 4; i++) {
561 memset(&alu, 0, sizeof(struct r600_bc_alu));
562 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
563 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
564 } else {
565 alu.inst = ctx->inst_info->r600_opcode;
566 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
567 if (r)
568 return r;
569 r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]);
570 if (r)
571 return r;
572 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
573 if (r)
574 return r;
575 }
576 if (i == 3) {
577 alu.last = 1;
578 }
579 r = r600_bc_add_alu(ctx->bc, &alu);
580 if (r)
581 return r;
582 }
583 return 0;
584 }
585
586 static int tgsi_lit(struct r600_shader_ctx *ctx)
587 {
588 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
589 struct r600_bc_alu alu;
590 int r;
591
592 if (inst->Dst[0].Register.WriteMask & (1 << 0))
593 {
594 /* dst.x, <- 1.0 */
595 memset(&alu, 0, sizeof(struct r600_bc_alu));
596 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
597 alu.src[0].sel = 249; /*1.0*/
598 alu.src[0].chan = 0;
599 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
600 if (r)
601 return r;
602 if ((inst->Dst[0].Register.WriteMask & 0xe) == 0)
603 alu.last = 1;
604 r = r600_bc_add_alu(ctx->bc, &alu);
605 if (r)
606 return r;
607 }
608
609
610 if (inst->Dst[0].Register.WriteMask & (1 << 1))
611 {
612 /* dst.y = max(src.x, 0.0) */
613 memset(&alu, 0, sizeof(struct r600_bc_alu));
614 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
615 r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]);
616 if (r)
617 return r;
618 alu.src[1].sel = 248; /*0.0*/
619 alu.src[1].chan = 0;
620 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
621 if (r)
622 return r;
623 if ((inst->Dst[0].Register.WriteMask & 0xa) == 0)
624 alu.last = 1;
625 r = r600_bc_add_alu(ctx->bc, &alu);
626 if (r)
627 return r;
628 }
629
630 if (inst->Dst[0].Register.WriteMask & (1 << 3))
631 {
632 /* dst.w, <- 1.0 */
633 memset(&alu, 0, sizeof(struct r600_bc_alu));
634 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
635 alu.src[0].sel = 249;
636 alu.src[0].chan = 0;
637 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
638 if (r)
639 return r;
640 if ((inst->Dst[0].Register.WriteMask & 0x4) == 0)
641 alu.last = 1;
642 r = r600_bc_add_alu(ctx->bc, &alu);
643 if (r)
644 return r;
645 }
646
647 if (inst->Dst[0].Register.WriteMask & (1 << 2))
648 {
649 int chan;
650 int sel;
651
652 /* dst.z = log(src.y) */
653 memset(&alu, 0, sizeof(struct r600_bc_alu));
654 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
655 r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]);
656 if (r)
657 return r;
658 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
659 if (r)
660 return r;
661 alu.last = 1;
662 r = r600_bc_add_alu(ctx->bc, &alu);
663 if (r)
664 return r;
665
666 chan = alu.dst.chan;
667 sel = alu.dst.sel;
668
669 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
670 memset(&alu, 0, sizeof(struct r600_bc_alu));
671 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
672 r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]);
673 if (r)
674 return r;
675 alu.src[1].sel = sel;
676 alu.src[1].chan = chan;
677 r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]);
678 if (r)
679 return r;
680 alu.dst.sel = ctx->temp_reg;
681 alu.dst.chan = 0;
682 alu.dst.write = 1;
683 alu.is_op3 = 1;
684 alu.last = 1;
685 r = r600_bc_add_alu(ctx->bc, &alu);
686 if (r)
687 return r;
688
689 /* dst.z = exp(tmp.x) */
690 memset(&alu, 0, sizeof(struct r600_bc_alu));
691 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
692 alu.src[0].sel = ctx->temp_reg;
693 alu.src[0].chan = 0;
694 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
695 if (r)
696 return r;
697 alu.last = 1;
698 r = r600_bc_add_alu(ctx->bc, &alu);
699 if (r)
700 return r;
701 }
702 return 0;
703 }
704
705 static int tgsi_trans(struct r600_shader_ctx *ctx)
706 {
707 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
708 struct r600_bc_alu alu;
709 int i, j, r;
710
711 for (i = 0; i < 4; i++) {
712 memset(&alu, 0, sizeof(struct r600_bc_alu));
713 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
714 alu.inst = ctx->inst_info->r600_opcode;
715 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
716 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
717 if (r)
718 return r;
719 }
720 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
721 if (r)
722 return r;
723 alu.last = 1;
724 r = r600_bc_add_alu(ctx->bc, &alu);
725 if (r)
726 return r;
727 }
728 }
729 return 0;
730 }
731
732 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
733 {
734 struct r600_bc_alu alu;
735 int i, r;
736
737 r = r600_bc_add_literal(ctx->bc, ctx->value);
738 if (r)
739 return r;
740 for (i = 0; i < 4; i++) {
741 memset(&alu, 0, sizeof(struct r600_bc_alu));
742 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
743 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
744 } else {
745 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
746 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
747 if (r)
748 return r;
749 alu.src[0].sel = ctx->temp_reg;
750 alu.src[0].chan = i;
751 }
752 if (i == 3) {
753 alu.last = 1;
754 }
755 r = r600_bc_add_alu(ctx->bc, &alu);
756 if (r)
757 return r;
758 }
759 return 0;
760 }
761
762 static int tgsi_op3(struct r600_shader_ctx *ctx)
763 {
764 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
765 struct r600_bc_alu alu;
766 int i, j, r;
767
768 /* do it in 2 step as op3 doesn't support writemask */
769 for (i = 0; i < 4; i++) {
770 memset(&alu, 0, sizeof(struct r600_bc_alu));
771 alu.inst = ctx->inst_info->r600_opcode;
772 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
773 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
774 if (r)
775 return r;
776 }
777 alu.dst.sel = ctx->temp_reg;
778 alu.dst.chan = i;
779 alu.dst.write = 1;
780 alu.is_op3 = 1;
781 if (i == 3) {
782 alu.last = 1;
783 }
784 r = r600_bc_add_alu(ctx->bc, &alu);
785 if (r)
786 return r;
787 }
788 return tgsi_helper_copy(ctx, inst);
789 }
790
791 static int tgsi_dp(struct r600_shader_ctx *ctx)
792 {
793 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
794 struct r600_bc_alu alu;
795 int i, j, r;
796
797 for (i = 0; i < 4; i++) {
798 memset(&alu, 0, sizeof(struct r600_bc_alu));
799 alu.inst = ctx->inst_info->r600_opcode;
800 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
801 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
802 if (r)
803 return r;
804 }
805 alu.dst.sel = ctx->temp_reg;
806 alu.dst.chan = i;
807 alu.dst.write = 1;
808 /* handle some special cases */
809 switch (ctx->inst_info->tgsi_opcode) {
810 case TGSI_OPCODE_DP2:
811 if (i > 1) {
812 alu.src[0].sel = alu.src[1].sel = 248;
813 alu.src[0].chan = alu.src[1].chan = 0;
814 }
815 break;
816 case TGSI_OPCODE_DP3:
817 if (i > 2) {
818 alu.src[0].sel = alu.src[1].sel = 248;
819 alu.src[0].chan = alu.src[1].chan = 0;
820 }
821 break;
822 default:
823 break;
824 }
825 if (i == 3) {
826 alu.last = 1;
827 }
828 r = r600_bc_add_alu(ctx->bc, &alu);
829 if (r)
830 return r;
831 }
832 return tgsi_helper_copy(ctx, inst);
833 }
834
835 static int tgsi_tex(struct r600_shader_ctx *ctx)
836 {
837 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
838 struct r600_bc_tex tex;
839 struct r600_bc_alu alu;
840 unsigned src_gpr;
841 int r;
842
843 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
844
845 /* Add perspective divide */
846 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) {
847 memset(&alu, 0, sizeof(struct r600_bc_alu));
848 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
849 alu.src[0].sel = src_gpr;
850 alu.src[0].chan = 3;
851 alu.dst.sel = ctx->temp_reg;
852 alu.dst.chan = 3;
853 alu.last = 1;
854 alu.dst.write = 1;
855 r = r600_bc_add_alu(ctx->bc, &alu);
856 if (r)
857 return r;
858
859 memset(&alu, 0, sizeof(struct r600_bc_alu));
860 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
861 alu.src[0].sel = ctx->temp_reg;
862 alu.src[0].chan = 3;
863 alu.src[1].sel = src_gpr;
864 alu.src[1].chan = 0;
865 alu.dst.sel = ctx->temp_reg;
866 alu.dst.chan = 0;
867 alu.dst.write = 1;
868 r = r600_bc_add_alu(ctx->bc, &alu);
869 if (r)
870 return r;
871 memset(&alu, 0, sizeof(struct r600_bc_alu));
872 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
873 alu.src[0].sel = ctx->temp_reg;
874 alu.src[0].chan = 3;
875 alu.src[1].sel = src_gpr;
876 alu.src[1].chan = 1;
877 alu.dst.sel = ctx->temp_reg;
878 alu.dst.chan = 1;
879 alu.dst.write = 1;
880 r = r600_bc_add_alu(ctx->bc, &alu);
881 if (r)
882 return r;
883 memset(&alu, 0, sizeof(struct r600_bc_alu));
884 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
885 alu.src[0].sel = ctx->temp_reg;
886 alu.src[0].chan = 3;
887 alu.src[1].sel = src_gpr;
888 alu.src[1].chan = 2;
889 alu.dst.sel = ctx->temp_reg;
890 alu.dst.chan = 2;
891 alu.dst.write = 1;
892 r = r600_bc_add_alu(ctx->bc, &alu);
893 if (r)
894 return r;
895 memset(&alu, 0, sizeof(struct r600_bc_alu));
896 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
897 alu.src[0].sel = 249;
898 alu.src[0].chan = 0;
899 alu.dst.sel = ctx->temp_reg;
900 alu.dst.chan = 3;
901 alu.last = 1;
902 alu.dst.write = 1;
903 r = r600_bc_add_alu(ctx->bc, &alu);
904 if (r)
905 return r;
906 src_gpr = ctx->temp_reg;
907 }
908
909 /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
910 memset(&tex, 0, sizeof(struct r600_bc_tex));
911 tex.inst = ctx->inst_info->r600_opcode;
912 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
913 tex.sampler_id = tex.resource_id;
914 tex.src_gpr = src_gpr;
915 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index;
916 tex.dst_sel_x = 0;
917 tex.dst_sel_y = 1;
918 tex.dst_sel_z = 2;
919 tex.dst_sel_w = 3;
920 tex.src_sel_x = 0;
921 tex.src_sel_y = 1;
922 tex.src_sel_z = 2;
923 tex.src_sel_w = 3;
924 tex.coord_type_x = 1;
925 tex.coord_type_y = 1;
926 tex.coord_type_z = 1;
927 tex.coord_type_w = 1;
928 return r600_bc_add_tex(ctx->bc, &tex);
929 }
930
931 static int tgsi_lrp(struct r600_shader_ctx *ctx)
932 {
933 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
934 struct r600_bc_alu alu;
935 unsigned i;
936 int r;
937
938 /* 1 - src0 */
939 for (i = 0; i < 4; i++) {
940 memset(&alu, 0, sizeof(struct r600_bc_alu));
941 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
942 alu.src[0].sel = 249;
943 alu.src[0].chan = 0;
944 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
945 if (r)
946 return r;
947 alu.src[1].neg = 1;
948 alu.dst.sel = ctx->temp_reg;
949 alu.dst.chan = i;
950 if (i == 3) {
951 alu.last = 1;
952 }
953 alu.dst.write = 1;
954 r = r600_bc_add_alu(ctx->bc, &alu);
955 if (r)
956 return r;
957 }
958 r = r600_bc_add_literal(ctx->bc, ctx->value);
959 if (r)
960 return r;
961
962 /* (1 - src0) * src2 */
963 for (i = 0; i < 4; i++) {
964 memset(&alu, 0, sizeof(struct r600_bc_alu));
965 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
966 alu.src[0].sel = ctx->temp_reg;
967 alu.src[0].chan = i;
968 r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]);
969 if (r)
970 return r;
971 alu.dst.sel = ctx->temp_reg;
972 alu.dst.chan = i;
973 if (i == 3) {
974 alu.last = 1;
975 }
976 alu.dst.write = 1;
977 r = r600_bc_add_alu(ctx->bc, &alu);
978 if (r)
979 return r;
980 }
981 r = r600_bc_add_literal(ctx->bc, ctx->value);
982 if (r)
983 return r;
984
985 /* src0 * src1 + (1 - src0) * src2 */
986 for (i = 0; i < 4; i++) {
987 memset(&alu, 0, sizeof(struct r600_bc_alu));
988 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
989 alu.is_op3 = 1;
990 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]);
991 if (r)
992 return r;
993 r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]);
994 if (r)
995 return r;
996 alu.src[2].sel = ctx->temp_reg;
997 alu.src[2].chan = i;
998 alu.dst.sel = ctx->temp_reg;
999 alu.dst.chan = i;
1000 if (i == 3) {
1001 alu.last = 1;
1002 }
1003 r = r600_bc_add_alu(ctx->bc, &alu);
1004 if (r)
1005 return r;
1006 }
1007 return tgsi_helper_copy(ctx, inst);
1008 }
1009
1010 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1011 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1012 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1013 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1014 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1015 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans},
1016 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1017 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1018 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1019 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1020 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1021 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1022 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1023 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1024 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1025 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1026 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1027 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1028 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1029 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1030 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1031 /* gap */
1032 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1033 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1034 /* gap */
1035 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1036 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1037 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1038 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1039 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1040 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1041 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1042 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1043 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1044 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1045 /* gap */
1046 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1047 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1048 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1049 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1050 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1051 {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1052 {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1053 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1054 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1055 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1056 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1057 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1058 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1059 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1060 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1061 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1062 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1063 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1064 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1065 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1066 {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex},
1067 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1068 {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex},
1069 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1070 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1071 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1072 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1073 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1074 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1075 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1076 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1077 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1078 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1079 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
1080 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1081 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1082 {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1083 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1084 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1085 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1086 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1087 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1088 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1089 /* gap */
1090 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1091 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1092 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1093 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1094 /* gap */
1095 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1096 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1097 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1098 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1099 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1100 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1101 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1102 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1103 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1104 /* gap */
1105 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1106 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1107 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1108 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1109 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1110 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1111 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1112 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1113 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1114 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1115 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1116 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1117 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1118 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1119 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1120 /* gap */
1121 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1122 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1123 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1124 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1125 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1126 /* gap */
1127 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1128 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1129 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1130 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1131 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1132 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1133 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1134 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1135 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */
1136 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1137 /* gap */
1138 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1139 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1140 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1141 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1142 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1143 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1144 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1145 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1146 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1147 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1148 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1149 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1150 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1151 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1152 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1153 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1154 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1155 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1156 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1157 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1158 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1159 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1160 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1161 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1162 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1163 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1164 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1165 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1166 };