r600g: mipmap early support + EX2/ABS instruction + culling
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
/*
 * One row of the TGSI opcode translation table.  The table is indexed
 * by TGSI opcode and initialised positionally, so the member order
 * below must not change.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this row describes */
	unsigned	tgsi_opcode;
	/* non-zero for rows whose r600 opcode is a three-operand one */
	unsigned	is_op3;
	/* r600 opcode emitted by the handler */
	unsigned	r600_opcode;
	/* handler emitting bytecode for the current instruction; 0 on success */
	int		(*process)(struct r600_shader_ctx *ctx);
};
59
60 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 int r600_pipe_shader_create(struct pipe_context *ctx,
102 struct r600_context_state *rpshader,
103 const struct tgsi_token *tokens)
104 {
105 struct r600_screen *rscreen = r600_screen(ctx->screen);
106 int r;
107
108 fprintf(stderr, "--------------------------------------------------------------\n");
109 tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return -ENOMEM;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 return r;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 return r;
122 }
123 fprintf(stderr, "______________________________________________________________\n");
124 return 0;
125 }
126
127 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128 {
129 struct r600_screen *rscreen = r600_screen(ctx->screen);
130 struct r600_shader *rshader = &rpshader->shader;
131 struct radeon_state *state;
132 unsigned i, j, tmp;
133
134 rpshader->rstate = radeon_state_decref(rpshader->rstate);
135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136 if (state == NULL)
137 return -ENOMEM;
138 for (i = 0; i < 10; i++) {
139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140 }
141 for (i = 0, j = 0; i < rshader->noutput; i++) {
142 if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) {
143 tmp = rshader->output[i].sid << ((j & 3) * 8);
144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp;
145 j++;
146 }
147 }
148 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
149 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
150 rpshader->rstate = state;
151 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
152 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
153 rpshader->rstate->nbo = 2;
154 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
155 return radeon_state_pm4(state);
156 }
157
158 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
159 {
160 struct r600_screen *rscreen = r600_screen(ctx->screen);
161 struct r600_shader *rshader = &rpshader->shader;
162 struct radeon_state *state;
163 unsigned i, tmp;
164
165 rpshader->rstate = radeon_state_decref(rpshader->rstate);
166 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
167 if (state == NULL)
168 return -ENOMEM;
169 for (i = 0; i < rshader->ninput; i++) {
170 tmp = S_028644_SEMANTIC(rshader->input[i].sid);
171 tmp |= S_028644_SEL_CENTROID(1);
172 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
173 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
174 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
175 }
176 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
177 }
178 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
179 S_0286CC_PERSP_GRADIENT_ENA(1);
180 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
181 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
182 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
183 rpshader->rstate = state;
184 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
185 rpshader->rstate->nbo = 1;
186 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
187 return radeon_state_pm4(state);
188 }
189
190 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
191 {
192 struct r600_screen *rscreen = r600_screen(ctx->screen);
193 struct r600_context *rctx = r600_context(ctx);
194 struct r600_shader *rshader = &rpshader->shader;
195 int r;
196
197 /* copy new shader */
198 radeon_bo_decref(rscreen->rw, rpshader->bo);
199 rpshader->bo = NULL;
200 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
201 4096, NULL);
202 if (rpshader->bo == NULL) {
203 return -ENOMEM;
204 }
205 radeon_bo_map(rscreen->rw, rpshader->bo);
206 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
207 radeon_bo_unmap(rscreen->rw, rpshader->bo);
208 /* build state */
209 rshader->flat_shade = rctx->flat_shade;
210 switch (rshader->processor_type) {
211 case TGSI_PROCESSOR_VERTEX:
212 r = r600_pipe_shader_vs(ctx, rpshader);
213 break;
214 case TGSI_PROCESSOR_FRAGMENT:
215 r = r600_pipe_shader_ps(ctx, rpshader);
216 break;
217 default:
218 r = -EINVAL;
219 break;
220 }
221 return r;
222 }
223
224 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
225 {
226 struct r600_context *rctx = r600_context(ctx);
227 int r;
228
229 if (rpshader == NULL)
230 return -EINVAL;
231 /* there should be enough input */
232 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
233 R600_ERR("%d resources provided, expecting %d\n",
234 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
235 return -EINVAL;
236 }
237 r = r600_shader_update(ctx, &rpshader->shader);
238 if (r)
239 return r;
240 return r600_pipe_shader(ctx, rpshader);
241 }
242
243 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
244 {
245 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
246 int j;
247
248 if (i->Instruction.NumDstRegs > 1) {
249 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
250 return -EINVAL;
251 }
252 if (i->Instruction.Predicate) {
253 R600_ERR("predicate unsupported\n");
254 return -EINVAL;
255 }
256 if (i->Instruction.Label) {
257 R600_ERR("label unsupported\n");
258 return -EINVAL;
259 }
260 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
261 if (i->Src[j].Register.Indirect ||
262 i->Src[j].Register.Dimension ||
263 i->Src[j].Register.Absolute) {
264 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
265 return -EINVAL;
266 }
267 }
268 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
269 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
270 R600_ERR("unsupported dst (indirect|dimension)\n");
271 return -EINVAL;
272 }
273 }
274 return 0;
275 }
276
277 static int tgsi_declaration(struct r600_shader_ctx *ctx)
278 {
279 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
280 struct r600_bc_vtx vtx;
281 unsigned i;
282 int r;
283
284 switch (d->Declaration.File) {
285 case TGSI_FILE_INPUT:
286 i = ctx->shader->ninput++;
287 ctx->shader->input[i].name = d->Semantic.Name;
288 ctx->shader->input[i].sid = d->Semantic.Index;
289 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
290 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
291 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
292 /* turn input into fetch */
293 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
294 vtx.inst = 0;
295 vtx.fetch_type = 0;
296 vtx.buffer_id = i;
297 /* register containing the index into the buffer */
298 vtx.src_gpr = 0;
299 vtx.src_sel_x = 0;
300 vtx.mega_fetch_count = 0x1F;
301 vtx.dst_gpr = ctx->shader->input[i].gpr;
302 vtx.dst_sel_x = 0;
303 vtx.dst_sel_y = 1;
304 vtx.dst_sel_z = 2;
305 vtx.dst_sel_w = 3;
306 r = r600_bc_add_vtx(ctx->bc, &vtx);
307 if (r)
308 return r;
309 }
310 break;
311 case TGSI_FILE_OUTPUT:
312 i = ctx->shader->noutput++;
313 ctx->shader->output[i].name = d->Semantic.Name;
314 ctx->shader->output[i].sid = d->Semantic.Index;
315 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
316 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
317 break;
318 case TGSI_FILE_CONSTANT:
319 case TGSI_FILE_TEMPORARY:
320 case TGSI_FILE_SAMPLER:
321 break;
322 default:
323 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
324 return -EINVAL;
325 }
326 return 0;
327 }
328
329 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
330 {
331 struct tgsi_full_immediate *immediate;
332 struct r600_shader_ctx ctx;
333 struct r600_bc_output output;
334 unsigned opcode;
335 int i, r = 0, pos0;
336
337 ctx.bc = &shader->bc;
338 ctx.shader = shader;
339 r = r600_bc_init(ctx.bc, shader->family);
340 if (r)
341 return r;
342 ctx.tokens = tokens;
343 tgsi_scan_shader(tokens, &ctx.info);
344 tgsi_parse_init(&ctx.parse, tokens);
345 ctx.type = ctx.parse.FullHeader.Processor.Processor;
346 shader->processor_type = ctx.type;
347
348 /* register allocations */
349 /* Values [0,127] correspond to GPR[0..127].
350 * Values [256,511] correspond to cfile constants c[0..255].
351 * Other special values are shown in the list below.
352 * 248 SQ_ALU_SRC_0: special constant 0.0.
353 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
354 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
355 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
356 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
357 * 253 SQ_ALU_SRC_LITERAL: literal constant.
358 * 254 SQ_ALU_SRC_PV: previous vector result.
359 * 255 SQ_ALU_SRC_PS: previous scalar result.
360 */
361 for (i = 0; i < TGSI_FILE_COUNT; i++) {
362 ctx.file_offset[i] = 0;
363 }
364 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
365 ctx.file_offset[TGSI_FILE_INPUT] = 1;
366 }
367 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
368 ctx.info.file_count[TGSI_FILE_INPUT];
369 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
370 ctx.info.file_count[TGSI_FILE_OUTPUT];
371 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
372 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
373 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
374 ctx.info.file_count[TGSI_FILE_TEMPORARY];
375
376 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
377 tgsi_parse_token(&ctx.parse);
378 switch (ctx.parse.FullToken.Token.Type) {
379 case TGSI_TOKEN_TYPE_IMMEDIATE:
380 immediate = &ctx.parse.FullToken.FullImmediate;
381 ctx.value[0] = immediate->u[0].Uint;
382 ctx.value[1] = immediate->u[1].Uint;
383 ctx.value[2] = immediate->u[2].Uint;
384 ctx.value[3] = immediate->u[3].Uint;
385 break;
386 case TGSI_TOKEN_TYPE_DECLARATION:
387 r = tgsi_declaration(&ctx);
388 if (r)
389 goto out_err;
390 break;
391 case TGSI_TOKEN_TYPE_INSTRUCTION:
392 r = tgsi_is_supported(&ctx);
393 if (r)
394 goto out_err;
395 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
396 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
397 r = ctx.inst_info->process(&ctx);
398 if (r)
399 goto out_err;
400 r = r600_bc_add_literal(ctx.bc, ctx.value);
401 if (r)
402 goto out_err;
403 break;
404 default:
405 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
406 r = -EINVAL;
407 goto out_err;
408 }
409 }
410 /* export output */
411 for (i = 0, pos0 = 0; i < shader->noutput; i++) {
412 memset(&output, 0, sizeof(struct r600_bc_output));
413 output.gpr = shader->output[i].gpr;
414 output.elem_size = 3;
415 output.swizzle_x = 0;
416 output.swizzle_y = 1;
417 output.swizzle_z = 2;
418 output.swizzle_w = 3;
419 output.barrier = 1;
420 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
421 output.array_base = i - pos0;
422 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
423 switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
424 case TGSI_PROCESSOR_VERTEX:
425 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
426 output.array_base = 60;
427 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
428 /* position doesn't count in array_base */
429 pos0 = 1;
430 }
431 break;
432 case TGSI_PROCESSOR_FRAGMENT:
433 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
434 output.array_base = 0;
435 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
436 } else {
437 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
438 r = -EINVAL;
439 goto out_err;
440 }
441 break;
442 default:
443 R600_ERR("unsupported processor type %d\n", ctx.type);
444 r = -EINVAL;
445 goto out_err;
446 }
447 if (i == (shader->noutput - 1)) {
448 output.end_of_program = 1;
449 }
450 r = r600_bc_add_output(ctx.bc, &output);
451 if (r)
452 goto out_err;
453 }
454 tgsi_parse_free(&ctx.parse);
455 return 0;
456 out_err:
457 tgsi_parse_free(&ctx.parse);
458 return r;
459 }
460
461 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
462 {
463 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
464 return -EINVAL;
465 }
466
/*
 * Handler that emits no bytecode and always succeeds; ctx is unused.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
471
472 static int tgsi_src(struct r600_shader_ctx *ctx,
473 const struct tgsi_full_src_register *tgsi_src,
474 unsigned swizzle,
475 struct r600_bc_alu_src *r600_src)
476 {
477 r600_src->sel = tgsi_src->Register.Index;
478 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
479 r600_src->sel = 0;
480 }
481 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
482 switch (swizzle) {
483 case 0:
484 r600_src->chan = tgsi_src->Register.SwizzleX;
485 break;
486 case 1:
487 r600_src->chan = tgsi_src->Register.SwizzleY;
488 break;
489 case 2:
490 r600_src->chan = tgsi_src->Register.SwizzleZ;
491 break;
492 case 3:
493 r600_src->chan = tgsi_src->Register.SwizzleW;
494 break;
495 default:
496 return -EINVAL;
497 }
498 return 0;
499 }
500
501 static int tgsi_dst(struct r600_shader_ctx *ctx,
502 const struct tgsi_full_dst_register *tgsi_dst,
503 unsigned swizzle,
504 struct r600_bc_alu_dst *r600_dst)
505 {
506 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
507
508 r600_dst->sel = tgsi_dst->Register.Index;
509 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
510 r600_dst->chan = swizzle;
511 r600_dst->write = 1;
512 if (inst->Instruction.Saturate) {
513 r600_dst->clamp = 1;
514 }
515 return 0;
516 }
517
518 static int tgsi_op2(struct r600_shader_ctx *ctx)
519 {
520 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
521 struct r600_bc_alu alu;
522 int i, j, r;
523
524 for (i = 0; i < 4; i++) {
525 memset(&alu, 0, sizeof(struct r600_bc_alu));
526 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
527 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
528 } else {
529 alu.inst = ctx->inst_info->r600_opcode;
530 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
531 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
532 if (r)
533 return r;
534 }
535 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
536 if (r)
537 return r;
538 }
539 /* handle some special cases */
540 switch (ctx->inst_info->tgsi_opcode) {
541 case TGSI_OPCODE_SUB:
542 alu.src[1].neg = 1;
543 break;
544 case TGSI_OPCODE_ABS:
545 alu.src[0].abs = 1;
546 break;
547 default:
548 break;
549 }
550 if (i == 3) {
551 alu.last = 1;
552 }
553 r = r600_bc_add_alu(ctx->bc, &alu);
554 if (r)
555 return r;
556 }
557 return 0;
558 }
559
560 static int tgsi_slt(struct r600_shader_ctx *ctx)
561 {
562 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
563 struct r600_bc_alu alu;
564 int i, r;
565
566 for (i = 0; i < 4; i++) {
567 memset(&alu, 0, sizeof(struct r600_bc_alu));
568 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
569 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
570 } else {
571 alu.inst = ctx->inst_info->r600_opcode;
572 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
573 if (r)
574 return r;
575 r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]);
576 if (r)
577 return r;
578 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
579 if (r)
580 return r;
581 }
582 if (i == 3) {
583 alu.last = 1;
584 }
585 r = r600_bc_add_alu(ctx->bc, &alu);
586 if (r)
587 return r;
588 }
589 return 0;
590 }
591
592 static int tgsi_lit(struct r600_shader_ctx *ctx)
593 {
594 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
595 struct r600_bc_alu alu;
596 int r;
597
598 if (inst->Dst[0].Register.WriteMask & (1 << 0))
599 {
600 /* dst.x, <- 1.0 */
601 memset(&alu, 0, sizeof(struct r600_bc_alu));
602 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
603 alu.src[0].sel = 249; /*1.0*/
604 alu.src[0].chan = 0;
605 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
606 if (r)
607 return r;
608 if ((inst->Dst[0].Register.WriteMask & 0xe) == 0)
609 alu.last = 1;
610 r = r600_bc_add_alu(ctx->bc, &alu);
611 if (r)
612 return r;
613 }
614
615
616 if (inst->Dst[0].Register.WriteMask & (1 << 1))
617 {
618 /* dst.y = max(src.x, 0.0) */
619 memset(&alu, 0, sizeof(struct r600_bc_alu));
620 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
621 r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]);
622 if (r)
623 return r;
624 alu.src[1].sel = 248; /*0.0*/
625 alu.src[1].chan = 0;
626 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
627 if (r)
628 return r;
629 if ((inst->Dst[0].Register.WriteMask & 0xa) == 0)
630 alu.last = 1;
631 r = r600_bc_add_alu(ctx->bc, &alu);
632 if (r)
633 return r;
634 }
635
636 if (inst->Dst[0].Register.WriteMask & (1 << 3))
637 {
638 /* dst.w, <- 1.0 */
639 memset(&alu, 0, sizeof(struct r600_bc_alu));
640 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
641 alu.src[0].sel = 249;
642 alu.src[0].chan = 0;
643 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
644 if (r)
645 return r;
646 if ((inst->Dst[0].Register.WriteMask & 0x4) == 0)
647 alu.last = 1;
648 r = r600_bc_add_alu(ctx->bc, &alu);
649 if (r)
650 return r;
651 }
652
653 if (inst->Dst[0].Register.WriteMask & (1 << 2))
654 {
655 int chan;
656 int sel;
657
658 /* dst.z = log(src.y) */
659 memset(&alu, 0, sizeof(struct r600_bc_alu));
660 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
661 r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]);
662 if (r)
663 return r;
664 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
665 if (r)
666 return r;
667 alu.last = 1;
668 r = r600_bc_add_alu(ctx->bc, &alu);
669 if (r)
670 return r;
671
672 chan = alu.dst.chan;
673 sel = alu.dst.sel;
674
675 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
676 memset(&alu, 0, sizeof(struct r600_bc_alu));
677 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
678 r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]);
679 if (r)
680 return r;
681 alu.src[1].sel = sel;
682 alu.src[1].chan = chan;
683 r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]);
684 if (r)
685 return r;
686 alu.dst.sel = ctx->temp_reg;
687 alu.dst.chan = 0;
688 alu.dst.write = 1;
689 alu.is_op3 = 1;
690 alu.last = 1;
691 r = r600_bc_add_alu(ctx->bc, &alu);
692 if (r)
693 return r;
694
695 /* dst.z = exp(tmp.x) */
696 memset(&alu, 0, sizeof(struct r600_bc_alu));
697 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
698 alu.src[0].sel = ctx->temp_reg;
699 alu.src[0].chan = 0;
700 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
701 if (r)
702 return r;
703 alu.last = 1;
704 r = r600_bc_add_alu(ctx->bc, &alu);
705 if (r)
706 return r;
707 }
708 return 0;
709 }
710
711 static int tgsi_trans(struct r600_shader_ctx *ctx)
712 {
713 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
714 struct r600_bc_alu alu;
715 int i, j, r;
716
717 for (i = 0; i < 4; i++) {
718 memset(&alu, 0, sizeof(struct r600_bc_alu));
719 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
720 alu.inst = ctx->inst_info->r600_opcode;
721 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
722 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
723 if (r)
724 return r;
725 }
726 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
727 if (r)
728 return r;
729 alu.last = 1;
730 r = r600_bc_add_alu(ctx->bc, &alu);
731 if (r)
732 return r;
733 }
734 }
735 return 0;
736 }
737
738 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
739 {
740 struct r600_bc_alu alu;
741 int i, r;
742
743 r = r600_bc_add_literal(ctx->bc, ctx->value);
744 if (r)
745 return r;
746 for (i = 0; i < 4; i++) {
747 memset(&alu, 0, sizeof(struct r600_bc_alu));
748 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
749 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
750 } else {
751 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
752 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
753 if (r)
754 return r;
755 alu.src[0].sel = ctx->temp_reg;
756 alu.src[0].chan = i;
757 }
758 if (i == 3) {
759 alu.last = 1;
760 }
761 r = r600_bc_add_alu(ctx->bc, &alu);
762 if (r)
763 return r;
764 }
765 return 0;
766 }
767
768 static int tgsi_op3(struct r600_shader_ctx *ctx)
769 {
770 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
771 struct r600_bc_alu alu;
772 int i, j, r;
773
774 /* do it in 2 step as op3 doesn't support writemask */
775 for (i = 0; i < 4; i++) {
776 memset(&alu, 0, sizeof(struct r600_bc_alu));
777 alu.inst = ctx->inst_info->r600_opcode;
778 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
779 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
780 if (r)
781 return r;
782 }
783 alu.dst.sel = ctx->temp_reg;
784 alu.dst.chan = i;
785 alu.dst.write = 1;
786 alu.is_op3 = 1;
787 if (i == 3) {
788 alu.last = 1;
789 }
790 r = r600_bc_add_alu(ctx->bc, &alu);
791 if (r)
792 return r;
793 }
794 return tgsi_helper_copy(ctx, inst);
795 }
796
797 static int tgsi_dp(struct r600_shader_ctx *ctx)
798 {
799 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
800 struct r600_bc_alu alu;
801 int i, j, r;
802
803 for (i = 0; i < 4; i++) {
804 memset(&alu, 0, sizeof(struct r600_bc_alu));
805 alu.inst = ctx->inst_info->r600_opcode;
806 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
807 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
808 if (r)
809 return r;
810 }
811 alu.dst.sel = ctx->temp_reg;
812 alu.dst.chan = i;
813 alu.dst.write = 1;
814 /* handle some special cases */
815 switch (ctx->inst_info->tgsi_opcode) {
816 case TGSI_OPCODE_DP2:
817 if (i > 1) {
818 alu.src[0].sel = alu.src[1].sel = 248;
819 alu.src[0].chan = alu.src[1].chan = 0;
820 }
821 break;
822 case TGSI_OPCODE_DP3:
823 if (i > 2) {
824 alu.src[0].sel = alu.src[1].sel = 248;
825 alu.src[0].chan = alu.src[1].chan = 0;
826 }
827 break;
828 default:
829 break;
830 }
831 if (i == 3) {
832 alu.last = 1;
833 }
834 r = r600_bc_add_alu(ctx->bc, &alu);
835 if (r)
836 return r;
837 }
838 return tgsi_helper_copy(ctx, inst);
839 }
840
841 static int tgsi_tex(struct r600_shader_ctx *ctx)
842 {
843 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
844 struct r600_bc_tex tex;
845 struct r600_bc_alu alu;
846 unsigned src_gpr;
847 int r;
848
849 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
850
851 /* Add perspective divide */
852 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) {
853 memset(&alu, 0, sizeof(struct r600_bc_alu));
854 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
855 alu.src[0].sel = src_gpr;
856 alu.src[0].chan = 3;
857 alu.dst.sel = ctx->temp_reg;
858 alu.dst.chan = 3;
859 alu.last = 1;
860 alu.dst.write = 1;
861 r = r600_bc_add_alu(ctx->bc, &alu);
862 if (r)
863 return r;
864
865 memset(&alu, 0, sizeof(struct r600_bc_alu));
866 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
867 alu.src[0].sel = ctx->temp_reg;
868 alu.src[0].chan = 3;
869 alu.src[1].sel = src_gpr;
870 alu.src[1].chan = 0;
871 alu.dst.sel = ctx->temp_reg;
872 alu.dst.chan = 0;
873 alu.dst.write = 1;
874 r = r600_bc_add_alu(ctx->bc, &alu);
875 if (r)
876 return r;
877 memset(&alu, 0, sizeof(struct r600_bc_alu));
878 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
879 alu.src[0].sel = ctx->temp_reg;
880 alu.src[0].chan = 3;
881 alu.src[1].sel = src_gpr;
882 alu.src[1].chan = 1;
883 alu.dst.sel = ctx->temp_reg;
884 alu.dst.chan = 1;
885 alu.dst.write = 1;
886 r = r600_bc_add_alu(ctx->bc, &alu);
887 if (r)
888 return r;
889 memset(&alu, 0, sizeof(struct r600_bc_alu));
890 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
891 alu.src[0].sel = ctx->temp_reg;
892 alu.src[0].chan = 3;
893 alu.src[1].sel = src_gpr;
894 alu.src[1].chan = 2;
895 alu.dst.sel = ctx->temp_reg;
896 alu.dst.chan = 2;
897 alu.dst.write = 1;
898 r = r600_bc_add_alu(ctx->bc, &alu);
899 if (r)
900 return r;
901 memset(&alu, 0, sizeof(struct r600_bc_alu));
902 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
903 alu.src[0].sel = 249;
904 alu.src[0].chan = 0;
905 alu.dst.sel = ctx->temp_reg;
906 alu.dst.chan = 3;
907 alu.last = 1;
908 alu.dst.write = 1;
909 r = r600_bc_add_alu(ctx->bc, &alu);
910 if (r)
911 return r;
912 src_gpr = ctx->temp_reg;
913 }
914
915 /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
916 memset(&tex, 0, sizeof(struct r600_bc_tex));
917 tex.inst = ctx->inst_info->r600_opcode;
918 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
919 tex.sampler_id = tex.resource_id;
920 tex.src_gpr = src_gpr;
921 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index;
922 tex.dst_sel_x = 0;
923 tex.dst_sel_y = 1;
924 tex.dst_sel_z = 2;
925 tex.dst_sel_w = 3;
926 tex.src_sel_x = 0;
927 tex.src_sel_y = 1;
928 tex.src_sel_z = 2;
929 tex.src_sel_w = 3;
930 tex.coord_type_x = 1;
931 tex.coord_type_y = 1;
932 tex.coord_type_z = 1;
933 tex.coord_type_w = 1;
934 return r600_bc_add_tex(ctx->bc, &tex);
935 }
936
937 static int tgsi_lrp(struct r600_shader_ctx *ctx)
938 {
939 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
940 struct r600_bc_alu alu;
941 unsigned i;
942 int r;
943
944 /* 1 - src0 */
945 for (i = 0; i < 4; i++) {
946 memset(&alu, 0, sizeof(struct r600_bc_alu));
947 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
948 alu.src[0].sel = 249;
949 alu.src[0].chan = 0;
950 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
951 if (r)
952 return r;
953 alu.src[1].neg = 1;
954 alu.dst.sel = ctx->temp_reg;
955 alu.dst.chan = i;
956 if (i == 3) {
957 alu.last = 1;
958 }
959 alu.dst.write = 1;
960 r = r600_bc_add_alu(ctx->bc, &alu);
961 if (r)
962 return r;
963 }
964 r = r600_bc_add_literal(ctx->bc, ctx->value);
965 if (r)
966 return r;
967
968 /* (1 - src0) * src2 */
969 for (i = 0; i < 4; i++) {
970 memset(&alu, 0, sizeof(struct r600_bc_alu));
971 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
972 alu.src[0].sel = ctx->temp_reg;
973 alu.src[0].chan = i;
974 r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]);
975 if (r)
976 return r;
977 alu.dst.sel = ctx->temp_reg;
978 alu.dst.chan = i;
979 if (i == 3) {
980 alu.last = 1;
981 }
982 alu.dst.write = 1;
983 r = r600_bc_add_alu(ctx->bc, &alu);
984 if (r)
985 return r;
986 }
987 r = r600_bc_add_literal(ctx->bc, ctx->value);
988 if (r)
989 return r;
990
991 /* src0 * src1 + (1 - src0) * src2 */
992 for (i = 0; i < 4; i++) {
993 memset(&alu, 0, sizeof(struct r600_bc_alu));
994 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
995 alu.is_op3 = 1;
996 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]);
997 if (r)
998 return r;
999 r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]);
1000 if (r)
1001 return r;
1002 alu.src[2].sel = ctx->temp_reg;
1003 alu.src[2].chan = i;
1004 alu.dst.sel = ctx->temp_reg;
1005 alu.dst.chan = i;
1006 if (i == 3) {
1007 alu.last = 1;
1008 }
1009 r = r600_bc_add_alu(ctx->bc, &alu);
1010 if (r)
1011 return r;
1012 }
1013 return tgsi_helper_copy(ctx, inst);
1014 }
1015
/*
 * Translation table from TGSI opcodes to r600 instructions.
 *
 * Each entry lists four values, in order:
 *   1. the TGSI opcode (a TGSI_OPCODE_* name, or a raw number where no
 *      name is used in this table),
 *   2. a flag that is 1 only for the MAD entry, the one entry that uses
 *      an OP3 hardware opcode and the tgsi_op3 handler,
 *   3. the hardware opcode constant (V_SQ_ALU_WORD1_OP2_* / _OP3_*, or a
 *      raw value for the texture entries),
 *   4. the function used to translate that opcode.
 * NOTE(review): the struct definition is not visible here, so the field
 * names and exact meanings above should be confirmed against it.
 *
 * Entries are written in ascending opcode order, with explicit numeric
 * placeholders filling the gaps between named opcodes. The table is
 * presumably indexed directly by TGSI opcode, so do not reorder entries
 * or drop a placeholder -- TODO confirm against the lookup site.
 *
 * Every entry routed to tgsi_unsupported carries the NOP hardware opcode.
 */
1016 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1017 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1018 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1019 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1020 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1021 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans},
1022 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1023 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1024 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1025 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1026 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, /* DP2/DP3/DP4 all share DOT4 and tgsi_dp */
1027 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1028 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1029 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1030 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1031 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, /* SETGT opcode; presumably tgsi_slt swaps the operands -- confirm */
1032 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1033 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry with the flag set and an OP3 opcode */
1034 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* ADD opcode; presumably tgsi_op2 negates the second source -- confirm */
1035 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1036 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1037 /* gap: slot with no TGSI_OPCODE_* name used here */
1038 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1039 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1040 /* gap: slots with no TGSI_OPCODE_* name used here */
1041 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1042 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1043 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1044 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1045 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1046 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1047 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans},
1048 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1049 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1050 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1051 /* gap: slot with no TGSI_OPCODE_* name used here */
1052 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1053 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, /* MOV opcode; presumably tgsi_op2 applies an abs source modifier -- confirm */
1054 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1055 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1056 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1057 {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1058 {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1059 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1060 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1061 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1062 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1063 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1064 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1065 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1066 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1067 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1068 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1069 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1070 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1071 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1072 {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex}, /* raw 0x10 rather than an ALU constant; NOTE(review): presumably a texture instruction opcode -- confirm */
1073 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1074 {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, /* same raw opcode and handler as TEX */
1075 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1076 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1077 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1078 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1079 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1080 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1081 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1082 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1083 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1084 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1085 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
1086 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1087 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1088 {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1089 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1090 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1091 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1092 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1093 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1094 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1095 /* gap: slots with no TGSI_OPCODE_* name used here */
1096 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1097 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1098 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1099 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1100 /* gap: slots with no TGSI_OPCODE_* name used here */
1101 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1102 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1103 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1104 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1105 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1106 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1107 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1108 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1109 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1110 /* gap: slot with no TGSI_OPCODE_* name used here */
1111 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1112 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1113 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1114 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1115 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1116 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1117 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1118 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1119 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1120 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1121 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1122 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1123 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1124 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1125 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1126 /* gap: slots with no TGSI_OPCODE_* name used here */
1127 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1128 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1129 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1130 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1131 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1132 /* gap: slots with no TGSI_OPCODE_* name used here */
1133 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1134 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1135 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1136 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1137 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1138 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1139 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1140 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1141 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */
1142 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1143 /* gap: slot with no TGSI_OPCODE_* name used here */
1144 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1145 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1146 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1147 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1148 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1149 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1150 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1151 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1152 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1153 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1154 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1155 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1156 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1157 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1158 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1159 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1160 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1161 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1162 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1163 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1164 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1165 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1166 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1167 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1168 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1169 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1170 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1171 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* extra entry for the TGSI_OPCODE_LAST value itself */
1172 };