r600g: actually fix the literal emission
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
/*
 * One row of the opcode translation table, which r600_shader_from_tgsi()
 * indexes by TGSI opcode number.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this row describes */
	unsigned int	tgsi_opcode;
	/* non-zero for rows that map to a three-source (OP3) ALU instruction */
	unsigned int	is_op3;
	/* hardware opcode the handler emits */
	unsigned int	r600_opcode;
	/* handler translating the current instruction; returns 0 or a negative errno */
	int		(*process)(struct r600_shader_ctx *ctx);
};

/* Defined further down in this file; declared here for the code above them. */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx,
102 const struct tgsi_token *tokens)
103 {
104 struct r600_screen *rscreen = r600_screen(ctx->screen);
105 struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader);
106 int r;
107
108 fprintf(stderr, "--------------------------------------------------------------\n");
109 tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return NULL;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 goto out_err;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 goto out_err;
122 }
123 fprintf(stderr, "______________________________________________________________\n");
124 return rpshader;
125 out_err:
126 free(rpshader);
127 return NULL;
128 }
129
130 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
131 {
132 struct r600_screen *rscreen = r600_screen(ctx->screen);
133 struct r600_shader *rshader = &rpshader->shader;
134 struct radeon_state *state;
135 unsigned i, j, tmp;
136
137 rpshader->state = radeon_state_decref(rpshader->state);
138 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
139 if (state == NULL)
140 return -ENOMEM;
141 for (i = 0; i < 10; i++) {
142 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
143 }
144 for (i = 0, j = 0; i < rshader->noutput; i++) {
145 if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) {
146 tmp = rshader->output[i].sid << ((j & 3) * 8);
147 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp;
148 j++;
149 }
150 }
151 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
152 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
153 rpshader->state = state;
154 rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
155 rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
156 rpshader->state->nbo = 2;
157 rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT;
158 return radeon_state_pm4(state);
159 }
160
161 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
162 {
163 struct r600_screen *rscreen = r600_screen(ctx->screen);
164 struct r600_shader *rshader = &rpshader->shader;
165 struct radeon_state *state;
166 unsigned i, tmp;
167
168 rpshader->state = radeon_state_decref(rpshader->state);
169 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
170 if (state == NULL)
171 return -ENOMEM;
172 for (i = 0; i < rshader->ninput; i++) {
173 tmp = S_028644_SEMANTIC(rshader->input[i].sid);
174 tmp |= S_028644_SEL_CENTROID(1);
175 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
176 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
177 }
178 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
179 S_0286CC_PERSP_GRADIENT_ENA(1);
180 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
181 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
182 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
183 rpshader->state = state;
184 rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
185 rpshader->state->nbo = 1;
186 rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT;
187 return radeon_state_pm4(state);
188 }
189
190 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
191 {
192 struct r600_screen *rscreen = r600_screen(ctx->screen);
193 struct r600_context *rctx = r600_context(ctx);
194 struct r600_shader *rshader = &rpshader->shader;
195 int r;
196
197 /* copy new shader */
198 radeon_bo_decref(rscreen->rw, rpshader->bo);
199 rpshader->bo = NULL;
200 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
201 4096, NULL);
202 if (rpshader->bo == NULL) {
203 return -ENOMEM;
204 }
205 radeon_bo_map(rscreen->rw, rpshader->bo);
206 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
207 radeon_bo_unmap(rscreen->rw, rpshader->bo);
208 /* build state */
209 rshader->flat_shade = rctx->flat_shade;
210 switch (rshader->processor_type) {
211 case TGSI_PROCESSOR_VERTEX:
212 r = r600_pipe_shader_vs(ctx, rpshader);
213 break;
214 case TGSI_PROCESSOR_FRAGMENT:
215 r = r600_pipe_shader_ps(ctx, rpshader);
216 break;
217 default:
218 r = -EINVAL;
219 break;
220 }
221 return r;
222 }
223
224 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
225 {
226 struct r600_context *rctx = r600_context(ctx);
227 int r;
228
229 if (rpshader == NULL)
230 return -EINVAL;
231 /* there should be enough input */
232 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
233 R600_ERR("%d resources provided, expecting %d\n",
234 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
235 return -EINVAL;
236 }
237 r = r600_shader_update(ctx, &rpshader->shader);
238 if (r)
239 return r;
240 return r600_pipe_shader(ctx, rpshader);
241 }
242
243 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
244 {
245 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
246 int j;
247
248 if (i->Instruction.NumDstRegs > 1) {
249 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
250 return -EINVAL;
251 }
252 if (i->Instruction.Saturate) {
253 R600_ERR("staturate unsupported\n");
254 return -EINVAL;
255 }
256 if (i->Instruction.Predicate) {
257 R600_ERR("predicate unsupported\n");
258 return -EINVAL;
259 }
260 if (i->Instruction.Label) {
261 R600_ERR("label unsupported\n");
262 return -EINVAL;
263 }
264 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
265 if (i->Src[j].Register.Indirect ||
266 i->Src[j].Register.Dimension ||
267 i->Src[j].Register.Absolute) {
268 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
269 return -EINVAL;
270 }
271 }
272 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
273 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
274 R600_ERR("unsupported dst (indirect|dimension)\n");
275 return -EINVAL;
276 }
277 }
278 return 0;
279 }
280
281 static int tgsi_declaration(struct r600_shader_ctx *ctx)
282 {
283 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
284 struct r600_bc_vtx vtx;
285 unsigned i;
286 int r;
287
288 switch (d->Declaration.File) {
289 case TGSI_FILE_INPUT:
290 i = ctx->shader->ninput++;
291 ctx->shader->input[i].name = d->Semantic.Name;
292 ctx->shader->input[i].sid = d->Semantic.Index;
293 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
294 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
295 /* turn input into fetch */
296 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
297 vtx.inst = 0;
298 vtx.fetch_type = 0;
299 vtx.buffer_id = i;
300 /* register containing the index into the buffer */
301 vtx.src_gpr = 0;
302 vtx.src_sel_x = 0;
303 vtx.mega_fetch_count = 0x1F;
304 vtx.dst_gpr = ctx->shader->input[i].gpr;
305 vtx.dst_sel_x = 0;
306 vtx.dst_sel_y = 1;
307 vtx.dst_sel_z = 2;
308 vtx.dst_sel_w = 3;
309 r = r600_bc_add_vtx(ctx->bc, &vtx);
310 if (r)
311 return r;
312 }
313 break;
314 case TGSI_FILE_OUTPUT:
315 i = ctx->shader->noutput++;
316 ctx->shader->output[i].name = d->Semantic.Name;
317 ctx->shader->output[i].sid = d->Semantic.Index;
318 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
319 break;
320 case TGSI_FILE_CONSTANT:
321 case TGSI_FILE_TEMPORARY:
322 case TGSI_FILE_SAMPLER:
323 break;
324 default:
325 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
326 return -EINVAL;
327 }
328 return 0;
329 }
330
331 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
332 {
333 struct tgsi_full_immediate *immediate;
334 struct r600_shader_ctx ctx;
335 struct r600_bc_output output;
336 unsigned opcode;
337 int i, r = 0, pos0;
338
339 ctx.bc = &shader->bc;
340 ctx.shader = shader;
341 r = r600_bc_init(ctx.bc, shader->family);
342 if (r)
343 return r;
344 ctx.tokens = tokens;
345 tgsi_scan_shader(tokens, &ctx.info);
346 tgsi_parse_init(&ctx.parse, tokens);
347 ctx.type = ctx.parse.FullHeader.Processor.Processor;
348 shader->processor_type = ctx.type;
349
350 /* register allocations */
351 /* Values [0,127] correspond to GPR[0..127].
352 * Values [256,511] correspond to cfile constants c[0..255].
353 * Other special values are shown in the list below.
354 * 248 SQ_ALU_SRC_0: special constant 0.0.
355 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
356 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
357 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
358 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
359 * 253 SQ_ALU_SRC_LITERAL: literal constant.
360 * 254 SQ_ALU_SRC_PV: previous vector result.
361 * 255 SQ_ALU_SRC_PS: previous scalar result.
362 */
363 for (i = 0; i < TGSI_FILE_COUNT; i++) {
364 ctx.file_offset[i] = 0;
365 }
366 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
367 ctx.file_offset[TGSI_FILE_INPUT] = 1;
368 }
369 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
370 ctx.info.file_count[TGSI_FILE_INPUT];
371 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
372 ctx.info.file_count[TGSI_FILE_OUTPUT];
373 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
374 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
375 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
376 ctx.info.file_count[TGSI_FILE_TEMPORARY];
377
378 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
379 tgsi_parse_token(&ctx.parse);
380 switch (ctx.parse.FullToken.Token.Type) {
381 case TGSI_TOKEN_TYPE_IMMEDIATE:
382 immediate = &ctx.parse.FullToken.FullImmediate;
383 ctx.value[0] = immediate->u[0].Uint;
384 ctx.value[1] = immediate->u[1].Uint;
385 ctx.value[2] = immediate->u[2].Uint;
386 ctx.value[3] = immediate->u[3].Uint;
387 break;
388 case TGSI_TOKEN_TYPE_DECLARATION:
389 r = tgsi_declaration(&ctx);
390 if (r)
391 goto out_err;
392 break;
393 case TGSI_TOKEN_TYPE_INSTRUCTION:
394 r = tgsi_is_supported(&ctx);
395 if (r)
396 goto out_err;
397 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
398 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
399 r = ctx.inst_info->process(&ctx);
400 if (r)
401 goto out_err;
402 r = r600_bc_add_literal(ctx.bc, ctx.value);
403 if (r)
404 goto out_err;
405 break;
406 default:
407 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
408 r = -EINVAL;
409 goto out_err;
410 }
411 }
412 /* export output */
413 for (i = 0, pos0 = 0; i < shader->noutput; i++) {
414 memset(&output, 0, sizeof(struct r600_bc_output));
415 output.gpr = shader->output[i].gpr;
416 output.elem_size = 3;
417 output.swizzle_x = 0;
418 output.swizzle_y = 1;
419 output.swizzle_z = 2;
420 output.swizzle_w = 3;
421 output.barrier = 1;
422 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
423 output.array_base = i - pos0;
424 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
425 switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
426 case TGSI_PROCESSOR_VERTEX:
427 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
428 output.array_base = 60;
429 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
430 /* position doesn't count in array_base */
431 pos0 = 1;
432 }
433 break;
434 case TGSI_PROCESSOR_FRAGMENT:
435 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
436 output.array_base = 0;
437 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
438 } else {
439 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
440 r = -EINVAL;
441 goto out_err;
442 }
443 break;
444 default:
445 R600_ERR("unsupported processor type %d\n", ctx.type);
446 r = -EINVAL;
447 goto out_err;
448 }
449 if (i == (shader->noutput - 1)) {
450 output.end_of_program = 1;
451 }
452 r = r600_bc_add_output(ctx.bc, &output);
453 if (r)
454 goto out_err;
455 }
456 tgsi_parse_free(&ctx.parse);
457 return 0;
458 out_err:
459 tgsi_parse_free(&ctx.parse);
460 return r;
461 }
462
463 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
464 {
465 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
466 return -EINVAL;
467 }
468
/* Opcode handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* no state is needed */
	return 0;
}
473
474 static int tgsi_src(struct r600_shader_ctx *ctx,
475 const struct tgsi_full_src_register *tgsi_src,
476 unsigned swizzle,
477 struct r600_bc_alu_src *r600_src)
478 {
479 r600_src->sel = tgsi_src->Register.Index;
480 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
481 r600_src->sel = 0;
482 }
483 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
484 switch (swizzle) {
485 case 0:
486 r600_src->chan = tgsi_src->Register.SwizzleX;
487 break;
488 case 1:
489 r600_src->chan = tgsi_src->Register.SwizzleY;
490 break;
491 case 2:
492 r600_src->chan = tgsi_src->Register.SwizzleZ;
493 break;
494 case 3:
495 r600_src->chan = tgsi_src->Register.SwizzleW;
496 break;
497 default:
498 return -EINVAL;
499 }
500 return 0;
501 }
502
503 static int tgsi_dst(struct r600_shader_ctx *ctx,
504 const struct tgsi_full_dst_register *tgsi_dst,
505 unsigned swizzle,
506 struct r600_bc_alu_dst *r600_dst)
507 {
508 r600_dst->sel = tgsi_dst->Register.Index;
509 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
510 r600_dst->chan = swizzle;
511 r600_dst->write = 1;
512 return 0;
513 }
514
515 static int tgsi_op2(struct r600_shader_ctx *ctx)
516 {
517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
518 struct r600_bc_alu alu;
519 int i, j, r;
520
521 for (i = 0; i < 4; i++) {
522 memset(&alu, 0, sizeof(struct r600_bc_alu));
523 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
524 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
525 } else {
526 alu.inst = ctx->inst_info->r600_opcode;
527 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
528 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
529 if (r)
530 return r;
531 }
532 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
533 if (r)
534 return r;
535 }
536 /* handle some special cases */
537 switch (ctx->inst_info->tgsi_opcode) {
538 case TGSI_OPCODE_SUB:
539 alu.src[1].neg = 1;
540 break;
541 default:
542 break;
543 }
544 if (i == 3) {
545 alu.last = 1;
546 }
547 r = r600_bc_add_alu(ctx->bc, &alu);
548 if (r)
549 return r;
550 }
551 return 0;
552 }
553
554 static int tgsi_slt(struct r600_shader_ctx *ctx)
555 {
556 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
557 struct r600_bc_alu alu;
558 int i, r;
559
560 for (i = 0; i < 4; i++) {
561 memset(&alu, 0, sizeof(struct r600_bc_alu));
562 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
563 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
564 } else {
565 alu.inst = ctx->inst_info->r600_opcode;
566 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
567 if (r)
568 return r;
569 r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]);
570 if (r)
571 return r;
572 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
573 if (r)
574 return r;
575 }
576 if (i == 3) {
577 alu.last = 1;
578 }
579 r = r600_bc_add_alu(ctx->bc, &alu);
580 if (r)
581 return r;
582 }
583 return 0;
584 }
585
586 static int tgsi_lit(struct r600_shader_ctx *ctx)
587 {
588 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
589 struct r600_bc_alu alu;
590
591 int r;
592
593
594 if (inst->Dst[0].Register.WriteMask & (1 << 0))
595 {
596 /* dst.x, <- 1.0 */
597 memset(&alu, 0, sizeof(struct r600_bc_alu));
598 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
599 alu.src[0].sel = 249; /*1.0*/
600 alu.src[0].chan = 0;
601 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
602 if (r)
603 return r;
604 if ((inst->Dst[0].Register.WriteMask & 0xe) == 0)
605 alu.last = 1;
606 r = r600_bc_add_alu(ctx->bc, &alu);
607 if (r)
608 return r;
609 }
610
611
612 if (inst->Dst[0].Register.WriteMask & (1 << 1))
613 {
614 /* dst.y = max(src.x, 0.0) */
615 memset(&alu, 0, sizeof(struct r600_bc_alu));
616 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
617 r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]);
618 if (r)
619 return r;
620 alu.src[1].sel = 248; /*0.0*/
621 alu.src[1].chan = 0;
622 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
623 if (r)
624 return r;
625 if ((inst->Dst[0].Register.WriteMask & 0xa) == 0)
626 alu.last = 1;
627 r = r600_bc_add_alu(ctx->bc, &alu);
628 if (r)
629 return r;
630 }
631
632 if (inst->Dst[0].Register.WriteMask & (1 << 3))
633 {
634 /* dst.w, <- 1.0 */
635 memset(&alu, 0, sizeof(struct r600_bc_alu));
636 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
637 alu.src[0].sel = 249;
638 alu.src[0].chan = 0;
639 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
640 if (r)
641 return r;
642 if ((inst->Dst[0].Register.WriteMask & 0x4) == 0)
643 alu.last = 1;
644 r = r600_bc_add_alu(ctx->bc, &alu);
645 if (r)
646 return r;
647 }
648
649 if (inst->Dst[0].Register.WriteMask & (1 << 2))
650 {
651 int chan;
652 int sel;
653
654 /* dst.z = log(src.y) */
655 memset(&alu, 0, sizeof(struct r600_bc_alu));
656 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
657 r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]);
658 if (r)
659 return r;
660 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
661 if (r)
662 return r;
663 alu.last = 1;
664 r = r600_bc_add_alu(ctx->bc, &alu);
665 if (r)
666 return r;
667
668 chan = alu.dst.chan;
669 sel = alu.dst.sel;
670
671 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
672 memset(&alu, 0, sizeof(struct r600_bc_alu));
673 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
674 r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]);
675 if (r)
676 return r;
677 alu.src[1].sel = sel;
678 alu.src[1].chan = chan;
679 r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]);
680 if (r)
681 return r;
682 alu.dst.sel = ctx->temp_reg;
683 alu.dst.chan = 0;
684 alu.dst.write = 1;
685 alu.is_op3 = 1;
686 alu.last = 1;
687 r = r600_bc_add_alu(ctx->bc, &alu);
688 if (r)
689 return r;
690
691 /* dst.z = exp(tmp.x) */
692 memset(&alu, 0, sizeof(struct r600_bc_alu));
693 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
694 alu.src[0].sel = ctx->temp_reg;
695 alu.src[0].chan = 0;
696 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
697 if (r)
698 return r;
699 alu.last = 1;
700 r = r600_bc_add_alu(ctx->bc, &alu);
701 if (r)
702 return r;
703 }
704 return 0;
705 }
706
707 static int tgsi_trans(struct r600_shader_ctx *ctx)
708 {
709 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
710 struct r600_bc_alu alu;
711 int i, j, r;
712
713 for (i = 0; i < 4; i++) {
714 memset(&alu, 0, sizeof(struct r600_bc_alu));
715 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
716 alu.inst = ctx->inst_info->r600_opcode;
717 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
718 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
719 if (r)
720 return r;
721 }
722 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
723 if (r)
724 return r;
725 alu.last = 1;
726 r = r600_bc_add_alu(ctx->bc, &alu);
727 if (r)
728 return r;
729 }
730 }
731 return 0;
732 }
733
734 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
735 {
736 struct r600_bc_alu alu;
737 int i, r;
738
739 r = r600_bc_add_literal(ctx->bc, ctx->value);
740 if (r)
741 return r;
742 for (i = 0; i < 4; i++) {
743 memset(&alu, 0, sizeof(struct r600_bc_alu));
744 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
745 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
746 } else {
747 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
748 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
749 if (r)
750 return r;
751 alu.src[0].sel = ctx->temp_reg;
752 alu.src[0].chan = i;
753 }
754 if (i == 3) {
755 alu.last = 1;
756 }
757 r = r600_bc_add_alu(ctx->bc, &alu);
758 if (r)
759 return r;
760 }
761 return 0;
762 }
763
764 static int tgsi_op3(struct r600_shader_ctx *ctx)
765 {
766 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
767 struct r600_bc_alu alu;
768 int i, j, r;
769
770 /* do it in 2 step as op3 doesn't support writemask */
771 for (i = 0; i < 4; i++) {
772 memset(&alu, 0, sizeof(struct r600_bc_alu));
773 alu.inst = ctx->inst_info->r600_opcode;
774 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
775 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
776 if (r)
777 return r;
778 }
779 alu.dst.sel = ctx->temp_reg;
780 alu.dst.chan = i;
781 alu.dst.write = 1;
782 alu.is_op3 = 1;
783 if (i == 3) {
784 alu.last = 1;
785 }
786 r = r600_bc_add_alu(ctx->bc, &alu);
787 if (r)
788 return r;
789 }
790 return tgsi_helper_copy(ctx, inst);
791 }
792
793 static int tgsi_dp(struct r600_shader_ctx *ctx)
794 {
795 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
796 struct r600_bc_alu alu;
797 int i, j, r;
798
799 for (i = 0; i < 4; i++) {
800 memset(&alu, 0, sizeof(struct r600_bc_alu));
801 alu.inst = ctx->inst_info->r600_opcode;
802 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
803 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
804 if (r)
805 return r;
806 }
807 alu.dst.sel = ctx->temp_reg;
808 alu.dst.chan = i;
809 alu.dst.write = 1;
810 /* handle some special cases */
811 switch (ctx->inst_info->tgsi_opcode) {
812 case TGSI_OPCODE_DP2:
813 if (i > 1) {
814 alu.src[0].sel = alu.src[1].sel = 248;
815 alu.src[0].chan = alu.src[1].chan = 0;
816 }
817 break;
818 case TGSI_OPCODE_DP3:
819 if (i > 2) {
820 alu.src[0].sel = alu.src[1].sel = 248;
821 alu.src[0].chan = alu.src[1].chan = 0;
822 }
823 break;
824 default:
825 break;
826 }
827 if (i == 3) {
828 alu.last = 1;
829 }
830 r = r600_bc_add_alu(ctx->bc, &alu);
831 if (r)
832 return r;
833 }
834 return tgsi_helper_copy(ctx, inst);
835 }
836
837 static int tgsi_tex(struct r600_shader_ctx *ctx)
838 {
839 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
840 struct r600_bc_tex tex;
841 struct r600_bc_alu alu;
842 unsigned src_gpr;
843 int r;
844
845 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
846
847 /* Add perspective divide */
848 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) {
849 memset(&alu, 0, sizeof(struct r600_bc_alu));
850 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
851 alu.src[0].sel = src_gpr;
852 alu.src[0].chan = 3;
853 alu.dst.sel = ctx->temp_reg;
854 alu.dst.chan = 3;
855 alu.last = 1;
856 alu.dst.write = 1;
857 r = r600_bc_add_alu(ctx->bc, &alu);
858 if (r)
859 return r;
860
861 memset(&alu, 0, sizeof(struct r600_bc_alu));
862 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
863 alu.src[0].sel = ctx->temp_reg;
864 alu.src[0].chan = 3;
865 alu.src[1].sel = src_gpr;
866 alu.src[1].chan = 0;
867 alu.dst.sel = ctx->temp_reg;
868 alu.dst.chan = 0;
869 alu.dst.write = 1;
870 r = r600_bc_add_alu(ctx->bc, &alu);
871 if (r)
872 return r;
873 memset(&alu, 0, sizeof(struct r600_bc_alu));
874 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
875 alu.src[0].sel = ctx->temp_reg;
876 alu.src[0].chan = 3;
877 alu.src[1].sel = src_gpr;
878 alu.src[1].chan = 1;
879 alu.dst.sel = ctx->temp_reg;
880 alu.dst.chan = 1;
881 alu.dst.write = 1;
882 r = r600_bc_add_alu(ctx->bc, &alu);
883 if (r)
884 return r;
885 memset(&alu, 0, sizeof(struct r600_bc_alu));
886 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
887 alu.src[0].sel = ctx->temp_reg;
888 alu.src[0].chan = 3;
889 alu.src[1].sel = src_gpr;
890 alu.src[1].chan = 2;
891 alu.dst.sel = ctx->temp_reg;
892 alu.dst.chan = 2;
893 alu.dst.write = 1;
894 r = r600_bc_add_alu(ctx->bc, &alu);
895 if (r)
896 return r;
897 memset(&alu, 0, sizeof(struct r600_bc_alu));
898 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
899 alu.src[0].sel = 249;
900 alu.src[0].chan = 0;
901 alu.dst.sel = ctx->temp_reg;
902 alu.dst.chan = 3;
903 alu.last = 1;
904 alu.dst.write = 1;
905 r = r600_bc_add_alu(ctx->bc, &alu);
906 if (r)
907 return r;
908 src_gpr = ctx->temp_reg;
909 }
910
911 /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
912 memset(&tex, 0, sizeof(struct r600_bc_tex));
913 tex.inst = ctx->inst_info->r600_opcode;
914 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
915 tex.sampler_id = tex.resource_id;
916 tex.src_gpr = src_gpr;
917 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index;
918 tex.dst_sel_x = 0;
919 tex.dst_sel_y = 1;
920 tex.dst_sel_z = 2;
921 tex.dst_sel_w = 3;
922 tex.src_sel_x = 0;
923 tex.src_sel_y = 1;
924 tex.src_sel_z = 2;
925 tex.src_sel_w = 3;
926 tex.coord_type_x = 1;
927 tex.coord_type_y = 1;
928 tex.coord_type_z = 1;
929 tex.coord_type_w = 1;
930 return r600_bc_add_tex(ctx->bc, &tex);
931 }
932
933 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
934 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
935 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
936 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
937 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
938 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans},
939 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
940 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
941 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
942 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
943 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
944 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
945 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
946 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
947 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
948 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
949 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
950 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
951 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
952 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
953 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
954 /* gap */
955 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
956 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
957 /* gap */
958 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
959 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
960 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
961 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
962 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
963 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
964 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
965 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
966 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
967 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
968 /* gap */
969 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
970 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
971 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
972 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
973 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
974 {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
975 {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
976 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
977 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
978 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
979 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
980 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
981 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
982 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
983 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
984 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
985 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
986 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
987 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
988 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
989 {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex},
990 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
991 {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex},
992 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
993 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
994 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
995 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
996 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
997 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
998 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
999 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1000 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1001 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1002 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
1003 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1004 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1005 {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1006 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1007 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1008 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1009 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1010 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1011 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1012 /* gap */
1013 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1014 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1015 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1016 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1017 /* gap */
1018 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1019 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1020 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1021 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1022 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1023 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1024 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1025 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1026 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1027 /* gap */
1028 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1029 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1030 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1031 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1032 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1033 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1034 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1035 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1036 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1037 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1038 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1039 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1040 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1041 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1042 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1043 /* gap */
1044 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1045 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1046 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1047 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1048 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1049 /* gap */
1050 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1051 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1052 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1053 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1054 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1055 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1056 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1057 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1058 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */
1059 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1060 /* gap */
1061 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1062 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1063 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1064 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1065 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1066 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1067 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1068 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1069 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1070 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1071 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1072 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1073 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1074 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1075 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1076 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1077 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1078 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1079 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1080 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1081 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1082 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1083 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1084 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1085 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1086 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1087 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1088 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1089 };