r600g: add sin/cos
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
/*
 * Working state for translating one TGSI token stream into r600 bytecode.
 * An instance lives on the stack of r600_shader_from_tgsi() and is handed to
 * every per-opcode handler.
 */
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info; /* filled by tgsi_scan_shader(); file_count[] drives the register layout */
42 struct tgsi_parse_context parse; /* token iterator; FullToken holds the token being processed */
43 const struct tgsi_token *tokens; /* token stream being translated */
44 unsigned type; /* processor type taken from the parsed shader header */
45 unsigned file_offset[TGSI_FILE_COUNT]; /* per TGSI file: value added to a register index to form the hardware selector */
46 unsigned temp_reg; /* first register index past the TGSI temporaries; handlers use it as scratch */
47 struct r600_shader_tgsi_instruction *inst_info; /* table entry of the instruction being translated */
48 struct r600_bc *bc; /* bytecode under construction (points at shader->bc) */
49 struct r600_shader *shader; /* shader receiving input/output descriptions */
50 u32 value[4]; /* the four words of the most recently parsed immediate token */
51 };
52
/*
 * One entry of the TGSI opcode translation table. The table is indexed by
 * the TGSI opcode of the instruction being translated.
 */
53 struct r600_shader_tgsi_instruction {
54 unsigned tgsi_opcode; /* TGSI opcode this entry describes; handlers read it for special cases and error messages */
55 unsigned is_op3; /* NOTE(review): presumably marks three-operand hardware opcodes; not read in the code visible here */
56 unsigned r600_opcode; /* hardware opcode that handlers copy into alu.inst */
57 int (*process)(struct r600_shader_ctx *ctx); /* handler emitting bytecode; returns 0 or a negative errno */
58 };
59
/*
 * Forward declarations: the opcode table and the TGSI translator are defined
 * later in the file but are referenced by the functions below.
 */
60 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 int r600_pipe_shader_create(struct pipe_context *ctx,
102 struct r600_context_state *rpshader,
103 const struct tgsi_token *tokens)
104 {
105 struct r600_screen *rscreen = r600_screen(ctx->screen);
106 int r;
107
108 fprintf(stderr, "--------------------------------------------------------------\n");
109 tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return -ENOMEM;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 return r;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 return r;
122 }
123 fprintf(stderr, "______________________________________________________________\n");
124 return 0;
125 }
126
127 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128 {
129 struct r600_screen *rscreen = r600_screen(ctx->screen);
130 struct r600_shader *rshader = &rpshader->shader;
131 struct radeon_state *state;
132 unsigned i, tmp;
133
134 rpshader->rstate = radeon_state_decref(rpshader->rstate);
135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136 if (state == NULL)
137 return -ENOMEM;
138 for (i = 0; i < 10; i++) {
139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140 }
141 /* so far never got proper semantic id from tgsi */
142 for (i = 0; i < 32; i++) {
143 tmp = i << ((i & 3) * 8);
144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145 }
146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148 rpshader->rstate = state;
149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151 rpshader->rstate->nbo = 2;
152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153 return radeon_state_pm4(state);
154 }
155
156 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
157 {
158 const struct pipe_rasterizer_state *rasterizer;
159 struct r600_screen *rscreen = r600_screen(ctx->screen);
160 struct r600_shader *rshader = &rpshader->shader;
161 struct r600_context *rctx = r600_context(ctx);
162 struct radeon_state *state;
163 unsigned i, tmp, exports_ps, num_cout;
164
165 rasterizer = &rctx->rasterizer->state.rasterizer;
166 rpshader->rstate = radeon_state_decref(rpshader->rstate);
167 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
168 if (state == NULL)
169 return -ENOMEM;
170 for (i = 0; i < rshader->ninput; i++) {
171 tmp = S_028644_SEMANTIC(i);
172 tmp |= S_028644_SEL_CENTROID(1);
173 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
174 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
175 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
176 }
177 if (rasterizer->sprite_coord_enable & (1 << i)) {
178 tmp |= S_028644_PT_SPRITE_TEX(1);
179 }
180 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
181 }
182
183 exports_ps = 0;
184 num_cout = 0;
185 for (i = 0; i < rshader->noutput; i++) {
186 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
187 exports_ps |= 1;
188 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
189 exports_ps |= (1 << (num_cout+1));
190 num_cout++;
191 }
192 }
193 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
194 S_0286CC_PERSP_GRADIENT_ENA(1);
195 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
196 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
197 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
198 rpshader->rstate = state;
199 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
200 rpshader->rstate->nbo = 1;
201 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
202 return radeon_state_pm4(state);
203 }
204
205 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
206 {
207 struct r600_screen *rscreen = r600_screen(ctx->screen);
208 struct r600_context *rctx = r600_context(ctx);
209 struct r600_shader *rshader = &rpshader->shader;
210 int r;
211
212 /* copy new shader */
213 radeon_bo_decref(rscreen->rw, rpshader->bo);
214 rpshader->bo = NULL;
215 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
216 4096, NULL);
217 if (rpshader->bo == NULL) {
218 return -ENOMEM;
219 }
220 radeon_bo_map(rscreen->rw, rpshader->bo);
221 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
222 radeon_bo_unmap(rscreen->rw, rpshader->bo);
223 /* build state */
224 rshader->flat_shade = rctx->flat_shade;
225 switch (rshader->processor_type) {
226 case TGSI_PROCESSOR_VERTEX:
227 r = r600_pipe_shader_vs(ctx, rpshader);
228 break;
229 case TGSI_PROCESSOR_FRAGMENT:
230 r = r600_pipe_shader_ps(ctx, rpshader);
231 break;
232 default:
233 r = -EINVAL;
234 break;
235 }
236 return r;
237 }
238
239 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
240 {
241 struct r600_context *rctx = r600_context(ctx);
242 int r;
243
244 if (rpshader == NULL)
245 return -EINVAL;
246 /* there should be enough input */
247 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
248 R600_ERR("%d resources provided, expecting %d\n",
249 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
250 return -EINVAL;
251 }
252 r = r600_shader_update(ctx, &rpshader->shader);
253 if (r)
254 return r;
255 return r600_pipe_shader(ctx, rpshader);
256 }
257
258 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
259 {
260 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
261 int j;
262
263 if (i->Instruction.NumDstRegs > 1) {
264 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
265 return -EINVAL;
266 }
267 if (i->Instruction.Predicate) {
268 R600_ERR("predicate unsupported\n");
269 return -EINVAL;
270 }
271 if (i->Instruction.Label) {
272 R600_ERR("label unsupported\n");
273 return -EINVAL;
274 }
275 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
276 if (i->Src[j].Register.Indirect ||
277 i->Src[j].Register.Dimension ||
278 i->Src[j].Register.Absolute) {
279 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
280 return -EINVAL;
281 }
282 }
283 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
284 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
285 R600_ERR("unsupported dst (indirect|dimension)\n");
286 return -EINVAL;
287 }
288 }
289 return 0;
290 }
291
292 static int tgsi_declaration(struct r600_shader_ctx *ctx)
293 {
294 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
295 struct r600_bc_vtx vtx;
296 unsigned i;
297 int r;
298
299 switch (d->Declaration.File) {
300 case TGSI_FILE_INPUT:
301 i = ctx->shader->ninput++;
302 ctx->shader->input[i].name = d->Semantic.Name;
303 ctx->shader->input[i].sid = d->Semantic.Index;
304 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
305 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
306 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
307 /* turn input into fetch */
308 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
309 vtx.inst = 0;
310 vtx.fetch_type = 0;
311 vtx.buffer_id = i;
312 /* register containing the index into the buffer */
313 vtx.src_gpr = 0;
314 vtx.src_sel_x = 0;
315 vtx.mega_fetch_count = 0x1F;
316 vtx.dst_gpr = ctx->shader->input[i].gpr;
317 vtx.dst_sel_x = 0;
318 vtx.dst_sel_y = 1;
319 vtx.dst_sel_z = 2;
320 vtx.dst_sel_w = 3;
321 r = r600_bc_add_vtx(ctx->bc, &vtx);
322 if (r)
323 return r;
324 }
325 break;
326 case TGSI_FILE_OUTPUT:
327 i = ctx->shader->noutput++;
328 ctx->shader->output[i].name = d->Semantic.Name;
329 ctx->shader->output[i].sid = d->Semantic.Index;
330 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
331 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
332 break;
333 case TGSI_FILE_CONSTANT:
334 case TGSI_FILE_TEMPORARY:
335 case TGSI_FILE_SAMPLER:
336 break;
337 default:
338 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
339 return -EINVAL;
340 }
341 return 0;
342 }
343
344 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
345 {
346 struct tgsi_full_immediate *immediate;
347 struct r600_shader_ctx ctx;
348 struct r600_bc_output output[32];
349 unsigned output_done, noutput;
350 unsigned opcode;
351 int i, r = 0, pos0;
352
353 ctx.bc = &shader->bc;
354 ctx.shader = shader;
355 r = r600_bc_init(ctx.bc, shader->family);
356 if (r)
357 return r;
358 ctx.tokens = tokens;
359 tgsi_scan_shader(tokens, &ctx.info);
360 tgsi_parse_init(&ctx.parse, tokens);
361 ctx.type = ctx.parse.FullHeader.Processor.Processor;
362 shader->processor_type = ctx.type;
363
364 /* register allocations */
365 /* Values [0,127] correspond to GPR[0..127].
366 * Values [256,511] correspond to cfile constants c[0..255].
367 * Other special values are shown in the list below.
368 * 248 SQ_ALU_SRC_0: special constant 0.0.
369 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
370 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
371 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
372 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
373 * 253 SQ_ALU_SRC_LITERAL: literal constant.
374 * 254 SQ_ALU_SRC_PV: previous vector result.
375 * 255 SQ_ALU_SRC_PS: previous scalar result.
376 */
377 for (i = 0; i < TGSI_FILE_COUNT; i++) {
378 ctx.file_offset[i] = 0;
379 }
380 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
381 ctx.file_offset[TGSI_FILE_INPUT] = 1;
382 }
383 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
384 ctx.info.file_count[TGSI_FILE_INPUT];
385 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
386 ctx.info.file_count[TGSI_FILE_OUTPUT];
387 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
388 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
389 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
390 ctx.info.file_count[TGSI_FILE_TEMPORARY];
391
392 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
393 tgsi_parse_token(&ctx.parse);
394 switch (ctx.parse.FullToken.Token.Type) {
395 case TGSI_TOKEN_TYPE_IMMEDIATE:
396 immediate = &ctx.parse.FullToken.FullImmediate;
397 ctx.value[0] = immediate->u[0].Uint;
398 ctx.value[1] = immediate->u[1].Uint;
399 ctx.value[2] = immediate->u[2].Uint;
400 ctx.value[3] = immediate->u[3].Uint;
401 break;
402 case TGSI_TOKEN_TYPE_DECLARATION:
403 r = tgsi_declaration(&ctx);
404 if (r)
405 goto out_err;
406 break;
407 case TGSI_TOKEN_TYPE_INSTRUCTION:
408 r = tgsi_is_supported(&ctx);
409 if (r)
410 goto out_err;
411 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
412 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
413 r = ctx.inst_info->process(&ctx);
414 if (r)
415 goto out_err;
416 r = r600_bc_add_literal(ctx.bc, ctx.value);
417 if (r)
418 goto out_err;
419 break;
420 default:
421 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
422 r = -EINVAL;
423 goto out_err;
424 }
425 }
426 /* export output */
427 noutput = shader->noutput;
428 for (i = 0, pos0 = 0; i < noutput; i++) {
429 memset(&output[i], 0, sizeof(struct r600_bc_output));
430 output[i].gpr = shader->output[i].gpr;
431 output[i].elem_size = 3;
432 output[i].swizzle_x = 0;
433 output[i].swizzle_y = 1;
434 output[i].swizzle_z = 2;
435 output[i].swizzle_w = 3;
436 output[i].barrier = 1;
437 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
438 output[i].array_base = i - pos0;
439 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
440 switch (ctx.type) {
441 case TGSI_PROCESSOR_VERTEX:
442 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
443 output[i].array_base = 60;
444 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
445 /* position doesn't count in array_base */
446 pos0++;
447 }
448 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
449 output[i].array_base = 61;
450 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
451 /* position doesn't count in array_base */
452 pos0++;
453 }
454 break;
455 case TGSI_PROCESSOR_FRAGMENT:
456 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
457 output[i].array_base = shader->output[i].sid;
458 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
459 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
460 output[i].array_base = 61;
461 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
462 } else {
463 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
464 r = -EINVAL;
465 goto out_err;
466 }
467 break;
468 default:
469 R600_ERR("unsupported processor type %d\n", ctx.type);
470 r = -EINVAL;
471 goto out_err;
472 }
473 }
474 /* add fake param output for vertex shader if no param is exported */
475 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
476 for (i = 0, pos0 = 0; i < noutput; i++) {
477 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
478 pos0 = 1;
479 break;
480 }
481 }
482 if (!pos0) {
483 memset(&output[i], 0, sizeof(struct r600_bc_output));
484 output[i].gpr = 0;
485 output[i].elem_size = 3;
486 output[i].swizzle_x = 0;
487 output[i].swizzle_y = 1;
488 output[i].swizzle_z = 2;
489 output[i].swizzle_w = 3;
490 output[i].barrier = 1;
491 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
492 output[i].array_base = 0;
493 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
494 noutput++;
495 }
496 }
497 /* add fake pixel export */
498 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
499 memset(&output[0], 0, sizeof(struct r600_bc_output));
500 output[0].gpr = 0;
501 output[0].elem_size = 3;
502 output[0].swizzle_x = 7;
503 output[0].swizzle_y = 7;
504 output[0].swizzle_z = 7;
505 output[0].swizzle_w = 7;
506 output[0].barrier = 1;
507 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
508 output[0].array_base = 0;
509 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
510 noutput++;
511 }
512 /* set export done on last export of each type */
513 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
514 if (i == (noutput - 1)) {
515 output[i].end_of_program = 1;
516 }
517 if (!(output_done & (1 << output[i].type))) {
518 output_done |= (1 << output[i].type);
519 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
520 }
521 }
522 /* add output to bytecode */
523 for (i = 0; i < noutput; i++) {
524 r = r600_bc_add_output(ctx.bc, &output[i]);
525 if (r)
526 goto out_err;
527 }
528 tgsi_parse_free(&ctx.parse);
529 return 0;
530 out_err:
531 tgsi_parse_free(&ctx.parse);
532 return r;
533 }
534
535 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
536 {
537 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
538 return -EINVAL;
539 }
540
/*
 * Handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx; /* nothing to translate */
	return 0;
}
545
546 static int tgsi_src(struct r600_shader_ctx *ctx,
547 const struct tgsi_full_src_register *tgsi_src,
548 struct r600_bc_alu_src *r600_src)
549 {
550 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
551 r600_src->sel = tgsi_src->Register.Index;
552 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
553 r600_src->sel = 0;
554 }
555 r600_src->neg = tgsi_src->Register.Negate;
556 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
557 return 0;
558 }
559
560 static int tgsi_dst(struct r600_shader_ctx *ctx,
561 const struct tgsi_full_dst_register *tgsi_dst,
562 unsigned swizzle,
563 struct r600_bc_alu_dst *r600_dst)
564 {
565 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
566
567 r600_dst->sel = tgsi_dst->Register.Index;
568 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
569 r600_dst->chan = swizzle;
570 r600_dst->write = 1;
571 if (inst->Instruction.Saturate) {
572 r600_dst->clamp = 1;
573 }
574 return 0;
575 }
576
577 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
578 {
579 switch (swizzle) {
580 case 0:
581 return tgsi_src->Register.SwizzleX;
582 case 1:
583 return tgsi_src->Register.SwizzleY;
584 case 2:
585 return tgsi_src->Register.SwizzleZ;
586 case 3:
587 return tgsi_src->Register.SwizzleW;
588 default:
589 return 0;
590 }
591 }
592
593 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
594 {
595 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
596 struct r600_bc_alu alu;
597 int i, j, k, nconst, r;
598
599 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
600 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
601 nconst++;
602 }
603 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
604 if (r) {
605 return r;
606 }
607 }
608 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
609 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
610 for (k = 0; k < 4; k++) {
611 memset(&alu, 0, sizeof(struct r600_bc_alu));
612 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
613 alu.src[0].sel = r600_src[0].sel;
614 alu.src[0].chan = k;
615 alu.dst.sel = ctx->temp_reg + j;
616 alu.dst.chan = k;
617 alu.dst.write = 1;
618 if (k == 3)
619 alu.last = 1;
620 r = r600_bc_add_alu(ctx->bc, &alu);
621 if (r)
622 return r;
623 }
624 r600_src[0].sel = ctx->temp_reg + j;
625 j--;
626 }
627 }
628 return 0;
629 }
630
631 static int tgsi_op2(struct r600_shader_ctx *ctx)
632 {
633 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
634 struct r600_bc_alu_src r600_src[3];
635 struct r600_bc_alu alu;
636 int i, j, r;
637
638 r = tgsi_split_constant(ctx, r600_src);
639 if (r)
640 return r;
641 for (i = 0; i < 4; i++) {
642 memset(&alu, 0, sizeof(struct r600_bc_alu));
643 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
644 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
645 alu.dst.chan = i;
646 } else {
647 alu.inst = ctx->inst_info->r600_opcode;
648 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
649 alu.src[j] = r600_src[j];
650 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
651 }
652 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
653 if (r)
654 return r;
655 }
656 /* handle some special cases */
657 switch (ctx->inst_info->tgsi_opcode) {
658 case TGSI_OPCODE_SUB:
659 alu.src[1].neg = 1;
660 break;
661 case TGSI_OPCODE_ABS:
662 alu.src[0].abs = 1;
663 break;
664 default:
665 break;
666 }
667 if (i == 3) {
668 alu.last = 1;
669 }
670 r = r600_bc_add_alu(ctx->bc, &alu);
671 if (r)
672 return r;
673 }
674 return 0;
675 }
676
677 /*
678 * r600 - trunc to -PI..PI range
679 * r700 - normalize by dividing by 2PI
680 * see fdo bug 27901
681 */
682 static int tgsi_trig(struct r600_shader_ctx *ctx)
683 {
684 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
685 struct r600_bc_alu_src r600_src[3];
686 struct r600_bc_alu alu;
687 int i, r;
688 uint32_t lit_vals[4];
689
690 memset(lit_vals, 0, 4*4);
691 r = tgsi_split_constant(ctx, r600_src);
692 if (r)
693 return r;
694 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
695 lit_vals[1] = fui(0.5f);
696
697 memset(&alu, 0, sizeof(struct r600_bc_alu));
698 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
699 alu.is_op3 = 1;
700
701 alu.dst.chan = 0;
702 alu.dst.sel = ctx->temp_reg;
703 alu.dst.write = 1;
704
705 alu.src[0] = r600_src[0];
706 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
707
708 alu.src[1].sel = SQ_ALU_SRC_LITERAL;
709 alu.src[1].chan = 0;
710 alu.src[2].sel = SQ_ALU_SRC_LITERAL;
711 alu.src[2].chan = 1;
712 alu.last = 1;
713 r = r600_bc_add_alu(ctx->bc, &alu);
714 if (r)
715 return r;
716 r = r600_bc_add_literal(ctx->bc, lit_vals);
717 if (r)
718 return r;
719
720 memset(&alu, 0, sizeof(struct r600_bc_alu));
721 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
722
723 alu.dst.chan = 0;
724 alu.dst.sel = ctx->temp_reg;
725 alu.dst.write = 1;
726
727 alu.src[0].sel = ctx->temp_reg;
728 alu.src[0].chan = 0;
729 alu.last = 1;
730 r = r600_bc_add_alu(ctx->bc, &alu);
731 if (r)
732 return r;
733
734 if (ctx->bc->chiprev == 0) {
735 lit_vals[0] = fui(3.1415926535897f * 2.0f);
736 lit_vals[1] = fui(-3.1415926535897f);
737 } else {
738 lit_vals[0] = fui(1.0f);
739 lit_vals[1] = fui(-0.5f);
740 }
741
742 memset(&alu, 0, sizeof(struct r600_bc_alu));
743 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
744 alu.is_op3 = 1;
745
746 alu.dst.chan = 0;
747 alu.dst.sel = ctx->temp_reg;
748 alu.dst.write = 1;
749
750 alu.src[0].sel = ctx->temp_reg;
751 alu.src[0].chan = 0;
752
753 alu.src[1].sel = SQ_ALU_SRC_LITERAL;
754 alu.src[1].chan = 0;
755 alu.src[2].sel = SQ_ALU_SRC_LITERAL;
756 alu.src[2].chan = 1;
757 alu.last = 1;
758 r = r600_bc_add_alu(ctx->bc, &alu);
759 if (r)
760 return r;
761 r = r600_bc_add_literal(ctx->bc, lit_vals);
762 if (r)
763 return r;
764
765 memset(&alu, 0, sizeof(struct r600_bc_alu));
766 alu.inst = ctx->inst_info->r600_opcode;
767 alu.dst.chan = 0;
768 alu.dst.sel = ctx->temp_reg;
769 alu.dst.write = 1;
770
771 alu.src[0].sel = ctx->temp_reg;
772 alu.src[0].chan = 0;
773 alu.last = 1;
774 r = r600_bc_add_alu(ctx->bc, &alu);
775 if (r)
776 return r;
777
778 /* replicate result */
779 for (i = 0; i < 4; i++) {
780 memset(&alu, 0, sizeof(struct r600_bc_alu));
781 alu.src[0].sel = ctx->temp_reg;
782 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
783 alu.dst.chan = i;
784 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
785 if (r)
786 return r;
787 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
788 if (i == 3)
789 alu.last = 1;
790 r = r600_bc_add_alu(ctx->bc, &alu);
791 if (r)
792 return r;
793 }
794 return 0;
795 }
796
797 static int tgsi_kill(struct r600_shader_ctx *ctx)
798 {
799 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
800 struct r600_bc_alu alu;
801 int i, r;
802
803 for (i = 0; i < 4; i++) {
804 memset(&alu, 0, sizeof(struct r600_bc_alu));
805 alu.inst = ctx->inst_info->r600_opcode;
806 alu.dst.chan = i;
807 alu.src[0].sel = SQ_ALU_SRC_0;
808 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
809 if (r)
810 return r;
811 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
812 if (i == 3) {
813 alu.last = 1;
814 }
815 r = r600_bc_add_alu(ctx->bc, &alu);
816 if (r)
817 return r;
818 }
819 return 0;
820 }
821
822 static int tgsi_slt(struct r600_shader_ctx *ctx)
823 {
824 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
825 struct r600_bc_alu_src r600_src[3];
826 struct r600_bc_alu alu;
827 int i, r;
828
829 r = tgsi_split_constant(ctx, r600_src);
830 if (r)
831 return r;
832 for (i = 0; i < 4; i++) {
833 memset(&alu, 0, sizeof(struct r600_bc_alu));
834 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
835 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
836 alu.dst.chan = i;
837 } else {
838 alu.inst = ctx->inst_info->r600_opcode;
839 alu.src[1] = r600_src[0];
840 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
841 alu.src[0] = r600_src[1];
842 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
843 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
844 if (r)
845 return r;
846 }
847 if (i == 3) {
848 alu.last = 1;
849 }
850 r = r600_bc_add_alu(ctx->bc, &alu);
851 if (r)
852 return r;
853 }
854 return 0;
855 }
856
857 static int tgsi_lit(struct r600_shader_ctx *ctx)
858 {
859 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
860 struct r600_bc_alu alu;
861 int r;
862
863 /* dst.x, <- 1.0 */
864 memset(&alu, 0, sizeof(struct r600_bc_alu));
865 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
866 alu.src[0].sel = SQ_ALU_SRC_1; /*1.0*/
867 alu.src[0].chan = 0;
868 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
869 if (r)
870 return r;
871 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
872 r = r600_bc_add_alu(ctx->bc, &alu);
873 if (r)
874 return r;
875
876 /* dst.y = max(src.x, 0.0) */
877 memset(&alu, 0, sizeof(struct r600_bc_alu));
878 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
879 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
880 if (r)
881 return r;
882 alu.src[1].sel = SQ_ALU_SRC_0; /*0.0*/
883 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
884 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
885 if (r)
886 return r;
887 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
888 r = r600_bc_add_alu(ctx->bc, &alu);
889 if (r)
890 return r;
891
892 /* dst.z = NOP - fill Z slot */
893 memset(&alu, 0, sizeof(struct r600_bc_alu));
894 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
895 alu.dst.chan = 2;
896 r = r600_bc_add_alu(ctx->bc, &alu);
897 if (r)
898 return r;
899
900 /* dst.w, <- 1.0 */
901 memset(&alu, 0, sizeof(struct r600_bc_alu));
902 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
903 alu.src[0].sel = SQ_ALU_SRC_1;
904 alu.src[0].chan = 0;
905 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
906 if (r)
907 return r;
908 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
909 alu.last = 1;
910 r = r600_bc_add_alu(ctx->bc, &alu);
911 if (r)
912 return r;
913
914 if (inst->Dst[0].Register.WriteMask & (1 << 2))
915 {
916 int chan;
917 int sel;
918
919 /* dst.z = log(src.y) */
920 memset(&alu, 0, sizeof(struct r600_bc_alu));
921 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
922 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
923 if (r)
924 return r;
925 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
926 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
927 if (r)
928 return r;
929 alu.last = 1;
930 r = r600_bc_add_alu(ctx->bc, &alu);
931 if (r)
932 return r;
933
934 chan = alu.dst.chan;
935 sel = alu.dst.sel;
936
937 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
938 memset(&alu, 0, sizeof(struct r600_bc_alu));
939 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
940 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
941 if (r)
942 return r;
943 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
944 alu.src[1].sel = sel;
945 alu.src[1].chan = chan;
946 r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
947 if (r)
948 return r;
949 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
950 alu.dst.sel = ctx->temp_reg;
951 alu.dst.chan = 0;
952 alu.dst.write = 1;
953 alu.is_op3 = 1;
954 alu.last = 1;
955 r = r600_bc_add_alu(ctx->bc, &alu);
956 if (r)
957 return r;
958
959 /* dst.z = exp(tmp.x) */
960 memset(&alu, 0, sizeof(struct r600_bc_alu));
961 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
962 alu.src[0].sel = ctx->temp_reg;
963 alu.src[0].chan = 0;
964 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
965 if (r)
966 return r;
967 alu.last = 1;
968 r = r600_bc_add_alu(ctx->bc, &alu);
969 if (r)
970 return r;
971 }
972 return 0;
973 }
974
975 static int tgsi_trans(struct r600_shader_ctx *ctx)
976 {
977 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
978 struct r600_bc_alu alu;
979 int i, j, r;
980
981 for (i = 0; i < 4; i++) {
982 memset(&alu, 0, sizeof(struct r600_bc_alu));
983 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
984 alu.inst = ctx->inst_info->r600_opcode;
985 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
986 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
987 if (r)
988 return r;
989 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
990 }
991 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
992 if (r)
993 return r;
994 alu.last = 1;
995 r = r600_bc_add_alu(ctx->bc, &alu);
996 if (r)
997 return r;
998 }
999 }
1000 return 0;
1001 }
1002
1003 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1004 {
1005 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1006 struct r600_bc_alu alu;
1007 int i, j, r;
1008
1009 memset(&alu, 0, sizeof(struct r600_bc_alu));
1010 alu.inst = ctx->inst_info->r600_opcode;
1011 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1012 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1013 if (r)
1014 return r;
1015 alu.src[j].chan = tgsi_chan(&inst->Src[j], 0);
1016 }
1017 alu.dst.sel = ctx->temp_reg;
1018 alu.dst.write = 1;
1019 alu.last = 1;
1020 r = r600_bc_add_alu(ctx->bc, &alu);
1021 if (r)
1022 return r;
1023 /* replicate result */
1024 for (i = 0; i < 4; i++) {
1025 memset(&alu, 0, sizeof(struct r600_bc_alu));
1026 alu.src[0].sel = ctx->temp_reg;
1027 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1028 alu.dst.chan = i;
1029 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1030 if (r)
1031 return r;
1032 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1033 if (i == 3)
1034 alu.last = 1;
1035 r = r600_bc_add_alu(ctx->bc, &alu);
1036 if (r)
1037 return r;
1038 }
1039 return 0;
1040 }
1041
1042 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1043 {
1044 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1045 struct r600_bc_alu alu;
1046 struct r600_bc_alu_src r600_src[3];
1047 int i, j, r;
1048
1049 r = tgsi_split_constant(ctx, r600_src);
1050 if (r)
1051 return r;
1052
1053 /* tmp = (src > 0 ? 1 : src) */
1054 for (i = 0; i < 4; i++) {
1055 memset(&alu, 0, sizeof(struct r600_bc_alu));
1056 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1057 alu.is_op3 = 1;
1058 alu.dst.sel = ctx->temp_reg;
1059 alu.dst.write = 1;
1060
1061 alu.src[0] = r600_src[0];
1062 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1063
1064 alu.src[1].sel = SQ_ALU_SRC_1;
1065
1066 alu.src[2] = r600_src[0];
1067 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1068 if (i == 3)
1069 alu.last = 1;
1070 r = r600_bc_add_alu(ctx->bc, &alu);
1071 if (r)
1072 return r;
1073 }
1074
1075 /* dst = (-tmp > 0 ? -1 : tmp) */
1076 for (i = 0; i < 4; i++) {
1077 memset(&alu, 0, sizeof(struct r600_bc_alu));
1078 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1079 alu.is_op3 = 1;
1080 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1081 if (r)
1082 return r;
1083
1084 alu.src[0].sel = ctx->temp_reg;
1085 alu.src[0].neg = 1;
1086
1087 alu.src[1].sel = SQ_ALU_SRC_1;
1088 alu.src[1].neg = 1;
1089
1090 alu.src[2].sel = ctx->temp_reg;
1091
1092 alu.dst.write = 1;
1093 if (i == 3)
1094 alu.last = 1;
1095 r = r600_bc_add_alu(ctx->bc, &alu);
1096 if (r)
1097 return r;
1098 }
1099 return 0;
1100 }
1101
1102 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1103 {
1104 struct r600_bc_alu alu;
1105 int i, r;
1106
1107 r = r600_bc_add_literal(ctx->bc, ctx->value);
1108 if (r)
1109 return r;
1110 for (i = 0; i < 4; i++) {
1111 memset(&alu, 0, sizeof(struct r600_bc_alu));
1112 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1113 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1114 alu.dst.chan = i;
1115 } else {
1116 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1117 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1118 if (r)
1119 return r;
1120 alu.src[0].sel = ctx->temp_reg;
1121 alu.src[0].chan = i;
1122 }
1123 if (i == 3) {
1124 alu.last = 1;
1125 }
1126 r = r600_bc_add_alu(ctx->bc, &alu);
1127 if (r)
1128 return r;
1129 }
1130 return 0;
1131 }
1132
1133 static int tgsi_op3(struct r600_shader_ctx *ctx)
1134 {
1135 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1136 struct r600_bc_alu_src r600_src[3];
1137 struct r600_bc_alu alu;
1138 int i, j, r;
1139
1140 r = tgsi_split_constant(ctx, r600_src);
1141 if (r)
1142 return r;
1143 /* do it in 2 step as op3 doesn't support writemask */
1144 for (i = 0; i < 4; i++) {
1145 memset(&alu, 0, sizeof(struct r600_bc_alu));
1146 alu.inst = ctx->inst_info->r600_opcode;
1147 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1148 alu.src[j] = r600_src[j];
1149 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1150 }
1151 alu.dst.sel = ctx->temp_reg;
1152 alu.dst.chan = i;
1153 alu.dst.write = 1;
1154 alu.is_op3 = 1;
1155 if (i == 3) {
1156 alu.last = 1;
1157 }
1158 r = r600_bc_add_alu(ctx->bc, &alu);
1159 if (r)
1160 return r;
1161 }
1162 return tgsi_helper_copy(ctx, inst);
1163 }
1164
1165 static int tgsi_dp(struct r600_shader_ctx *ctx)
1166 {
1167 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1168 struct r600_bc_alu_src r600_src[3];
1169 struct r600_bc_alu alu;
1170 int i, j, r;
1171
1172 r = tgsi_split_constant(ctx, r600_src);
1173 if (r)
1174 return r;
1175 for (i = 0; i < 4; i++) {
1176 memset(&alu, 0, sizeof(struct r600_bc_alu));
1177 alu.inst = ctx->inst_info->r600_opcode;
1178 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1179 alu.src[j] = r600_src[j];
1180 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1181 }
1182 alu.dst.sel = ctx->temp_reg;
1183 alu.dst.chan = i;
1184 alu.dst.write = 1;
1185 /* handle some special cases */
1186 switch (ctx->inst_info->tgsi_opcode) {
1187 case TGSI_OPCODE_DP2:
1188 if (i > 1) {
1189 alu.src[0].sel = alu.src[1].sel = SQ_ALU_SRC_0;
1190 alu.src[0].chan = alu.src[1].chan = 0;
1191 }
1192 break;
1193 case TGSI_OPCODE_DP3:
1194 if (i > 2) {
1195 alu.src[0].sel = alu.src[1].sel = SQ_ALU_SRC_0;
1196 alu.src[0].chan = alu.src[1].chan = 0;
1197 }
1198 break;
1199 default:
1200 break;
1201 }
1202 if (i == 3) {
1203 alu.last = 1;
1204 }
1205 r = r600_bc_add_alu(ctx->bc, &alu);
1206 if (r)
1207 return r;
1208 }
1209 return tgsi_helper_copy(ctx, inst);
1210 }
1211
1212 static int tgsi_tex(struct r600_shader_ctx *ctx)
1213 {
1214 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1215 struct r600_bc_tex tex;
1216 struct r600_bc_alu alu;
1217 unsigned src_gpr;
1218 int r, i;
1219
1220 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1221
1222 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1223 /* Add perspective divide */
1224 memset(&alu, 0, sizeof(struct r600_bc_alu));
1225 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1226 alu.src[0].sel = src_gpr;
1227 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1228 alu.dst.sel = ctx->temp_reg;
1229 alu.dst.chan = 3;
1230 alu.last = 1;
1231 alu.dst.write = 1;
1232 r = r600_bc_add_alu(ctx->bc, &alu);
1233 if (r)
1234 return r;
1235
1236 for (i = 0; i < 3; i++) {
1237 memset(&alu, 0, sizeof(struct r600_bc_alu));
1238 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1239 alu.src[0].sel = ctx->temp_reg;
1240 alu.src[0].chan = 3;
1241 alu.src[1].sel = src_gpr;
1242 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1243 alu.dst.sel = ctx->temp_reg;
1244 alu.dst.chan = i;
1245 alu.dst.write = 1;
1246 r = r600_bc_add_alu(ctx->bc, &alu);
1247 if (r)
1248 return r;
1249 }
1250 memset(&alu, 0, sizeof(struct r600_bc_alu));
1251 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1252 alu.src[0].sel = SQ_ALU_SRC_1;
1253 alu.src[0].chan = 0;
1254 alu.dst.sel = ctx->temp_reg;
1255 alu.dst.chan = 3;
1256 alu.last = 1;
1257 alu.dst.write = 1;
1258 r = r600_bc_add_alu(ctx->bc, &alu);
1259 if (r)
1260 return r;
1261 src_gpr = ctx->temp_reg;
1262 } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
1263 for (i = 0; i < 4; i++) {
1264 memset(&alu, 0, sizeof(struct r600_bc_alu));
1265 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1266 alu.src[0].sel = src_gpr;
1267 alu.src[0].chan = i;
1268 alu.dst.sel = ctx->temp_reg;
1269 alu.dst.chan = i;
1270 if (i == 3)
1271 alu.last = 1;
1272 alu.dst.write = 1;
1273 r = r600_bc_add_alu(ctx->bc, &alu);
1274 if (r)
1275 return r;
1276 }
1277 src_gpr = ctx->temp_reg;
1278 }
1279
1280 memset(&tex, 0, sizeof(struct r600_bc_tex));
1281 tex.inst = ctx->inst_info->r600_opcode;
1282 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1283 tex.sampler_id = tex.resource_id;
1284 tex.src_gpr = src_gpr;
1285 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1286 tex.dst_sel_x = 0;
1287 tex.dst_sel_y = 1;
1288 tex.dst_sel_z = 2;
1289 tex.dst_sel_w = 3;
1290 tex.src_sel_x = 0;
1291 tex.src_sel_y = 1;
1292 tex.src_sel_z = 2;
1293 tex.src_sel_w = 3;
1294
1295 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1296 tex.coord_type_x = 1;
1297 tex.coord_type_y = 1;
1298 tex.coord_type_z = 1;
1299 tex.coord_type_w = 1;
1300 }
1301 return r600_bc_add_tex(ctx->bc, &tex);
1302 }
1303
1304 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1305 {
1306 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1307 struct r600_bc_alu_src r600_src[3];
1308 struct r600_bc_alu alu;
1309 unsigned i;
1310 int r;
1311
1312 r = tgsi_split_constant(ctx, r600_src);
1313 if (r)
1314 return r;
1315 /* 1 - src0 */
1316 for (i = 0; i < 4; i++) {
1317 memset(&alu, 0, sizeof(struct r600_bc_alu));
1318 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1319 alu.src[0].sel = SQ_ALU_SRC_1;
1320 alu.src[0].chan = 0;
1321 alu.src[1] = r600_src[0];
1322 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1323 alu.src[1].neg = 1;
1324 alu.dst.sel = ctx->temp_reg;
1325 alu.dst.chan = i;
1326 if (i == 3) {
1327 alu.last = 1;
1328 }
1329 alu.dst.write = 1;
1330 r = r600_bc_add_alu(ctx->bc, &alu);
1331 if (r)
1332 return r;
1333 }
1334 r = r600_bc_add_literal(ctx->bc, ctx->value);
1335 if (r)
1336 return r;
1337
1338 /* (1 - src0) * src2 */
1339 for (i = 0; i < 4; i++) {
1340 memset(&alu, 0, sizeof(struct r600_bc_alu));
1341 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1342 alu.src[0].sel = ctx->temp_reg;
1343 alu.src[0].chan = i;
1344 alu.src[1] = r600_src[2];
1345 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1346 alu.dst.sel = ctx->temp_reg;
1347 alu.dst.chan = i;
1348 if (i == 3) {
1349 alu.last = 1;
1350 }
1351 alu.dst.write = 1;
1352 r = r600_bc_add_alu(ctx->bc, &alu);
1353 if (r)
1354 return r;
1355 }
1356 r = r600_bc_add_literal(ctx->bc, ctx->value);
1357 if (r)
1358 return r;
1359
1360 /* src0 * src1 + (1 - src0) * src2 */
1361 for (i = 0; i < 4; i++) {
1362 memset(&alu, 0, sizeof(struct r600_bc_alu));
1363 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1364 alu.is_op3 = 1;
1365 alu.src[0] = r600_src[0];
1366 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1367 alu.src[1] = r600_src[1];
1368 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1369 alu.src[2].sel = ctx->temp_reg;
1370 alu.src[2].chan = i;
1371 alu.dst.sel = ctx->temp_reg;
1372 alu.dst.chan = i;
1373 if (i == 3) {
1374 alu.last = 1;
1375 }
1376 r = r600_bc_add_alu(ctx->bc, &alu);
1377 if (r)
1378 return r;
1379 }
1380 return tgsi_helper_copy(ctx, inst);
1381 }
1382
1383 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1384 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1385 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1386 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1387 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
1388 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
1389 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1390 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1391 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1392 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1393 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1394 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1395 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1396 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
1397 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1398 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1399 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
1400 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1401 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1402 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1403 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1404 /* gap */
1405 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1406 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1407 /* gap */
1408 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1409 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1410 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
1411 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1412 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
1413 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1414 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
1415 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
1416 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1417 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1418 /* gap */
1419 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1420 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1421 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1422 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1423 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
1424 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
1425 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
1426 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1427 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1428 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1429 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1430 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1431 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1432 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
1433 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1434 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
1435 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
1436 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt},
1437 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
1438 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1439 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1440 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1441 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1442 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1443 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1444 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1445 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1446 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1447 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1448 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1449 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1450 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1451 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1452 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
1453 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1454 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1455 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
1456 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1457 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1458 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1459 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1460 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1461 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1462 /* gap */
1463 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1464 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1465 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1466 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1467 /* gap */
1468 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1469 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1470 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1471 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1472 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1473 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1474 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1475 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
1476 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1477 /* gap */
1478 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1479 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1480 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1481 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1482 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1483 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1484 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1485 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1486 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1487 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1488 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1489 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1490 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1491 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1492 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1493 /* gap */
1494 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1495 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1496 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1497 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1498 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1499 /* gap */
1500 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1501 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1502 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1503 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1504 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1505 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1506 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1507 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1508 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
1509 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1510 /* gap */
1511 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1512 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1513 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1514 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1515 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1516 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1517 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1518 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1519 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1520 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1521 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1522 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1523 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1524 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1525 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1526 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1527 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1528 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1529 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1530 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1531 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1532 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1533 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1534 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1535 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1536 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1537 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1538 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1539 };