r600g: add XPD support
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
/*
 * One entry of the opcode dispatch table: r600_shader_from_tgsi() indexes
 * the table with the TGSI opcode and calls the entry's process() hook.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-operand ALU opcodes - confirm */
	unsigned	is_op3;
	/* r600 ALU opcode emitted by the generic handlers */
	unsigned	r600_opcode;
	/* translation hook; returns 0 or a negative errno value */
	int		(*process)(struct r600_shader_ctx *ctx);
};

/* The table and the translator are defined further down in this file. */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 int r600_pipe_shader_create(struct pipe_context *ctx,
102 struct r600_context_state *rpshader,
103 const struct tgsi_token *tokens)
104 {
105 struct r600_screen *rscreen = r600_screen(ctx->screen);
106 int r;
107
108 //fprintf(stderr, "--------------------------------------------------------------\n");
109 //tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return -ENOMEM;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 return r;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 return r;
122 }
123 //fprintf(stderr, "______________________________________________________________\n");
124 return 0;
125 }
126
127 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128 {
129 struct r600_screen *rscreen = r600_screen(ctx->screen);
130 struct r600_shader *rshader = &rpshader->shader;
131 struct radeon_state *state;
132 unsigned i, tmp;
133
134 rpshader->rstate = radeon_state_decref(rpshader->rstate);
135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136 if (state == NULL)
137 return -ENOMEM;
138 for (i = 0; i < 10; i++) {
139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140 }
141 /* so far never got proper semantic id from tgsi */
142 for (i = 0; i < 32; i++) {
143 tmp = i << ((i & 3) * 8);
144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145 }
146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148 rpshader->rstate = state;
149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151 rpshader->rstate->nbo = 2;
152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153 rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
154 return radeon_state_pm4(state);
155 }
156
157 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
158 {
159 const struct pipe_rasterizer_state *rasterizer;
160 struct r600_screen *rscreen = r600_screen(ctx->screen);
161 struct r600_shader *rshader = &rpshader->shader;
162 struct r600_context *rctx = r600_context(ctx);
163 struct radeon_state *state;
164 unsigned i, tmp, exports_ps, num_cout;
165
166 rasterizer = &rctx->rasterizer->state.rasterizer;
167 rpshader->rstate = radeon_state_decref(rpshader->rstate);
168 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
169 if (state == NULL)
170 return -ENOMEM;
171 for (i = 0; i < rshader->ninput; i++) {
172 tmp = S_028644_SEMANTIC(i);
173 tmp |= S_028644_SEL_CENTROID(1);
174 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
175 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
176 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
177 }
178 if (rasterizer->sprite_coord_enable & (1 << i)) {
179 tmp |= S_028644_PT_SPRITE_TEX(1);
180 }
181 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
182 }
183
184 exports_ps = 0;
185 num_cout = 0;
186 for (i = 0; i < rshader->noutput; i++) {
187 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
188 exports_ps |= 1;
189 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
190 exports_ps |= (1 << (num_cout+1));
191 num_cout++;
192 }
193 }
194 if (!exports_ps) {
195 /* always at least export 1 component per pixel */
196 exports_ps = 2;
197 }
198 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
199 S_0286CC_PERSP_GRADIENT_ENA(1);
200 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
201 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
202 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
203 rpshader->rstate = state;
204 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
205 rpshader->rstate->nbo = 1;
206 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
207 return radeon_state_pm4(state);
208 }
209
210 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
211 {
212 struct r600_screen *rscreen = r600_screen(ctx->screen);
213 struct r600_context *rctx = r600_context(ctx);
214 struct r600_shader *rshader = &rpshader->shader;
215 int r;
216
217 /* copy new shader */
218 radeon_bo_decref(rscreen->rw, rpshader->bo);
219 rpshader->bo = NULL;
220 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
221 4096, NULL);
222 if (rpshader->bo == NULL) {
223 return -ENOMEM;
224 }
225 radeon_bo_map(rscreen->rw, rpshader->bo);
226 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
227 radeon_bo_unmap(rscreen->rw, rpshader->bo);
228 /* build state */
229 rshader->flat_shade = rctx->flat_shade;
230 switch (rshader->processor_type) {
231 case TGSI_PROCESSOR_VERTEX:
232 r = r600_pipe_shader_vs(ctx, rpshader);
233 break;
234 case TGSI_PROCESSOR_FRAGMENT:
235 r = r600_pipe_shader_ps(ctx, rpshader);
236 break;
237 default:
238 r = -EINVAL;
239 break;
240 }
241 return r;
242 }
243
244 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
245 {
246 struct r600_context *rctx = r600_context(ctx);
247 int r;
248
249 if (rpshader == NULL)
250 return -EINVAL;
251 /* there should be enough input */
252 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
253 R600_ERR("%d resources provided, expecting %d\n",
254 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
255 return -EINVAL;
256 }
257 r = r600_shader_update(ctx, &rpshader->shader);
258 if (r)
259 return r;
260 return r600_pipe_shader(ctx, rpshader);
261 }
262
263 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
264 {
265 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
266 int j;
267
268 if (i->Instruction.NumDstRegs > 1) {
269 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
270 return -EINVAL;
271 }
272 if (i->Instruction.Predicate) {
273 R600_ERR("predicate unsupported\n");
274 return -EINVAL;
275 }
276 if (i->Instruction.Label) {
277 R600_ERR("label unsupported\n");
278 return -EINVAL;
279 }
280 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
281 if (i->Src[j].Register.Indirect ||
282 i->Src[j].Register.Dimension ||
283 i->Src[j].Register.Absolute) {
284 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
285 return -EINVAL;
286 }
287 }
288 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
289 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
290 R600_ERR("unsupported dst (indirect|dimension)\n");
291 return -EINVAL;
292 }
293 }
294 return 0;
295 }
296
297 static int tgsi_declaration(struct r600_shader_ctx *ctx)
298 {
299 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
300 struct r600_bc_vtx vtx;
301 unsigned i;
302 int r;
303
304 switch (d->Declaration.File) {
305 case TGSI_FILE_INPUT:
306 i = ctx->shader->ninput++;
307 ctx->shader->input[i].name = d->Semantic.Name;
308 ctx->shader->input[i].sid = d->Semantic.Index;
309 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
310 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
311 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
312 /* turn input into fetch */
313 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
314 vtx.inst = 0;
315 vtx.fetch_type = 0;
316 vtx.buffer_id = i;
317 /* register containing the index into the buffer */
318 vtx.src_gpr = 0;
319 vtx.src_sel_x = 0;
320 vtx.mega_fetch_count = 0x1F;
321 vtx.dst_gpr = ctx->shader->input[i].gpr;
322 vtx.dst_sel_x = 0;
323 vtx.dst_sel_y = 1;
324 vtx.dst_sel_z = 2;
325 vtx.dst_sel_w = 3;
326 r = r600_bc_add_vtx(ctx->bc, &vtx);
327 if (r)
328 return r;
329 }
330 break;
331 case TGSI_FILE_OUTPUT:
332 i = ctx->shader->noutput++;
333 ctx->shader->output[i].name = d->Semantic.Name;
334 ctx->shader->output[i].sid = d->Semantic.Index;
335 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
336 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
337 break;
338 case TGSI_FILE_CONSTANT:
339 case TGSI_FILE_TEMPORARY:
340 case TGSI_FILE_SAMPLER:
341 break;
342 default:
343 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
344 return -EINVAL;
345 }
346 return 0;
347 }
348
349 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
350 {
351 struct tgsi_full_immediate *immediate;
352 struct r600_shader_ctx ctx;
353 struct r600_bc_output output[32];
354 unsigned output_done, noutput;
355 unsigned opcode;
356 int i, r = 0, pos0;
357
358 ctx.bc = &shader->bc;
359 ctx.shader = shader;
360 r = r600_bc_init(ctx.bc, shader->family);
361 if (r)
362 return r;
363 ctx.tokens = tokens;
364 tgsi_scan_shader(tokens, &ctx.info);
365 tgsi_parse_init(&ctx.parse, tokens);
366 ctx.type = ctx.parse.FullHeader.Processor.Processor;
367 shader->processor_type = ctx.type;
368
369 /* register allocations */
370 /* Values [0,127] correspond to GPR[0..127].
371 * Values [128,159] correspond to constant buffer bank 0
372 * Values [160,191] correspond to constant buffer bank 1
373 * Values [256,511] correspond to cfile constants c[0..255].
374 * Other special values are shown in the list below.
375 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
376 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
377 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
378 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
379 * 248 SQ_ALU_SRC_0: special constant 0.0.
380 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
381 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
382 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
383 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
384 * 253 SQ_ALU_SRC_LITERAL: literal constant.
385 * 254 SQ_ALU_SRC_PV: previous vector result.
386 * 255 SQ_ALU_SRC_PS: previous scalar result.
387 */
388 for (i = 0; i < TGSI_FILE_COUNT; i++) {
389 ctx.file_offset[i] = 0;
390 }
391 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
392 ctx.file_offset[TGSI_FILE_INPUT] = 1;
393 }
394 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
395 ctx.info.file_count[TGSI_FILE_INPUT];
396 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
397 ctx.info.file_count[TGSI_FILE_OUTPUT];
398 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
399 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
400 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
401 ctx.info.file_count[TGSI_FILE_TEMPORARY];
402
403 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
404 tgsi_parse_token(&ctx.parse);
405 switch (ctx.parse.FullToken.Token.Type) {
406 case TGSI_TOKEN_TYPE_IMMEDIATE:
407 immediate = &ctx.parse.FullToken.FullImmediate;
408 ctx.value[0] = immediate->u[0].Uint;
409 ctx.value[1] = immediate->u[1].Uint;
410 ctx.value[2] = immediate->u[2].Uint;
411 ctx.value[3] = immediate->u[3].Uint;
412 break;
413 case TGSI_TOKEN_TYPE_DECLARATION:
414 r = tgsi_declaration(&ctx);
415 if (r)
416 goto out_err;
417 break;
418 case TGSI_TOKEN_TYPE_INSTRUCTION:
419 r = tgsi_is_supported(&ctx);
420 if (r)
421 goto out_err;
422 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
423 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
424 r = ctx.inst_info->process(&ctx);
425 if (r)
426 goto out_err;
427 r = r600_bc_add_literal(ctx.bc, ctx.value);
428 if (r)
429 goto out_err;
430 break;
431 default:
432 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
433 r = -EINVAL;
434 goto out_err;
435 }
436 }
437 /* export output */
438 noutput = shader->noutput;
439 for (i = 0, pos0 = 0; i < noutput; i++) {
440 memset(&output[i], 0, sizeof(struct r600_bc_output));
441 output[i].gpr = shader->output[i].gpr;
442 output[i].elem_size = 3;
443 output[i].swizzle_x = 0;
444 output[i].swizzle_y = 1;
445 output[i].swizzle_z = 2;
446 output[i].swizzle_w = 3;
447 output[i].barrier = 1;
448 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
449 output[i].array_base = i - pos0;
450 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
451 switch (ctx.type) {
452 case TGSI_PROCESSOR_VERTEX:
453 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
454 output[i].array_base = 60;
455 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
456 /* position doesn't count in array_base */
457 pos0++;
458 }
459 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
460 output[i].array_base = 61;
461 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
462 /* position doesn't count in array_base */
463 pos0++;
464 }
465 break;
466 case TGSI_PROCESSOR_FRAGMENT:
467 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
468 output[i].array_base = shader->output[i].sid;
469 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
470 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
471 output[i].array_base = 61;
472 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
473 } else {
474 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
475 r = -EINVAL;
476 goto out_err;
477 }
478 break;
479 default:
480 R600_ERR("unsupported processor type %d\n", ctx.type);
481 r = -EINVAL;
482 goto out_err;
483 }
484 }
485 /* add fake param output for vertex shader if no param is exported */
486 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
487 for (i = 0, pos0 = 0; i < noutput; i++) {
488 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
489 pos0 = 1;
490 break;
491 }
492 }
493 if (!pos0) {
494 memset(&output[i], 0, sizeof(struct r600_bc_output));
495 output[i].gpr = 0;
496 output[i].elem_size = 3;
497 output[i].swizzle_x = 0;
498 output[i].swizzle_y = 1;
499 output[i].swizzle_z = 2;
500 output[i].swizzle_w = 3;
501 output[i].barrier = 1;
502 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
503 output[i].array_base = 0;
504 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
505 noutput++;
506 }
507 }
508 /* add fake pixel export */
509 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
510 memset(&output[0], 0, sizeof(struct r600_bc_output));
511 output[0].gpr = 0;
512 output[0].elem_size = 3;
513 output[0].swizzle_x = 7;
514 output[0].swizzle_y = 7;
515 output[0].swizzle_z = 7;
516 output[0].swizzle_w = 7;
517 output[0].barrier = 1;
518 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
519 output[0].array_base = 0;
520 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
521 noutput++;
522 }
523 /* set export done on last export of each type */
524 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
525 if (i == (noutput - 1)) {
526 output[i].end_of_program = 1;
527 }
528 if (!(output_done & (1 << output[i].type))) {
529 output_done |= (1 << output[i].type);
530 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
531 }
532 }
533 /* add output to bytecode */
534 for (i = 0; i < noutput; i++) {
535 r = r600_bc_add_output(ctx.bc, &output[i]);
536 if (r)
537 goto out_err;
538 }
539 tgsi_parse_free(&ctx.parse);
540 return 0;
541 out_err:
542 tgsi_parse_free(&ctx.parse);
543 return r;
544 }
545
546 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
547 {
548 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
549 return -EINVAL;
550 }
551
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
556
557 static int tgsi_src(struct r600_shader_ctx *ctx,
558 const struct tgsi_full_src_register *tgsi_src,
559 struct r600_bc_alu_src *r600_src)
560 {
561 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
562 r600_src->sel = tgsi_src->Register.Index;
563 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
564 r600_src->sel = 0;
565 }
566 r600_src->neg = tgsi_src->Register.Negate;
567 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
568 return 0;
569 }
570
571 static int tgsi_dst(struct r600_shader_ctx *ctx,
572 const struct tgsi_full_dst_register *tgsi_dst,
573 unsigned swizzle,
574 struct r600_bc_alu_dst *r600_dst)
575 {
576 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
577
578 r600_dst->sel = tgsi_dst->Register.Index;
579 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
580 r600_dst->chan = swizzle;
581 r600_dst->write = 1;
582 if (inst->Instruction.Saturate) {
583 r600_dst->clamp = 1;
584 }
585 return 0;
586 }
587
588 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
589 {
590 switch (swizzle) {
591 case 0:
592 return tgsi_src->Register.SwizzleX;
593 case 1:
594 return tgsi_src->Register.SwizzleY;
595 case 2:
596 return tgsi_src->Register.SwizzleZ;
597 case 3:
598 return tgsi_src->Register.SwizzleW;
599 default:
600 return 0;
601 }
602 }
603
604 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
605 {
606 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
607 struct r600_bc_alu alu;
608 int i, j, k, nconst, r;
609
610 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
611 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
612 nconst++;
613 }
614 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
615 if (r) {
616 return r;
617 }
618 }
619 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
620 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
621 for (k = 0; k < 4; k++) {
622 memset(&alu, 0, sizeof(struct r600_bc_alu));
623 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
624 alu.src[0].sel = r600_src[0].sel;
625 alu.src[0].chan = k;
626 alu.dst.sel = ctx->temp_reg + j;
627 alu.dst.chan = k;
628 alu.dst.write = 1;
629 if (k == 3)
630 alu.last = 1;
631 r = r600_bc_add_alu(ctx->bc, &alu);
632 if (r)
633 return r;
634 }
635 r600_src[0].sel = ctx->temp_reg + j;
636 j--;
637 }
638 }
639 return 0;
640 }
641
642 static int tgsi_op2(struct r600_shader_ctx *ctx)
643 {
644 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
645 struct r600_bc_alu_src r600_src[3];
646 struct r600_bc_alu alu;
647 int i, j, r;
648
649 r = tgsi_split_constant(ctx, r600_src);
650 if (r)
651 return r;
652 for (i = 0; i < 4; i++) {
653 memset(&alu, 0, sizeof(struct r600_bc_alu));
654 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
655 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
656 alu.dst.chan = i;
657 } else {
658 alu.inst = ctx->inst_info->r600_opcode;
659 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
660 alu.src[j] = r600_src[j];
661 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
662 }
663 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
664 if (r)
665 return r;
666 }
667 /* handle some special cases */
668 switch (ctx->inst_info->tgsi_opcode) {
669 case TGSI_OPCODE_SUB:
670 alu.src[1].neg = 1;
671 break;
672 case TGSI_OPCODE_ABS:
673 alu.src[0].abs = 1;
674 break;
675 default:
676 break;
677 }
678 if (i == 3) {
679 alu.last = 1;
680 }
681 r = r600_bc_add_alu(ctx->bc, &alu);
682 if (r)
683 return r;
684 }
685 return 0;
686 }
687
688 /*
689 * r600 - trunc to -PI..PI range
690 * r700 - normalize by dividing by 2PI
691 * see fdo bug 27901
692 */
693 static int tgsi_trig(struct r600_shader_ctx *ctx)
694 {
695 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
696 struct r600_bc_alu_src r600_src[3];
697 struct r600_bc_alu alu;
698 int i, r;
699 uint32_t lit_vals[4];
700
701 memset(lit_vals, 0, 4*4);
702 r = tgsi_split_constant(ctx, r600_src);
703 if (r)
704 return r;
705 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
706 lit_vals[1] = fui(0.5f);
707
708 memset(&alu, 0, sizeof(struct r600_bc_alu));
709 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
710 alu.is_op3 = 1;
711
712 alu.dst.chan = 0;
713 alu.dst.sel = ctx->temp_reg;
714 alu.dst.write = 1;
715
716 alu.src[0] = r600_src[0];
717 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
718
719 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
720 alu.src[1].chan = 0;
721 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
722 alu.src[2].chan = 1;
723 alu.last = 1;
724 r = r600_bc_add_alu(ctx->bc, &alu);
725 if (r)
726 return r;
727 r = r600_bc_add_literal(ctx->bc, lit_vals);
728 if (r)
729 return r;
730
731 memset(&alu, 0, sizeof(struct r600_bc_alu));
732 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
733
734 alu.dst.chan = 0;
735 alu.dst.sel = ctx->temp_reg;
736 alu.dst.write = 1;
737
738 alu.src[0].sel = ctx->temp_reg;
739 alu.src[0].chan = 0;
740 alu.last = 1;
741 r = r600_bc_add_alu(ctx->bc, &alu);
742 if (r)
743 return r;
744
745 if (ctx->bc->chiprev == 0) {
746 lit_vals[0] = fui(3.1415926535897f * 2.0f);
747 lit_vals[1] = fui(-3.1415926535897f);
748 } else {
749 lit_vals[0] = fui(1.0f);
750 lit_vals[1] = fui(-0.5f);
751 }
752
753 memset(&alu, 0, sizeof(struct r600_bc_alu));
754 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
755 alu.is_op3 = 1;
756
757 alu.dst.chan = 0;
758 alu.dst.sel = ctx->temp_reg;
759 alu.dst.write = 1;
760
761 alu.src[0].sel = ctx->temp_reg;
762 alu.src[0].chan = 0;
763
764 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
765 alu.src[1].chan = 0;
766 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
767 alu.src[2].chan = 1;
768 alu.last = 1;
769 r = r600_bc_add_alu(ctx->bc, &alu);
770 if (r)
771 return r;
772 r = r600_bc_add_literal(ctx->bc, lit_vals);
773 if (r)
774 return r;
775
776 memset(&alu, 0, sizeof(struct r600_bc_alu));
777 alu.inst = ctx->inst_info->r600_opcode;
778 alu.dst.chan = 0;
779 alu.dst.sel = ctx->temp_reg;
780 alu.dst.write = 1;
781
782 alu.src[0].sel = ctx->temp_reg;
783 alu.src[0].chan = 0;
784 alu.last = 1;
785 r = r600_bc_add_alu(ctx->bc, &alu);
786 if (r)
787 return r;
788
789 /* replicate result */
790 for (i = 0; i < 4; i++) {
791 memset(&alu, 0, sizeof(struct r600_bc_alu));
792 alu.src[0].sel = ctx->temp_reg;
793 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
794 alu.dst.chan = i;
795 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
796 if (r)
797 return r;
798 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
799 if (i == 3)
800 alu.last = 1;
801 r = r600_bc_add_alu(ctx->bc, &alu);
802 if (r)
803 return r;
804 }
805 return 0;
806 }
807
808 static int tgsi_kill(struct r600_shader_ctx *ctx)
809 {
810 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
811 struct r600_bc_alu alu;
812 int i, r;
813
814 for (i = 0; i < 4; i++) {
815 memset(&alu, 0, sizeof(struct r600_bc_alu));
816 alu.inst = ctx->inst_info->r600_opcode;
817 alu.dst.chan = i;
818 alu.src[0].sel = V_SQ_ALU_SRC_0;
819 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
820 if (r)
821 return r;
822 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
823 if (i == 3) {
824 alu.last = 1;
825 }
826 r = r600_bc_add_alu(ctx->bc, &alu);
827 if (r)
828 return r;
829 }
830 return 0;
831 }
832
833 static int tgsi_slt(struct r600_shader_ctx *ctx)
834 {
835 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
836 struct r600_bc_alu_src r600_src[3];
837 struct r600_bc_alu alu;
838 int i, r;
839
840 r = tgsi_split_constant(ctx, r600_src);
841 if (r)
842 return r;
843 for (i = 0; i < 4; i++) {
844 memset(&alu, 0, sizeof(struct r600_bc_alu));
845 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
846 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
847 alu.dst.chan = i;
848 } else {
849 alu.inst = ctx->inst_info->r600_opcode;
850 alu.src[1] = r600_src[0];
851 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
852 alu.src[0] = r600_src[1];
853 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
854 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
855 if (r)
856 return r;
857 }
858 if (i == 3) {
859 alu.last = 1;
860 }
861 r = r600_bc_add_alu(ctx->bc, &alu);
862 if (r)
863 return r;
864 }
865 return 0;
866 }
867
868 static int tgsi_lit(struct r600_shader_ctx *ctx)
869 {
870 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
871 struct r600_bc_alu alu;
872 int r;
873
874 /* dst.x, <- 1.0 */
875 memset(&alu, 0, sizeof(struct r600_bc_alu));
876 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
877 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
878 alu.src[0].chan = 0;
879 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
880 if (r)
881 return r;
882 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
883 r = r600_bc_add_alu(ctx->bc, &alu);
884 if (r)
885 return r;
886
887 /* dst.y = max(src.x, 0.0) */
888 memset(&alu, 0, sizeof(struct r600_bc_alu));
889 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
890 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
891 if (r)
892 return r;
893 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
894 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
895 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
896 if (r)
897 return r;
898 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
899 r = r600_bc_add_alu(ctx->bc, &alu);
900 if (r)
901 return r;
902
903 /* dst.z = NOP - fill Z slot */
904 memset(&alu, 0, sizeof(struct r600_bc_alu));
905 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
906 alu.dst.chan = 2;
907 r = r600_bc_add_alu(ctx->bc, &alu);
908 if (r)
909 return r;
910
911 /* dst.w, <- 1.0 */
912 memset(&alu, 0, sizeof(struct r600_bc_alu));
913 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
914 alu.src[0].sel = V_SQ_ALU_SRC_1;
915 alu.src[0].chan = 0;
916 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
917 if (r)
918 return r;
919 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
920 alu.last = 1;
921 r = r600_bc_add_alu(ctx->bc, &alu);
922 if (r)
923 return r;
924
925 if (inst->Dst[0].Register.WriteMask & (1 << 2))
926 {
927 int chan;
928 int sel;
929
930 /* dst.z = log(src.y) */
931 memset(&alu, 0, sizeof(struct r600_bc_alu));
932 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
933 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
934 if (r)
935 return r;
936 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
937 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
938 if (r)
939 return r;
940 alu.last = 1;
941 r = r600_bc_add_alu(ctx->bc, &alu);
942 if (r)
943 return r;
944
945 chan = alu.dst.chan;
946 sel = alu.dst.sel;
947
948 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
949 memset(&alu, 0, sizeof(struct r600_bc_alu));
950 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
951 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
952 if (r)
953 return r;
954 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
955 alu.src[1].sel = sel;
956 alu.src[1].chan = chan;
957 r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
958 if (r)
959 return r;
960 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
961 alu.dst.sel = ctx->temp_reg;
962 alu.dst.chan = 0;
963 alu.dst.write = 1;
964 alu.is_op3 = 1;
965 alu.last = 1;
966 r = r600_bc_add_alu(ctx->bc, &alu);
967 if (r)
968 return r;
969
970 /* dst.z = exp(tmp.x) */
971 memset(&alu, 0, sizeof(struct r600_bc_alu));
972 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
973 alu.src[0].sel = ctx->temp_reg;
974 alu.src[0].chan = 0;
975 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
976 if (r)
977 return r;
978 alu.last = 1;
979 r = r600_bc_add_alu(ctx->bc, &alu);
980 if (r)
981 return r;
982 }
983 return 0;
984 }
985
986 static int tgsi_trans(struct r600_shader_ctx *ctx)
987 {
988 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
989 struct r600_bc_alu alu;
990 int i, j, r;
991
992 for (i = 0; i < 4; i++) {
993 memset(&alu, 0, sizeof(struct r600_bc_alu));
994 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
995 alu.inst = ctx->inst_info->r600_opcode;
996 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
997 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
998 if (r)
999 return r;
1000 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1001 }
1002 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1003 if (r)
1004 return r;
1005 alu.last = 1;
1006 r = r600_bc_add_alu(ctx->bc, &alu);
1007 if (r)
1008 return r;
1009 }
1010 }
1011 return 0;
1012 }
1013
1014 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1015 {
1016 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1017 struct r600_bc_alu alu;
1018 int i, r;
1019
1020 for (i = 0; i < 4; i++) {
1021 memset(&alu, 0, sizeof(struct r600_bc_alu));
1022 alu.src[0].sel = ctx->temp_reg;
1023 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1024 alu.dst.chan = i;
1025 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1026 if (r)
1027 return r;
1028 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1029 if (i == 3)
1030 alu.last = 1;
1031 r = r600_bc_add_alu(ctx->bc, &alu);
1032 if (r)
1033 return r;
1034 }
1035 return 0;
1036 }
1037
1038 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1039 {
1040 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1041 struct r600_bc_alu alu;
1042 int i, r;
1043
1044 memset(&alu, 0, sizeof(struct r600_bc_alu));
1045 alu.inst = ctx->inst_info->r600_opcode;
1046 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1047 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1048 if (r)
1049 return r;
1050 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1051 }
1052 alu.dst.sel = ctx->temp_reg;
1053 alu.dst.write = 1;
1054 alu.last = 1;
1055 r = r600_bc_add_alu(ctx->bc, &alu);
1056 if (r)
1057 return r;
1058 /* replicate result */
1059 return tgsi_helper_tempx_replicate(ctx);
1060 }
1061
1062 static int tgsi_pow(struct r600_shader_ctx *ctx)
1063 {
1064 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1065 struct r600_bc_alu alu;
1066 int r;
1067
1068 /* LOG2(a) */
1069 memset(&alu, 0, sizeof(struct r600_bc_alu));
1070 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1071 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1072 if (r)
1073 return r;
1074 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1075 alu.dst.sel = ctx->temp_reg;
1076 alu.dst.write = 1;
1077 alu.last = 1;
1078 r = r600_bc_add_alu(ctx->bc, &alu);
1079 if (r)
1080 return r;
1081 /* b * LOG2(a) */
1082 memset(&alu, 0, sizeof(struct r600_bc_alu));
1083 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1084 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1085 if (r)
1086 return r;
1087 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1088 alu.src[1].sel = ctx->temp_reg;
1089 alu.dst.sel = ctx->temp_reg;
1090 alu.dst.write = 1;
1091 alu.last = 1;
1092 r = r600_bc_add_alu(ctx->bc, &alu);
1093 if (r)
1094 return r;
1095 /* POW(a,b) = EXP2(b * LOG2(a))*/
1096 memset(&alu, 0, sizeof(struct r600_bc_alu));
1097 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1098 alu.src[0].sel = ctx->temp_reg;
1099 alu.dst.sel = ctx->temp_reg;
1100 alu.dst.write = 1;
1101 alu.last = 1;
1102 r = r600_bc_add_alu(ctx->bc, &alu);
1103 if (r)
1104 return r;
1105 return tgsi_helper_tempx_replicate(ctx);
1106 }
1107
1108 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1109 {
1110 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1111 struct r600_bc_alu alu;
1112 struct r600_bc_alu_src r600_src[3];
1113 int i, r;
1114
1115 r = tgsi_split_constant(ctx, r600_src);
1116 if (r)
1117 return r;
1118
1119 /* tmp = (src > 0 ? 1 : src) */
1120 for (i = 0; i < 4; i++) {
1121 memset(&alu, 0, sizeof(struct r600_bc_alu));
1122 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1123 alu.is_op3 = 1;
1124 alu.dst.sel = ctx->temp_reg;
1125 alu.dst.write = 1;
1126
1127 alu.src[0] = r600_src[0];
1128 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1129
1130 alu.src[1].sel = V_SQ_ALU_SRC_1;
1131
1132 alu.src[2] = r600_src[0];
1133 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1134 if (i == 3)
1135 alu.last = 1;
1136 r = r600_bc_add_alu(ctx->bc, &alu);
1137 if (r)
1138 return r;
1139 }
1140
1141 /* dst = (-tmp > 0 ? -1 : tmp) */
1142 for (i = 0; i < 4; i++) {
1143 memset(&alu, 0, sizeof(struct r600_bc_alu));
1144 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1145 alu.is_op3 = 1;
1146 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1147 if (r)
1148 return r;
1149
1150 alu.src[0].sel = ctx->temp_reg;
1151 alu.src[0].neg = 1;
1152
1153 alu.src[1].sel = V_SQ_ALU_SRC_1;
1154 alu.src[1].neg = 1;
1155
1156 alu.src[2].sel = ctx->temp_reg;
1157
1158 alu.dst.write = 1;
1159 if (i == 3)
1160 alu.last = 1;
1161 r = r600_bc_add_alu(ctx->bc, &alu);
1162 if (r)
1163 return r;
1164 }
1165 return 0;
1166 }
1167
1168 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1169 {
1170 struct r600_bc_alu alu;
1171 int i, r;
1172
1173 r = r600_bc_add_literal(ctx->bc, ctx->value);
1174 if (r)
1175 return r;
1176 for (i = 0; i < 4; i++) {
1177 memset(&alu, 0, sizeof(struct r600_bc_alu));
1178 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1179 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1180 alu.dst.chan = i;
1181 } else {
1182 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1183 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1184 if (r)
1185 return r;
1186 alu.src[0].sel = ctx->temp_reg;
1187 alu.src[0].chan = i;
1188 }
1189 if (i == 3) {
1190 alu.last = 1;
1191 }
1192 r = r600_bc_add_alu(ctx->bc, &alu);
1193 if (r)
1194 return r;
1195 }
1196 return 0;
1197 }
1198
1199 static int tgsi_op3(struct r600_shader_ctx *ctx)
1200 {
1201 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1202 struct r600_bc_alu_src r600_src[3];
1203 struct r600_bc_alu alu;
1204 int i, j, r;
1205
1206 r = tgsi_split_constant(ctx, r600_src);
1207 if (r)
1208 return r;
1209 /* do it in 2 step as op3 doesn't support writemask */
1210 for (i = 0; i < 4; i++) {
1211 memset(&alu, 0, sizeof(struct r600_bc_alu));
1212 alu.inst = ctx->inst_info->r600_opcode;
1213 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1214 alu.src[j] = r600_src[j];
1215 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1216 }
1217 alu.dst.sel = ctx->temp_reg;
1218 alu.dst.chan = i;
1219 alu.dst.write = 1;
1220 alu.is_op3 = 1;
1221 if (i == 3) {
1222 alu.last = 1;
1223 }
1224 r = r600_bc_add_alu(ctx->bc, &alu);
1225 if (r)
1226 return r;
1227 }
1228 return tgsi_helper_copy(ctx, inst);
1229 }
1230
1231 static int tgsi_dp(struct r600_shader_ctx *ctx)
1232 {
1233 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1234 struct r600_bc_alu_src r600_src[3];
1235 struct r600_bc_alu alu;
1236 int i, j, r;
1237
1238 r = tgsi_split_constant(ctx, r600_src);
1239 if (r)
1240 return r;
1241 for (i = 0; i < 4; i++) {
1242 memset(&alu, 0, sizeof(struct r600_bc_alu));
1243 alu.inst = ctx->inst_info->r600_opcode;
1244 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1245 alu.src[j] = r600_src[j];
1246 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1247 }
1248 alu.dst.sel = ctx->temp_reg;
1249 alu.dst.chan = i;
1250 alu.dst.write = 1;
1251 /* handle some special cases */
1252 switch (ctx->inst_info->tgsi_opcode) {
1253 case TGSI_OPCODE_DP2:
1254 if (i > 1) {
1255 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1256 alu.src[0].chan = alu.src[1].chan = 0;
1257 }
1258 break;
1259 case TGSI_OPCODE_DP3:
1260 if (i > 2) {
1261 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1262 alu.src[0].chan = alu.src[1].chan = 0;
1263 }
1264 break;
1265 default:
1266 break;
1267 }
1268 if (i == 3) {
1269 alu.last = 1;
1270 }
1271 r = r600_bc_add_alu(ctx->bc, &alu);
1272 if (r)
1273 return r;
1274 }
1275 return tgsi_helper_copy(ctx, inst);
1276 }
1277
1278 static int tgsi_tex(struct r600_shader_ctx *ctx)
1279 {
1280 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1281 struct r600_bc_tex tex;
1282 struct r600_bc_alu alu;
1283 unsigned src_gpr;
1284 int r, i;
1285
1286 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1287
1288 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1289 /* Add perspective divide */
1290 memset(&alu, 0, sizeof(struct r600_bc_alu));
1291 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1292 alu.src[0].sel = src_gpr;
1293 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1294 alu.dst.sel = ctx->temp_reg;
1295 alu.dst.chan = 3;
1296 alu.last = 1;
1297 alu.dst.write = 1;
1298 r = r600_bc_add_alu(ctx->bc, &alu);
1299 if (r)
1300 return r;
1301
1302 for (i = 0; i < 3; i++) {
1303 memset(&alu, 0, sizeof(struct r600_bc_alu));
1304 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1305 alu.src[0].sel = ctx->temp_reg;
1306 alu.src[0].chan = 3;
1307 alu.src[1].sel = src_gpr;
1308 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1309 alu.dst.sel = ctx->temp_reg;
1310 alu.dst.chan = i;
1311 alu.dst.write = 1;
1312 r = r600_bc_add_alu(ctx->bc, &alu);
1313 if (r)
1314 return r;
1315 }
1316 memset(&alu, 0, sizeof(struct r600_bc_alu));
1317 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1318 alu.src[0].sel = V_SQ_ALU_SRC_1;
1319 alu.src[0].chan = 0;
1320 alu.dst.sel = ctx->temp_reg;
1321 alu.dst.chan = 3;
1322 alu.last = 1;
1323 alu.dst.write = 1;
1324 r = r600_bc_add_alu(ctx->bc, &alu);
1325 if (r)
1326 return r;
1327 src_gpr = ctx->temp_reg;
1328 } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
1329 for (i = 0; i < 4; i++) {
1330 memset(&alu, 0, sizeof(struct r600_bc_alu));
1331 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1332 alu.src[0].sel = src_gpr;
1333 alu.src[0].chan = i;
1334 alu.dst.sel = ctx->temp_reg;
1335 alu.dst.chan = i;
1336 if (i == 3)
1337 alu.last = 1;
1338 alu.dst.write = 1;
1339 r = r600_bc_add_alu(ctx->bc, &alu);
1340 if (r)
1341 return r;
1342 }
1343 src_gpr = ctx->temp_reg;
1344 }
1345
1346 memset(&tex, 0, sizeof(struct r600_bc_tex));
1347 tex.inst = ctx->inst_info->r600_opcode;
1348 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1349 tex.sampler_id = tex.resource_id;
1350 tex.src_gpr = src_gpr;
1351 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1352 tex.dst_sel_x = 0;
1353 tex.dst_sel_y = 1;
1354 tex.dst_sel_z = 2;
1355 tex.dst_sel_w = 3;
1356 tex.src_sel_x = 0;
1357 tex.src_sel_y = 1;
1358 tex.src_sel_z = 2;
1359 tex.src_sel_w = 3;
1360
1361 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1362 tex.coord_type_x = 1;
1363 tex.coord_type_y = 1;
1364 tex.coord_type_z = 1;
1365 tex.coord_type_w = 1;
1366 }
1367 return r600_bc_add_tex(ctx->bc, &tex);
1368 }
1369
1370 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1371 {
1372 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1373 struct r600_bc_alu_src r600_src[3];
1374 struct r600_bc_alu alu;
1375 unsigned i;
1376 int r;
1377
1378 r = tgsi_split_constant(ctx, r600_src);
1379 if (r)
1380 return r;
1381 /* 1 - src0 */
1382 for (i = 0; i < 4; i++) {
1383 memset(&alu, 0, sizeof(struct r600_bc_alu));
1384 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1385 alu.src[0].sel = V_SQ_ALU_SRC_1;
1386 alu.src[0].chan = 0;
1387 alu.src[1] = r600_src[0];
1388 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1389 alu.src[1].neg = 1;
1390 alu.dst.sel = ctx->temp_reg;
1391 alu.dst.chan = i;
1392 if (i == 3) {
1393 alu.last = 1;
1394 }
1395 alu.dst.write = 1;
1396 r = r600_bc_add_alu(ctx->bc, &alu);
1397 if (r)
1398 return r;
1399 }
1400 r = r600_bc_add_literal(ctx->bc, ctx->value);
1401 if (r)
1402 return r;
1403
1404 /* (1 - src0) * src2 */
1405 for (i = 0; i < 4; i++) {
1406 memset(&alu, 0, sizeof(struct r600_bc_alu));
1407 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1408 alu.src[0].sel = ctx->temp_reg;
1409 alu.src[0].chan = i;
1410 alu.src[1] = r600_src[2];
1411 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1412 alu.dst.sel = ctx->temp_reg;
1413 alu.dst.chan = i;
1414 if (i == 3) {
1415 alu.last = 1;
1416 }
1417 alu.dst.write = 1;
1418 r = r600_bc_add_alu(ctx->bc, &alu);
1419 if (r)
1420 return r;
1421 }
1422 r = r600_bc_add_literal(ctx->bc, ctx->value);
1423 if (r)
1424 return r;
1425
1426 /* src0 * src1 + (1 - src0) * src2 */
1427 for (i = 0; i < 4; i++) {
1428 memset(&alu, 0, sizeof(struct r600_bc_alu));
1429 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1430 alu.is_op3 = 1;
1431 alu.src[0] = r600_src[0];
1432 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1433 alu.src[1] = r600_src[1];
1434 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1435 alu.src[2].sel = ctx->temp_reg;
1436 alu.src[2].chan = i;
1437 alu.dst.sel = ctx->temp_reg;
1438 alu.dst.chan = i;
1439 if (i == 3) {
1440 alu.last = 1;
1441 }
1442 r = r600_bc_add_alu(ctx->bc, &alu);
1443 if (r)
1444 return r;
1445 }
1446 return tgsi_helper_copy(ctx, inst);
1447 }
1448
1449 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1450 {
1451 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1452 struct r600_bc_alu_src r600_src[3];
1453 struct r600_bc_alu alu;
1454 int use_temp = 0;
1455 int i, r;
1456
1457 r = tgsi_split_constant(ctx, r600_src);
1458 if (r)
1459 return r;
1460
1461 if (inst->Dst[0].Register.WriteMask != 0xf)
1462 use_temp = 1;
1463
1464 for (i = 0; i < 4; i++) {
1465 memset(&alu, 0, sizeof(struct r600_bc_alu));
1466 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
1467 alu.src[0] = r600_src[0];
1468 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1469
1470 alu.src[1] = r600_src[2];
1471 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1472
1473 alu.src[2] = r600_src[1];
1474 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1475
1476 if (use_temp)
1477 alu.dst.sel = ctx->temp_reg;
1478 else {
1479 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1480 if (r)
1481 return r;
1482 }
1483 alu.dst.chan = i;
1484 alu.dst.write = 1;
1485 alu.is_op3 = 1;
1486 if (i == 3)
1487 alu.last = 1;
1488 r = r600_bc_add_alu(ctx->bc, &alu);
1489 if (r)
1490 return r;
1491 }
1492 if (use_temp)
1493 return tgsi_helper_copy(ctx, inst);
1494 return 0;
1495 }
1496
1497 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1498 {
1499 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1500 struct r600_bc_alu_src r600_src[3];
1501 struct r600_bc_alu alu;
1502 uint32_t use_temp = 0;
1503 int i, r;
1504
1505 if (inst->Dst[0].Register.WriteMask != 0xf)
1506 use_temp = 1;
1507
1508 r = tgsi_split_constant(ctx, r600_src);
1509 if (r)
1510 return r;
1511
1512 for (i = 0; i < 4; i++) {
1513 memset(&alu, 0, sizeof(struct r600_bc_alu));
1514 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1515
1516 alu.src[0] = r600_src[0];
1517 switch (i) {
1518 case 0:
1519 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1520 break;
1521 case 1:
1522 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1523 break;
1524 case 2:
1525 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1526 break;
1527 case 3:
1528 alu.src[0].sel = V_SQ_ALU_SRC_0;
1529 alu.src[0].chan = i;
1530 }
1531
1532 alu.src[1] = r600_src[1];
1533 switch (i) {
1534 case 0:
1535 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1536 break;
1537 case 1:
1538 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1539 break;
1540 case 2:
1541 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1542 break;
1543 case 3:
1544 alu.src[1].sel = V_SQ_ALU_SRC_0;
1545 alu.src[1].chan = i;
1546 }
1547
1548 alu.dst.sel = ctx->temp_reg;
1549 alu.dst.chan = i;
1550 alu.dst.write = 1;
1551
1552 if (i == 3)
1553 alu.last = 1;
1554 r = r600_bc_add_alu(ctx->bc, &alu);
1555 if (r)
1556 return r;
1557 }
1558
1559 for (i = 0; i < 4; i++) {
1560 memset(&alu, 0, sizeof(struct r600_bc_alu));
1561 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1562
1563 alu.src[0] = r600_src[0];
1564 switch (i) {
1565 case 0:
1566 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1567 break;
1568 case 1:
1569 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1570 break;
1571 case 2:
1572 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1573 break;
1574 case 3:
1575 alu.src[0].sel = V_SQ_ALU_SRC_0;
1576 alu.src[0].chan = i;
1577 }
1578
1579 alu.src[1] = r600_src[1];
1580 switch (i) {
1581 case 0:
1582 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1583 break;
1584 case 1:
1585 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1586 break;
1587 case 2:
1588 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1589 break;
1590 case 3:
1591 alu.src[1].sel = V_SQ_ALU_SRC_0;
1592 alu.src[1].chan = i;
1593 }
1594
1595 alu.src[2].sel = ctx->temp_reg;
1596 alu.src[2].neg = 1;
1597 alu.src[2].chan = i;
1598
1599 if (use_temp)
1600 alu.dst.sel = ctx->temp_reg;
1601 else {
1602 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1603 if (r)
1604 return r;
1605 }
1606 alu.dst.chan = i;
1607 alu.dst.write = 1;
1608 alu.is_op3 = 1;
1609 if (i == 3)
1610 alu.last = 1;
1611 r = r600_bc_add_alu(ctx->bc, &alu);
1612 if (r)
1613 return r;
1614 }
1615 if (use_temp)
1616 return tgsi_helper_copy(ctx, inst);
1617 return 0;
1618 }
1619
1620
/*
 * TGSI -> r600 instruction translation table.
 *
 * Each entry is { TGSI opcode, flag, r600 opcode, handler }:
 *   - the TGSI opcode and r600 opcode are what the handlers read back
 *     through ctx->inst_info->tgsi_opcode / ->r600_opcode;
 *   - the second field is 1 only for MAD (the MULADD op3 entry), so it
 *     presumably marks op3 encodings -- TODO confirm against the struct
 *     definition;
 *   - the handler is the function that emits the bytecode; opcodes with
 *     no implementation point at tgsi_unsupported.
 *
 * NOTE(review): entries are listed in ascending opcode order and the
 * numeric "gap" rows fill opcode values that have no TGSI_OPCODE_* name,
 * which suggests the table is indexed directly by opcode value.  Verify
 * against the lookup site before reordering or removing rows.
 */
1621 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1622 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1623 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1624 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1625 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
1626 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
1627 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1628 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1629 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1630 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1631 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1632 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1633 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1634 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
1635 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1636 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1637 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
1638 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1639 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1640 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1641 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1642 /* gap */
1643 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1644 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1645 /* gap */
1646 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1647 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1648 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
1649 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1650 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
1651 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1652 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
1653 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
1654 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
1655 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
1656 /* gap */
1657 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1658 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1659 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1660 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1661 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
1662 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
1663 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
1664 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1665 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1666 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1667 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1668 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1669 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1670 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
1671 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1672 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
1673 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
1674 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt},
1675 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
1676 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1677 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1678 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1679 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1680 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1681 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1682 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1683 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1684 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1685 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1686 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1687 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1688 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1689 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1690 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
1691 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
1692 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1693 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
1694 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1695 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1696 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1697 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1698 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1699 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1700 /* gap */
1701 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1702 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1703 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1704 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1705 /* gap */
1706 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1707 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1708 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1709 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1710 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1711 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1712 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1713 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
1714 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1715 /* gap */
1716 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1717 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1718 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1719 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1720 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1721 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1722 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1723 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1724 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1725 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1726 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1727 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1728 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1729 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1730 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1731 /* gap */
1732 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1733 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1734 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1735 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1736 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1737 /* gap */
1738 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1739 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1740 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1741 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1742 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1743 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1744 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1745 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1746 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
1747 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1748 /* gap */
1749 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1750 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1751 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1752 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1753 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1754 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1755 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1756 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1757 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1758 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1759 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1760 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1761 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1762 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1763 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1764 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1765 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1766 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1767 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1768 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1769 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1770 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1771 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1772 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1773 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1774 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1775 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1776 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1777 };