r600g: add DPH support.
[mesa.git] / src/gallium/drivers/r600/r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
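/* Translation context used while converting one TGSI shader into r600
 * bytecode: parsed token state, per-register-file offsets, the first free
 * temporary GPR and the bytecode under construction.
 */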
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
53 struct r600_shader_tgsi_instruction {
54 unsigned tgsi_opcode;
55 unsigned is_op3;
56 unsigned r600_opcode;
57 int (*process)(struct r600_shader_ctx *ctx);
58 };
59
60 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
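/* Patch the destination swizzles of the vertex-fetch instructions so they
 * match the formats of the currently bound vertex elements, then rebuild
 * the bytecode. Only vertex shaders are affected.
 */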
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 int r600_pipe_shader_create(struct pipe_context *ctx,
102 struct r600_context_state *rpshader,
103 const struct tgsi_token *tokens)
104 {
105 struct r600_screen *rscreen = r600_screen(ctx->screen);
106 int r;
107
108 //fprintf(stderr, "--------------------------------------------------------------\n");
109 //tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return -ENOMEM;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed!\n");
116 return r;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed!\n");
121 return r;
122 }
123 //fprintf(stderr, "______________________________________________________________\n");
124 return 0;
125 }
126
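/* Build the VS radeon_state: identity SPI_VS_OUT_ID mapping, export count
 * and GPR count. The shader BO is referenced twice, once for
 * SQ_PGM_START_VS and once for SQ_PGM_START_FS.
 */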
127 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128 {
129 struct r600_screen *rscreen = r600_screen(ctx->screen);
130 struct r600_shader *rshader = &rpshader->shader;
131 struct radeon_state *state;
132 unsigned i, tmp;
133
134 rpshader->rstate = radeon_state_decref(rpshader->rstate);
135 state = radeon_state(rscreen->rw, R600_VS_SHADER);
136 if (state == NULL)
137 return -ENOMEM;
138 for (i = 0; i < 10; i++) {
139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140 }
141 /* so far we never get proper semantic ids from tgsi */
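/* Each SPI_VS_OUT_ID register packs four 8-bit semantic ids: id i goes
 * into byte (i & 3) of register i / 4. */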
142 for (i = 0; i < 32; i++) {
143 tmp = i << ((i & 3) * 8);
144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145 }
146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148 rpshader->rstate = state;
149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151 rpshader->rstate->nbo = 2;
152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153 rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
154 state->reloc_pm4_id[0] = R600_VS_SHADER__SQ_PGM_START_VS_BO_ID;
155 state->reloc_pm4_id[1] = R600_VS_SHADER__SQ_PGM_START_FS_BO_ID;
156 return radeon_state_pm4(state);
157 }
158
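/* Build the PS radeon_state: per-input SPI_PS_INPUT_CNTL (flat shading for
 * colors, point-sprite coordinates), the export mask in SQ_PGM_EXPORTS_PS
 * and the GPR count.
 */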
159 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
160 {
161 const struct pipe_rasterizer_state *rasterizer;
162 struct r600_screen *rscreen = r600_screen(ctx->screen);
163 struct r600_shader *rshader = &rpshader->shader;
164 struct r600_context *rctx = r600_context(ctx);
165 struct radeon_state *state;
166 unsigned i, tmp, exports_ps, num_cout;
167
168 rasterizer = &rctx->rasterizer->state.rasterizer;
169 rpshader->rstate = radeon_state_decref(rpshader->rstate);
170 state = radeon_state(rscreen->rw, R600_PS_SHADER);
171 if (state == NULL)
172 return -ENOMEM;
173 for (i = 0; i < rshader->ninput; i++) {
174 tmp = S_028644_SEMANTIC(i);
175 tmp |= S_028644_SEL_CENTROID(1);
176 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
177 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
178 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
179 }
180 if (rasterizer->sprite_coord_enable & (1 << i)) {
181 tmp |= S_028644_PT_SPRITE_TEX(1);
182 }
183 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
184 }
185
186 exports_ps = 0;
187 num_cout = 0;
188 for (i = 0; i < rshader->noutput; i++) {
189 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
190 exports_ps |= 1;
191 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
192 exports_ps |= (1 << (num_cout+1));
193 num_cout++;
194 }
195 }
196 if (!exports_ps) {
197 /* always at least export 1 component per pixel */
198 exports_ps = 2;
199 }
200 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
201 S_0286CC_PERSP_GRADIENT_ENA(1);
202 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
203 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
204 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
205 rpshader->rstate = state;
206 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
207 rpshader->rstate->nbo = 1;
208 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
209 state->reloc_pm4_id[0] = R600_PS_SHADER__SQ_PGM_START_PS_BO_ID;
210 return radeon_state_pm4(state);
211 }
212
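/* Upload the freshly built bytecode into a buffer object and rebuild the
 * VS or PS hardware state that points at it.
 */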
213 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
214 {
215 struct r600_screen *rscreen = r600_screen(ctx->screen);
216 struct r600_context *rctx = r600_context(ctx);
217 struct r600_shader *rshader = &rpshader->shader;
218 int r;
219
220 /* copy new shader */
221 radeon_bo_decref(rscreen->rw, rpshader->bo);
222 rpshader->bo = NULL;
223 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
224 4096, NULL);
225 if (rpshader->bo == NULL) {
226 return -ENOMEM;
227 }
228 radeon_bo_map(rscreen->rw, rpshader->bo);
229 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
230 radeon_bo_unmap(rscreen->rw, rpshader->bo);
231 /* build state */
232 rshader->flat_shade = rctx->flat_shade;
233 switch (rshader->processor_type) {
234 case TGSI_PROCESSOR_VERTEX:
235 r = r600_pipe_shader_vs(ctx, rpshader);
236 break;
237 case TGSI_PROCESSOR_FRAGMENT:
238 r = r600_pipe_shader_ps(ctx, rpshader);
239 break;
240 default:
241 r = -EINVAL;
242 break;
243 }
244 return r;
245 }
246
247 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
248 {
249 struct r600_context *rctx = r600_context(ctx);
250 int r;
251
252 if (rpshader == NULL)
253 return -EINVAL;
254 /* the bound vertex elements must provide at least as many resources as the shader fetches */
255 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
256 R600_ERR("%d resources provided, expecting %d\n",
257 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
258 return -EINVAL;
259 }
260 r = r600_shader_update(ctx, &rpshader->shader);
261 if (r)
262 return r;
263 return r600_pipe_shader(ctx, rpshader);
264 }
265
266 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
267 {
268 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
269 int j;
270
271 if (i->Instruction.NumDstRegs > 1) {
272 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
273 return -EINVAL;
274 }
275 if (i->Instruction.Predicate) {
276 R600_ERR("predicate unsupported\n");
277 return -EINVAL;
278 }
279 if (i->Instruction.Label) {
280 R600_ERR("label unsupported\n");
281 return -EINVAL;
282 }
283 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
284 if (i->Src[j].Register.Indirect ||
285 i->Src[j].Register.Dimension ||
286 i->Src[j].Register.Absolute) {
287 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
288 return -EINVAL;
289 }
290 }
291 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
292 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
293 R600_ERR("unsupported dst (indirect|dimension)\n");
294 return -EINVAL;
295 }
296 }
297 return 0;
298 }
299
300 static int tgsi_declaration(struct r600_shader_ctx *ctx)
301 {
302 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
303 struct r600_bc_vtx vtx;
304 unsigned i;
305 int r;
306
307 switch (d->Declaration.File) {
308 case TGSI_FILE_INPUT:
309 i = ctx->shader->ninput++;
310 ctx->shader->input[i].name = d->Semantic.Name;
311 ctx->shader->input[i].sid = d->Semantic.Index;
312 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
313 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
314 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
315 /* turn input into fetch */
316 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
317 vtx.inst = 0;
318 vtx.fetch_type = 0;
319 vtx.buffer_id = i;
320 /* register containing the index into the buffer */
321 vtx.src_gpr = 0;
322 vtx.src_sel_x = 0;
323 vtx.mega_fetch_count = 0x1F;
324 vtx.dst_gpr = ctx->shader->input[i].gpr;
325 vtx.dst_sel_x = 0;
326 vtx.dst_sel_y = 1;
327 vtx.dst_sel_z = 2;
328 vtx.dst_sel_w = 3;
329 r = r600_bc_add_vtx(ctx->bc, &vtx);
330 if (r)
331 return r;
332 }
333 break;
334 case TGSI_FILE_OUTPUT:
335 i = ctx->shader->noutput++;
336 ctx->shader->output[i].name = d->Semantic.Name;
337 ctx->shader->output[i].sid = d->Semantic.Index;
338 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
339 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
340 break;
341 case TGSI_FILE_CONSTANT:
342 case TGSI_FILE_TEMPORARY:
343 case TGSI_FILE_SAMPLER:
344 break;
345 default:
346 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
347 return -EINVAL;
348 }
349 return 0;
350 }
351
352 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
353 {
354 struct tgsi_full_immediate *immediate;
355 struct r600_shader_ctx ctx;
356 struct r600_bc_output output[32];
357 unsigned output_done, noutput;
358 unsigned opcode;
359 int i, r = 0, pos0;
360
361 ctx.bc = &shader->bc;
362 ctx.shader = shader;
363 r = r600_bc_init(ctx.bc, shader->family);
364 if (r)
365 return r;
366 ctx.tokens = tokens;
367 tgsi_scan_shader(tokens, &ctx.info);
368 tgsi_parse_init(&ctx.parse, tokens);
369 ctx.type = ctx.parse.FullHeader.Processor.Processor;
370 shader->processor_type = ctx.type;
371
372 /* register allocations */
373 /* Values [0,127] correspond to GPR[0..127].
374 * Values [128,159] correspond to constant buffer bank 0
375 * Values [160,191] correspond to constant buffer bank 1
376 * Values [256,511] correspond to cfile constants c[0..255].
377 * Other special values are shown in the list below.
378 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
379 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
380 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
381 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
382 * 248 SQ_ALU_SRC_0: special constant 0.0.
383 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
384 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
385 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
386 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
387 * 253 SQ_ALU_SRC_LITERAL: literal constant.
388 * 254 SQ_ALU_SRC_PV: previous vector result.
389 * 255 SQ_ALU_SRC_PS: previous scalar result.
390 */
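/* The file_offset[] table below maps TGSI register files onto that
 * encoding: in a vertex shader GPR0 holds the fetch index, so inputs start
 * at GPR1; outputs follow inputs and temporaries follow outputs; constants
 * map to the cfile at 256 and immediates to SQ_ALU_SRC_LITERAL (253).
 * ctx.temp_reg points just past the last declared temporary.
 */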
391 for (i = 0; i < TGSI_FILE_COUNT; i++) {
392 ctx.file_offset[i] = 0;
393 }
394 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
395 ctx.file_offset[TGSI_FILE_INPUT] = 1;
396 }
397 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
398 ctx.info.file_count[TGSI_FILE_INPUT];
399 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
400 ctx.info.file_count[TGSI_FILE_OUTPUT];
401 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
402 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
403 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
404 ctx.info.file_count[TGSI_FILE_TEMPORARY];
405
406 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
407 tgsi_parse_token(&ctx.parse);
408 switch (ctx.parse.FullToken.Token.Type) {
409 case TGSI_TOKEN_TYPE_IMMEDIATE:
410 immediate = &ctx.parse.FullToken.FullImmediate;
411 ctx.value[0] = immediate->u[0].Uint;
412 ctx.value[1] = immediate->u[1].Uint;
413 ctx.value[2] = immediate->u[2].Uint;
414 ctx.value[3] = immediate->u[3].Uint;
415 break;
416 case TGSI_TOKEN_TYPE_DECLARATION:
417 r = tgsi_declaration(&ctx);
418 if (r)
419 goto out_err;
420 break;
421 case TGSI_TOKEN_TYPE_INSTRUCTION:
422 r = tgsi_is_supported(&ctx);
423 if (r)
424 goto out_err;
425 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
426 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
427 r = ctx.inst_info->process(&ctx);
428 if (r)
429 goto out_err;
430 r = r600_bc_add_literal(ctx.bc, ctx.value);
431 if (r)
432 goto out_err;
433 break;
434 default:
435 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
436 r = -EINVAL;
437 goto out_err;
438 }
439 }
440 /* export output */
441 noutput = shader->noutput;
442 for (i = 0, pos0 = 0; i < noutput; i++) {
443 memset(&output[i], 0, sizeof(struct r600_bc_output));
444 output[i].gpr = shader->output[i].gpr;
445 output[i].elem_size = 3;
446 output[i].swizzle_x = 0;
447 output[i].swizzle_y = 1;
448 output[i].swizzle_z = 2;
449 output[i].swizzle_w = 3;
450 output[i].barrier = 1;
451 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
452 output[i].array_base = i - pos0;
453 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
454 switch (ctx.type) {
455 case TGSI_PROCESSOR_VERTEX:
456 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
457 output[i].array_base = 60;
458 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
459 /* position doesn't count in array_base */
460 pos0++;
461 }
462 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
463 output[i].array_base = 61;
464 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
465 /* PSIZE, like position, doesn't count in array_base */
466 pos0++;
467 }
468 break;
469 case TGSI_PROCESSOR_FRAGMENT:
470 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
471 output[i].array_base = shader->output[i].sid;
472 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
473 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
474 output[i].array_base = 61;
475 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
476 } else {
477 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
478 r = -EINVAL;
479 goto out_err;
480 }
481 break;
482 default:
483 R600_ERR("unsupported processor type %d\n", ctx.type);
484 r = -EINVAL;
485 goto out_err;
486 }
487 }
488 /* add fake param output for vertex shader if no param is exported */
489 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
490 for (i = 0, pos0 = 0; i < noutput; i++) {
491 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
492 pos0 = 1;
493 break;
494 }
495 }
496 if (!pos0) {
497 memset(&output[i], 0, sizeof(struct r600_bc_output));
498 output[i].gpr = 0;
499 output[i].elem_size = 3;
500 output[i].swizzle_x = 0;
501 output[i].swizzle_y = 1;
502 output[i].swizzle_z = 2;
503 output[i].swizzle_w = 3;
504 output[i].barrier = 1;
505 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
506 output[i].array_base = 0;
507 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
508 noutput++;
509 }
510 }
511 /* add a fake pixel export if the fragment shader has no outputs */
512 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
513 memset(&output[0], 0, sizeof(struct r600_bc_output));
514 output[0].gpr = 0;
515 output[0].elem_size = 3;
516 output[0].swizzle_x = 7;
517 output[0].swizzle_y = 7;
518 output[0].swizzle_z = 7;
519 output[0].swizzle_w = 7;
520 output[0].barrier = 1;
521 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
522 output[0].array_base = 0;
523 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
524 noutput++;
525 }
526 /* set export done on last export of each type */
527 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
528 if (i == (noutput - 1)) {
529 output[i].end_of_program = 1;
530 }
531 if (!(output_done & (1 << output[i].type))) {
532 output_done |= (1 << output[i].type);
533 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
534 }
535 }
536 /* add output to bytecode */
537 for (i = 0; i < noutput; i++) {
538 r = r600_bc_add_output(ctx.bc, &output[i]);
539 if (r)
540 goto out_err;
541 }
542 tgsi_parse_free(&ctx.parse);
543 return 0;
544 out_err:
545 tgsi_parse_free(&ctx.parse);
546 return r;
547 }
548
549 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
550 {
551 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
552 return -EINVAL;
553 }
554
555 static int tgsi_end(struct r600_shader_ctx *ctx)
556 {
557 return 0;
558 }
559
560 static int tgsi_src(struct r600_shader_ctx *ctx,
561 const struct tgsi_full_src_register *tgsi_src,
562 struct r600_bc_alu_src *r600_src)
563 {
564 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
565 r600_src->sel = tgsi_src->Register.Index;
566 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
567 r600_src->sel = 0;
568 }
569 r600_src->neg = tgsi_src->Register.Negate;
570 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
571 return 0;
572 }
573
574 static int tgsi_dst(struct r600_shader_ctx *ctx,
575 const struct tgsi_full_dst_register *tgsi_dst,
576 unsigned swizzle,
577 struct r600_bc_alu_dst *r600_dst)
578 {
579 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
580
581 r600_dst->sel = tgsi_dst->Register.Index;
582 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
583 r600_dst->chan = swizzle;
584 r600_dst->write = 1;
585 if (inst->Instruction.Saturate) {
586 r600_dst->clamp = 1;
587 }
588 return 0;
589 }
590
591 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
592 {
593 switch (swizzle) {
594 case 0:
595 return tgsi_src->Register.SwizzleX;
596 case 1:
597 return tgsi_src->Register.SwizzleY;
598 case 2:
599 return tgsi_src->Register.SwizzleZ;
600 case 3:
601 return tgsi_src->Register.SwizzleW;
602 default:
603 return 0;
604 }
605 }
606
607 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
608 {
609 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
610 struct r600_bc_alu alu;
611 int i, j, k, nconst, r;
612
613 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
614 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
615 nconst++;
616 }
617 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
618 if (r) {
619 return r;
620 }
621 }
622 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
623 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
624 for (k = 0; k < 4; k++) {
625 memset(&alu, 0, sizeof(struct r600_bc_alu));
626 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
627 alu.src[0].sel = r600_src[0].sel;
628 alu.src[0].chan = k;
629 alu.dst.sel = ctx->temp_reg + j;
630 alu.dst.chan = k;
631 alu.dst.write = 1;
632 if (k == 3)
633 alu.last = 1;
634 r = r600_bc_add_alu(ctx->bc, &alu);
635 if (r)
636 return r;
637 }
638 r600_src[0].sel = ctx->temp_reg + j;
639 j--;
640 }
641 }
642 return 0;
643 }
644
645 static int tgsi_op2(struct r600_shader_ctx *ctx)
646 {
647 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
648 struct r600_bc_alu_src r600_src[3];
649 struct r600_bc_alu alu;
650 int i, j, r;
651
652 r = tgsi_split_constant(ctx, r600_src);
653 if (r)
654 return r;
655 for (i = 0; i < 4; i++) {
656 memset(&alu, 0, sizeof(struct r600_bc_alu));
657 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
658 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
659 alu.dst.chan = i;
660 } else {
661 alu.inst = ctx->inst_info->r600_opcode;
662 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
663 alu.src[j] = r600_src[j];
664 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
665 }
666 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
667 if (r)
668 return r;
669 }
670 /* handle some special cases */
671 switch (ctx->inst_info->tgsi_opcode) {
672 case TGSI_OPCODE_SUB:
673 alu.src[1].neg = 1;
674 break;
675 case TGSI_OPCODE_ABS:
676 alu.src[0].abs = 1;
677 break;
678 default:
679 break;
680 }
681 if (i == 3) {
682 alu.last = 1;
683 }
684 r = r600_bc_add_alu(ctx->bc, &alu);
685 if (r)
686 return r;
687 }
688 return 0;
689 }
690
691 /*
692 * r600 - trunc to -PI..PI range
693 * r700 - normalize by dividing by 2PI
694 * see fdo bug 27901
695 */
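/* The sequence below computes t = fract(src * 1/(2*PI) + 0.5) and then
 * remaps it with a MULADD: t * 2*PI - PI on r600 (range [-PI, PI)) or
 * t * 1.0 - 0.5 on r700 (range [-0.5, 0.5)), before issuing SIN/COS and
 * replicating the result to the written channels.
 */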
696 static int tgsi_trig(struct r600_shader_ctx *ctx)
697 {
698 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
699 struct r600_bc_alu_src r600_src[3];
700 struct r600_bc_alu alu;
701 int i, r;
702 uint32_t lit_vals[4];
703
704 memset(lit_vals, 0, 4*4);
705 r = tgsi_split_constant(ctx, r600_src);
706 if (r)
707 return r;
708 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
709 lit_vals[1] = fui(0.5f);
710
711 memset(&alu, 0, sizeof(struct r600_bc_alu));
712 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
713 alu.is_op3 = 1;
714
715 alu.dst.chan = 0;
716 alu.dst.sel = ctx->temp_reg;
717 alu.dst.write = 1;
718
719 alu.src[0] = r600_src[0];
720 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
721
722 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
723 alu.src[1].chan = 0;
724 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
725 alu.src[2].chan = 1;
726 alu.last = 1;
727 r = r600_bc_add_alu(ctx->bc, &alu);
728 if (r)
729 return r;
730 r = r600_bc_add_literal(ctx->bc, lit_vals);
731 if (r)
732 return r;
733
734 memset(&alu, 0, sizeof(struct r600_bc_alu));
735 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
736
737 alu.dst.chan = 0;
738 alu.dst.sel = ctx->temp_reg;
739 alu.dst.write = 1;
740
741 alu.src[0].sel = ctx->temp_reg;
742 alu.src[0].chan = 0;
743 alu.last = 1;
744 r = r600_bc_add_alu(ctx->bc, &alu);
745 if (r)
746 return r;
747
748 if (ctx->bc->chiprev == 0) {
749 lit_vals[0] = fui(3.1415926535897f * 2.0f);
750 lit_vals[1] = fui(-3.1415926535897f);
751 } else {
752 lit_vals[0] = fui(1.0f);
753 lit_vals[1] = fui(-0.5f);
754 }
755
756 memset(&alu, 0, sizeof(struct r600_bc_alu));
757 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
758 alu.is_op3 = 1;
759
760 alu.dst.chan = 0;
761 alu.dst.sel = ctx->temp_reg;
762 alu.dst.write = 1;
763
764 alu.src[0].sel = ctx->temp_reg;
765 alu.src[0].chan = 0;
766
767 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
768 alu.src[1].chan = 0;
769 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
770 alu.src[2].chan = 1;
771 alu.last = 1;
772 r = r600_bc_add_alu(ctx->bc, &alu);
773 if (r)
774 return r;
775 r = r600_bc_add_literal(ctx->bc, lit_vals);
776 if (r)
777 return r;
778
779 memset(&alu, 0, sizeof(struct r600_bc_alu));
780 alu.inst = ctx->inst_info->r600_opcode;
781 alu.dst.chan = 0;
782 alu.dst.sel = ctx->temp_reg;
783 alu.dst.write = 1;
784
785 alu.src[0].sel = ctx->temp_reg;
786 alu.src[0].chan = 0;
787 alu.last = 1;
788 r = r600_bc_add_alu(ctx->bc, &alu);
789 if (r)
790 return r;
791
792 /* replicate result */
793 for (i = 0; i < 4; i++) {
794 memset(&alu, 0, sizeof(struct r600_bc_alu));
795 alu.src[0].sel = ctx->temp_reg;
796 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
797 alu.dst.chan = i;
798 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
799 if (r)
800 return r;
801 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
802 if (i == 3)
803 alu.last = 1;
804 r = r600_bc_add_alu(ctx->bc, &alu);
805 if (r)
806 return r;
807 }
808 return 0;
809 }
810
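/* Conditional kill: emit a per-channel KILLGT(0, src), so the pixel is
 * killed whenever a source component is negative.
 */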
811 static int tgsi_kill(struct r600_shader_ctx *ctx)
812 {
813 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
814 struct r600_bc_alu alu;
815 int i, r;
816
817 for (i = 0; i < 4; i++) {
818 memset(&alu, 0, sizeof(struct r600_bc_alu));
819 alu.inst = ctx->inst_info->r600_opcode;
820 alu.dst.chan = i;
821 alu.src[0].sel = V_SQ_ALU_SRC_0;
822 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
823 if (r)
824 return r;
825 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
826 if (i == 3) {
827 alu.last = 1;
828 }
829 r = r600_bc_add_alu(ctx->bc, &alu);
830 if (r)
831 return r;
832 }
833 return 0;
834 }
835
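/* SLT/SLE: the hardware only provides SETGT/SETGE, so swap the operands:
 * SLT(a, b) = SETGT(b, a) and SLE(a, b) = SETGE(b, a).
 */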
836 static int tgsi_slt(struct r600_shader_ctx *ctx)
837 {
838 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
839 struct r600_bc_alu_src r600_src[3];
840 struct r600_bc_alu alu;
841 int i, r;
842
843 r = tgsi_split_constant(ctx, r600_src);
844 if (r)
845 return r;
846 for (i = 0; i < 4; i++) {
847 memset(&alu, 0, sizeof(struct r600_bc_alu));
848 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
849 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
850 alu.dst.chan = i;
851 } else {
852 alu.inst = ctx->inst_info->r600_opcode;
853 alu.src[1] = r600_src[0];
854 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
855 alu.src[0] = r600_src[1];
856 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
857 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
858 if (r)
859 return r;
860 }
861 if (i == 3) {
862 alu.last = 1;
863 }
864 r = r600_bc_add_alu(ctx->bc, &alu);
865 if (r)
866 return r;
867 }
868 return 0;
869 }
870
871 static int tgsi_lit(struct r600_shader_ctx *ctx)
872 {
873 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
874 struct r600_bc_alu alu;
875 int r;
876
877 /* dst.x <- 1.0 */
878 memset(&alu, 0, sizeof(struct r600_bc_alu));
879 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
880 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
881 alu.src[0].chan = 0;
882 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
883 if (r)
884 return r;
885 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
886 r = r600_bc_add_alu(ctx->bc, &alu);
887 if (r)
888 return r;
889
890 /* dst.y = max(src.x, 0.0) */
891 memset(&alu, 0, sizeof(struct r600_bc_alu));
892 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
893 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
894 if (r)
895 return r;
896 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
897 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
898 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
899 if (r)
900 return r;
901 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
902 r = r600_bc_add_alu(ctx->bc, &alu);
903 if (r)
904 return r;
905
906 /* dst.z = NOP - fill Z slot */
907 memset(&alu, 0, sizeof(struct r600_bc_alu));
908 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
909 alu.dst.chan = 2;
910 r = r600_bc_add_alu(ctx->bc, &alu);
911 if (r)
912 return r;
913
914 /* dst.w <- 1.0 */
915 memset(&alu, 0, sizeof(struct r600_bc_alu));
916 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
917 alu.src[0].sel = V_SQ_ALU_SRC_1;
918 alu.src[0].chan = 0;
919 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
920 if (r)
921 return r;
922 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
923 alu.last = 1;
924 r = r600_bc_add_alu(ctx->bc, &alu);
925 if (r)
926 return r;
927
928 if (inst->Dst[0].Register.WriteMask & (1 << 2))
929 {
930 int chan;
931 int sel;
932
933 /* dst.z = log(src.y) */
934 memset(&alu, 0, sizeof(struct r600_bc_alu));
935 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
936 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
937 if (r)
938 return r;
939 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
940 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
941 if (r)
942 return r;
943 alu.last = 1;
944 r = r600_bc_add_alu(ctx->bc, &alu);
945 if (r)
946 return r;
947
948 chan = alu.dst.chan;
949 sel = alu.dst.sel;
950
951 /* tmp.x = MUL_LIT(src.w, dst.z, src.x) */
952 memset(&alu, 0, sizeof(struct r600_bc_alu));
953 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
954 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
955 if (r)
956 return r;
957 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
958 alu.src[1].sel = sel;
959 alu.src[1].chan = chan;
960 r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
961 if (r)
962 return r;
963 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
964 alu.dst.sel = ctx->temp_reg;
965 alu.dst.chan = 0;
966 alu.dst.write = 1;
967 alu.is_op3 = 1;
968 alu.last = 1;
969 r = r600_bc_add_alu(ctx->bc, &alu);
970 if (r)
971 return r;
972
973 /* dst.z = exp(tmp.x) */
974 memset(&alu, 0, sizeof(struct r600_bc_alu));
975 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
976 alu.src[0].sel = ctx->temp_reg;
977 alu.src[0].chan = 0;
978 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
979 if (r)
980 return r;
981 alu.last = 1;
982 r = r600_bc_add_alu(ctx->bc, &alu);
983 if (r)
984 return r;
985 }
986 return 0;
987 }
988
989 static int tgsi_trans(struct r600_shader_ctx *ctx)
990 {
991 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
992 struct r600_bc_alu alu;
993 int i, j, r;
994
995 for (i = 0; i < 4; i++) {
996 memset(&alu, 0, sizeof(struct r600_bc_alu));
997 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
998 alu.inst = ctx->inst_info->r600_opcode;
999 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1000 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1001 if (r)
1002 return r;
1003 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1004 }
1005 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1006 if (r)
1007 return r;
1008 alu.last = 1;
1009 r = r600_bc_add_alu(ctx->bc, &alu);
1010 if (r)
1011 return r;
1012 }
1013 }
1014 return 0;
1015 }
1016
1017 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1018 {
1019 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1020 struct r600_bc_alu alu;
1021 int i, r;
1022
1023 for (i = 0; i < 4; i++) {
1024 memset(&alu, 0, sizeof(struct r600_bc_alu));
1025 alu.src[0].sel = ctx->temp_reg;
1026 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1027 alu.dst.chan = i;
1028 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1029 if (r)
1030 return r;
1031 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1032 if (i == 3)
1033 alu.last = 1;
1034 r = r600_bc_add_alu(ctx->bc, &alu);
1035 if (r)
1036 return r;
1037 }
1038 return 0;
1039 }
1040
1041 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1042 {
1043 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1044 struct r600_bc_alu alu;
1045 int i, r;
1046
1047 memset(&alu, 0, sizeof(struct r600_bc_alu));
1048 alu.inst = ctx->inst_info->r600_opcode;
1049 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1050 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1051 if (r)
1052 return r;
1053 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1054 }
1055 alu.dst.sel = ctx->temp_reg;
1056 alu.dst.write = 1;
1057 alu.last = 1;
1058 r = r600_bc_add_alu(ctx->bc, &alu);
1059 if (r)
1060 return r;
1061 /* replicate result */
1062 return tgsi_helper_tempx_replicate(ctx);
1063 }
1064
1065 static int tgsi_pow(struct r600_shader_ctx *ctx)
1066 {
1067 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1068 struct r600_bc_alu alu;
1069 int r;
1070
1071 /* LOG2(a) */
1072 memset(&alu, 0, sizeof(struct r600_bc_alu));
1073 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1074 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1075 if (r)
1076 return r;
1077 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1078 alu.dst.sel = ctx->temp_reg;
1079 alu.dst.write = 1;
1080 alu.last = 1;
1081 r = r600_bc_add_alu(ctx->bc, &alu);
1082 if (r)
1083 return r;
1084 /* b * LOG2(a) */
1085 memset(&alu, 0, sizeof(struct r600_bc_alu));
1086 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1087 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1088 if (r)
1089 return r;
1090 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1091 alu.src[1].sel = ctx->temp_reg;
1092 alu.dst.sel = ctx->temp_reg;
1093 alu.dst.write = 1;
1094 alu.last = 1;
1095 r = r600_bc_add_alu(ctx->bc, &alu);
1096 if (r)
1097 return r;
1098 /* POW(a,b) = EXP2(b * LOG2(a)) */
1099 memset(&alu, 0, sizeof(struct r600_bc_alu));
1100 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1101 alu.src[0].sel = ctx->temp_reg;
1102 alu.dst.sel = ctx->temp_reg;
1103 alu.dst.write = 1;
1104 alu.last = 1;
1105 r = r600_bc_add_alu(ctx->bc, &alu);
1106 if (r)
1107 return r;
1108 return tgsi_helper_tempx_replicate(ctx);
1109 }
1110
1111 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1112 {
1113 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1114 struct r600_bc_alu alu;
1115 struct r600_bc_alu_src r600_src[3];
1116 int i, r;
1117
1118 r = tgsi_split_constant(ctx, r600_src);
1119 if (r)
1120 return r;
1121
1122 /* tmp = (src > 0 ? 1 : src) */
1123 for (i = 0; i < 4; i++) {
1124 memset(&alu, 0, sizeof(struct r600_bc_alu));
1125 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1126 alu.is_op3 = 1;
1127 alu.dst.sel = ctx->temp_reg;
1128 alu.dst.write = 1;
1129
1130 alu.src[0] = r600_src[0];
1131 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1132
1133 alu.src[1].sel = V_SQ_ALU_SRC_1;
1134
1135 alu.src[2] = r600_src[0];
1136 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1137 if (i == 3)
1138 alu.last = 1;
1139 r = r600_bc_add_alu(ctx->bc, &alu);
1140 if (r)
1141 return r;
1142 }
1143
1144 /* dst = (-tmp > 0 ? -1 : tmp) */
1145 for (i = 0; i < 4; i++) {
1146 memset(&alu, 0, sizeof(struct r600_bc_alu));
1147 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1148 alu.is_op3 = 1;
1149 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1150 if (r)
1151 return r;
1152
1153 alu.src[0].sel = ctx->temp_reg;
1154 alu.src[0].neg = 1;
1155
1156 alu.src[1].sel = V_SQ_ALU_SRC_1;
1157 alu.src[1].neg = 1;
1158
1159 alu.src[2].sel = ctx->temp_reg;
1160
1161 alu.dst.write = 1;
1162 if (i == 3)
1163 alu.last = 1;
1164 r = r600_bc_add_alu(ctx->bc, &alu);
1165 if (r)
1166 return r;
1167 }
1168 return 0;
1169 }
1170
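/* Move the per-channel result from temp_reg into the real destination,
 * emitting NOPs for channels masked out by the write mask.
 */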
1171 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1172 {
1173 struct r600_bc_alu alu;
1174 int i, r;
1175
1176 r = r600_bc_add_literal(ctx->bc, ctx->value);
1177 if (r)
1178 return r;
1179 for (i = 0; i < 4; i++) {
1180 memset(&alu, 0, sizeof(struct r600_bc_alu));
1181 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1182 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1183 alu.dst.chan = i;
1184 } else {
1185 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1186 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1187 if (r)
1188 return r;
1189 alu.src[0].sel = ctx->temp_reg;
1190 alu.src[0].chan = i;
1191 }
1192 if (i == 3) {
1193 alu.last = 1;
1194 }
1195 r = r600_bc_add_alu(ctx->bc, &alu);
1196 if (r)
1197 return r;
1198 }
1199 return 0;
1200 }
1201
1202 static int tgsi_op3(struct r600_shader_ctx *ctx)
1203 {
1204 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1205 struct r600_bc_alu_src r600_src[3];
1206 struct r600_bc_alu alu;
1207 int i, j, r;
1208
1209 r = tgsi_split_constant(ctx, r600_src);
1210 if (r)
1211 return r;
1212 /* do it in two steps, as op3 instructions don't support a write mask */
1213 for (i = 0; i < 4; i++) {
1214 memset(&alu, 0, sizeof(struct r600_bc_alu));
1215 alu.inst = ctx->inst_info->r600_opcode;
1216 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1217 alu.src[j] = r600_src[j];
1218 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1219 }
1220 alu.dst.sel = ctx->temp_reg;
1221 alu.dst.chan = i;
1222 alu.dst.write = 1;
1223 alu.is_op3 = 1;
1224 if (i == 3) {
1225 alu.last = 1;
1226 }
1227 r = r600_bc_add_alu(ctx->bc, &alu);
1228 if (r)
1229 return r;
1230 }
1231 return tgsi_helper_copy(ctx, inst);
1232 }
1233
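/* All dot products are built on DOT4: DP2 zeroes the z and w source
 * channels, DP3 zeroes w, and DPH replaces src0.w with 1.0 so the result
 * is src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w.
 */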
1234 static int tgsi_dp(struct r600_shader_ctx *ctx)
1235 {
1236 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1237 struct r600_bc_alu_src r600_src[3];
1238 struct r600_bc_alu alu;
1239 int i, j, r;
1240
1241 r = tgsi_split_constant(ctx, r600_src);
1242 if (r)
1243 return r;
1244 for (i = 0; i < 4; i++) {
1245 memset(&alu, 0, sizeof(struct r600_bc_alu));
1246 alu.inst = ctx->inst_info->r600_opcode;
1247 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1248 alu.src[j] = r600_src[j];
1249 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1250 }
1251 alu.dst.sel = ctx->temp_reg;
1252 alu.dst.chan = i;
1253 alu.dst.write = 1;
1254 /* handle some special cases */
1255 switch (ctx->inst_info->tgsi_opcode) {
1256 case TGSI_OPCODE_DP2:
1257 if (i > 1) {
1258 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1259 alu.src[0].chan = alu.src[1].chan = 0;
1260 }
1261 break;
1262 case TGSI_OPCODE_DP3:
1263 if (i > 2) {
1264 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1265 alu.src[0].chan = alu.src[1].chan = 0;
1266 }
1267 break;
1268 case TGSI_OPCODE_DPH:
1269 if (i == 3) {
1270 alu.src[0].sel = V_SQ_ALU_SRC_1;
1271 alu.src[0].chan = 0;
1272 alu.src[0].neg = 0;
1273 }
1274 break;
1275 default:
1276 break;
1277 }
1278 if (i == 3) {
1279 alu.last = 1;
1280 }
1281 r = r600_bc_add_alu(ctx->bc, &alu);
1282 if (r)
1283 return r;
1284 }
1285 return tgsi_helper_copy(ctx, inst);
1286 }
1287
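/* Texture instructions (TEX/TXP/TXB and the DDX/DDY gradients). For TXP
 * the xyz coordinates are first divided by w; coordinates that don't
 * already live in a temporary are copied into temp_reg before the fetch.
 */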
1288 static int tgsi_tex(struct r600_shader_ctx *ctx)
1289 {
1290 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1291 struct r600_bc_tex tex;
1292 struct r600_bc_alu alu;
1293 unsigned src_gpr;
1294 int r, i;
1295
1296 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1297
1298 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1299 /* Add perspective divide */
1300 memset(&alu, 0, sizeof(struct r600_bc_alu));
1301 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1302 alu.src[0].sel = src_gpr;
1303 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1304 alu.dst.sel = ctx->temp_reg;
1305 alu.dst.chan = 3;
1306 alu.last = 1;
1307 alu.dst.write = 1;
1308 r = r600_bc_add_alu(ctx->bc, &alu);
1309 if (r)
1310 return r;
1311
1312 for (i = 0; i < 3; i++) {
1313 memset(&alu, 0, sizeof(struct r600_bc_alu));
1314 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1315 alu.src[0].sel = ctx->temp_reg;
1316 alu.src[0].chan = 3;
1317 alu.src[1].sel = src_gpr;
1318 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1319 alu.dst.sel = ctx->temp_reg;
1320 alu.dst.chan = i;
1321 alu.dst.write = 1;
1322 r = r600_bc_add_alu(ctx->bc, &alu);
1323 if (r)
1324 return r;
1325 }
1326 memset(&alu, 0, sizeof(struct r600_bc_alu));
1327 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1328 alu.src[0].sel = V_SQ_ALU_SRC_1;
1329 alu.src[0].chan = 0;
1330 alu.dst.sel = ctx->temp_reg;
1331 alu.dst.chan = 3;
1332 alu.last = 1;
1333 alu.dst.write = 1;
1334 r = r600_bc_add_alu(ctx->bc, &alu);
1335 if (r)
1336 return r;
1337 src_gpr = ctx->temp_reg;
1338 } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
1339 for (i = 0; i < 4; i++) {
1340 memset(&alu, 0, sizeof(struct r600_bc_alu));
1341 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1342 alu.src[0].sel = src_gpr;
1343 alu.src[0].chan = i;
1344 alu.dst.sel = ctx->temp_reg;
1345 alu.dst.chan = i;
1346 if (i == 3)
1347 alu.last = 1;
1348 alu.dst.write = 1;
1349 r = r600_bc_add_alu(ctx->bc, &alu);
1350 if (r)
1351 return r;
1352 }
1353 src_gpr = ctx->temp_reg;
1354 }
1355
1356 memset(&tex, 0, sizeof(struct r600_bc_tex));
1357 tex.inst = ctx->inst_info->r600_opcode;
1358 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1359 tex.sampler_id = tex.resource_id;
1360 tex.src_gpr = src_gpr;
1361 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1362 tex.dst_sel_x = 0;
1363 tex.dst_sel_y = 1;
1364 tex.dst_sel_z = 2;
1365 tex.dst_sel_w = 3;
1366 tex.src_sel_x = 0;
1367 tex.src_sel_y = 1;
1368 tex.src_sel_z = 2;
1369 tex.src_sel_w = 3;
1370
1371 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1372 tex.coord_type_x = 1;
1373 tex.coord_type_y = 1;
1374 tex.coord_type_z = 1;
1375 tex.coord_type_w = 1;
1376 }
1377 return r600_bc_add_tex(ctx->bc, &tex);
1378 }
1379
1380 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1381 {
1382 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1383 struct r600_bc_alu_src r600_src[3];
1384 struct r600_bc_alu alu;
1385 unsigned i;
1386 int r;
1387
1388 r = tgsi_split_constant(ctx, r600_src);
1389 if (r)
1390 return r;
1391 /* 1 - src0 */
1392 for (i = 0; i < 4; i++) {
1393 memset(&alu, 0, sizeof(struct r600_bc_alu));
1394 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1395 alu.src[0].sel = V_SQ_ALU_SRC_1;
1396 alu.src[0].chan = 0;
1397 alu.src[1] = r600_src[0];
1398 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1399 alu.src[1].neg = 1;
1400 alu.dst.sel = ctx->temp_reg;
1401 alu.dst.chan = i;
1402 if (i == 3) {
1403 alu.last = 1;
1404 }
1405 alu.dst.write = 1;
1406 r = r600_bc_add_alu(ctx->bc, &alu);
1407 if (r)
1408 return r;
1409 }
1410 r = r600_bc_add_literal(ctx->bc, ctx->value);
1411 if (r)
1412 return r;
1413
1414 /* (1 - src0) * src2 */
1415 for (i = 0; i < 4; i++) {
1416 memset(&alu, 0, sizeof(struct r600_bc_alu));
1417 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1418 alu.src[0].sel = ctx->temp_reg;
1419 alu.src[0].chan = i;
1420 alu.src[1] = r600_src[2];
1421 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1422 alu.dst.sel = ctx->temp_reg;
1423 alu.dst.chan = i;
1424 if (i == 3) {
1425 alu.last = 1;
1426 }
1427 alu.dst.write = 1;
1428 r = r600_bc_add_alu(ctx->bc, &alu);
1429 if (r)
1430 return r;
1431 }
1432 r = r600_bc_add_literal(ctx->bc, ctx->value);
1433 if (r)
1434 return r;
1435
1436 /* src0 * src1 + (1 - src0) * src2 */
1437 for (i = 0; i < 4; i++) {
1438 memset(&alu, 0, sizeof(struct r600_bc_alu));
1439 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1440 alu.is_op3 = 1;
1441 alu.src[0] = r600_src[0];
1442 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1443 alu.src[1] = r600_src[1];
1444 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1445 alu.src[2].sel = ctx->temp_reg;
1446 alu.src[2].chan = i;
1447 alu.dst.sel = ctx->temp_reg;
1448 alu.dst.chan = i;
1449 if (i == 3) {
1450 alu.last = 1;
1451 }
1452 r = r600_bc_add_alu(ctx->bc, &alu);
1453 if (r)
1454 return r;
1455 }
1456 return tgsi_helper_copy(ctx, inst);
1457 }
1458
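/* CMP: dst = (src0 < 0.0) ? src1 : src2, implemented per channel as
 * CNDGE(src0, src2, src1).
 */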
1459 static int tgsi_cmp(struct r600_shader_ctx *ctx)
1460 {
1461 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1462 struct r600_bc_alu_src r600_src[3];
1463 struct r600_bc_alu alu;
1464 int use_temp = 0;
1465 int i, r;
1466
1467 r = tgsi_split_constant(ctx, r600_src);
1468 if (r)
1469 return r;
1470
1471 if (inst->Dst[0].Register.WriteMask != 0xf)
1472 use_temp = 1;
1473
1474 for (i = 0; i < 4; i++) {
1475 memset(&alu, 0, sizeof(struct r600_bc_alu));
1476 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
1477 alu.src[0] = r600_src[0];
1478 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1479
1480 alu.src[1] = r600_src[2];
1481 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1482
1483 alu.src[2] = r600_src[1];
1484 alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1485
1486 if (use_temp)
1487 alu.dst.sel = ctx->temp_reg;
1488 else {
1489 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1490 if (r)
1491 return r;
1492 }
1493 alu.dst.chan = i;
1494 alu.dst.write = 1;
1495 alu.is_op3 = 1;
1496 if (i == 3)
1497 alu.last = 1;
1498 r = r600_bc_add_alu(ctx->bc, &alu);
1499 if (r)
1500 return r;
1501 }
1502 if (use_temp)
1503 return tgsi_helper_copy(ctx, inst);
1504 return 0;
1505 }
1506
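/* XPD: cross product, dst.xyz = src0.yzx * src1.zxy - src0.zxy * src1.yzx,
 * done as a MUL into temp_reg followed by a MULADD with the negated
 * temporary.
 */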
1507 static int tgsi_xpd(struct r600_shader_ctx *ctx)
1508 {
1509 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1510 struct r600_bc_alu_src r600_src[3];
1511 struct r600_bc_alu alu;
1512 uint32_t use_temp = 0;
1513 int i, r;
1514
1515 if (inst->Dst[0].Register.WriteMask != 0xf)
1516 use_temp = 1;
1517
1518 r = tgsi_split_constant(ctx, r600_src);
1519 if (r)
1520 return r;
1521
1522 for (i = 0; i < 4; i++) {
1523 memset(&alu, 0, sizeof(struct r600_bc_alu));
1524 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1525
1526 alu.src[0] = r600_src[0];
1527 switch (i) {
1528 case 0:
1529 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1530 break;
1531 case 1:
1532 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1533 break;
1534 case 2:
1535 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1536 break;
1537 case 3:
1538 alu.src[0].sel = V_SQ_ALU_SRC_0;
1539 alu.src[0].chan = i;
1540 }
1541
1542 alu.src[1] = r600_src[1];
1543 switch (i) {
1544 case 0:
1545 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1546 break;
1547 case 1:
1548 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1549 break;
1550 case 2:
1551 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1552 break;
1553 case 3:
1554 alu.src[1].sel = V_SQ_ALU_SRC_0;
1555 alu.src[1].chan = i;
1556 }
1557
1558 alu.dst.sel = ctx->temp_reg;
1559 alu.dst.chan = i;
1560 alu.dst.write = 1;
1561
1562 if (i == 3)
1563 alu.last = 1;
1564 r = r600_bc_add_alu(ctx->bc, &alu);
1565 if (r)
1566 return r;
1567 }
1568
1569 for (i = 0; i < 4; i++) {
1570 memset(&alu, 0, sizeof(struct r600_bc_alu));
1571 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1572
1573 alu.src[0] = r600_src[0];
1574 switch (i) {
1575 case 0:
1576 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1577 break;
1578 case 1:
1579 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1580 break;
1581 case 2:
1582 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1583 break;
1584 case 3:
1585 alu.src[0].sel = V_SQ_ALU_SRC_0;
1586 alu.src[0].chan = i;
1587 }
1588
1589 alu.src[1] = r600_src[1];
1590 switch (i) {
1591 case 0:
1592 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1593 break;
1594 case 1:
1595 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1596 break;
1597 case 2:
1598 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1599 break;
1600 case 3:
1601 alu.src[1].sel = V_SQ_ALU_SRC_0;
1602 alu.src[1].chan = i;
1603 }
1604
1605 alu.src[2].sel = ctx->temp_reg;
1606 alu.src[2].neg = 1;
1607 alu.src[2].chan = i;
1608
1609 if (use_temp)
1610 alu.dst.sel = ctx->temp_reg;
1611 else {
1612 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1613 if (r)
1614 return r;
1615 }
1616 alu.dst.chan = i;
1617 alu.dst.write = 1;
1618 alu.is_op3 = 1;
1619 if (i == 3)
1620 alu.last = 1;
1621 r = r600_bc_add_alu(ctx->bc, &alu);
1622 if (r)
1623 return r;
1624 }
1625 if (use_temp)
1626 return tgsi_helper_copy(ctx, inst);
1627 return 0;
1628 }
1629
1630
1631 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1632 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1633 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1634 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1635 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
1636 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
1637 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1638 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1639 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1640 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1641 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1642 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1643 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1644 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
1645 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1646 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1647 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
1648 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1649 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1650 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1651 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1652 /* gap */
1653 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1654 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1655 /* gap */
1656 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1657 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1658 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
1659 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1660 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
1661 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1662 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
1663 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
1664 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
1665 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
1666 /* gap */
1667 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1668 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1669 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1670 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1671 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
1672 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
1673 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
1674 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1675 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1676 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1677 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1678 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1679 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1680 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
1681 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1682 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
1683 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
1684 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt},
1685 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
1686 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1687 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1688 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1689 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1690 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1691 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1692 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1693 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1694 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1695 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1696 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1697 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1698 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1699 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1700 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
1701 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
1702 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1703 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
1704 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1705 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1706 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1707 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1708 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1709 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1710 /* gap */
1711 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1712 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1713 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1714 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1715 /* gap */
1716 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1717 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1718 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1719 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1720 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1721 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1722 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1723 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
1724 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1725 /* gap */
1726 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1727 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1728 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1729 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1730 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1731 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1732 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1733 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1734 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1735 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1736 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1737 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1738 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1739 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1740 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1741 /* gap */
1742 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1743 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1744 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1745 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1746 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1747 /* gap */
1748 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1749 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1750 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1751 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1752 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1753 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1754 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1755 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1756 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
1757 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1758 /* gap */
1759 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1760 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1761 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1762 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1763 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1764 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1765 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1766 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1767 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1768 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1769 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1770 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1771 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1772 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1773 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1774 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1775 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1776 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1777 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1778 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1779 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1780 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1781 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1782 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1783 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1784 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1785 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1786 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1787 };