r600g: partially fix texturing from depth buffer + initial support for untiling
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
40 struct r600_shader_ctx {
41 struct tgsi_shader_info info;
42 struct tgsi_parse_context parse;
43 const struct tgsi_token *tokens;
44 unsigned type;
45 unsigned file_offset[TGSI_FILE_COUNT];
46 unsigned temp_reg;
47 struct r600_shader_tgsi_instruction *inst_info;
48 struct r600_bc *bc;
49 struct r600_shader *shader;
50 u32 value[4];
51 };
52
/*
 * One entry of the TGSI opcode translation table: which r600 ALU opcode a TGSI
 * opcode maps to and which handler emits the bytecode for it.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* presumably non-zero for three-operand ALU opcodes - TODO confirm */
	unsigned	is_op3;
	/* r600 ALU opcode used by the generic handlers */
	unsigned	r600_opcode;
	/* handler translating the current instruction; returns 0 or -errno */
	int		(*process)(struct r600_shader_ctx *ctx);
};
59
60 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63 static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64 {
65 struct r600_context *rctx = r600_context(ctx);
66 const struct util_format_description *desc;
67 enum pipe_format resource_format[160];
68 unsigned i, nresources = 0;
69 struct r600_bc *bc = &shader->bc;
70 struct r600_bc_cf *cf;
71 struct r600_bc_vtx *vtx;
72
73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74 return 0;
75 for (i = 0; i < rctx->vertex_elements->count; i++) {
76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77 }
78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79 switch (cf->inst) {
80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83 desc = util_format_description(resource_format[vtx->buffer_id]);
84 if (desc == NULL) {
85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86 return -EINVAL;
87 }
88 vtx->dst_sel_x = desc->swizzle[0];
89 vtx->dst_sel_y = desc->swizzle[1];
90 vtx->dst_sel_z = desc->swizzle[2];
91 vtx->dst_sel_w = desc->swizzle[3];
92 }
93 break;
94 default:
95 break;
96 }
97 }
98 return r600_bc_build(&shader->bc);
99 }
100
101 int r600_pipe_shader_create(struct pipe_context *ctx,
102 struct r600_context_state *rpshader,
103 const struct tgsi_token *tokens)
104 {
105 struct r600_screen *rscreen = r600_screen(ctx->screen);
106 int r;
107
108 //fprintf(stderr, "--------------------------------------------------------------\n");
109 //tgsi_dump(tokens, 0);
110 if (rpshader == NULL)
111 return -ENOMEM;
112 rpshader->shader.family = radeon_get_family(rscreen->rw);
113 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114 if (r) {
115 R600_ERR("translation from TGSI failed !\n");
116 return r;
117 }
118 r = r600_bc_build(&rpshader->shader.bc);
119 if (r) {
120 R600_ERR("building bytecode failed !\n");
121 return r;
122 }
123 //fprintf(stderr, "______________________________________________________________\n");
124 return 0;
125 }
126
127 static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128 {
129 struct r600_screen *rscreen = r600_screen(ctx->screen);
130 struct r600_shader *rshader = &rpshader->shader;
131 struct radeon_state *state;
132 unsigned i, tmp;
133
134 rpshader->rstate = radeon_state_decref(rpshader->rstate);
135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136 if (state == NULL)
137 return -ENOMEM;
138 for (i = 0; i < 10; i++) {
139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140 }
141 /* so far never got proper semantic id from tgsi */
142 for (i = 0; i < 32; i++) {
143 tmp = i << ((i & 3) * 8);
144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145 }
146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148 rpshader->rstate = state;
149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151 rpshader->rstate->nbo = 2;
152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153 return radeon_state_pm4(state);
154 }
155
156 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
157 {
158 const struct pipe_rasterizer_state *rasterizer;
159 struct r600_screen *rscreen = r600_screen(ctx->screen);
160 struct r600_shader *rshader = &rpshader->shader;
161 struct r600_context *rctx = r600_context(ctx);
162 struct radeon_state *state;
163 unsigned i, tmp, exports_ps, num_cout;
164
165 rasterizer = &rctx->rasterizer->state.rasterizer;
166 rpshader->rstate = radeon_state_decref(rpshader->rstate);
167 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
168 if (state == NULL)
169 return -ENOMEM;
170 for (i = 0; i < rshader->ninput; i++) {
171 tmp = S_028644_SEMANTIC(i);
172 tmp |= S_028644_SEL_CENTROID(1);
173 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
174 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
175 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
176 }
177 if (rasterizer->sprite_coord_enable & (1 << i)) {
178 tmp |= S_028644_PT_SPRITE_TEX(1);
179 }
180 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
181 }
182
183 exports_ps = 0;
184 num_cout = 0;
185 for (i = 0; i < rshader->noutput; i++) {
186 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
187 exports_ps |= 1;
188 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
189 exports_ps |= (1 << (num_cout+1));
190 num_cout++;
191 }
192 }
193 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
194 S_0286CC_PERSP_GRADIENT_ENA(1);
195 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
196 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
197 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
198 rpshader->rstate = state;
199 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
200 rpshader->rstate->nbo = 1;
201 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
202 return radeon_state_pm4(state);
203 }
204
205 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
206 {
207 struct r600_screen *rscreen = r600_screen(ctx->screen);
208 struct r600_context *rctx = r600_context(ctx);
209 struct r600_shader *rshader = &rpshader->shader;
210 int r;
211
212 /* copy new shader */
213 radeon_bo_decref(rscreen->rw, rpshader->bo);
214 rpshader->bo = NULL;
215 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
216 4096, NULL);
217 if (rpshader->bo == NULL) {
218 return -ENOMEM;
219 }
220 radeon_bo_map(rscreen->rw, rpshader->bo);
221 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
222 radeon_bo_unmap(rscreen->rw, rpshader->bo);
223 /* build state */
224 rshader->flat_shade = rctx->flat_shade;
225 switch (rshader->processor_type) {
226 case TGSI_PROCESSOR_VERTEX:
227 r = r600_pipe_shader_vs(ctx, rpshader);
228 break;
229 case TGSI_PROCESSOR_FRAGMENT:
230 r = r600_pipe_shader_ps(ctx, rpshader);
231 break;
232 default:
233 r = -EINVAL;
234 break;
235 }
236 return r;
237 }
238
239 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
240 {
241 struct r600_context *rctx = r600_context(ctx);
242 int r;
243
244 if (rpshader == NULL)
245 return -EINVAL;
246 /* there should be enough input */
247 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
248 R600_ERR("%d resources provided, expecting %d\n",
249 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
250 return -EINVAL;
251 }
252 r = r600_shader_update(ctx, &rpshader->shader);
253 if (r)
254 return r;
255 return r600_pipe_shader(ctx, rpshader);
256 }
257
258 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
259 {
260 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
261 int j;
262
263 if (i->Instruction.NumDstRegs > 1) {
264 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
265 return -EINVAL;
266 }
267 if (i->Instruction.Predicate) {
268 R600_ERR("predicate unsupported\n");
269 return -EINVAL;
270 }
271 if (i->Instruction.Label) {
272 R600_ERR("label unsupported\n");
273 return -EINVAL;
274 }
275 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
276 if (i->Src[j].Register.Indirect ||
277 i->Src[j].Register.Dimension ||
278 i->Src[j].Register.Absolute) {
279 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
280 return -EINVAL;
281 }
282 }
283 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
284 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
285 R600_ERR("unsupported dst (indirect|dimension)\n");
286 return -EINVAL;
287 }
288 }
289 return 0;
290 }
291
292 static int tgsi_declaration(struct r600_shader_ctx *ctx)
293 {
294 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
295 struct r600_bc_vtx vtx;
296 unsigned i;
297 int r;
298
299 switch (d->Declaration.File) {
300 case TGSI_FILE_INPUT:
301 i = ctx->shader->ninput++;
302 ctx->shader->input[i].name = d->Semantic.Name;
303 ctx->shader->input[i].sid = d->Semantic.Index;
304 ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
305 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
306 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
307 /* turn input into fetch */
308 memset(&vtx, 0, sizeof(struct r600_bc_vtx));
309 vtx.inst = 0;
310 vtx.fetch_type = 0;
311 vtx.buffer_id = i;
312 /* register containing the index into the buffer */
313 vtx.src_gpr = 0;
314 vtx.src_sel_x = 0;
315 vtx.mega_fetch_count = 0x1F;
316 vtx.dst_gpr = ctx->shader->input[i].gpr;
317 vtx.dst_sel_x = 0;
318 vtx.dst_sel_y = 1;
319 vtx.dst_sel_z = 2;
320 vtx.dst_sel_w = 3;
321 r = r600_bc_add_vtx(ctx->bc, &vtx);
322 if (r)
323 return r;
324 }
325 break;
326 case TGSI_FILE_OUTPUT:
327 i = ctx->shader->noutput++;
328 ctx->shader->output[i].name = d->Semantic.Name;
329 ctx->shader->output[i].sid = d->Semantic.Index;
330 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
331 ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
332 break;
333 case TGSI_FILE_CONSTANT:
334 case TGSI_FILE_TEMPORARY:
335 case TGSI_FILE_SAMPLER:
336 break;
337 default:
338 R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
339 return -EINVAL;
340 }
341 return 0;
342 }
343
344 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
345 {
346 struct tgsi_full_immediate *immediate;
347 struct r600_shader_ctx ctx;
348 struct r600_bc_output output[32];
349 unsigned output_done, noutput;
350 unsigned opcode;
351 int i, r = 0, pos0;
352
353 ctx.bc = &shader->bc;
354 ctx.shader = shader;
355 r = r600_bc_init(ctx.bc, shader->family);
356 if (r)
357 return r;
358 ctx.tokens = tokens;
359 tgsi_scan_shader(tokens, &ctx.info);
360 tgsi_parse_init(&ctx.parse, tokens);
361 ctx.type = ctx.parse.FullHeader.Processor.Processor;
362 shader->processor_type = ctx.type;
363
364 /* register allocations */
365 /* Values [0,127] correspond to GPR[0..127].
366 * Values [128,159] correspond to constant buffer bank 0
367 * Values [160,191] correspond to constant buffer bank 1
368 * Values [256,511] correspond to cfile constants c[0..255].
369 * Other special values are shown in the list below.
370 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
371 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
372 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
373 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
374 * 248 SQ_ALU_SRC_0: special constant 0.0.
375 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
376 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
377 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
378 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
379 * 253 SQ_ALU_SRC_LITERAL: literal constant.
380 * 254 SQ_ALU_SRC_PV: previous vector result.
381 * 255 SQ_ALU_SRC_PS: previous scalar result.
382 */
383 for (i = 0; i < TGSI_FILE_COUNT; i++) {
384 ctx.file_offset[i] = 0;
385 }
386 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
387 ctx.file_offset[TGSI_FILE_INPUT] = 1;
388 }
389 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
390 ctx.info.file_count[TGSI_FILE_INPUT];
391 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
392 ctx.info.file_count[TGSI_FILE_OUTPUT];
393 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
394 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
395 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
396 ctx.info.file_count[TGSI_FILE_TEMPORARY];
397
398 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
399 tgsi_parse_token(&ctx.parse);
400 switch (ctx.parse.FullToken.Token.Type) {
401 case TGSI_TOKEN_TYPE_IMMEDIATE:
402 immediate = &ctx.parse.FullToken.FullImmediate;
403 ctx.value[0] = immediate->u[0].Uint;
404 ctx.value[1] = immediate->u[1].Uint;
405 ctx.value[2] = immediate->u[2].Uint;
406 ctx.value[3] = immediate->u[3].Uint;
407 break;
408 case TGSI_TOKEN_TYPE_DECLARATION:
409 r = tgsi_declaration(&ctx);
410 if (r)
411 goto out_err;
412 break;
413 case TGSI_TOKEN_TYPE_INSTRUCTION:
414 r = tgsi_is_supported(&ctx);
415 if (r)
416 goto out_err;
417 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
418 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
419 r = ctx.inst_info->process(&ctx);
420 if (r)
421 goto out_err;
422 r = r600_bc_add_literal(ctx.bc, ctx.value);
423 if (r)
424 goto out_err;
425 break;
426 default:
427 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
428 r = -EINVAL;
429 goto out_err;
430 }
431 }
432 /* export output */
433 noutput = shader->noutput;
434 for (i = 0, pos0 = 0; i < noutput; i++) {
435 memset(&output[i], 0, sizeof(struct r600_bc_output));
436 output[i].gpr = shader->output[i].gpr;
437 output[i].elem_size = 3;
438 output[i].swizzle_x = 0;
439 output[i].swizzle_y = 1;
440 output[i].swizzle_z = 2;
441 output[i].swizzle_w = 3;
442 output[i].barrier = 1;
443 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
444 output[i].array_base = i - pos0;
445 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
446 switch (ctx.type) {
447 case TGSI_PROCESSOR_VERTEX:
448 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
449 output[i].array_base = 60;
450 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
451 /* position doesn't count in array_base */
452 pos0++;
453 }
454 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
455 output[i].array_base = 61;
456 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
457 /* position doesn't count in array_base */
458 pos0++;
459 }
460 break;
461 case TGSI_PROCESSOR_FRAGMENT:
462 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
463 output[i].array_base = shader->output[i].sid;
464 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
465 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
466 output[i].array_base = 61;
467 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
468 } else {
469 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
470 r = -EINVAL;
471 goto out_err;
472 }
473 break;
474 default:
475 R600_ERR("unsupported processor type %d\n", ctx.type);
476 r = -EINVAL;
477 goto out_err;
478 }
479 }
480 /* add fake param output for vertex shader if no param is exported */
481 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
482 for (i = 0, pos0 = 0; i < noutput; i++) {
483 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
484 pos0 = 1;
485 break;
486 }
487 }
488 if (!pos0) {
489 memset(&output[i], 0, sizeof(struct r600_bc_output));
490 output[i].gpr = 0;
491 output[i].elem_size = 3;
492 output[i].swizzle_x = 0;
493 output[i].swizzle_y = 1;
494 output[i].swizzle_z = 2;
495 output[i].swizzle_w = 3;
496 output[i].barrier = 1;
497 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
498 output[i].array_base = 0;
499 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
500 noutput++;
501 }
502 }
503 /* add fake pixel export */
504 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
505 memset(&output[0], 0, sizeof(struct r600_bc_output));
506 output[0].gpr = 0;
507 output[0].elem_size = 3;
508 output[0].swizzle_x = 7;
509 output[0].swizzle_y = 7;
510 output[0].swizzle_z = 7;
511 output[0].swizzle_w = 7;
512 output[0].barrier = 1;
513 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
514 output[0].array_base = 0;
515 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
516 noutput++;
517 }
518 /* set export done on last export of each type */
519 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
520 if (i == (noutput - 1)) {
521 output[i].end_of_program = 1;
522 }
523 if (!(output_done & (1 << output[i].type))) {
524 output_done |= (1 << output[i].type);
525 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
526 }
527 }
528 /* add output to bytecode */
529 for (i = 0; i < noutput; i++) {
530 r = r600_bc_add_output(ctx.bc, &output[i]);
531 if (r)
532 goto out_err;
533 }
534 tgsi_parse_free(&ctx.parse);
535 return 0;
536 out_err:
537 tgsi_parse_free(&ctx.parse);
538 return r;
539 }
540
541 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
542 {
543 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
544 return -EINVAL;
545 }
546
/* Handler that emits nothing for the instruction and always returns 0. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
551
552 static int tgsi_src(struct r600_shader_ctx *ctx,
553 const struct tgsi_full_src_register *tgsi_src,
554 struct r600_bc_alu_src *r600_src)
555 {
556 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
557 r600_src->sel = tgsi_src->Register.Index;
558 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
559 r600_src->sel = 0;
560 }
561 r600_src->neg = tgsi_src->Register.Negate;
562 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
563 return 0;
564 }
565
566 static int tgsi_dst(struct r600_shader_ctx *ctx,
567 const struct tgsi_full_dst_register *tgsi_dst,
568 unsigned swizzle,
569 struct r600_bc_alu_dst *r600_dst)
570 {
571 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
572
573 r600_dst->sel = tgsi_dst->Register.Index;
574 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
575 r600_dst->chan = swizzle;
576 r600_dst->write = 1;
577 if (inst->Instruction.Saturate) {
578 r600_dst->clamp = 1;
579 }
580 return 0;
581 }
582
583 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
584 {
585 switch (swizzle) {
586 case 0:
587 return tgsi_src->Register.SwizzleX;
588 case 1:
589 return tgsi_src->Register.SwizzleY;
590 case 2:
591 return tgsi_src->Register.SwizzleZ;
592 case 3:
593 return tgsi_src->Register.SwizzleW;
594 default:
595 return 0;
596 }
597 }
598
599 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
600 {
601 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
602 struct r600_bc_alu alu;
603 int i, j, k, nconst, r;
604
605 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
606 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
607 nconst++;
608 }
609 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
610 if (r) {
611 return r;
612 }
613 }
614 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
615 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
616 for (k = 0; k < 4; k++) {
617 memset(&alu, 0, sizeof(struct r600_bc_alu));
618 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
619 alu.src[0].sel = r600_src[0].sel;
620 alu.src[0].chan = k;
621 alu.dst.sel = ctx->temp_reg + j;
622 alu.dst.chan = k;
623 alu.dst.write = 1;
624 if (k == 3)
625 alu.last = 1;
626 r = r600_bc_add_alu(ctx->bc, &alu);
627 if (r)
628 return r;
629 }
630 r600_src[0].sel = ctx->temp_reg + j;
631 j--;
632 }
633 }
634 return 0;
635 }
636
637 static int tgsi_op2(struct r600_shader_ctx *ctx)
638 {
639 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
640 struct r600_bc_alu_src r600_src[3];
641 struct r600_bc_alu alu;
642 int i, j, r;
643
644 r = tgsi_split_constant(ctx, r600_src);
645 if (r)
646 return r;
647 for (i = 0; i < 4; i++) {
648 memset(&alu, 0, sizeof(struct r600_bc_alu));
649 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
650 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
651 alu.dst.chan = i;
652 } else {
653 alu.inst = ctx->inst_info->r600_opcode;
654 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
655 alu.src[j] = r600_src[j];
656 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
657 }
658 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
659 if (r)
660 return r;
661 }
662 /* handle some special cases */
663 switch (ctx->inst_info->tgsi_opcode) {
664 case TGSI_OPCODE_SUB:
665 alu.src[1].neg = 1;
666 break;
667 case TGSI_OPCODE_ABS:
668 alu.src[0].abs = 1;
669 break;
670 default:
671 break;
672 }
673 if (i == 3) {
674 alu.last = 1;
675 }
676 r = r600_bc_add_alu(ctx->bc, &alu);
677 if (r)
678 return r;
679 }
680 return 0;
681 }
682
683 /*
684 * r600 - trunc to -PI..PI range
685 * r700 - normalize by dividing by 2PI
686 * see fdo bug 27901
687 */
688 static int tgsi_trig(struct r600_shader_ctx *ctx)
689 {
690 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
691 struct r600_bc_alu_src r600_src[3];
692 struct r600_bc_alu alu;
693 int i, r;
694 uint32_t lit_vals[4];
695
696 memset(lit_vals, 0, 4*4);
697 r = tgsi_split_constant(ctx, r600_src);
698 if (r)
699 return r;
700 lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
701 lit_vals[1] = fui(0.5f);
702
703 memset(&alu, 0, sizeof(struct r600_bc_alu));
704 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
705 alu.is_op3 = 1;
706
707 alu.dst.chan = 0;
708 alu.dst.sel = ctx->temp_reg;
709 alu.dst.write = 1;
710
711 alu.src[0] = r600_src[0];
712 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
713
714 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
715 alu.src[1].chan = 0;
716 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
717 alu.src[2].chan = 1;
718 alu.last = 1;
719 r = r600_bc_add_alu(ctx->bc, &alu);
720 if (r)
721 return r;
722 r = r600_bc_add_literal(ctx->bc, lit_vals);
723 if (r)
724 return r;
725
726 memset(&alu, 0, sizeof(struct r600_bc_alu));
727 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
728
729 alu.dst.chan = 0;
730 alu.dst.sel = ctx->temp_reg;
731 alu.dst.write = 1;
732
733 alu.src[0].sel = ctx->temp_reg;
734 alu.src[0].chan = 0;
735 alu.last = 1;
736 r = r600_bc_add_alu(ctx->bc, &alu);
737 if (r)
738 return r;
739
740 if (ctx->bc->chiprev == 0) {
741 lit_vals[0] = fui(3.1415926535897f * 2.0f);
742 lit_vals[1] = fui(-3.1415926535897f);
743 } else {
744 lit_vals[0] = fui(1.0f);
745 lit_vals[1] = fui(-0.5f);
746 }
747
748 memset(&alu, 0, sizeof(struct r600_bc_alu));
749 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
750 alu.is_op3 = 1;
751
752 alu.dst.chan = 0;
753 alu.dst.sel = ctx->temp_reg;
754 alu.dst.write = 1;
755
756 alu.src[0].sel = ctx->temp_reg;
757 alu.src[0].chan = 0;
758
759 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
760 alu.src[1].chan = 0;
761 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
762 alu.src[2].chan = 1;
763 alu.last = 1;
764 r = r600_bc_add_alu(ctx->bc, &alu);
765 if (r)
766 return r;
767 r = r600_bc_add_literal(ctx->bc, lit_vals);
768 if (r)
769 return r;
770
771 memset(&alu, 0, sizeof(struct r600_bc_alu));
772 alu.inst = ctx->inst_info->r600_opcode;
773 alu.dst.chan = 0;
774 alu.dst.sel = ctx->temp_reg;
775 alu.dst.write = 1;
776
777 alu.src[0].sel = ctx->temp_reg;
778 alu.src[0].chan = 0;
779 alu.last = 1;
780 r = r600_bc_add_alu(ctx->bc, &alu);
781 if (r)
782 return r;
783
784 /* replicate result */
785 for (i = 0; i < 4; i++) {
786 memset(&alu, 0, sizeof(struct r600_bc_alu));
787 alu.src[0].sel = ctx->temp_reg;
788 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
789 alu.dst.chan = i;
790 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
791 if (r)
792 return r;
793 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
794 if (i == 3)
795 alu.last = 1;
796 r = r600_bc_add_alu(ctx->bc, &alu);
797 if (r)
798 return r;
799 }
800 return 0;
801 }
802
803 static int tgsi_kill(struct r600_shader_ctx *ctx)
804 {
805 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
806 struct r600_bc_alu alu;
807 int i, r;
808
809 for (i = 0; i < 4; i++) {
810 memset(&alu, 0, sizeof(struct r600_bc_alu));
811 alu.inst = ctx->inst_info->r600_opcode;
812 alu.dst.chan = i;
813 alu.src[0].sel = V_SQ_ALU_SRC_0;
814 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
815 if (r)
816 return r;
817 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
818 if (i == 3) {
819 alu.last = 1;
820 }
821 r = r600_bc_add_alu(ctx->bc, &alu);
822 if (r)
823 return r;
824 }
825 return 0;
826 }
827
828 static int tgsi_slt(struct r600_shader_ctx *ctx)
829 {
830 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
831 struct r600_bc_alu_src r600_src[3];
832 struct r600_bc_alu alu;
833 int i, r;
834
835 r = tgsi_split_constant(ctx, r600_src);
836 if (r)
837 return r;
838 for (i = 0; i < 4; i++) {
839 memset(&alu, 0, sizeof(struct r600_bc_alu));
840 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
841 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
842 alu.dst.chan = i;
843 } else {
844 alu.inst = ctx->inst_info->r600_opcode;
845 alu.src[1] = r600_src[0];
846 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
847 alu.src[0] = r600_src[1];
848 alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
849 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
850 if (r)
851 return r;
852 }
853 if (i == 3) {
854 alu.last = 1;
855 }
856 r = r600_bc_add_alu(ctx->bc, &alu);
857 if (r)
858 return r;
859 }
860 return 0;
861 }
862
863 static int tgsi_lit(struct r600_shader_ctx *ctx)
864 {
865 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
866 struct r600_bc_alu alu;
867 int r;
868
869 /* dst.x, <- 1.0 */
870 memset(&alu, 0, sizeof(struct r600_bc_alu));
871 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
872 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
873 alu.src[0].chan = 0;
874 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
875 if (r)
876 return r;
877 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
878 r = r600_bc_add_alu(ctx->bc, &alu);
879 if (r)
880 return r;
881
882 /* dst.y = max(src.x, 0.0) */
883 memset(&alu, 0, sizeof(struct r600_bc_alu));
884 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
885 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
886 if (r)
887 return r;
888 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
889 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
890 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
891 if (r)
892 return r;
893 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
894 r = r600_bc_add_alu(ctx->bc, &alu);
895 if (r)
896 return r;
897
898 /* dst.z = NOP - fill Z slot */
899 memset(&alu, 0, sizeof(struct r600_bc_alu));
900 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
901 alu.dst.chan = 2;
902 r = r600_bc_add_alu(ctx->bc, &alu);
903 if (r)
904 return r;
905
906 /* dst.w, <- 1.0 */
907 memset(&alu, 0, sizeof(struct r600_bc_alu));
908 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
909 alu.src[0].sel = V_SQ_ALU_SRC_1;
910 alu.src[0].chan = 0;
911 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
912 if (r)
913 return r;
914 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
915 alu.last = 1;
916 r = r600_bc_add_alu(ctx->bc, &alu);
917 if (r)
918 return r;
919
920 if (inst->Dst[0].Register.WriteMask & (1 << 2))
921 {
922 int chan;
923 int sel;
924
925 /* dst.z = log(src.y) */
926 memset(&alu, 0, sizeof(struct r600_bc_alu));
927 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
928 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
929 if (r)
930 return r;
931 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
932 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
933 if (r)
934 return r;
935 alu.last = 1;
936 r = r600_bc_add_alu(ctx->bc, &alu);
937 if (r)
938 return r;
939
940 chan = alu.dst.chan;
941 sel = alu.dst.sel;
942
943 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
944 memset(&alu, 0, sizeof(struct r600_bc_alu));
945 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
946 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
947 if (r)
948 return r;
949 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
950 alu.src[1].sel = sel;
951 alu.src[1].chan = chan;
952 r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
953 if (r)
954 return r;
955 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
956 alu.dst.sel = ctx->temp_reg;
957 alu.dst.chan = 0;
958 alu.dst.write = 1;
959 alu.is_op3 = 1;
960 alu.last = 1;
961 r = r600_bc_add_alu(ctx->bc, &alu);
962 if (r)
963 return r;
964
965 /* dst.z = exp(tmp.x) */
966 memset(&alu, 0, sizeof(struct r600_bc_alu));
967 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
968 alu.src[0].sel = ctx->temp_reg;
969 alu.src[0].chan = 0;
970 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
971 if (r)
972 return r;
973 alu.last = 1;
974 r = r600_bc_add_alu(ctx->bc, &alu);
975 if (r)
976 return r;
977 }
978 return 0;
979 }
980
981 static int tgsi_trans(struct r600_shader_ctx *ctx)
982 {
983 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
984 struct r600_bc_alu alu;
985 int i, j, r;
986
987 for (i = 0; i < 4; i++) {
988 memset(&alu, 0, sizeof(struct r600_bc_alu));
989 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
990 alu.inst = ctx->inst_info->r600_opcode;
991 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
992 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
993 if (r)
994 return r;
995 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
996 }
997 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
998 if (r)
999 return r;
1000 alu.last = 1;
1001 r = r600_bc_add_alu(ctx->bc, &alu);
1002 if (r)
1003 return r;
1004 }
1005 }
1006 return 0;
1007 }
1008
1009 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1010 {
1011 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1012 struct r600_bc_alu alu;
1013 int i, r;
1014
1015 for (i = 0; i < 4; i++) {
1016 memset(&alu, 0, sizeof(struct r600_bc_alu));
1017 alu.src[0].sel = ctx->temp_reg;
1018 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1019 alu.dst.chan = i;
1020 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1021 if (r)
1022 return r;
1023 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1024 if (i == 3)
1025 alu.last = 1;
1026 r = r600_bc_add_alu(ctx->bc, &alu);
1027 if (r)
1028 return r;
1029 }
1030 return 0;
1031 }
1032
1033 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1034 {
1035 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1036 struct r600_bc_alu alu;
1037 int i, r;
1038
1039 memset(&alu, 0, sizeof(struct r600_bc_alu));
1040 alu.inst = ctx->inst_info->r600_opcode;
1041 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1042 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1043 if (r)
1044 return r;
1045 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1046 }
1047 alu.dst.sel = ctx->temp_reg;
1048 alu.dst.write = 1;
1049 alu.last = 1;
1050 r = r600_bc_add_alu(ctx->bc, &alu);
1051 if (r)
1052 return r;
1053 /* replicate result */
1054 return tgsi_helper_tempx_replicate(ctx);
1055 }
1056
1057 static int tgsi_pow(struct r600_shader_ctx *ctx)
1058 {
1059 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1060 struct r600_bc_alu alu;
1061 int r;
1062
1063 /* LOG2(a) */
1064 memset(&alu, 0, sizeof(struct r600_bc_alu));
1065 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1066 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1067 if (r)
1068 return r;
1069 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1070 alu.dst.sel = ctx->temp_reg;
1071 alu.dst.write = 1;
1072 alu.last = 1;
1073 r = r600_bc_add_alu(ctx->bc, &alu);
1074 if (r)
1075 return r;
1076 /* b * LOG2(a) */
1077 memset(&alu, 0, sizeof(struct r600_bc_alu));
1078 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1079 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1080 if (r)
1081 return r;
1082 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1083 alu.src[1].sel = ctx->temp_reg;
1084 alu.dst.sel = ctx->temp_reg;
1085 alu.dst.write = 1;
1086 alu.last = 1;
1087 r = r600_bc_add_alu(ctx->bc, &alu);
1088 if (r)
1089 return r;
1090 /* POW(a,b) = EXP2(b * LOG2(a))*/
1091 memset(&alu, 0, sizeof(struct r600_bc_alu));
1092 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1093 alu.src[0].sel = ctx->temp_reg;
1094 alu.dst.sel = ctx->temp_reg;
1095 alu.dst.write = 1;
1096 alu.last = 1;
1097 r = r600_bc_add_alu(ctx->bc, &alu);
1098 if (r)
1099 return r;
1100 return tgsi_helper_tempx_replicate(ctx);
1101 }
1102
1103 static int tgsi_ssg(struct r600_shader_ctx *ctx)
1104 {
1105 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1106 struct r600_bc_alu alu;
1107 struct r600_bc_alu_src r600_src[3];
1108 int i, r;
1109
1110 r = tgsi_split_constant(ctx, r600_src);
1111 if (r)
1112 return r;
1113
1114 /* tmp = (src > 0 ? 1 : src) */
1115 for (i = 0; i < 4; i++) {
1116 memset(&alu, 0, sizeof(struct r600_bc_alu));
1117 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1118 alu.is_op3 = 1;
1119 alu.dst.sel = ctx->temp_reg;
1120 alu.dst.write = 1;
1121
1122 alu.src[0] = r600_src[0];
1123 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1124
1125 alu.src[1].sel = V_SQ_ALU_SRC_1;
1126
1127 alu.src[2] = r600_src[0];
1128 alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1129 if (i == 3)
1130 alu.last = 1;
1131 r = r600_bc_add_alu(ctx->bc, &alu);
1132 if (r)
1133 return r;
1134 }
1135
1136 /* dst = (-tmp > 0 ? -1 : tmp) */
1137 for (i = 0; i < 4; i++) {
1138 memset(&alu, 0, sizeof(struct r600_bc_alu));
1139 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1140 alu.is_op3 = 1;
1141 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1142 if (r)
1143 return r;
1144
1145 alu.src[0].sel = ctx->temp_reg;
1146 alu.src[0].neg = 1;
1147
1148 alu.src[1].sel = V_SQ_ALU_SRC_1;
1149 alu.src[1].neg = 1;
1150
1151 alu.src[2].sel = ctx->temp_reg;
1152
1153 alu.dst.write = 1;
1154 if (i == 3)
1155 alu.last = 1;
1156 r = r600_bc_add_alu(ctx->bc, &alu);
1157 if (r)
1158 return r;
1159 }
1160 return 0;
1161 }
1162
1163 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1164 {
1165 struct r600_bc_alu alu;
1166 int i, r;
1167
1168 r = r600_bc_add_literal(ctx->bc, ctx->value);
1169 if (r)
1170 return r;
1171 for (i = 0; i < 4; i++) {
1172 memset(&alu, 0, sizeof(struct r600_bc_alu));
1173 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1174 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1175 alu.dst.chan = i;
1176 } else {
1177 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1178 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1179 if (r)
1180 return r;
1181 alu.src[0].sel = ctx->temp_reg;
1182 alu.src[0].chan = i;
1183 }
1184 if (i == 3) {
1185 alu.last = 1;
1186 }
1187 r = r600_bc_add_alu(ctx->bc, &alu);
1188 if (r)
1189 return r;
1190 }
1191 return 0;
1192 }
1193
1194 static int tgsi_op3(struct r600_shader_ctx *ctx)
1195 {
1196 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1197 struct r600_bc_alu_src r600_src[3];
1198 struct r600_bc_alu alu;
1199 int i, j, r;
1200
1201 r = tgsi_split_constant(ctx, r600_src);
1202 if (r)
1203 return r;
1204 /* do it in 2 step as op3 doesn't support writemask */
1205 for (i = 0; i < 4; i++) {
1206 memset(&alu, 0, sizeof(struct r600_bc_alu));
1207 alu.inst = ctx->inst_info->r600_opcode;
1208 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1209 alu.src[j] = r600_src[j];
1210 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1211 }
1212 alu.dst.sel = ctx->temp_reg;
1213 alu.dst.chan = i;
1214 alu.dst.write = 1;
1215 alu.is_op3 = 1;
1216 if (i == 3) {
1217 alu.last = 1;
1218 }
1219 r = r600_bc_add_alu(ctx->bc, &alu);
1220 if (r)
1221 return r;
1222 }
1223 return tgsi_helper_copy(ctx, inst);
1224 }
1225
1226 static int tgsi_dp(struct r600_shader_ctx *ctx)
1227 {
1228 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1229 struct r600_bc_alu_src r600_src[3];
1230 struct r600_bc_alu alu;
1231 int i, j, r;
1232
1233 r = tgsi_split_constant(ctx, r600_src);
1234 if (r)
1235 return r;
1236 for (i = 0; i < 4; i++) {
1237 memset(&alu, 0, sizeof(struct r600_bc_alu));
1238 alu.inst = ctx->inst_info->r600_opcode;
1239 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1240 alu.src[j] = r600_src[j];
1241 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1242 }
1243 alu.dst.sel = ctx->temp_reg;
1244 alu.dst.chan = i;
1245 alu.dst.write = 1;
1246 /* handle some special cases */
1247 switch (ctx->inst_info->tgsi_opcode) {
1248 case TGSI_OPCODE_DP2:
1249 if (i > 1) {
1250 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1251 alu.src[0].chan = alu.src[1].chan = 0;
1252 }
1253 break;
1254 case TGSI_OPCODE_DP3:
1255 if (i > 2) {
1256 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1257 alu.src[0].chan = alu.src[1].chan = 0;
1258 }
1259 break;
1260 default:
1261 break;
1262 }
1263 if (i == 3) {
1264 alu.last = 1;
1265 }
1266 r = r600_bc_add_alu(ctx->bc, &alu);
1267 if (r)
1268 return r;
1269 }
1270 return tgsi_helper_copy(ctx, inst);
1271 }
1272
1273 static int tgsi_tex(struct r600_shader_ctx *ctx)
1274 {
1275 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1276 struct r600_bc_tex tex;
1277 struct r600_bc_alu alu;
1278 unsigned src_gpr;
1279 int r, i;
1280
1281 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1282
1283 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1284 /* Add perspective divide */
1285 memset(&alu, 0, sizeof(struct r600_bc_alu));
1286 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1287 alu.src[0].sel = src_gpr;
1288 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1289 alu.dst.sel = ctx->temp_reg;
1290 alu.dst.chan = 3;
1291 alu.last = 1;
1292 alu.dst.write = 1;
1293 r = r600_bc_add_alu(ctx->bc, &alu);
1294 if (r)
1295 return r;
1296
1297 for (i = 0; i < 3; i++) {
1298 memset(&alu, 0, sizeof(struct r600_bc_alu));
1299 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1300 alu.src[0].sel = ctx->temp_reg;
1301 alu.src[0].chan = 3;
1302 alu.src[1].sel = src_gpr;
1303 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1304 alu.dst.sel = ctx->temp_reg;
1305 alu.dst.chan = i;
1306 alu.dst.write = 1;
1307 r = r600_bc_add_alu(ctx->bc, &alu);
1308 if (r)
1309 return r;
1310 }
1311 memset(&alu, 0, sizeof(struct r600_bc_alu));
1312 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1313 alu.src[0].sel = V_SQ_ALU_SRC_1;
1314 alu.src[0].chan = 0;
1315 alu.dst.sel = ctx->temp_reg;
1316 alu.dst.chan = 3;
1317 alu.last = 1;
1318 alu.dst.write = 1;
1319 r = r600_bc_add_alu(ctx->bc, &alu);
1320 if (r)
1321 return r;
1322 src_gpr = ctx->temp_reg;
1323 } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
1324 for (i = 0; i < 4; i++) {
1325 memset(&alu, 0, sizeof(struct r600_bc_alu));
1326 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1327 alu.src[0].sel = src_gpr;
1328 alu.src[0].chan = i;
1329 alu.dst.sel = ctx->temp_reg;
1330 alu.dst.chan = i;
1331 if (i == 3)
1332 alu.last = 1;
1333 alu.dst.write = 1;
1334 r = r600_bc_add_alu(ctx->bc, &alu);
1335 if (r)
1336 return r;
1337 }
1338 src_gpr = ctx->temp_reg;
1339 }
1340
1341 memset(&tex, 0, sizeof(struct r600_bc_tex));
1342 tex.inst = ctx->inst_info->r600_opcode;
1343 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1344 tex.sampler_id = tex.resource_id;
1345 tex.src_gpr = src_gpr;
1346 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1347 tex.dst_sel_x = 0;
1348 tex.dst_sel_y = 1;
1349 tex.dst_sel_z = 2;
1350 tex.dst_sel_w = 3;
1351 tex.src_sel_x = 0;
1352 tex.src_sel_y = 1;
1353 tex.src_sel_z = 2;
1354 tex.src_sel_w = 3;
1355
1356 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1357 tex.coord_type_x = 1;
1358 tex.coord_type_y = 1;
1359 tex.coord_type_z = 1;
1360 tex.coord_type_w = 1;
1361 }
1362 return r600_bc_add_tex(ctx->bc, &tex);
1363 }
1364
1365 static int tgsi_lrp(struct r600_shader_ctx *ctx)
1366 {
1367 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1368 struct r600_bc_alu_src r600_src[3];
1369 struct r600_bc_alu alu;
1370 unsigned i;
1371 int r;
1372
1373 r = tgsi_split_constant(ctx, r600_src);
1374 if (r)
1375 return r;
1376 /* 1 - src0 */
1377 for (i = 0; i < 4; i++) {
1378 memset(&alu, 0, sizeof(struct r600_bc_alu));
1379 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1380 alu.src[0].sel = V_SQ_ALU_SRC_1;
1381 alu.src[0].chan = 0;
1382 alu.src[1] = r600_src[0];
1383 alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1384 alu.src[1].neg = 1;
1385 alu.dst.sel = ctx->temp_reg;
1386 alu.dst.chan = i;
1387 if (i == 3) {
1388 alu.last = 1;
1389 }
1390 alu.dst.write = 1;
1391 r = r600_bc_add_alu(ctx->bc, &alu);
1392 if (r)
1393 return r;
1394 }
1395 r = r600_bc_add_literal(ctx->bc, ctx->value);
1396 if (r)
1397 return r;
1398
1399 /* (1 - src0) * src2 */
1400 for (i = 0; i < 4; i++) {
1401 memset(&alu, 0, sizeof(struct r600_bc_alu));
1402 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1403 alu.src[0].sel = ctx->temp_reg;
1404 alu.src[0].chan = i;
1405 alu.src[1] = r600_src[2];
1406 alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1407 alu.dst.sel = ctx->temp_reg;
1408 alu.dst.chan = i;
1409 if (i == 3) {
1410 alu.last = 1;
1411 }
1412 alu.dst.write = 1;
1413 r = r600_bc_add_alu(ctx->bc, &alu);
1414 if (r)
1415 return r;
1416 }
1417 r = r600_bc_add_literal(ctx->bc, ctx->value);
1418 if (r)
1419 return r;
1420
1421 /* src0 * src1 + (1 - src0) * src2 */
1422 for (i = 0; i < 4; i++) {
1423 memset(&alu, 0, sizeof(struct r600_bc_alu));
1424 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1425 alu.is_op3 = 1;
1426 alu.src[0] = r600_src[0];
1427 alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1428 alu.src[1] = r600_src[1];
1429 alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1430 alu.src[2].sel = ctx->temp_reg;
1431 alu.src[2].chan = i;
1432 alu.dst.sel = ctx->temp_reg;
1433 alu.dst.chan = i;
1434 if (i == 3) {
1435 alu.last = 1;
1436 }
1437 r = r600_bc_add_alu(ctx->bc, &alu);
1438 if (r)
1439 return r;
1440 }
1441 return tgsi_helper_copy(ctx, inst);
1442 }
1443
1444 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1445 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1446 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1447 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1448 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
1449 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
1450 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1451 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1452 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1453 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1454 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1455 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1456 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1457 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
1458 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1459 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1460 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
1461 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1462 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1463 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1464 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1465 /* gap */
1466 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1467 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1468 /* gap */
1469 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1470 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1471 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
1472 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1473 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
1474 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1475 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
1476 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
1477 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
1478 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1479 /* gap */
1480 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1481 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1482 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1483 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1484 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
1485 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
1486 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
1487 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
1488 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1489 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1490 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1491 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1492 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1493 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
1494 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1495 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
1496 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
1497 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt},
1498 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
1499 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1500 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1501 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1502 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1503 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1504 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1505 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1506 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1507 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1508 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1509 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1510 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1511 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1512 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1513 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
1514 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1515 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1516 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
1517 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1518 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1519 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1520 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1521 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1522 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1523 /* gap */
1524 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1525 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1526 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1527 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1528 /* gap */
1529 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1530 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1531 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1532 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1533 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1534 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1535 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1536 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
1537 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1538 /* gap */
1539 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1540 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1541 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1542 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1543 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1544 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1545 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1546 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1547 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1548 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1549 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1550 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1551 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1552 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1553 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1554 /* gap */
1555 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1556 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1557 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1558 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1559 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1560 /* gap */
1561 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1562 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1563 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1564 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1565 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1566 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1567 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1568 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1569 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
1570 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
1571 /* gap */
1572 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1573 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1574 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1575 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1576 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1577 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1578 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1579 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1580 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1581 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1582 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1583 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1584 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1585 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1586 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1587 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1588 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1589 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1590 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1591 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1592 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1593 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1594 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1595 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1596 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1597 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1598 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1599 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1600 };