r600g: add KILP support
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
/* Per-translation state used while converting one TGSI shader into
 * r600 bytecode.  Lives on the stack of r600_shader_from_tgsi(). */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;		/* results of tgsi_scan_shader() */
	struct tgsi_parse_context		parse;		/* token-stream cursor */
	const struct tgsi_token			*tokens;	/* raw TGSI input */
	unsigned				type;		/* TGSI_PROCESSOR_VERTEX/FRAGMENT */
	unsigned				file_offset[TGSI_FILE_COUNT]; /* base register per TGSI file */
	unsigned				temp_reg;	/* first GPR free for translator scratch */
	struct r600_shader_tgsi_instruction	*inst_info;	/* table entry for current opcode */
	struct r600_bc				*bc;		/* bytecode being emitted */
	struct r600_shader			*shader;	/* output shader descriptor */
	u32					value[4];	/* literal words for the current instruction */
	u32					*literals;	/* pool of immediates, 4 words each */
	u32					nliterals;	/* number of 4-word entries in literals */
};
54
/* One row of the TGSI-opcode dispatch table: maps a TGSI opcode to the
 * r600 ALU opcode and the emit callback that translates it. */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI_OPCODE_* this row handles */
	unsigned	is_op3;		/* non-zero if the r600 encoding is a 3-source op */
	unsigned	r600_opcode;	/* V_SQ_ALU_WORD1_OP2/OP3_* instruction */
	int (*process)(struct r600_shader_ctx *ctx);	/* emits bytecode; returns 0 or -errno */
};
61
62 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
63 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
64
/*
 * Patch a vertex shader's fetch instructions for the currently bound
 * vertex elements and rebuild the bytecode.
 *
 * Each VTX fetch's destination swizzle is rewritten from the format
 * description of the corresponding vertex buffer, then the whole
 * bytecode is re-emitted.  No-op for non-vertex shaders.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
{
	struct r600_context *rctx = r600_context(ctx);
	const struct util_format_description *desc;
	/* NOTE(review): assumes at most 160 vertex elements are bound —
	 * confirm rctx->vertex_elements->count can never exceed this */
	enum pipe_format resource_format[160];
	unsigned i, nresources = 0;
	struct r600_bc *bc = &shader->bc;
	struct r600_bc_cf *cf;
	struct r600_bc_vtx *vtx;

	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
		return 0;
	/* gather the source format of every bound vertex element */
	for (i = 0; i < rctx->vertex_elements->count; i++) {
		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
	}
	/* walk every vertex-fetch clause and fix up its dst swizzle */
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		switch (cf->inst) {
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				desc = util_format_description(resource_format[vtx->buffer_id]);
				if (desc == NULL) {
					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
					return -EINVAL;
				}
				/* route fetched components per the format's swizzle */
				vtx->dst_sel_x = desc->swizzle[0];
				vtx->dst_sel_y = desc->swizzle[1];
				vtx->dst_sel_z = desc->swizzle[2];
				vtx->dst_sel_w = desc->swizzle[3];
			}
			break;
		default:
			break;
		}
	}
	/* swizzles changed, so the binary must be regenerated */
	return r600_bc_build(&shader->bc);
}
102
103 int r600_pipe_shader_create(struct pipe_context *ctx,
104 struct r600_context_state *rpshader,
105 const struct tgsi_token *tokens)
106 {
107 struct r600_screen *rscreen = r600_screen(ctx->screen);
108 int r;
109
110 //fprintf(stderr, "--------------------------------------------------------------\n");
111 //tgsi_dump(tokens, 0);
112 if (rpshader == NULL)
113 return -ENOMEM;
114 rpshader->shader.family = radeon_get_family(rscreen->rw);
115 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
116 if (r) {
117 R600_ERR("translation from TGSI failed !\n");
118 return r;
119 }
120 r = r600_bc_build(&rpshader->shader.bc);
121 if (r) {
122 R600_ERR("building bytecode failed !\n");
123 return r;
124 }
125 //fprintf(stderr, "______________________________________________________________\n");
126 return 0;
127 }
128
/*
 * Build the radeon state object for a vertex shader.
 *
 * Programs the SPI_VS_OUT_ID semantic-id mapping, the export count and
 * the SQ program resources, then attaches the shader buffer object.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_shader *rshader = &rpshader->shader;
	struct radeon_state *state;
	unsigned i, tmp;

	/* drop any previously built state before creating a new one */
	rpshader->rstate = radeon_state_decref(rpshader->rstate);
	state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
	if (state == NULL)
		return -ENOMEM;
	for (i = 0; i < 10; i++) {
		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
	}
	/* so far never got proper semantic id from tgsi */
	/* pack identity semantic ids, four 8-bit ids per register */
	for (i = 0; i < 32; i++) {
		tmp = i << ((i & 3) * 8);
		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
	}
	/* NOTE(review): "- 2" presumably excludes position plus one other
	 * fixed output from the param export count — confirm */
	state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
			S_028868_STACK_SIZE(rshader->bc.nstack);
	rpshader->rstate = state;
	/* same buffer object referenced twice: two relocations into it */
	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->nbo = 2;
	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
	rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
	return radeon_state_pm4(state);
}
159
/*
 * Build the radeon state object for a pixel (fragment) shader.
 *
 * Programs one SPI_PS_INPUT_CNTL per interpolated input (flat shading
 * for colors, point-sprite texcoord replacement), computes the export
 * mask, and fills the SQ program resources.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	const struct pipe_rasterizer_state *rasterizer;
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_shader *rshader = &rpshader->shader;
	struct r600_context *rctx = r600_context(ctx);
	struct radeon_state *state;
	unsigned i, tmp, exports_ps, num_cout;

	rasterizer = &rctx->rasterizer->state.rasterizer;
	rpshader->rstate = radeon_state_decref(rpshader->rstate);
	state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
	if (state == NULL)
		return -ENOMEM;
	for (i = 0; i < rshader->ninput; i++) {
		tmp = S_028644_SEMANTIC(i);
		tmp |= S_028644_SEL_CENTROID(1);
		/* colors follow the flat-shade setting from the rasterizer */
		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
		}
		/* replace this input with the point-sprite coordinate */
		if (rasterizer->sprite_coord_enable & (1 << i)) {
			tmp |= S_028644_PT_SPRITE_TEX(1);
		}
		state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
	}

	/* NOTE(review): bit 0 appears to mark the Z export and bits 1..n one
	 * color buffer each — confirm against the SQ_PGM_EXPORTS_PS docs */
	exports_ps = 0;
	num_cout = 0;
	for (i = 0; i < rshader->noutput; i++) {
		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
			exports_ps |= 1;
		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
			exports_ps |= (1 << (num_cout+1));
			num_cout++;
		}
	}
	if (!exports_ps) {
		/* always at least export 1 component per pixel */
		exports_ps = 2;
	}
	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
			S_0286CC_PERSP_GRADIENT_ENA(1);
	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
			S_028868_STACK_SIZE(rshader->bc.nstack);
	state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
	rpshader->rstate = state;
	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->nbo = 1;
	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
	return radeon_state_pm4(state);
}
213
214 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
215 {
216 struct r600_screen *rscreen = r600_screen(ctx->screen);
217 struct r600_context *rctx = r600_context(ctx);
218 struct r600_shader *rshader = &rpshader->shader;
219 int r;
220
221 /* copy new shader */
222 radeon_bo_decref(rscreen->rw, rpshader->bo);
223 rpshader->bo = NULL;
224 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
225 4096, NULL);
226 if (rpshader->bo == NULL) {
227 return -ENOMEM;
228 }
229 radeon_bo_map(rscreen->rw, rpshader->bo);
230 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
231 radeon_bo_unmap(rscreen->rw, rpshader->bo);
232 /* build state */
233 rshader->flat_shade = rctx->flat_shade;
234 switch (rshader->processor_type) {
235 case TGSI_PROCESSOR_VERTEX:
236 r = r600_pipe_shader_vs(ctx, rpshader);
237 break;
238 case TGSI_PROCESSOR_FRAGMENT:
239 r = r600_pipe_shader_ps(ctx, rpshader);
240 break;
241 default:
242 r = -EINVAL;
243 break;
244 }
245 return r;
246 }
247
248 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
249 {
250 struct r600_context *rctx = r600_context(ctx);
251 int r;
252
253 if (rpshader == NULL)
254 return -EINVAL;
255 /* there should be enough input */
256 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
257 R600_ERR("%d resources provided, expecting %d\n",
258 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
259 return -EINVAL;
260 }
261 r = r600_shader_update(ctx, &rpshader->shader);
262 if (r)
263 return r;
264 return r600_pipe_shader(ctx, rpshader);
265 }
266
267 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
268 {
269 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
270 int j;
271
272 if (i->Instruction.NumDstRegs > 1) {
273 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
274 return -EINVAL;
275 }
276 if (i->Instruction.Predicate) {
277 R600_ERR("predicate unsupported\n");
278 return -EINVAL;
279 }
280 #if 0
281 if (i->Instruction.Label) {
282 R600_ERR("label unsupported\n");
283 return -EINVAL;
284 }
285 #endif
286 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
287 if (i->Src[j].Register.Indirect ||
288 i->Src[j].Register.Dimension ||
289 i->Src[j].Register.Absolute) {
290 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
291 return -EINVAL;
292 }
293 }
294 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
295 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
296 R600_ERR("unsupported dst (indirect|dimension)\n");
297 return -EINVAL;
298 }
299 }
300 return 0;
301 }
302
/*
 * Process one TGSI declaration token.
 *
 * Inputs and outputs are recorded in the shader descriptor with their
 * GPR assignment; vertex-shader inputs additionally emit a vertex-fetch
 * instruction that loads the attribute into its GPR.  Constants,
 * temporaries and samplers need no per-declaration work.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
	struct r600_bc_vtx vtx;
	unsigned i;
	int r;

	switch (d->Declaration.File) {
	case TGSI_FILE_INPUT:
		i = ctx->shader->ninput++;
		ctx->shader->input[i].name = d->Semantic.Name;
		ctx->shader->input[i].sid = d->Semantic.Index;
		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
			/* turn input into fetch */
			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
			vtx.inst = 0;
			vtx.fetch_type = 0;
			/* one vertex buffer per input; swizzles are patched
			 * later in r600_shader_update() */
			vtx.buffer_id = i;
			/* register containing the index into the buffer */
			vtx.src_gpr = 0;
			vtx.src_sel_x = 0;
			vtx.mega_fetch_count = 0x1F;
			vtx.dst_gpr = ctx->shader->input[i].gpr;
			vtx.dst_sel_x = 0;
			vtx.dst_sel_y = 1;
			vtx.dst_sel_z = 2;
			vtx.dst_sel_w = 3;
			r = r600_bc_add_vtx(ctx->bc, &vtx);
			if (r)
				return r;
		}
		break;
	case TGSI_FILE_OUTPUT:
		i = ctx->shader->noutput++;
		ctx->shader->output[i].name = d->Semantic.Name;
		ctx->shader->output[i].sid = d->Semantic.Index;
		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
		break;
	case TGSI_FILE_CONSTANT:
	case TGSI_FILE_TEMPORARY:
	case TGSI_FILE_SAMPLER:
		/* nothing to emit; addressed via file_offset[] at use time */
		break;
	default:
		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
		return -EINVAL;
	}
	return 0;
}
354
355 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
356 {
357 struct tgsi_full_immediate *immediate;
358 struct r600_shader_ctx ctx;
359 struct r600_bc_output output[32];
360 unsigned output_done, noutput;
361 unsigned opcode;
362 int i, r = 0, pos0;
363
364 ctx.bc = &shader->bc;
365 ctx.shader = shader;
366 r = r600_bc_init(ctx.bc, shader->family);
367 if (r)
368 return r;
369 ctx.tokens = tokens;
370 tgsi_scan_shader(tokens, &ctx.info);
371 tgsi_parse_init(&ctx.parse, tokens);
372 ctx.type = ctx.parse.FullHeader.Processor.Processor;
373 shader->processor_type = ctx.type;
374
375 /* register allocations */
376 /* Values [0,127] correspond to GPR[0..127].
377 * Values [128,159] correspond to constant buffer bank 0
378 * Values [160,191] correspond to constant buffer bank 1
379 * Values [256,511] correspond to cfile constants c[0..255].
380 * Other special values are shown in the list below.
381 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
382 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
383 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
384 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
385 * 248 SQ_ALU_SRC_0: special constant 0.0.
386 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
387 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
388 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
389 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
390 * 253 SQ_ALU_SRC_LITERAL: literal constant.
391 * 254 SQ_ALU_SRC_PV: previous vector result.
392 * 255 SQ_ALU_SRC_PS: previous scalar result.
393 */
394 for (i = 0; i < TGSI_FILE_COUNT; i++) {
395 ctx.file_offset[i] = 0;
396 }
397 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
398 ctx.file_offset[TGSI_FILE_INPUT] = 1;
399 }
400 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
401 ctx.info.file_count[TGSI_FILE_INPUT];
402 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
403 ctx.info.file_count[TGSI_FILE_OUTPUT];
404 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
405 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
406 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
407 ctx.info.file_count[TGSI_FILE_TEMPORARY];
408
409 ctx.nliterals = 0;
410 ctx.literals = NULL;
411
412 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
413 tgsi_parse_token(&ctx.parse);
414 switch (ctx.parse.FullToken.Token.Type) {
415 case TGSI_TOKEN_TYPE_IMMEDIATE:
416 immediate = &ctx.parse.FullToken.FullImmediate;
417 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
418 if(ctx.literals == NULL) {
419 r = -ENOMEM;
420 goto out_err;
421 }
422 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
423 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
424 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
425 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
426 ctx.nliterals++;
427 break;
428 case TGSI_TOKEN_TYPE_DECLARATION:
429 r = tgsi_declaration(&ctx);
430 if (r)
431 goto out_err;
432 break;
433 case TGSI_TOKEN_TYPE_INSTRUCTION:
434 r = tgsi_is_supported(&ctx);
435 if (r)
436 goto out_err;
437 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
438 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
439 r = ctx.inst_info->process(&ctx);
440 if (r)
441 goto out_err;
442 r = r600_bc_add_literal(ctx.bc, ctx.value);
443 if (r)
444 goto out_err;
445 break;
446 default:
447 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
448 r = -EINVAL;
449 goto out_err;
450 }
451 }
452 /* export output */
453 noutput = shader->noutput;
454 for (i = 0, pos0 = 0; i < noutput; i++) {
455 memset(&output[i], 0, sizeof(struct r600_bc_output));
456 output[i].gpr = shader->output[i].gpr;
457 output[i].elem_size = 3;
458 output[i].swizzle_x = 0;
459 output[i].swizzle_y = 1;
460 output[i].swizzle_z = 2;
461 output[i].swizzle_w = 3;
462 output[i].barrier = 1;
463 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
464 output[i].array_base = i - pos0;
465 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
466 switch (ctx.type) {
467 case TGSI_PROCESSOR_VERTEX:
468 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
469 output[i].array_base = 60;
470 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
471 /* position doesn't count in array_base */
472 pos0++;
473 }
474 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
475 output[i].array_base = 61;
476 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
477 /* position doesn't count in array_base */
478 pos0++;
479 }
480 break;
481 case TGSI_PROCESSOR_FRAGMENT:
482 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
483 output[i].array_base = shader->output[i].sid;
484 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
485 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
486 output[i].array_base = 61;
487 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
488 } else {
489 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
490 r = -EINVAL;
491 goto out_err;
492 }
493 break;
494 default:
495 R600_ERR("unsupported processor type %d\n", ctx.type);
496 r = -EINVAL;
497 goto out_err;
498 }
499 }
500 /* add fake param output for vertex shader if no param is exported */
501 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
502 for (i = 0, pos0 = 0; i < noutput; i++) {
503 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
504 pos0 = 1;
505 break;
506 }
507 }
508 if (!pos0) {
509 memset(&output[i], 0, sizeof(struct r600_bc_output));
510 output[i].gpr = 0;
511 output[i].elem_size = 3;
512 output[i].swizzle_x = 0;
513 output[i].swizzle_y = 1;
514 output[i].swizzle_z = 2;
515 output[i].swizzle_w = 3;
516 output[i].barrier = 1;
517 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
518 output[i].array_base = 0;
519 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
520 noutput++;
521 }
522 }
523 /* add fake pixel export */
524 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
525 memset(&output[0], 0, sizeof(struct r600_bc_output));
526 output[0].gpr = 0;
527 output[0].elem_size = 3;
528 output[0].swizzle_x = 7;
529 output[0].swizzle_y = 7;
530 output[0].swizzle_z = 7;
531 output[0].swizzle_w = 7;
532 output[0].barrier = 1;
533 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
534 output[0].array_base = 0;
535 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
536 noutput++;
537 }
538 /* set export done on last export of each type */
539 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
540 if (i == (noutput - 1)) {
541 output[i].end_of_program = 1;
542 }
543 if (!(output_done & (1 << output[i].type))) {
544 output_done |= (1 << output[i].type);
545 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
546 }
547 }
548 /* add output to bytecode */
549 for (i = 0; i < noutput; i++) {
550 r = r600_bc_add_output(ctx.bc, &output[i]);
551 if (r)
552 goto out_err;
553 }
554 free(ctx.literals);
555 tgsi_parse_free(&ctx.parse);
556 return 0;
557 out_err:
558 free(ctx.literals);
559 tgsi_parse_free(&ctx.parse);
560 return r;
561 }
562
563 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
564 {
565 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
566 return -EINVAL;
567 }
568
/* TGSI END emits no bytecode; exports are generated by the caller
 * after the whole token stream has been consumed. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	return 0;
}
573
574 static int tgsi_src(struct r600_shader_ctx *ctx,
575 const struct tgsi_full_src_register *tgsi_src,
576 struct r600_bc_alu_src *r600_src)
577 {
578 int index;
579 memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
580 r600_src->sel = tgsi_src->Register.Index;
581 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
582 r600_src->sel = 0;
583 index = tgsi_src->Register.Index;
584 ctx->value[0] = ctx->literals[index * 4 + 0];
585 ctx->value[1] = ctx->literals[index * 4 + 1];
586 ctx->value[2] = ctx->literals[index * 4 + 2];
587 ctx->value[3] = ctx->literals[index * 4 + 3];
588 }
589 r600_src->neg = tgsi_src->Register.Negate;
590 r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
591 return 0;
592 }
593
594 static int tgsi_dst(struct r600_shader_ctx *ctx,
595 const struct tgsi_full_dst_register *tgsi_dst,
596 unsigned swizzle,
597 struct r600_bc_alu_dst *r600_dst)
598 {
599 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
600
601 r600_dst->sel = tgsi_dst->Register.Index;
602 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
603 r600_dst->chan = swizzle;
604 r600_dst->write = 1;
605 if (inst->Instruction.Saturate) {
606 r600_dst->clamp = 1;
607 }
608 return 0;
609 }
610
611 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
612 {
613 switch (swizzle) {
614 case 0:
615 return tgsi_src->Register.SwizzleX;
616 case 1:
617 return tgsi_src->Register.SwizzleY;
618 case 2:
619 return tgsi_src->Register.SwizzleZ;
620 case 3:
621 return tgsi_src->Register.SwizzleW;
622 default:
623 return 0;
624 }
625 }
626
627 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
628 {
629 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
630 struct r600_bc_alu alu;
631 int i, j, k, nconst, r;
632
633 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
634 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
635 nconst++;
636 }
637 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
638 if (r) {
639 return r;
640 }
641 }
642 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
643 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
644 for (k = 0; k < 4; k++) {
645 memset(&alu, 0, sizeof(struct r600_bc_alu));
646 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
647 alu.src[0].sel = r600_src[0].sel;
648 alu.src[0].chan = k;
649 alu.dst.sel = ctx->temp_reg + j;
650 alu.dst.chan = k;
651 alu.dst.write = 1;
652 if (k == 3)
653 alu.last = 1;
654 r = r600_bc_add_alu(ctx->bc, &alu);
655 if (r)
656 return r;
657 }
658 r600_src[0].sel = ctx->temp_reg + j;
659 j--;
660 }
661 }
662 return 0;
663 }
664
/*
 * Emit a simple per-channel two-source ALU op for every channel enabled
 * in the destination write mask.
 *
 * swap: when non-zero the two sources are exchanged (used for TGSI ops
 * whose r600 equivalent takes operands in the opposite order).
 * SUB is emitted as ADD with src1 negated; ABS as the table opcode with
 * the absolute-value flag on src0.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, j, r;
	int lasti = 0;

	/* find the highest written channel so .last can be set on it */
	for (i = 0; i < 4; i++) {
		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
			lasti = i;
		}
	}

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bc_alu));
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;

		alu.inst = ctx->inst_info->r600_opcode;
		if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				alu.src[j] = r600_src[j];
				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
			}
		} else {
			/* exchange the two operands */
			alu.src[0] = r600_src[1];
			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);

			alu.src[1] = r600_src[0];
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		}
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_SUB:
			/* a - b == a + (-b) */
			alu.src[1].neg = 1;
			break;
		case TGSI_OPCODE_ABS:
			alu.src[0].abs = 1;
			break;
		default:
			break;
		}
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
724
/* Two-source ALU op with operands in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0);
}
729
/* Two-source ALU op with the operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1);
}
734
735 /*
736 * r600 - trunc to -PI..PI range
737 * r700 - normalize by dividing by 2PI
738 * see fdo bug 27901
739 */
/*
 * Emit SIN/COS with the argument reduction the hardware needs.
 *
 * Sequence (all in temp_reg.x):
 *   1. tmp = src * 1/(2*PI) + 0.5        (MULADD with two literals)
 *   2. tmp = fract(tmp)                  (now in [0,1))
 *   3. map back to the unit the chip expects:
 *        r600 (chiprev 0): tmp = tmp * 2*PI - PI   (range -PI..PI)
 *        r700+:            tmp = tmp * 1.0 - 0.5   (normalized turns)
 *   4. apply the table opcode (the SIN/COS variant)
 *   5. replicate the scalar result to all written channels.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, r;
	uint32_t lit_vals[4];

	memset(lit_vals, 0, 4*4);
	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	/* step 1 literals: scale and bias */
	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
	lit_vals[1] = fui(0.5f);

	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0] = r600_src[0];
	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);

	/* literal slots 0 and 1 are filled by r600_bc_add_literal below */
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;

	/* step 2: take the fractional part */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* step 3 literals: per-chip rescale (see fdo bug 27901) */
	if (ctx->bc->chiprev == 0) {
		lit_vals[0] = fui(3.1415926535897f * 2.0f);
		lit_vals[1] = fui(-3.1415926535897f);
	} else {
		lit_vals[0] = fui(1.0f);
		lit_vals[1] = fui(-0.5f);
	}

	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;

	/* step 4: the actual trig opcode from the dispatch table */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = ctx->inst_info->r600_opcode;
	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* replicate result */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.src[0].sel = ctx->temp_reg;
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
		alu.dst.chan = i;
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;
		/* only channels enabled in the write mask actually write */
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
854
/*
 * Translate TGSI KIL / KILP into four kill ALU instructions, one per
 * channel.  Each compares 0.0 (src0) against src1 with the kill opcode
 * from the dispatch table.
 *
 * KILP: src1 is the constant -1.0, so the kill condition holds
 * unconditionally (assuming a greater-than kill opcode in the table —
 * confirm).  KIL: src1 is the per-channel source value, killing when
 * the condition holds against 0.0.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = ctx->inst_info->r600_opcode;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
			/* unconditional kill: compare against -1.0 */
			alu.src[1].sel = V_SQ_ALU_SRC_1;
			alu.src[1].neg = 1;
		} else {
			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
			if (r)
				return r;
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* kill must be last in ALU */
	ctx->bc->force_add_cf = 1;
	ctx->shader->uses_kill = TRUE;
	return 0;
}
894
/*
 * Translate TGSI LIT (lighting coefficients):
 *   dst.x = 1.0
 *   dst.y = max(src.x, 0.0)
 *   dst.z = src.x > 0 ? pow(max(src.y,0), clamp(src.w)) : 0
 *   dst.w = 1.0
 *
 * x/y/w plus a NOP for the z slot are emitted as one four-slot group;
 * when z is actually written it is computed afterwards via
 * LOG_CLAMPED + MUL_LIT + EXP_IEEE (MUL_LIT handles the src.x <= 0 and
 * clamping special cases in hardware).
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int tgsi_lit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int r;

	/* dst.x, <- 1.0 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
	alu.src[0].chan = 0;
	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.y = max(src.x, 0.0) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
	if (r)
		return r;
	alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
	alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.z = NOP - fill Z slot */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
	alu.dst.chan = 2;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.w, <- 1.0 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1;
	alu.src[0].chan = 0;
	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
	alu.last = 1;	/* closes the x/y/z/w group */
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	if (inst->Dst[0].Register.WriteMask & (1 << 2))
	{
		int chan;
		int sel;

		/* dst.z = log(src.y) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
		if (r)
			return r;
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
		if (r)
			return r;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* remember where the log landed so MUL_LIT can read it back */
		chan = alu.dst.chan;
		sel = alu.dst.sel;

		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
		if (r)
			return r;
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
		alu.src[1].sel = sel;
		alu.src[1].chan = chan;
		r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
		if (r)
			return r;
		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* dst.z = exp(tmp.x) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;
		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
		if (r)
			return r;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
1012
1013 static int tgsi_trans(struct r600_shader_ctx *ctx)
1014 {
1015 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1016 struct r600_bc_alu alu;
1017 int i, j, r;
1018
1019 for (i = 0; i < 4; i++) {
1020 memset(&alu, 0, sizeof(struct r600_bc_alu));
1021 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1022 alu.inst = ctx->inst_info->r600_opcode;
1023 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1024 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1025 if (r)
1026 return r;
1027 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1028 }
1029 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1030 if (r)
1031 return r;
1032 alu.last = 1;
1033 r = r600_bc_add_alu(ctx->bc, &alu);
1034 if (r)
1035 return r;
1036 }
1037 }
1038 return 0;
1039 }
1040
1041 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1042 {
1043 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1044 struct r600_bc_alu alu;
1045 int i, r;
1046
1047 for (i = 0; i < 4; i++) {
1048 memset(&alu, 0, sizeof(struct r600_bc_alu));
1049 alu.src[0].sel = ctx->temp_reg;
1050 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1051 alu.dst.chan = i;
1052 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1053 if (r)
1054 return r;
1055 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1056 if (i == 3)
1057 alu.last = 1;
1058 r = r600_bc_add_alu(ctx->bc, &alu);
1059 if (r)
1060 return r;
1061 }
1062 return 0;
1063 }
1064
1065 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1066 {
1067 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1068 struct r600_bc_alu alu;
1069 int i, r;
1070
1071 memset(&alu, 0, sizeof(struct r600_bc_alu));
1072 alu.inst = ctx->inst_info->r600_opcode;
1073 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1074 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1075 if (r)
1076 return r;
1077 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1078 }
1079 alu.dst.sel = ctx->temp_reg;
1080 alu.dst.write = 1;
1081 alu.last = 1;
1082 r = r600_bc_add_alu(ctx->bc, &alu);
1083 if (r)
1084 return r;
1085 /* replicate result */
1086 return tgsi_helper_tempx_replicate(ctx);
1087 }
1088
/* TGSI POW: computed as EXP2(src1.x * LOG2(src0.x)) in temp_reg.x,
 * then replicated to every written destination channel. */
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int r;

	/* LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
	if (r)
		return r;
	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* presumably flushes literal constants used by the group above —
	 * see r600_bc_add_literal (TODO confirm) */
	r = r600_bc_add_literal(ctx->bc,ctx->value);
	if (r)
		return r;
	/* b * LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
	if (r)
		return r;
	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
	alu.src[1].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc,ctx->value);
	if (r)
		return r;
	/* POW(a,b) = EXP2(b * LOG2(a))*/
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
	alu.src[0].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc,ctx->value);
	if (r)
		return r;
	return tgsi_helper_tempx_replicate(ctx);
}
1143
/* TGSI SSG (set sign): dst = 1 if src > 0, -1 if src < 0, else src
 * (i.e. 0 for 0).  Built from two CNDGT conditional moves per channel:
 * the first clamps positives to 1, the second clamps negatives to -1. */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	struct r600_bc_alu_src r600_src[3];
	int i, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;

		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);

		alu.src[1].sel = V_SQ_ALU_SRC_1;

		alu.src[2] = r600_src[0];
		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* dst = (-tmp > 0 ? -1 : tmp) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
		alu.is_op3 = 1;
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;

		/* negated tmp as the condition */
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[0].neg = 1;

		/* constant -1 selected when tmp was negative */
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
1208
1209 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1210 {
1211 struct r600_bc_alu alu;
1212 int i, r;
1213
1214 r = r600_bc_add_literal(ctx->bc, ctx->value);
1215 if (r)
1216 return r;
1217 for (i = 0; i < 4; i++) {
1218 memset(&alu, 0, sizeof(struct r600_bc_alu));
1219 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1220 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1221 alu.dst.chan = i;
1222 } else {
1223 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1224 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1225 if (r)
1226 return r;
1227 alu.src[0].sel = ctx->temp_reg;
1228 alu.src[0].chan = i;
1229 }
1230 if (i == 3) {
1231 alu.last = 1;
1232 }
1233 r = r600_bc_add_alu(ctx->bc, &alu);
1234 if (r)
1235 return r;
1236 }
1237 return 0;
1238 }
1239
1240 static int tgsi_op3(struct r600_shader_ctx *ctx)
1241 {
1242 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1243 struct r600_bc_alu_src r600_src[3];
1244 struct r600_bc_alu alu;
1245 int i, j, r;
1246
1247 r = tgsi_split_constant(ctx, r600_src);
1248 if (r)
1249 return r;
1250 /* do it in 2 step as op3 doesn't support writemask */
1251 for (i = 0; i < 4; i++) {
1252 memset(&alu, 0, sizeof(struct r600_bc_alu));
1253 alu.inst = ctx->inst_info->r600_opcode;
1254 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1255 alu.src[j] = r600_src[j];
1256 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1257 }
1258 alu.dst.sel = ctx->temp_reg;
1259 alu.dst.chan = i;
1260 alu.dst.write = 1;
1261 alu.is_op3 = 1;
1262 if (i == 3) {
1263 alu.last = 1;
1264 }
1265 r = r600_bc_add_alu(ctx->bc, &alu);
1266 if (r)
1267 return r;
1268 }
1269 return tgsi_helper_copy(ctx, inst);
1270 }
1271
/* Dot-product family (DP2/DP3/DP4/DPH), all mapped to the DOT4 ALU op.
 * DOT4 always consumes four channel pairs, so shorter products patch
 * the unused slots: DP2/DP3 zero them out, DPH replaces src0.w with
 * the constant 1.  The result lands in temp_reg and is copied out with
 * the write mask by tgsi_helper_copy(). */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, j, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = ctx->inst_info->r600_opcode;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			alu.src[j] = r600_src[j];
			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
		}
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_DP2:
			/* channels z/w contribute 0 * 0 */
			if (i > 1) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			/* channel w contributes 0 * 0 */
			if (i > 2) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			/* homogeneous dot: w term is 1 * src1.w */
			if (i == 3) {
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
1325
/* Emit a texture fetch (TEX/TXP/TXB/DDX/DDY).
 *
 * Coordinates may need staging into temp_reg first:
 *  - TXP: perspective divide — .xyz are multiplied by 1/.w and .w is
 *    forced to 1.0;
 *  - any non-TEMPORARY source file: plain 4-channel copy, so the TEX
 *    unit always reads its coordinates from a GPR.
 * Otherwise the source GPR is used directly. */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_tex tex;
	struct r600_bc_alu alu;
	unsigned src_gpr;
	int r, i;

	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;

	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		/* Add perspective divide */
		/* tmp.w = 1 / src.w */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
		alu.src[0].sel = src_gpr;
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.last = 1;
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* tmp.xyz = src.xyz * tmp.w; the three MULs plus the MOV
		 * below share one ALU group (only the MOV sets 'last') */
		for (i = 0; i < 3; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 3;
			alu.src[1].sel = src_gpr;
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		/* tmp.w = 1.0 */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.last = 1;
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
		src_gpr = ctx->temp_reg;
	} else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
		/* coordinates live in an input/constant; copy them to a
		 * GPR the TEX unit can address (no swizzle applied here) */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
			alu.src[0].sel = src_gpr;
			alu.src[0].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&tex, 0, sizeof(struct r600_bc_tex));
	tex.inst = ctx->inst_info->r600_opcode;
	/* sampler and resource share the same index here */
	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
	tex.sampler_id = tex.resource_id;
	tex.src_gpr = src_gpr;
	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	tex.dst_sel_x = 0;
	tex.dst_sel_y = 1;
	tex.dst_sel_z = 2;
	tex.dst_sel_w = 3;
	tex.src_sel_x = 0;
	tex.src_sel_y = 1;
	tex.src_sel_z = 2;
	tex.src_sel_w = 3;

	/* coord_type presumably selects normalized coordinates; RECT
	 * textures are left unnormalized — confirm against the R600 ISA */
	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
		tex.coord_type_x = 1;
		tex.coord_type_y = 1;
		tex.coord_type_z = 1;
		tex.coord_type_w = 1;
	}
	return r600_bc_add_tex(ctx->bc, &tex);
}
1417
/* TGSI LRP: dst = src0 * src1 + (1 - src0) * src2, built in three
 * passes through temp_reg (ADD, MUL, then MULADD) and copied out with
 * the write mask by tgsi_helper_copy(). */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	unsigned i;
	int r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	/* 1 - src0 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		alu.src[1] = r600_src[0];
		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		alu.src[1].neg = 1;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* (1 - src0) * src2 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[1] = r600_src[2];
		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* src0 * src1 + (1 - src0) * src2 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
		alu.is_op3 = 1;
		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
		alu.src[1] = r600_src[1];
		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
1496
/* TGSI CMP: dst = (src0 < 0) ? src1 : src2, implemented as CNDGE with
 * src1/src2 swapped (dst = src0 >= 0 ? src2 : src1).  Results go
 * through temp_reg only when the write mask is partial, since CNDGE is
 * an OP3 and cannot mask the destination itself. */
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int use_temp = 0;
	int i, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	if (inst->Dst[0].Register.WriteMask != 0xf)
		use_temp = 1;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);

		/* selected when src0 >= 0 */
		alu.src[1] = r600_src[2];
		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);

		/* selected when src0 < 0 */
		alu.src[2] = r600_src[1];
		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);

		if (use_temp)
			alu.dst.sel = ctx->temp_reg;
		else {
			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (r)
				return r;
		}
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	if (use_temp)
		return tgsi_helper_copy(ctx, inst);
	return 0;
}
1544
/* TGSI XPD (cross product):
 *   temp = src0.zxy * src1.yzx            (MUL pass)
 *   dst  = src0.yzx * src1.zxy - temp     (MULADD pass, temp negated)
 * giving dst.x = s0.y*s1.z - s0.z*s1.y etc.  Channel 3 is computed
 * with both sources forced to 0, so dst.w ends up 0.
 * NOTE(review): TGSI documents XPD.w as 1.0 — verify whether the 0
 * emitted here matters to any caller. */
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	uint32_t use_temp = 0;
	int i, r;

	/* MULADD is OP3 and cannot mask the destination, so a partial
	 * write mask forces a temp + copy */
	if (inst->Dst[0].Register.WriteMask != 0xf)
		use_temp = 1;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	/* temp = src0.zxy * src1.yzx */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;

		alu.src[0] = r600_src[0];
		switch (i) {
		case 0:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
			break;
		case 1:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
			break;
		case 2:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
			break;
		case 3:
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
		}

		alu.src[1] = r600_src[1];
		switch (i) {
		case 0:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
			break;
		case 1:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
			break;
		case 2:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
			break;
		case 3:
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = src0.yzx * src1.zxy - temp */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;

		alu.src[0] = r600_src[0];
		switch (i) {
		case 0:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
			break;
		case 1:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
			break;
		case 2:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
			break;
		case 3:
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
		}

		alu.src[1] = r600_src[1];
		switch (i) {
		case 0:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
			break;
		case 1:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
			break;
		case 2:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
			break;
		case 3:
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].neg = 1;
		alu.src[2].chan = i;

		if (use_temp)
			alu.dst.sel = ctx->temp_reg;
		else {
			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (r)
				return r;
		}
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	if (use_temp)
		return tgsi_helper_copy(ctx, inst);
	return 0;
}
1667
1668 static int tgsi_exp(struct r600_shader_ctx *ctx)
1669 {
1670 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1671 struct r600_bc_alu_src r600_src[3];
1672 struct r600_bc_alu alu;
1673 int r;
1674
1675 /* result.x = 2^floor(src); */
1676 if (inst->Dst[0].Register.WriteMask & 1) {
1677 memset(&alu, 0, sizeof(struct r600_bc_alu));
1678
1679 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
1680 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1681 if (r)
1682 return r;
1683
1684 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1685
1686 alu.dst.sel = ctx->temp_reg;
1687 alu.dst.chan = 0;
1688 alu.dst.write = 1;
1689 alu.last = 1;
1690 r = r600_bc_add_alu(ctx->bc, &alu);
1691 if (r)
1692 return r;
1693
1694 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1695 alu.src[0].sel = ctx->temp_reg;
1696 alu.src[0].chan = 0;
1697
1698 alu.dst.sel = ctx->temp_reg;
1699 alu.dst.chan = 0;
1700 alu.dst.write = 1;
1701 alu.last = 1;
1702 r = r600_bc_add_alu(ctx->bc, &alu);
1703 if (r)
1704 return r;
1705 }
1706
1707 /* result.y = tmp - floor(tmp); */
1708 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1709 memset(&alu, 0, sizeof(struct r600_bc_alu));
1710
1711 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
1712 alu.src[0] = r600_src[0];
1713 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1714 if (r)
1715 return r;
1716 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1717
1718 alu.dst.sel = ctx->temp_reg;
1719 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1720 // if (r)
1721 // return r;
1722 alu.dst.write = 1;
1723 alu.dst.chan = 1;
1724
1725 alu.last = 1;
1726
1727 r = r600_bc_add_alu(ctx->bc, &alu);
1728 if (r)
1729 return r;
1730 }
1731
1732 /* result.z = RoughApprox2ToX(tmp);*/
1733 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1734 memset(&alu, 0, sizeof(struct r600_bc_alu));
1735 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1736 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1737 if (r)
1738 return r;
1739 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1740
1741 alu.dst.sel = ctx->temp_reg;
1742 alu.dst.write = 1;
1743 alu.dst.chan = 2;
1744
1745 alu.last = 1;
1746
1747 r = r600_bc_add_alu(ctx->bc, &alu);
1748 if (r)
1749 return r;
1750
1751 }
1752
1753 /* result.w = 1.0;*/
1754 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1755 memset(&alu, 0, sizeof(struct r600_bc_alu));
1756
1757 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1758 alu.src[0].sel = V_SQ_ALU_SRC_1;
1759 alu.src[0].chan = 0;
1760
1761 alu.dst.sel = ctx->temp_reg;
1762 alu.dst.chan = 3;
1763 alu.dst.write = 1;
1764 alu.last = 1;
1765 r = r600_bc_add_alu(ctx->bc, &alu);
1766 if (r)
1767 return r;
1768 }
1769 return tgsi_helper_copy(ctx, inst);
1770 }
1771
1772 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
1773 {
1774 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1775 struct r600_bc_alu alu;
1776 int r;
1777
1778 memset(&alu, 0, sizeof(struct r600_bc_alu));
1779 alu.inst = opcode;
1780 alu.predicate = 1;
1781
1782 alu.dst.sel = ctx->temp_reg;
1783 alu.dst.write = 1;
1784 alu.dst.chan = 0;
1785
1786 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1787 if (r)
1788 return r;
1789 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1790 alu.src[1].sel = V_SQ_ALU_SRC_0;
1791 alu.src[1].chan = 0;
1792
1793 alu.last = 1;
1794
1795 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
1796 if (r)
1797 return r;
1798 return 0;
1799 }
1800
1801 static int pops(struct r600_shader_ctx *ctx, int pops)
1802 {
1803 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
1804 ctx->bc->cf_last->pop_count = pops;
1805 return 0;
1806 }
1807
1808 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
1809 {
1810 switch(reason) {
1811 case FC_PUSH_VPM:
1812 ctx->bc->callstack[ctx->bc->call_sp].current--;
1813 break;
1814 case FC_PUSH_WQM:
1815 case FC_LOOP:
1816 ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
1817 break;
1818 case FC_REP:
1819 /* TOODO : for 16 vp asic should -= 2; */
1820 ctx->bc->callstack[ctx->bc->call_sp].current --;
1821 break;
1822 }
1823 }
1824
1825 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
1826 {
1827 if (check_max_only) {
1828 int diff;
1829 switch (reason) {
1830 case FC_PUSH_VPM:
1831 diff = 1;
1832 break;
1833 case FC_PUSH_WQM:
1834 diff = 4;
1835 break;
1836 }
1837 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
1838 ctx->bc->callstack[ctx->bc->call_sp].max) {
1839 ctx->bc->callstack[ctx->bc->call_sp].max =
1840 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
1841 }
1842 return;
1843 }
1844 switch (reason) {
1845 case FC_PUSH_VPM:
1846 ctx->bc->callstack[ctx->bc->call_sp].current++;
1847 break;
1848 case FC_PUSH_WQM:
1849 case FC_LOOP:
1850 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
1851 break;
1852 case FC_REP:
1853 ctx->bc->callstack[ctx->bc->call_sp].current++;
1854 break;
1855 }
1856
1857 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
1858 ctx->bc->callstack[ctx->bc->call_sp].max) {
1859 ctx->bc->callstack[ctx->bc->call_sp].max =
1860 ctx->bc->callstack[ctx->bc->call_sp].current;
1861 }
1862 }
1863
1864 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
1865 {
1866 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
1867
1868 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
1869 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
1870 sp->mid[sp->num_mid] = ctx->bc->cf_last;
1871 sp->num_mid++;
1872 }
1873
1874 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
1875 {
1876 ctx->bc->fc_sp++;
1877 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
1878 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
1879 }
1880
1881 static void fc_poplevel(struct r600_shader_ctx *ctx)
1882 {
1883 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
1884 if (sp->mid) {
1885 free(sp->mid);
1886 sp->mid = NULL;
1887 }
1888 sp->num_mid = 0;
1889 sp->start = NULL;
1890 sp->type = 0;
1891 ctx->bc->fc_sp--;
1892 }
1893
#if 0
/* NOTE(review): dead code — unfinished skeletons for subroutine
 * RETURN/CALL support and for breaking a loop from within a called
 * subroutine.  Jump offsets and the "set-return-flag in loop"
 * mechanism were never implemented; kept as a reference. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
1941
1942 static int tgsi_if(struct r600_shader_ctx *ctx)
1943 {
1944 emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
1945
1946 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
1947
1948 fc_pushlevel(ctx, FC_IF);
1949
1950 callstack_check_depth(ctx, FC_PUSH_VPM, 0);
1951 return 0;
1952 }
1953
1954 static int tgsi_else(struct r600_shader_ctx *ctx)
1955 {
1956 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
1957 ctx->bc->cf_last->pop_count = 1;
1958
1959 fc_set_mid(ctx, ctx->bc->fc_sp);
1960 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
1961 return 0;
1962 }
1963
/* TGSI ENDIF: pop one stack level and patch the pending jump targets
 * of the IF (and optional ELSE) recorded on the flow-control stack. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		/* no ELSE: the IF's JUMP skips past the POP and pops
		 * the level itself */
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		/* with ELSE: the ELSE instruction jumps past the POP */
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_decrease_current(ctx, FC_PUSH_VPM);
	return 0;
}
1983
1984 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
1985 {
1986 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
1987
1988 fc_pushlevel(ctx, FC_LOOP);
1989
1990 /* check stack depth */
1991 callstack_check_depth(ctx, FC_LOOP, 0);
1992 return 0;
1993 }
1994
/* TGSI ENDLOOP: emit LOOP_END and patch every loop-related address. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	 */
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	/* mid markers are the BRK/CONT instructions collected by
	 * tgsi_loop_brk_cont(); aim them at this LOOP_END */
	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* TODO add LOOPRET support */
	fc_poplevel(ctx);
	callstack_decrease_current(ctx, FC_LOOP);
	return 0;
}
2023
/* TGSI BRK/CONT: emit LOOP_BREAK or LOOP_CONTINUE (per inst_info)
 * targeting the innermost enclosing loop. */
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
	unsigned int fscp;

	/* find the innermost FC_LOOP entry; index 0 is never used
	 * because fc_pushlevel() pre-increments fc_sp */
	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}

	if (fscp == 0) {
		R600_ERR("Break not inside loop/endloop pair\n");
		return -EINVAL;
	}

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	/* record it so tgsi_endloop() can patch its jump target */
	fc_set_mid(ctx, fscp);

	pops(ctx, 1);
	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
	return 0;
}
2048
2049 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2050 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2051 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2052 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2053 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2054 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2055 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2056 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2057 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2058 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2059 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2060 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2061 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2062 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2063 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2064 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2065 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2066 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2067 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2068 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2069 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2070 /* gap */
2071 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2072 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2073 /* gap */
2074 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2075 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2076 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2077 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2078 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2079 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2080 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
/* NOTE(review): tail of the TGSI-opcode dispatch table (the array declaration
 * starts above this view).  Each row pairs a TGSI opcode with an r600
 * ALU/TEX/CF instruction value and a translation callback; rows using
 * tgsi_unsupported are opcodes with no r600 implementation yet.  The numeric
 * "gap" rows appear to keep the table positionally indexed by TGSI opcode
 * number -- confirm against the array declaration and lookup site. */
2081 {TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2082 {TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2083 {TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2084 /* gap */
2085 {32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2086 {TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2087 {TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2088 {TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2089 {TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2090 {TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2091 {TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
/* KILP and KIL (below) both translate to the same KILLGT ALU instruction,
 * handled by tgsi_kill. */
2092 {TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2093 {TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2094 {TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2095 {TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2096 {TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2097 {TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2098 {TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2099 {TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2100 {TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2101 {TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
/* SLE is implemented as SETGE with swapped sources (a <= b  ==  b >= a). */
2102 {TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2103 {TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2104 {TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2105 {TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2106 {TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2107 {TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2108 {TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2109 {TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2110 {TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2111 {TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2112 {TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2113 {TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2114 {TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2115 {TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2116 {TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2117 {TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2118 {TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2119 {TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2120 {TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2121 {TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2122 {TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2123 {TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2124 {TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2125 {TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2126 {TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2127 {TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2128 /* gap */
2129 {75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2130 {76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2131 {TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2132 {TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2133 /* gap */
2134 {79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2135 {80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2136 {TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2137 {TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2138 {TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2139 {TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2140 {TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2141 {TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2142 {TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2143 /* gap */
2144 {88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2145 {TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2146 {TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2147 {TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2148 {TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2149 {TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2150 {TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2151 {TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2152 {TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2153 {TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2154 {TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2155 {TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2156 {TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2157 {TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2158 {TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2159 /* gap */
2160 {103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2161 {104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2162 {105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2163 {106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2164 {TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2165 /* gap */
2166 {108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2167 {109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2168 {110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2169 {111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2170 {TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2171 {TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2172 {TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2173 {TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2174 {TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2175 {TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2176 /* gap */
2177 {118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2178 {TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2179 {TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2180 {TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2181 {TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2182 {TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2183 {TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2184 {TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2185 {TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2186 {TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2187 {TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2188 {TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2189 {TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2190 {TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2191 {TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2192 {TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2193 {TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2194 {TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2195 {TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2196 {TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2197 {TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2198 {TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2199 {TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2200 {TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2201 {TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2202 {TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2203 {TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2204 {TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2205 };