r600g: fix SSG and op3 neg writing
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "pipe/p_shader_tokens.h"
24 #include "tgsi/tgsi_parse.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_dump.h"
27 #include "util/u_format.h"
28 #include "r600_screen.h"
29 #include "r600_context.h"
30 #include "r600_shader.h"
31 #include "r600_asm.h"
32 #include "r600_sq.h"
33 #include "r600d.h"
34 #include <stdio.h>
35 #include <errno.h>
36
37
38 struct r600_shader_tgsi_instruction;
39
/* Per-shader translation state used while converting a TGSI token
 * stream into r600 bytecode. */
struct r600_shader_ctx {
	struct tgsi_shader_info info;		/* result of tgsi_scan_shader() */
	struct tgsi_parse_context parse;	/* cursor over the TGSI token stream */
	const struct tgsi_token *tokens;	/* input program */
	unsigned type;				/* TGSI_PROCESSOR_* of this shader */
	unsigned file_offset[TGSI_FILE_COUNT];	/* register base for each TGSI file */
	unsigned temp_reg;			/* first GPR free for translator temporaries */
	struct r600_shader_tgsi_instruction *inst_info;	/* table entry for the opcode being processed */
	struct r600_bc *bc;			/* bytecode being emitted */
	struct r600_shader *shader;		/* shader being filled in */
	u32 value[4];				/* literal dwords staged for the current instruction */
	u32 *literals;				/* immediates collected so far, 4 dwords per vector */
	u32 nliterals;				/* number of immediate vectors in literals */
};
54
/* Translation table entry: maps one TGSI opcode onto its r600 ALU
 * encoding and the handler that emits bytecode for it. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;	/* TGSI_OPCODE_* */
	unsigned is_op3;	/* non-zero when the r600 encoding is a 3-operand (OP3) instruction */
	unsigned r600_opcode;	/* V_SQ_ALU_WORD1_OP2/OP3 instruction value */
	int (*process)(struct r600_shader_ctx *ctx);	/* emits the bytecode; returns 0 or -errno */
};
61
62 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
63 static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
64
/* Patch the destination swizzles of every vertex-fetch instruction so
 * they match the formats of the currently bound vertex elements, then
 * rebuild the bytecode.  No-op for non-vertex shaders. */
static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
{
	struct r600_context *rctx = r600_context(ctx);
	const struct util_format_description *desc;
	enum pipe_format resource_format[160];
	unsigned i, nresources = 0;
	struct r600_bc *bc = &shader->bc;
	struct r600_bc_cf *cf;
	struct r600_bc_vtx *vtx;

	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
		return 0;
	/* collect the source format of each bound vertex element */
	for (i = 0; i < rctx->vertex_elements->count; i++) {
		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
	}
	/* walk every CF instruction; fetch clauses carry the vtx list */
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		switch (cf->inst) {
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				desc = util_format_description(resource_format[vtx->buffer_id]);
				if (desc == NULL) {
					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
					return -EINVAL;
				}
				/* route each fetched component through the format's swizzle */
				vtx->dst_sel_x = desc->swizzle[0];
				vtx->dst_sel_y = desc->swizzle[1];
				vtx->dst_sel_z = desc->swizzle[2];
				vtx->dst_sel_w = desc->swizzle[3];
			}
			break;
		default:
			break;
		}
	}
	return r600_bc_build(&shader->bc);
}
102
103 int r600_pipe_shader_create(struct pipe_context *ctx,
104 struct r600_context_state *rpshader,
105 const struct tgsi_token *tokens)
106 {
107 struct r600_screen *rscreen = r600_screen(ctx->screen);
108 int r;
109
110 //fprintf(stderr, "--------------------------------------------------------------\n");
111 //tgsi_dump(tokens, 0);
112 if (rpshader == NULL)
113 return -ENOMEM;
114 rpshader->shader.family = radeon_get_family(rscreen->rw);
115 r = r600_shader_from_tgsi(tokens, &rpshader->shader);
116 if (r) {
117 R600_ERR("translation from TGSI failed !\n");
118 return r;
119 }
120 r = r600_bc_build(&rpshader->shader.bc);
121 if (r) {
122 R600_ERR("building bytecode failed !\n");
123 return r;
124 }
125 //fprintf(stderr, "______________________________________________________________\n");
126 return 0;
127 }
128
/* Build the radeon state object for a vertex shader: SPI output
 * semantic ids, export count, SQ GPR/stack resources, and references
 * to the uploaded shader buffer object. */
static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_shader *rshader = &rpshader->shader;
	struct radeon_state *state;
	unsigned i, tmp;

	rpshader->rstate = radeon_state_decref(rpshader->rstate);
	state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
	if (state == NULL)
		return -ENOMEM;
	for (i = 0; i < 10; i++) {
		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
	}
	/* so far never got proper semantic id from tgsi */
	/* pack semantic id i into byte (i & 3) of SPI_VS_OUT_ID_{i/4} */
	for (i = 0; i < 32; i++) {
		tmp = i << ((i & 3) * 8);
		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
	}
	/* NOTE(review): assumes two of the outputs are non-param exports
	 * and excluded from the count -- confirm the -2 for shaders with
	 * fewer than two outputs */
	state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
			S_028868_STACK_SIZE(rshader->bc.nstack);
	rpshader->rstate = state;
	/* same bo referenced by both state slots -- presumably program
	 * start for the shader and its fetch program; TODO confirm */
	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->nbo = 2;
	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
	rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
	return radeon_state_pm4(state);
}
159
/* Build the radeon state object for a pixel shader: per-input SPI
 * interpolation controls (flat shading, point sprites), the export
 * mask, and SQ GPR/stack resources. */
static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	const struct pipe_rasterizer_state *rasterizer;
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_shader *rshader = &rpshader->shader;
	struct r600_context *rctx = r600_context(ctx);
	struct radeon_state *state;
	unsigned i, tmp, exports_ps, num_cout;

	rasterizer = &rctx->rasterizer->state.rasterizer;
	rpshader->rstate = radeon_state_decref(rpshader->rstate);
	state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
	if (state == NULL)
		return -ENOMEM;
	for (i = 0; i < rshader->ninput; i++) {
		tmp = S_028644_SEMANTIC(i);
		tmp |= S_028644_SEL_CENTROID(1);
		/* color inputs follow the rasterizer's flat-shade setting */
		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
			rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
		}
		if (rasterizer->sprite_coord_enable & (1 << i)) {
			tmp |= S_028644_PT_SPRITE_TEX(1);
		}
		state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
	}

	/* build the export mask: bit 0 for a position export, following
	 * bits one per color output -- NOTE(review): confirm bit layout
	 * against the SQ_PGM_EXPORTS_PS register description */
	exports_ps = 0;
	num_cout = 0;
	for (i = 0; i < rshader->noutput; i++) {
		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
			exports_ps |= 1;
		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
			exports_ps |= (1 << (num_cout+1));
			num_cout++;
		}
	}
	if (!exports_ps) {
		/* always at least export 1 component per pixel */
		exports_ps = 2;
	}
	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
			S_0286CC_PERSP_GRADIENT_ENA(1);
	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
			S_028868_STACK_SIZE(rshader->bc.nstack);
	state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
	rpshader->rstate = state;
	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
	rpshader->rstate->nbo = 1;
	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
	return radeon_state_pm4(state);
}
213
/* Upload the assembled bytecode into a freshly allocated, 4 KiB
 * aligned buffer object and rebuild the hardware state matching the
 * shader's processor type. */
static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
	struct r600_screen *rscreen = r600_screen(ctx->screen);
	struct r600_context *rctx = r600_context(ctx);
	struct r600_shader *rshader = &rpshader->shader;
	int r;

	/* copy new shader */
	radeon_bo_decref(rscreen->rw, rpshader->bo);
	rpshader->bo = NULL;
	rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
				4096, NULL);
	if (rpshader->bo == NULL) {
		return -ENOMEM;
	}
	/* NOTE(review): radeon_bo_map return value is ignored -- verify
	 * it cannot fail, or check it */
	radeon_bo_map(rscreen->rw, rpshader->bo);
	memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
	radeon_bo_unmap(rscreen->rw, rpshader->bo);
	/* build state */
	rshader->flat_shade = rctx->flat_shade;
	switch (rshader->processor_type) {
	case TGSI_PROCESSOR_VERTEX:
		r = r600_pipe_shader_vs(ctx, rpshader);
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		r = r600_pipe_shader_ps(ctx, rpshader);
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
247
248 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
249 {
250 struct r600_context *rctx = r600_context(ctx);
251 int r;
252
253 if (rpshader == NULL)
254 return -EINVAL;
255 /* there should be enough input */
256 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
257 R600_ERR("%d resources provided, expecting %d\n",
258 rctx->vertex_elements->count, rpshader->shader.bc.nresource);
259 return -EINVAL;
260 }
261 r = r600_shader_update(ctx, &rpshader->shader);
262 if (r)
263 return r;
264 return r600_pipe_shader(ctx, rpshader);
265 }
266
267 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
268 {
269 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
270 int j;
271
272 if (i->Instruction.NumDstRegs > 1) {
273 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
274 return -EINVAL;
275 }
276 if (i->Instruction.Predicate) {
277 R600_ERR("predicate unsupported\n");
278 return -EINVAL;
279 }
280 #if 0
281 if (i->Instruction.Label) {
282 R600_ERR("label unsupported\n");
283 return -EINVAL;
284 }
285 #endif
286 for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
287 if (i->Src[j].Register.Indirect ||
288 i->Src[j].Register.Dimension ||
289 i->Src[j].Register.Absolute) {
290 R600_ERR("unsupported src (indirect|dimension|absolute)\n");
291 return -EINVAL;
292 }
293 }
294 for (j = 0; j < i->Instruction.NumDstRegs; j++) {
295 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
296 R600_ERR("unsupported dst (indirect|dimension)\n");
297 return -EINVAL;
298 }
299 }
300 return 0;
301 }
302
/* Record one TGSI declaration.  Inputs and outputs get a GPR out of
 * their file window; a vertex-shader input additionally emits the
 * vertex fetch that loads it (buffer i, index taken from GPR0.x). */
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
	struct r600_bc_vtx vtx;
	unsigned i;
	int r;

	switch (d->Declaration.File) {
	case TGSI_FILE_INPUT:
		i = ctx->shader->ninput++;
		ctx->shader->input[i].name = d->Semantic.Name;
		ctx->shader->input[i].sid = d->Semantic.Index;
		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
			/* turn input into fetch */
			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
			vtx.inst = 0;
			vtx.fetch_type = 0;
			vtx.buffer_id = i;
			/* register containing the index into the buffer */
			vtx.src_gpr = 0;
			vtx.src_sel_x = 0;
			vtx.mega_fetch_count = 0x1F;
			vtx.dst_gpr = ctx->shader->input[i].gpr;
			/* identity swizzle; r600_shader_update() patches it to
			 * match the bound vertex element format */
			vtx.dst_sel_x = 0;
			vtx.dst_sel_y = 1;
			vtx.dst_sel_z = 2;
			vtx.dst_sel_w = 3;
			r = r600_bc_add_vtx(ctx->bc, &vtx);
			if (r)
				return r;
		}
		break;
	case TGSI_FILE_OUTPUT:
		i = ctx->shader->noutput++;
		ctx->shader->output[i].name = d->Semantic.Name;
		ctx->shader->output[i].sid = d->Semantic.Index;
		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
		break;
	case TGSI_FILE_CONSTANT:
	case TGSI_FILE_TEMPORARY:
	case TGSI_FILE_SAMPLER:
		/* nothing to record for these files */
		break;
	default:
		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
		return -EINVAL;
	}
	return 0;
}
354
355 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
356 {
357 struct tgsi_full_immediate *immediate;
358 struct r600_shader_ctx ctx;
359 struct r600_bc_output output[32];
360 unsigned output_done, noutput;
361 unsigned opcode;
362 int i, r = 0, pos0;
363
364 ctx.bc = &shader->bc;
365 ctx.shader = shader;
366 r = r600_bc_init(ctx.bc, shader->family);
367 if (r)
368 return r;
369 ctx.tokens = tokens;
370 tgsi_scan_shader(tokens, &ctx.info);
371 tgsi_parse_init(&ctx.parse, tokens);
372 ctx.type = ctx.parse.FullHeader.Processor.Processor;
373 shader->processor_type = ctx.type;
374
375 /* register allocations */
376 /* Values [0,127] correspond to GPR[0..127].
377 * Values [128,159] correspond to constant buffer bank 0
378 * Values [160,191] correspond to constant buffer bank 1
379 * Values [256,511] correspond to cfile constants c[0..255].
380 * Other special values are shown in the list below.
381 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
382 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
383 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
384 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
385 * 248 SQ_ALU_SRC_0: special constant 0.0.
386 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
387 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
388 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
389 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
390 * 253 SQ_ALU_SRC_LITERAL: literal constant.
391 * 254 SQ_ALU_SRC_PV: previous vector result.
392 * 255 SQ_ALU_SRC_PS: previous scalar result.
393 */
394 for (i = 0; i < TGSI_FILE_COUNT; i++) {
395 ctx.file_offset[i] = 0;
396 }
397 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
398 ctx.file_offset[TGSI_FILE_INPUT] = 1;
399 }
400 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
401 ctx.info.file_count[TGSI_FILE_INPUT];
402 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
403 ctx.info.file_count[TGSI_FILE_OUTPUT];
404 ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
405 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
406 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
407 ctx.info.file_count[TGSI_FILE_TEMPORARY];
408
409 ctx.nliterals = 0;
410 ctx.literals = NULL;
411
412 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
413 tgsi_parse_token(&ctx.parse);
414 switch (ctx.parse.FullToken.Token.Type) {
415 case TGSI_TOKEN_TYPE_IMMEDIATE:
416 immediate = &ctx.parse.FullToken.FullImmediate;
417 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
418 if(ctx.literals == NULL) {
419 r = -ENOMEM;
420 goto out_err;
421 }
422 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
423 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
424 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
425 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
426 ctx.nliterals++;
427 break;
428 case TGSI_TOKEN_TYPE_DECLARATION:
429 r = tgsi_declaration(&ctx);
430 if (r)
431 goto out_err;
432 break;
433 case TGSI_TOKEN_TYPE_INSTRUCTION:
434 r = tgsi_is_supported(&ctx);
435 if (r)
436 goto out_err;
437 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
438 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
439 r = ctx.inst_info->process(&ctx);
440 if (r)
441 goto out_err;
442 r = r600_bc_add_literal(ctx.bc, ctx.value);
443 if (r)
444 goto out_err;
445 break;
446 default:
447 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
448 r = -EINVAL;
449 goto out_err;
450 }
451 }
452 /* export output */
453 noutput = shader->noutput;
454 for (i = 0, pos0 = 0; i < noutput; i++) {
455 memset(&output[i], 0, sizeof(struct r600_bc_output));
456 output[i].gpr = shader->output[i].gpr;
457 output[i].elem_size = 3;
458 output[i].swizzle_x = 0;
459 output[i].swizzle_y = 1;
460 output[i].swizzle_z = 2;
461 output[i].swizzle_w = 3;
462 output[i].barrier = 1;
463 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
464 output[i].array_base = i - pos0;
465 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
466 switch (ctx.type) {
467 case TGSI_PROCESSOR_VERTEX:
468 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
469 output[i].array_base = 60;
470 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
471 /* position doesn't count in array_base */
472 pos0++;
473 }
474 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
475 output[i].array_base = 61;
476 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
477 /* position doesn't count in array_base */
478 pos0++;
479 }
480 break;
481 case TGSI_PROCESSOR_FRAGMENT:
482 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
483 output[i].array_base = shader->output[i].sid;
484 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
485 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
486 output[i].array_base = 61;
487 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
488 } else {
489 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
490 r = -EINVAL;
491 goto out_err;
492 }
493 break;
494 default:
495 R600_ERR("unsupported processor type %d\n", ctx.type);
496 r = -EINVAL;
497 goto out_err;
498 }
499 }
500 /* add fake param output for vertex shader if no param is exported */
501 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
502 for (i = 0, pos0 = 0; i < noutput; i++) {
503 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
504 pos0 = 1;
505 break;
506 }
507 }
508 if (!pos0) {
509 memset(&output[i], 0, sizeof(struct r600_bc_output));
510 output[i].gpr = 0;
511 output[i].elem_size = 3;
512 output[i].swizzle_x = 0;
513 output[i].swizzle_y = 1;
514 output[i].swizzle_z = 2;
515 output[i].swizzle_w = 3;
516 output[i].barrier = 1;
517 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
518 output[i].array_base = 0;
519 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
520 noutput++;
521 }
522 }
523 /* add fake pixel export */
524 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
525 memset(&output[0], 0, sizeof(struct r600_bc_output));
526 output[0].gpr = 0;
527 output[0].elem_size = 3;
528 output[0].swizzle_x = 7;
529 output[0].swizzle_y = 7;
530 output[0].swizzle_z = 7;
531 output[0].swizzle_w = 7;
532 output[0].barrier = 1;
533 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
534 output[0].array_base = 0;
535 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
536 noutput++;
537 }
538 /* set export done on last export of each type */
539 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
540 if (i == (noutput - 1)) {
541 output[i].end_of_program = 1;
542 }
543 if (!(output_done & (1 << output[i].type))) {
544 output_done |= (1 << output[i].type);
545 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
546 }
547 }
548 /* add output to bytecode */
549 for (i = 0; i < noutput; i++) {
550 r = r600_bc_add_output(ctx.bc, &output[i]);
551 if (r)
552 goto out_err;
553 }
554 free(ctx.literals);
555 tgsi_parse_free(&ctx.parse);
556 return 0;
557 out_err:
558 free(ctx.literals);
559 tgsi_parse_free(&ctx.parse);
560 return r;
561 }
562
563 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
564 {
565 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
566 return -EINVAL;
567 }
568
/* TGSI END emits no bytecode; the exports and end-of-program flag are
 * appended later by r600_shader_from_tgsi(). */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	return 0;
}
573
/* Convert a TGSI source register into an r600 ALU src operand.
 * For immediates, the selector becomes the IMMEDIATE file offset
 * (253 = SQ_ALU_SRC_LITERAL) and the four literal dwords are staged
 * in ctx->value for the caller to emit via r600_bc_add_literal(). */
static int tgsi_src(struct r600_shader_ctx *ctx,
		const struct tgsi_full_src_register *tgsi_src,
		struct r600_bc_alu_src *r600_src)
{
	int index;
	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
	r600_src->sel = tgsi_src->Register.Index;
	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
		/* the selector comes entirely from file_offset below */
		r600_src->sel = 0;
		index = tgsi_src->Register.Index;
		/* stage this immediate's four dwords as the literal slot */
		ctx->value[0] = ctx->literals[index * 4 + 0];
		ctx->value[1] = ctx->literals[index * 4 + 1];
		ctx->value[2] = ctx->literals[index * 4 + 2];
		ctx->value[3] = ctx->literals[index * 4 + 3];
	}
	r600_src->neg = tgsi_src->Register.Negate;
	/* rebase into the register window assigned to this TGSI file */
	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
	return 0;
}
593
594 static int tgsi_dst(struct r600_shader_ctx *ctx,
595 const struct tgsi_full_dst_register *tgsi_dst,
596 unsigned swizzle,
597 struct r600_bc_alu_dst *r600_dst)
598 {
599 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
600
601 r600_dst->sel = tgsi_dst->Register.Index;
602 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
603 r600_dst->chan = swizzle;
604 r600_dst->write = 1;
605 if (inst->Instruction.Saturate) {
606 r600_dst->clamp = 1;
607 }
608 return 0;
609 }
610
611 static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
612 {
613 switch (swizzle) {
614 case 0:
615 return tgsi_src->Register.SwizzleX;
616 case 1:
617 return tgsi_src->Register.SwizzleY;
618 case 2:
619 return tgsi_src->Register.SwizzleZ;
620 case 3:
621 return tgsi_src->Register.SwizzleW;
622 default:
623 return 0;
624 }
625 }
626
627 static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
628 {
629 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
630 struct r600_bc_alu alu;
631 int i, j, k, nconst, r;
632
633 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
634 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
635 nconst++;
636 }
637 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
638 if (r) {
639 return r;
640 }
641 }
642 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
643 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
644 for (k = 0; k < 4; k++) {
645 memset(&alu, 0, sizeof(struct r600_bc_alu));
646 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
647 alu.src[0].sel = r600_src[0].sel;
648 alu.src[0].chan = k;
649 alu.dst.sel = ctx->temp_reg + j;
650 alu.dst.chan = k;
651 alu.dst.write = 1;
652 if (k == 3)
653 alu.last = 1;
654 r = r600_bc_add_alu(ctx->bc, &alu);
655 if (r)
656 return r;
657 }
658 r600_src[0].sel = ctx->temp_reg + j;
659 j--;
660 }
661 }
662 return 0;
663 }
664
/* Emit a plain one/two-source ALU instruction for every destination
 * channel enabled in the write mask.  When swap is non-zero, src0 and
 * src1 are exchanged (for opcodes whose hardware operand order is
 * reversed relative to TGSI).  SUB is implemented as ADD with src1
 * negated; ABS sets the absolute-value bit on src0. */
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, j, r;
	int lasti = 0;

	/* find the highest written channel: it closes the ALU group */
	for (i = 0; i < 4; i++) {
		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
			lasti = i;
		}
	}

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bc_alu));
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;

		alu.inst = ctx->inst_info->r600_opcode;
		if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				alu.src[j] = r600_src[j];
				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
			}
		} else {
			/* exchange operands 0 and 1 */
			alu.src[0] = r600_src[1];
			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);

			alu.src[1] = r600_src[0];
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		}
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_SUB:
			alu.src[1].neg = 1;
			break;
		case TGSI_OPCODE_ABS:
			alu.src[0].abs = 1;
			break;
		default:
			break;
		}
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
724
/* Standard ALU op with operands in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0);
}
729
/* ALU op with src0/src1 exchanged, for opcodes whose hardware operand
 * order is reversed relative to TGSI. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1);
}
734
/*
 * Range reduction for the trig opcodes:
 * r600 - trunc to -PI..PI range
 * r700 - normalize by dividing by 2PI
 * see fdo bug 27901
 *
 * Sequence emitted into temp_reg.x:
 *   1. MULADD: t = src * 1/(2*PI) + 0.5
 *   2. FRACT:  t = fract(t)               (argument wrapped to one turn)
 *   3. MULADD: t = t * C0 + C1            (chip-dependent rescale, below)
 *   4. the actual trig instruction on t
 *   5. MOV-replicate t into the written destination channels
 */
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, r;
	uint32_t lit_vals[4];

	memset(lit_vals, 0, 4*4);
	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	/* step 1 constants: scale to turns, bias by half a turn */
	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
	lit_vals[1] = fui(0.5f);

	/* temp.x = src.x * lit0 + lit1 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0] = r600_src[0];
	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;

	/* step 2: temp.x = fract(temp.x) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* step 3 constants: r600 maps back to radians in [-PI, PI);
	 * r700+ keeps normalized turns, recentered to [-0.5, 0.5) */
	if (ctx->bc->chiprev == 0) {
		lit_vals[0] = fui(3.1415926535897f * 2.0f);
		lit_vals[1] = fui(-3.1415926535897f);
	} else {
		lit_vals[0] = fui(1.0f);
		lit_vals[1] = fui(-0.5f);
	}

	/* temp.x = temp.x * lit0 + lit1 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;

	/* step 4: the actual trig opcode on temp.x */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = ctx->inst_info->r600_opcode;
	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* replicate result */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.src[0].sel = ctx->temp_reg;
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
		alu.dst.chan = i;
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
854
/* Emit the opcode's kill instruction on all four channels, with 0.0
 * as src0 and the TGSI source channel as src1; the four instructions
 * form one ALU group. */
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = ctx->inst_info->r600_opcode;
		alu.dst.chan = i;
		/* compare against the special constant 0.0 */
		alu.src[0].sel = V_SQ_ALU_SRC_0;
		r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
		if (r)
			return r;
		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
879
/* LIT: dst.x = 1.0, dst.y = max(src.x, 0.0), dst.w = 1.0, and - only
 * when channel z is written - dst.z = EXP_IEEE(MUL_LIT(src.w,
 * LOG_CLAMPED(src.y), src.x)), i.e. the specular term.  A NOP fills
 * the z slot of the first ALU group; the z computation is appended
 * as separate groups afterwards. */
static int tgsi_lit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int r;

	/* dst.x, <- 1.0 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
	alu.src[0].chan = 0;
	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.y = max(src.x, 0.0) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
	if (r)
		return r;
	alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
	alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.z = NOP - fill Z slot */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
	alu.dst.chan = 2;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.w, <- 1.0 */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1;
	alu.src[0].chan = 0;
	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
	if (r)
		return r;
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	if (inst->Dst[0].Register.WriteMask & (1 << 2))
	{
		int chan;
		int sel;

		/* dst.z = log(src.y) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
		if (r)
			return r;
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
		if (r)
			return r;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* remember where the log result landed for the next step */
		chan = alu.dst.chan;
		sel = alu.dst.sel;

		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
		if (r)
			return r;
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
		alu.src[1].sel  = sel;
		alu.src[1].chan = chan;
		r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
		if (r)
			return r;
		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* dst.z = exp(tmp.x) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;
		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
		if (r)
			return r;
		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
997
998 static int tgsi_trans(struct r600_shader_ctx *ctx)
999 {
1000 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1001 struct r600_bc_alu alu;
1002 int i, j, r;
1003
1004 for (i = 0; i < 4; i++) {
1005 memset(&alu, 0, sizeof(struct r600_bc_alu));
1006 if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1007 alu.inst = ctx->inst_info->r600_opcode;
1008 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1009 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1010 if (r)
1011 return r;
1012 alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1013 }
1014 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1015 if (r)
1016 return r;
1017 alu.last = 1;
1018 r = r600_bc_add_alu(ctx->bc, &alu);
1019 if (r)
1020 return r;
1021 }
1022 }
1023 return 0;
1024 }
1025
/* Broadcast temp_reg.x: MOV it into each of the four destination
 * channels, with the write bit taken from the instruction's write
 * mask; the fourth MOV closes the ALU group. */
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		/* src chan stays 0: always read temp.x */
		alu.src[0].sel = ctx->temp_reg;
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
		alu.dst.chan = i;
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
1049
1050 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1051 {
1052 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1053 struct r600_bc_alu alu;
1054 int i, r;
1055
1056 memset(&alu, 0, sizeof(struct r600_bc_alu));
1057 alu.inst = ctx->inst_info->r600_opcode;
1058 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1059 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1060 if (r)
1061 return r;
1062 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1063 }
1064 alu.dst.sel = ctx->temp_reg;
1065 alu.dst.write = 1;
1066 alu.last = 1;
1067 r = r600_bc_add_alu(ctx->bc, &alu);
1068 if (r)
1069 return r;
1070 /* replicate result */
1071 return tgsi_helper_tempx_replicate(ctx);
1072 }
1073
/* POW(a,b) lowered as EXP2(b * LOG2(a)), all on channel x of temp_reg,
 * then replicated to the destination write mask.  Literals pending from
 * the sources are flushed between ALU groups. */
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	int r;

	/* LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
	if (r)
		return r;
	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc,ctx->value);
	if (r)
		return r;
	/* b * LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
	if (r)
		return r;
	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
	/* src1 chan stays 0 from the memset: temp_reg.x holds LOG2(a) */
	alu.src[1].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc,ctx->value);
	if (r)
		return r;
	/* POW(a,b) = EXP2(b * LOG2(a))*/
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
	alu.src[0].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc,ctx->value);
	if (r)
		return r;
	return tgsi_helper_tempx_replicate(ctx);
}
1128
/* SSG (set sign): dst = 1 if src > 0, -1 if src < 0, 0 otherwise.
 * Lowered with two CNDGT op3 instructions:
 *   tmp = (src  > 0) ?  1 : src
 *   dst = (-tmp > 0) ? -1 : tmp
 * Constants in the sources are split out first since op3 slots have
 * stricter constant-operand limits. */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu alu;
	struct r600_bc_alu_src r600_src[3];
	int i, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;

		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);

		alu.src[1].sel = V_SQ_ALU_SRC_1;

		alu.src[2] = r600_src[0];
		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* dst = (-tmp > 0 ? -1 : tmp) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
		alu.is_op3 = 1;
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;

		/* -1 via the hardware ONE constant with the neg modifier */
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[0].neg = 1;

		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
1193
1194 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1195 {
1196 struct r600_bc_alu alu;
1197 int i, r;
1198
1199 r = r600_bc_add_literal(ctx->bc, ctx->value);
1200 if (r)
1201 return r;
1202 for (i = 0; i < 4; i++) {
1203 memset(&alu, 0, sizeof(struct r600_bc_alu));
1204 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1205 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1206 alu.dst.chan = i;
1207 } else {
1208 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1209 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1210 if (r)
1211 return r;
1212 alu.src[0].sel = ctx->temp_reg;
1213 alu.src[0].chan = i;
1214 }
1215 if (i == 3) {
1216 alu.last = 1;
1217 }
1218 r = r600_bc_add_alu(ctx->bc, &alu);
1219 if (r)
1220 return r;
1221 }
1222 return 0;
1223 }
1224
/* Generic three-operand (op3) instruction: compute all four channels into
 * temp_reg, then copy to the real destination via tgsi_helper_copy().
 * Two steps are needed because op3 ALU encodings have no write mask. */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, j, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	/* do it in 2 step as op3 doesn't support writemask */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = ctx->inst_info->r600_opcode;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			/* copy the pre-split source, then apply this channel's swizzle */
			alu.src[j] = r600_src[j];
			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
		}
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
1256
/* Dot products (DP2/DP3/DP4/DPH) via the hardware DOT4 instruction, which
 * always multiplies all four channel pairs.  Shorter dot products zero out
 * the unused channels; DPH substitutes 1.0 for src0.w.  The result lands
 * in temp_reg and is copied out with tgsi_helper_copy(). */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int i, j, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = ctx->inst_info->r600_opcode;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			alu.src[j] = r600_src[j];
			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
		}
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_DP2:
			/* channels z/w contribute 0 * 0 */
			if (i > 1) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			/* channel w contributes 0 * 0 */
			if (i > 2) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			/* DPH: src0.w is treated as 1.0 (clear neg too) */
			if (i == 3) {
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
1310
/* Texture instructions (TEX/TXP/TXB/DDX/DDY).  Coordinates may need to be
 * staged into temp_reg first: TXP divides xyz by w (via RECIP + MUL) and
 * forces w to 1.0; non-TEMPORARY sources are copied so the TEX unit reads
 * from a GPR it is allowed to address. */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_tex tex;
	struct r600_bc_alu alu;
	unsigned src_gpr;
	int r, i;

	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;

	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		/* Add perspective divide */
		/* temp.w = 1 / src.w */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
		alu.src[0].sel = src_gpr;
		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.last = 1;
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* temp.xyz = src.xyz * temp.w; the MOV below carries last=1
		 * and closes this ALU group */
		for (i = 0; i < 3; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 3;
			alu.src[1].sel = src_gpr;
			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		/* temp.w = 1.0 */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.last = 1;
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
		src_gpr = ctx->temp_reg;
	} else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
		/* stage non-temporary coordinates into temp_reg */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
			alu.src[0].sel = src_gpr;
			alu.src[0].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	/* identity swizzles on both source and destination */
	memset(&tex, 0, sizeof(struct r600_bc_tex));
	tex.inst = ctx->inst_info->r600_opcode;
	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
	tex.sampler_id = tex.resource_id;
	tex.src_gpr = src_gpr;
	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	tex.dst_sel_x = 0;
	tex.dst_sel_y = 1;
	tex.dst_sel_z = 2;
	tex.dst_sel_w = 3;
	tex.src_sel_x = 0;
	tex.src_sel_y = 1;
	tex.src_sel_z = 2;
	tex.src_sel_w = 3;

	/* RECT textures use unnormalized coordinates; everything else is
	 * normalized */
	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
		tex.coord_type_x = 1;
		tex.coord_type_y = 1;
		tex.coord_type_z = 1;
		tex.coord_type_w = 1;
	}
	return r600_bc_add_tex(ctx->bc, &tex);
}
1402
/* LRP: dst = src0 * src1 + (1 - src0) * src2, built in three passes over
 * temp_reg:
 *   1) temp = 1 - src0        (ADD with negated src0)
 *   2) temp = temp * src2
 *   3) temp = src0 * src1 + temp   (MULADD, op3)
 * then copied to the destination with tgsi_helper_copy(). */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	unsigned i;
	int r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	/* 1 - src0 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		alu.src[1] = r600_src[0];
		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
		alu.src[1].neg = 1;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* (1 - src0) * src2 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[1] = r600_src[2];
		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	r = r600_bc_add_literal(ctx->bc, ctx->value);
	if (r)
		return r;

	/* src0 * src1 + (1 - src0) * src2 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
		alu.is_op3 = 1;
		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
		alu.src[1] = r600_src[1];
		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
1481
/* CMP: dst = (src0 < 0) ? src1 : src2.  Lowered with CNDGE, which
 * selects its second operand when the first is >= 0 — hence src1/src2
 * are swapped into the CNDGE operand slots below.  A temp is only used
 * when the write mask is partial (op3 has no write mask). */
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	int use_temp = 0;
	int i, r;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	if (inst->Dst[0].Register.WriteMask != 0xf)
		use_temp = 1;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
		alu.src[0] = r600_src[0];
		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);

		/* CNDGE(a,b,c) = a >= 0 ? b : c, so b = TGSI src2 */
		alu.src[1] = r600_src[2];
		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);

		alu.src[2] = r600_src[1];
		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);

		if (use_temp)
			alu.dst.sel = ctx->temp_reg;
		else {
			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (r)
				return r;
		}
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	if (use_temp)
		return tgsi_helper_copy(ctx, inst);
	return 0;
}
1529
/* XPD (cross product):
 *   dst.x = src0.y*src1.z - src0.z*src1.y
 *   dst.y = src0.z*src1.x - src0.x*src1.z
 *   dst.z = src0.x*src1.y - src0.y*src1.x
 *   dst.w = 0 (per TGSI, the w channel is computed as 0*0 + -0 here;
 *              NOTE(review): spec says w should be 1.0 — confirm)
 * Pass 1: temp = src0.<rot> * src1.<rot>; pass 2 subtracts via MULADD
 * with the temp negated.  Channel rotations are done by the switches. */
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu_src r600_src[3];
	struct r600_bc_alu alu;
	uint32_t use_temp = 0;
	int i, r;

	if (inst->Dst[0].Register.WriteMask != 0xf)
		use_temp = 1;

	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;

	/* pass 1: temp.xyz = src0.zxy * src1.yzx, temp.w = 0 * 0 */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;

		alu.src[0] = r600_src[0];
		switch (i) {
		case 0:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
			break;
		case 1:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
			break;
		case 2:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
			break;
		case 3:
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
		}

		alu.src[1] = r600_src[1];
		switch (i) {
		case 0:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
			break;
		case 1:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
			break;
		case 2:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
			break;
		case 3:
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: dst = src0.yzx * src1.zxy - temp (MULADD with temp negated) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;

		alu.src[0] = r600_src[0];
		switch (i) {
		case 0:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
			break;
		case 1:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
			break;
		case 2:
			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
			break;
		case 3:
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
		}

		alu.src[1] = r600_src[1];
		switch (i) {
		case 0:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
			break;
		case 1:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
			break;
		case 2:
			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
			break;
		case 3:
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].neg = 1;
		alu.src[2].chan = i;

		if (use_temp)
			alu.dst.sel = ctx->temp_reg;
		else {
			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (r)
				return r;
		}
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	if (use_temp)
		return tgsi_helper_copy(ctx, inst);
	return 0;
}
1652
1653 static int tgsi_exp(struct r600_shader_ctx *ctx)
1654 {
1655 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1656 struct r600_bc_alu_src r600_src[3];
1657 struct r600_bc_alu alu;
1658 int r;
1659
1660 /* result.x = 2^floor(src); */
1661 if (inst->Dst[0].Register.WriteMask & 1) {
1662 memset(&alu, 0, sizeof(struct r600_bc_alu));
1663
1664 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
1665 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1666 if (r)
1667 return r;
1668
1669 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1670
1671 alu.dst.sel = ctx->temp_reg;
1672 alu.dst.chan = 0;
1673 alu.dst.write = 1;
1674 alu.last = 1;
1675 r = r600_bc_add_alu(ctx->bc, &alu);
1676 if (r)
1677 return r;
1678
1679 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1680 alu.src[0].sel = ctx->temp_reg;
1681 alu.src[0].chan = 0;
1682
1683 alu.dst.sel = ctx->temp_reg;
1684 alu.dst.chan = 0;
1685 alu.dst.write = 1;
1686 alu.last = 1;
1687 r = r600_bc_add_alu(ctx->bc, &alu);
1688 if (r)
1689 return r;
1690 }
1691
1692 /* result.y = tmp - floor(tmp); */
1693 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1694 memset(&alu, 0, sizeof(struct r600_bc_alu));
1695
1696 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
1697 alu.src[0] = r600_src[0];
1698 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1699 if (r)
1700 return r;
1701 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1702
1703 alu.dst.sel = ctx->temp_reg;
1704 // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1705 // if (r)
1706 // return r;
1707 alu.dst.write = 1;
1708 alu.dst.chan = 1;
1709
1710 alu.last = 1;
1711
1712 r = r600_bc_add_alu(ctx->bc, &alu);
1713 if (r)
1714 return r;
1715 }
1716
1717 /* result.z = RoughApprox2ToX(tmp);*/
1718 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1719 memset(&alu, 0, sizeof(struct r600_bc_alu));
1720 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1721 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1722 if (r)
1723 return r;
1724 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1725
1726 alu.dst.sel = ctx->temp_reg;
1727 alu.dst.write = 1;
1728 alu.dst.chan = 2;
1729
1730 alu.last = 1;
1731
1732 r = r600_bc_add_alu(ctx->bc, &alu);
1733 if (r)
1734 return r;
1735
1736 }
1737
1738 /* result.w = 1.0;*/
1739 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1740 memset(&alu, 0, sizeof(struct r600_bc_alu));
1741
1742 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1743 alu.src[0].sel = V_SQ_ALU_SRC_1;
1744 alu.src[0].chan = 0;
1745
1746 alu.dst.sel = ctx->temp_reg;
1747 alu.dst.chan = 3;
1748 alu.dst.write = 1;
1749 alu.last = 1;
1750 r = r600_bc_add_alu(ctx->bc, &alu);
1751 if (r)
1752 return r;
1753 }
1754 return tgsi_helper_copy(ctx, inst);
1755 }
1756
1757 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
1758 {
1759 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1760 struct r600_bc_alu alu;
1761 int r;
1762
1763 memset(&alu, 0, sizeof(struct r600_bc_alu));
1764 alu.inst = opcode;
1765 alu.predicate = 1;
1766
1767 alu.dst.sel = ctx->temp_reg;
1768 alu.dst.write = 1;
1769 alu.dst.chan = 0;
1770
1771 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1772 if (r)
1773 return r;
1774 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1775 alu.src[1].sel = V_SQ_ALU_SRC_0;
1776 alu.src[1].chan = 0;
1777
1778 alu.last = 1;
1779
1780 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
1781 if (r)
1782 return r;
1783 return 0;
1784 }
1785
/* Emit a POP CF instruction that pops 'pops' levels of the hardware
 * active-mask stack.  NOTE(review): the r600_bc_add_cfinst() return
 * value is ignored here, matching the other CF emitters in this file. */
static int pops(struct r600_shader_ctx *ctx, int pops)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
	ctx->bc->cf_last->pop_count = pops;
	return 0;
}
1792
/* Undo the call-stack depth accounting done by callstack_check_depth()
 * when a control-flow construct of the given kind is closed.  The
 * decrements mirror the increments there (VPM push = 1, WQM/loop = 4). */
static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
{
	switch(reason) {
	case FC_PUSH_VPM:
		ctx->bc->callstack[ctx->bc->call_sp].current--;
		break;
	case FC_PUSH_WQM:
	case FC_LOOP:
		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
		break;
	case FC_REP:
		/* TODO : for 16 vp asic should -= 2; */
		ctx->bc->callstack[ctx->bc->call_sp].current --;
		break;
	}
}
1809
1810 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
1811 {
1812 if (check_max_only) {
1813 int diff;
1814 switch (reason) {
1815 case FC_PUSH_VPM:
1816 diff = 1;
1817 break;
1818 case FC_PUSH_WQM:
1819 diff = 4;
1820 break;
1821 }
1822 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
1823 ctx->bc->callstack[ctx->bc->call_sp].max) {
1824 ctx->bc->callstack[ctx->bc->call_sp].max =
1825 ctx->bc->callstack[ctx->bc->call_sp].current + diff;
1826 }
1827 return;
1828 }
1829 switch (reason) {
1830 case FC_PUSH_VPM:
1831 ctx->bc->callstack[ctx->bc->call_sp].current++;
1832 break;
1833 case FC_PUSH_WQM:
1834 case FC_LOOP:
1835 ctx->bc->callstack[ctx->bc->call_sp].current += 4;
1836 break;
1837 case FC_REP:
1838 ctx->bc->callstack[ctx->bc->call_sp].current++;
1839 break;
1840 }
1841
1842 if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
1843 ctx->bc->callstack[ctx->bc->call_sp].max) {
1844 ctx->bc->callstack[ctx->bc->call_sp].max =
1845 ctx->bc->callstack[ctx->bc->call_sp].current;
1846 }
1847 }
1848
/* Record the last-emitted CF instruction as a "mid" entry (ELSE, BRK,
 * CONT) of the flow-control stack level fc_sp, growing the array by one.
 * NOTE(review): the realloc() result overwrites sp->mid unchecked — on
 * OOM this dereferences NULL; matches the driver's abort-on-OOM style. */
static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
{
	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];

	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
	sp->mid[sp->num_mid] = ctx->bc->cf_last;
	sp->num_mid++;
}
1858
1859 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
1860 {
1861 ctx->bc->fc_sp++;
1862 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
1863 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
1864 }
1865
1866 static void fc_poplevel(struct r600_shader_ctx *ctx)
1867 {
1868 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
1869 if (sp->mid) {
1870 free(sp->mid);
1871 sp->mid = NULL;
1872 }
1873 sp->num_mid = 0;
1874 sp->start = NULL;
1875 sp->type = 0;
1876 ctx->bc->fc_sp--;
1877 }
1878
#if 0
/* Disabled scaffolding for subroutine RET / return-in-loop support
 * (see the "TODO add LOOPRET support" note in tgsi_endloop).  Kept
 * compiled-out until jump offsets and the in-loop return flag are
 * actually implemented. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
1926
/* IF: emit PRED_SETNE (condition != 0) with a stack push, then a JUMP
 * whose target is patched later by tgsi_else()/tgsi_endif() via the
 * flow-control stack entry pushed here. */
static int tgsi_if(struct r600_shader_ctx *ctx)
{
	emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);

	fc_pushlevel(ctx, FC_IF);

	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
	return 0;
}
1938
/* ELSE: emit the ELSE CF instruction (popping one level at its end),
 * record it as the mid-point of the current IF, and retarget the IF's
 * JUMP to this instruction. */
static int tgsi_else(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, ctx->bc->fc_sp);
	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
	return 0;
}
1948
/* ENDIF: emit a POP, then patch the pending jump target — either the
 * IF's JUMP (no ELSE seen: it also gets pop_count = 1) or the ELSE
 * instruction — to the address just past this POP (+2: CF instructions
 * occupy two dwords).  Finally unwind the FC stack and depth counter. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		/* no ELSE: the IF's JUMP skips straight past the POP */
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		/* ELSE present: its jump target is past the POP */
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_decrease_current(ctx, FC_PUSH_VPM);
	return 0;
}
1968
/* BGNLOOP: emit LOOP_START_NO_AL and push an FC_LOOP level; addresses
 * are fixed up in tgsi_endloop(). */
static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);

	fc_pushlevel(ctx, FC_LOOP);

	/* check stack depth */
	callstack_check_depth(ctx, FC_LOOP, 0);
	return 0;
}
1979
/* ENDLOOP: emit LOOP_END and resolve all loop-related jump addresses
 * recorded on the flow-control stack. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	*/
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* TODO add LOOPRET support */
	fc_poplevel(ctx);
	callstack_decrease_current(ctx, FC_LOOP);
	return 0;
}
2008
/* BRK/CONT: find the innermost enclosing FC_LOOP level (slot 0 of the
 * FC stack is a sentinel, so fscp == 0 means "not inside a loop"), emit
 * the break/continue CF instruction from inst_info, record it for the
 * loop's address fixup, and pop one runtime stack level. */
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
	unsigned int fscp;

	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}

	if (fscp == 0) {
		R600_ERR("Break not inside loop/endloop pair\n");
		return -EINVAL;
	}

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fscp);

	pops(ctx, 1);
	/* only the maximum depth is affected; the pop above rebalances */
	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
	return 0;
}
2033
2034 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2035 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2036 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2037 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2038 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2039 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2040 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2041 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2042 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2043 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2044 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2045 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2046 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2047 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2048 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2049 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2050 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2051 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2052 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2053 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2054 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2055 /* gap */
2056 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2057 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2058 /* gap */
2059 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2060 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2061 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2062 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2063 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2064 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2065 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2066 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2067 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2068 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2069 /* gap */
2070 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2071 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
/*
 * NOTE(review): this chunk is the tail of the TGSI -> r600 opcode dispatch
 * table; the array declaration and the earlier entries are above this chunk
 * and not visible here.  Each entry appears to map one TGSI opcode to an
 * r600 ALU/TEX/CF instruction encoding plus an emit callback (tgsi_op2,
 * tgsi_tex, tgsi_unsupported, ...) -- confirm field order against
 * struct r600_shader_tgsi_instruction.  Entry order is load-bearing: the
 * table is indexed by TGSI opcode number, hence the explicit numeric
 * placeholder rows at the "gap" positions for unassigned opcode values.
 *
 * NOTE(review): every line below begins with a stray integer (2072, 2073,
 * ...) that looks like web-viewer line numbers fused into the text during
 * extraction; the upstream file presumably contains only the initializer
 * entries -- verify against the mesa git tree before building.
 */
2072 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2073 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2074 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2075 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2076 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2077 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
2078 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2079 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2080 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2081 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2082 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2083 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2084 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2085 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2086 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
/* SLE uses SETGE with swapped operands (a <= b  ==  b >= a). */
2087 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2088 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2089 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2090 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2091 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2092 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2093 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2094 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2095 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2096 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2097 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2098 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2099 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2100 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2101 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2102 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* SSG has a dedicated emit handler (tgsi_ssg), presumably defined earlier
 * in this file; no single OP2 instruction maps to it, hence NOP here. */
2103 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2104 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2105 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2106 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2107 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2108 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2109 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2110 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2111 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2112 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2113 /* gap */
2114 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2115 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2116 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2117 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2118 /* gap */
2119 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2120 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2121 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2122 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2123 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2124 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2125 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2126 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2127 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2128 /* gap */
2129 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2130 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2131 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2132 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2133 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2134 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2135 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2136 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2137 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2138 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2139 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2140 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2141 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2142 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2143 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2144 /* gap */
2145 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2146 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2147 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2148 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2149 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2150 /* gap */
2151 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2152 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2153 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2154 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2155 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2156 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2157 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2158 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2159 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
2160 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
2161 /* gap */
2162 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2163 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2164 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2165 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2166 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2167 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2168 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2169 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2170 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2171 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2172 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2173 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2174 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2175 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2176 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2177 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2178 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2179 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2180 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2181 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2182 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2183 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2184 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2185 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2186 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2187 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2188 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2189 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2190 };