r600: Drop the "/* gap */" notes.
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600_shader.h"
28 #include "r600d.h"
29
30 #include "sb/sb_public.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_info.h"
34 #include "tgsi/tgsi_parse.h"
35 #include "tgsi/tgsi_scan.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include <stdio.h>
40 #include <errno.h>
41
42 /* CAYMAN notes
43 Why CAYMAN got loops for lots of instructions is explained here.
44
45 -These 8xx t-slot only ops are implemented in all vector slots.
46 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
47 These 8xx t-slot only opcodes become vector ops, with all four
48 slots expecting the arguments on sources a and b. Result is
49 broadcast to all channels.
50 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
51 These 8xx t-slot only opcodes become vector ops in the z, y, and
52 x slots.
53 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
54 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
55 SQRT_IEEE/_64
56 SIN/COS
57 The w slot may have an independent co-issued operation, or if the
58 result is required to be in the w slot, the opcode above may be
59 issued in the w slot as well.
60 The compiler must issue the source argument to slots z, y, and x
61 */
62
63 static int r600_shader_from_tgsi(struct r600_context *rctx,
64 struct r600_pipe_shader *pipeshader,
65 struct r600_shader_key key);
66
67
68 static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
69 int size, unsigned comp_mask) {
70
71 if (!size)
72 return;
73
74 if (ps->num_arrays == ps->max_arrays) {
75 ps->max_arrays += 64;
76 ps->arrays = realloc(ps->arrays, ps->max_arrays *
77 sizeof(struct r600_shader_array));
78 }
79
80 int n = ps->num_arrays;
81 ++ps->num_arrays;
82
83 ps->arrays[n].comp_mask = comp_mask;
84 ps->arrays[n].gpr_start = start_gpr;
85 ps->arrays[n].gpr_count = size;
86 }
87
88 static void r600_dump_streamout(struct pipe_stream_output_info *so)
89 {
90 unsigned i;
91
92 fprintf(stderr, "STREAMOUT\n");
93 for (i = 0; i < so->num_outputs; i++) {
94 unsigned mask = ((1 << so->output[i].num_components) - 1) <<
95 so->output[i].start_component;
96 fprintf(stderr, " %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
97 i, so->output[i].output_buffer,
98 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
99 so->output[i].register_index,
100 mask & 1 ? "x" : "",
101 mask & 2 ? "y" : "",
102 mask & 4 ? "z" : "",
103 mask & 8 ? "w" : "",
104 so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : "");
105 }
106 }
107
108 static int store_shader(struct pipe_context *ctx,
109 struct r600_pipe_shader *shader)
110 {
111 struct r600_context *rctx = (struct r600_context *)ctx;
112 uint32_t *ptr, i;
113
114 if (shader->bo == NULL) {
115 shader->bo = (struct r600_resource*)
116 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
117 if (shader->bo == NULL) {
118 return -ENOMEM;
119 }
120 ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
121 if (R600_BIG_ENDIAN) {
122 for (i = 0; i < shader->shader.bc.ndw; ++i) {
123 ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
124 }
125 } else {
126 memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
127 }
128 rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
129 }
130
131 return 0;
132 }
133
/* Translate a TGSI shader (via r600_shader_from_tgsi), optionally optimize it
 * with the SB backend, upload the bytecode, and build the per-stage hardware
 * state. Returns 0 on success, negative errno on failure; on failure the
 * partially-built shader is destroyed before returning. */
int r600_pipe_shader_create(struct pipe_context *ctx,
			    struct r600_pipe_shader *shader,
			    struct r600_shader_key key)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_pipe_shader_selector *sel = shader->selector;
	int r;
	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
	/* SB optimizer use and disassembly are controlled by debug flags. */
	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
	unsigned export_shader = key.vs_as_es;

	shader->shader.bc.isa = rctx->isa;

	if (dump) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		tgsi_dump(sel->tokens, 0);

		if (sel->so.num_outputs) {
			r600_dump_streamout(&sel->so);
		}
	}
	r = r600_shader_from_tgsi(rctx, shader, key);
	if (r) {
		R600_ERR("translation from TGSI failed !\n");
		goto error;
	}

	/* disable SB for geom shaders - it can't handle the CF_EMIT instructions */
	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
	/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
	use_sb &= !shader->shader.uses_index_registers;

	/* Check if the bytecode has already been built. When using the llvm
	 * backend, r600_shader_from_tgsi() will take care of building the
	 * bytecode.
	 */
	if (!shader->shader.bc.bytecode) {
		r = r600_bytecode_build(&shader->shader.bc);
		if (r) {
			R600_ERR("building bytecode failed !\n");
			goto error;
		}
	}

	/* Either plain disassembly, or SB processing (which can both
	 * optimize and disassemble, depending on dump/use_sb). */
	if (dump && !sb_disasm) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		r600_bytecode_disasm(&shader->shader.bc);
		fprintf(stderr, "______________________________________________________________\n");
	} else if ((dump && sb_disasm) || use_sb) {
		r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
					     dump, use_sb);
		if (r) {
			R600_ERR("r600_sb_bytecode_process failed !\n");
			goto error;
		}
	}

	if (shader->gs_copy_shader) {
		if (dump) {
			// dump copy shader
			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
						     &shader->gs_copy_shader->shader, dump, 0);
			if (r)
				goto error;
		}

		if ((r = store_shader(ctx, shader->gs_copy_shader)))
			goto error;
	}

	/* Store the shader in a buffer. */
	if ((r = store_shader(ctx, shader)))
		goto error;

	/* Build state. */
	switch (shader->shader.processor_type) {
	case TGSI_PROCESSOR_GEOMETRY:
		/* GS also needs VS state for its copy shader. */
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_gs_state(ctx, shader);
			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
		} else {
			r600_update_gs_state(ctx, shader);
			r600_update_vs_state(ctx, shader->gs_copy_shader);
		}
		break;
	case TGSI_PROCESSOR_VERTEX:
		/* A VS feeding a GS is compiled as an export (ES) shader. */
		if (rctx->b.chip_class >= EVERGREEN) {
			if (export_shader)
				evergreen_update_es_state(ctx, shader);
			else
				evergreen_update_vs_state(ctx, shader);
		} else {
			if (export_shader)
				r600_update_es_state(ctx, shader);
			else
				r600_update_vs_state(ctx, shader);
		}
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_ps_state(ctx, shader);
		} else {
			r600_update_ps_state(ctx, shader);
		}
		break;
	default:
		r = -EINVAL;
		goto error;
	}
	return 0;

error:
	r600_pipe_shader_destroy(ctx, shader);
	return r;
}
250
/* Release all resources owned by a pipe shader: the GPU buffer object,
 * the built bytecode, and the prepared command buffer. Safe to call on a
 * partially-constructed shader (used on the error path of create). */
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
	r600_bytecode_clear(&shader->shader.bc);
	r600_release_command_buffer(&shader->command_buffer);
}
257
258 /*
259 * tgsi -> r600 shader
260 */
261 struct r600_shader_tgsi_instruction;
262
/* Decoded TGSI source operand, ready to be turned into ALU src fields. */
struct r600_shader_src {
	unsigned				sel;        /* register/selector (GPR index or V_SQ_ALU_SRC_*) */
	unsigned				swizzle[4]; /* per-channel swizzle */
	unsigned				neg;        /* negate modifier */
	unsigned				abs;        /* absolute-value modifier */
	unsigned				rel;        /* relative (indirect) addressing flag */
	unsigned				kc_bank;    /* constant-cache bank (UBO index) */
	boolean					kc_rel;     /* true if cache bank is indexed */
	uint32_t				value[4];   /* literal values when sel == V_SQ_ALU_SRC_LITERAL */
};
273
/* One evergreen barycentric interpolator: whether the shader needs it and
 * which ij register pair it was assigned. */
struct eg_interp {
	boolean					enabled;
	unsigned				ij_index;
};
278
/* All state carried through the TGSI -> r600 translation of one shader. */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	unsigned				type;                        /* TGSI_PROCESSOR_* */
	unsigned				file_offset[TGSI_FILE_COUNT]; /* GPR base per TGSI register file */
	unsigned				temp_reg;                    /* first driver-temp GPR */
	struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bytecode			*bc;
	struct r600_shader			*shader;
	struct r600_shader_src			src[4];
	uint32_t				*literals;
	uint32_t				nliterals;
	uint32_t				max_driver_temp_used;
	boolean use_llvm;
	/* needed for evergreen interpolation */
	struct eg_interp		eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
	/* evergreen/cayman also store sample mask in face register */
	int					face_gpr;
	/* sample id is .w component stored in fixed point position register */
	int					fixed_pt_position_gpr;
	int					colors_used;
	boolean                 clip_vertex_write;
	unsigned                cv_output;
	unsigned		edgeflag_output;
	int					fragcoord_input;
	int					native_integers;
	int					next_ring_offset;    /* GS: ring offset for the next input */
	int					gs_out_ring_offset;  /* GS: running output ring offset */
	int					gs_next_vertex;
	struct r600_shader	*gs_for_vs;
	int					gs_export_gpr_treg;
};
312
/* Dispatch-table entry mapping one TGSI opcode to its r600 ALU opcode and
 * emit callback. */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;
	unsigned	is_op3;   /* nonzero for three-operand ALU ops */
	unsigned	op;       /* r600 ALU opcode */
	int (*process)(struct r600_shader_ctx *ctx);
};
319
320 static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
321 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
322 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
323 static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
324 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
325 static int tgsi_else(struct r600_shader_ctx *ctx);
326 static int tgsi_endif(struct r600_shader_ctx *ctx);
327 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
328 static int tgsi_endloop(struct r600_shader_ctx *ctx);
329 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
330 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
331 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
332 unsigned int dst_reg);
333 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
334 const struct r600_shader_src *shader_src,
335 unsigned chan);
336
/* Validate the current TGSI instruction against what this backend supports:
 * at most one destination, no predication, and 2D (Dimension) operands only
 * for constants and geometry-shader inputs. Returns 0 or -EINVAL. */
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
	int j;

	if (i->Instruction.NumDstRegs > 1) {
		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
		return -EINVAL;
	}
	if (i->Instruction.Predicate) {
		R600_ERR("predicate unsupported\n");
		return -EINVAL;
	}
#if 0
	if (i->Instruction.Label) {
		R600_ERR("label unsupported\n");
		return -EINVAL;
	}
#endif
	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
		if (i->Src[j].Register.Dimension) {
			switch (i->Src[j].Register.File) {
			case TGSI_FILE_CONSTANT:
				break;
			case TGSI_FILE_INPUT:
				if (ctx->type == TGSI_PROCESSOR_GEOMETRY)
					break;
				/* fallthrough: 2D inputs only allowed in GS */
			default:
				R600_ERR("unsupported src %d (dimension %d)\n", j,
					 i->Src[j].Register.Dimension);
				return -EINVAL;
			}
		}
	}
	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
		if (i->Dst[j].Register.Dimension) {
			R600_ERR("unsupported dst (dimension)\n");
			return -EINVAL;
		}
	}
	return 0;
}
379
380 int eg_get_interpolator_index(unsigned interpolate, unsigned location)
381 {
382 if (interpolate == TGSI_INTERPOLATE_COLOR ||
383 interpolate == TGSI_INTERPOLATE_LINEAR ||
384 interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
385 {
386 int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
387 int loc;
388
389 switch(location) {
390 case TGSI_INTERPOLATE_LOC_CENTER:
391 loc = 1;
392 break;
393 case TGSI_INTERPOLATE_LOC_CENTROID:
394 loc = 2;
395 break;
396 case TGSI_INTERPOLATE_LOC_SAMPLE:
397 default:
398 loc = 0; break;
399 }
400
401 return is_linear * 3 + loc;
402 }
403
404 return -1;
405 }
406
/* Copy the ij register index assigned to this input's interpolator (by
 * evergreen_gpr_count) into the input record itself. */
static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
		int input)
{
	int i = eg_get_interpolator_index(
		ctx->shader->input[input].interpolate,
		ctx->shader->input[input].interpolate_location);
	assert(i >= 0);
	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
}
416
/* Emit the INTERP_ZW/INTERP_XY ALU sequence that interpolates one input
 * attribute from its ij barycentrics into the input's GPR. Eight ALU slots
 * are emitted (two groups of four); only the middle four write results.
 * The exact slot/channel layout is required by the hardware, hence the
 * forced bank swizzle. */
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;
	int gpr = 0, base_chan = 0;
	int ij_index = ctx->shader->input[input].ij_index;

	/* work out gpr and base_chan from index */
	gpr = ij_index / 2;
	base_chan = (2 * (ij_index % 2)) + 1;

	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (i < 4)
			alu.op = ALU_OP2_INTERP_ZW;
		else
			alu.op = ALU_OP2_INTERP_XY;

		/* Only channels 2,3 of the ZW group and 0,1 of the XY group
		 * actually write to the destination. */
		if ((i > 1) && (i < 6)) {
			alu.dst.sel = ctx->shader->input[input].gpr;
			alu.dst.write = 1;
		}

		alu.dst.chan = i % 4;

		alu.src[0].sel = gpr;
		alu.src[0].chan = (base_chan - (i % 2));

		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;

		alu.bank_swizzle_force = SQ_ALU_VEC_210;
		if ((i % 4) == 3)
			alu.last = 1; /* close each group of four slots */
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
457
458 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
459 {
460 int i, r;
461 struct r600_bytecode_alu alu;
462
463 for (i = 0; i < 4; i++) {
464 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
465
466 alu.op = ALU_OP1_INTERP_LOAD_P0;
467
468 alu.dst.sel = ctx->shader->input[input].gpr;
469 alu.dst.write = 1;
470
471 alu.dst.chan = i;
472
473 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
474 alu.src[0].chan = i;
475
476 if (i == 3)
477 alu.last = 1;
478 r = r600_bytecode_add_alu(ctx->bc, &alu);
479 if (r)
480 return r;
481 }
482 return 0;
483 }
484
485 /*
486 * Special export handling in shaders
487 *
488 * shader export ARRAY_BASE for EXPORT_POS:
489 * 60 is position
490 * 61 is misc vector
491 * 62, 63 are clip distance vectors
492 *
493 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
494 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
495 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
496 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
497 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
498 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
499 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
500 * exclusive from render target index)
501 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
502 *
503 *
504 * shader export ARRAY_BASE for EXPORT_PIXEL:
505 * 0-7 CB targets
506 * 61 computed Z vector
507 *
508 * The use of the values exported in the computed Z vector are controlled
509 * by DB_SHADER_CONTROL:
510 * Z_EXPORT_ENABLE - Z as a float in RED
511 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
512 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
513 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
514 * DB_SOURCE_FORMAT - export control restrictions
515 *
516 */
517
518
519 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
520 static int r600_spi_sid(struct r600_shader_io * io)
521 {
522 int index, name = io->name;
523
524 /* These params are handled differently, they don't need
525 * semantic indices, so we'll use 0 for them.
526 */
527 if (name == TGSI_SEMANTIC_POSITION ||
528 name == TGSI_SEMANTIC_PSIZE ||
529 name == TGSI_SEMANTIC_EDGEFLAG ||
530 name == TGSI_SEMANTIC_FACE ||
531 name == TGSI_SEMANTIC_SAMPLEMASK)
532 index = 0;
533 else {
534 if (name == TGSI_SEMANTIC_GENERIC) {
535 /* For generic params simply use sid from tgsi */
536 index = io->sid;
537 } else {
538 /* For non-generic params - pack name and sid into 8 bits */
539 index = 0x80 | (name<<3) | (io->sid);
540 }
541
542 /* Make sure that all really used indices have nonzero value, so
543 * we can just compare it to 0 later instead of comparing the name
544 * with different values to detect special cases. */
545 index++;
546 }
547
548 return index;
549 };
550
551 /* turn input into interpolate on EG */
/* Turn one fragment-shader input into interpolation code on evergreen:
 * allocate its LDS parameter slot, then emit either barycentric
 * interpolation (interpolate > 0) or a flat load. With the LLVM backend
 * only the bookkeeping is done; no ALU code is emitted here. */
static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
{
	int r = 0;

	if (ctx->shader->input[index].spi_sid) {
		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
		if (ctx->shader->input[index].interpolate > 0) {
			evergreen_interp_assign_ij_index(ctx, index);
			if (!ctx->use_llvm)
				r = evergreen_interp_alu(ctx, index);
		} else {
			if (!ctx->use_llvm)
				r = evergreen_interp_flat(ctx, index);
		}
	}
	return r;
}
569
570 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
571 {
572 struct r600_bytecode_alu alu;
573 int i, r;
574 int gpr_front = ctx->shader->input[front].gpr;
575 int gpr_back = ctx->shader->input[back].gpr;
576
577 for (i = 0; i < 4; i++) {
578 memset(&alu, 0, sizeof(alu));
579 alu.op = ALU_OP3_CNDGT;
580 alu.is_op3 = 1;
581 alu.dst.write = 1;
582 alu.dst.sel = gpr_front;
583 alu.src[0].sel = ctx->face_gpr;
584 alu.src[1].sel = gpr_front;
585 alu.src[2].sel = gpr_back;
586
587 alu.dst.chan = i;
588 alu.src[1].chan = i;
589 alu.src[2].chan = i;
590 alu.last = (i==3);
591
592 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
593 return r;
594 }
595
596 return 0;
597 }
598
/* Process one TGSI declaration token: record inputs/outputs on the shader,
 * set up fragment interpolation / GS ring offsets, register indirect temp
 * arrays, and emit any code needed for system values. Returns 0 or -EINVAL
 * for unsupported declarations. */
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
	int r, i, j, count = d->Range.Last - d->Range.First + 1;

	switch (d->Declaration.File) {
	case TGSI_FILE_INPUT:
		i = ctx->shader->ninput;
		assert(i < Elements(ctx->shader->input));
		ctx->shader->ninput += count;
		ctx->shader->input[i].name = d->Semantic.Name;
		ctx->shader->input[i].sid = d->Semantic.Index;
		ctx->shader->input[i].interpolate = d->Interp.Interpolate;
		ctx->shader->input[i].interpolate_location = d->Interp.Location;
		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
			ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
			switch (ctx->shader->input[i].name) {
			case TGSI_SEMANTIC_FACE:
				if (ctx->face_gpr != -1)
					ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
				else
					ctx->face_gpr = ctx->shader->input[i].gpr;
				break;
			case TGSI_SEMANTIC_COLOR:
				ctx->colors_used++;
				break;
			case TGSI_SEMANTIC_POSITION:
				ctx->fragcoord_input = i;
				break;
			}
			if (ctx->bc->chip_class >= EVERGREEN) {
				if ((r = evergreen_interp_input(ctx, i)))
					return r;
			}
		} else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
			/* FIXME probably skip inputs if they aren't passed in the ring */
			ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
			ctx->next_ring_offset += 16;
			if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
				ctx->shader->gs_prim_id_input = true;
		}
		/* Expand a ranged declaration: later entries copy the first
		 * and just bump the GPR. */
		for (j = 1; j < count; ++j) {
			ctx->shader->input[i + j] = ctx->shader->input[i];
			ctx->shader->input[i + j].gpr += j;
		}
		break;
	case TGSI_FILE_OUTPUT:
		i = ctx->shader->noutput++;
		assert(i < Elements(ctx->shader->output));
		ctx->shader->output[i].name = d->Semantic.Name;
		ctx->shader->output[i].sid = d->Semantic.Index;
		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
		ctx->shader->output[i].interpolate = d->Interp.Interpolate;
		ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
		if (ctx->type == TGSI_PROCESSOR_VERTEX ||
		    ctx->type == TGSI_PROCESSOR_GEOMETRY) {
			ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
			/* Flag the misc-vector / clip-dist exports that the
			 * VS output state setup will need (see the export
			 * handling comment above). */
			switch (d->Semantic.Name) {
			case TGSI_SEMANTIC_CLIPDIST:
				ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
				break;
			case TGSI_SEMANTIC_PSIZE:
				ctx->shader->vs_out_misc_write = 1;
				ctx->shader->vs_out_point_size = 1;
				break;
			case TGSI_SEMANTIC_EDGEFLAG:
				ctx->shader->vs_out_misc_write = 1;
				ctx->shader->vs_out_edgeflag = 1;
				ctx->edgeflag_output = i;
				break;
			case TGSI_SEMANTIC_VIEWPORT_INDEX:
				ctx->shader->vs_out_misc_write = 1;
				ctx->shader->vs_out_viewport = 1;
				break;
			case TGSI_SEMANTIC_LAYER:
				ctx->shader->vs_out_misc_write = 1;
				ctx->shader->vs_out_layer = 1;
				break;
			case TGSI_SEMANTIC_CLIPVERTEX:
				ctx->clip_vertex_write = TRUE;
				ctx->cv_output = i;
				break;
			}
			if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
				ctx->gs_out_ring_offset += 16;
			}
		} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
			switch (d->Semantic.Name) {
			case TGSI_SEMANTIC_COLOR:
				ctx->shader->nr_ps_max_color_exports++;
				break;
			}
		}
		break;
	case TGSI_FILE_TEMPORARY:
		/* Only indirectly-addressed temp arrays need tracking. */
		if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
			if (d->Array.ArrayID) {
				r600_add_gpr_array(ctx->shader,
						   ctx->file_offset[TGSI_FILE_TEMPORARY] +
						   d->Range.First,
						   d->Range.Last - d->Range.First + 1, 0x0F);
			}
		}
		break;

	case TGSI_FILE_CONSTANT:
	case TGSI_FILE_SAMPLER:
	case TGSI_FILE_ADDRESS:
		break;

	case TGSI_FILE_SYSTEM_VALUE:
		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
			break; /* Already handled from allocate_system_value_inputs */
		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
			/* GPR0.w holds the instance id; without native
			 * integers convert it to float in place. */
			if (!ctx->native_integers) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_INT_TO_FLT;
				alu.src[0].sel = 0;
				alu.src[0].chan = 3;

				alu.dst.sel = 0;
				alu.dst.chan = 3;
				alu.dst.write = 1;
				alu.last = 1;

				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
			break;
		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
			break;
		else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
			break;
		/* fallthrough: any other system value is unsupported */
	default:
		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
		return -EINVAL;
	}
	return 0;
}
743
744 static int r600_get_temp(struct r600_shader_ctx *ctx)
745 {
746 return ctx->temp_reg + ctx->max_driver_temp_used++;
747 }
748
/* Scan the TGSI shader for system values (samplemask, sampleid/samplepos)
 * and for interpolateAt* instructions that imply them, then allocate one
 * GPR per needed value starting at gpr_offset, recording it both as a
 * shader input and in the corresponding ctx field (face_gpr /
 * fixed_pt_position_gpr). Returns gpr_offset plus the number of GPRs
 * allocated. */
static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
{
	struct tgsi_parse_context parse;
	struct {
		boolean enabled;
		int *reg;              /* ctx field that receives the GPR */
		unsigned name, alternate_name;
	} inputs[2] = {
		{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */

		{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
	};
	int i, k, num_regs = 0;

	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
		return 0;
	}

	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
			{
				int interpolate, location, k;

				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
					inputs[1].enabled = true; /* needs SAMPLEID */
				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
					/* Needs sample positions, currently those are always available */
				} else {
					location = TGSI_INTERPOLATE_LOC_CENTROID;
				}

				/* Also mark the implied interpolator as used. */
				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
				k = eg_get_interpolator_index(interpolate, location);
				ctx->eg_interpolators[k].enabled = true;
			}
		} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
				for (k = 0; k < Elements(inputs); k++) {
					if (d->Semantic.Name == inputs[k].name ||
						d->Semantic.Name == inputs[k].alternate_name) {
						inputs[k].enabled = true;
					}
				}
			}
		}
	}

	tgsi_parse_free(&parse);

	for (i = 0; i < Elements(inputs); i++) {
		boolean enabled = inputs[i].enabled;
		int *reg = inputs[i].reg;
		unsigned name = inputs[i].name;

		if (enabled) {
			int gpr = gpr_offset + num_regs++;

			// add to inputs, allocate a gpr
			k = ctx->shader->ninput ++;
			ctx->shader->input[k].name = name;
			ctx->shader->input[k].sid = 0;
			ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
			ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
			*reg = ctx->shader->input[k].gpr = gpr;
		}
	}

	return gpr_offset + num_regs;
}
828
829 /*
830 * for evergreen we need to scan the shader to find the number of GPRs we need to
831 * reserve for interpolation and system values
832 *
833 * we need to know if we are going to emit
834 * any sample or centroid inputs
835 * if perspective and linear are required
836 */
/* Count the GPRs evergreen needs to reserve before user inputs: mark every
 * barycentric interpolator the shader uses (from input declarations and
 * interpolateAt* instructions), assign ij indices by priority, then hand
 * off to allocate_system_value_inputs for the system-value GPRs. Returns
 * the total number of reserved GPRs. */
static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
{
	int i;
	int num_baryc;
	struct tgsi_parse_context parse;

	memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));

	for (i = 0; i < ctx->info.num_inputs; i++) {
		int k;
		/* skip position/face/mask/sampleid */
		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
			continue;

		k = eg_get_interpolator_index(
			ctx->info.input_interpolate[i],
			ctx->info.input_interpolate_loc[i]);
		if (k >= 0)
			ctx->eg_interpolators[k].enabled = TRUE;
	}

	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
		return 0;
	}

	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
			{
				int interpolate, location, k;

				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
				} else {
					location = TGSI_INTERPOLATE_LOC_CENTROID;
				}

				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
				k = eg_get_interpolator_index(interpolate, location);
				ctx->eg_interpolators[k].enabled = true;
			}
		}
	}

	tgsi_parse_free(&parse);

	/* assign gpr to each interpolator according to priority */
	num_baryc = 0;
	for (i = 0; i < Elements(ctx->eg_interpolators); i++) {
		if (ctx->eg_interpolators[i].enabled) {
			ctx->eg_interpolators[i].ij_index = num_baryc;
			num_baryc ++;
		}
	}

	/* XXX PULL MODEL and LINE STIPPLE */

	/* Two ij pairs fit in one GPR, hence the halving (rounded up). */
	num_baryc = (num_baryc + 1) >> 1;
	return allocate_system_value_inputs(ctx, num_baryc);
}
908
909 /* sample_id_sel == NULL means fetch for current sample */
/* Fetch a sample position from the sample-positions constant buffer into a
 * fresh temp GPR and return that GPR. With sample_id == NULL the current
 * sample's id (fixed_pt_position_gpr.w) indexes the buffer; otherwise the
 * given source/channel is first copied to the temp and used as the index. */
static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
{
	struct r600_bytecode_vtx vtx;
	int r, t1;

	assert(ctx->fixed_pt_position_gpr != -1);

	t1 = r600_get_temp(ctx);

	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
	if (sample_id == NULL) {
		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
		vtx.src_sel_x = 3;
	}
	else {
		/* Copy the requested sample id into t1.x to use as the
		 * fetch index. */
		struct r600_bytecode_alu alu;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
		alu.dst.sel = t1;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		vtx.src_gpr = t1;
		vtx.src_sel_x = 0;
	}
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = t1;
	vtx.dst_sel_x = 0;
	vtx.dst_sel_y = 1;
	vtx.dst_sel_z = 2;
	vtx.dst_sel_w = 3;
	vtx.data_format = FMT_32_32_32_32_FLOAT;
	vtx.num_format_all = 2;
	vtx.format_comp_all = 1;
	vtx.use_const_fields = 0;
	vtx.offset = 1;			// first element is size of buffer
	vtx.endian = r600_endian_swap(32);
	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */

	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
	if (r)
		return r;

	return t1;
}
963
/* Decode one TGSI source operand into an r600_shader_src: swizzles and
 * modifiers, literal/immediate folding, system-value register mapping,
 * and relative/2D constant addressing. */
static void tgsi_src(struct r600_shader_ctx *ctx,
		     const struct tgsi_full_src_register *tgsi_src,
		     struct r600_shader_src *r600_src)
{
	memset(r600_src, 0, sizeof(*r600_src));
	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
	r600_src->neg = tgsi_src->Register.Negate;
	r600_src->abs = tgsi_src->Register.Absolute;

	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
		int index;
		/* A uniformly-swizzled immediate may map onto a hardware
		 * inline constant (0, 1, 0.5, ...) instead of a literal. */
		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {

			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
				return;
		}
		index = tgsi_src->Register.Index;
		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
		/* Route each system value to the register/channel where the
		 * hardware (or earlier allocation) placed it. */
		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
			r600_src->swizzle[0] = 2; // Z value
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = ctx->face_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
			r600_src->swizzle[0] = 3; // W value
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = ctx->fixed_pt_position_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 1;
			r600_src->swizzle[2] = 4;
			r600_src->swizzle[3] = 4;
			r600_src->sel = load_sample_position(ctx, NULL, -1);
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 1;
		}
	} else {
		if (tgsi_src->Register.Indirect)
			r600_src->rel = V_SQ_REL_RELATIVE;
		r600_src->sel = tgsi_src->Register.Index;
		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
	}
	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
		if (tgsi_src->Register.Dimension) {
			/* 2D constant: dimension index selects the UBO bank. */
			r600_src->kc_bank = tgsi_src->Dimension.Index;
			if (tgsi_src->Dimension.Indirect) {
				r600_src->kc_rel = 1;
			}
		}
	}
}
1043
1044 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
1045 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
1046 unsigned int dst_reg)
1047 {
1048 struct r600_bytecode_vtx vtx;
1049 unsigned int ar_reg;
1050 int r;
1051
1052 if (offset) {
1053 struct r600_bytecode_alu alu;
1054
1055 memset(&alu, 0, sizeof(alu));
1056
1057 alu.op = ALU_OP2_ADD_INT;
1058 alu.src[0].sel = ctx->bc->ar_reg;
1059 alu.src[0].chan = ar_chan;
1060
1061 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1062 alu.src[1].value = offset;
1063
1064 alu.dst.sel = dst_reg;
1065 alu.dst.chan = ar_chan;
1066 alu.dst.write = 1;
1067 alu.last = 1;
1068
1069 if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1070 return r;
1071
1072 ar_reg = dst_reg;
1073 } else {
1074 ar_reg = ctx->bc->ar_reg;
1075 }
1076
1077 memset(&vtx, 0, sizeof(vtx));
1078 vtx.buffer_id = cb_idx;
1079 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1080 vtx.src_gpr = ar_reg;
1081 vtx.src_sel_x = ar_chan;
1082 vtx.mega_fetch_count = 16;
1083 vtx.dst_gpr = dst_reg;
1084 vtx.dst_sel_x = 0; /* SEL_X */
1085 vtx.dst_sel_y = 1; /* SEL_Y */
1086 vtx.dst_sel_z = 2; /* SEL_Z */
1087 vtx.dst_sel_w = 3; /* SEL_W */
1088 vtx.data_format = FMT_32_32_32_32_FLOAT;
1089 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
1090 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
1091 vtx.endian = r600_endian_swap(32);
1092 vtx.buffer_index_mode = cb_rel; // cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE;
1093
1094 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1095 return r;
1096
1097 return 0;
1098 }
1099
1100 static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
1101 {
1102 struct r600_bytecode_vtx vtx;
1103 int r;
1104 unsigned index = src->Register.Index;
1105 unsigned vtx_id = src->Dimension.Index;
1106 int offset_reg = vtx_id / 3;
1107 int offset_chan = vtx_id % 3;
1108
1109 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
1110 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
1111
1112 if (offset_reg == 0 && offset_chan == 2)
1113 offset_chan = 3;
1114
1115 if (src->Dimension.Indirect) {
1116 int treg[3];
1117 int t2;
1118 struct r600_bytecode_alu alu;
1119 int r, i;
1120
1121 /* you have got to be shitting me -
1122 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
1123 at least this is what fglrx seems to do. */
1124 for (i = 0; i < 3; i++) {
1125 treg[i] = r600_get_temp(ctx);
1126 }
1127 t2 = r600_get_temp(ctx);
1128 for (i = 0; i < 3; i++) {
1129 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1130 alu.op = ALU_OP1_MOV;
1131 alu.src[0].sel = 0;
1132 alu.src[0].chan = i == 2 ? 3 : i;
1133 alu.dst.sel = treg[i];
1134 alu.dst.chan = 0;
1135 alu.dst.write = 1;
1136 alu.last = 1;
1137 r = r600_bytecode_add_alu(ctx->bc, &alu);
1138 if (r)
1139 return r;
1140 }
1141 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1142 alu.op = ALU_OP1_MOV;
1143 alu.src[0].sel = treg[0];
1144 alu.src[0].rel = 1;
1145 alu.dst.sel = t2;
1146 alu.dst.write = 1;
1147 alu.last = 1;
1148 r = r600_bytecode_add_alu(ctx->bc, &alu);
1149 if (r)
1150 return r;
1151 offset_reg = t2;
1152 }
1153
1154
1155 memset(&vtx, 0, sizeof(vtx));
1156 vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
1157 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
1158 vtx.src_gpr = offset_reg;
1159 vtx.src_sel_x = offset_chan;
1160 vtx.offset = index * 16; /*bytes*/
1161 vtx.mega_fetch_count = 16;
1162 vtx.dst_gpr = dst_reg;
1163 vtx.dst_sel_x = 0; /* SEL_X */
1164 vtx.dst_sel_y = 1; /* SEL_Y */
1165 vtx.dst_sel_z = 2; /* SEL_Z */
1166 vtx.dst_sel_w = 3; /* SEL_W */
1167 if (ctx->bc->chip_class >= EVERGREEN) {
1168 vtx.use_const_fields = 1;
1169 } else {
1170 vtx.data_format = FMT_32_32_32_32_FLOAT;
1171 }
1172
1173 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1174 return r;
1175
1176 return 0;
1177 }
1178
1179 static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
1180 {
1181 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1182 int i;
1183
1184 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1185 struct tgsi_full_src_register *src = &inst->Src[i];
1186
1187 if (src->Register.File == TGSI_FILE_INPUT) {
1188 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) {
1189 /* primitive id is in R0.z */
1190 ctx->src[i].sel = 0;
1191 ctx->src[i].swizzle[0] = 2;
1192 }
1193 }
1194 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
1195 int treg = r600_get_temp(ctx);
1196
1197 fetch_gs_input(ctx, src, treg);
1198 ctx->src[i].sel = treg;
1199 }
1200 }
1201 return 0;
1202 }
1203
1204 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1205 {
1206 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1207 struct r600_bytecode_alu alu;
1208 int i, j, k, nconst, r;
1209
1210 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1211 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1212 nconst++;
1213 }
1214 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1215 }
1216 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1217 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1218 continue;
1219 }
1220
1221 if (ctx->src[i].kc_rel)
1222 ctx->shader->uses_index_registers = true;
1223
1224 if (ctx->src[i].rel) {
1225 int chan = inst->Src[i].Indirect.Swizzle;
1226 int treg = r600_get_temp(ctx);
1227 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg)))
1228 return r;
1229
1230 ctx->src[i].kc_bank = 0;
1231 ctx->src[i].kc_rel = 0;
1232 ctx->src[i].sel = treg;
1233 ctx->src[i].rel = 0;
1234 j--;
1235 } else if (j > 0) {
1236 int treg = r600_get_temp(ctx);
1237 for (k = 0; k < 4; k++) {
1238 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1239 alu.op = ALU_OP1_MOV;
1240 alu.src[0].sel = ctx->src[i].sel;
1241 alu.src[0].chan = k;
1242 alu.src[0].rel = ctx->src[i].rel;
1243 alu.src[0].kc_bank = ctx->src[i].kc_bank;
1244 alu.src[0].kc_rel = ctx->src[i].kc_rel;
1245 alu.dst.sel = treg;
1246 alu.dst.chan = k;
1247 alu.dst.write = 1;
1248 if (k == 3)
1249 alu.last = 1;
1250 r = r600_bytecode_add_alu(ctx->bc, &alu);
1251 if (r)
1252 return r;
1253 }
1254 ctx->src[i].sel = treg;
1255 ctx->src[i].rel =0;
1256 j--;
1257 }
1258 }
1259 return 0;
1260 }
1261
1262 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
1263 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1264 {
1265 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1266 struct r600_bytecode_alu alu;
1267 int i, j, k, nliteral, r;
1268
1269 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1270 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1271 nliteral++;
1272 }
1273 }
1274 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1275 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1276 int treg = r600_get_temp(ctx);
1277 for (k = 0; k < 4; k++) {
1278 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1279 alu.op = ALU_OP1_MOV;
1280 alu.src[0].sel = ctx->src[i].sel;
1281 alu.src[0].chan = k;
1282 alu.src[0].value = ctx->src[i].value[k];
1283 alu.dst.sel = treg;
1284 alu.dst.chan = k;
1285 alu.dst.write = 1;
1286 if (k == 3)
1287 alu.last = 1;
1288 r = r600_bytecode_add_alu(ctx->bc, &alu);
1289 if (r)
1290 return r;
1291 }
1292 ctx->src[i].sel = treg;
1293 j--;
1294 }
1295 }
1296 return 0;
1297 }
1298
1299 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1300 {
1301 int i, r, count = ctx->shader->ninput;
1302
1303 for (i = 0; i < count; i++) {
1304 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1305 r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input);
1306 if (r)
1307 return r;
1308 }
1309 }
1310 return 0;
1311 }
1312
/* Emit stream-output (transform feedback) writes for every output listed
 * in 'so'.  Each selected shader output is stored to one of up to four
 * stream-output buffers with a MEM_STREAM* CF instruction.
 * Returns 0 on success or a negative errno code on failure. */
static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
{
	unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
	int i, j, r;

	/* Sanity checking. */
	if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
		R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
		r = -EINVAL;
		goto out_err;
	}
	/* The CF opcodes used below can only address buffers 0-3. */
	for (i = 0; i < so->num_outputs; i++) {
		if (so->output[i].output_buffer >= 4) {
			R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
				 so->output[i].output_buffer);
			r = -EINVAL;
			goto out_err;
		}
	}

	/* Initialize locations where the outputs are stored. */
	for (i = 0; i < so->num_outputs; i++) {
		so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;

		/* Lower outputs with dst_offset < start_component.
		 *
		 * We can only output 4D vectors with a write mask, e.g. we can
		 * only output the W component at offset 3, etc. If we want
		 * to store Y, Z, or W at buffer offset 0, we need to use MOV
		 * to move it to X and output X. */
		if (so->output[i].dst_offset < so->output[i].start_component) {
			unsigned tmp = r600_get_temp(ctx);

			/* Shift the used components down so they begin at
			 * channel X of the temp. */
			for (j = 0; j < so->output[i].num_components; j++) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = so_gpr[i];
				alu.src[0].chan = so->output[i].start_component + j;

				alu.dst.sel = tmp;
				alu.dst.chan = j;
				alu.dst.write = 1;
				if (j == so->output[i].num_components - 1)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			/* The output now starts at channel X of the temp. */
			so->output[i].start_component = 0;
			so_gpr[i] = tmp;
		}
	}

	/* Write outputs to buffers. */
	for (i = 0; i < so->num_outputs; i++) {
		struct r600_bytecode_output output;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = so_gpr[i];
		output.elem_size = so->output[i].num_components;
		output.array_base = so->output[i].dst_offset - so->output[i].start_component;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
		output.burst_count = 1;
		/* array_size is an upper limit for the burst_count
		 * with MEM_STREAM instructions */
		output.array_size = 0xFFF;
		output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
		/* Evergreen+ encodes the target buffer in a per-buffer opcode
		 * variant; older chips use one opcode per stream. */
		if (ctx->bc->chip_class >= EVERGREEN) {
			switch (so->output[i].output_buffer) {
			case 0:
				output.op = CF_OP_MEM_STREAM0_BUF0;
				break;
			case 1:
				output.op = CF_OP_MEM_STREAM0_BUF1;
				break;
			case 2:
				output.op = CF_OP_MEM_STREAM0_BUF2;
				break;
			case 3:
				output.op = CF_OP_MEM_STREAM0_BUF3;
				break;
			}
		} else {
			switch (so->output[i].output_buffer) {
			case 0:
				output.op = CF_OP_MEM_STREAM0;
				break;
			case 1:
				output.op = CF_OP_MEM_STREAM1;
				break;
			case 2:
				output.op = CF_OP_MEM_STREAM2;
				break;
			case 3:
				output.op = CF_OP_MEM_STREAM3;
				break;
			}
		}
		r = r600_bytecode_add_output(ctx->bc, &output);
		if (r)
			goto out_err;
	}
	return 0;
out_err:
	return r;
}
1420
1421 static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
1422 {
1423 struct r600_bytecode_alu alu;
1424 unsigned reg;
1425
1426 if (!ctx->shader->vs_out_edgeflag)
1427 return;
1428
1429 reg = ctx->shader->output[ctx->edgeflag_output].gpr;
1430
1431 /* clamp(x, 0, 1) */
1432 memset(&alu, 0, sizeof(alu));
1433 alu.op = ALU_OP1_MOV;
1434 alu.src[0].sel = reg;
1435 alu.dst.sel = reg;
1436 alu.dst.write = 1;
1437 alu.dst.clamp = 1;
1438 alu.last = 1;
1439 r600_bytecode_add_alu(ctx->bc, &alu);
1440
1441 memset(&alu, 0, sizeof(alu));
1442 alu.op = ALU_OP1_FLT_TO_INT;
1443 alu.src[0].sel = reg;
1444 alu.dst.sel = reg;
1445 alu.dst.write = 1;
1446 alu.last = 1;
1447 r600_bytecode_add_alu(ctx->bc, &alu);
1448 }
1449
/* Build the "GS copy shader": a VS-like program that runs after the
 * geometry shader, reads the emitted vertices back from the GSVS ring
 * buffer and re-exports them as regular position/parameter exports
 * (plus stream-output, if requested).  On success the new shader is
 * stored in gs->gs_copy_shader and its bytecode is built. */
static int generate_gs_copy_shader(struct r600_context *rctx,
				   struct r600_pipe_shader *gs,
				   struct pipe_stream_output_info *so)
{
	struct r600_shader_ctx ctx = {};
	struct r600_shader *gs_shader = &gs->shader;
	struct r600_pipe_shader *cshader;
	int ocnt = gs_shader->noutput;
	struct r600_bytecode_alu alu;
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_output output;
	struct r600_bytecode_cf *cf_jump, *cf_pop,
		*last_exp_pos = NULL, *last_exp_param = NULL;
	int i, next_clip_pos = 61, next_param = 0;

	cshader = calloc(1, sizeof(struct r600_pipe_shader));
	if (!cshader)
		/* NOTE(review): returns 0 (success) on allocation failure and
		 * leaves gs->gs_copy_shader NULL - verify callers handle this. */
		return 0;

	/* The copy shader exports exactly the GS's outputs. */
	memcpy(cshader->shader.output, gs_shader->output, ocnt *
	       sizeof(struct r600_shader_io));

	cshader->shader.noutput = ocnt;

	ctx.shader = &cshader->shader;
	ctx.bc = &ctx.shader->bc;
	ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;

	r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
			   rctx->screen->has_compressed_msaa_texturing);

	ctx.bc->isa = rctx->isa;

	/* Decode R0.x: low bits hold the ring read offset, the two top bits
	 * are split off into R0.y below. */
	/* R0.x = R0.x & 0x3fffffff */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_AND_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x3fffffff;
	alu.dst.write = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* R0.y = R0.x >> 30 */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_LSHR_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x1e;
	alu.dst.chan = 1;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* Predicate on R0.y == 0 and skip the fetch/export body for other
	 * values (the JUMP below branches past the POP). */
	/* PRED_SETE_INT __, R0.y, 0 */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_PRED_SETE_INT;
	alu.src[0].chan = 1;
	alu.src[1].sel = V_SQ_ALU_SRC_0;
	alu.execute_mask = 1;
	alu.update_pred = 1;
	alu.last = 1;
	r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);

	r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
	cf_jump = ctx.bc->cf_last;	/* target patched once cf_pop is known */

	/* fetch vertex data from GSVS ring */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];
		out->gpr = i + 1;
		out->ring_offset = i * 16;	/* 16 bytes per vec4 slot */

		memset(&vtx, 0, sizeof(vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
		vtx.fetch_type = 2;	/* no index offset */
		vtx.offset = out->ring_offset;
		vtx.dst_gpr = out->gpr;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		if (rctx->b.chip_class >= EVERGREEN) {
			vtx.use_const_fields = 1;
		} else {
			vtx.data_format = FMT_32_32_32_32_FLOAT;
		}

		r600_bytecode_add_vtx(ctx.bc, &vtx);
	}

	/* XXX handle clipvertex, streamout? */
	emit_streamout(&ctx, so);

	/* export vertex data */
	/* XXX factor out common code with r600_shader_from_tgsi ? */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];

		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
			continue;

		/* Default: PARAM export with identity swizzle; the switch
		 * below overrides target/type/swizzle per semantic. */
		memset(&output, 0, sizeof(output));
		output.gpr = out->gpr;
		output.elem_size = 3;
		output.swizzle_x = 0;
		output.swizzle_y = 1;
		output.swizzle_z = 2;
		output.swizzle_w = 3;
		output.burst_count = 1;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		output.op = CF_OP_EXPORT;
		switch (out->name) {
		case TGSI_SEMANTIC_POSITION:
			output.array_base = 60;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;

		case TGSI_SEMANTIC_PSIZE:
			/* Point size goes into POS export 61 (misc vec), X only. */
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_point_size = 1;
			break;
		case TGSI_SEMANTIC_LAYER:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			/* Layer goes into POS export 61 (misc vec), Z channel. */
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 0;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_layer = 1;
			break;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			/* Viewport index goes into POS export 61, W channel. */
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_viewport = 1;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 0;
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			/* spi_sid is 0 for clipdistance outputs that were generated
			 * for clipvertex - we don't need to pass them to PS */
			ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = next_clip_pos++;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;
		case TGSI_SEMANTIC_FOG:
			output.swizzle_y = 4; /* 0 */
			output.swizzle_z = 4; /* 0 */
			output.swizzle_w = 5; /* 1 */
			break;
		default:
			output.array_base = next_param++;
			break;
		}
		r600_bytecode_add_output(ctx.bc, &output);
		if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
			last_exp_param = ctx.bc->cf_last;
		else
			last_exp_pos = ctx.bc->cf_last;
	}

	/* The hardware needs at least one POS export; emit a dummy one. */
	if (!last_exp_pos) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2;	/* NOTE(review): dead store, overwritten below */
		output.op = CF_OP_EXPORT;
		output.array_base = 60;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_pos = ctx.bc->cf_last;
	}

	/* Likewise at least one PARAM export. */
	if (!last_exp_param) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2;	/* NOTE(review): dead store, overwritten below */
		output.op = CF_OP_EXPORT;
		output.array_base = next_param++;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_param = ctx.bc->cf_last;
	}

	/* Mark the final export of each kind as EXPORT_DONE. */
	last_exp_pos->op = CF_OP_EXPORT_DONE;
	last_exp_param->op = CF_OP_EXPORT_DONE;

	r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
	cf_pop = ctx.bc->cf_last;

	/* Patch the predicate JUMP and the POP to land just past the POP. */
	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	if (ctx.bc->chip_class == CAYMAN)
		cm_bytecode_add_cf_end(ctx.bc);
	else {
		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
		ctx.bc->cf_last->end_of_program = 1;
	}

	gs->gs_copy_shader = cshader;

	ctx.bc->nstack = 1;
	cshader->shader.ring_item_size = ocnt * 16;

	return r600_bytecode_build(ctx.bc);
}
1701
/* Emit MEM_RING writes storing this shader's outputs into the ESGS ring
 * (for a VS running as ES) or the GSVS ring (for a GS).  When 'ind' is
 * true the ring address is taken indirectly from gs_export_gpr_treg,
 * which is then advanced by one vertex; otherwise the address is a
 * static offset derived from gs_next_vertex. */
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
{
	struct r600_bytecode_output output;
	int i, k, ring_offset;

	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->gs_for_vs) {
			/* for ES we need to lookup corresponding ring offset expected by GS
			 * (map this output to GS input by name and sid) */
			/* FIXME precompute offsets */
			ring_offset = -1;
			for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
				struct r600_shader_io *out = &ctx->shader->output[i];
				if (in->name == out->name && in->sid == out->sid)
					ring_offset = in->ring_offset;
			}

			/* Output not consumed by the GS - nothing to write. */
			if (ring_offset == -1)
				continue;
		} else
			ring_offset = i * 16;

		/* next_ring_offset after parsing input decls contains total size of
		 * single vertex data, gs_next_vertex - current vertex index */
		if (!ind)
			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;

		/* get a temp and add the ring offset to the next vertex base in the shader */
		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = ctx->shader->output[i].gpr;
		output.elem_size = 3;
		output.comp_mask = 0xF;
		output.burst_count = 1;

		if (ind)
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
		else
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
		output.op = CF_OP_MEM_RING;

		if (ind) {
			output.array_base = ring_offset >> 2; /* in dwords */
			output.array_size = 0xfff;
			output.index_gpr = ctx->gs_export_gpr_treg;
		} else
			output.array_base = ring_offset >> 2; /* in dwords */
		r600_bytecode_add_output(ctx->bc, &output);
	}

	if (ind) {
		/* Advance the indirect export base by one vertex
		 * (gs_out_ring_offset is in bytes, the index in 16-byte units). */
		struct r600_bytecode_alu alu;
		int r;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->gs_export_gpr_treg;
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
		alu.dst.sel = ctx->gs_export_gpr_treg;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	++ctx->gs_next_vertex;
	return 0;
}
1772
1773 static int r600_shader_from_tgsi(struct r600_context *rctx,
1774 struct r600_pipe_shader *pipeshader,
1775 struct r600_shader_key key)
1776 {
1777 struct r600_screen *rscreen = rctx->screen;
1778 struct r600_shader *shader = &pipeshader->shader;
1779 struct tgsi_token *tokens = pipeshader->selector->tokens;
1780 struct pipe_stream_output_info so = pipeshader->selector->so;
1781 struct tgsi_full_immediate *immediate;
1782 struct tgsi_full_property *property;
1783 struct r600_shader_ctx ctx;
1784 struct r600_bytecode_output output[32];
1785 unsigned output_done, noutput;
1786 unsigned opcode;
1787 int i, j, k, r = 0;
1788 int next_param_base = 0, next_clip_base;
1789 int max_color_exports = MAX2(key.nr_cbufs, 1);
1790 /* Declarations used by llvm code */
1791 bool use_llvm = false;
1792 bool indirect_gprs;
1793 bool ring_outputs = false;
1794 bool pos_emitted = false;
1795
1796 #ifdef R600_USE_LLVM
1797 use_llvm = rscreen->b.debug_flags & DBG_LLVM;
1798 #endif
1799 ctx.bc = &shader->bc;
1800 ctx.shader = shader;
1801 ctx.native_integers = true;
1802
1803 shader->vs_as_es = key.vs_as_es;
1804
1805 r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
1806 rscreen->has_compressed_msaa_texturing);
1807 ctx.tokens = tokens;
1808 tgsi_scan_shader(tokens, &ctx.info);
1809 shader->indirect_files = ctx.info.indirect_files;
1810 indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
1811 tgsi_parse_init(&ctx.parse, tokens);
1812 ctx.type = ctx.parse.FullHeader.Processor.Processor;
1813 shader->processor_type = ctx.type;
1814 ctx.bc->type = shader->processor_type;
1815
1816 ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
1817
1818 if (key.vs_as_es) {
1819 ctx.gs_for_vs = &rctx->gs_shader->current->shader;
1820 } else {
1821 ctx.gs_for_vs = NULL;
1822 }
1823
1824 ctx.next_ring_offset = 0;
1825 ctx.gs_out_ring_offset = 0;
1826 ctx.gs_next_vertex = 0;
1827
1828 shader->uses_index_registers = false;
1829 ctx.face_gpr = -1;
1830 ctx.fixed_pt_position_gpr = -1;
1831 ctx.fragcoord_input = -1;
1832 ctx.colors_used = 0;
1833 ctx.clip_vertex_write = 0;
1834
1835 shader->nr_ps_color_exports = 0;
1836 shader->nr_ps_max_color_exports = 0;
1837
1838 shader->two_side = key.color_two_side;
1839
1840 /* register allocations */
1841 /* Values [0,127] correspond to GPR[0..127].
1842 * Values [128,159] correspond to constant buffer bank 0
1843 * Values [160,191] correspond to constant buffer bank 1
1844 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1845 * Values [256,287] correspond to constant buffer bank 2 (EG)
1846 * Values [288,319] correspond to constant buffer bank 3 (EG)
1847 * Other special values are shown in the list below.
1848 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1849 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1850 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1851 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1852 * 248 SQ_ALU_SRC_0: special constant 0.0.
1853 * 249 SQ_ALU_SRC_1: special constant 1.0 float.
1854 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1855 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1856 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1857 * 253 SQ_ALU_SRC_LITERAL: literal constant.
1858 * 254 SQ_ALU_SRC_PV: previous vector result.
1859 * 255 SQ_ALU_SRC_PS: previous scalar result.
1860 */
1861 for (i = 0; i < TGSI_FILE_COUNT; i++) {
1862 ctx.file_offset[i] = 0;
1863 }
1864
1865 #ifdef R600_USE_LLVM
1866 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) {
1867 fprintf(stderr, "Warning: R600 LLVM backend does not support "
1868 "indirect adressing. Falling back to TGSI "
1869 "backend.\n");
1870 use_llvm = 0;
1871 }
1872 #endif
1873 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1874 ctx.file_offset[TGSI_FILE_INPUT] = 1;
1875 if (!use_llvm) {
1876 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
1877 }
1878 }
1879 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
1880 if (ctx.bc->chip_class >= EVERGREEN)
1881 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1882 else
1883 ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
1884 }
1885 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
1886 /* FIXME 1 would be enough in some cases (3 or less input vertices) */
1887 ctx.file_offset[TGSI_FILE_INPUT] = 2;
1888 }
1889 ctx.use_llvm = use_llvm;
1890
1891 if (use_llvm) {
1892 ctx.file_offset[TGSI_FILE_OUTPUT] =
1893 ctx.file_offset[TGSI_FILE_INPUT];
1894 } else {
1895 ctx.file_offset[TGSI_FILE_OUTPUT] =
1896 ctx.file_offset[TGSI_FILE_INPUT] +
1897 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1898 }
1899 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1900 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1901
1902 /* Outside the GPR range. This will be translated to one of the
1903 * kcache banks later. */
1904 ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1905
1906 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1907 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1908 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1909 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
1910 ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
1911 ctx.temp_reg = ctx.bc->ar_reg + 2;
1912 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
1913 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
1914 } else {
1915 ctx.temp_reg = ctx.bc->ar_reg + 1;
1916 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
1917 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
1918 }
1919
1920 if (indirect_gprs) {
1921 shader->max_arrays = 0;
1922 shader->num_arrays = 0;
1923
1924 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
1925 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
1926 ctx.file_offset[TGSI_FILE_OUTPUT] -
1927 ctx.file_offset[TGSI_FILE_INPUT],
1928 0x0F);
1929 }
1930 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1931 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT],
1932 ctx.file_offset[TGSI_FILE_TEMPORARY] -
1933 ctx.file_offset[TGSI_FILE_OUTPUT],
1934 0x0F);
1935 }
1936 }
1937
1938 ctx.nliterals = 0;
1939 ctx.literals = NULL;
1940 shader->fs_write_all = FALSE;
1941 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1942 tgsi_parse_token(&ctx.parse);
1943 switch (ctx.parse.FullToken.Token.Type) {
1944 case TGSI_TOKEN_TYPE_IMMEDIATE:
1945 immediate = &ctx.parse.FullToken.FullImmediate;
1946 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1947 if(ctx.literals == NULL) {
1948 r = -ENOMEM;
1949 goto out_err;
1950 }
1951 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1952 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1953 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1954 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1955 ctx.nliterals++;
1956 break;
1957 case TGSI_TOKEN_TYPE_DECLARATION:
1958 r = tgsi_declaration(&ctx);
1959 if (r)
1960 goto out_err;
1961 break;
1962 case TGSI_TOKEN_TYPE_INSTRUCTION:
1963 break;
1964 case TGSI_TOKEN_TYPE_PROPERTY:
1965 property = &ctx.parse.FullToken.FullProperty;
1966 switch (property->Property.PropertyName) {
1967 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1968 if (property->u[0].Data == 1)
1969 shader->fs_write_all = TRUE;
1970 break;
1971 case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
1972 if (property->u[0].Data == 1)
1973 shader->vs_position_window_space = TRUE;
1974 break;
1975 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1976 /* we don't need this one */
1977 break;
1978 case TGSI_PROPERTY_GS_INPUT_PRIM:
1979 shader->gs_input_prim = property->u[0].Data;
1980 break;
1981 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1982 shader->gs_output_prim = property->u[0].Data;
1983 break;
1984 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1985 shader->gs_max_out_vertices = property->u[0].Data;
1986 break;
1987 case TGSI_PROPERTY_GS_INVOCATIONS:
1988 shader->gs_num_invocations = property->u[0].Data;
1989 break;
1990 }
1991 break;
1992 default:
1993 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1994 r = -EINVAL;
1995 goto out_err;
1996 }
1997 }
1998
1999 shader->ring_item_size = ctx.next_ring_offset;
2000
2001 /* Process two side if needed */
2002 if (shader->two_side && ctx.colors_used) {
2003 int i, count = ctx.shader->ninput;
2004 unsigned next_lds_loc = ctx.shader->nlds;
2005
2006 /* additional inputs will be allocated right after the existing inputs,
2007 * we won't need them after the color selection, so we don't need to
2008 * reserve these gprs for the rest of the shader code and to adjust
2009 * output offsets etc. */
2010 int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
2011 ctx.info.file_max[TGSI_FILE_INPUT] + 1;
2012
2013 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */
2014 if (ctx.face_gpr == -1) {
2015 i = ctx.shader->ninput++;
2016 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
2017 ctx.shader->input[i].spi_sid = 0;
2018 ctx.shader->input[i].gpr = gpr++;
2019 ctx.face_gpr = ctx.shader->input[i].gpr;
2020 }
2021
2022 for (i = 0; i < count; i++) {
2023 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) {
2024 int ni = ctx.shader->ninput++;
2025 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io));
2026 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
2027 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]);
2028 ctx.shader->input[ni].gpr = gpr++;
2029 // TGSI to LLVM needs to know the lds position of inputs.
2030 // Non LLVM path computes it later (in process_twoside_color)
2031 ctx.shader->input[ni].lds_pos = next_lds_loc++;
2032 ctx.shader->input[i].back_color_input = ni;
2033 if (ctx.bc->chip_class >= EVERGREEN) {
2034 if ((r = evergreen_interp_input(&ctx, ni)))
2035 return r;
2036 }
2037 }
2038 }
2039 }
2040
2041 /* LLVM backend setup */
2042 #ifdef R600_USE_LLVM
2043 if (use_llvm) {
2044 struct radeon_llvm_context radeon_llvm_ctx;
2045 LLVMModuleRef mod;
2046 bool dump = r600_can_dump_shader(&rscreen->b, tokens);
2047 boolean use_kill = false;
2048
2049 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
2050 radeon_llvm_ctx.type = ctx.type;
2051 radeon_llvm_ctx.two_side = shader->two_side;
2052 radeon_llvm_ctx.face_gpr = ctx.face_gpr;
2053 radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1;
2054 radeon_llvm_ctx.r600_inputs = ctx.shader->input;
2055 radeon_llvm_ctx.r600_outputs = ctx.shader->output;
2056 radeon_llvm_ctx.color_buffer_count = max_color_exports;
2057 radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
2058 radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
2059 radeon_llvm_ctx.stream_outputs = &so;
2060 radeon_llvm_ctx.clip_vertex = ctx.cv_output;
2061 radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
2062 radeon_llvm_ctx.has_compressed_msaa_texturing =
2063 ctx.bc->has_compressed_msaa_texturing;
2064 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
2065 ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp;
2066 ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
2067
2068 if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) {
2069 radeon_llvm_dispose(&radeon_llvm_ctx);
2070 use_llvm = 0;
2071 fprintf(stderr, "R600 LLVM backend failed to compile "
2072 "shader. Falling back to TGSI\n");
2073 } else {
2074 ctx.file_offset[TGSI_FILE_OUTPUT] =
2075 ctx.file_offset[TGSI_FILE_INPUT];
2076 }
2077 if (use_kill)
2078 ctx.shader->uses_kill = use_kill;
2079 radeon_llvm_dispose(&radeon_llvm_ctx);
2080 }
2081 #endif
2082 /* End of LLVM backend setup */
2083
2084 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
2085 shader->nr_ps_max_color_exports = 8;
2086
2087 if (!use_llvm) {
2088 if (ctx.fragcoord_input >= 0) {
2089 if (ctx.bc->chip_class == CAYMAN) {
2090 for (j = 0 ; j < 4; j++) {
2091 struct r600_bytecode_alu alu;
2092 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2093 alu.op = ALU_OP1_RECIP_IEEE;
2094 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
2095 alu.src[0].chan = 3;
2096
2097 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
2098 alu.dst.chan = j;
2099 alu.dst.write = (j == 3);
2100 alu.last = 1;
2101 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
2102 return r;
2103 }
2104 } else {
2105 struct r600_bytecode_alu alu;
2106 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2107 alu.op = ALU_OP1_RECIP_IEEE;
2108 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
2109 alu.src[0].chan = 3;
2110
2111 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
2112 alu.dst.chan = 3;
2113 alu.dst.write = 1;
2114 alu.last = 1;
2115 if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
2116 return r;
2117 }
2118 }
2119
2120 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
2121 struct r600_bytecode_alu alu;
2122 int r;
2123
2124 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2125 alu.op = ALU_OP1_MOV;
2126 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
2127 alu.src[0].value = 0;
2128 alu.dst.sel = ctx.gs_export_gpr_treg;
2129 alu.dst.write = 1;
2130 alu.last = 1;
2131 r = r600_bytecode_add_alu(ctx.bc, &alu);
2132 if (r)
2133 return r;
2134 }
2135 if (shader->two_side && ctx.colors_used) {
2136 if ((r = process_twoside_color_inputs(&ctx)))
2137 return r;
2138 }
2139
2140 tgsi_parse_init(&ctx.parse, tokens);
2141 while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
2142 tgsi_parse_token(&ctx.parse);
2143 switch (ctx.parse.FullToken.Token.Type) {
2144 case TGSI_TOKEN_TYPE_INSTRUCTION:
2145 r = tgsi_is_supported(&ctx);
2146 if (r)
2147 goto out_err;
2148 ctx.max_driver_temp_used = 0;
2149 /* reserve first tmp for everyone */
2150 r600_get_temp(&ctx);
2151
2152 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
2153 if ((r = tgsi_split_constant(&ctx)))
2154 goto out_err;
2155 if ((r = tgsi_split_literal_constant(&ctx)))
2156 goto out_err;
2157 if (ctx.type == TGSI_PROCESSOR_GEOMETRY)
2158 if ((r = tgsi_split_gs_inputs(&ctx)))
2159 goto out_err;
2160 if (ctx.bc->chip_class == CAYMAN)
2161 ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
2162 else if (ctx.bc->chip_class >= EVERGREEN)
2163 ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
2164 else
2165 ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
2166 r = ctx.inst_info->process(&ctx);
2167 if (r)
2168 goto out_err;
2169 break;
2170 default:
2171 break;
2172 }
2173 }
2174 }
2175
2176 /* Reset the temporary register counter. */
2177 ctx.max_driver_temp_used = 0;
2178
2179 noutput = shader->noutput;
2180
2181 if (!ring_outputs && ctx.clip_vertex_write) {
2182 unsigned clipdist_temp[2];
2183
2184 clipdist_temp[0] = r600_get_temp(&ctx);
2185 clipdist_temp[1] = r600_get_temp(&ctx);
2186
2187 /* need to convert a clipvertex write into clipdistance writes and not export
2188 the clip vertex anymore */
2189
2190 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
2191 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
2192 shader->output[noutput].gpr = clipdist_temp[0];
2193 noutput++;
2194 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
2195 shader->output[noutput].gpr = clipdist_temp[1];
2196 noutput++;
2197
2198 /* reset spi_sid for clipvertex output to avoid confusing spi */
2199 shader->output[ctx.cv_output].spi_sid = 0;
2200
2201 shader->clip_dist_write = 0xFF;
2202
2203 for (i = 0; i < 8; i++) {
2204 int oreg = i >> 2;
2205 int ochan = i & 3;
2206
2207 for (j = 0; j < 4; j++) {
2208 struct r600_bytecode_alu alu;
2209 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2210 alu.op = ALU_OP2_DOT4;
2211 alu.src[0].sel = shader->output[ctx.cv_output].gpr;
2212 alu.src[0].chan = j;
2213
2214 alu.src[1].sel = 512 + i;
2215 alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
2216 alu.src[1].chan = j;
2217
2218 alu.dst.sel = clipdist_temp[oreg];
2219 alu.dst.chan = j;
2220 alu.dst.write = (j == ochan);
2221 if (j == 3)
2222 alu.last = 1;
2223 if (!use_llvm)
2224 r = r600_bytecode_add_alu(ctx.bc, &alu);
2225 if (r)
2226 return r;
2227 }
2228 }
2229 }
2230
2231 /* Add stream outputs. */
2232 if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
2233 so.num_outputs && !use_llvm)
2234 emit_streamout(&ctx, &so);
2235
2236 convert_edgeflag_to_int(&ctx);
2237
2238 if (ring_outputs) {
2239 if (key.vs_as_es)
2240 emit_gs_ring_writes(&ctx, FALSE);
2241 } else {
2242 /* Export output */
2243 next_clip_base = shader->vs_out_misc_write ? 62 : 61;
2244
2245 for (i = 0, j = 0; i < noutput; i++, j++) {
2246 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
2247 output[j].gpr = shader->output[i].gpr;
2248 output[j].elem_size = 3;
2249 output[j].swizzle_x = 0;
2250 output[j].swizzle_y = 1;
2251 output[j].swizzle_z = 2;
2252 output[j].swizzle_w = 3;
2253 output[j].burst_count = 1;
2254 output[j].type = -1;
2255 output[j].op = CF_OP_EXPORT;
2256 switch (ctx.type) {
2257 case TGSI_PROCESSOR_VERTEX:
2258 switch (shader->output[i].name) {
2259 case TGSI_SEMANTIC_POSITION:
2260 output[j].array_base = 60;
2261 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2262 pos_emitted = true;
2263 break;
2264
2265 case TGSI_SEMANTIC_PSIZE:
2266 output[j].array_base = 61;
2267 output[j].swizzle_y = 7;
2268 output[j].swizzle_z = 7;
2269 output[j].swizzle_w = 7;
2270 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2271 pos_emitted = true;
2272 break;
2273 case TGSI_SEMANTIC_EDGEFLAG:
2274 output[j].array_base = 61;
2275 output[j].swizzle_x = 7;
2276 output[j].swizzle_y = 0;
2277 output[j].swizzle_z = 7;
2278 output[j].swizzle_w = 7;
2279 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2280 pos_emitted = true;
2281 break;
2282 case TGSI_SEMANTIC_LAYER:
2283 /* spi_sid is 0 for outputs that are
2284 * not consumed by PS */
2285 if (shader->output[i].spi_sid) {
2286 output[j].array_base = next_param_base++;
2287 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2288 j++;
2289 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
2290 }
2291 output[j].array_base = 61;
2292 output[j].swizzle_x = 7;
2293 output[j].swizzle_y = 7;
2294 output[j].swizzle_z = 0;
2295 output[j].swizzle_w = 7;
2296 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2297 pos_emitted = true;
2298 break;
2299 case TGSI_SEMANTIC_VIEWPORT_INDEX:
2300 /* spi_sid is 0 for outputs that are
2301 * not consumed by PS */
2302 if (shader->output[i].spi_sid) {
2303 output[j].array_base = next_param_base++;
2304 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2305 j++;
2306 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
2307 }
2308 output[j].array_base = 61;
2309 output[j].swizzle_x = 7;
2310 output[j].swizzle_y = 7;
2311 output[j].swizzle_z = 7;
2312 output[j].swizzle_w = 0;
2313 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2314 pos_emitted = true;
2315 break;
2316 case TGSI_SEMANTIC_CLIPVERTEX:
2317 j--;
2318 break;
2319 case TGSI_SEMANTIC_CLIPDIST:
2320 output[j].array_base = next_clip_base++;
2321 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2322 pos_emitted = true;
2323 /* spi_sid is 0 for clipdistance outputs that were generated
2324 * for clipvertex - we don't need to pass them to PS */
2325 if (shader->output[i].spi_sid) {
2326 j++;
2327 /* duplicate it as PARAM to pass to the pixel shader */
2328 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
2329 output[j].array_base = next_param_base++;
2330 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2331 }
2332 break;
2333 case TGSI_SEMANTIC_FOG:
2334 output[j].swizzle_y = 4; /* 0 */
2335 output[j].swizzle_z = 4; /* 0 */
2336 output[j].swizzle_w = 5; /* 1 */
2337 break;
2338 }
2339 break;
2340 case TGSI_PROCESSOR_FRAGMENT:
2341 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
2342 /* never export more colors than the number of CBs */
2343 if (shader->output[i].sid >= max_color_exports) {
2344 /* skip export */
2345 j--;
2346 continue;
2347 }
2348 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
2349 output[j].array_base = shader->output[i].sid;
2350 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
2351 shader->nr_ps_color_exports++;
2352 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
2353 for (k = 1; k < max_color_exports; k++) {
2354 j++;
2355 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
2356 output[j].gpr = shader->output[i].gpr;
2357 output[j].elem_size = 3;
2358 output[j].swizzle_x = 0;
2359 output[j].swizzle_y = 1;
2360 output[j].swizzle_z = 2;
2361 output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
2362 output[j].burst_count = 1;
2363 output[j].array_base = k;
2364 output[j].op = CF_OP_EXPORT;
2365 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
2366 shader->nr_ps_color_exports++;
2367 }
2368 }
2369 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
2370 output[j].array_base = 61;
2371 output[j].swizzle_x = 2;
2372 output[j].swizzle_y = 7;
2373 output[j].swizzle_z = output[j].swizzle_w = 7;
2374 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
2375 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
2376 output[j].array_base = 61;
2377 output[j].swizzle_x = 7;
2378 output[j].swizzle_y = 1;
2379 output[j].swizzle_z = output[j].swizzle_w = 7;
2380 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
2381 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
2382 output[j].array_base = 61;
2383 output[j].swizzle_x = 7;
2384 output[j].swizzle_y = 7;
2385 output[j].swizzle_z = 0;
2386 output[j].swizzle_w = 7;
2387 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
2388 } else {
2389 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
2390 r = -EINVAL;
2391 goto out_err;
2392 }
2393 break;
2394 default:
2395 R600_ERR("unsupported processor type %d\n", ctx.type);
2396 r = -EINVAL;
2397 goto out_err;
2398 }
2399
2400 if (output[j].type==-1) {
2401 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2402 output[j].array_base = next_param_base++;
2403 }
2404 }
2405
2406 /* add fake position export */
2407 if (ctx.type == TGSI_PROCESSOR_VERTEX && pos_emitted == false) {
2408 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
2409 output[j].gpr = 0;
2410 output[j].elem_size = 3;
2411 output[j].swizzle_x = 7;
2412 output[j].swizzle_y = 7;
2413 output[j].swizzle_z = 7;
2414 output[j].swizzle_w = 7;
2415 output[j].burst_count = 1;
2416 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2417 output[j].array_base = 60;
2418 output[j].op = CF_OP_EXPORT;
2419 j++;
2420 }
2421
2422 /* add fake param output for vertex shader if no param is exported */
2423 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
2424 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
2425 output[j].gpr = 0;
2426 output[j].elem_size = 3;
2427 output[j].swizzle_x = 7;
2428 output[j].swizzle_y = 7;
2429 output[j].swizzle_z = 7;
2430 output[j].swizzle_w = 7;
2431 output[j].burst_count = 1;
2432 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2433 output[j].array_base = 0;
2434 output[j].op = CF_OP_EXPORT;
2435 j++;
2436 }
2437
2438 /* add fake pixel export */
2439 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
2440 memset(&output[j], 0, sizeof(struct r600_bytecode_output));
2441 output[j].gpr = 0;
2442 output[j].elem_size = 3;
2443 output[j].swizzle_x = 7;
2444 output[j].swizzle_y = 7;
2445 output[j].swizzle_z = 7;
2446 output[j].swizzle_w = 7;
2447 output[j].burst_count = 1;
2448 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
2449 output[j].array_base = 0;
2450 output[j].op = CF_OP_EXPORT;
2451 j++;
2452 }
2453
2454 noutput = j;
2455
2456 /* set export done on last export of each type */
2457 for (i = noutput - 1, output_done = 0; i >= 0; i--) {
2458 if (!(output_done & (1 << output[i].type))) {
2459 output_done |= (1 << output[i].type);
2460 output[i].op = CF_OP_EXPORT_DONE;
2461 }
2462 }
2463 /* add output to bytecode */
2464 if (!use_llvm) {
2465 for (i = 0; i < noutput; i++) {
2466 r = r600_bytecode_add_output(ctx.bc, &output[i]);
2467 if (r)
2468 goto out_err;
2469 }
2470 }
2471 }
2472
2473 /* add program end */
2474 if (!use_llvm) {
2475 if (ctx.bc->chip_class == CAYMAN)
2476 cm_bytecode_add_cf_end(ctx.bc);
2477 else {
2478 const struct cf_op_info *last = NULL;
2479
2480 if (ctx.bc->cf_last)
2481 last = r600_isa_cf(ctx.bc->cf_last->op);
2482
2483 /* alu clause instructions don't have EOP bit, so add NOP */
2484 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS)
2485 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
2486
2487 ctx.bc->cf_last->end_of_program = 1;
2488 }
2489 }
2490
2491 /* check GPR limit - we have 124 = 128 - 4
2492 * (4 are reserved as alu clause temporary registers) */
2493 if (ctx.bc->ngpr > 124) {
2494 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
2495 r = -ENOMEM;
2496 goto out_err;
2497 }
2498
2499 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
2500 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so)))
2501 return r;
2502 }
2503
2504 free(ctx.literals);
2505 tgsi_parse_free(&ctx.parse);
2506 return 0;
2507 out_err:
2508 free(ctx.literals);
2509 tgsi_parse_free(&ctx.parse);
2510 return r;
2511 }
2512
2513 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
2514 {
2515 R600_ERR("%s tgsi opcode unsupported\n",
2516 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
2517 return -EINVAL;
2518 }
2519
/* Handler for TGSI_OPCODE_END: no bytecode needs to be emitted here
 * (the end-of-program marker is added after the whole token stream has
 * been translated), so this is a successful no-op. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	return 0;
}
2524
2525 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
2526 const struct r600_shader_src *shader_src,
2527 unsigned chan)
2528 {
2529 bc_src->sel = shader_src->sel;
2530 bc_src->chan = shader_src->swizzle[chan];
2531 bc_src->neg = shader_src->neg;
2532 bc_src->abs = shader_src->abs;
2533 bc_src->rel = shader_src->rel;
2534 bc_src->value = shader_src->value[bc_src->chan];
2535 bc_src->kc_bank = shader_src->kc_bank;
2536 bc_src->kc_rel = shader_src->kc_rel;
2537 }
2538
2539 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
2540 {
2541 bc_src->abs = 1;
2542 bc_src->neg = 0;
2543 }
2544
2545 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
2546 {
2547 bc_src->neg = !bc_src->neg;
2548 }
2549
2550 static void tgsi_dst(struct r600_shader_ctx *ctx,
2551 const struct tgsi_full_dst_register *tgsi_dst,
2552 unsigned swizzle,
2553 struct r600_bytecode_alu_dst *r600_dst)
2554 {
2555 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2556
2557 r600_dst->sel = tgsi_dst->Register.Index;
2558 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
2559 r600_dst->chan = swizzle;
2560 r600_dst->write = 1;
2561 if (tgsi_dst->Register.Indirect)
2562 r600_dst->rel = V_SQ_REL_RELATIVE;
2563 if (inst->Instruction.Saturate) {
2564 r600_dst->clamp = 1;
2565 }
2566 }
2567
/* Return the index of the highest channel set in a 4-bit writemask,
 * or 0 when the mask is empty. */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
2579
/* Emit a generic two-operand ALU instruction for each written channel.
 *
 * swap       - exchange src0/src1 (for opcodes whose hardware operand
 *              order is reversed relative to TGSI).
 * trans_only - mark every emitted channel as 'last' individually; when
 *              more than one channel is written the results are staged
 *              through a temp register and moved to the real dst after.
 */
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	/* use temp register if trans_only and more than one dst component */
	int use_tmp = trans_only && (write_mask ^ (1 << lasti));

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (use_tmp) {
			/* stage into the temp, same channel as the final dst */
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		}
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_SUB:
			/* implemented by negating the second source */
			r600_bytecode_src_toggle_neg(&alu.src[1]);
			break;
		case TGSI_OPCODE_ABS:
			/* implemented via the source abs modifier */
			r600_bytecode_src_set_abs(&alu.src[0]);
			break;
		default:
			break;
		}
		if (i == lasti || trans_only) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	if (use_tmp) {
		/* move result from temp to dst */
		for (i = 0; i <= lasti; i++) {
			if (!(write_mask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
			alu.last = (i == lasti);

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}
2649
/* Standard two-operand op: sources in TGSI order, no trans-only restriction. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 0);
}
2654
/* Two-operand op with src0/src1 exchanged for the hardware operand order. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1, 0);
}
2659
/* Two-operand op emitted one channel per ALU group (trans-only path). */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 1);
}
2664
2665 static int tgsi_ineg(struct r600_shader_ctx *ctx)
2666 {
2667 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2668 struct r600_bytecode_alu alu;
2669 int i, r;
2670 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2671
2672 for (i = 0; i < lasti + 1; i++) {
2673
2674 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2675 continue;
2676 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2677 alu.op = ctx->inst_info->op;
2678
2679 alu.src[0].sel = V_SQ_ALU_SRC_0;
2680
2681 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2682
2683 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2684
2685 if (i == lasti) {
2686 alu.last = 1;
2687 }
2688 r = r600_bytecode_add_alu(ctx->bc, &alu);
2689 if (r)
2690 return r;
2691 }
2692 return 0;
2693
2694 }
2695
2696 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
2697 {
2698 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2699 int i, j, r;
2700 struct r600_bytecode_alu alu;
2701 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2702
2703 for (i = 0 ; i < last_slot; i++) {
2704 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2705 alu.op = ctx->inst_info->op;
2706 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
2707 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
2708
2709 /* RSQ should take the absolute value of src */
2710 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
2711 r600_bytecode_src_set_abs(&alu.src[j]);
2712 }
2713 }
2714 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2715 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2716
2717 if (i == last_slot - 1)
2718 alu.last = 1;
2719 r = r600_bytecode_add_alu(ctx->bc, &alu);
2720 if (r)
2721 return r;
2722 }
2723 return 0;
2724 }
2725
2726 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
2727 {
2728 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2729 int i, j, k, r;
2730 struct r600_bytecode_alu alu;
2731 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2732 int t1 = ctx->temp_reg;
2733
2734 for (k = 0; k <= lasti; k++) {
2735 if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
2736 continue;
2737
2738 for (i = 0 ; i < 4; i++) {
2739 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2740 alu.op = ctx->inst_info->op;
2741 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
2742 r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
2743 }
2744 alu.dst.sel = t1;
2745 alu.dst.chan = i;
2746 alu.dst.write = (i == k);
2747 if (i == 3)
2748 alu.last = 1;
2749 r = r600_bytecode_add_alu(ctx->bc, &alu);
2750 if (r)
2751 return r;
2752 }
2753 }
2754
2755 for (i = 0 ; i <= lasti; i++) {
2756 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2757 continue;
2758 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2759 alu.op = ALU_OP1_MOV;
2760 alu.src[0].sel = t1;
2761 alu.src[0].chan = i;
2762 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2763 alu.dst.write = 1;
2764 if (i == lasti)
2765 alu.last = 1;
2766 r = r600_bytecode_add_alu(ctx->bc, &alu);
2767 if (r)
2768 return r;
2769 }
2770
2771 return 0;
2772 }
2773
2774 /*
2775 * r600 - trunc to -PI..PI range
2776 * r700 - normalize by dividing by 2PI
2777 * see fdo bug 27901
2778 */
2779 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
2780 {
2781 static float half_inv_pi = 1.0 /(3.1415926535 * 2);
2782 static float double_pi = 3.1415926535 * 2;
2783 static float neg_pi = -3.1415926535;
2784
2785 int r;
2786 struct r600_bytecode_alu alu;
2787
2788 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2789 alu.op = ALU_OP3_MULADD;
2790 alu.is_op3 = 1;
2791
2792 alu.dst.chan = 0;
2793 alu.dst.sel = ctx->temp_reg;
2794 alu.dst.write = 1;
2795
2796 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2797
2798 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2799 alu.src[1].chan = 0;
2800 alu.src[1].value = *(uint32_t *)&half_inv_pi;
2801 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2802 alu.src[2].chan = 0;
2803 alu.last = 1;
2804 r = r600_bytecode_add_alu(ctx->bc, &alu);
2805 if (r)
2806 return r;
2807
2808 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2809 alu.op = ALU_OP1_FRACT;
2810
2811 alu.dst.chan = 0;
2812 alu.dst.sel = ctx->temp_reg;
2813 alu.dst.write = 1;
2814
2815 alu.src[0].sel = ctx->temp_reg;
2816 alu.src[0].chan = 0;
2817 alu.last = 1;
2818 r = r600_bytecode_add_alu(ctx->bc, &alu);
2819 if (r)
2820 return r;
2821
2822 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2823 alu.op = ALU_OP3_MULADD;
2824 alu.is_op3 = 1;
2825
2826 alu.dst.chan = 0;
2827 alu.dst.sel = ctx->temp_reg;
2828 alu.dst.write = 1;
2829
2830 alu.src[0].sel = ctx->temp_reg;
2831 alu.src[0].chan = 0;
2832
2833 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2834 alu.src[1].chan = 0;
2835 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2836 alu.src[2].chan = 0;
2837
2838 if (ctx->bc->chip_class == R600) {
2839 alu.src[1].value = *(uint32_t *)&double_pi;
2840 alu.src[2].value = *(uint32_t *)&neg_pi;
2841 } else {
2842 alu.src[1].sel = V_SQ_ALU_SRC_1;
2843 alu.src[2].sel = V_SQ_ALU_SRC_0_5;
2844 alu.src[2].neg = 1;
2845 }
2846
2847 alu.last = 1;
2848 r = r600_bytecode_add_alu(ctx->bc, &alu);
2849 if (r)
2850 return r;
2851 return 0;
2852 }
2853
2854 static int cayman_trig(struct r600_shader_ctx *ctx)
2855 {
2856 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2857 struct r600_bytecode_alu alu;
2858 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2859 int i, r;
2860
2861 r = tgsi_setup_trig(ctx);
2862 if (r)
2863 return r;
2864
2865
2866 for (i = 0; i < last_slot; i++) {
2867 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2868 alu.op = ctx->inst_info->op;
2869 alu.dst.chan = i;
2870
2871 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2872 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2873
2874 alu.src[0].sel = ctx->temp_reg;
2875 alu.src[0].chan = 0;
2876 if (i == last_slot - 1)
2877 alu.last = 1;
2878 r = r600_bytecode_add_alu(ctx->bc, &alu);
2879 if (r)
2880 return r;
2881 }
2882 return 0;
2883 }
2884
2885 static int tgsi_trig(struct r600_shader_ctx *ctx)
2886 {
2887 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2888 struct r600_bytecode_alu alu;
2889 int i, r;
2890 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2891
2892 r = tgsi_setup_trig(ctx);
2893 if (r)
2894 return r;
2895
2896 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2897 alu.op = ctx->inst_info->op;
2898 alu.dst.chan = 0;
2899 alu.dst.sel = ctx->temp_reg;
2900 alu.dst.write = 1;
2901
2902 alu.src[0].sel = ctx->temp_reg;
2903 alu.src[0].chan = 0;
2904 alu.last = 1;
2905 r = r600_bytecode_add_alu(ctx->bc, &alu);
2906 if (r)
2907 return r;
2908
2909 /* replicate result */
2910 for (i = 0; i < lasti + 1; i++) {
2911 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2912 continue;
2913
2914 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2915 alu.op = ALU_OP1_MOV;
2916
2917 alu.src[0].sel = ctx->temp_reg;
2918 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2919 if (i == lasti)
2920 alu.last = 1;
2921 r = r600_bytecode_add_alu(ctx->bc, &alu);
2922 if (r)
2923 return r;
2924 }
2925 return 0;
2926 }
2927
/* TGSI SCS: dst = (cos(src.x), sin(src.x), 0.0, 1.0), writing only the
 * channels enabled in the writemask. On CAYMAN the COS/SIN ops are
 * replicated across three vector slots with only the target slot
 * writing; elsewhere a single trans-unit instruction is emitted. */
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* We'll only need the trig stuff if we are going to write to the
	 * X or Y components of the destination vector.
	 */
	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
		r = tgsi_setup_trig(ctx);
		if (r)
			return r;
	}

	/* dst.x = COS */
	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0 ; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_COS;
				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				/* only slot 0 (channel x) commits the result */
				if (i == 0)
					alu.dst.write = 1;
				else
					alu.dst.write = 0;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;
				if (i == 2)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_COS;
			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);

			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* dst.y = SIN */
	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0 ; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_SIN;
				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
				/* only slot 1 (channel y) commits the result */
				if (i == 1)
					alu.dst.write = 1;
				else
					alu.dst.write = 0;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;
				if (i == 2)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_SIN;
			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);

			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* dst.z = 0.0; */
	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;

		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);

		alu.src[0].sel = V_SQ_ALU_SRC_0;
		alu.src[0].chan = 0;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst.w = 1.0; */
	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;

		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);

		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}
3048
3049 static int tgsi_kill(struct r600_shader_ctx *ctx)
3050 {
3051 struct r600_bytecode_alu alu;
3052 int i, r;
3053
3054 for (i = 0; i < 4; i++) {
3055 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3056 alu.op = ctx->inst_info->op;
3057
3058 alu.dst.chan = i;
3059
3060 alu.src[0].sel = V_SQ_ALU_SRC_0;
3061
3062 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILL) {
3063 alu.src[1].sel = V_SQ_ALU_SRC_1;
3064 alu.src[1].neg = 1;
3065 } else {
3066 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3067 }
3068 if (i == 3) {
3069 alu.last = 1;
3070 }
3071 r = r600_bytecode_add_alu(ctx->bc, &alu);
3072 if (r)
3073 return r;
3074 }
3075
3076 /* kill must be last in ALU */
3077 ctx->bc->force_add_cf = 1;
3078 ctx->shader->uses_kill = TRUE;
3079 return 0;
3080 }
3081
/* Emit TGSI LIT (classic fixed-function lighting coefficients):
 *   dst.x = 1.0
 *   dst.y = max(src.x, 0.0)
 *   dst.z = exp2(src.w * log2(max(src.y, 0.0))) gated by src.x (via MUL_LIT)
 *   dst.w = 1.0
 * The expensive dst.z path is only emitted when Z is in the write mask.
 */
static int tgsi_lit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;

	/* tmp.x = max(src.y, 0.0) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP2_MAX;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
	alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
	alu.src[1].chan = 1;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.chan = 0;
	alu.dst.write = 1;

	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	if (inst->Dst[0].Register.WriteMask & (1 << 2))
	{
		int chan;
		int sel;
		int i;

		/* LOG/MUL_LIT/EXP are t-slot ops on pre-CAYMAN; on CAYMAN
		 * they execute in the vector slots, so replicate over three
		 * slots and only write the last one (see note at file top). */
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				/* tmp.z = log(tmp.x) */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_LOG_CLAMPED;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;
				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 2) {
					alu.dst.write = 1;
					alu.last = 1;
				} else
					alu.dst.write = 0;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			/* tmp.z = log(tmp.x) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_LOG_CLAMPED;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* record where the LOG result landed (chan differs per chip) */
		chan = alu.dst.chan;
		sel = alu.dst.sel;

		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_MUL_LIT;
		alu.src[0].sel = sel;
		alu.src[0].chan = chan;
		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				/* dst.z = exp(tmp.x) */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;
				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
				if (i == 2) {
					alu.dst.write = 1;
					alu.last = 1;
				} else
					alu.dst.write = 0;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			/* dst.z = exp(tmp.x) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;
			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* dst.x = 1.0 */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
	alu.src[0].chan = 0;
	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.y = max(src.x, 0.0) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP2_MAX;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
	alu.src[1].chan = 0;
	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.w = 1.0 */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = V_SQ_ALU_SRC_1;
	alu.src[0].chan = 0;
	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	return 0;
}
3231
3232 static int tgsi_rsq(struct r600_shader_ctx *ctx)
3233 {
3234 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3235 struct r600_bytecode_alu alu;
3236 int i, r;
3237
3238 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3239
3240 /* XXX:
3241 * For state trackers other than OpenGL, we'll want to use
3242 * _RECIPSQRT_IEEE instead.
3243 */
3244 alu.op = ALU_OP1_RECIPSQRT_CLAMPED;
3245
3246 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
3247 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
3248 r600_bytecode_src_set_abs(&alu.src[i]);
3249 }
3250 alu.dst.sel = ctx->temp_reg;
3251 alu.dst.write = 1;
3252 alu.last = 1;
3253 r = r600_bytecode_add_alu(ctx->bc, &alu);
3254 if (r)
3255 return r;
3256 /* replicate result */
3257 return tgsi_helper_tempx_replicate(ctx);
3258 }
3259
3260 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
3261 {
3262 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3263 struct r600_bytecode_alu alu;
3264 int i, r;
3265
3266 for (i = 0; i < 4; i++) {
3267 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3268 alu.src[0].sel = ctx->temp_reg;
3269 alu.op = ALU_OP1_MOV;
3270 alu.dst.chan = i;
3271 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3272 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3273 if (i == 3)
3274 alu.last = 1;
3275 r = r600_bytecode_add_alu(ctx->bc, &alu);
3276 if (r)
3277 return r;
3278 }
3279 return 0;
3280 }
3281
3282 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
3283 {
3284 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3285 struct r600_bytecode_alu alu;
3286 int i, r;
3287
3288 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3289 alu.op = ctx->inst_info->op;
3290 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
3291 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
3292 }
3293 alu.dst.sel = ctx->temp_reg;
3294 alu.dst.write = 1;
3295 alu.last = 1;
3296 r = r600_bytecode_add_alu(ctx->bc, &alu);
3297 if (r)
3298 return r;
3299 /* replicate result */
3300 return tgsi_helper_tempx_replicate(ctx);
3301 }
3302
3303 static int cayman_pow(struct r600_shader_ctx *ctx)
3304 {
3305 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3306 int i, r;
3307 struct r600_bytecode_alu alu;
3308 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
3309
3310 for (i = 0; i < 3; i++) {
3311 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3312 alu.op = ALU_OP1_LOG_IEEE;
3313 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3314 alu.dst.sel = ctx->temp_reg;
3315 alu.dst.chan = i;
3316 alu.dst.write = 1;
3317 if (i == 2)
3318 alu.last = 1;
3319 r = r600_bytecode_add_alu(ctx->bc, &alu);
3320 if (r)
3321 return r;
3322 }
3323
3324 /* b * LOG2(a) */
3325 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3326 alu.op = ALU_OP2_MUL;
3327 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
3328 alu.src[1].sel = ctx->temp_reg;
3329 alu.dst.sel = ctx->temp_reg;
3330 alu.dst.write = 1;
3331 alu.last = 1;
3332 r = r600_bytecode_add_alu(ctx->bc, &alu);
3333 if (r)
3334 return r;
3335
3336 for (i = 0; i < last_slot; i++) {
3337 /* POW(a,b) = EXP2(b * LOG2(a))*/
3338 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3339 alu.op = ALU_OP1_EXP_IEEE;
3340 alu.src[0].sel = ctx->temp_reg;
3341
3342 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3343 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3344 if (i == last_slot - 1)
3345 alu.last = 1;
3346 r = r600_bytecode_add_alu(ctx->bc, &alu);
3347 if (r)
3348 return r;
3349 }
3350 return 0;
3351 }
3352
3353 static int tgsi_pow(struct r600_shader_ctx *ctx)
3354 {
3355 struct r600_bytecode_alu alu;
3356 int r;
3357
3358 /* LOG2(a) */
3359 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3360 alu.op = ALU_OP1_LOG_IEEE;
3361 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3362 alu.dst.sel = ctx->temp_reg;
3363 alu.dst.write = 1;
3364 alu.last = 1;
3365 r = r600_bytecode_add_alu(ctx->bc, &alu);
3366 if (r)
3367 return r;
3368 /* b * LOG2(a) */
3369 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3370 alu.op = ALU_OP2_MUL;
3371 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
3372 alu.src[1].sel = ctx->temp_reg;
3373 alu.dst.sel = ctx->temp_reg;
3374 alu.dst.write = 1;
3375 alu.last = 1;
3376 r = r600_bytecode_add_alu(ctx->bc, &alu);
3377 if (r)
3378 return r;
3379 /* POW(a,b) = EXP2(b * LOG2(a))*/
3380 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3381 alu.op = ALU_OP1_EXP_IEEE;
3382 alu.src[0].sel = ctx->temp_reg;
3383 alu.dst.sel = ctx->temp_reg;
3384 alu.dst.write = 1;
3385 alu.last = 1;
3386 r = r600_bytecode_add_alu(ctx->bc, &alu);
3387 if (r)
3388 return r;
3389 return tgsi_helper_tempx_replicate(ctx);
3390 }
3391
/* Emit integer divide / modulo (backing UDIV, UMOD, IDIV, IMOD).
 *
 * mod:       0 = emit quotient (DIV), 1 = emit remainder (MOD)
 * signed_op: 0 = unsigned operands, 1 = signed operands (computed on
 *            absolute values with the result sign fixed up at the end)
 *
 * The reciprocal-based algorithm is documented step by step in the big
 * comment below; each emitted ALU op is tagged with its step number.
 */
static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, j;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int tmp0 = ctx->temp_reg;
	int tmp1 = r600_get_temp(ctx);
	int tmp2 = r600_get_temp(ctx);
	int tmp3 = r600_get_temp(ctx);
	/* Unsigned path:
	 *
	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
	 *
	 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
	 * 2. tmp0.z = lo (tmp0.x * src2)
	 * 3. tmp0.w = -tmp0.z
	 * 4. tmp0.y = hi (tmp0.x * src2)
	 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
	 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
	 * 7. tmp1.x = tmp0.x - tmp0.w
	 * 8. tmp1.y = tmp0.x + tmp0.w
	 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
	 * 10. tmp0.z = hi(tmp0.x * src1) = q
	 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
	 *
	 * 12. tmp0.w = src1 - tmp0.y = r
	 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
	 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
	 *
	 * if DIV
	 *
	 * 15. tmp1.z = tmp0.z + 1 = q + 1
	 * 16. tmp1.w = tmp0.z - 1 = q - 1
	 *
	 * else MOD
	 *
	 * 15. tmp1.z = tmp0.w - src2 = r - src2
	 * 16. tmp1.w = tmp0.w + src2 = r + src2
	 *
	 * endif
	 *
	 * 17. tmp1.x = tmp1.x & tmp1.y
	 *
	 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
	 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
	 *
	 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
	 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
	 *
	 * Signed path:
	 *
	 * Same as unsigned, using abs values of the operands,
	 * and fixing the sign of the result in the end.
	 */

	/* the whole sequence is emitted once per written channel */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		if (signed_op) {

			/* tmp2.x = -src0 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = -src1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.z sign bit is set if src0 and src2 signs are different */
			/* it will be a sign of the quotient */
			if (!mod) {

				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_XOR_INT;

				alu.dst.sel = tmp2;
				alu.dst.chan = 2;
				alu.dst.write = 1;

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			/* tmp2.x = |src0| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = |src1| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 1;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			/* CAYMAN has no RECIP_UINT; go through float:
			 * u2f -> RECIP_IEEE -> scale by 2^32 -> f2u */
			/* tmp3.x = u2f(src2) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_UINT_TO_FLT;

			alu.dst.sel = tmp3;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp0.x = recip(tmp3.x) */
			for (j = 0 ; j < 3; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 0);

				alu.src[0].sel = tmp3;
				alu.src[0].chan = 0;

				if (j == 2)
					alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MUL;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
			alu.src[1].value = 0x4f800000; /* 2^32 as float */

			alu.dst.sel = tmp3;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_FLT_TO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = tmp3;
			alu.src[0].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_RECIP_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 2. tmp0.z = lo (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;
				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 3. tmp0.w = -tmp0.z */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		alu.src[0].sel = V_SQ_ALU_SRC_0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 4. tmp0.y = hi (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}
				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 3);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 2;

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 0;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 7. tmp1.x = tmp0.x - tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 8. tmp1.y = tmp0.x + tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 0;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 10. tmp0.z = hi(tmp0.x * src1) = q */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 0;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 0;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				if (signed_op) {
					alu.src[0].sel = tmp2;
					alu.src[0].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
				}

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 2;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 12. tmp0.w = src1 - tmp0.y = r */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 3;
		if (signed_op) {
			alu.src[1].sel = tmp2;
			alu.src[1].chan = 1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (mod) { /* UMOD */

			/* 15. tmp1.z = tmp0.w - src2 = r - src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.w + src2 = r + src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else { /* UDIV */

			/* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.z - 1 = q - 1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 17. tmp1.x = tmp1.x & tmp1.y */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z    DIV */
		/* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z    MOD */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = mod ? 3 : 2;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		/* for the signed path the result still needs a sign fixup,
		 * so keep it in tmp0.z; otherwise write the real dst */
		if (signed_op) {
			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;
		} else {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		}

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (signed_op) {

			/* fix the sign of the result */

			if (mod) {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* sign of the remainder is the same as the sign of src0 */
				/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

			} else {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* fix the quotient sign (same as the sign of src0*src1) */
				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				alu.src[0].sel = tmp2;
				alu.src[0].chan = 2;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
	}
	return 0;
}
4235
/* TGSI UDIV: unsigned integer division (quotient). */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}
4240
/* TGSI UMOD: unsigned integer modulo (remainder). */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}
4245
/* TGSI IDIV: signed integer division (quotient). */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}
4250
/* TGSI MOD: signed integer modulo (remainder). */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}
4255
4256
/* Emit a float -> integer conversion (F2I/F2U family).
 *
 * Two passes over the enabled destination channels:
 *   1) temp_reg.chan = TRUNC(src.chan)        (round toward zero)
 *   2) dst.chan      = convert(temp_reg.chan) (op from ctx->inst_info->op)
 */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* pass 1: truncate every enabled channel into the temp register */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_TRUNC;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: convert the truncated values into the real destination */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		/* FLT_TO_UINT always terminates its ALU group here --
		 * presumably because it executes in the scalar (trans) slot
		 * on this hw; confirm against the ISA docs. */
		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}
4305
4306 static int tgsi_iabs(struct r600_shader_ctx *ctx)
4307 {
4308 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4309 struct r600_bytecode_alu alu;
4310 int i, r;
4311 unsigned write_mask = inst->Dst[0].Register.WriteMask;
4312 int last_inst = tgsi_last_instruction(write_mask);
4313
4314 /* tmp = -src */
4315 for (i = 0; i < 4; i++) {
4316 if (!(write_mask & (1<<i)))
4317 continue;
4318
4319 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4320 alu.op = ALU_OP2_SUB_INT;
4321
4322 alu.dst.sel = ctx->temp_reg;
4323 alu.dst.chan = i;
4324 alu.dst.write = 1;
4325
4326 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4327 alu.src[0].sel = V_SQ_ALU_SRC_0;
4328
4329 if (i == last_inst)
4330 alu.last = 1;
4331 r = r600_bytecode_add_alu(ctx->bc, &alu);
4332 if (r)
4333 return r;
4334 }
4335
4336 /* dst = (src >= 0 ? src : tmp) */
4337 for (i = 0; i < 4; i++) {
4338 if (!(write_mask & (1<<i)))
4339 continue;
4340
4341 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4342 alu.op = ALU_OP3_CNDGE_INT;
4343 alu.is_op3 = 1;
4344 alu.dst.write = 1;
4345
4346 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4347
4348 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4349 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4350 alu.src[2].sel = ctx->temp_reg;
4351 alu.src[2].chan = i;
4352
4353 if (i == last_inst)
4354 alu.last = 1;
4355 r = r600_bytecode_add_alu(ctx->bc, &alu);
4356 if (r)
4357 return r;
4358 }
4359 return 0;
4360 }
4361
4362 static int tgsi_issg(struct r600_shader_ctx *ctx)
4363 {
4364 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4365 struct r600_bytecode_alu alu;
4366 int i, r;
4367 unsigned write_mask = inst->Dst[0].Register.WriteMask;
4368 int last_inst = tgsi_last_instruction(write_mask);
4369
4370 /* tmp = (src >= 0 ? src : -1) */
4371 for (i = 0; i < 4; i++) {
4372 if (!(write_mask & (1<<i)))
4373 continue;
4374
4375 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4376 alu.op = ALU_OP3_CNDGE_INT;
4377 alu.is_op3 = 1;
4378
4379 alu.dst.sel = ctx->temp_reg;
4380 alu.dst.chan = i;
4381 alu.dst.write = 1;
4382
4383 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4384 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4385 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
4386
4387 if (i == last_inst)
4388 alu.last = 1;
4389 r = r600_bytecode_add_alu(ctx->bc, &alu);
4390 if (r)
4391 return r;
4392 }
4393
4394 /* dst = (tmp > 0 ? 1 : tmp) */
4395 for (i = 0; i < 4; i++) {
4396 if (!(write_mask & (1<<i)))
4397 continue;
4398
4399 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4400 alu.op = ALU_OP3_CNDGT_INT;
4401 alu.is_op3 = 1;
4402 alu.dst.write = 1;
4403
4404 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4405
4406 alu.src[0].sel = ctx->temp_reg;
4407 alu.src[0].chan = i;
4408
4409 alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
4410
4411 alu.src[2].sel = ctx->temp_reg;
4412 alu.src[2].chan = i;
4413
4414 if (i == last_inst)
4415 alu.last = 1;
4416 r = r600_bytecode_add_alu(ctx->bc, &alu);
4417 if (r)
4418 return r;
4419 }
4420 return 0;
4421 }
4422
4423
4424
4425 static int tgsi_ssg(struct r600_shader_ctx *ctx)
4426 {
4427 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4428 struct r600_bytecode_alu alu;
4429 int i, r;
4430
4431 /* tmp = (src > 0 ? 1 : src) */
4432 for (i = 0; i < 4; i++) {
4433 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4434 alu.op = ALU_OP3_CNDGT;
4435 alu.is_op3 = 1;
4436
4437 alu.dst.sel = ctx->temp_reg;
4438 alu.dst.chan = i;
4439
4440 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4441 alu.src[1].sel = V_SQ_ALU_SRC_1;
4442 r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
4443
4444 if (i == 3)
4445 alu.last = 1;
4446 r = r600_bytecode_add_alu(ctx->bc, &alu);
4447 if (r)
4448 return r;
4449 }
4450
4451 /* dst = (-tmp > 0 ? -1 : tmp) */
4452 for (i = 0; i < 4; i++) {
4453 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4454 alu.op = ALU_OP3_CNDGT;
4455 alu.is_op3 = 1;
4456 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4457
4458 alu.src[0].sel = ctx->temp_reg;
4459 alu.src[0].chan = i;
4460 alu.src[0].neg = 1;
4461
4462 alu.src[1].sel = V_SQ_ALU_SRC_1;
4463 alu.src[1].neg = 1;
4464
4465 alu.src[2].sel = ctx->temp_reg;
4466 alu.src[2].chan = i;
4467
4468 if (i == 3)
4469 alu.last = 1;
4470 r = r600_bytecode_add_alu(ctx->bc, &alu);
4471 if (r)
4472 return r;
4473 }
4474 return 0;
4475 }
4476
/* Emit TGSI BFI (bitfield insert) in three passes per enabled channel:
 *   1) t1 = BFM_INT(src3, src2)  -- build the bit mask from width/offset
 *   2) t2 = src1 << src2         -- shift the insert value into position
 *   3) dst = BFI_INT(t1, t2, src0) -- hardware bitfield insert into base
 */
static int tgsi_bfi(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	t1 = ctx->temp_reg;

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* create mask tmp */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_BFM_INT;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* second temp needed: the BFI below reads both t1 and t2 */
	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* shift insert left */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_LSHL_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* actual bitfield insert */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_BFI_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}
4556
/* Emit TGSI IMSB/UMSB (find most significant set bit).
 *
 * The hardware FFBH ops count bit positions from the msb while TGSI wants
 * them counted from the lsb, so the raw result t1 is flipped with
 * t2 = 31 - t1.  The final CNDGE keeps t1 unchanged when it is negative --
 * presumably the hardware's "no bit found" marker, which TGSI also
 * expects; confirm against the ISA docs.
 */
static int tgsi_msb(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
		ctx->inst_info->op == ALU_OP1_FFBH_UINT);

	t1 = ctx->temp_reg;

	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t1 = FFBH_INT / FFBH_UINT */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t2 = 31 - t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[0].value = 31;
		alu.src[1].sel = t1;
		alu.src[1].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* result = t1 >= 0 ? t2 : t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		alu.src[2].sel = t1;
		alu.src[2].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}
4642
/* Emit the evergreen/cayman interpolation opcodes (INTERP_CENTROID,
 * INTERP_OFFSET, INTERP_SAMPLE).
 *
 * Picks the (i,j) barycentric pair allocated for the input's interpolation
 * mode, optionally adjusts it for OFFSET/SAMPLE using screen-space
 * gradients, then issues the INTERP_ZW/INTERP_XY pairs and copies the
 * swizzled result to the destination.
 */
static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
	unsigned location;
	int input;

	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

	input = inst->Src[0].Register.Index;

	/* Interpolators have been marked for use already by allocate_system_value_inputs */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
	}
	else {
		location = TGSI_INTERPOLATE_LOC_CENTROID;
	}

	/* locate the (i,j) pair for this input's interpolation mode; each
	 * interpolator slot packs two ij pairs per GPR */
	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
	if (k < 0)
		k = 0;
	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);

	/* NOTE: currently offset is not perspective correct */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		int sample_gpr = -1;
		int gradientsH, gradientsV;
		struct r600_bytecode_tex tex;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
		}

		/* compute screen-space gradients of the ij pair with the
		 * GET_GRADIENTS_H/V fetch ops */
		gradientsH = r600_get_temp(ctx);
		gradientsV = r600_get_temp(ctx);
		for (i = 0; i < 2; i++) {
			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
			tex.src_gpr = interp_gpr;
			tex.src_sel_x = interp_base_chan + 0;
			tex.src_sel_y = interp_base_chan + 1;
			tex.src_sel_z = 0;
			tex.src_sel_w = 0;
			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
			tex.dst_sel_x = 0;
			tex.dst_sel_y = 1;
			tex.dst_sel_z = 7;
			tex.dst_sel_w = 7;
			tex.inst_mod = 1; // Use per pixel gradient calculation
			tex.sampler_id = 0;
			tex.resource_id = tex.sampler_id;
			r = r600_bytecode_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}

		/* temp = gradientsH * offset.x + ij
		 * (for INTERP_SAMPLE the offset comes from the loaded sample
		 * position, channel 2) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsH;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 2;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
			}
			alu.src[2].sel = interp_gpr;
			alu.src[2].chan = interp_base_chan + i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp = gradientsV * offset.y + temp */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsV;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* run the interpolation: slots 0-3 are INTERP_ZW, slots 4-7
	 * INTERP_XY; only channels 2..5 of the 8-slot sequence carry live
	 * results (dst.write), and each slot reads the (j,i) pair in
	 * swapped order (1 - (i % 2)) */
	tmp = r600_get_temp(ctx);
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
			inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}
4806
4807
4808 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
4809 {
4810 struct r600_bytecode_alu alu;
4811 int i, r;
4812
4813 for (i = 0; i < 4; i++) {
4814 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4815 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
4816 alu.op = ALU_OP0_NOP;
4817 alu.dst.chan = i;
4818 } else {
4819 alu.op = ALU_OP1_MOV;
4820 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4821 alu.src[0].sel = ctx->temp_reg;
4822 alu.src[0].chan = i;
4823 }
4824 if (i == 3) {
4825 alu.last = 1;
4826 }
4827 r = r600_bytecode_add_alu(ctx->bc, &alu);
4828 if (r)
4829 return r;
4830 }
4831 return 0;
4832 }
4833
4834 static int tgsi_op3(struct r600_shader_ctx *ctx)
4835 {
4836 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4837 struct r600_bytecode_alu alu;
4838 int i, j, r;
4839 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4840
4841 for (i = 0; i < lasti + 1; i++) {
4842 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4843 continue;
4844
4845 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4846 alu.op = ctx->inst_info->op;
4847 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
4848 r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
4849 }
4850
4851 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4852 alu.dst.chan = i;
4853 alu.dst.write = 1;
4854 alu.is_op3 = 1;
4855 if (i == lasti) {
4856 alu.last = 1;
4857 }
4858 r = r600_bytecode_add_alu(ctx->bc, &alu);
4859 if (r)
4860 return r;
4861 }
4862 return 0;
4863 }
4864
/* Emit a dot product (DP2/DP3/DP4/DPH) as a full four-slot reduction op.
 *
 * All four channels must always be issued; dst.write follows the
 * writemask.  The shorter dot products neutralize the unused channels by
 * forcing their sources to 0, and DPH substitutes 1.0 for src0.w so the w
 * term contributes src1.w.
 */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		/* handle some special cases */
		switch (ctx->inst_info->tgsi_opcode) {
		case TGSI_OPCODE_DP2:
			/* channels 2 and 3 contribute 0*0 */
			if (i > 1) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			/* channel 3 contributes 0*0 */
			if (i > 2) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			/* w term is 1.0 * src1.w */
			if (i == 3) {
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
4914
4915 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
4916 unsigned index)
4917 {
4918 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4919 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
4920 inst->Src[index].Register.File != TGSI_FILE_INPUT &&
4921 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
4922 ctx->src[index].neg || ctx->src[index].abs ||
4923 (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY);
4924 }
4925
4926 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
4927 unsigned index)
4928 {
4929 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4930 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
4931 }
4932
/* Emit a TXF on a buffer texture as a vertex-style VFETCH.
 *
 * The texel index (src0) must live in a GPR; when it needs modifiers or
 * comes from a non-GPR file it is first copied into temp_reg.  On
 * evergreen+ the fetch result is used as-is; on r600-class chips the
 * written components are additionally combined with two constants from
 * the R600_BUFFER_INFO constant buffer (two dwords per buffer id) --
 * presumably per-format masking plus a default-alpha fixup; confirm
 * against the driver's buffer-info constant setup.
 */
static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
{
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_alu alu;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int src_gpr, r, i;
	int id = tgsi_tex_get_src_gpr(ctx, 1);	/* buffer id comes from the sampler operand */

	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
	if (src_requires_loading) {
		/* copy the index into temp_reg so the fetch reads a GPR */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
	vtx.src_gpr = src_gpr;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	/* write only the channels enabled in the writemask; 7 = masked */
	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
	vtx.use_const_fields = 1;

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	if (ctx->bc->chip_class >= EVERGREEN)
		return 0;

	/* pre-evergreen: AND each written component with the first
	 * buffer-info constant dword */
	for (i = 0; i < 4; i++) {
		int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.chan = i;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = i;

		alu.src[1].sel = 512 + (id * 2);
		alu.src[1].chan = i % 4;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* OR the second constant dword into w.
	 * NOTE(review): the guard tests the x/y writemask bits while the
	 * fixup writes channel 3 -- looks intentional but worth confirming. */
	if (inst->Dst[0].Register.WriteMask & 3) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_OR_INT;

		alu.dst.chan = 3;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = 3;

		alu.src[1].sel = 512 + (id * 2) + 1;
		alu.src[1].chan = 0;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
5026
5027 static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
5028 {
5029 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5030 struct r600_bytecode_alu alu;
5031 int r;
5032 int id = tgsi_tex_get_src_gpr(ctx, 1);
5033
5034 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5035 alu.op = ALU_OP1_MOV;
5036
5037 if (ctx->bc->chip_class >= EVERGREEN) {
5038 alu.src[0].sel = 512 + (id / 4);
5039 alu.src[0].chan = id % 4;
5040 } else {
5041 /* r600 we have them at channel 2 of the second dword */
5042 alu.src[0].sel = 512 + (id * 2) + 1;
5043 alu.src[0].chan = 1;
5044 }
5045 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
5046 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
5047 alu.last = 1;
5048 r = r600_bytecode_add_alu(ctx->bc, &alu);
5049 if (r)
5050 return r;
5051 return 0;
5052 }
5053
5054 static int tgsi_tex(struct r600_shader_ctx *ctx)
5055 {
5056 static float one_point_five = 1.5f;
5057 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5058 struct r600_bytecode_tex tex;
5059 struct r600_bytecode_alu alu;
5060 unsigned src_gpr;
5061 int r, i, j;
5062 int opcode;
5063 bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
5064 inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
5065 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
5066 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
5067
5068 bool txf_add_offsets = inst->Texture.NumOffsets &&
5069 inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
5070 inst->Texture.Texture != TGSI_TEXTURE_BUFFER;
5071
5072 /* Texture fetch instructions can only use gprs as source.
5073 * Also they cannot negate the source or take the absolute value */
5074 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
5075 tgsi_tex_src_requires_loading(ctx, 0)) ||
5076 read_compressed_msaa || txf_add_offsets;
5077
5078 boolean src_loaded = FALSE;
5079 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
5080 int8_t offset_x = 0, offset_y = 0, offset_z = 0;
5081 boolean has_txq_cube_array_z = false;
5082 unsigned sampler_index_mode;
5083
5084 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
5085 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
5086 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
5087 if (inst->Dst[0].Register.WriteMask & 4) {
5088 ctx->shader->has_txq_cube_array_z_comp = true;
5089 has_txq_cube_array_z = true;
5090 }
5091
5092 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
5093 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
5094 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
5095 inst->Instruction.Opcode == TGSI_OPCODE_TG4)
5096 sampler_src_reg = 2;
5097
5098 /* TGSI moves the sampler to src reg 3 for TXD */
5099 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
5100 sampler_src_reg = 3;
5101
5102 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
5103 if (sampler_index_mode)
5104 ctx->shader->uses_index_registers = true;
5105
5106 src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
5107
5108 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
5109 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
5110 ctx->shader->uses_tex_buffers = true;
5111 return r600_do_buffer_txq(ctx);
5112 }
5113 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
5114 if (ctx->bc->chip_class < EVERGREEN)
5115 ctx->shader->uses_tex_buffers = true;
5116 return do_vtx_fetch_inst(ctx, src_requires_loading);
5117 }
5118 }
5119
5120 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
5121 int out_chan;
5122 /* Add perspective divide */
5123 if (ctx->bc->chip_class == CAYMAN) {
5124 out_chan = 2;
5125 for (i = 0; i < 3; i++) {
5126 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5127 alu.op = ALU_OP1_RECIP_IEEE;
5128 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
5129
5130 alu.dst.sel = ctx->temp_reg;
5131 alu.dst.chan = i;
5132 if (i == 2)
5133 alu.last = 1;
5134 if (out_chan == i)
5135 alu.dst.write = 1;
5136 r = r600_bytecode_add_alu(ctx->bc, &alu);
5137 if (r)
5138 return r;
5139 }
5140
5141 } else {
5142 out_chan = 3;
5143 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5144 alu.op = ALU_OP1_RECIP_IEEE;
5145 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
5146
5147 alu.dst.sel = ctx->temp_reg;
5148 alu.dst.chan = out_chan;
5149 alu.last = 1;
5150 alu.dst.write = 1;
5151 r = r600_bytecode_add_alu(ctx->bc, &alu);
5152 if (r)
5153 return r;
5154 }
5155
5156 for (i = 0; i < 3; i++) {
5157 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5158 alu.op = ALU_OP2_MUL;
5159 alu.src[0].sel = ctx->temp_reg;
5160 alu.src[0].chan = out_chan;
5161 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
5162 alu.dst.sel = ctx->temp_reg;
5163 alu.dst.chan = i;
5164 alu.dst.write = 1;
5165 r = r600_bytecode_add_alu(ctx->bc, &alu);
5166 if (r)
5167 return r;
5168 }
5169 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5170 alu.op = ALU_OP1_MOV;
5171 alu.src[0].sel = V_SQ_ALU_SRC_1;
5172 alu.src[0].chan = 0;
5173 alu.dst.sel = ctx->temp_reg;
5174 alu.dst.chan = 3;
5175 alu.last = 1;
5176 alu.dst.write = 1;
5177 r = r600_bytecode_add_alu(ctx->bc, &alu);
5178 if (r)
5179 return r;
5180 src_loaded = TRUE;
5181 src_gpr = ctx->temp_reg;
5182 }
5183
5184
5185 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
5186 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
5187 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
5188 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
5189 inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
5190 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
5191
5192 static const unsigned src0_swizzle[] = {2, 2, 0, 1};
5193 static const unsigned src1_swizzle[] = {1, 0, 2, 2};
5194
5195 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
5196 for (i = 0; i < 4; i++) {
5197 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5198 alu.op = ALU_OP2_CUBE;
5199 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
5200 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
5201 alu.dst.sel = ctx->temp_reg;
5202 alu.dst.chan = i;
5203 if (i == 3)
5204 alu.last = 1;
5205 alu.dst.write = 1;
5206 r = r600_bytecode_add_alu(ctx->bc, &alu);
5207 if (r)
5208 return r;
5209 }
5210
5211 /* tmp1.z = RCP_e(|tmp1.z|) */
5212 if (ctx->bc->chip_class == CAYMAN) {
5213 for (i = 0; i < 3; i++) {
5214 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5215 alu.op = ALU_OP1_RECIP_IEEE;
5216 alu.src[0].sel = ctx->temp_reg;
5217 alu.src[0].chan = 2;
5218 alu.src[0].abs = 1;
5219 alu.dst.sel = ctx->temp_reg;
5220 alu.dst.chan = i;
5221 if (i == 2)
5222 alu.dst.write = 1;
5223 if (i == 2)
5224 alu.last = 1;
5225 r = r600_bytecode_add_alu(ctx->bc, &alu);
5226 if (r)
5227 return r;
5228 }
5229 } else {
5230 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5231 alu.op = ALU_OP1_RECIP_IEEE;
5232 alu.src[0].sel = ctx->temp_reg;
5233 alu.src[0].chan = 2;
5234 alu.src[0].abs = 1;
5235 alu.dst.sel = ctx->temp_reg;
5236 alu.dst.chan = 2;
5237 alu.dst.write = 1;
5238 alu.last = 1;
5239 r = r600_bytecode_add_alu(ctx->bc, &alu);
5240 if (r)
5241 return r;
5242 }
5243
5244 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
5245 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
5246 * muladd has no writemask, have to use another temp
5247 */
5248 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5249 alu.op = ALU_OP3_MULADD;
5250 alu.is_op3 = 1;
5251
5252 alu.src[0].sel = ctx->temp_reg;
5253 alu.src[0].chan = 0;
5254 alu.src[1].sel = ctx->temp_reg;
5255 alu.src[1].chan = 2;
5256
5257 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
5258 alu.src[2].chan = 0;
5259 alu.src[2].value = *(uint32_t *)&one_point_five;
5260
5261 alu.dst.sel = ctx->temp_reg;
5262 alu.dst.chan = 0;
5263 alu.dst.write = 1;
5264
5265 r = r600_bytecode_add_alu(ctx->bc, &alu);
5266 if (r)
5267 return r;
5268
5269 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5270 alu.op = ALU_OP3_MULADD;
5271 alu.is_op3 = 1;
5272
5273 alu.src[0].sel = ctx->temp_reg;
5274 alu.src[0].chan = 1;
5275 alu.src[1].sel = ctx->temp_reg;
5276 alu.src[1].chan = 2;
5277
5278 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
5279 alu.src[2].chan = 0;
5280 alu.src[2].value = *(uint32_t *)&one_point_five;
5281
5282 alu.dst.sel = ctx->temp_reg;
5283 alu.dst.chan = 1;
5284 alu.dst.write = 1;
5285
5286 alu.last = 1;
5287 r = r600_bytecode_add_alu(ctx->bc, &alu);
5288 if (r)
5289 return r;
5290 /* write initial compare value into Z component
5291 - W src 0 for shadow cube
5292 - X src 1 for shadow cube array */
5293 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
5294 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
5295 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5296 alu.op = ALU_OP1_MOV;
5297 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
5298 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
5299 else
5300 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
5301 alu.dst.sel = ctx->temp_reg;
5302 alu.dst.chan = 2;
5303 alu.dst.write = 1;
5304 alu.last = 1;
5305 r = r600_bytecode_add_alu(ctx->bc, &alu);
5306 if (r)
5307 return r;
5308 }
5309
5310 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
5311 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
5312 if (ctx->bc->chip_class >= EVERGREEN) {
5313 int mytmp = r600_get_temp(ctx);
5314 static const float eight = 8.0f;
5315 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5316 alu.op = ALU_OP1_MOV;
5317 alu.src[0].sel = ctx->temp_reg;
5318 alu.src[0].chan = 3;
5319 alu.dst.sel = mytmp;
5320 alu.dst.chan = 0;
5321 alu.dst.write = 1;
5322 alu.last = 1;
5323 r = r600_bytecode_add_alu(ctx->bc, &alu);
5324 if (r)
5325 return r;
5326
5327 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */
5328 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5329 alu.op = ALU_OP3_MULADD;
5330 alu.is_op3 = 1;
5331 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
5332 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5333 alu.src[1].chan = 0;
5334 alu.src[1].value = *(uint32_t *)&eight;
5335 alu.src[2].sel = mytmp;
5336 alu.src[2].chan = 0;
5337 alu.dst.sel = ctx->temp_reg;
5338 alu.dst.chan = 3;
5339 alu.dst.write = 1;
5340 alu.last = 1;
5341 r = r600_bytecode_add_alu(ctx->bc, &alu);
5342 if (r)
5343 return r;
5344 } else if (ctx->bc->chip_class < EVERGREEN) {
5345 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
5346 tex.op = FETCH_OP_SET_CUBEMAP_INDEX;
5347 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
5348 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
5349 tex.src_gpr = r600_get_temp(ctx);
5350 tex.src_sel_x = 0;
5351 tex.src_sel_y = 0;
5352 tex.src_sel_z = 0;
5353 tex.src_sel_w = 0;
5354 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
5355 tex.coord_type_x = 1;
5356 tex.coord_type_y = 1;
5357 tex.coord_type_z = 1;
5358 tex.coord_type_w = 1;
5359 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5360 alu.op = ALU_OP1_MOV;
5361 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
5362 alu.dst.sel = tex.src_gpr;
5363 alu.dst.chan = 0;
5364 alu.last = 1;
5365 alu.dst.write = 1;
5366 r = r600_bytecode_add_alu(ctx->bc, &alu);
5367 if (r)
5368 return r;
5369
5370 r = r600_bytecode_add_tex(ctx->bc, &tex);
5371 if (r)
5372 return r;
5373 }
5374
5375 }
5376
5377 /* for cube forms of lod and bias we need to route things */
5378 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
5379 inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
5380 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
5381 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
5382 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5383 alu.op = ALU_OP1_MOV;
5384 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
5385 inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
5386 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
5387 else
5388 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
5389 alu.dst.sel = ctx->temp_reg;
5390 alu.dst.chan = 2;
5391 alu.last = 1;
5392 alu.dst.write = 1;
5393 r = r600_bytecode_add_alu(ctx->bc, &alu);
5394 if (r)
5395 return r;
5396 }
5397
5398 src_loaded = TRUE;
5399 src_gpr = ctx->temp_reg;
5400 }
5401
5402 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
5403 int temp_h = 0, temp_v = 0;
5404 int start_val = 0;
5405
5406 /* if we've already loaded the src (i.e. CUBE don't reload it). */
5407 if (src_loaded == TRUE)
5408 start_val = 1;
5409 else
5410 src_loaded = TRUE;
5411 for (i = start_val; i < 3; i++) {
5412 int treg = r600_get_temp(ctx);
5413
5414 if (i == 0)
5415 src_gpr = treg;
5416 else if (i == 1)
5417 temp_h = treg;
5418 else
5419 temp_v = treg;
5420
5421 for (j = 0; j < 4; j++) {
5422 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5423 alu.op = ALU_OP1_MOV;
5424 r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
5425 alu.dst.sel = treg;
5426 alu.dst.chan = j;
5427 if (j == 3)
5428 alu.last = 1;
5429 alu.dst.write = 1;
5430 r = r600_bytecode_add_alu(ctx->bc, &alu);
5431 if (r)
5432 return r;
5433 }
5434 }
5435 for (i = 1; i < 3; i++) {
5436 /* set gradients h/v */
5437 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
5438 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
5439 FETCH_OP_SET_GRADIENTS_V;
5440 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
5441 tex.sampler_index_mode = sampler_index_mode;
5442 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
5443 tex.resource_index_mode = sampler_index_mode;
5444
5445 tex.src_gpr = (i == 1) ? temp_h : temp_v;
5446 tex.src_sel_x = 0;
5447 tex.src_sel_y = 1;
5448 tex.src_sel_z = 2;
5449 tex.src_sel_w = 3;
5450
5451 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */
5452 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
5453 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
5454 tex.coord_type_x = 1;
5455 tex.coord_type_y = 1;
5456 tex.coord_type_z = 1;
5457 tex.coord_type_w = 1;
5458 }
5459 r = r600_bytecode_add_tex(ctx->bc, &tex);
5460 if (r)
5461 return r;
5462 }
5463 }
5464
5465 if (src_requires_loading && !src_loaded) {
5466 for (i = 0; i < 4; i++) {
5467 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5468 alu.op = ALU_OP1_MOV;
5469 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
5470 alu.dst.sel = ctx->temp_reg;
5471 alu.dst.chan = i;
5472 if (i == 3)
5473 alu.last = 1;
5474 alu.dst.write = 1;
5475 r = r600_bytecode_add_alu(ctx->bc, &alu);
5476 if (r)
5477 return r;
5478 }
5479 src_loaded = TRUE;
5480 src_gpr = ctx->temp_reg;
5481 }
5482
5483 /* get offset values */
5484 if (inst->Texture.NumOffsets) {
5485 assert(inst->Texture.NumOffsets == 1);
5486
5487 /* The texture offset feature doesn't work with the TXF instruction
5488 * and must be emulated by adding the offset to the texture coordinates. */
5489 if (txf_add_offsets) {
5490 const struct tgsi_texture_offset *off = inst->TexOffsets;
5491
5492 switch (inst->Texture.Texture) {
5493 case TGSI_TEXTURE_3D:
5494 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5495 alu.op = ALU_OP2_ADD_INT;
5496 alu.src[0].sel = src_gpr;
5497 alu.src[0].chan = 2;
5498 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5499 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ];
5500 alu.dst.sel = src_gpr;
5501 alu.dst.chan = 2;
5502 alu.dst.write = 1;
5503 alu.last = 1;
5504 r = r600_bytecode_add_alu(ctx->bc, &alu);
5505 if (r)
5506 return r;
5507 /* fall through */
5508
5509 case TGSI_TEXTURE_2D:
5510 case TGSI_TEXTURE_SHADOW2D:
5511 case TGSI_TEXTURE_RECT:
5512 case TGSI_TEXTURE_SHADOWRECT:
5513 case TGSI_TEXTURE_2D_ARRAY:
5514 case TGSI_TEXTURE_SHADOW2D_ARRAY:
5515 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5516 alu.op = ALU_OP2_ADD_INT;
5517 alu.src[0].sel = src_gpr;
5518 alu.src[0].chan = 1;
5519 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5520 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY];
5521 alu.dst.sel = src_gpr;
5522 alu.dst.chan = 1;
5523 alu.dst.write = 1;
5524 alu.last = 1;
5525 r = r600_bytecode_add_alu(ctx->bc, &alu);
5526 if (r)
5527 return r;
5528 /* fall through */
5529
5530 case TGSI_TEXTURE_1D:
5531 case TGSI_TEXTURE_SHADOW1D:
5532 case TGSI_TEXTURE_1D_ARRAY:
5533 case TGSI_TEXTURE_SHADOW1D_ARRAY:
5534 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5535 alu.op = ALU_OP2_ADD_INT;
5536 alu.src[0].sel = src_gpr;
5537 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5538 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX];
5539 alu.dst.sel = src_gpr;
5540 alu.dst.write = 1;
5541 alu.last = 1;
5542 r = r600_bytecode_add_alu(ctx->bc, &alu);
5543 if (r)
5544 return r;
5545 break;
5546 /* texture offsets do not apply to other texture targets */
5547 }
5548 } else {
5549 switch (inst->Texture.Texture) {
5550 case TGSI_TEXTURE_3D:
5551 offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
5552 /* fallthrough */
5553 case TGSI_TEXTURE_2D:
5554 case TGSI_TEXTURE_SHADOW2D:
5555 case TGSI_TEXTURE_RECT:
5556 case TGSI_TEXTURE_SHADOWRECT:
5557 case TGSI_TEXTURE_2D_ARRAY:
5558 case TGSI_TEXTURE_SHADOW2D_ARRAY:
5559 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
5560 /* fallthrough */
5561 case TGSI_TEXTURE_1D:
5562 case TGSI_TEXTURE_SHADOW1D:
5563 case TGSI_TEXTURE_1D_ARRAY:
5564 case TGSI_TEXTURE_SHADOW1D_ARRAY:
5565 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
5566 }
5567 }
5568 }
5569
5570 /* Obtain the sample index for reading a compressed MSAA color texture.
5571 * To read the FMASK, we use the ldfptr instruction, which tells us
5572 * where the samples are stored.
5573 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
5574 * which is the identity mapping. Each nibble says which physical sample
5575 * should be fetched to get that sample.
5576 *
5577 * Assume src.z contains the sample index. It should be modified like this:
5578 * src.z = (ldfptr() >> (src.z * 4)) & 0xF;
5579 * Then fetch the texel with src.
5580 */
5581 if (read_compressed_msaa) {
5582 unsigned sample_chan = 3;
5583 unsigned temp = r600_get_temp(ctx);
5584 assert(src_loaded);
5585
5586 /* temp.w = ldfptr() */
5587 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
5588 tex.op = FETCH_OP_LD;
5589 tex.inst_mod = 1; /* to indicate this is ldfptr */
5590 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
5591 tex.sampler_index_mode = sampler_index_mode;
5592 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
5593 tex.resource_index_mode = sampler_index_mode;
5594 tex.src_gpr = src_gpr;
5595 tex.dst_gpr = temp;
5596 tex.dst_sel_x = 7; /* mask out these components */
5597 tex.dst_sel_y = 7;
5598 tex.dst_sel_z = 7;
5599 tex.dst_sel_w = 0; /* store X */
5600 tex.src_sel_x = 0;
5601 tex.src_sel_y = 1;
5602 tex.src_sel_z = 2;
5603 tex.src_sel_w = 3;
5604 tex.offset_x = offset_x;
5605 tex.offset_y = offset_y;
5606 tex.offset_z = offset_z;
5607 r = r600_bytecode_add_tex(ctx->bc, &tex);
5608 if (r)
5609 return r;
5610
5611 /* temp.x = sample_index*4 */
5612 if (ctx->bc->chip_class == CAYMAN) {
5613 for (i = 0 ; i < 4; i++) {
5614 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5615 alu.op = ALU_OP2_MULLO_INT;
5616 alu.src[0].sel = src_gpr;
5617 alu.src[0].chan = sample_chan;
5618 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5619 alu.src[1].value = 4;
5620 alu.dst.sel = temp;
5621 alu.dst.chan = i;
5622 alu.dst.write = i == 0;
5623 if (i == 3)
5624 alu.last = 1;
5625 r = r600_bytecode_add_alu(ctx->bc, &alu);
5626 if (r)
5627 return r;
5628 }
5629 } else {
5630 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5631 alu.op = ALU_OP2_MULLO_INT;
5632 alu.src[0].sel = src_gpr;
5633 alu.src[0].chan = sample_chan;
5634 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5635 alu.src[1].value = 4;
5636 alu.dst.sel = temp;
5637 alu.dst.chan = 0;
5638 alu.dst.write = 1;
5639 alu.last = 1;
5640 r = r600_bytecode_add_alu(ctx->bc, &alu);
5641 if (r)
5642 return r;
5643 }
5644
5645 /* sample_index = temp.w >> temp.x */
5646 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5647 alu.op = ALU_OP2_LSHR_INT;
5648 alu.src[0].sel = temp;
5649 alu.src[0].chan = 3;
5650 alu.src[1].sel = temp;
5651 alu.src[1].chan = 0;
5652 alu.dst.sel = src_gpr;
5653 alu.dst.chan = sample_chan;
5654 alu.dst.write = 1;
5655 alu.last = 1;
5656 r = r600_bytecode_add_alu(ctx->bc, &alu);
5657 if (r)
5658 return r;
5659
5660 /* sample_index & 0xF */
5661 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5662 alu.op = ALU_OP2_AND_INT;
5663 alu.src[0].sel = src_gpr;
5664 alu.src[0].chan = sample_chan;
5665 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5666 alu.src[1].value = 0xF;
5667 alu.dst.sel = src_gpr;
5668 alu.dst.chan = sample_chan;
5669 alu.dst.write = 1;
5670 alu.last = 1;
5671 r = r600_bytecode_add_alu(ctx->bc, &alu);
5672 if (r)
5673 return r;
5674 #if 0
5675 /* visualize the FMASK */
5676 for (i = 0; i < 4; i++) {
5677 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5678 alu.op = ALU_OP1_INT_TO_FLT;
5679 alu.src[0].sel = src_gpr;
5680 alu.src[0].chan = sample_chan;
5681 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
5682 alu.dst.chan = i;
5683 alu.dst.write = 1;
5684 alu.last = 1;
5685 r = r600_bytecode_add_alu(ctx->bc, &alu);
5686 if (r)
5687 return r;
5688 }
5689 return 0;
5690 #endif
5691 }
5692
5693 /* does this shader want a num layers from TXQ for a cube array? */
5694 if (has_txq_cube_array_z) {
5695 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
5696
5697 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5698 alu.op = ALU_OP1_MOV;
5699
5700 alu.src[0].sel = 512 + (id / 4);
5701 alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER;
5702 alu.src[0].chan = id % 4;
5703 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
5704 alu.last = 1;
5705 r = r600_bytecode_add_alu(ctx->bc, &alu);
5706 if (r)
5707 return r;
5708 /* disable writemask from texture instruction */
5709 inst->Dst[0].Register.WriteMask &= ~4;
5710 }
5711
5712 opcode = ctx->inst_info->op;
5713 if (opcode == FETCH_OP_GATHER4 &&
5714 inst->TexOffsets[0].File != TGSI_FILE_NULL &&
5715 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) {
5716 opcode = FETCH_OP_GATHER4_O;
5717
5718 /* GATHER4_O/GATHER4_C_O use offset values loaded by
5719 SET_TEXTURE_OFFSETS instruction. The immediate offset values
5720 encoded in the instruction are ignored. */
5721 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
5722 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS;
5723 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
5724 tex.sampler_index_mode = sampler_index_mode;
5725 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
5726 tex.resource_index_mode = sampler_index_mode;
5727
5728 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index;
5729 tex.src_sel_x = inst->TexOffsets[0].SwizzleX;
5730 tex.src_sel_y = inst->TexOffsets[0].SwizzleY;
5731 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ;
5732 tex.src_sel_w = 4;
5733
5734 tex.dst_sel_x = 7;
5735 tex.dst_sel_y = 7;
5736 tex.dst_sel_z = 7;
5737 tex.dst_sel_w = 7;
5738
5739 r = r600_bytecode_add_tex(ctx->bc, &tex);
5740 if (r)
5741 return r;
5742 }
5743
5744 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
5745 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
5746 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
5747 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
5748 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
5749 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
5750 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
5751 switch (opcode) {
5752 case FETCH_OP_SAMPLE:
5753 opcode = FETCH_OP_SAMPLE_C;
5754 break;
5755 case FETCH_OP_SAMPLE_L:
5756 opcode = FETCH_OP_SAMPLE_C_L;
5757 break;
5758 case FETCH_OP_SAMPLE_LB:
5759 opcode = FETCH_OP_SAMPLE_C_LB;
5760 break;
5761 case FETCH_OP_SAMPLE_G:
5762 opcode = FETCH_OP_SAMPLE_C_G;
5763 break;
5764 /* Texture gather variants */
5765 case FETCH_OP_GATHER4:
5766 opcode = FETCH_OP_GATHER4_C;
5767 break;
5768 case FETCH_OP_GATHER4_O:
5769 opcode = FETCH_OP_GATHER4_C_O;
5770 break;
5771 }
5772 }
5773
5774 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
5775 tex.op = opcode;
5776
5777 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
5778 tex.sampler_index_mode = sampler_index_mode;
5779 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
5780 tex.resource_index_mode = sampler_index_mode;
5781 tex.src_gpr = src_gpr;
5782 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
5783
5784 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE ||
5785 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) {
5786 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */
5787 }
5788
5789 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
5790 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
5791 tex.inst_mod = texture_component_select;
5792
5793 if (ctx->bc->chip_class == CAYMAN) {
5794 /* GATHER4 result order is different from TGSI TG4 */
5795 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7;
5796 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7;
5797 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7;
5798 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
5799 } else {
5800 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
5801 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
5802 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
5803 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
5804 }
5805 }
5806 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
5807 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
5808 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
5809 tex.dst_sel_z = 7;
5810 tex.dst_sel_w = 7;
5811 }
5812 else {
5813 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
5814 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
5815 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
5816 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
5817 }
5818
5819
5820 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
5821 tex.src_sel_x = 4;
5822 tex.src_sel_y = 4;
5823 tex.src_sel_z = 4;
5824 tex.src_sel_w = 4;
5825 } else if (src_loaded) {
5826 tex.src_sel_x = 0;
5827 tex.src_sel_y = 1;
5828 tex.src_sel_z = 2;
5829 tex.src_sel_w = 3;
5830 } else {
5831 tex.src_sel_x = ctx->src[0].swizzle[0];
5832 tex.src_sel_y = ctx->src[0].swizzle[1];
5833 tex.src_sel_z = ctx->src[0].swizzle[2];
5834 tex.src_sel_w = ctx->src[0].swizzle[3];
5835 tex.src_rel = ctx->src[0].rel;
5836 }
5837
5838 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
5839 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
5840 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
5841 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
5842 tex.src_sel_x = 1;
5843 tex.src_sel_y = 0;
5844 tex.src_sel_z = 3;
5845 tex.src_sel_w = 2; /* route Z compare or Lod value into W */
5846 }
5847
5848 if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
5849 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
5850 tex.coord_type_x = 1;
5851 tex.coord_type_y = 1;
5852 }
5853 tex.coord_type_z = 1;
5854 tex.coord_type_w = 1;
5855
5856 tex.offset_x = offset_x;
5857 tex.offset_y = offset_y;
5858 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
5859 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
5860 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) {
5861 tex.offset_z = 0;
5862 }
5863 else {
5864 tex.offset_z = offset_z;
5865 }
5866
5867 /* Put the depth for comparison in W.
5868 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
5869 * Some instructions expect the depth in Z. */
5870 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
5871 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
5872 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
5873 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
5874 opcode != FETCH_OP_SAMPLE_C_L &&
5875 opcode != FETCH_OP_SAMPLE_C_LB) {
5876 tex.src_sel_w = tex.src_sel_z;
5877 }
5878
5879 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
5880 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
5881 if (opcode == FETCH_OP_SAMPLE_C_L ||
5882 opcode == FETCH_OP_SAMPLE_C_LB) {
5883 /* the array index is read from Y */
5884 tex.coord_type_y = 0;
5885 } else {
5886 /* the array index is read from Z */
5887 tex.coord_type_z = 0;
5888 tex.src_sel_z = tex.src_sel_y;
5889 }
5890 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
5891 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
5892 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
5893 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
5894 (ctx->bc->chip_class >= EVERGREEN)))
5895 /* the array index is read from Z */
5896 tex.coord_type_z = 0;
5897
5898 /* mask unused source components */
5899 if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
5900 switch (inst->Texture.Texture) {
5901 case TGSI_TEXTURE_2D:
5902 case TGSI_TEXTURE_RECT:
5903 tex.src_sel_z = 7;
5904 tex.src_sel_w = 7;
5905 break;
5906 case TGSI_TEXTURE_1D_ARRAY:
5907 tex.src_sel_y = 7;
5908 tex.src_sel_w = 7;
5909 break;
5910 case TGSI_TEXTURE_1D:
5911 tex.src_sel_y = 7;
5912 tex.src_sel_z = 7;
5913 tex.src_sel_w = 7;
5914 break;
5915 }
5916 }
5917
5918 r = r600_bytecode_add_tex(ctx->bc, &tex);
5919 if (r)
5920 return r;
5921
5922 /* add shadow ambient support - gallium doesn't do it yet */
5923 return 0;
5924 }
5925
5926 static int tgsi_lrp(struct r600_shader_ctx *ctx)
5927 {
5928 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5929 struct r600_bytecode_alu alu;
5930 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5931 unsigned i;
5932 int r;
5933
5934 /* optimize if it's just an equal balance */
5935 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
5936 for (i = 0; i < lasti + 1; i++) {
5937 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5938 continue;
5939
5940 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5941 alu.op = ALU_OP2_ADD;
5942 r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
5943 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5944 alu.omod = 3;
5945 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5946 alu.dst.chan = i;
5947 if (i == lasti) {
5948 alu.last = 1;
5949 }
5950 r = r600_bytecode_add_alu(ctx->bc, &alu);
5951 if (r)
5952 return r;
5953 }
5954 return 0;
5955 }
5956
5957 /* 1 - src0 */
5958 for (i = 0; i < lasti + 1; i++) {
5959 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5960 continue;
5961
5962 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5963 alu.op = ALU_OP2_ADD;
5964 alu.src[0].sel = V_SQ_ALU_SRC_1;
5965 alu.src[0].chan = 0;
5966 r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
5967 r600_bytecode_src_toggle_neg(&alu.src[1]);
5968 alu.dst.sel = ctx->temp_reg;
5969 alu.dst.chan = i;
5970 if (i == lasti) {
5971 alu.last = 1;
5972 }
5973 alu.dst.write = 1;
5974 r = r600_bytecode_add_alu(ctx->bc, &alu);
5975 if (r)
5976 return r;
5977 }
5978
5979 /* (1 - src0) * src2 */
5980 for (i = 0; i < lasti + 1; i++) {
5981 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5982 continue;
5983
5984 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5985 alu.op = ALU_OP2_MUL;
5986 alu.src[0].sel = ctx->temp_reg;
5987 alu.src[0].chan = i;
5988 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5989 alu.dst.sel = ctx->temp_reg;
5990 alu.dst.chan = i;
5991 if (i == lasti) {
5992 alu.last = 1;
5993 }
5994 alu.dst.write = 1;
5995 r = r600_bytecode_add_alu(ctx->bc, &alu);
5996 if (r)
5997 return r;
5998 }
5999
6000 /* src0 * src1 + (1 - src0) * src2 */
6001 for (i = 0; i < lasti + 1; i++) {
6002 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
6003 continue;
6004
6005 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6006 alu.op = ALU_OP3_MULADD;
6007 alu.is_op3 = 1;
6008 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6009 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6010 alu.src[2].sel = ctx->temp_reg;
6011 alu.src[2].chan = i;
6012
6013 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6014 alu.dst.chan = i;
6015 if (i == lasti) {
6016 alu.last = 1;
6017 }
6018 r = r600_bytecode_add_alu(ctx->bc, &alu);
6019 if (r)
6020 return r;
6021 }
6022 return 0;
6023 }
6024
6025 static int tgsi_cmp(struct r600_shader_ctx *ctx)
6026 {
6027 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6028 struct r600_bytecode_alu alu;
6029 int i, r;
6030 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
6031
6032 for (i = 0; i < lasti + 1; i++) {
6033 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
6034 continue;
6035
6036 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6037 alu.op = ALU_OP3_CNDGE;
6038 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6039 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
6040 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
6041 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6042 alu.dst.chan = i;
6043 alu.dst.write = 1;
6044 alu.is_op3 = 1;
6045 if (i == lasti)
6046 alu.last = 1;
6047 r = r600_bytecode_add_alu(ctx->bc, &alu);
6048 if (r)
6049 return r;
6050 }
6051 return 0;
6052 }
6053
6054 static int tgsi_ucmp(struct r600_shader_ctx *ctx)
6055 {
6056 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6057 struct r600_bytecode_alu alu;
6058 int i, r;
6059 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
6060
6061 for (i = 0; i < lasti + 1; i++) {
6062 if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
6063 continue;
6064
6065 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6066 alu.op = ALU_OP3_CNDGE_INT;
6067 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6068 r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
6069 r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
6070 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6071 alu.dst.chan = i;
6072 alu.dst.write = 1;
6073 alu.is_op3 = 1;
6074 if (i == lasti)
6075 alu.last = 1;
6076 r = r600_bytecode_add_alu(ctx->bc, &alu);
6077 if (r)
6078 return r;
6079 }
6080 return 0;
6081 }
6082
6083 static int tgsi_xpd(struct r600_shader_ctx *ctx)
6084 {
6085 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6086 static const unsigned int src0_swizzle[] = {2, 0, 1};
6087 static const unsigned int src1_swizzle[] = {1, 2, 0};
6088 struct r600_bytecode_alu alu;
6089 uint32_t use_temp = 0;
6090 int i, r;
6091
6092 if (inst->Dst[0].Register.WriteMask != 0xf)
6093 use_temp = 1;
6094
6095 for (i = 0; i < 4; i++) {
6096 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6097 alu.op = ALU_OP2_MUL;
6098 if (i < 3) {
6099 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
6100 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
6101 } else {
6102 alu.src[0].sel = V_SQ_ALU_SRC_0;
6103 alu.src[0].chan = i;
6104 alu.src[1].sel = V_SQ_ALU_SRC_0;
6105 alu.src[1].chan = i;
6106 }
6107
6108 alu.dst.sel = ctx->temp_reg;
6109 alu.dst.chan = i;
6110 alu.dst.write = 1;
6111
6112 if (i == 3)
6113 alu.last = 1;
6114 r = r600_bytecode_add_alu(ctx->bc, &alu);
6115 if (r)
6116 return r;
6117 }
6118
6119 for (i = 0; i < 4; i++) {
6120 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6121 alu.op = ALU_OP3_MULADD;
6122
6123 if (i < 3) {
6124 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
6125 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
6126 } else {
6127 alu.src[0].sel = V_SQ_ALU_SRC_0;
6128 alu.src[0].chan = i;
6129 alu.src[1].sel = V_SQ_ALU_SRC_0;
6130 alu.src[1].chan = i;
6131 }
6132
6133 alu.src[2].sel = ctx->temp_reg;
6134 alu.src[2].neg = 1;
6135 alu.src[2].chan = i;
6136
6137 if (use_temp)
6138 alu.dst.sel = ctx->temp_reg;
6139 else
6140 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6141 alu.dst.chan = i;
6142 alu.dst.write = 1;
6143 alu.is_op3 = 1;
6144 if (i == 3)
6145 alu.last = 1;
6146 r = r600_bytecode_add_alu(ctx->bc, &alu);
6147 if (r)
6148 return r;
6149 }
6150 if (use_temp)
6151 return tgsi_helper_copy(ctx, inst);
6152 return 0;
6153 }
6154
/* Translate TGSI EXP: per the TGSI spec the four result channels are
 * x = 2^floor(src.x), y = src.x - floor(src.x), z = 2^src.x (rough
 * approximation), w = 1.0.  Everything is built in ctx->temp_reg and
 * copied to the real destination by tgsi_helper_copy() at the end. */
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i;

	/* result.x = 2^floor(src); */
	if (inst->Dst[0].Register.WriteMask & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		/* temp.x = floor(src.x) */
		alu.op = ALU_OP1_FLOOR;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* temp.x = 2^temp.x.  NOTE(review): alu is reused here without
		 * a fresh memset, so src modifier flags (neg/abs/rel) filled in
		 * by r600_bytecode_src() for the FLOOR above carry over into
		 * this EXP_IEEE — presumably benign for typical sources, but
		 * worth confirming for inputs with modifiers. */
		if (ctx->bc->chip_class == CAYMAN) {
			/* CAYMAN: transcendental op issued across three vector
			 * lanes (see the CAYMAN notes at the top of this file);
			 * only lane 0's result is kept. */
			for (i = 0; i < 3; i++) {
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = i == 0;
				alu.last = i == 2;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			alu.op = ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 0;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.y = tmp - floor(tmp); */
	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		/* FRACT computes src - floor(src) directly. */
		alu.op = ALU_OP1_FRACT;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
#if 0
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;
#endif
		alu.dst.write = 1;
		alu.dst.chan = 1;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = RoughApprox2ToX(tmp);*/
	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
		if (ctx->bc->chip_class == CAYMAN) {
			/* Same three-lane replication as above, but the result
			 * lands in temp.z, so only lane 2 writes. */
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_EXP_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 2) {
					alu.dst.write = 1;
					alu.last = 1;
				}

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_EXP_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;
			alu.dst.chan = 2;

			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0;*/
	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		/* MOV the inline 1.0 constant into temp.w. */
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	/* Copy the masked channels from temp_reg to the real destination. */
	return tgsi_helper_copy(ctx, inst);
}
6283
6284 static int tgsi_log(struct r600_shader_ctx *ctx)
6285 {
6286 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6287 struct r600_bytecode_alu alu;
6288 int r;
6289 int i;
6290
6291 /* result.x = floor(log2(|src|)); */
6292 if (inst->Dst[0].Register.WriteMask & 1) {
6293 if (ctx->bc->chip_class == CAYMAN) {
6294 for (i = 0; i < 3; i++) {
6295 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6296
6297 alu.op = ALU_OP1_LOG_IEEE;
6298 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6299 r600_bytecode_src_set_abs(&alu.src[0]);
6300
6301 alu.dst.sel = ctx->temp_reg;
6302 alu.dst.chan = i;
6303 if (i == 0)
6304 alu.dst.write = 1;
6305 if (i == 2)
6306 alu.last = 1;
6307 r = r600_bytecode_add_alu(ctx->bc, &alu);
6308 if (r)
6309 return r;
6310 }
6311
6312 } else {
6313 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6314
6315 alu.op = ALU_OP1_LOG_IEEE;
6316 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6317 r600_bytecode_src_set_abs(&alu.src[0]);
6318
6319 alu.dst.sel = ctx->temp_reg;
6320 alu.dst.chan = 0;
6321 alu.dst.write = 1;
6322 alu.last = 1;
6323 r = r600_bytecode_add_alu(ctx->bc, &alu);
6324 if (r)
6325 return r;
6326 }
6327
6328 alu.op = ALU_OP1_FLOOR;
6329 alu.src[0].sel = ctx->temp_reg;
6330 alu.src[0].chan = 0;
6331
6332 alu.dst.sel = ctx->temp_reg;
6333 alu.dst.chan = 0;
6334 alu.dst.write = 1;
6335 alu.last = 1;
6336
6337 r = r600_bytecode_add_alu(ctx->bc, &alu);
6338 if (r)
6339 return r;
6340 }
6341
6342 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
6343 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
6344
6345 if (ctx->bc->chip_class == CAYMAN) {
6346 for (i = 0; i < 3; i++) {
6347 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6348
6349 alu.op = ALU_OP1_LOG_IEEE;
6350 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6351 r600_bytecode_src_set_abs(&alu.src[0]);
6352
6353 alu.dst.sel = ctx->temp_reg;
6354 alu.dst.chan = i;
6355 if (i == 1)
6356 alu.dst.write = 1;
6357 if (i == 2)
6358 alu.last = 1;
6359
6360 r = r600_bytecode_add_alu(ctx->bc, &alu);
6361 if (r)
6362 return r;
6363 }
6364 } else {
6365 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6366
6367 alu.op = ALU_OP1_LOG_IEEE;
6368 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6369 r600_bytecode_src_set_abs(&alu.src[0]);
6370
6371 alu.dst.sel = ctx->temp_reg;
6372 alu.dst.chan = 1;
6373 alu.dst.write = 1;
6374 alu.last = 1;
6375
6376 r = r600_bytecode_add_alu(ctx->bc, &alu);
6377 if (r)
6378 return r;
6379 }
6380
6381 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6382
6383 alu.op = ALU_OP1_FLOOR;
6384 alu.src[0].sel = ctx->temp_reg;
6385 alu.src[0].chan = 1;
6386
6387 alu.dst.sel = ctx->temp_reg;
6388 alu.dst.chan = 1;
6389 alu.dst.write = 1;
6390 alu.last = 1;
6391
6392 r = r600_bytecode_add_alu(ctx->bc, &alu);
6393 if (r)
6394 return r;
6395
6396 if (ctx->bc->chip_class == CAYMAN) {
6397 for (i = 0; i < 3; i++) {
6398 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6399 alu.op = ALU_OP1_EXP_IEEE;
6400 alu.src[0].sel = ctx->temp_reg;
6401 alu.src[0].chan = 1;
6402
6403 alu.dst.sel = ctx->temp_reg;
6404 alu.dst.chan = i;
6405 if (i == 1)
6406 alu.dst.write = 1;
6407 if (i == 2)
6408 alu.last = 1;
6409
6410 r = r600_bytecode_add_alu(ctx->bc, &alu);
6411 if (r)
6412 return r;
6413 }
6414 } else {
6415 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6416 alu.op = ALU_OP1_EXP_IEEE;
6417 alu.src[0].sel = ctx->temp_reg;
6418 alu.src[0].chan = 1;
6419
6420 alu.dst.sel = ctx->temp_reg;
6421 alu.dst.chan = 1;
6422 alu.dst.write = 1;
6423 alu.last = 1;
6424
6425 r = r600_bytecode_add_alu(ctx->bc, &alu);
6426 if (r)
6427 return r;
6428 }
6429
6430 if (ctx->bc->chip_class == CAYMAN) {
6431 for (i = 0; i < 3; i++) {
6432 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6433 alu.op = ALU_OP1_RECIP_IEEE;
6434 alu.src[0].sel = ctx->temp_reg;
6435 alu.src[0].chan = 1;
6436
6437 alu.dst.sel = ctx->temp_reg;
6438 alu.dst.chan = i;
6439 if (i == 1)
6440 alu.dst.write = 1;
6441 if (i == 2)
6442 alu.last = 1;
6443
6444 r = r600_bytecode_add_alu(ctx->bc, &alu);
6445 if (r)
6446 return r;
6447 }
6448 } else {
6449 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6450 alu.op = ALU_OP1_RECIP_IEEE;
6451 alu.src[0].sel = ctx->temp_reg;
6452 alu.src[0].chan = 1;
6453
6454 alu.dst.sel = ctx->temp_reg;
6455 alu.dst.chan = 1;
6456 alu.dst.write = 1;
6457 alu.last = 1;
6458
6459 r = r600_bytecode_add_alu(ctx->bc, &alu);
6460 if (r)
6461 return r;
6462 }
6463
6464 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6465
6466 alu.op = ALU_OP2_MUL;
6467
6468 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6469 r600_bytecode_src_set_abs(&alu.src[0]);
6470
6471 alu.src[1].sel = ctx->temp_reg;
6472 alu.src[1].chan = 1;
6473
6474 alu.dst.sel = ctx->temp_reg;
6475 alu.dst.chan = 1;
6476 alu.dst.write = 1;
6477 alu.last = 1;
6478
6479 r = r600_bytecode_add_alu(ctx->bc, &alu);
6480 if (r)
6481 return r;
6482 }
6483
6484 /* result.z = log2(|src|);*/
6485 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
6486 if (ctx->bc->chip_class == CAYMAN) {
6487 for (i = 0; i < 3; i++) {
6488 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6489
6490 alu.op = ALU_OP1_LOG_IEEE;
6491 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6492 r600_bytecode_src_set_abs(&alu.src[0]);
6493
6494 alu.dst.sel = ctx->temp_reg;
6495 if (i == 2)
6496 alu.dst.write = 1;
6497 alu.dst.chan = i;
6498 if (i == 2)
6499 alu.last = 1;
6500
6501 r = r600_bytecode_add_alu(ctx->bc, &alu);
6502 if (r)
6503 return r;
6504 }
6505 } else {
6506 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6507
6508 alu.op = ALU_OP1_LOG_IEEE;
6509 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6510 r600_bytecode_src_set_abs(&alu.src[0]);
6511
6512 alu.dst.sel = ctx->temp_reg;
6513 alu.dst.write = 1;
6514 alu.dst.chan = 2;
6515 alu.last = 1;
6516
6517 r = r600_bytecode_add_alu(ctx->bc, &alu);
6518 if (r)
6519 return r;
6520 }
6521 }
6522
6523 /* result.w = 1.0; */
6524 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
6525 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6526
6527 alu.op = ALU_OP1_MOV;
6528 alu.src[0].sel = V_SQ_ALU_SRC_1;
6529 alu.src[0].chan = 0;
6530
6531 alu.dst.sel = ctx->temp_reg;
6532 alu.dst.chan = 3;
6533 alu.dst.write = 1;
6534 alu.last = 1;
6535
6536 r = r600_bytecode_add_alu(ctx->bc, &alu);
6537 if (r)
6538 return r;
6539 }
6540
6541 return tgsi_helper_copy(ctx, inst);
6542 }
6543
/* Evergreen+ ARL/ARR/UARL: load an address/index register.
 *
 * Converts (or moves) the source into the backing register for the
 * addressed index: Dst index 0 uses bc->ar_reg (the AR register),
 * indices 1..2 use bc->index_reg[].  The register is then flagged
 * not-loaded so it is refreshed before its next indexed use.
 *
 * Returns 0 on success or the r600_bytecode_add_alu() error code.
 */
static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	/* index 0 -> AR register, 1..2 -> dedicated index registers */
	unsigned reg = inst->Dst[0].Register.Index > 0 ? ctx->bc->index_reg[inst->Dst[0].Register.Index - 1] : ctx->bc->ar_reg;

	assert(inst->Dst[0].Register.Index < 3);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* ARL floors the float source before conversion */
		alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
		break;
	case TGSI_OPCODE_ARR:
		/* ARR rounds (regular float-to-int conversion) */
		alu.op = ALU_OP1_FLT_TO_INT;
		break;
	case TGSI_OPCODE_UARL:
		/* source already integer, just move it */
		alu.op = ALU_OP1_MOV;
		break;
	default:
		assert(0);
		return -1;
	}

	for (i = 0; i <= lasti; ++i) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.last = i == lasti;
		alu.dst.sel = reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* invalidate the cached value so it is reloaded before next use */
	if (inst->Dst[0].Register.Index > 0)
		ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
	else
		ctx->bc->ar_loaded = 0;

	return 0;
}
/* r600/r700 ARL/ARR/UARL: load the address register (AR).
 *
 * These chips have no single flooring MOVA, so ARL is split into a
 * FLOOR pass followed by an in-place FLT_TO_INT pass on bc->ar_reg.
 * AR is flagged not-loaded afterwards so it is refreshed before the
 * next indexed access.
 *
 * Returns 0 on success or the r600_bytecode_add_alu() error code.
 */
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* pass 1: floor the float source into ar_reg */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLOOR;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}

		/* pass 2: convert the floored value to int, in place */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			alu.dst.chan = i;
			alu.src[0].chan = i;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}
		break;
	case TGSI_OPCODE_ARR:
		/* ARR: plain float-to-int conversion, no flooring pass */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	case TGSI_OPCODE_UARL:
		/* source is already an integer, just move it into ar_reg */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	default:
		assert(0);
		return -1;
	}

	/* mark AR stale so it is reloaded before the next indexed access */
	ctx->bc->ar_loaded = 0;
	return 0;
}
6666
6667 static int tgsi_opdst(struct r600_shader_ctx *ctx)
6668 {
6669 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6670 struct r600_bytecode_alu alu;
6671 int i, r = 0;
6672
6673 for (i = 0; i < 4; i++) {
6674 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6675
6676 alu.op = ALU_OP2_MUL;
6677 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6678
6679 if (i == 0 || i == 3) {
6680 alu.src[0].sel = V_SQ_ALU_SRC_1;
6681 } else {
6682 r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6683 }
6684
6685 if (i == 0 || i == 2) {
6686 alu.src[1].sel = V_SQ_ALU_SRC_1;
6687 } else {
6688 r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6689 }
6690 if (i == 3)
6691 alu.last = 1;
6692 r = r600_bytecode_add_alu(ctx->bc, &alu);
6693 if (r)
6694 return r;
6695 }
6696 return 0;
6697 }
6698
6699 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
6700 {
6701 struct r600_bytecode_alu alu;
6702 int r;
6703
6704 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6705 alu.op = opcode;
6706 alu.execute_mask = 1;
6707 alu.update_pred = 1;
6708
6709 alu.dst.sel = ctx->temp_reg;
6710 alu.dst.write = 1;
6711 alu.dst.chan = 0;
6712
6713 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
6714 alu.src[1].sel = V_SQ_ALU_SRC_0;
6715 alu.src[1].chan = 0;
6716
6717 alu.last = 1;
6718
6719 r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
6720 if (r)
6721 return r;
6722 return 0;
6723 }
6724
6725 static int pops(struct r600_shader_ctx *ctx, int pops)
6726 {
6727 unsigned force_pop = ctx->bc->force_add_cf;
6728
6729 if (!force_pop) {
6730 int alu_pop = 3;
6731 if (ctx->bc->cf_last) {
6732 if (ctx->bc->cf_last->op == CF_OP_ALU)
6733 alu_pop = 0;
6734 else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
6735 alu_pop = 1;
6736 }
6737 alu_pop += pops;
6738 if (alu_pop == 1) {
6739 ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
6740 ctx->bc->force_add_cf = 1;
6741 } else if (alu_pop == 2) {
6742 ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
6743 ctx->bc->force_add_cf = 1;
6744 } else {
6745 force_pop = 1;
6746 }
6747 }
6748
6749 if (force_pop) {
6750 r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
6751 ctx->bc->cf_last->pop_count = pops;
6752 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
6753 }
6754
6755 return 0;
6756 }
6757
/* Recompute the worst-case hardware stack depth needed so far.  Called
 * after every callstack_push(); the running maximum (stack->max_entries)
 * is what ultimately gets programmed as the shader's STACK_SIZE. */
static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
		unsigned reason)
{
	struct r600_stack_info *stack = &ctx->bc->stack;
	unsigned elements, entries;

	unsigned entry_size = stack->entry_size;

	/* loop/WQM frames cost a full entry each; plain pushes cost one
	 * element each */
	elements = (stack->loop + stack->push_wqm ) * entry_size;
	elements += stack->push;

	switch (ctx->bc->chip_class) {
	case R600:
	case R700:
		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
		 * the stack must be reserved to hold the current active/continue
		 * masks */
		if (reason == FC_PUSH_VPM) {
			elements += 2;
		}
		break;

	case CAYMAN:
		/* r9xx: any stack operation on empty stack consumes 2 additional
		 * elements */
		elements += 2;

		/* fallthrough */
		/* FIXME: do the two elements added above cover the cases for the
		 * r8xx+ below? */

	case EVERGREEN:
		/* r8xx+: 2 extra elements are not always required, but one extra
		 * element must be added for each of the following cases:
		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
		 *    stack usage.
		 *    (Currently we don't use ALU_ELSE_AFTER.)
		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
		 *    PUSH instruction executed.
		 *
		 * NOTE: it seems we also need to reserve additional element in some
		 * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
		 * then STACK_SIZE should be 2 instead of 1 */
		if (reason == FC_PUSH_VPM) {
			elements += 1;
		}
		break;

	default:
		assert(0);
		break;
	}

	/* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4
	 * for all chips, so we use 4 in the final formula, not the real entry_size
	 * for the chip */
	entry_size = 4;

	/* round elements up to whole entries */
	entries = (elements + (entry_size - 1)) / entry_size;

	if (entries > stack->max_entries)
		stack->max_entries = entries;
}
6821
6822 static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
6823 {
6824 switch(reason) {
6825 case FC_PUSH_VPM:
6826 --ctx->bc->stack.push;
6827 assert(ctx->bc->stack.push >= 0);
6828 break;
6829 case FC_PUSH_WQM:
6830 --ctx->bc->stack.push_wqm;
6831 assert(ctx->bc->stack.push_wqm >= 0);
6832 break;
6833 case FC_LOOP:
6834 --ctx->bc->stack.loop;
6835 assert(ctx->bc->stack.loop >= 0);
6836 break;
6837 default:
6838 assert(0);
6839 break;
6840 }
6841 }
6842
6843 static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
6844 {
6845 switch (reason) {
6846 case FC_PUSH_VPM:
6847 ++ctx->bc->stack.push;
6848 break;
6849 case FC_PUSH_WQM:
6850 ++ctx->bc->stack.push_wqm;
6851 case FC_LOOP:
6852 ++ctx->bc->stack.loop;
6853 break;
6854 default:
6855 assert(0);
6856 }
6857
6858 callstack_update_max_depth(ctx, reason);
6859 }
6860
6861 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
6862 {
6863 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
6864
6865 sp->mid = realloc((void *)sp->mid,
6866 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
6867 sp->mid[sp->num_mid] = ctx->bc->cf_last;
6868 sp->num_mid++;
6869 }
6870
6871 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
6872 {
6873 ctx->bc->fc_sp++;
6874 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
6875 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
6876 }
6877
6878 static void fc_poplevel(struct r600_shader_ctx *ctx)
6879 {
6880 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
6881 free(sp->mid);
6882 sp->mid = NULL;
6883 sp->num_mid = 0;
6884 sp->start = NULL;
6885 sp->type = 0;
6886 ctx->bc->fc_sp--;
6887 }
6888
#if 0
/* Unused subroutine-call / structured-return scaffolding, kept for
 * reference.  The stray unbalanced ')' in the two
 * r600_bytecode_add_cfinst() calls has been removed so this block
 * compiles if it is ever re-enabled. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN);
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
6936
6937 static int emit_if(struct r600_shader_ctx *ctx, int opcode)
6938 {
6939 int alu_type = CF_OP_ALU_PUSH_BEFORE;
6940
6941 /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
6942 * LOOP_STARTxxx for nested loops may put the branch stack into a state
6943 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
6944 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
6945 if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
6946 r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
6947 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
6948 alu_type = CF_OP_ALU;
6949 }
6950
6951 emit_logic_pred(ctx, opcode, alu_type);
6952
6953 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
6954
6955 fc_pushlevel(ctx, FC_IF);
6956
6957 callstack_push(ctx, FC_PUSH_VPM);
6958 return 0;
6959 }
6960
/* IF with a float condition: predicate on src0.x != 0.0f. */
static int tgsi_if(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE);
}
6965
/* UIF: IF with an integer condition, predicate on src0.x != 0. */
static int tgsi_uif(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
}
6970
/* ELSE: emit the ELSE CF, record it as the IF frame's mid instruction
 * for later fixup, and patch the IF's JUMP to land on it. */
static int tgsi_else(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, ctx->bc->fc_sp);
	/* the IF's JUMP targets the ELSE instruction itself */
	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
	return 0;
}
6980
/* ENDIF: pop one stack level and patch the pending jump targets of the
 * enclosing IF (and its ELSE, when present) to just past this point. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		/* no ELSE: the IF's JUMP skips past the ENDIF and pops */
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		/* with an ELSE: the ELSE CF jumps past the ENDIF */
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_pop(ctx, FC_PUSH_VPM);
	return 0;
}
7000
/* BGNLOOP: open a loop with LOOP_START_DX10 and push a loop frame. */
static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
	 * limited to 4096 iterations, like the other LOOP_* instructions. */
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);

	fc_pushlevel(ctx, FC_LOOP);

	/* check stack depth */
	callstack_push(ctx, FC_LOOP);
	return 0;
}
7013
/* ENDLOOP: emit LOOP_END, fix up all CF addresses associated with the
 * loop (start, end, and every recorded BRK/CONT), and pop the frame. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	*/
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* XXX add LOOPRET support */
	fc_poplevel(ctx);
	callstack_pop(ctx, FC_LOOP);
	return 0;
}
7042
/* BREAKC: conditional loop break.  Finds the innermost enclosing loop
 * frame, then emits either an IF + LOOP_BREAK + ENDIF sequence (on
 * Evergreen parts with the ALU_BREAK erratum) or a predicated
 * ALU_BREAK clause, registering the break with the loop frame for
 * address fixup at ENDLOOP. */
static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
{
	int r;
	unsigned int fscp;

	/* search down the frame stack for the innermost FC_LOOP; frame 0 is
	 * never used (fc_pushlevel pre-increments fc_sp) */
	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}
	if (fscp == 0) {
		R600_ERR("BREAKC not inside loop/endloop pair\n");
		return -EINVAL;
	}

	if (ctx->bc->chip_class == EVERGREEN &&
	    ctx->bc->family != CHIP_CYPRESS &&
	    ctx->bc->family != CHIP_JUNIPER) {
		/* HW bug: ALU_BREAK does not save the active mask correctly */
		r = tgsi_uif(ctx);
		if (r)
			return r;

		r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);

		return tgsi_endif(ctx);
	} else {
		r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);
	}

	return 0;
}
7081
7082 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
7083 {
7084 unsigned int fscp;
7085
7086 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
7087 {
7088 if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
7089 break;
7090 }
7091
7092 if (fscp == 0) {
7093 R600_ERR("Break not inside loop/endloop pair\n");
7094 return -EINVAL;
7095 }
7096
7097 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
7098
7099 fc_set_mid(ctx, fscp);
7100
7101 return 0;
7102 }
7103
/* Geometry shader EMIT / ENDPRIM: for EMIT_VERTEX the current outputs
 * are first written to the GS ring via emit_gs_ring_writes(), then the
 * CF instruction itself is emitted. */
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
		emit_gs_ring_writes(ctx, TRUE);

	return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
}
7111
/* UMAD: dst = src0 * src1 + src2 (unsigned integer), implemented as
 * MULLO_UINT into temp_reg followed by ADD_INT into the destination. */
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* src0 * src1 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		if (ctx->bc->chip_class == CAYMAN) {
			/* on Cayman the op is issued in all four slots, but
			 * only the slot matching channel i commits its result */
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP2_MULLO_UINT;
				for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
					r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
				}
				alu.dst.chan = j;
				alu.dst.sel = ctx->temp_reg;
				alu.dst.write = (j == i);
				if (j == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.dst.chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;

			alu.op = ALU_OP2_MULLO_UINT;
			for (j = 0; j < 2; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}

			/* MULLO is a trans-slot op here, so each one ends its group */
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}


	/* + src2, written to the real destination */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ALU_OP2_ADD_INT;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
7183
/* TGSI -> r600/r700 opcode translation table, indexed by TGSI opcode
 * number.  Entry layout: { tgsi_opcode, is_op3, hw opcode, handler };
 * is_op3 is set for three-source (ALU_OP3_*) hardware ops.  Bare
 * numeric entries (22, 23, 32, ...) are holes in the TGSI numbering. */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit},

	/* XXX:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ, 0, ALU_OP0_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log},
	{TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
	{22, 0, ALU_OP0_NOP, tgsi_unsupported},
	{23, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2},
	{TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
	{32, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
	{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
	{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
	{TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
	{TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
	{TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
	{76, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
	{TGSI_OPCODE_DDX_FINE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DDY_FINE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
	{TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
	{TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
	{TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2_trans},
	{88, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
	{TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
	{TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod},
	{TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2},
	{TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
	{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	{TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
	{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	{104, 0, ALU_OP0_NOP, tgsi_unsupported},
	{105, 0, ALU_OP0_NOP, tgsi_unsupported},
	{106, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
	{TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
	{TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	{TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
	{114, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_loop_breakc},
	{TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end},  /* aka HALT */
	{118, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
	{TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
	{TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2},
	{TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2},
	{TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg},
	{TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2},
	{TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2_trans},
	{TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap},
	{TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2_trans},
	{TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
	{TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2},
	{TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv},
	{TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad},
	{TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2},
	{TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2},
	{TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod},
	{TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans},
	{TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2},
	{TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2},
	{TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2_trans},
	{TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	{TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2_swap},
	{TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_r600_arl},
	{TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp},
	{TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
	{TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
	{TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
	{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
	{TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
	{TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_unsupported},
	{TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_unsupported},
	{TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_unsupported},
	{TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_unsupported},
	{TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_unsupported},
	{TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_unsupported},
	{TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_unsupported},
	{TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_unsupported},
	{TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_unsupported},
	{TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};
7388
7389 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
7390 {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl},
7391 {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2},
7392 {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit},
7393 {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
7394 {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
7395 {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp},
7396 {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log},
7397 {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2},
7398 {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2},
7399 {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp},
7400 {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp},
7401 {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst},
7402 {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2},
7403 {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2},
7404 {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap},
7405 {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2},
7406 {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
7407 {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
7408 {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
7409 {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
7410 {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
7411 {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
7412 {22, 0, ALU_OP0_NOP, tgsi_unsupported},
7413 {23, 0, ALU_OP0_NOP, tgsi_unsupported},
7414 {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
7415 {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported},
7416 {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2},
7417 {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2},
7418 {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
7419 {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
7420 {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow},
7421 {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
7422 {32, 0, ALU_OP0_NOP, tgsi_unsupported},
7423 {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
7424 {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
7425 {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
7426 {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
7427 {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
7428 {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
7429 {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
7430 {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
7431 {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
7432 {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
7433 {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
7434 {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
7435 {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
7436 {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
7437 {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
7438 {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig},
7439 {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
7440 {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
7441 {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
7442 {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
7443 {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
7444 {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
7445 {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported},
7446 {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
7447 {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
7448 {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
7449 {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
7450 {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
7451 {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl},
7452 {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
7453 {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
7454 {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
7455 {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
7456 {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
7457 {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
7458 {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
7459 {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
7460 {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
7461 {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
7462 {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
7463 {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
7464 {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
7465 {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
7466 {76, 0, ALU_OP0_NOP, tgsi_unsupported},
7467 {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
7468 {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
7469 {TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
7470 {TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
7471 {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
7472 {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
7473 {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
7474 {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
7475 {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
7476 {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
7477 {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2},
7478 {88, 0, ALU_OP0_NOP, tgsi_unsupported},
7479 {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
7480 {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
7481 {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod},
7482 {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2},
7483 {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported},
7484 {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
7485 {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
7486 {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
7487 {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
7488 {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
7489 {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
7490 {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
7491 {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
7492 {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
7493 {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
7494 {104, 0, ALU_OP0_NOP, tgsi_unsupported},
7495 {105, 0, ALU_OP0_NOP, tgsi_unsupported},
7496 {106, 0, ALU_OP0_NOP, tgsi_unsupported},
7497 {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
7498 {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
7499 {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
7500 {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
7501 {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
7502 {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
7503 {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
7504 {114, 0, ALU_OP0_NOP, tgsi_unsupported},
7505 {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
7506 {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
7507 {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
7508 {118, 0, ALU_OP0_NOP, tgsi_unsupported},
7509 {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_f2i},
7510 {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
7511 {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2},
7512 {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2},
7513 {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg},
7514 {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2},
7515 {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2},
7516 {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap},
7517 {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_f2i},
7518 {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
7519 {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2},
7520 {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv},
7521 {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad},
7522 {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2},
7523 {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2},
7524 {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod},
7525 {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans},
7526 {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2},
7527 {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2},
7528 {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2},
7529 {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap},
7530 {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2},
7531 {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
7532 {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported},
7533 {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported},
7534 {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
7535 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
7536 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
7537 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
7538 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
7539 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
7540 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
7541 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
7542 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
7543 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
7544 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
7545 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
7546 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
7547 {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl},
7548 {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp},
7549 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
7550 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
7551 {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported},
7552 {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported},
7553 {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
7554 {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
7555 {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
7556 {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported},
7557 {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported},
7558 {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported},
7559 {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported},
7560 {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported},
7561 {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported},
7562 {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported},
7563 {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
7564 {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
7565 {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
7566 {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
7567 {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
7568 {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
7569 {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
7570 {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
7571 {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
7572 {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
7573 {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
7574 {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
7575 {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
7576 {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
7577 {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
7578 {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
7579 {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
7580 {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
7581 {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
7582 {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm},
7583 {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm},
7584 {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm},
7585 {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
7586 };
7587
/*
 * Cayman TGSI instruction translation table.
 *
 * Same positional layout as the Evergreen table above (entry N describes
 * TGSI opcode N; bare numeric first fields are fillers for unassigned
 * opcode numbers and must not be removed or reordered).  Differences from
 * Evergreen: transcendental/float ops (RCP, RSQ, SQRT, EX2, LG2, POW,
 * COS, SIN) and the 32-bit integer multiplies (UMUL, IMUL_HI, UMUL_HI)
 * go through the cayman_* emit callbacks, and several int<->float
 * conversions use the plain tgsi_op2 path instead of the *_trans/f2i
 * handlers used on Evergreen.
 */
static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl},
	{TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit},
	{TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
	{TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
	{TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log},
	{TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2},
	/* a < b lowered as b > a: SETGT with swapped sources */
	{TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
	/* NOTE(review): SUB maps to ADD — handler presumably negates src1; confirm in tgsi_op2 */
	{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
	{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
	{22, 0, ALU_OP0_NOP, tgsi_unsupported},
	{23, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2},
	{TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
	{TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
	{TGSI_OPCODE_POW, 0, ALU_OP0_NOP, cayman_pow},
	{TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
	{32, 0, ALU_OP0_NOP, tgsi_unsupported},
	/* NOTE(review): ABS maps to MOV — handler presumably applies the src abs modifier; confirm */
	{TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig},
	{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
	{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, cayman_trig},
	{TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
	{TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl},
	{TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
	{TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
	{TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
	{76, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
	{TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
	{TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2},
	{TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
	{TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2},
	{88, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
	{TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
	{TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod},
	{TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2},
	{TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
	{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	{TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
	{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	{104, 0, ALU_OP0_NOP, tgsi_unsupported},
	{105, 0, ALU_OP0_NOP, tgsi_unsupported},
	{106, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
	{TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
	{TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	{TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
	{114, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
	{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
	{118, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2},
	{TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
	{TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2},
	{TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2},
	{TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg},
	{TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2},
	{TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2},
	{TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap},
	{TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2},
	{TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2},
	{TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2},
	{TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv},
	{TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad},
	{TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2},
	{TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2},
	{TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod},
	{TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_INT, cayman_mul_int_instr},
	{TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2},
	{TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2},
	{TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2},
	{TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	{TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2},
	{TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl},
	{TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp},
	{TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
	{TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
	{TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
	{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, cayman_mul_int_instr},
	{TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
	{TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
	{TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
	{TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
	{TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
	{TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
	{TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
	{TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
	{TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
	{TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
	{TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
	{TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm},
	{TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm},
	{TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm},
	{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};