Merge branch 'mesa_7_5_branch'
[mesa.git] / src / gallium / drivers / r300 / r300_state_shader.c
1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "r300_state_shader.h"
24
25 static void r300_fs_declare(struct r300_fs_asm* assembler,
26 struct tgsi_full_declaration* decl)
27 {
28 switch (decl->Declaration.File) {
29 case TGSI_FILE_INPUT:
30 switch (decl->Semantic.SemanticName) {
31 case TGSI_SEMANTIC_COLOR:
32 assembler->color_count++;
33 break;
34 case TGSI_SEMANTIC_FOG:
35 case TGSI_SEMANTIC_GENERIC:
36 assembler->tex_count++;
37 break;
38 default:
39 debug_printf("r300: fs: Bad semantic declaration %d\n",
40 decl->Semantic.SemanticName);
41 break;
42 }
43 break;
44 case TGSI_FILE_OUTPUT:
45 /* Depth write. Mark the position of the output so we can
46 * identify it later. */
47 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
48 assembler->depth_output = decl->DeclarationRange.First;
49 }
50 break;
51 case TGSI_FILE_CONSTANT:
52 break;
53 case TGSI_FILE_TEMPORARY:
54 assembler->temp_count++;
55 break;
56 default:
57 debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File);
58 break;
59 }
60
61 assembler->temp_offset = assembler->color_count + assembler->tex_count;
62 }
63
64 static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler,
65 struct tgsi_src_register* src)
66 {
67 switch (src->File) {
68 case TGSI_FILE_NULL:
69 return 0;
70 case TGSI_FILE_INPUT:
71 /* XXX may be wrong */
72 return src->Index;
73 break;
74 case TGSI_FILE_TEMPORARY:
75 return src->Index + assembler->temp_offset;
76 break;
77 case TGSI_FILE_IMMEDIATE:
78 return (src->Index + assembler->imm_offset) | (1 << 8);
79 break;
80 case TGSI_FILE_CONSTANT:
81 /* XXX magic */
82 return src->Index | (1 << 8);
83 break;
84 default:
85 debug_printf("r300: fs: Unimplemented src %d\n", src->File);
86 break;
87 }
88 return 0;
89 }
90
91 static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler,
92 struct tgsi_dst_register* dst)
93 {
94 switch (dst->File) {
95 case TGSI_FILE_NULL:
96 /* This happens during KIL instructions. */
97 return 0;
98 break;
99 case TGSI_FILE_OUTPUT:
100 return 0;
101 break;
102 case TGSI_FILE_TEMPORARY:
103 return dst->Index + assembler->temp_offset;
104 break;
105 default:
106 debug_printf("r300: fs: Unimplemented dst %d\n", dst->File);
107 break;
108 }
109 return 0;
110 }
111
112 static INLINE boolean r300_fs_is_depr(struct r300_fs_asm* assembler,
113 struct tgsi_dst_register* dst)
114 {
115 return (assembler->writes_depth &&
116 (dst->File == TGSI_FILE_OUTPUT) &&
117 (dst->Index == assembler->depth_output));
118 }
119
120 static INLINE unsigned r500_fix_swiz(unsigned s)
121 {
122 /* For historical reasons, the swizzle values x, y, z, w, and 0 are
123 * equivalent to the actual machine code, but 1 is not. Thus, we just
124 * adjust it a bit... */
125 if (s == TGSI_EXTSWIZZLE_ONE) {
126 return R500_SWIZZLE_ONE;
127 } else {
128 return s;
129 }
130 }
131
132 static uint32_t r500_rgba_swiz(struct tgsi_full_src_register* reg)
133 {
134 if (reg->SrcRegister.Extended) {
135 return r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) |
136 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) |
137 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) |
138 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9);
139 } else {
140 return reg->SrcRegister.SwizzleX |
141 (reg->SrcRegister.SwizzleY << 3) |
142 (reg->SrcRegister.SwizzleZ << 6) |
143 (reg->SrcRegister.SwizzleW << 9);
144 }
145 }
146
147 static uint32_t r500_strq_swiz(struct tgsi_full_src_register* reg)
148 {
149 return reg->SrcRegister.SwizzleX |
150 (reg->SrcRegister.SwizzleY << 2) |
151 (reg->SrcRegister.SwizzleZ << 4) |
152 (reg->SrcRegister.SwizzleW << 6);
153 }
154
155 static INLINE uint32_t r500_rgb_swiz(struct tgsi_full_src_register* reg)
156 {
157 /* Only the first 9 bits... */
158 return (r500_rgba_swiz(reg) & 0x1ff) |
159 (reg->SrcRegister.Negate ? (1 << 9) : 0) |
160 (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0);
161 }
162
163 static INLINE uint32_t r500_alpha_swiz(struct tgsi_full_src_register* reg)
164 {
165 /* Only the last 3 bits... */
166 return (r500_rgba_swiz(reg) >> 9) |
167 (reg->SrcRegister.Negate ? (1 << 9) : 0) |
168 (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0);
169 }
170
171 static INLINE uint32_t r300_rgb_op(unsigned op)
172 {
173 switch (op) {
174 case TGSI_OPCODE_MOV:
175 return R300_ALU_OUTC_CMP;
176 default:
177 return 0;
178 }
179 }
180
181 static INLINE uint32_t r300_alpha_op(unsigned op)
182 {
183 switch (op) {
184 case TGSI_OPCODE_MOV:
185 return R300_ALU_OUTA_CMP;
186 default:
187 return 0;
188 }
189 }
190
191 static INLINE uint32_t r500_rgba_op(unsigned op)
192 {
193 switch (op) {
194 case TGSI_OPCODE_COS:
195 case TGSI_OPCODE_EX2:
196 case TGSI_OPCODE_LG2:
197 case TGSI_OPCODE_RCP:
198 case TGSI_OPCODE_RSQ:
199 case TGSI_OPCODE_SIN:
200 return R500_ALU_RGBA_OP_SOP;
201 case TGSI_OPCODE_DDX:
202 return R500_ALU_RGBA_OP_MDH;
203 case TGSI_OPCODE_DDY:
204 return R500_ALU_RGBA_OP_MDV;
205 case TGSI_OPCODE_FRC:
206 return R500_ALU_RGBA_OP_FRC;
207 case TGSI_OPCODE_DP3:
208 return R500_ALU_RGBA_OP_DP3;
209 case TGSI_OPCODE_DP4:
210 case TGSI_OPCODE_DPH:
211 return R500_ALU_RGBA_OP_DP4;
212 case TGSI_OPCODE_ABS:
213 case TGSI_OPCODE_CMP:
214 case TGSI_OPCODE_MOV:
215 case TGSI_OPCODE_SWZ:
216 return R500_ALU_RGBA_OP_CMP;
217 case TGSI_OPCODE_ADD:
218 case TGSI_OPCODE_MAD:
219 case TGSI_OPCODE_MUL:
220 case TGSI_OPCODE_SUB:
221 return R500_ALU_RGBA_OP_MAD;
222 default:
223 return 0;
224 }
225 }
226
227 static INLINE uint32_t r500_alpha_op(unsigned op)
228 {
229 switch (op) {
230 case TGSI_OPCODE_COS:
231 return R500_ALPHA_OP_COS;
232 case TGSI_OPCODE_EX2:
233 return R500_ALPHA_OP_EX2;
234 case TGSI_OPCODE_LG2:
235 return R500_ALPHA_OP_LN2;
236 case TGSI_OPCODE_RCP:
237 return R500_ALPHA_OP_RCP;
238 case TGSI_OPCODE_RSQ:
239 return R500_ALPHA_OP_RSQ;
240 case TGSI_OPCODE_FRC:
241 return R500_ALPHA_OP_FRC;
242 case TGSI_OPCODE_SIN:
243 return R500_ALPHA_OP_SIN;
244 case TGSI_OPCODE_DDX:
245 return R500_ALPHA_OP_MDH;
246 case TGSI_OPCODE_DDY:
247 return R500_ALPHA_OP_MDV;
248 case TGSI_OPCODE_DP3:
249 case TGSI_OPCODE_DP4:
250 case TGSI_OPCODE_DPH:
251 return R500_ALPHA_OP_DP;
252 case TGSI_OPCODE_ABS:
253 case TGSI_OPCODE_CMP:
254 case TGSI_OPCODE_MOV:
255 case TGSI_OPCODE_SWZ:
256 return R500_ALPHA_OP_CMP;
257 case TGSI_OPCODE_ADD:
258 case TGSI_OPCODE_MAD:
259 case TGSI_OPCODE_MUL:
260 case TGSI_OPCODE_SUB:
261 return R500_ALPHA_OP_MAD;
262 default:
263 return 0;
264 }
265 }
266
267 static INLINE uint32_t r500_tex_op(unsigned op)
268 {
269 switch (op) {
270 case TGSI_OPCODE_KIL:
271 return R500_TEX_INST_TEXKILL;
272 case TGSI_OPCODE_TEX:
273 return R500_TEX_INST_LD;
274 case TGSI_OPCODE_TXB:
275 return R500_TEX_INST_LODBIAS;
276 case TGSI_OPCODE_TXP:
277 return R500_TEX_INST_PROJ;
278 default:
279 return 0;
280 }
281 }
282
283 static INLINE void r300_emit_maths(struct r300_fragment_shader* fs,
284 struct r300_fs_asm* assembler,
285 struct tgsi_full_src_register* src,
286 struct tgsi_full_dst_register* dst,
287 unsigned op,
288 unsigned count)
289 {
290 int i = fs->alu_instruction_count;
291
292 fs->instructions[i].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
293 R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
294 R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
295 r300_rgb_op(op);
296 fs->instructions[i].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
297 R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ;
298 fs->instructions[i].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
299 R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
300 R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
301 r300_alpha_op(op);
302 fs->instructions[i].alu_alpha_addr = R300_ALPHA_ADDR0(0) |
303 R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT;
304
305 fs->alu_instruction_count++;
306 }
307
308 /* Setup an ALU operation. */
309 static INLINE void r500_emit_maths(struct r500_fragment_shader* fs,
310 struct r300_fs_asm* assembler,
311 struct tgsi_full_src_register* src,
312 struct tgsi_full_dst_register* dst,
313 unsigned op,
314 unsigned count)
315 {
316 int i = fs->instruction_count;
317
318 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
319 fs->instructions[i].inst0 = R500_INST_TYPE_OUT;
320 if (r300_fs_is_depr(assembler, dst)) {
321 fs->instructions[i].inst4 = R500_W_OMASK;
322 } else {
323 fs->instructions[i].inst0 |=
324 R500_ALU_OMASK(dst->DstRegister.WriteMask);
325 }
326 } else {
327 fs->instructions[i].inst0 = R500_INST_TYPE_ALU |
328 R500_ALU_WMASK(dst->DstRegister.WriteMask);
329 }
330
331 fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT;
332
333 fs->instructions[i].inst4 |=
334 R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
335 fs->instructions[i].inst5 =
336 R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
337
338 switch (count) {
339 case 3:
340 fs->instructions[i].inst1 =
341 R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
342 fs->instructions[i].inst2 =
343 R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
344 fs->instructions[i].inst5 |=
345 R500_ALU_RGBA_SEL_C_SRC2 |
346 R500_SWIZ_RGBA_C(r500_rgb_swiz(&src[2])) |
347 R500_ALU_RGBA_ALPHA_SEL_C_SRC2 |
348 R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src[2]));
349 case 2:
350 fs->instructions[i].inst1 |=
351 R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
352 fs->instructions[i].inst2 |=
353 R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
354 fs->instructions[i].inst3 =
355 R500_ALU_RGB_SEL_B_SRC1 |
356 R500_SWIZ_RGB_B(r500_rgb_swiz(&src[1]));
357 fs->instructions[i].inst4 |=
358 R500_ALPHA_SEL_B_SRC1 |
359 R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1]));
360 case 1:
361 case 0:
362 default:
363 fs->instructions[i].inst1 |=
364 R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
365 fs->instructions[i].inst2 |=
366 R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
367 fs->instructions[i].inst3 |=
368 R500_ALU_RGB_SEL_A_SRC0 |
369 R500_SWIZ_RGB_A(r500_rgb_swiz(&src[0]));
370 fs->instructions[i].inst4 |=
371 R500_ALPHA_SEL_A_SRC0 |
372 R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src[0]));
373 break;
374 }
375
376 fs->instructions[i].inst4 |= r500_alpha_op(op);
377 fs->instructions[i].inst5 |= r500_rgba_op(op);
378
379 fs->instruction_count++;
380 }
381
382 static INLINE void r500_emit_tex(struct r500_fragment_shader* fs,
383 struct r300_fs_asm* assembler,
384 struct tgsi_full_src_register* src,
385 struct tgsi_full_dst_register* dst,
386 uint32_t op)
387 {
388 int i = fs->instruction_count;
389
390 fs->instructions[i].inst0 = R500_INST_TYPE_TEX |
391 R500_TEX_WMASK(dst->DstRegister.WriteMask) |
392 R500_INST_TEX_SEM_WAIT;
393 fs->instructions[i].inst1 = R500_TEX_ID(0) |
394 R500_TEX_SEM_ACQUIRE | //R500_TEX_IGNORE_UNCOVERED |
395 r500_tex_op(op);
396 fs->instructions[i].inst2 =
397 R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) |
398 R500_SWIZ_TEX_STRQ(r500_strq_swiz(src)) |
399 R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) |
400 R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
401 R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
402
403 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
404 fs->instructions[i].inst2 |=
405 R500_TEX_DST_ADDR(assembler->temp_count +
406 assembler->temp_offset);
407
408 fs->instruction_count++;
409
410 /* Setup and emit a MOV. */
411 src[0].SrcRegister.Index = assembler->temp_count;
412 src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
413
414 src[1] = src[0];
415 src[2] = r500_constant_zero;
416 r500_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3);
417 } else {
418 fs->instruction_count++;
419 }
420 }
421
422 static void r300_fs_instruction(struct r300_fragment_shader* fs,
423 struct r300_fs_asm* assembler,
424 struct tgsi_full_instruction* inst)
425 {
426 switch (inst->Instruction.Opcode) {
427 case TGSI_OPCODE_MOV:
428 /* src0 -> src1 and src2 forced to zero */
429 inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0];
430 inst->FullSrcRegisters[2] = r500_constant_zero;
431 r300_emit_maths(fs, assembler, inst->FullSrcRegisters,
432 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
433 break;
434 case TGSI_OPCODE_END:
435 break;
436 default:
437 debug_printf("r300: fs: Bad opcode %d\n",
438 inst->Instruction.Opcode);
439 break;
440 }
441 }
442
443 static void r500_fs_instruction(struct r500_fragment_shader* fs,
444 struct r300_fs_asm* assembler,
445 struct tgsi_full_instruction* inst)
446 {
447 /* Switch between opcodes. When possible, prefer using the official
448 * AMD/ATI names for opcodes, please, as it facilitates using the
449 * documentation. */
450 switch (inst->Instruction.Opcode) {
451 /* XXX trig needs extra prep */
452 case TGSI_OPCODE_COS:
453 case TGSI_OPCODE_SIN:
454 /* The simple scalar ops. */
455 case TGSI_OPCODE_EX2:
456 case TGSI_OPCODE_LG2:
457 case TGSI_OPCODE_RCP:
458 case TGSI_OPCODE_RSQ:
459 /* Copy red swizzle to alpha for src0 */
460 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
461 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX;
462 inst->FullSrcRegisters[0].SrcRegister.SwizzleW =
463 inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
464 /* Fall through */
465 case TGSI_OPCODE_DDX:
466 case TGSI_OPCODE_DDY:
467 case TGSI_OPCODE_FRC:
468 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
469 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1);
470 break;
471
472 /* The dot products. */
473 case TGSI_OPCODE_DPH:
474 /* Set alpha swizzle to one for src0 */
475 if (!inst->FullSrcRegisters[0].SrcRegister.Extended) {
476 inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE;
477 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX =
478 inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
479 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY =
480 inst->FullSrcRegisters[0].SrcRegister.SwizzleY;
481 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ =
482 inst->FullSrcRegisters[0].SrcRegister.SwizzleZ;
483 }
484 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
485 TGSI_EXTSWIZZLE_ONE;
486 /* Fall through */
487 case TGSI_OPCODE_DP3:
488 case TGSI_OPCODE_DP4:
489 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
490 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2);
491 break;
492
493 /* Simple three-source operations. */
494 case TGSI_OPCODE_CMP:
495 /* Swap src0 and src2 */
496 inst->FullSrcRegisters[3] = inst->FullSrcRegisters[2];
497 inst->FullSrcRegisters[2] = inst->FullSrcRegisters[0];
498 inst->FullSrcRegisters[0] = inst->FullSrcRegisters[3];
499 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
500 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
501 break;
502
503 /* The MAD variants. */
504 case TGSI_OPCODE_SUB:
505 /* Just like ADD, but flip the negation on src1 first */
506 inst->FullSrcRegisters[1].SrcRegister.Negate =
507 !inst->FullSrcRegisters[1].SrcRegister.Negate;
508 /* Fall through */
509 case TGSI_OPCODE_ADD:
510 /* Force src0 to one, move all registers over */
511 inst->FullSrcRegisters[2] = inst->FullSrcRegisters[1];
512 inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0];
513 inst->FullSrcRegisters[0] = r500_constant_one;
514 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
515 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
516 break;
517 case TGSI_OPCODE_MUL:
518 /* Force our src2 to zero */
519 inst->FullSrcRegisters[2] = r500_constant_zero;
520 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
521 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
522 break;
523 case TGSI_OPCODE_MAD:
524 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
525 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
526 break;
527
528 /* The MOV variants. */
529 case TGSI_OPCODE_ABS:
530 /* Set absolute value modifiers. */
531 inst->FullSrcRegisters[0].SrcRegisterExtMod.Absolute = TRUE;
532 /* Fall through */
533 case TGSI_OPCODE_MOV:
534 case TGSI_OPCODE_SWZ:
535 /* src0 -> src1 and src2 forced to zero */
536 inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0];
537 inst->FullSrcRegisters[2] = r500_constant_zero;
538 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
539 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
540 break;
541
542 /* The compound and hybrid insts. */
543 case TGSI_OPCODE_LRP:
544 /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */
545 inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1];
546 inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2];
547 inst->FullSrcRegisters[0].SrcRegister.Negate =
548 !(inst->FullSrcRegisters[0].SrcRegister.Negate);
549 inst->FullDstRegisters[1] = inst->FullDstRegisters[0];
550 inst->FullDstRegisters[0].DstRegister.Index =
551 assembler->temp_count;
552 inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
553 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
554 &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3);
555 inst->FullSrcRegisters[2].SrcRegister.Index =
556 assembler->temp_count;
557 inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
558 inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
559 inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
560 inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
561 inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
562 inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3];
563 inst->FullSrcRegisters[0].SrcRegister.Negate =
564 !(inst->FullSrcRegisters[0].SrcRegister.Negate);
565 inst->FullDstRegisters[0] = inst->FullDstRegisters[1];
566 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
567 &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3);
568 break;
569 case TGSI_OPCODE_POW:
570 /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */
571 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
572 inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX;
573 inst->FullSrcRegisters[0].SrcRegister.SwizzleW =
574 inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
575 inst->FullDstRegisters[1] = inst->FullDstRegisters[0];
576 inst->FullDstRegisters[0].DstRegister.Index =
577 assembler->temp_count;
578 inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
579 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
580 &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1);
581 inst->FullSrcRegisters[0].SrcRegister.Index =
582 assembler->temp_count;
583 inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
584 inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
585 inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
586 inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
587 inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
588 inst->FullSrcRegisters[2] = r500_constant_zero;
589 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
590 &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3);
591 inst->FullDstRegisters[0] = inst->FullDstRegisters[1];
592 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
593 &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1);
594 break;
595
596 /* The texture instruction set. */
597 case TGSI_OPCODE_KIL:
598 case TGSI_OPCODE_TEX:
599 case TGSI_OPCODE_TXB:
600 case TGSI_OPCODE_TXP:
601 r500_emit_tex(fs, assembler, &inst->FullSrcRegisters[0],
602 &inst->FullDstRegisters[0], inst->Instruction.Opcode);
603 break;
604
605 /* This is the end. My only friend, the end. */
606 case TGSI_OPCODE_END:
607 break;
608 default:
609 debug_printf("r300: fs: Bad opcode %d\n",
610 inst->Instruction.Opcode);
611 break;
612 }
613
614 /* Clamp, if saturation flags are set. */
615 if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) {
616 fs->instructions[fs->instruction_count - 1].inst0 |=
617 R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
618 }
619 }
620
621 static void r300_fs_finalize(struct r3xx_fragment_shader* fs,
622 struct r300_fs_asm* assembler)
623 {
624 fs->stack_size = assembler->temp_count + assembler->temp_offset + 1;
625 }
626
627 static void r500_fs_finalize(struct r500_fragment_shader* fs,
628 struct r300_fs_asm* assembler)
629 {
630 /* XXX should this just go with OPCODE_END? */
631 fs->instructions[fs->instruction_count - 1].inst0 |=
632 R500_INST_LAST;
633 }
634
635 void r300_translate_fragment_shader(struct r300_context* r300,
636 struct r3xx_fragment_shader* fs)
637 {
638 struct tgsi_parse_context parser;
639 int i;
640 boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
641 struct r300_constant_buffer* consts =
642 &r300->shader_constants[PIPE_SHADER_FRAGMENT];
643
644 struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm);
645 if (assembler == NULL) {
646 return;
647 }
648 /* Setup starting offset for immediates. */
649 assembler->imm_offset = consts->user_count;
650 /* Enable depth writes, if needed. */
651 assembler->writes_depth = fs->info.writes_z;
652
653 /* Make sure we start at the beginning of the shader. */
654 if (is_r500) {
655 ((struct r500_fragment_shader*)fs)->instruction_count = 0;
656 }
657
658 tgsi_parse_init(&parser, fs->state.tokens);
659
660 while (!tgsi_parse_end_of_tokens(&parser)) {
661 tgsi_parse_token(&parser);
662
663 /* This is seriously the lamest way to create fragment programs ever.
664 * I blame TGSI. */
665 switch (parser.FullToken.Token.Type) {
666 case TGSI_TOKEN_TYPE_DECLARATION:
667 /* Allocated registers sitting at the beginning
668 * of the program. */
669 r300_fs_declare(assembler, &parser.FullToken.FullDeclaration);
670 break;
671 case TGSI_TOKEN_TYPE_IMMEDIATE:
672 debug_printf("r300: Emitting immediate to constant buffer, "
673 "position %d\n",
674 assembler->imm_offset + assembler->imm_count);
675 /* I am not amused by the length of these. */
676 for (i = 0; i < 4; i++) {
677 consts->constants[assembler->imm_offset +
678 assembler->imm_count][i] =
679 parser.FullToken.FullImmediate.u.ImmediateFloat32[i]
680 .Float;
681 }
682 assembler->imm_count++;
683 break;
684 case TGSI_TOKEN_TYPE_INSTRUCTION:
685 if (is_r500) {
686 r500_fs_instruction((struct r500_fragment_shader*)fs,
687 assembler, &parser.FullToken.FullInstruction);
688 } else {
689 r300_fs_instruction((struct r300_fragment_shader*)fs,
690 assembler, &parser.FullToken.FullInstruction);
691 }
692 break;
693 }
694 }
695
696 debug_printf("r300: fs: %d texs and %d colors, first free reg is %d\n",
697 assembler->tex_count, assembler->color_count,
698 assembler->tex_count + assembler->color_count);
699
700 consts->count = consts->user_count + assembler->imm_count;
701 fs->uses_imms = assembler->imm_count;
702 debug_printf("r300: fs: %d total constants, "
703 "%d from user and %d from immediates\n", consts->count,
704 consts->user_count, assembler->imm_count);
705 r300_fs_finalize(fs, assembler);
706 if (is_r500) {
707 r500_fs_finalize((struct r500_fragment_shader*)fs, assembler);
708 }
709
710 tgsi_dump(fs->state.tokens);
711 /* XXX finish r300 dumper too */
712 if (is_r500) {
713 r500_fs_dump((struct r500_fragment_shader*)fs);
714 }
715
716 tgsi_parse_free(&parser);
717 FREE(assembler);
718 }