r300-gallium: Cleanup a few things.
[mesa.git] / src / gallium / drivers / r300 / r300_state_shader.c
1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "r300_state_shader.h"
24
25 static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs)
26 {
27 struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader;
28 fs->shader.stack_size = pt->shader.stack_size;
29 fs->alu_instruction_count = pt->alu_instruction_count;
30 fs->tex_instruction_count = pt->tex_instruction_count;
31 fs->indirections = pt->indirections;
32 fs->instructions[0] = pt->instructions[0];
33 }
34
35 static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs)
36 {
37 struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader;
38 fs->shader.stack_size = pt->shader.stack_size;
39 fs->instruction_count = pt->instruction_count;
40 fs->instructions[0] = pt->instructions[0];
41 }
42
43 static void r300_fs_declare(struct r300_fs_asm* assembler,
44 struct tgsi_full_declaration* decl)
45 {
46 switch (decl->Declaration.File) {
47 case TGSI_FILE_INPUT:
48 switch (decl->Semantic.SemanticName) {
49 case TGSI_SEMANTIC_COLOR:
50 assembler->color_count++;
51 break;
52 case TGSI_SEMANTIC_GENERIC:
53 assembler->tex_count++;
54 break;
55 default:
56 debug_printf("r300: fs: Bad semantic declaration %d\n",
57 decl->Semantic.SemanticName);
58 break;
59 }
60 break;
61 case TGSI_FILE_OUTPUT:
62 case TGSI_FILE_CONSTANT:
63 break;
64 case TGSI_FILE_TEMPORARY:
65 assembler->temp_count++;
66 break;
67 default:
68 debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File);
69 break;
70 }
71
72 assembler->temp_offset = assembler->color_count + assembler->tex_count;
73 }
74
75 static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler,
76 struct tgsi_src_register* src)
77 {
78 switch (src->File) {
79 case TGSI_FILE_INPUT:
80 /* XXX may be wrong */
81 return src->Index;
82 break;
83 case TGSI_FILE_TEMPORARY:
84 return src->Index + assembler->temp_offset;
85 break;
86 case TGSI_FILE_IMMEDIATE:
87 return (src->Index + assembler->imm_offset) | (1 << 8);
88 break;
89 case TGSI_FILE_CONSTANT:
90 /* XXX magic */
91 return src->Index | (1 << 8);
92 break;
93 default:
94 debug_printf("r300: fs: Unimplemented src %d\n", src->File);
95 break;
96 }
97 return 0;
98 }
99
100 static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler,
101 struct tgsi_dst_register* dst)
102 {
103 switch (dst->File) {
104 case TGSI_FILE_OUTPUT:
105 return 0;
106 break;
107 case TGSI_FILE_TEMPORARY:
108 return dst->Index + assembler->temp_offset;
109 break;
110 default:
111 debug_printf("r300: fs: Unimplemented dst %d\n", dst->File);
112 break;
113 }
114 return 0;
115 }
116
117 static INLINE unsigned r500_fix_swiz(unsigned s)
118 {
119 /* For historical reasons, the swizzle values x, y, z, w, and 0 are
120 * equivalent to the actual machine code, but 1 is not. Thus, we just
121 * adjust it a bit... */
122 if (s == TGSI_EXTSWIZZLE_ONE) {
123 return R500_SWIZZLE_ONE;
124 } else {
125 return s;
126 }
127 }
128
129 static uint32_t r500_rgba_swiz(struct tgsi_full_src_register* reg)
130 {
131 if (reg->SrcRegister.Extended) {
132 return r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) |
133 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) |
134 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) |
135 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9);
136 } else {
137 return reg->SrcRegister.SwizzleX |
138 (reg->SrcRegister.SwizzleY << 3) |
139 (reg->SrcRegister.SwizzleZ << 6) |
140 (reg->SrcRegister.SwizzleW << 9);
141 }
142 }
143
144 static uint32_t r500_strq_swiz(struct tgsi_full_src_register* reg)
145 {
146 return reg->SrcRegister.SwizzleX |
147 (reg->SrcRegister.SwizzleY << 2) |
148 (reg->SrcRegister.SwizzleZ << 4) |
149 (reg->SrcRegister.SwizzleW << 6);
150 }
151
152 static INLINE uint32_t r500_rgb_swiz(struct tgsi_full_src_register* reg)
153 {
154 /* Only the first 9 bits... */
155 return r500_rgba_swiz(reg) & 0x1ff;
156 }
157
158 static INLINE uint32_t r500_alpha_swiz(struct tgsi_full_src_register* reg)
159 {
160 /* Only the last 3 bits... */
161 return r500_rgba_swiz(reg) >> 9;
162 }
163
164 static INLINE uint32_t r500_sop_swiz(struct tgsi_full_src_register* reg)
165 {
166 /* Only the first 3 bits... */
167 return r500_rgba_swiz(reg) & 0x7;
168 }
169
170 static INLINE uint32_t r500_rgba_op(unsigned op)
171 {
172 switch (op) {
173 case TGSI_OPCODE_EX2:
174 case TGSI_OPCODE_LG2:
175 case TGSI_OPCODE_RCP:
176 case TGSI_OPCODE_RSQ:
177 return R500_ALU_RGBA_OP_SOP;
178 case TGSI_OPCODE_DP3:
179 return R500_ALU_RGBA_OP_DP3;
180 case TGSI_OPCODE_DP4:
181 case TGSI_OPCODE_DPH:
182 return R500_ALU_RGBA_OP_DP4;
183 case TGSI_OPCODE_MAD:
184 return R500_ALU_RGBA_OP_MAD;
185 default:
186 return 0;
187 }
188 }
189
190 static INLINE uint32_t r500_alpha_op(unsigned op)
191 {
192 switch (op) {
193 case TGSI_OPCODE_EX2:
194 return R500_ALPHA_OP_EX2;
195 case TGSI_OPCODE_LG2:
196 return R500_ALPHA_OP_LN2;
197 case TGSI_OPCODE_RCP:
198 return R500_ALPHA_OP_RCP;
199 case TGSI_OPCODE_RSQ:
200 return R500_ALPHA_OP_RSQ;
201 case TGSI_OPCODE_DP3:
202 case TGSI_OPCODE_DP4:
203 case TGSI_OPCODE_DPH:
204 return R500_ALPHA_OP_DP;
205 case TGSI_OPCODE_MAD:
206 return R500_ALPHA_OP_MAD;
207 default:
208 return 0;
209 }
210 }
211
212 /* Setup an ALU operation. */
213 static INLINE void r500_emit_alu(struct r500_fragment_shader* fs,
214 struct r300_fs_asm* assembler,
215 struct tgsi_full_dst_register* dst)
216 {
217 int i = fs->instruction_count;
218
219 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
220 fs->instructions[i].inst0 = R500_INST_TYPE_OUT |
221 R500_ALU_OMASK(dst->DstRegister.WriteMask);
222 } else {
223 fs->instructions[i].inst0 = R500_INST_TYPE_ALU |
224 R500_ALU_WMASK(dst->DstRegister.WriteMask);
225 }
226
227 fs->instructions[i].inst0 |=
228 R500_INST_TEX_SEM_WAIT |
229 R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
230
231 fs->instructions[i].inst4 =
232 R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
233 fs->instructions[i].inst5 =
234 R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
235 }
236
237 static INLINE void r500_emit_maths(struct r500_fragment_shader* fs,
238 struct r300_fs_asm* assembler,
239 struct tgsi_full_src_register* src,
240 struct tgsi_full_dst_register* dst,
241 unsigned op,
242 unsigned count,
243 boolean is_sop)
244 {
245 int i = fs->instruction_count;
246
247 r500_emit_alu(fs, assembler, dst);
248
249 switch (count) {
250 case 3:
251 fs->instructions[i].inst1 =
252 R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
253 fs->instructions[i].inst2 =
254 R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
255 fs->instructions[i].inst5 |=
256 R500_ALU_RGBA_ALPHA_SEL_C_SRC2 |
257 R500_SWIZ_RGBA_C(r500_rgb_swiz(&src[2])) |
258 R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src[2]));
259 case 2:
260 fs->instructions[i].inst1 |=
261 R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
262 fs->instructions[i].inst2 |=
263 R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
264 fs->instructions[i].inst3 =
265 R500_ALU_RGB_SEL_B_SRC1 |
266 R500_SWIZ_RGB_B(r500_rgb_swiz(&src[1]));
267 fs->instructions[i].inst4 |=
268 R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1])) |
269 R500_ALPHA_SEL_B_SRC1;
270 case 1:
271 case 0:
272 default:
273 fs->instructions[i].inst1 |=
274 R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
275 fs->instructions[i].inst2 |=
276 R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
277 fs->instructions[i].inst3 |=
278 R500_ALU_RGB_SEL_A_SRC0 |
279 R500_SWIZ_RGB_A(r500_rgb_swiz(&src[0]));
280 fs->instructions[i].inst4 |=
281 R500_SWIZ_ALPHA_A(is_sop ? r500_sop_swiz(&src[0]) :
282 r500_alpha_swiz(&src[0])) |
283 R500_ALPHA_SEL_A_SRC0;
284 break;
285 }
286
287 fs->instructions[i].inst4 |= r500_alpha_op(op);
288 fs->instructions[i].inst5 |= r500_rgba_op(op);
289
290 fs->instruction_count++;
291 }
292
293 static INLINE void r500_emit_mov(struct r500_fragment_shader* fs,
294 struct r300_fs_asm* assembler,
295 struct tgsi_full_src_register* src,
296 struct tgsi_full_dst_register* dst)
297 {
298 int i = fs->instruction_count;
299
300 r500_emit_alu(fs, assembler, dst);
301
302 fs->instructions[i].inst1 =
303 R500_RGB_ADDR0(r300_fs_src(assembler, &src->SrcRegister));
304 fs->instructions[i].inst2 =
305 R500_ALPHA_ADDR0(r300_fs_src(assembler, &src->SrcRegister));
306 fs->instructions[i].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
307 R500_SWIZ_RGB_A(r500_rgb_swiz(src)) |
308 R500_ALU_RGB_SEL_B_SRC0 |
309 R500_SWIZ_RGB_B(r500_rgb_swiz(src));
310 fs->instructions[i].inst4 |= R500_ALPHA_OP_CMP |
311 R500_SWIZ_ALPHA_A(r500_alpha_swiz(src)) |
312 R500_SWIZ_ALPHA_B(r500_alpha_swiz(src));
313 fs->instructions[i].inst5 |=
314 R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
315 R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
316 R500_ALU_RGBA_A_SWIZ_0;
317
318 fs->instruction_count++;
319 }
320
321 static INLINE void r500_emit_tex(struct r500_fragment_shader* fs,
322 struct r300_fs_asm* assembler,
323 uint32_t op,
324 struct tgsi_full_src_register* src,
325 struct tgsi_full_dst_register* dst)
326 {
327 int i = fs->instruction_count;
328
329 fs->instructions[i].inst0 = R500_INST_TYPE_TEX |
330 R500_TEX_WMASK(dst->DstRegister.WriteMask) |
331 R500_INST_TEX_SEM_WAIT;
332 fs->instructions[i].inst1 = R500_TEX_ID(0) |
333 R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED |
334 R500_TEX_INST_PROJ;
335 fs->instructions[i].inst2 =
336 R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) |
337 R500_SWIZ_TEX_STRQ(r500_strq_swiz(src)) |
338 R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) |
339 R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
340 R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
341
342 fs->instruction_count++;
343 }
344
345 static void r500_fs_instruction(struct r500_fragment_shader* fs,
346 struct r300_fs_asm* assembler,
347 struct tgsi_full_instruction* inst)
348 {
349 int i;
350 /* Switch between opcodes. When possible, prefer using the official
351 * AMD/ATI names for opcodes, please, as it facilitates using the
352 * documentation. */
353 switch (inst->Instruction.Opcode) {
354 case TGSI_OPCODE_EX2:
355 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
356 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1,
357 true);
358 break;
359 case TGSI_OPCODE_DP3:
360 case TGSI_OPCODE_DP4:
361 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
362 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2,
363 false);
364 break;
365 case TGSI_OPCODE_DPH:
366 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
367 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2,
368 false);
369 /* Force alpha swizzle to one */
370 i = fs->instruction_count - 1;
371 fs->instructions[i].inst4 &= ~R500_SWIZ_ALPHA_A(0x7);
372 fs->instructions[i].inst4 |= R500_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
373 break;
374 case TGSI_OPCODE_MUL:
375 /* Force our src2 to zero */
376 inst->FullSrcRegisters[2] = r500_constant_zero;
377 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
378 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3,
379 false);
380 break;
381 case TGSI_OPCODE_MAD:
382 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
383 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3,
384 false);
385 break;
386 case TGSI_OPCODE_MOV:
387 case TGSI_OPCODE_SWZ:
388 r500_emit_mov(fs, assembler, &inst->FullSrcRegisters[0],
389 &inst->FullDstRegisters[0]);
390 break;
391 case TGSI_OPCODE_TXP:
392 r500_emit_tex(fs, assembler, 0, &inst->FullSrcRegisters[0],
393 &inst->FullDstRegisters[0]);
394 break;
395 case TGSI_OPCODE_END:
396 break;
397 default:
398 debug_printf("r300: fs: Bad opcode %d\n",
399 inst->Instruction.Opcode);
400 break;
401 }
402 }
403
404 static void r500_fs_finalize(struct r500_fragment_shader* fs,
405 struct r300_fs_asm* assembler)
406 {
407 /* XXX subtly wrong */
408 fs->shader.stack_size = assembler->temp_offset;
409
410 /* XXX should this just go with OPCODE_END? */
411 fs->instructions[fs->instruction_count - 1].inst0 |=
412 R500_INST_LAST;
413 }
414
415 void r300_translate_fragment_shader(struct r300_context* r300,
416 struct r300_fragment_shader* fs)
417 {
418 struct tgsi_parse_context parser;
419
420 tgsi_parse_init(&parser, fs->shader.state.tokens);
421
422 while (!tgsi_parse_end_of_tokens(&parser)) {
423 tgsi_parse_token(&parser);
424 }
425
426 r300_copy_passthrough_shader(fs);
427 }
428
429 void r500_translate_fragment_shader(struct r300_context* r300,
430 struct r500_fragment_shader* fs)
431 {
432 struct tgsi_parse_context parser;
433 int i;
434 struct r300_constant_buffer* consts =
435 &r300->shader_constants[PIPE_SHADER_FRAGMENT];
436
437 struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm);
438 if (assembler == NULL) {
439 return;
440 }
441 /* Setup starting offset for immediates. */
442 assembler->imm_offset = consts->user_count;
443
444 tgsi_parse_init(&parser, fs->shader.state.tokens);
445
446 while (!tgsi_parse_end_of_tokens(&parser)) {
447 tgsi_parse_token(&parser);
448
449 /* This is seriously the lamest way to create fragment programs ever.
450 * I blame TGSI. */
451 switch (parser.FullToken.Token.Type) {
452 case TGSI_TOKEN_TYPE_DECLARATION:
453 /* Allocated registers sitting at the beginning
454 * of the program. */
455 r300_fs_declare(assembler, &parser.FullToken.FullDeclaration);
456 break;
457 case TGSI_TOKEN_TYPE_IMMEDIATE:
458 debug_printf("r300: Emitting immediate to constant buffer, "
459 "position %d\n", consts->user_count);
460 /* I am not amused by the length of these. */
461 for (i = 0; i < 4; i++) {
462 consts->constants[assembler->imm_offset][i] =
463 parser.FullToken.FullImmediate.u.ImmediateFloat32[i]
464 .Float;
465 }
466 assembler->imm_count++;
467 break;
468 case TGSI_TOKEN_TYPE_INSTRUCTION:
469 r500_fs_instruction(fs, assembler,
470 &parser.FullToken.FullInstruction);
471 break;
472 }
473
474 }
475
476 debug_printf("r300: %d texs and %d colors, first free reg is %d\n",
477 assembler->tex_count, assembler->color_count,
478 assembler->tex_count + assembler->color_count);
479
480 consts->count = consts->user_count + assembler->imm_count;
481 debug_printf("r300: %d total constants, "
482 "%d from user and %d from immediates\n", consts->count,
483 consts->user_count, assembler->imm_count);
484 r500_fs_finalize(fs, assembler);
485
486 tgsi_dump(fs->shader.state.tokens);
487 r500_fs_dump(fs);
488
489 tgsi_parse_free(&parser);
490 FREE(assembler);
491 }