9f4024c4bab03e738b9a8bc4ca679a8bb5cc1a56
[mesa.git] / src / gallium / drivers / r300 / r300_state_shader.c
1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "r300_state_shader.h"
24
25 static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs)
26 {
27 struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader;
28 fs->shader.stack_size = pt->shader.stack_size;
29 fs->alu_instruction_count = pt->alu_instruction_count;
30 fs->tex_instruction_count = pt->tex_instruction_count;
31 fs->indirections = pt->indirections;
32 fs->instructions[0] = pt->instructions[0];
33 }
34
35 static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs)
36 {
37 struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader;
38 fs->shader.stack_size = pt->shader.stack_size;
39 fs->instruction_count = pt->instruction_count;
40 fs->instructions[0] = pt->instructions[0];
41 }
42
43 static void r300_fs_declare(struct r300_fs_asm* assembler,
44 struct tgsi_full_declaration* decl)
45 {
46 switch (decl->Declaration.File) {
47 case TGSI_FILE_INPUT:
48 switch (decl->Semantic.SemanticName) {
49 case TGSI_SEMANTIC_COLOR:
50 assembler->color_count++;
51 break;
52 case TGSI_SEMANTIC_GENERIC:
53 assembler->tex_count++;
54 break;
55 default:
56 debug_printf("r300: fs: Bad semantic declaration %d\n",
57 decl->Semantic.SemanticName);
58 break;
59 }
60 break;
61 case TGSI_FILE_OUTPUT:
62 case TGSI_FILE_CONSTANT:
63 break;
64 case TGSI_FILE_TEMPORARY:
65 assembler->temp_count++;
66 break;
67 default:
68 debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File);
69 break;
70 }
71
72 assembler->temp_offset = assembler->color_count + assembler->tex_count;
73 }
74
75 static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler,
76 struct tgsi_src_register* src)
77 {
78 switch (src->File) {
79 case TGSI_FILE_INPUT:
80 /* XXX may be wrong */
81 return src->Index;
82 break;
83 case TGSI_FILE_TEMPORARY:
84 return src->Index + assembler->temp_offset;
85 break;
86 case TGSI_FILE_IMMEDIATE:
87 return src->Index + assembler->imm_offset | (1 << 8);
88 break;
89 case TGSI_FILE_CONSTANT:
90 /* XXX magic */
91 return src->Index | (1 << 8);
92 break;
93 default:
94 debug_printf("r300: fs: Unimplemented src %d\n", src->File);
95 break;
96 }
97 return 0;
98 }
99
100 static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler,
101 struct tgsi_dst_register* dst)
102 {
103 switch (dst->File) {
104 case TGSI_FILE_OUTPUT:
105 return 0;
106 break;
107 case TGSI_FILE_TEMPORARY:
108 return dst->Index + assembler->temp_offset;
109 break;
110 default:
111 debug_printf("r300: fs: Unimplemented dst %d\n", dst->File);
112 break;
113 }
114 return 0;
115 }
116
117 static INLINE unsigned r500_fix_swiz(unsigned s)
118 {
119 /* For historical reasons, the swizzle values x, y, z, w, and 0 are
120 * equivalent to the actual machine code, but 1 is not. Thus, we just
121 * adjust it a bit... */
122 if (s == TGSI_EXTSWIZZLE_ONE) {
123 return R500_SWIZZLE_ONE;
124 } else {
125 return s;
126 }
127 }
128
129 static uint32_t r500_rgba_swiz(struct tgsi_full_src_register* reg)
130 {
131 if (reg->SrcRegister.Extended) {
132 return r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) |
133 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) |
134 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) |
135 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9);
136 } else {
137 return reg->SrcRegister.SwizzleX |
138 (reg->SrcRegister.SwizzleY << 3) |
139 (reg->SrcRegister.SwizzleZ << 6) |
140 (reg->SrcRegister.SwizzleW << 9);
141 }
142 }
143
144 static uint32_t r500_strq_swiz(struct tgsi_full_src_register* reg)
145 {
146 return reg->SrcRegister.SwizzleX |
147 (reg->SrcRegister.SwizzleY << 2) |
148 (reg->SrcRegister.SwizzleZ << 4) |
149 (reg->SrcRegister.SwizzleW << 6);
150 }
151
152 static INLINE uint32_t r500_rgb_swiz(struct tgsi_full_src_register* reg)
153 {
154 /* Only the first 9 bits... */
155 return r500_rgba_swiz(reg) & 0x1ff;
156 }
157
158 static INLINE uint32_t r500_alpha_swiz(struct tgsi_full_src_register* reg)
159 {
160 /* Only the last 3 bits... */
161 return r500_rgba_swiz(reg) >> 9;
162 }
163
164 static INLINE uint32_t r500_sop_swiz(struct tgsi_full_src_register* reg)
165 {
166 /* Only the first 3 bits... */
167 return r500_rgba_swiz(reg) & 0x7;
168 }
169
170 static INLINE uint32_t r500_rgba_op(unsigned op)
171 {
172 switch (op) {
173 case TGSI_OPCODE_EX2:
174 case TGSI_OPCODE_LG2:
175 case TGSI_OPCODE_RCP:
176 case TGSI_OPCODE_RSQ:
177 return R500_ALU_RGBA_OP_SOP;
178 case TGSI_OPCODE_DP3:
179 return R500_ALU_RGBA_OP_DP3;
180 case TGSI_OPCODE_DP4:
181 case TGSI_OPCODE_DPH:
182 return R500_ALU_RGBA_OP_DP4;
183 case TGSI_OPCODE_MAD:
184 return R500_ALU_RGBA_OP_MAD;
185 default:
186 return 0;
187 }
188 }
189
190 static INLINE uint32_t r500_alpha_op(unsigned op)
191 {
192 switch (op) {
193 case TGSI_OPCODE_EX2:
194 return R500_ALPHA_OP_EX2;
195 case TGSI_OPCODE_LG2:
196 return R500_ALPHA_OP_LN2;
197 case TGSI_OPCODE_RCP:
198 return R500_ALPHA_OP_RCP;
199 case TGSI_OPCODE_RSQ:
200 return R500_ALPHA_OP_RSQ;
201 case TGSI_OPCODE_DP3:
202 case TGSI_OPCODE_DP4:
203 case TGSI_OPCODE_DPH:
204 return R500_ALPHA_OP_DP;
205 case TGSI_OPCODE_MAD:
206 return R500_ALPHA_OP_MAD;
207 default:
208 return 0;
209 }
210 }
211
212 /* Setup an ALU operation. */
213 static INLINE void r500_emit_alu(struct r500_fragment_shader* fs,
214 struct r300_fs_asm* assembler,
215 struct tgsi_full_dst_register* dst)
216 {
217 int i = fs->instruction_count;
218
219 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
220 fs->instructions[i].inst0 = R500_INST_TYPE_OUT |
221 R500_ALU_OMASK(dst->DstRegister.WriteMask);
222 } else {
223 fs->instructions[i].inst0 = R500_INST_TYPE_ALU |
224 R500_ALU_WMASK(dst->DstRegister.WriteMask);
225 }
226
227 fs->instructions[i].inst0 |=
228 R500_INST_TEX_SEM_WAIT |
229 R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
230 }
231
232 static INLINE void r500_emit_maths(struct r500_fragment_shader* fs,
233 struct r300_fs_asm* assembler,
234 struct tgsi_full_src_register* src,
235 struct tgsi_full_dst_register* dst,
236 unsigned op,
237 unsigned count,
238 boolean is_sop)
239 {
240 int i = fs->instruction_count;
241
242 r500_emit_alu(fs, assembler, dst);
243
244 switch (count) {
245 case 3:
246 fs->instructions[i].inst1 =
247 R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
248 fs->instructions[i].inst2 =
249 R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
250 fs->instructions[i].inst5 =
251 R500_ALU_RGBA_ALPHA_SEL_C_SRC2 |
252 R500_SWIZ_RGBA_C(r500_rgb_swiz(&src[2])) |
253 R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src[2]));
254 case 2:
255 fs->instructions[i].inst1 |=
256 R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
257 fs->instructions[i].inst2 |=
258 R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
259 fs->instructions[i].inst3 =
260 R500_ALU_RGB_SEL_B_SRC1 |
261 R500_SWIZ_RGB_B(r500_rgb_swiz(&src[1]));
262 fs->instructions[i].inst4 =
263 R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1])) |
264 R500_ALPHA_SEL_B_SRC1;
265 case 1:
266 case 0:
267 default:
268 fs->instructions[i].inst1 |=
269 R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
270 fs->instructions[i].inst2 |=
271 R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
272 fs->instructions[i].inst3 |=
273 R500_ALU_RGB_SEL_A_SRC0 |
274 R500_SWIZ_RGB_A(r500_rgb_swiz(&src[0]));
275 fs->instructions[i].inst4 |=
276 R500_SWIZ_ALPHA_A(is_sop ? r500_sop_swiz(&src[0]) :
277 r500_alpha_swiz(&src[0])) |
278 R500_ALPHA_SEL_A_SRC0;
279 break;
280 }
281
282 fs->instructions[i].inst4 |= r500_alpha_op(op);
283 fs->instructions[i].inst5 |= r500_rgba_op(op);
284
285 fs->instruction_count++;
286 }
287
288 static INLINE void r500_emit_mov(struct r500_fragment_shader* fs,
289 struct r300_fs_asm* assembler,
290 struct tgsi_full_src_register* src,
291 struct tgsi_full_dst_register* dst)
292 {
293 int i = fs->instruction_count;
294
295 r500_emit_alu(fs, assembler, dst);
296
297 fs->instructions[i].inst1 =
298 R500_RGB_ADDR0(r300_fs_src(assembler, &src->SrcRegister));
299 fs->instructions[i].inst2 =
300 R500_ALPHA_ADDR0(r300_fs_src(assembler, &src->SrcRegister));
301 fs->instructions[i].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
302 R500_SWIZ_RGB_A(r500_rgb_swiz(src)) |
303 R500_ALU_RGB_SEL_B_SRC0 |
304 R500_SWIZ_RGB_B(r500_rgb_swiz(src));
305 fs->instructions[i].inst4 = R500_ALPHA_OP_CMP |
306 R500_SWIZ_ALPHA_A(r500_alpha_swiz(src)) |
307 R500_SWIZ_ALPHA_B(r500_alpha_swiz(src));
308 fs->instructions[i].inst5 =
309 R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
310 R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
311 R500_ALU_RGBA_A_SWIZ_0;
312
313 fs->instruction_count++;
314 }
315
316 static INLINE void r500_emit_tex(struct r500_fragment_shader* fs,
317 struct r300_fs_asm* assembler,
318 uint32_t op,
319 struct tgsi_full_src_register* src,
320 struct tgsi_full_dst_register* dst)
321 {
322 int i = fs->instruction_count;
323
324 fs->instructions[i].inst0 = R500_INST_TYPE_TEX |
325 R500_TEX_WMASK(dst->DstRegister.WriteMask) |
326 R500_INST_TEX_SEM_WAIT;
327 fs->instructions[i].inst1 = R500_TEX_ID(0) |
328 R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED |
329 R500_TEX_INST_PROJ;
330 fs->instructions[i].inst2 =
331 R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) |
332 R500_SWIZ_TEX_STRQ(r500_strq_swiz(src)) |
333 R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) |
334 R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
335 R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
336
337 fs->instruction_count++;
338 }
339
340 static void r500_fs_instruction(struct r500_fragment_shader* fs,
341 struct r300_fs_asm* assembler,
342 struct tgsi_full_instruction* inst)
343 {
344 int i;
345 /* Switch between opcodes. When possible, prefer using the official
346 * AMD/ATI names for opcodes, please, as it facilitates using the
347 * documentation. */
348 switch (inst->Instruction.Opcode) {
349 case TGSI_OPCODE_EX2:
350 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
351 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1,
352 true);
353 break;
354 case TGSI_OPCODE_DP3:
355 case TGSI_OPCODE_DP4:
356 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
357 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2,
358 false);
359 break;
360 case TGSI_OPCODE_DPH:
361 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
362 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2,
363 false);
364 /* Force alpha swizzle to one */
365 i = fs->instruction_count - 1;
366 fs->instructions[i].inst4 &= ~R500_SWIZ_ALPHA_A(0x7);
367 fs->instructions[i].inst4 |= R500_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
368 break;
369 case TGSI_OPCODE_MUL:
370 /* Force our src2 to zero */
371 inst->FullSrcRegisters[2] = r500_constant_zero;
372 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
373 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3,
374 false);
375 break;
376 case TGSI_OPCODE_MAD:
377 r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
378 &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3,
379 false);
380 break;
381 case TGSI_OPCODE_MOV:
382 case TGSI_OPCODE_SWZ:
383 r500_emit_mov(fs, assembler, &inst->FullSrcRegisters[0],
384 &inst->FullDstRegisters[0]);
385 break;
386 case TGSI_OPCODE_TXP:
387 r500_emit_tex(fs, assembler, 0, &inst->FullSrcRegisters[0],
388 &inst->FullDstRegisters[0]);
389 break;
390 case TGSI_OPCODE_END:
391 break;
392 default:
393 debug_printf("r300: fs: Bad opcode %d\n",
394 inst->Instruction.Opcode);
395 break;
396 }
397 }
398
399 static void r500_fs_finalize(struct r500_fragment_shader* fs,
400 struct r300_fs_asm* assembler)
401 {
402 /* XXX subtly wrong */
403 fs->shader.stack_size = assembler->temp_offset;
404
405 /* XXX should this just go with OPCODE_END? */
406 fs->instructions[fs->instruction_count - 1].inst0 |=
407 R500_INST_LAST;
408 }
409
410 void r300_translate_fragment_shader(struct r300_context* r300,
411 struct r300_fragment_shader* fs)
412 {
413 struct tgsi_parse_context parser;
414
415 tgsi_parse_init(&parser, fs->shader.state.tokens);
416
417 while (!tgsi_parse_end_of_tokens(&parser)) {
418 tgsi_parse_token(&parser);
419 }
420
421 r300_copy_passthrough_shader(fs);
422 }
423
424 void r500_translate_fragment_shader(struct r300_context* r300,
425 struct r500_fragment_shader* fs)
426 {
427 struct tgsi_parse_context parser;
428 int i;
429 struct r300_constant_buffer* consts =
430 &r300->shader_constants[PIPE_SHADER_FRAGMENT];
431
432 struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm);
433 if (assembler == NULL) {
434 return;
435 }
436 /* Setup starting offset for immediates. */
437 assembler->imm_offset = consts->user_count;
438
439 tgsi_parse_init(&parser, fs->shader.state.tokens);
440
441 while (!tgsi_parse_end_of_tokens(&parser)) {
442 tgsi_parse_token(&parser);
443
444 /* This is seriously the lamest way to create fragment programs ever.
445 * I blame TGSI. */
446 switch (parser.FullToken.Token.Type) {
447 case TGSI_TOKEN_TYPE_DECLARATION:
448 /* Allocated registers sitting at the beginning
449 * of the program. */
450 r300_fs_declare(assembler, &parser.FullToken.FullDeclaration);
451 break;
452 case TGSI_TOKEN_TYPE_IMMEDIATE:
453 debug_printf("r300: Emitting immediate to constant buffer, "
454 "position %d\n", consts->user_count);
455 /* I am not amused by the length of these. */
456 for (i = 0; i < 4; i++) {
457 consts->constants[assembler->imm_offset][i] =
458 parser.FullToken.FullImmediate.u.ImmediateFloat32[i]
459 .Float;
460 }
461 assembler->imm_count++;
462 break;
463 case TGSI_TOKEN_TYPE_INSTRUCTION:
464 r500_fs_instruction(fs, assembler,
465 &parser.FullToken.FullInstruction);
466 break;
467 }
468
469 }
470
471 debug_printf("r300: %d texs and %d colors, first free reg is %d\n",
472 assembler->tex_count, assembler->color_count,
473 assembler->tex_count + assembler->color_count);
474
475 consts->count = consts->user_count + assembler->imm_count;
476 debug_printf("r300: %d total constants, "
477 "%d from user and %d from immediates\n", consts->count,
478 consts->user_count, assembler->imm_count);
479 r500_fs_finalize(fs, assembler);
480
481 tgsi_dump(fs->shader.state.tokens);
482 r500_fs_dump(fs);
483
484 //r500_copy_passthrough_shader(fs);
485
486 tgsi_parse_free(&parser);
487 FREE(assembler);
488 }