/*
 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "r300_state_shader.h"
25 static void r300_copy_passthrough_shader(struct r300_fragment_shader
* fs
)
27 struct r300_fragment_shader
* pt
= &r300_passthrough_fragment_shader
;
28 fs
->shader
.stack_size
= pt
->shader
.stack_size
;
29 fs
->alu_instruction_count
= pt
->alu_instruction_count
;
30 fs
->tex_instruction_count
= pt
->tex_instruction_count
;
31 fs
->indirections
= pt
->indirections
;
32 fs
->instructions
[0] = pt
->instructions
[0];
35 static void r500_copy_passthrough_shader(struct r500_fragment_shader
* fs
)
37 struct r500_fragment_shader
* pt
= &r500_passthrough_fragment_shader
;
38 fs
->shader
.stack_size
= pt
->shader
.stack_size
;
39 fs
->instruction_count
= pt
->instruction_count
;
40 fs
->instructions
[0] = pt
->instructions
[0];
/* Account for one TGSI declaration in the assembler's register bookkeeping.
 *
 * Dispatches on decl->Declaration.File. A nested switch on
 * decl->Semantic.SemanticName bumps color_count for TGSI_SEMANTIC_COLOR and
 * tex_count for TGSI_SEMANTIC_GENERIC; TGSI_FILE_TEMPORARY bumps temp_count.
 * Values that are not recognised are reported through debug_printf().
 * On the way out, temp_offset is recomputed as color_count + tex_count.
 *
 * NOTE(review): this listing is missing lines (the case label that guards
 * the nested switch, the break/default lines, the braces). Presumably the
 * nested switch belongs to the input file and every case ends in a break --
 * confirm against the complete source. */
43 static void r300_fs_declare(struct r300_fs_asm
* assembler
,
44 struct tgsi_full_declaration
* decl
)
46 switch (decl
->Declaration
.File
) {
/* Classify the declaration by its semantic name. */
48 switch (decl
->Semantic
.SemanticName
) {
49 case TGSI_SEMANTIC_COLOR
:
50 assembler
->color_count
++;
52 case TGSI_SEMANTIC_GENERIC
:
53 assembler
->tex_count
++;
56 debug_printf("r300: fs: Bad semantic declaration %d\n",
57 decl
->Semantic
.SemanticName
);
/* No counter is visibly updated for outputs and constants. */
61 case TGSI_FILE_OUTPUT
:
62 case TGSI_FILE_CONSTANT
:
64 case TGSI_FILE_TEMPORARY
:
65 assembler
->temp_count
++;
68 debug_printf("r300: fs: Bad file %d\n", decl
->Declaration
.File
);
/* temp_offset is what r300_fs_src()/r300_fs_dst() add to temporary
 * register indices. */
72 assembler
->temp_offset
= assembler
->color_count
+ assembler
->tex_count
;
/* Translate a TGSI source register into the value placed in an instruction's
 * source address field.
 *
 * Visible mappings:
 *   TGSI_FILE_TEMPORARY: src->Index + assembler->temp_offset
 *   TGSI_FILE_IMMEDIATE: (src->Index + assembler->imm_offset) | (1 << 8)
 *   TGSI_FILE_CONSTANT:  src->Index | (1 << 8)
 * Any other file is reported through debug_printf().
 *
 * NOTE(review): bit 8 presumably marks the address as a constant-file
 * reference -- confirm against the hardware documentation.
 * NOTE(review): the switch header, at least one earlier case (the one the
 * "XXX may be wrong" remark belongs to) and the fall-back return value are
 * missing from this listing. */
75 static INLINE
unsigned r300_fs_src(struct r300_fs_asm
* assembler
,
76 struct tgsi_src_register
* src
)
80 /* XXX may be wrong */
83 case TGSI_FILE_TEMPORARY
:
84 return src
->Index
+ assembler
->temp_offset
;
86 case TGSI_FILE_IMMEDIATE
:
87 return (src
->Index
+ assembler
->imm_offset
) | (1 << 8);
89 case TGSI_FILE_CONSTANT
:
91 return src
->Index
| (1 << 8);
94 debug_printf("r300: fs: Unimplemented src %d\n", src
->File
);
/* Translate a TGSI destination register into the value placed in an
 * instruction's destination address field.
 *
 * For TGSI_FILE_TEMPORARY the result is dst->Index + assembler->temp_offset.
 * Files that are not handled are reported through debug_printf().
 *
 * NOTE(review): the switch header, the statements belonging to the
 * TGSI_FILE_OUTPUT case and the fall-back return value are missing from this
 * listing, so the value produced for outputs cannot be stated from here. */
100 static INLINE
unsigned r300_fs_dst(struct r300_fs_asm
* assembler
,
101 struct tgsi_dst_register
* dst
)
104 case TGSI_FILE_OUTPUT
:
107 case TGSI_FILE_TEMPORARY
:
108 return dst
->Index
+ assembler
->temp_offset
;
111 debug_printf("r300: fs: Unimplemented dst %d\n", dst
->File
);
/* Convert one TGSI extended swizzle selector into its R500 encoding.
 *
 * TGSI_EXTSWIZZLE_ONE is mapped to R500_SWIZZLE_ONE.
 *
 * NOTE(review): the return for every other selector is missing from this
 * listing; going by the remark below it presumably returns s unchanged --
 * confirm against the complete source. */
117 static INLINE
unsigned r500_fix_swiz(unsigned s
)
119 /* For historical reasons, the swizzle values x, y, z, w, and 0 are
120 * equivalent to the actual machine code, but 1 is not. Thus, we just
121 * adjust it a bit... */
122 if (s
== TGSI_EXTSWIZZLE_ONE
) {
123 return R500_SWIZZLE_ONE
;
129 static uint32_t r500_rgba_swiz(struct tgsi_full_src_register
* reg
)
131 if (reg
->SrcRegister
.Extended
) {
132 return r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleX
) |
133 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleY
) << 3) |
134 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleZ
) << 6) |
135 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleW
) << 9);
137 return reg
->SrcRegister
.SwizzleX
|
138 (reg
->SrcRegister
.SwizzleY
<< 3) |
139 (reg
->SrcRegister
.SwizzleZ
<< 6) |
140 (reg
->SrcRegister
.SwizzleW
<< 9);
144 static uint32_t r500_strq_swiz(struct tgsi_full_src_register
* reg
)
146 return reg
->SrcRegister
.SwizzleX
|
147 (reg
->SrcRegister
.SwizzleY
<< 2) |
148 (reg
->SrcRegister
.SwizzleZ
<< 4) |
149 (reg
->SrcRegister
.SwizzleW
<< 6);
152 static INLINE
uint32_t r500_rgb_swiz(struct tgsi_full_src_register
* reg
)
154 /* Only the first 9 bits... */
155 return r500_rgba_swiz(reg
) & 0x1ff;
158 static INLINE
uint32_t r500_alpha_swiz(struct tgsi_full_src_register
* reg
)
160 /* Only the last 3 bits... */
161 return r500_rgba_swiz(reg
) >> 9;
164 static INLINE
uint32_t r500_sop_swiz(struct tgsi_full_src_register
* reg
)
166 /* Only the first 3 bits... */
167 return r500_rgba_swiz(reg
) & 0x7;
/* Map a TGSI opcode to the R500 RGBA-unit opcode bits.
 *
 * Visible mappings:
 *   EX2, LG2, RCP, RSQ -> R500_ALU_RGBA_OP_SOP
 *   DP3                -> R500_ALU_RGBA_OP_DP3
 *   DP4, DPH           -> R500_ALU_RGBA_OP_DP4
 *   MAD                -> R500_ALU_RGBA_OP_MAD
 *
 * NOTE(review): the switch header and whatever is returned for opcodes not
 * listed here are missing from this listing. TGSI_OPCODE_MUL has no case
 * here even though r500_fs_instruction() forwards it to r500_emit_maths() --
 * confirm that the fall-back value encodes the intended operation. */
170 static INLINE
uint32_t r500_rgba_op(unsigned op
)
173 case TGSI_OPCODE_EX2
:
174 case TGSI_OPCODE_LG2
:
175 case TGSI_OPCODE_RCP
:
176 case TGSI_OPCODE_RSQ
:
177 return R500_ALU_RGBA_OP_SOP
;
178 case TGSI_OPCODE_DP3
:
179 return R500_ALU_RGBA_OP_DP3
;
180 case TGSI_OPCODE_DP4
:
181 case TGSI_OPCODE_DPH
:
182 return R500_ALU_RGBA_OP_DP4
;
183 case TGSI_OPCODE_MAD
:
184 return R500_ALU_RGBA_OP_MAD
;
/* Map a TGSI opcode to the R500 alpha-unit opcode bits.
 *
 * Visible mappings:
 *   EX2           -> R500_ALPHA_OP_EX2
 *   LG2           -> R500_ALPHA_OP_LN2
 *   RCP           -> R500_ALPHA_OP_RCP
 *   RSQ           -> R500_ALPHA_OP_RSQ
 *   DP3, DP4, DPH -> R500_ALPHA_OP_DP
 *   MAD           -> R500_ALPHA_OP_MAD
 *
 * NOTE(review): the switch header and whatever is returned for opcodes not
 * listed here (TGSI_OPCODE_MUL among them) are missing from this listing. */
190 static INLINE
uint32_t r500_alpha_op(unsigned op
)
193 case TGSI_OPCODE_EX2
:
194 return R500_ALPHA_OP_EX2
;
195 case TGSI_OPCODE_LG2
:
196 return R500_ALPHA_OP_LN2
;
197 case TGSI_OPCODE_RCP
:
198 return R500_ALPHA_OP_RCP
;
199 case TGSI_OPCODE_RSQ
:
200 return R500_ALPHA_OP_RSQ
;
201 case TGSI_OPCODE_DP3
:
202 case TGSI_OPCODE_DP4
:
203 case TGSI_OPCODE_DPH
:
204 return R500_ALPHA_OP_DP
;
205 case TGSI_OPCODE_MAD
:
206 return R500_ALPHA_OP_MAD
;
212 /* Setup an ALU operation. */
213 static INLINE
void r500_emit_alu(struct r500_fragment_shader
* fs
,
214 struct r300_fs_asm
* assembler
,
215 struct tgsi_full_dst_register
* dst
)
217 int i
= fs
->instruction_count
;
219 if (dst
->DstRegister
.File
== TGSI_FILE_OUTPUT
) {
220 fs
->instructions
[i
].inst0
= R500_INST_TYPE_OUT
|
221 R500_ALU_OMASK(dst
->DstRegister
.WriteMask
);
223 fs
->instructions
[i
].inst0
= R500_INST_TYPE_ALU
|
224 R500_ALU_WMASK(dst
->DstRegister
.WriteMask
);
227 fs
->instructions
[i
].inst0
|=
228 R500_INST_TEX_SEM_WAIT
|
229 R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
231 fs
->instructions
[i
].inst4
=
232 R500_ALPHA_ADDRD(r300_fs_dst(assembler
, &dst
->DstRegister
));
233 fs
->instructions
[i
].inst5
=
234 R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler
, &dst
->DstRegister
));
/* Emit one arithmetic ALU instruction into the next free slot of fs.
 *
 * r500_emit_alu() sets up the destination-dependent fields first. The source
 * operands are then wired in three groups:
 *   src[2] -> ADDR2 in inst1/inst2, C select and swizzles in inst5
 *   src[1] -> ADDR1 in inst1/inst2, B select and swizzles in inst3/inst4
 *   src[0] -> ADDR0 in inst1/inst2, A select and swizzles in inst3/inst4
 * Finally the alpha and RGBA opcode bits for op are OR-ed in and
 * fs->instruction_count is advanced.
 *
 * NOTE(review): the tail of the parameter list (where op and is_sop are
 * declared) and the control flow that chooses which operand groups run are
 * missing from this listing; presumably it is a switch on an operand count
 * that falls through from the src[2] group downwards -- confirm. */
237 static INLINE
void r500_emit_maths(struct r500_fragment_shader
* fs
,
238 struct r300_fs_asm
* assembler
,
239 struct tgsi_full_src_register
* src
,
240 struct tgsi_full_dst_register
* dst
,
/* Remember the slot now; instruction_count only moves at the very end. */
245 int i
= fs
->instruction_count
;
247 r500_emit_alu(fs
, assembler
, dst
);
/* src[2] group. inst1 and inst2 are assigned with '=' only here.
 * NOTE(review): if the later groups can run without this one, their '|='
 * on inst1/inst2 builds on whatever the slot already held -- confirm the
 * slots start out zeroed. */
251 fs
->instructions
[i
].inst1
=
252 R500_RGB_ADDR2(r300_fs_src(assembler
, &src
[2].SrcRegister
));
253 fs
->instructions
[i
].inst2
=
254 R500_ALPHA_ADDR2(r300_fs_src(assembler
, &src
[2].SrcRegister
));
255 fs
->instructions
[i
].inst5
|=
256 R500_ALU_RGBA_ALPHA_SEL_C_SRC2
|
257 R500_SWIZ_RGBA_C(r500_rgb_swiz(&src
[2])) |
258 R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src
[2]));
/* src[1] group. inst3 is assigned with '=' only here; the same caveat as
 * above applies to the src[0] group's '|=' on inst3. */
260 fs
->instructions
[i
].inst1
|=
261 R500_RGB_ADDR1(r300_fs_src(assembler
, &src
[1].SrcRegister
));
262 fs
->instructions
[i
].inst2
|=
263 R500_ALPHA_ADDR1(r300_fs_src(assembler
, &src
[1].SrcRegister
));
264 fs
->instructions
[i
].inst3
=
265 R500_ALU_RGB_SEL_B_SRC1
|
266 R500_SWIZ_RGB_B(r500_rgb_swiz(&src
[1]));
267 fs
->instructions
[i
].inst4
|=
268 R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src
[1])) |
269 R500_ALPHA_SEL_B_SRC1
;
/* src[0] group. */
273 fs
->instructions
[i
].inst1
|=
274 R500_RGB_ADDR0(r300_fs_src(assembler
, &src
[0].SrcRegister
));
275 fs
->instructions
[i
].inst2
|=
276 R500_ALPHA_ADDR0(r300_fs_src(assembler
, &src
[0].SrcRegister
));
277 fs
->instructions
[i
].inst3
|=
278 R500_ALU_RGB_SEL_A_SRC0
|
279 R500_SWIZ_RGB_A(r500_rgb_swiz(&src
[0]));
/* When is_sop is set, the alpha A swizzle comes from r500_sop_swiz() (the
 * X selector) instead of r500_alpha_swiz() (the W selector). */
280 fs
->instructions
[i
].inst4
|=
281 R500_SWIZ_ALPHA_A(is_sop
? r500_sop_swiz(&src
[0]) :
282 r500_alpha_swiz(&src
[0])) |
283 R500_ALPHA_SEL_A_SRC0
;
/* Opcode bits for both units. */
287 fs
->instructions
[i
].inst4
|= r500_alpha_op(op
);
288 fs
->instructions
[i
].inst5
|= r500_rgba_op(op
);
290 fs
->instruction_count
++;
293 static INLINE
void r500_emit_mov(struct r500_fragment_shader
* fs
,
294 struct r300_fs_asm
* assembler
,
295 struct tgsi_full_src_register
* src
,
296 struct tgsi_full_dst_register
* dst
)
298 int i
= fs
->instruction_count
;
300 r500_emit_alu(fs
, assembler
, dst
);
302 fs
->instructions
[i
].inst1
=
303 R500_RGB_ADDR0(r300_fs_src(assembler
, &src
->SrcRegister
));
304 fs
->instructions
[i
].inst2
=
305 R500_ALPHA_ADDR0(r300_fs_src(assembler
, &src
->SrcRegister
));
306 fs
->instructions
[i
].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
307 R500_SWIZ_RGB_A(r500_rgb_swiz(src
)) |
308 R500_ALU_RGB_SEL_B_SRC0
|
309 R500_SWIZ_RGB_B(r500_rgb_swiz(src
));
310 fs
->instructions
[i
].inst4
|= R500_ALPHA_OP_CMP
|
311 R500_SWIZ_ALPHA_A(r500_alpha_swiz(src
)) |
312 R500_SWIZ_ALPHA_B(r500_alpha_swiz(src
));
313 fs
->instructions
[i
].inst5
|=
314 R500_ALU_RGBA_OP_CMP
| R500_ALU_RGBA_R_SWIZ_0
|
315 R500_ALU_RGBA_G_SWIZ_0
| R500_ALU_RGBA_B_SWIZ_0
|
316 R500_ALU_RGBA_A_SWIZ_0
;
318 fs
->instruction_count
++;
/* Emit one texture instruction into the next free slot of fs.
 *
 *   inst0: TEX instruction type, the write mask taken from dst, and the
 *          texture-semaphore wait flag
 *   inst1: texture id 0, semaphore acquire, ignore-uncovered (the rest of
 *          the expression is missing from this listing)
 *   inst2: source address and STRQ swizzle from src, destination address
 *          from dst, and an identity R/G/B/A destination swizzle
 * fs->instruction_count is advanced afterwards.
 *
 * NOTE(review): one parameter between assembler and src is missing from
 * this listing (callers pass a value in that position).
 * NOTE(review): the texture id is the literal 0 for every instruction --
 * confirm whether the sampler unit should be taken from the TGSI
 * instruction instead. */
321 static INLINE
void r500_emit_tex(struct r500_fragment_shader
* fs
,
322 struct r300_fs_asm
* assembler
,
324 struct tgsi_full_src_register
* src
,
325 struct tgsi_full_dst_register
* dst
)
327 int i
= fs
->instruction_count
;
329 fs
->instructions
[i
].inst0
= R500_INST_TYPE_TEX
|
330 R500_TEX_WMASK(dst
->DstRegister
.WriteMask
) |
331 R500_INST_TEX_SEM_WAIT
;
332 fs
->instructions
[i
].inst1
= R500_TEX_ID(0) |
333 R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
|
335 fs
->instructions
[i
].inst2
=
336 R500_TEX_SRC_ADDR(r300_fs_src(assembler
, &src
->SrcRegister
)) |
337 R500_SWIZ_TEX_STRQ(r500_strq_swiz(src
)) |
338 R500_TEX_DST_ADDR(r300_fs_dst(assembler
, &dst
->DstRegister
)) |
339 R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
|
340 R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
342 fs
->instruction_count
++;
/* Translate one TGSI instruction into R500 fragment-program instructions.
 *
 * Dispatches on inst->Instruction.Opcode:
 *   EX2           -> r500_emit_maths(), with the literal 1 after the opcode
 *   DP3, DP4, DPH -> r500_emit_maths(), with the literal 2; for DPH the
 *                    alpha A swizzle of the instruction just emitted is then
 *                    rewritten to R500_SWIZZLE_ONE
 *   MUL           -> FullSrcRegisters[2] is overwritten with
 *                    r500_constant_zero, then r500_emit_maths() with 3
 *   MAD           -> r500_emit_maths(), with the literal 3
 *   MOV, SWZ      -> r500_emit_mov()
 *   TXP           -> r500_emit_tex()
 * Opcodes that are not recognised are reported through debug_printf().
 *
 * NOTE(review): the literal after the opcode is presumably the operand
 * count -- confirm. The last argument of each r500_emit_maths() call, the
 * break lines, the declaration of i and the end of the opening remark in
 * the body are missing from this listing. */
345 static void r500_fs_instruction(struct r500_fragment_shader
* fs
,
346 struct r300_fs_asm
* assembler
,
347 struct tgsi_full_instruction
* inst
)
350 /* Switch between opcodes. When possible, prefer using the official
351 * AMD/ATI names for opcodes, please, as it facilitates using the
353 switch (inst
->Instruction
.Opcode
) {
354 case TGSI_OPCODE_EX2
:
355 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
356 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 1,
359 case TGSI_OPCODE_DP3
:
360 case TGSI_OPCODE_DP4
:
361 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
362 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 2,
365 case TGSI_OPCODE_DPH
:
366 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
367 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 2,
369 /* Force alpha swizzle to one */
370 i
= fs
->instruction_count
- 1;
371 fs
->instructions
[i
].inst4
&= ~R500_SWIZ_ALPHA_A(0x7);
372 fs
->instructions
[i
].inst4
|= R500_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
);
/* NOTE(review): MUL rewrites the caller's instruction in place, and the
 * opcode handed on is still MUL, which has no case visible in
 * r500_rgba_op()/r500_alpha_op() -- confirm the fall-back there encodes a
 * multiply-add. */
374 case TGSI_OPCODE_MUL
:
375 /* Force our src2 to zero */
376 inst
->FullSrcRegisters
[2] = r500_constant_zero
;
377 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
378 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3,
381 case TGSI_OPCODE_MAD
:
382 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
383 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3,
386 case TGSI_OPCODE_MOV
:
387 case TGSI_OPCODE_SWZ
:
388 r500_emit_mov(fs
, assembler
, &inst
->FullSrcRegisters
[0],
389 &inst
->FullDstRegisters
[0]);
/* The literal 0 fills the r500_emit_tex() parameter that sits between
 * assembler and src. */
391 case TGSI_OPCODE_TXP
:
392 r500_emit_tex(fs
, assembler
, 0, &inst
->FullSrcRegisters
[0],
393 &inst
->FullDstRegisters
[0]);
/* No statement is visible for END. */
395 case TGSI_OPCODE_END
:
398 debug_printf("r300: fs: Bad opcode %d\n",
399 inst
->Instruction
.Opcode
);
/* Finish a translated R500 fragment shader.
 *
 * Stores assembler->temp_offset as the shader's stack size and ORs a flag
 * into inst0 of the last emitted instruction.
 *
 * NOTE(review): the flag itself (the right-hand side of the final '|=') is
 * missing from this listing; presumably it marks the instruction as the last
 * one of the program -- confirm.
 * NOTE(review): the index is instruction_count - 1, which lands before the
 * start of the array when no instruction was emitted -- confirm callers
 * guarantee at least one instruction. */
404 static void r500_fs_finalize(struct r500_fragment_shader
* fs
,
405 struct r300_fs_asm
* assembler
)
407 /* XXX subtly wrong */
408 fs
->shader
.stack_size
= assembler
->temp_offset
;
410 /* XXX should this just go with OPCODE_END? */
411 fs
->instructions
[fs
->instruction_count
- 1].inst0
|=
415 void r300_translate_fragment_shader(struct r300_context
* r300
,
416 struct r300_fragment_shader
* fs
)
418 struct tgsi_parse_context parser
;
420 tgsi_parse_init(&parser
, fs
->shader
.state
.tokens
);
422 while (!tgsi_parse_end_of_tokens(&parser
)) {
423 tgsi_parse_token(&parser
);
426 r300_copy_passthrough_shader(fs
);
/* Translate a TGSI fragment shader into an R500 fragment program.
 *
 * A zeroed r300_fs_asm is allocated to hold the bookkeeping, and its
 * imm_offset is set to the number of user constants in the fragment-shader
 * constant buffer. The token stream is then walked once:
 *   declarations -> r300_fs_declare()
 *   immediates   -> four floats are written into the constant buffer and
 *                   imm_count is incremented
 *   instructions -> r500_fs_instruction()
 * Afterwards consts->count is set to user_count + imm_count, the shader is
 * finalized with r500_fs_finalize(), the TGSI tokens are dumped and the
 * parser is released.
 *
 * NOTE(review): lines are missing from this listing, including the
 * declaration of i, the body of the allocation-failure branch, the break
 * lines and the end of the function; whether the assembler is freed cannot
 * be seen from here -- confirm. */
429 void r500_translate_fragment_shader(struct r300_context
* r300
,
430 struct r500_fragment_shader
* fs
)
432 struct tgsi_parse_context parser
;
434 struct r300_constant_buffer
* consts
=
435 &r300
->shader_constants
[PIPE_SHADER_FRAGMENT
];
437 struct r300_fs_asm
* assembler
= CALLOC_STRUCT(r300_fs_asm
);
438 if (assembler
== NULL
) {
441 /* Setup starting offset for immediates. */
442 assembler
->imm_offset
= consts
->user_count
;
444 tgsi_parse_init(&parser
, fs
->shader
.state
.tokens
);
446 while (!tgsi_parse_end_of_tokens(&parser
)) {
447 tgsi_parse_token(&parser
);
449 /* This is seriously the lamest way to create fragment programs ever.
451 switch (parser
.FullToken
.Token
.Type
) {
452 case TGSI_TOKEN_TYPE_DECLARATION
:
453 /* Allocated registers sitting at the beginning
455 r300_fs_declare(assembler
, &parser
.FullToken
.FullDeclaration
);
457 case TGSI_TOKEN_TYPE_IMMEDIATE
:
458 debug_printf("r300: Emitting immediate to constant buffer, "
459 "position %d\n", consts
->user_count
);
460 /* I am not amused by the length of these. */
/* NOTE(review): every immediate is stored in row imm_offset; the row index
 * does not include imm_count, while r300_fs_src() addresses immediates as
 * Index + imm_offset. It looks like a second immediate would overwrite the
 * first -- confirm. The message above likewise prints user_count rather
 * than the row actually written. */
461 for (i
= 0; i
< 4; i
++) {
462 consts
->constants
[assembler
->imm_offset
][i
] =
463 parser
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
]
466 assembler
->imm_count
++;
468 case TGSI_TOKEN_TYPE_INSTRUCTION
:
469 r500_fs_instruction(fs
, assembler
,
470 &parser
.FullToken
.FullInstruction
);
/* Summary of the register layout gathered from the declarations. */
476 debug_printf("r300: %d texs and %d colors, first free reg is %d\n",
477 assembler
->tex_count
, assembler
->color_count
,
478 assembler
->tex_count
+ assembler
->color_count
);
/* Immediates are counted on top of the user-supplied constants. */
480 consts
->count
= consts
->user_count
+ assembler
->imm_count
;
481 debug_printf("r300: %d total constants, "
482 "%d from user and %d from immediates\n", consts
->count
,
483 consts
->user_count
, assembler
->imm_count
);
484 r500_fs_finalize(fs
, assembler
);
486 tgsi_dump(fs
->shader
.state
.tokens
);
489 tgsi_parse_free(&parser
);