/*
 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "r300_state_shader.h"
25 static void r300_copy_passthrough_shader(struct r300_fragment_shader
* fs
)
27 struct r300_fragment_shader
* pt
= &r300_passthrough_fragment_shader
;
28 fs
->shader
.stack_size
= pt
->shader
.stack_size
;
29 fs
->alu_instruction_count
= pt
->alu_instruction_count
;
30 fs
->tex_instruction_count
= pt
->tex_instruction_count
;
31 fs
->indirections
= pt
->indirections
;
32 fs
->instructions
[0] = pt
->instructions
[0];
35 static void r500_copy_passthrough_shader(struct r500_fragment_shader
* fs
)
37 struct r500_fragment_shader
* pt
= &r500_passthrough_fragment_shader
;
38 fs
->shader
.stack_size
= pt
->shader
.stack_size
;
39 fs
->instruction_count
= pt
->instruction_count
;
40 fs
->instructions
[0] = pt
->instructions
[0];
/* Account for one TGSI declaration in the assembler's bookkeeping.
 *
 * Dispatches on decl->Declaration.File, with a nested dispatch on
 * decl->Semantic.SemanticName that bumps color_count or tex_count. A
 * temporary bumps temp_count. Unrecognised semantics and files are reported
 * through debug_printf. temp_offset is then set to color_count + tex_count.
 *
 * NOTE(review): this listing appears to be missing several short lines
 * (at least one case label, the default labels, break statements and
 * braces), so the exact pairing of labels and actions should be confirmed
 * against the pristine file. */
43 static void r300_fs_declare(struct r300_fs_asm
* assembler
,
44 struct tgsi_full_declaration
* decl
)
46 switch (decl
->Declaration
.File
) {
/* Inner dispatch on the semantic name of the declaration. */
48 switch (decl
->Semantic
.SemanticName
) {
49 case TGSI_SEMANTIC_COLOR
:
50 assembler
->color_count
++;
52 case TGSI_SEMANTIC_GENERIC
:
/* Generic semantics are counted in tex_count. */
53 assembler
->tex_count
++;
56 debug_printf("r300: fs: Bad semantic declaration %d\n",
57 decl
->Semantic
.SemanticName
);
61 case TGSI_FILE_OUTPUT
:
62 case TGSI_FILE_CONSTANT
:
64 case TGSI_FILE_TEMPORARY
:
65 assembler
->temp_count
++;
68 debug_printf("r300: fs: Bad file %d\n", decl
->Declaration
.File
);
/* Temporaries are addressed starting right after the color and tex
 * entries counted so far (r300_fs_src and r300_fs_dst add temp_offset to
 * a temporary's index). */
72 assembler
->temp_offset
= assembler
->color_count
+ assembler
->tex_count
;
/* Map a TGSI source register to the register address used when encoding
 * an instruction.
 *
 * Visible mappings:
 *   TEMPORARY -> Index + assembler->temp_offset
 *   IMMEDIATE -> (Index + assembler->imm_offset) | (1 << 8)
 *   CONSTANT  -> Index | (1 << 8)
 * Any other file is reported through debug_printf.
 *
 * NOTE(review): bit 8 presumably marks the address as a constant-file
 * reference; confirm against the hardware documentation. The switch header,
 * at least one case and the function tail are missing from this listing. */
75 static INLINE
unsigned r300_fs_src(struct r300_fs_asm
* assembler
,
76 struct tgsi_src_register
* src
)
80 /* XXX may be wrong */
83 case TGSI_FILE_TEMPORARY
:
84 return src
->Index
+ assembler
->temp_offset
;
86 case TGSI_FILE_IMMEDIATE
:
/* '+' binds tighter than '|', so this evaluates as
 * (Index + imm_offset) | (1 << 8). */
87 return src
->Index
+ assembler
->imm_offset
| (1 << 8);
89 case TGSI_FILE_CONSTANT
:
91 return src
->Index
| (1 << 8);
94 debug_printf("r300: fs: Unimplemented src %d\n", src
->File
);
/* Map a TGSI destination register to the register address used when
 * encoding an instruction.
 *
 * A temporary maps to Index + assembler->temp_offset, the same mapping
 * r300_fs_src uses for temporaries. Any unhandled file is reported through
 * debug_printf.
 *
 * NOTE(review): the action taken for TGSI_FILE_OUTPUT, the switch header
 * and the function tail are missing from this listing; confirm against the
 * pristine file. */
100 static INLINE
unsigned r300_fs_dst(struct r300_fs_asm
* assembler
,
101 struct tgsi_dst_register
* dst
)
104 case TGSI_FILE_OUTPUT
:
107 case TGSI_FILE_TEMPORARY
:
108 return dst
->Index
+ assembler
->temp_offset
;
111 debug_printf("r300: fs: Unimplemented dst %d\n", dst
->File
);
/* Convert one TGSI extended-swizzle selector to its R500 encoding.
 *
 * TGSI_EXTSWIZZLE_ONE is translated to R500_SWIZZLE_ONE.
 *
 * NOTE(review): the return for every other selector is missing from this
 * listing; per the comment below it is presumably the input value
 * unchanged. Confirm against the pristine file. */
117 static INLINE
unsigned r500_fix_swiz(unsigned s
)
119 /* For historical reasons, the swizzle values x, y, z, w, and 0 are
120 * equivalent to the actual machine code, but 1 is not. Thus, we just
121 * adjust it a bit... */
122 if (s
== TGSI_EXTSWIZZLE_ONE
) {
123 return R500_SWIZZLE_ONE
;
129 static uint32_t r500_rgba_swiz(struct tgsi_full_src_register
* reg
)
131 if (reg
->SrcRegister
.Extended
) {
132 return r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleX
) |
133 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleY
) << 3) |
134 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleZ
) << 6) |
135 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleW
) << 9);
137 return reg
->SrcRegister
.SwizzleX
|
138 (reg
->SrcRegister
.SwizzleY
<< 3) |
139 (reg
->SrcRegister
.SwizzleZ
<< 6) |
140 (reg
->SrcRegister
.SwizzleW
<< 9);
144 static uint32_t r500_strq_swiz(struct tgsi_full_src_register
* reg
)
146 return reg
->SrcRegister
.SwizzleX
|
147 (reg
->SrcRegister
.SwizzleY
<< 2) |
148 (reg
->SrcRegister
.SwizzleZ
<< 4) |
149 (reg
->SrcRegister
.SwizzleW
<< 6);
152 static INLINE
uint32_t r500_rgb_swiz(struct tgsi_full_src_register
* reg
)
154 /* Only the first 9 bits... */
155 return r500_rgba_swiz(reg
) & 0x1ff;
158 static INLINE
uint32_t r500_alpha_swiz(struct tgsi_full_src_register
* reg
)
160 /* Only the last 3 bits... */
161 return r500_rgba_swiz(reg
) >> 9;
164 static INLINE
uint32_t r500_sop_swiz(struct tgsi_full_src_register
* reg
)
166 /* Only the first 3 bits... */
167 return r500_rgba_swiz(reg
) & 0x7;
/* Select the R500 RGBA opcode bits for a TGSI opcode.
 *
 * Visible mappings:
 *   EX2, LG2, RCP, RSQ -> R500_ALU_RGBA_OP_SOP
 *   DP3                -> R500_ALU_RGBA_OP_DP3
 *   DP4, DPH           -> R500_ALU_RGBA_OP_DP4
 *   MAD                -> R500_ALU_RGBA_OP_MAD
 *
 * NOTE(review): the switch header and the value returned for any other
 * opcode are missing from this listing; confirm against the pristine
 * file. */
170 static INLINE
uint32_t r500_rgba_op(unsigned op
)
173 case TGSI_OPCODE_EX2
:
174 case TGSI_OPCODE_LG2
:
175 case TGSI_OPCODE_RCP
:
176 case TGSI_OPCODE_RSQ
:
177 return R500_ALU_RGBA_OP_SOP
;
178 case TGSI_OPCODE_DP3
:
179 return R500_ALU_RGBA_OP_DP3
;
180 case TGSI_OPCODE_DP4
:
181 case TGSI_OPCODE_DPH
:
182 return R500_ALU_RGBA_OP_DP4
;
183 case TGSI_OPCODE_MAD
:
184 return R500_ALU_RGBA_OP_MAD
;
/* Select the R500 alpha opcode bits for a TGSI opcode.
 *
 * Visible mappings:
 *   EX2           -> R500_ALPHA_OP_EX2
 *   LG2           -> R500_ALPHA_OP_LN2
 *   RCP           -> R500_ALPHA_OP_RCP
 *   RSQ           -> R500_ALPHA_OP_RSQ
 *   DP3, DP4, DPH -> R500_ALPHA_OP_DP
 *   MAD           -> R500_ALPHA_OP_MAD
 *
 * NOTE(review): the switch header and the value returned for any other
 * opcode are missing from this listing; confirm against the pristine
 * file. */
190 static INLINE
uint32_t r500_alpha_op(unsigned op
)
193 case TGSI_OPCODE_EX2
:
194 return R500_ALPHA_OP_EX2
;
195 case TGSI_OPCODE_LG2
:
196 return R500_ALPHA_OP_LN2
;
197 case TGSI_OPCODE_RCP
:
198 return R500_ALPHA_OP_RCP
;
199 case TGSI_OPCODE_RSQ
:
200 return R500_ALPHA_OP_RSQ
;
201 case TGSI_OPCODE_DP3
:
202 case TGSI_OPCODE_DP4
:
203 case TGSI_OPCODE_DPH
:
204 return R500_ALPHA_OP_DP
;
205 case TGSI_OPCODE_MAD
:
206 return R500_ALPHA_OP_MAD
;
212 /* Setup an ALU operation. */
213 static INLINE
void r500_emit_alu(struct r500_fragment_shader
* fs
,
214 struct r300_fs_asm
* assembler
,
215 struct tgsi_full_dst_register
* dst
)
217 int i
= fs
->instruction_count
;
219 if (dst
->DstRegister
.File
== TGSI_FILE_OUTPUT
) {
220 fs
->instructions
[i
].inst0
= R500_INST_TYPE_OUT
|
221 R500_ALU_OMASK(dst
->DstRegister
.WriteMask
);
223 fs
->instructions
[i
].inst0
= R500_INST_TYPE_ALU
|
224 R500_ALU_WMASK(dst
->DstRegister
.WriteMask
);
227 fs
->instructions
[i
].inst0
|=
228 R500_INST_TEX_SEM_WAIT
|
229 R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
/* Emit one arithmetic ALU instruction into the slot at
 * fs->instruction_count, then advance the count.
 *
 * r500_emit_alu() fills inst0. The sections below program the source
 * addresses (inst1 for RGB, inst2 for alpha) and the swizzle and select
 * fields (inst3, inst4, inst5) for src[2], src[1] and src[0] in that order.
 * The opcode bits from r500_alpha_op() and r500_rgba_op() are then OR-ed
 * into inst4 and inst5.
 *
 * NOTE(review): the trailing parameters are missing from this listing. The
 * body uses `op` and `is_sop`, and callers pass an opcode followed by an
 * operand count of 1, 2 or 3, so the lines that select which operand
 * sections run are presumably missing too. Confirm against the pristine
 * file. */
232 static INLINE
void r500_emit_maths(struct r500_fragment_shader
* fs
,
233 struct r300_fs_asm
* assembler
,
234 struct tgsi_full_src_register
* src
,
235 struct tgsi_full_dst_register
* dst
,
240 int i
= fs
->instruction_count
;
242 r500_emit_alu(fs
, assembler
, dst
);
/* Third operand (src[2]): this is the only section that assigns inst1,
 * inst2 and inst5 outright rather than OR-ing into them. */
246 fs
->instructions
[i
].inst1
=
247 R500_RGB_ADDR2(r300_fs_src(assembler
, &src
[2].SrcRegister
));
248 fs
->instructions
[i
].inst2
=
249 R500_ALPHA_ADDR2(r300_fs_src(assembler
, &src
[2].SrcRegister
));
250 fs
->instructions
[i
].inst5
=
251 R500_ALU_RGBA_ALPHA_SEL_C_SRC2
|
252 R500_SWIZ_RGBA_C(r500_rgb_swiz(&src
[2])) |
253 R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src
[2]));
/* Second operand (src[1]): ORs into inst1/inst2, assigns inst3/inst4.
 * NOTE(review): if the missing lines let a two-operand instruction skip
 * the src[2] section, these ORs (and the later OR into inst5) would
 * combine with whatever the slot already held -- confirm the slot is
 * zeroed beforehand. */
255 fs
->instructions
[i
].inst1
|=
256 R500_RGB_ADDR1(r300_fs_src(assembler
, &src
[1].SrcRegister
));
257 fs
->instructions
[i
].inst2
|=
258 R500_ALPHA_ADDR1(r300_fs_src(assembler
, &src
[1].SrcRegister
));
259 fs
->instructions
[i
].inst3
=
260 R500_ALU_RGB_SEL_B_SRC1
|
261 R500_SWIZ_RGB_B(r500_rgb_swiz(&src
[1]));
262 fs
->instructions
[i
].inst4
=
263 R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src
[1])) |
264 R500_ALPHA_SEL_B_SRC1
;
/* First operand (src[0]): everything here is OR-ed in. */
268 fs
->instructions
[i
].inst1
|=
269 R500_RGB_ADDR0(r300_fs_src(assembler
, &src
[0].SrcRegister
));
270 fs
->instructions
[i
].inst2
|=
271 R500_ALPHA_ADDR0(r300_fs_src(assembler
, &src
[0].SrcRegister
));
272 fs
->instructions
[i
].inst3
|=
273 R500_ALU_RGB_SEL_A_SRC0
|
274 R500_SWIZ_RGB_A(r500_rgb_swiz(&src
[0]));
/* For a scalar op the alpha A swizzle is taken from the X selector
 * (r500_sop_swiz) instead of the W selector (r500_alpha_swiz). */
275 fs
->instructions
[i
].inst4
|=
276 R500_SWIZ_ALPHA_A(is_sop
? r500_sop_swiz(&src
[0]) :
277 r500_alpha_swiz(&src
[0])) |
278 R500_ALPHA_SEL_A_SRC0
;
/* Opcode bits for the alpha and RGBA halves. */
282 fs
->instructions
[i
].inst4
|= r500_alpha_op(op
);
283 fs
->instructions
[i
].inst5
|= r500_rgba_op(op
);
285 fs
->instruction_count
++;
288 static INLINE
void r500_emit_mov(struct r500_fragment_shader
* fs
,
289 struct r300_fs_asm
* assembler
,
290 struct tgsi_full_src_register
* src
,
291 struct tgsi_full_dst_register
* dst
)
293 int i
= fs
->instruction_count
;
295 r500_emit_alu(fs
, assembler
, dst
);
297 fs
->instructions
[i
].inst1
=
298 R500_RGB_ADDR0(r300_fs_src(assembler
, &src
->SrcRegister
));
299 fs
->instructions
[i
].inst2
=
300 R500_ALPHA_ADDR0(r300_fs_src(assembler
, &src
->SrcRegister
));
301 fs
->instructions
[i
].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
302 R500_SWIZ_RGB_A(r500_rgb_swiz(src
)) |
303 R500_ALU_RGB_SEL_B_SRC0
|
304 R500_SWIZ_RGB_B(r500_rgb_swiz(src
));
305 fs
->instructions
[i
].inst4
= R500_ALPHA_OP_CMP
|
306 R500_SWIZ_ALPHA_A(r500_alpha_swiz(src
)) |
307 R500_SWIZ_ALPHA_B(r500_alpha_swiz(src
));
308 fs
->instructions
[i
].inst5
=
309 R500_ALU_RGBA_OP_CMP
| R500_ALU_RGBA_R_SWIZ_0
|
310 R500_ALU_RGBA_G_SWIZ_0
| R500_ALU_RGBA_B_SWIZ_0
|
311 R500_ALU_RGBA_A_SWIZ_0
;
313 fs
->instruction_count
++;
/* Emit one texture instruction into the slot at fs->instruction_count,
 * then advance the count.
 *
 * inst0 carries the TEX instruction type, the destination write mask and
 * the TEX semaphore wait flag. inst1 carries the texture id and the
 * acquire and ignore-uncovered flags. inst2 carries the source address and
 * STRQ swizzle, the destination address and an identity destination
 * swizzle.
 *
 * NOTE(review): the texture id is hard-coded to 0 here. One parameter
 * (callers pass a value between assembler and src) and the tail of the
 * inst1 expression are missing from this listing; confirm against the
 * pristine file. */
316 static INLINE
void r500_emit_tex(struct r500_fragment_shader
* fs
,
317 struct r300_fs_asm
* assembler
,
319 struct tgsi_full_src_register
* src
,
320 struct tgsi_full_dst_register
* dst
)
322 int i
= fs
->instruction_count
;
324 fs
->instructions
[i
].inst0
= R500_INST_TYPE_TEX
|
325 R500_TEX_WMASK(dst
->DstRegister
.WriteMask
) |
326 R500_INST_TEX_SEM_WAIT
;
327 fs
->instructions
[i
].inst1
= R500_TEX_ID(0) |
328 R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
|
/* Source and destination register addresses; the destination channels are
 * routed R->R, G->G, B->B, A->A. */
330 fs
->instructions
[i
].inst2
=
331 R500_TEX_SRC_ADDR(r300_fs_src(assembler
, &src
->SrcRegister
)) |
332 R500_SWIZ_TEX_STRQ(r500_strq_swiz(src
)) |
333 R500_TEX_DST_ADDR(r300_fs_dst(assembler
, &dst
->DstRegister
)) |
334 R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
|
335 R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
337 fs
->instruction_count
++;
/* Translate one TGSI instruction into R500 fragment-shader instructions by
 * dispatching on inst->Instruction.Opcode.
 *
 * Visible dispatch:
 *   EX2           -> r500_emit_maths with operand count 1
 *   DP3, DP4, DPH -> r500_emit_maths with operand count 2; DPH then
 *                    rewrites the alpha A swizzle of the instruction just
 *                    emitted to R500_SWIZZLE_ONE
 *   MUL, MAD      -> r500_emit_maths with operand count 3; MUL first
 *                    replaces source 2 with r500_constant_zero
 *   MOV, SWZ      -> r500_emit_mov
 *   TXP           -> r500_emit_tex
 *   END           -> no action visible in this listing
 * Any other opcode is reported through debug_printf.
 *
 * NOTE(review): the tails of the r500_emit_maths calls, the declaration of
 * `i`, and the break/default lines are missing from this listing; confirm
 * against the pristine file. */
340 static void r500_fs_instruction(struct r500_fragment_shader
* fs
,
341 struct r300_fs_asm
* assembler
,
342 struct tgsi_full_instruction
* inst
)
345 /* Switch between opcodes. When possible, prefer using the official
346 * AMD/ATI names for opcodes, please, as it facilitates using the
348 switch (inst
->Instruction
.Opcode
) {
349 case TGSI_OPCODE_EX2
:
350 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
351 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 1,
354 case TGSI_OPCODE_DP3
:
355 case TGSI_OPCODE_DP4
:
356 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
357 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 2,
360 case TGSI_OPCODE_DPH
:
361 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
362 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 2,
364 /* Force alpha swizzle to one */
/* r500_emit_maths already advanced the count, so the instruction to patch
 * is the previous slot. */
365 i
= fs
->instruction_count
- 1;
366 fs
->instructions
[i
].inst4
&= ~R500_SWIZ_ALPHA_A(0x7);
367 fs
->instructions
[i
].inst4
|= R500_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
);
369 case TGSI_OPCODE_MUL
:
370 /* Force our src2 to zero */
/* This overwrites the caller's instruction in place.
 * NOTE(review): the opcode passed on is still MUL, while the visible
 * r500_rgba_op and r500_alpha_op tables list MAD but not MUL -- confirm
 * how MUL is encoded. */
371 inst
->FullSrcRegisters
[2] = r500_constant_zero
;
372 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
373 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3,
376 case TGSI_OPCODE_MAD
:
377 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
378 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3,
381 case TGSI_OPCODE_MOV
:
382 case TGSI_OPCODE_SWZ
:
383 r500_emit_mov(fs
, assembler
, &inst
->FullSrcRegisters
[0],
384 &inst
->FullDstRegisters
[0]);
386 case TGSI_OPCODE_TXP
:
387 r500_emit_tex(fs
, assembler
, 0, &inst
->FullSrcRegisters
[0],
388 &inst
->FullDstRegisters
[0]);
390 case TGSI_OPCODE_END
:
393 debug_printf("r300: fs: Bad opcode %d\n",
394 inst
->Instruction
.Opcode
);
/* Finish an assembled R500 fragment shader.
 *
 * Sets the shader's stack size to assembler->temp_offset and ORs flag bits
 * into inst0 of the last emitted instruction (index instruction_count - 1).
 *
 * NOTE(review): the right-hand side of the final `|=` is missing from this
 * listing. The index expression also assumes at least one instruction has
 * been emitted; with instruction_count == 0 it would be -1. Confirm
 * against the pristine file and the callers. */
399 static void r500_fs_finalize(struct r500_fragment_shader
* fs
,
400 struct r300_fs_asm
* assembler
)
402 /* XXX subtly wrong */
403 fs
->shader
.stack_size
= assembler
->temp_offset
;
405 /* XXX should this just go with OPCODE_END? */
406 fs
->instructions
[fs
->instruction_count
- 1].inst0
|=
410 void r300_translate_fragment_shader(struct r300_context
* r300
,
411 struct r300_fragment_shader
* fs
)
413 struct tgsi_parse_context parser
;
415 tgsi_parse_init(&parser
, fs
->shader
.state
.tokens
);
417 while (!tgsi_parse_end_of_tokens(&parser
)) {
418 tgsi_parse_token(&parser
);
421 r300_copy_passthrough_shader(fs
);
/* Translate a TGSI fragment shader into R500 fragment-shader instructions.
 *
 * Allocates a zeroed r300_fs_asm scratch structure, points imm_offset at
 * the first slot after the user constants of the fragment-stage constant
 * buffer, and walks the token stream:
 *   declarations -> r300_fs_declare()
 *   immediates   -> copied into the constant buffer, imm_count incremented
 *   instructions -> r500_fs_instruction()
 * Afterwards consts->count is set to user_count + imm_count, the shader is
 * finalized with r500_fs_finalize(), the TGSI tokens are dumped, and the
 * parser is released.
 *
 * NOTE(review): several short lines are missing from this listing,
 * including the declaration of `i`, the body of the allocation-failure
 * branch, the break statements and the function tail. No release of
 * `assembler` is visible; confirm it is freed in the pristine file. */
424 void r500_translate_fragment_shader(struct r300_context
* r300
,
425 struct r500_fragment_shader
* fs
)
427 struct tgsi_parse_context parser
;
429 struct r300_constant_buffer
* consts
=
430 &r300
->shader_constants
[PIPE_SHADER_FRAGMENT
];
432 struct r300_fs_asm
* assembler
= CALLOC_STRUCT(r300_fs_asm
);
433 if (assembler
== NULL
) {
436 /* Setup starting offset for immediates. */
437 assembler
->imm_offset
= consts
->user_count
;
439 tgsi_parse_init(&parser
, fs
->shader
.state
.tokens
);
441 while (!tgsi_parse_end_of_tokens(&parser
)) {
442 tgsi_parse_token(&parser
);
444 /* This is seriously the lamest way to create fragment programs ever.
446 switch (parser
.FullToken
.Token
.Type
) {
447 case TGSI_TOKEN_TYPE_DECLARATION
:
448 /* Allocated registers sitting at the beginning
450 r300_fs_declare(assembler
, &parser
.FullToken
.FullDeclaration
);
452 case TGSI_TOKEN_TYPE_IMMEDIATE
:
453 debug_printf("r300: Emitting immediate to constant buffer, "
454 "position %d\n", consts
->user_count
);
455 /* I am not amused by the length of these. */
/* NOTE(review): every immediate is written to row imm_offset. imm_count
 * is incremented below but is not part of the row index, and the message
 * above always prints user_count. A second immediate therefore appears to
 * overwrite the first, while r300_fs_src addresses immediate k at
 * imm_offset + k. Confirm whether the row should be
 * imm_offset + imm_count. */
456 for (i
= 0; i
< 4; i
++) {
457 consts
->constants
[assembler
->imm_offset
][i
] =
458 parser
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
]
461 assembler
->imm_count
++;
463 case TGSI_TOKEN_TYPE_INSTRUCTION
:
464 r500_fs_instruction(fs
, assembler
,
465 &parser
.FullToken
.FullInstruction
);
/* Summary of the register layout built from the declarations. */
471 debug_printf("r300: %d texs and %d colors, first free reg is %d\n",
472 assembler
->tex_count
, assembler
->color_count
,
473 assembler
->tex_count
+ assembler
->color_count
);
/* Total constants = user-supplied constants plus immediates. */
475 consts
->count
= consts
->user_count
+ assembler
->imm_count
;
476 debug_printf("r300: %d total constants, "
477 "%d from user and %d from immediates\n", consts
->count
,
478 consts
->user_count
, assembler
->imm_count
);
479 r500_fs_finalize(fs
, assembler
);
481 tgsi_dump(fs
->shader
.state
.tokens
);
484 //r500_copy_passthrough_shader(fs);
486 tgsi_parse_free(&parser
);