2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "r300_state_shader.h"
25 static void r300_copy_passthrough_shader(struct r300_fragment_shader
* fs
)
27 struct r300_fragment_shader
* pt
= &r300_passthrough_fragment_shader
;
28 fs
->shader
.stack_size
= pt
->shader
.stack_size
;
29 fs
->alu_instruction_count
= pt
->alu_instruction_count
;
30 fs
->tex_instruction_count
= pt
->tex_instruction_count
;
31 fs
->indirections
= pt
->indirections
;
32 fs
->instructions
[0] = pt
->instructions
[0];
35 static void r500_copy_passthrough_shader(struct r500_fragment_shader
* fs
)
37 struct r500_fragment_shader
* pt
= &r500_passthrough_fragment_shader
;
38 fs
->shader
.stack_size
= pt
->shader
.stack_size
;
39 fs
->instruction_count
= pt
->instruction_count
;
40 fs
->instructions
[0] = pt
->instructions
[0];
43 static void r300_fs_declare(struct r300_fs_asm
* assembler
,
44 struct tgsi_full_declaration
* decl
)
46 switch (decl
->Declaration
.File
) {
48 switch (decl
->Semantic
.SemanticName
) {
49 case TGSI_SEMANTIC_COLOR
:
50 assembler
->color_count
++;
52 case TGSI_SEMANTIC_GENERIC
:
53 assembler
->tex_count
++;
56 debug_printf("r300: fs: Bad semantic declaration %d\n",
57 decl
->Semantic
.SemanticName
);
61 case TGSI_FILE_OUTPUT
:
62 case TGSI_FILE_CONSTANT
:
64 case TGSI_FILE_TEMPORARY
:
65 assembler
->temp_count
++;
68 debug_printf("r300: fs: Bad file %d\n", decl
->Declaration
.File
);
72 assembler
->temp_offset
= assembler
->color_count
+ assembler
->tex_count
;
/* Translate a TGSI source register into a hardware source address.
 *
 * Cases visible below:
 *   - TGSI_FILE_TEMPORARY: Index + assembler->temp_offset
 *   - TGSI_FILE_IMMEDIATE: (Index + assembler->imm_offset) with bit 8 set
 *   - TGSI_FILE_CONSTANT:  Index with bit 8 set
 *   - otherwise: logs "Unimplemented src" through debug_printf
 *
 * NOTE(review): this listing has lost lines. The embedded numbers jump
 * 76 -> 82 -> 85, so the opening brace, the switch header and whatever case
 * the "XXX may be wrong" comment belongs to are missing, as are the default
 * label, the closing braces and the final return. Restore them from the
 * upstream file before building; do not rely on this text as-is. */
75 static INLINE
unsigned r300_fs_src(struct r300_fs_asm
* assembler
,
76 struct tgsi_src_register
* src
)
82 /* XXX may be wrong */
85 case TGSI_FILE_TEMPORARY
:
86 return src
->Index
+ assembler
->temp_offset
;
88 case TGSI_FILE_IMMEDIATE
:
/* Immediates are addressed relative to imm_offset, with bit 8 set. */
89 return (src
->Index
+ assembler
->imm_offset
) | (1 << 8);
91 case TGSI_FILE_CONSTANT
:
/* Constants use their own index, also with bit 8 set. */
93 return src
->Index
| (1 << 8);
96 debug_printf("r300: fs: Unimplemented src %d\n", src
->File
);
/* Translate a TGSI destination register into a hardware destination address.
 *
 * The only complete case visible below is TGSI_FILE_TEMPORARY, which yields
 * Index + assembler->temp_offset. Unhandled files are logged as
 * "Unimplemented dst" through debug_printf.
 *
 * NOTE(review): this listing has lost lines. The embedded numbers jump
 * 103 -> 107 -> 110 -> 113, so the switch header, the label that the "KIL"
 * comment belongs to, and the statements for that case and for
 * TGSI_FILE_OUTPUT are missing, as are the default label, the closing braces
 * and the final return. Restore them from the upstream file before
 * building. */
102 static INLINE
unsigned r300_fs_dst(struct r300_fs_asm
* assembler
,
103 struct tgsi_dst_register
* dst
)
107 /* This happens during KIL instructions. */
110 case TGSI_FILE_OUTPUT
:
113 case TGSI_FILE_TEMPORARY
:
/* Temporaries are addressed relative to temp_offset. */
114 return dst
->Index
+ assembler
->temp_offset
;
117 debug_printf("r300: fs: Unimplemented dst %d\n", dst
->File
);
123 static INLINE
unsigned r500_fix_swiz(unsigned s
)
125 /* For historical reasons, the swizzle values x, y, z, w, and 0 are
126 * equivalent to the actual machine code, but 1 is not. Thus, we just
127 * adjust it a bit... */
128 if (s
== TGSI_EXTSWIZZLE_ONE
) {
129 return R500_SWIZZLE_ONE
;
135 static uint32_t r500_rgba_swiz(struct tgsi_full_src_register
* reg
)
137 if (reg
->SrcRegister
.Extended
) {
138 return r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleX
) |
139 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleY
) << 3) |
140 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleZ
) << 6) |
141 (r500_fix_swiz(reg
->SrcRegisterExtSwz
.ExtSwizzleW
) << 9);
143 return reg
->SrcRegister
.SwizzleX
|
144 (reg
->SrcRegister
.SwizzleY
<< 3) |
145 (reg
->SrcRegister
.SwizzleZ
<< 6) |
146 (reg
->SrcRegister
.SwizzleW
<< 9);
150 static uint32_t r500_strq_swiz(struct tgsi_full_src_register
* reg
)
152 return reg
->SrcRegister
.SwizzleX
|
153 (reg
->SrcRegister
.SwizzleY
<< 2) |
154 (reg
->SrcRegister
.SwizzleZ
<< 4) |
155 (reg
->SrcRegister
.SwizzleW
<< 6);
158 static INLINE
uint32_t r500_rgb_swiz(struct tgsi_full_src_register
* reg
)
160 /* Only the first 9 bits... */
161 return (r500_rgba_swiz(reg
) & 0x1ff) |
162 (reg
->SrcRegister
.Negate
? (1 << 9) : 0) |
163 (reg
->SrcRegisterExtMod
.Absolute
? (1 << 10) : 0);
166 static INLINE
uint32_t r500_alpha_swiz(struct tgsi_full_src_register
* reg
)
168 /* Only the last 3 bits... */
169 return (r500_rgba_swiz(reg
) >> 9) |
170 (reg
->SrcRegister
.Negate
? (1 << 9) : 0) |
171 (reg
->SrcRegisterExtMod
.Absolute
? (1 << 10) : 0);
174 static INLINE
uint32_t r500_rgba_op(unsigned op
)
177 case TGSI_OPCODE_EX2
:
178 case TGSI_OPCODE_LG2
:
179 case TGSI_OPCODE_RCP
:
180 case TGSI_OPCODE_RSQ
:
181 return R500_ALU_RGBA_OP_SOP
;
182 case TGSI_OPCODE_FRC
:
183 return R500_ALU_RGBA_OP_FRC
;
184 case TGSI_OPCODE_DP3
:
185 return R500_ALU_RGBA_OP_DP3
;
186 case TGSI_OPCODE_DP4
:
187 case TGSI_OPCODE_DPH
:
188 return R500_ALU_RGBA_OP_DP4
;
189 case TGSI_OPCODE_ABS
:
190 case TGSI_OPCODE_CMP
:
191 case TGSI_OPCODE_MOV
:
192 case TGSI_OPCODE_SWZ
:
193 return R500_ALU_RGBA_OP_CMP
;
194 case TGSI_OPCODE_ADD
:
195 case TGSI_OPCODE_MAD
:
196 case TGSI_OPCODE_MUL
:
197 case TGSI_OPCODE_SUB
:
198 return R500_ALU_RGBA_OP_MAD
;
204 static INLINE
uint32_t r500_alpha_op(unsigned op
)
207 case TGSI_OPCODE_EX2
:
208 return R500_ALPHA_OP_EX2
;
209 case TGSI_OPCODE_LG2
:
210 return R500_ALPHA_OP_LN2
;
211 case TGSI_OPCODE_RCP
:
212 return R500_ALPHA_OP_RCP
;
213 case TGSI_OPCODE_RSQ
:
214 return R500_ALPHA_OP_RSQ
;
215 case TGSI_OPCODE_FRC
:
216 return R500_ALPHA_OP_FRC
;
217 case TGSI_OPCODE_DP3
:
218 case TGSI_OPCODE_DP4
:
219 case TGSI_OPCODE_DPH
:
220 return R500_ALPHA_OP_DP
;
221 case TGSI_OPCODE_ABS
:
222 case TGSI_OPCODE_CMP
:
223 case TGSI_OPCODE_MOV
:
224 case TGSI_OPCODE_SWZ
:
225 return R500_ALPHA_OP_CMP
;
226 case TGSI_OPCODE_ADD
:
227 case TGSI_OPCODE_MAD
:
228 case TGSI_OPCODE_MUL
:
229 case TGSI_OPCODE_SUB
:
230 return R500_ALPHA_OP_MAD
;
236 static INLINE
uint32_t r500_tex_op(unsigned op
)
239 case TGSI_OPCODE_KIL
:
240 return R500_TEX_INST_TEXKILL
;
241 case TGSI_OPCODE_TEX
:
242 return R500_TEX_INST_LD
;
243 case TGSI_OPCODE_TXB
:
244 return R500_TEX_INST_LODBIAS
;
245 case TGSI_OPCODE_TXP
:
246 return R500_TEX_INST_PROJ
;
252 /* Setup an ALU operation. */
253 static INLINE
void r500_emit_alu(struct r500_fragment_shader
* fs
,
254 struct r300_fs_asm
* assembler
,
255 struct tgsi_full_dst_register
* dst
)
257 int i
= fs
->instruction_count
;
259 if (dst
->DstRegister
.File
== TGSI_FILE_OUTPUT
) {
260 fs
->instructions
[i
].inst0
= R500_INST_TYPE_OUT
|
261 R500_ALU_OMASK(dst
->DstRegister
.WriteMask
);
263 fs
->instructions
[i
].inst0
= R500_INST_TYPE_ALU
|
264 R500_ALU_WMASK(dst
->DstRegister
.WriteMask
);
267 fs
->instructions
[i
].inst0
|= R500_INST_TEX_SEM_WAIT
;
269 fs
->instructions
[i
].inst4
=
270 R500_ALPHA_ADDRD(r300_fs_dst(assembler
, &dst
->DstRegister
));
271 fs
->instructions
[i
].inst5
=
272 R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler
, &dst
->DstRegister
));
275 static INLINE
void r500_emit_maths(struct r500_fragment_shader
* fs
,
276 struct r300_fs_asm
* assembler
,
277 struct tgsi_full_src_register
* src
,
278 struct tgsi_full_dst_register
* dst
,
282 int i
= fs
->instruction_count
;
284 r500_emit_alu(fs
, assembler
, dst
);
288 fs
->instructions
[i
].inst1
=
289 R500_RGB_ADDR2(r300_fs_src(assembler
, &src
[2].SrcRegister
));
290 fs
->instructions
[i
].inst2
=
291 R500_ALPHA_ADDR2(r300_fs_src(assembler
, &src
[2].SrcRegister
));
292 fs
->instructions
[i
].inst5
|=
293 R500_ALU_RGBA_SEL_C_SRC2
|
294 R500_SWIZ_RGBA_C(r500_rgb_swiz(&src
[2])) |
295 R500_ALU_RGBA_ALPHA_SEL_C_SRC2
|
296 R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src
[2]));
298 fs
->instructions
[i
].inst1
|=
299 R500_RGB_ADDR1(r300_fs_src(assembler
, &src
[1].SrcRegister
));
300 fs
->instructions
[i
].inst2
|=
301 R500_ALPHA_ADDR1(r300_fs_src(assembler
, &src
[1].SrcRegister
));
302 fs
->instructions
[i
].inst3
=
303 R500_ALU_RGB_SEL_B_SRC1
|
304 R500_SWIZ_RGB_B(r500_rgb_swiz(&src
[1]));
305 fs
->instructions
[i
].inst4
|=
306 R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src
[1])) |
307 R500_ALPHA_SEL_B_SRC1
;
311 fs
->instructions
[i
].inst1
|=
312 R500_RGB_ADDR0(r300_fs_src(assembler
, &src
[0].SrcRegister
));
313 fs
->instructions
[i
].inst2
|=
314 R500_ALPHA_ADDR0(r300_fs_src(assembler
, &src
[0].SrcRegister
));
315 fs
->instructions
[i
].inst3
|=
316 R500_ALU_RGB_SEL_A_SRC0
|
317 R500_SWIZ_RGB_A(r500_rgb_swiz(&src
[0]));
318 fs
->instructions
[i
].inst4
|=
319 R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src
[0])) |
320 R500_ALPHA_SEL_A_SRC0
;
324 fs
->instructions
[i
].inst4
|= r500_alpha_op(op
);
325 fs
->instructions
[i
].inst5
|= r500_rgba_op(op
);
327 fs
->instruction_count
++;
330 static INLINE
void r500_emit_tex(struct r500_fragment_shader
* fs
,
331 struct r300_fs_asm
* assembler
,
332 struct tgsi_full_src_register
* src
,
333 struct tgsi_full_dst_register
* dst
,
336 int i
= fs
->instruction_count
;
338 fs
->instructions
[i
].inst0
= R500_INST_TYPE_TEX
|
339 R500_TEX_WMASK(dst
->DstRegister
.WriteMask
) |
340 R500_INST_TEX_SEM_WAIT
;
341 fs
->instructions
[i
].inst1
= R500_TEX_ID(0) |
342 R500_TEX_SEM_ACQUIRE
| //R500_TEX_IGNORE_UNCOVERED |
344 fs
->instructions
[i
].inst2
=
345 R500_TEX_SRC_ADDR(r300_fs_src(assembler
, &src
->SrcRegister
)) |
346 R500_SWIZ_TEX_STRQ(r500_strq_swiz(src
)) |
347 R500_TEX_DST_ADDR(r300_fs_dst(assembler
, &dst
->DstRegister
)) |
348 R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
|
349 R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
351 if (dst
->DstRegister
.File
== TGSI_FILE_OUTPUT
) {
352 fs
->instructions
[i
].inst2
|=
353 R500_TEX_DST_ADDR(assembler
->temp_offset
+
354 assembler
->temp_count
);
357 fs
->instruction_count
++;
360 static void r500_fs_instruction(struct r500_fragment_shader
* fs
,
361 struct r300_fs_asm
* assembler
,
362 struct tgsi_full_instruction
* inst
)
365 /* Switch between opcodes. When possible, prefer using the official
366 * AMD/ATI names for opcodes, please, as it facilitates using the
368 switch (inst
->Instruction
.Opcode
) {
369 /* The simple scalar ops. */
370 case TGSI_OPCODE_EX2
:
371 case TGSI_OPCODE_LG2
:
372 case TGSI_OPCODE_RCP
:
373 case TGSI_OPCODE_RSQ
:
374 /* Copy red swizzle to alpha for src0 */
375 inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtSwizzleW
=
376 inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtSwizzleX
;
377 inst
->FullSrcRegisters
[0].SrcRegister
.SwizzleW
=
378 inst
->FullSrcRegisters
[0].SrcRegister
.SwizzleX
;
380 case TGSI_OPCODE_FRC
:
381 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
382 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 1);
385 /* The dot products. */
386 case TGSI_OPCODE_DPH
:
387 /* Set alpha swizzle to one for src0 */
388 if (!inst
->FullSrcRegisters
[0].SrcRegister
.Extended
) {
389 inst
->FullSrcRegisters
[0].SrcRegister
.Extended
= TRUE
;
390 inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtSwizzleX
=
391 inst
->FullSrcRegisters
[0].SrcRegister
.SwizzleX
;
392 inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtSwizzleY
=
393 inst
->FullSrcRegisters
[0].SrcRegister
.SwizzleY
;
394 inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtSwizzleZ
=
395 inst
->FullSrcRegisters
[0].SrcRegister
.SwizzleZ
;
397 inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtSwizzleW
=
400 case TGSI_OPCODE_DP3
:
401 case TGSI_OPCODE_DP4
:
402 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
403 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 2);
406 /* Simple three-source operations. */
407 case TGSI_OPCODE_CMP
:
408 /* Swap src0 and src2 */
409 inst
->FullSrcRegisters
[3] = inst
->FullSrcRegisters
[2];
410 inst
->FullSrcRegisters
[2] = inst
->FullSrcRegisters
[0];
411 inst
->FullSrcRegisters
[0] = inst
->FullSrcRegisters
[3];
412 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
413 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3);
416 /* The MAD variants. */
417 case TGSI_OPCODE_SUB
:
418 /* Just like ADD, but flip the negation on src1 first */
419 inst
->FullSrcRegisters
[1].SrcRegister
.Negate
=
420 !inst
->FullSrcRegisters
[1].SrcRegister
.Negate
;
422 case TGSI_OPCODE_ADD
:
423 /* Force src0 to one, move all registers over */
424 inst
->FullSrcRegisters
[2] = inst
->FullSrcRegisters
[1];
425 inst
->FullSrcRegisters
[1] = inst
->FullSrcRegisters
[0];
426 inst
->FullSrcRegisters
[0] = r500_constant_one
;
427 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
428 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3);
430 case TGSI_OPCODE_MUL
:
431 /* Force our src2 to zero */
432 inst
->FullSrcRegisters
[2] = r500_constant_zero
;
433 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
434 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3);
436 case TGSI_OPCODE_MAD
:
437 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
438 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3);
441 /* The MOV variants. */
442 case TGSI_OPCODE_ABS
:
443 /* Set absolute value modifiers. */
444 inst
->FullSrcRegisters
[0].SrcRegisterExtMod
.Absolute
= TRUE
;
446 case TGSI_OPCODE_MOV
:
447 case TGSI_OPCODE_SWZ
:
448 /* src0 -> src1 and src2 forced to zero */
449 inst
->FullSrcRegisters
[1] = inst
->FullSrcRegisters
[0];
450 inst
->FullSrcRegisters
[2] = r500_constant_zero
;
451 r500_emit_maths(fs
, assembler
, inst
->FullSrcRegisters
,
452 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
, 3);
455 /* The texture instruction set. */
456 case TGSI_OPCODE_KIL
:
457 case TGSI_OPCODE_TEX
:
458 case TGSI_OPCODE_TXB
:
459 case TGSI_OPCODE_TXP
:
460 r500_emit_tex(fs
, assembler
, &inst
->FullSrcRegisters
[0],
461 &inst
->FullDstRegisters
[0], inst
->Instruction
.Opcode
);
464 /* This is the end. My only friend, the end. */
465 case TGSI_OPCODE_END
:
468 debug_printf("r300: fs: Bad opcode %d\n",
469 inst
->Instruction
.Opcode
);
473 /* Clamp, if saturation flags are set. */
474 if (inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
) {
475 fs
->instructions
[fs
->instruction_count
- 1].inst0
|=
476 R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
/* Finish an assembled r500 fragment shader.
 *
 * Sets the shader's stack size to temp_count + temp_offset and then ORs a
 * flag into inst0 of the last emitted instruction.
 *
 * NOTE(review): this listing has lost lines. The right-hand side of the
 * final '|=' (embedded line 487) is missing, as are the braces, so the flag
 * being set cannot be determined from this text; restore it from the
 * upstream file. Also note that the index instruction_count - 1 is not
 * guarded against an empty program. */
480 static void r500_fs_finalize(struct r500_fragment_shader
* fs
,
481 struct r300_fs_asm
* assembler
)
483 fs
->shader
.stack_size
= assembler
->temp_count
+ assembler
->temp_offset
;
485 /* XXX should this just go with OPCODE_END? */
486 fs
->instructions
[fs
->instruction_count
- 1].inst0
|=
490 void r300_translate_fragment_shader(struct r300_context
* r300
,
491 struct r300_fragment_shader
* fs
)
493 struct tgsi_parse_context parser
;
495 tgsi_parse_init(&parser
, fs
->shader
.state
.tokens
);
497 while (!tgsi_parse_end_of_tokens(&parser
)) {
498 tgsi_parse_token(&parser
);
501 r300_copy_passthrough_shader(fs
);
504 void r500_translate_fragment_shader(struct r300_context
* r300
,
505 struct r500_fragment_shader
* fs
)
507 struct tgsi_parse_context parser
;
509 struct r300_constant_buffer
* consts
=
510 &r300
->shader_constants
[PIPE_SHADER_FRAGMENT
];
512 struct r300_fs_asm
* assembler
= CALLOC_STRUCT(r300_fs_asm
);
513 if (assembler
== NULL
) {
516 /* Setup starting offset for immediates. */
517 assembler
->imm_offset
= consts
->user_count
;
519 tgsi_parse_init(&parser
, fs
->shader
.state
.tokens
);
521 while (!tgsi_parse_end_of_tokens(&parser
)) {
522 tgsi_parse_token(&parser
);
524 /* This is seriously the lamest way to create fragment programs ever.
526 switch (parser
.FullToken
.Token
.Type
) {
527 case TGSI_TOKEN_TYPE_DECLARATION
:
528 /* Allocated registers sitting at the beginning
530 r300_fs_declare(assembler
, &parser
.FullToken
.FullDeclaration
);
532 case TGSI_TOKEN_TYPE_IMMEDIATE
:
533 debug_printf("r300: Emitting immediate to constant buffer, "
534 "position %d\n", consts
->user_count
);
535 /* I am not amused by the length of these. */
536 for (i
= 0; i
< 4; i
++) {
537 consts
->constants
[assembler
->imm_offset
][i
] =
538 parser
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
]
541 assembler
->imm_count
++;
543 case TGSI_TOKEN_TYPE_INSTRUCTION
:
544 r500_fs_instruction(fs
, assembler
,
545 &parser
.FullToken
.FullInstruction
);
551 debug_printf("r300: %d texs and %d colors, first free reg is %d\n",
552 assembler
->tex_count
, assembler
->color_count
,
553 assembler
->tex_count
+ assembler
->color_count
);
555 consts
->count
= consts
->user_count
+ assembler
->imm_count
;
556 debug_printf("r300: %d total constants, "
557 "%d from user and %d from immediates\n", consts
->count
,
558 consts
->user_count
, assembler
->imm_count
);
559 r500_fs_finalize(fs
, assembler
);
561 tgsi_dump(fs
->shader
.state
.tokens
);
564 tgsi_parse_free(&parser
);