2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
34 * \author Ben Skeggs <darktama@iinet.net.au>
36 * \author Jerome Glisse <j.glisse@gmail.com>
39 #include "r300_fragprog.h"
43 #include "radeon_program_pair.h"
44 #include "r300_fragprog_swizzle.h"
/* Running state carried through emission: the owning compiler plus
 * bit-field bookkeeping for the node that is currently being built.
 * NOTE(review): this listing shows neither the closing brace nor a
 * `node_flags` member, although later code assigns emit->node_flags;
 * confirm the full definition against the complete file. */
47 struct r300_emit_state
{
48 struct r300_fragment_program_compiler
* compiler
;
/* Two-bit index of the node being built; begin_tex() reports an error
 * instead of advancing once this equals 3. */
50 unsigned current_node
: 2;
/* Value of code->tex.length recorded when the current node was started. */
51 unsigned node_first_tex
: 8;
/* Value of code->alu.length recorded when the current node was started. */
52 unsigned node_first_alu
: 8;
/* Body of a locals-declaring macro: binds `c` to emit->compiler and `code`
 * to that compiler's r300 code structure, so it needs `emit` in scope.
 * NOTE(review): the #define line naming this macro is not visible here. */
57 struct r300_fragment_program_compiler *c = emit->compiler; \
58 struct r300_fragment_program_code *code = &c->code->code.r300
/* error(): forwards a printf-style message to rc_error() on &c->Base,
 * prefixed with __FILE__ and __FUNCTION__; requires `c` in scope.
 * NOTE(review): the end of the do-block is not visible in this listing. */
60 #define error(fmt, args...) do { \
61 rc_error(&c->Base, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
/**
 * Extract the three bits that sit directly above the low six bits of an
 * ALU offset or size value.
 *
 * @param bits The full value.
 * @return Bits [8:6] of \p bits, right-aligned (range 0..7).
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	return (bits >> 6) & 0x7;
}
/**
 * Extract the most significant bits of a texture offset or size value,
 * i.e. the bits that remain after dropping the low \p lsbs bits.
 *
 * The result is limited to a contiguous four-bit field. The mask used to
 * be 0x15 (binary 10101), which discarded bits 1 and 3 of the result and
 * let bit 4 through.
 * NOTE(review): a four-bit field is assumed (decimal 15 was the likely
 * intent); confirm against the R400 TEX_*_MSB register field widths.
 *
 * @param bits The full value.
 * @param lsbs The number of least significant bits
 * @return (\p bits >> \p lsbs) restricted to the range 0..15.
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
/* Shift x right by lsbs bits and keep the bits selected by mask.
 * Every argument is parenthesised so that expression arguments such as
 * `a | b` or `n & 2` are evaluated as a unit. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
81 * Mark a temporary register as used.
83 static void use_temporary(struct r300_fragment_program_code
*code
, unsigned int index
)
85 if (index
> code
->pixsize
)
86 code
->pixsize
= index
;
/* Encode one pair-instruction source operand as a source address value.
 * Visible behaviour: a constant yields its index with bit 5 set; a
 * temporary or input is registered through use_temporary() and yields the
 * low five bits of its index.
 * NOTE(review): original lines between the signature and the first branch,
 * and after the last branch, are missing from this listing; any early-out
 * and the result for other register files cannot be confirmed from here. */
89 static unsigned int use_source(struct r300_fragment_program_code
* code
, struct rc_pair_instruction_source src
)
/* Constant operand: index with bit 5 set on top. */
94 if (src
.File
== RC_FILE_CONSTANT
) {
95 return src
.Index
| (1 << 5);
/* Temporary or input operand: record the register use, then return the
 * index masked to five bits. */
96 } else if (src
.File
== RC_FILE_TEMPORARY
|| src
.File
== RC_FILE_INPUT
) {
97 use_temporary(code
, src
.Index
);
98 return src
.Index
& 0x1f;
/* Map a compiler opcode to the R300_ALU_OUTC_* value used for the RGB half
 * of a paired ALU instruction.
 * NOTE(review): the switch header and the label guarding the error call
 * are missing from this listing. */
105 static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler
* c
, rc_opcode opcode
)
108 case RC_OPCODE_CMP
: return R300_ALU_OUTC_CMP
;
109 case RC_OPCODE_CND
: return R300_ALU_OUTC_CND
;
110 case RC_OPCODE_DP3
: return R300_ALU_OUTC_DP3
;
111 case RC_OPCODE_DP4
: return R300_ALU_OUTC_DP4
;
112 case RC_OPCODE_FRC
: return R300_ALU_OUTC_FRC
;
/* Reports an unknown opcode by name. NOTE(review): presumably the default
 * case; no return is visible before the MAD case below, so control may
 * fall through to it - confirm against the complete file. */
114 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode
)->Name
);
118 case RC_OPCODE_MAD
: return R300_ALU_OUTC_MAD
;
119 case RC_OPCODE_MAX
: return R300_ALU_OUTC_MAX
;
120 case RC_OPCODE_MIN
: return R300_ALU_OUTC_MIN
;
121 case RC_OPCODE_REPL_ALPHA
: return R300_ALU_OUTC_REPL_ALPHA
;
/* Map a compiler opcode to the R300_ALU_OUTA_* value used for the alpha
 * half of a paired ALU instruction.
 * NOTE(review): the switch header and the label guarding the error call
 * are missing from this listing. */
125 static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler
* c
, rc_opcode opcode
)
128 case RC_OPCODE_CMP
: return R300_ALU_OUTA_CMP
;
129 case RC_OPCODE_CND
: return R300_ALU_OUTA_CND
;
/* DP3 and DP4 both map to the DP4 alpha encoding. */
130 case RC_OPCODE_DP3
: return R300_ALU_OUTA_DP4
;
131 case RC_OPCODE_DP4
: return R300_ALU_OUTA_DP4
;
132 case RC_OPCODE_EX2
: return R300_ALU_OUTA_EX2
;
133 case RC_OPCODE_FRC
: return R300_ALU_OUTA_FRC
;
134 case RC_OPCODE_LG2
: return R300_ALU_OUTA_LG2
;
/* NOTE(review): the message text names translate_rgb_opcode although this
 * is the alpha translator - looks like a copy/paste slip in the string.
 * No return is visible before the MAD case below; confirm fall-through. */
136 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode
)->Name
);
140 case RC_OPCODE_MAD
: return R300_ALU_OUTA_MAD
;
141 case RC_OPCODE_MAX
: return R300_ALU_OUTA_MAX
;
142 case RC_OPCODE_MIN
: return R300_ALU_OUTA_MIN
;
143 case RC_OPCODE_RCP
: return R300_ALU_OUTA_RCP
;
144 case RC_OPCODE_RSQ
: return R300_ALU_OUTA_RSQ
;
149 * Emit one paired ALU instruction.
/* Append one paired ALU instruction to code->alu.inst[] and fill its
 * rgb_inst, alpha_inst, rgb_addr, alpha_addr and r400_ext_addr words from
 * `inst`. Reports "Too many ALU instructions" once code->alu.length has
 * reached c->Base.max_alu_insts.
 * finish_node() tests the result with `!emit_alu(...)`, so a zero return
 * presumably signals failure.
 * NOTE(review): local declarations, return statements, the presubtract
 * case labels and a few other lines are missing from this listing. */
151 static int emit_alu(struct r300_emit_state
* emit
, struct rc_pair_instruction
* inst
)
/* Refuse to emit past the compiler's ALU instruction limit. */
157 if (code
->alu
.length
>= c
->Base
.max_alu_insts
) {
158 error("Too many ALU instructions");
/* Claim the next free slot; the length is post-incremented. */
162 ip
= code
->alu
.length
++;
/* Start both instruction words from the translated opcodes. */
164 code
->alu
.inst
[ip
].rgb_inst
= translate_rgb_opcode(c
, inst
->RGB
.Opcode
);
165 code
->alu
.inst
[ip
].alpha_inst
= translate_alpha_opcode(c
, inst
->Alpha
.Opcode
);
/* Three source slots: each contributes an address placed at 6*j bits and
 * an argument selector placed at 7*j bits. */
167 for(j
= 0; j
< 3; ++j
) {
168 /* Set the RGB address */
169 unsigned int src
= use_source(code
, inst
->RGB
.Src
[j
]);
/* Indices at or above R300_PFS_NUM_TEMP_REGS also set the matching
 * extension bit in r400_ext_addr. */
171 if (inst
->RGB
.Src
[j
].Index
>= R300_PFS_NUM_TEMP_REGS
)
172 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDR_EXT_RGB_MSB_BIT(j
);
174 code
->alu
.inst
[ip
].rgb_addr
|= src
<< (6*j
);
176 /* Set the Alpha address */
177 src
= use_source(code
, inst
->Alpha
.Src
[j
]);
178 if (inst
->Alpha
.Src
[j
].Index
>= R300_PFS_NUM_TEMP_REGS
)
179 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDR_EXT_A_MSB_BIT(j
);
181 code
->alu
.inst
[ip
].alpha_addr
|= src
<< (6*j
);
/* RGB argument: translated swizzle, Abs at bit 6, Negate at bit 5. */
183 arg
= r300FPTranslateRGBSwizzle(inst
->RGB
.Arg
[j
].Source
, inst
->RGB
.Arg
[j
].Swizzle
);
184 arg
|= inst
->RGB
.Arg
[j
].Abs
<< 6;
185 arg
|= inst
->RGB
.Arg
[j
].Negate
<< 5;
186 code
->alu
.inst
[ip
].rgb_inst
|= arg
<< (7*j
);
/* Alpha argument: same layout as the RGB argument. */
188 arg
= r300FPTranslateAlphaSwizzle(inst
->Alpha
.Arg
[j
].Source
, inst
->Alpha
.Arg
[j
].Swizzle
);
189 arg
|= inst
->Alpha
.Arg
[j
].Abs
<< 6;
190 arg
|= inst
->Alpha
.Arg
[j
].Negate
<< 5;
191 code
->alu
.inst
[ip
].alpha_inst
|= arg
<< (7*j
);
/* RGB presubtract: when the presub source is used, its Index selects one
 * of four R300_ALU_SRCP_* encodings to OR into rgb_inst.
 * NOTE(review): the case labels are missing from this listing. */
195 if (inst
->RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
196 switch(inst
->RGB
.Src
[RC_PAIR_PRESUB_SRC
].Index
) {
198 code
->alu
.inst
[ip
].rgb_inst
|=
199 R300_ALU_SRCP_1_MINUS_2_SRC0
;
202 code
->alu
.inst
[ip
].rgb_inst
|=
203 R300_ALU_SRCP_SRC1_PLUS_SRC0
;
206 code
->alu
.inst
[ip
].rgb_inst
|=
207 R300_ALU_SRCP_SRC1_MINUS_SRC0
;
210 code
->alu
.inst
[ip
].rgb_inst
|=
211 R300_ALU_SRCP_1_MINUS_SRC0
;
/* Alpha presubtract: same selection, ORed into alpha_inst. */
218 if (inst
->Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
219 switch(inst
->Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Index
) {
221 code
->alu
.inst
[ip
].alpha_inst
|=
222 R300_ALU_SRCP_1_MINUS_2_SRC0
;
225 code
->alu
.inst
[ip
].alpha_inst
|=
226 R300_ALU_SRCP_SRC1_PLUS_SRC0
;
229 code
->alu
.inst
[ip
].alpha_inst
|=
230 R300_ALU_SRCP_SRC1_MINUS_SRC0
;
233 code
->alu
.inst
[ip
].alpha_inst
|=
234 R300_ALU_SRCP_1_MINUS_SRC0
;
/* Saturation sets the clamp bit of the respective instruction word. */
241 if (inst
->RGB
.Saturate
)
242 code
->alu
.inst
[ip
].rgb_inst
|= R300_ALU_OUTC_CLAMP
;
243 if (inst
->Alpha
.Saturate
)
244 code
->alu
.inst
[ip
].alpha_inst
|= R300_ALU_OUTA_CLAMP
;
/* RGB write to a temporary: record the register use, set the r400
 * extension bit for high indices, and store the five-bit destination
 * index together with the write mask in rgb_addr. */
246 if (inst
->RGB
.WriteMask
) {
247 use_temporary(code
, inst
->RGB
.DestIndex
);
248 if (inst
->RGB
.DestIndex
>= R300_PFS_NUM_TEMP_REGS
)
249 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDRD_EXT_RGB_MSB_BIT
;
250 code
->alu
.inst
[ip
].rgb_addr
|=
251 ((inst
->RGB
.DestIndex
& 0x1f) << R300_ALU_DSTC_SHIFT
) |
252 (inst
->RGB
.WriteMask
<< R300_ALU_DSTC_REG_MASK_SHIFT
);
/* RGB write to an output: store the output mask and target, and flag the
 * node as producing colour output. */
254 if (inst
->RGB
.OutputWriteMask
) {
255 code
->alu
.inst
[ip
].rgb_addr
|=
256 (inst
->RGB
.OutputWriteMask
<< R300_ALU_DSTC_OUTPUT_MASK_SHIFT
) |
257 R300_RGB_TARGET(inst
->RGB
.Target
);
258 emit
->node_flags
|= R300_RGBA_OUT
;
/* Alpha write to a temporary.
 * NOTE(review): the expression below ends with `|`; the operand that
 * followed it is missing from this listing. */
261 if (inst
->Alpha
.WriteMask
) {
262 use_temporary(code
, inst
->Alpha
.DestIndex
);
263 if (inst
->Alpha
.DestIndex
>= R300_PFS_NUM_TEMP_REGS
)
264 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDRD_EXT_A_MSB_BIT
;
265 code
->alu
.inst
[ip
].alpha_addr
|=
266 ((inst
->Alpha
.DestIndex
& 0x1f) << R300_ALU_DSTA_SHIFT
) |
/* Alpha write to an output. */
269 if (inst
->Alpha
.OutputWriteMask
) {
270 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_OUTPUT
|
271 R300_ALPHA_TARGET(inst
->Alpha
.Target
);
272 emit
->node_flags
|= R300_RGBA_OUT
;
/* Depth write: flags the node with R300_W_OUT and records on the compiled
 * code that the program writes depth. */
274 if (inst
->Alpha
.DepthWriteMask
) {
275 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_DEPTH
;
276 emit
->node_flags
|= R300_W_OUT
;
277 c
->code
->writes_depth
= 1;
/* NOTE(review): any condition guarding this NOP-insert bit is not visible
 * in this listing. */
280 code
->alu
.inst
[ip
].rgb_inst
|= R300_ALU_INSERT_NOP
;
282 /* Handle Output Modifier
283 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
284 if (inst
->RGB
.Omod
) {
285 if (inst
->RGB
.Omod
== RC_OMOD_DISABLE
) {
286 rc_error(&c
->Base
, "RC_OMOD_DISABLE not supported");
288 code
->alu
.inst
[ip
].rgb_inst
|=
289 (inst
->RGB
.Omod
<< R300_ALU_OUTC_MOD_SHIFT
);
/* NOTE(review): the alpha modifier is shifted by the OUTC (RGB) shift
 * constant; confirm the alpha word uses the same bit position. */
291 if (inst
->Alpha
.Omod
) {
292 if (inst
->Alpha
.Omod
== RC_OMOD_DISABLE
) {
293 rc_error(&c
->Base
, "RC_OMOD_DISABLE not supported");
295 code
->alu
.inst
[ip
].alpha_inst
|=
296 (inst
->Alpha
.Omod
<< R300_ALU_OUTC_MOD_SHIFT
);
303 * Finish the current node without advancing to the next one.
/* Close the node currently being built without moving to the next one.
 * Emits a zero-initialised ALU instruction when the node has no ALU
 * instructions yet, derives the node's ALU/TEX start offsets and end
 * values (length - offset - 1), packs them into
 * code->code_addr[current_node], and ORs the ALU most-significant bits
 * into code->r400_code_offset_ext using a shift pair chosen by
 * current_node.
 * begin_tex() tests the result with `!finish_node(...)`, so a zero return
 * presumably signals failure.
 * NOTE(review): several declarations, return statements and the case
 * labels of the final switch are missing from this listing. */
305 static int finish_node(struct r300_emit_state
* emit
)
307 struct r300_fragment_program_compiler
* c
= emit
->compiler
;
308 struct r300_fragment_program_code
*code
= &emit
->compiler
->code
->code
.r300
;
314 unsigned int alu_offset_msbs
, alu_end_msbs
;
/* No ALU instruction was emitted since the node began. */
316 if (code
->alu
.length
== emit
->node_first_alu
) {
317 /* Generate a single NOP for this node */
318 struct rc_pair_instruction inst
;
319 memset(&inst
, 0, sizeof(inst
));
320 if (!emit_alu(emit
, &inst
))
/* Offsets are where the node started; the end values are the instruction
 * counts of the node minus one. */
324 alu_offset
= emit
->node_first_alu
;
325 alu_end
= code
->alu
.length
- alu_offset
- 1;
326 tex_offset
= emit
->node_first_tex
;
327 tex_end
= code
->tex
.length
- tex_offset
- 1;
/* A node without TEX instructions is reported as an error for every node
 * after the first. NOTE(review): what happens for node 0 in this branch is
 * not visible in this listing. */
329 if (code
->tex
.length
== emit
->node_first_tex
) {
330 if (emit
->current_node
> 0) {
331 error("Node %i has no TEX instructions", emit
->current_node
);
/* NOTE(review): presumably the else-branch of the check above (node does
 * have TEX instructions) - the joining line is missing. */
337 if (emit
->current_node
== 0)
338 code
->config
|= R300_PFS_CNTL_FIRST_NODE_HAS_TEX
;
341 /* Write the config register.
342 * Note: The order in which the words for each node are written
343 * is not correct here and needs to be fixed up once we're entirely
346 * Also note that the register specification from AMD is slightly
347 * incorrect in its description of this register. */
348 code
->code_addr
[emit
->current_node
] =
349 ((alu_offset
<< R300_ALU_START_SHIFT
)
350 & R300_ALU_START_MASK
)
351 | ((alu_end
<< R300_ALU_SIZE_SHIFT
)
352 & R300_ALU_SIZE_MASK
)
353 | ((tex_offset
<< R300_TEX_START_SHIFT
)
354 & R300_TEX_START_MASK
)
355 | ((tex_end
<< R300_TEX_SIZE_SHIFT
)
356 & R300_TEX_SIZE_MASK
)
358 | (get_msbs_tex(tex_offset
, 5)
359 << R400_TEX_START_MSB_SHIFT
)
360 | (get_msbs_tex(tex_end
, 5)
361 << R400_TEX_SIZE_MSB_SHIFT
)
364 /* Write r400 extended instruction fields. These will be ignored on
366 alu_offset_msbs
= get_msbs_alu(alu_offset
);
367 alu_end_msbs
= get_msbs_alu(alu_end
);
368 switch(emit
->current_node
) {
370 code
->r400_code_offset_ext
|=
371 alu_offset_msbs
<< R400_ALU_START3_MSB_SHIFT
372 | alu_end_msbs
<< R400_ALU_SIZE3_MSB_SHIFT
;
375 code
->r400_code_offset_ext
|=
376 alu_offset_msbs
<< R400_ALU_START2_MSB_SHIFT
377 | alu_end_msbs
<< R400_ALU_SIZE2_MSB_SHIFT
;
380 code
->r400_code_offset_ext
|=
381 alu_offset_msbs
<< R400_ALU_START1_MSB_SHIFT
382 | alu_end_msbs
<< R400_ALU_SIZE1_MSB_SHIFT
;
385 code
->r400_code_offset_ext
|=
386 alu_offset_msbs
<< R400_ALU_START0_MSB_SHIFT
387 | alu_end_msbs
<< R400_ALU_SIZE0_MSB_SHIFT
;
395 * Begin a block of texture instructions.
396 * Create the necessary indirection.
/* Start a new block of texture instructions by closing the current node
 * and opening the next one. Reports "Too many texture indirections" when
 * the current node index is already 3.
 * NOTE(review): return statements and the body of the first check are
 * missing from this listing. */
398 static int begin_tex(struct r300_emit_state
* emit
)
/* True when nothing has been emitted into the current node yet.
 * NOTE(review): the action taken in that case is not visible here. */
402 if (code
->alu
.length
== emit
->node_first_alu
&&
403 code
->tex
.length
== emit
->node_first_tex
) {
/* Index 3 is the last node; there is nowhere left to advance to. */
407 if (emit
->current_node
== 3) {
408 error("Too many texture indirections");
412 if (!finish_node(emit
))
/* Advance to the next node and record where its TEX and ALU instructions
 * begin; the per-node flags start out cleared. */
415 emit
->current_node
++;
416 emit
->node_first_tex
= code
->tex
.length
;
417 emit
->node_first_alu
= code
->alu
.length
;
418 emit
->node_flags
= 0;
/* Append one texture instruction word to code->tex.inst[]. Reports
 * "Too many TEX instructions" once code->tex.length has reached the
 * compiler's max_tex_insts, and "Unknown texture opcode" for opcodes other
 * than KIL, TEX, TXB and TXP.
 * NOTE(review): local declarations, return statements, the default label
 * and the body of the KIL special case are missing from this listing. */
423 static int emit_tex(struct r300_emit_state
* emit
, struct rc_instruction
* inst
)
430 if (code
->tex
.length
>= emit
->compiler
->Base
.max_tex_insts
) {
431 error("Too many TEX instructions");
/* Texture unit and destination register come straight from the
 * instruction. */
435 unit
= inst
->U
.I
.TexSrcUnit
;
436 dest
= inst
->U
.I
.DstReg
.Index
;
/* Select the hardware texture operation. */
438 switch(inst
->U
.I
.Opcode
) {
439 case RC_OPCODE_KIL
: opcode
= R300_TEX_OP_KIL
; break;
440 case RC_OPCODE_TEX
: opcode
= R300_TEX_OP_LD
; break;
441 case RC_OPCODE_TXB
: opcode
= R300_TEX_OP_TXB
; break;
442 case RC_OPCODE_TXP
: opcode
= R300_TEX_OP_TXP
; break;
444 error("Unknown texture opcode %s", rc_get_opcode_info(inst
->U
.I
.Opcode
)->Name
);
/* KIL is special-cased. NOTE(review): what the KIL branch does, and
 * whether the use_temporary(code, dest) call below belongs to an
 * else-branch, is not visible in this listing. */
448 if (inst
->U
.I
.Opcode
== RC_OPCODE_KIL
) {
452 use_temporary(code
, dest
);
/* The source register always counts as a used temporary. */
455 use_temporary(code
, inst
->U
.I
.SrcReg
[0].Index
);
/* Pack source address, destination address, texture unit and operation
 * into the next slot (length is post-incremented); register indices at or
 * above R300_PFS_NUM_TEMP_REGS additionally set the r400 extension bits. */
457 code
->tex
.inst
[code
->tex
.length
++] =
458 ((inst
->U
.I
.SrcReg
[0].Index
<< R300_SRC_ADDR_SHIFT
)
459 & R300_SRC_ADDR_MASK
)
460 | ((dest
<< R300_DST_ADDR_SHIFT
)
461 & R300_DST_ADDR_MASK
)
462 | (unit
<< R300_TEX_ID_SHIFT
)
463 | (opcode
<< R300_TEX_INST_SHIFT
)
464 | (inst
->U
.I
.SrcReg
[0].Index
>= R300_PFS_NUM_TEMP_REGS
?
465 R400_SRC_ADDR_EXT_BIT
: 0)
466 | (dest
>= R300_PFS_NUM_TEMP_REGS
?
467 R400_DST_ADDR_EXT_BIT
: 0)
474 * Final compilation step: Turn the intermediate radeon_program into
475 * machine-readable instructions.
477 void r300BuildFragmentProgramHwCode(struct radeon_compiler
*c
, void *user
)
479 struct r300_fragment_program_compiler
*compiler
= (struct r300_fragment_program_compiler
*)c
;
480 struct r300_emit_state emit
;
481 struct r300_fragment_program_code
*code
= &compiler
->code
->code
.r300
;
482 unsigned int tex_end
;
484 memset(&emit
, 0, sizeof(emit
));
485 emit
.compiler
= compiler
;
487 memset(code
, 0, sizeof(struct r300_fragment_program_code
));
489 for(struct rc_instruction
* inst
= compiler
->Base
.Program
.Instructions
.Next
;
490 inst
!= &compiler
->Base
.Program
.Instructions
&& !compiler
->Base
.Error
;
492 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
493 if (inst
->U
.I
.Opcode
== RC_OPCODE_BEGIN_TEX
) {
498 emit_tex(&emit
, inst
);
500 emit_alu(&emit
, &inst
->U
.P
);
504 if (code
->pixsize
>= compiler
->Base
.max_temp_regs
)
505 rc_error(&compiler
->Base
, "Too many hardware temporaries used.\n");
507 if (compiler
->Base
.Error
)
510 /* Finish the program */
513 code
->config
|= emit
.current_node
; /* FIRST_NODE_HAS_TEX set by finish_node */
515 /* Set r400 extended instruction fields. These values will be ignored
517 code
->r400_code_offset_ext
|=
519 << R400_ALU_OFFSET_MSB_SHIFT
)
520 | (get_msbs_alu(code
->alu
.length
- 1)
521 << R400_ALU_SIZE_MSB_SHIFT
);
523 tex_end
= code
->tex
.length
? code
->tex
.length
- 1 : 0;
525 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT
)
526 & R300_PFS_CNTL_ALU_OFFSET_MASK
)
527 | (((code
->alu
.length
- 1) << R300_PFS_CNTL_ALU_END_SHIFT
)
528 & R300_PFS_CNTL_ALU_END_MASK
)
529 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT
)
530 & R300_PFS_CNTL_TEX_OFFSET_MASK
)
531 | ((tex_end
<< R300_PFS_CNTL_TEX_END_SHIFT
)
532 & R300_PFS_CNTL_TEX_END_MASK
)
533 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT
)
534 | (get_msbs_tex(tex_end
, 6) << R400_TEX_SIZE_MSB_SHIFT
)
537 if (emit
.current_node
< 3) {
538 int shift
= 3 - emit
.current_node
;
540 for(i
= emit
.current_node
; i
>= 0; --i
)
541 code
->code_addr
[shift
+ i
] = code
->code_addr
[i
];
542 for(i
= 0; i
< shift
; ++i
)
543 code
->code_addr
[i
] = 0;
546 if (code
->pixsize
>= R300_PFS_NUM_TEMP_REGS
547 || code
->alu
.length
> R300_PFS_MAX_ALU_INST
548 || code
->tex
.length
> R300_PFS_MAX_TEX_INST
) {