2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/**
 * Emit the r300_fragment_program_code that can be understood by the hardware.
 * Input is a pre-transformed radeon_program.
 *
 * \author Ben Skeggs <darktama@iinet.net.au>
 * \author Jerome Glisse <j.glisse@gmail.com>
 */
39 #include "r300_fragprog.h"
41 #include "../r300_reg.h"
43 #include "radeon_program_pair.h"
44 #include "r300_fragprog_swizzle.h"
/*
 * State carried through emission of one fragment program: the owning
 * compiler plus bit-field bookkeeping for the node currently being built.
 * NOTE(review): this listing is missing lines here. Functions further down
 * read and write emit->node_flags, which is not among the visible members,
 * and the closing brace of the struct is absent - confirm against the
 * complete file.
 */
47 struct r300_emit_state
{
/* Compiler whose program and output code are being processed. */
48 struct r300_fragment_program_compiler
* compiler
;
/* Index of the node in progress (2 bits); begin_tex() refuses to go past 3. */
50 unsigned current_node
: 2;
/* Value of code->tex.length when the current node was started. */
51 unsigned node_first_tex
: 8;
/* Value of code->alu.length when the current node was started. */
52 unsigned node_first_alu
: 8;
/*
 * NOTE(review): the next two lines are the tail of a backslash-continued
 * definition whose first line is missing from this listing. They declare
 * two locals, c and code, derived from a variable named emit.
 */
57 struct r300_fragment_program_compiler *c = emit->compiler; \
58 struct r300_fragment_program_code *code = &c->code->code.r300
/*
 * error(fmt, args...): forward a formatted message to rc_error() on c->Base,
 * prefixed with __FILE__ and __FUNCTION__. A variable named c must be in
 * scope where the macro is expanded.
 * NOTE(review): the line that closes the do-block is not visible here.
 */
60 #define error(fmt, args...) do { \
61 rc_error(&c->Base, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
/**
 * Extract the three bits that sit directly above the low six bits of an
 * ALU offset or size value.
 *
 * @param bits ALU instruction offset or size.
 * @return Bits 6..8 of \p bits, moved down to bits 0..2.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	return (bits >> 6) & 0x7;
}
/**
 * Extract the bits of a TEX offset or size value that lie above its low
 * \p lsbs bits.
 *
 * @param bits TEX instruction offset or size.
 * @param lsbs The number of least significant bits to discard.
 * @return (bits >> lsbs) masked with 0x15.
 *
 * NOTE(review): 0x15 is binary 10101, so bits 1 and 3 of the shifted value
 * are always dropped. The mask is kept exactly as found; confirm against
 * the register specification whether a contiguous mask was intended.
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0x15;
}
/*
 * Shift x right by lsbs and keep the bits selected by mask. Every argument
 * is parenthesized so that expression arguments (for example a mask written
 * as A | B) are applied as a whole.
 */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
81 * Mark a temporary register as used.
83 static void use_temporary(struct r300_fragment_program_code
*code
, unsigned int index
)
85 if (index
> code
->pixsize
)
86 code
->pixsize
= index
;
/*
 * Turn a pair-instruction source into the value that emit_alu() ORs into
 * the rgb_addr / alpha_addr words.
 * Visible behaviour: a source in RC_FILE_CONSTANT yields Index with bit 5
 * set; a source in RC_FILE_TEMPORARY or RC_FILE_INPUT is recorded through
 * use_temporary() and yields Index masked to its low five bits.
 * NOTE(review): the lines before the first test and after the last return
 * are missing from this listing, so the result for any other register file
 * (and any early exit) cannot be confirmed from here.
 */
89 static unsigned int use_source(struct r300_fragment_program_code
* code
, struct rc_pair_instruction_source src
)
/* Constants: flag the index with bit 5. */
94 if (src
.File
== RC_FILE_CONSTANT
) {
95 return src
.Index
| (1 << 5);
/* Temporaries and inputs: track the highest index used, return 5 bits. */
96 } else if (src
.File
== RC_FILE_TEMPORARY
|| src
.File
== RC_FILE_INPUT
) {
97 use_temporary(code
, src
.Index
);
98 return src
.Index
& 0x1f;
/*
 * Map an rc_opcode to the R300_ALU_OUTC_* value that emit_alu() stores in
 * the rgb_inst word of a paired ALU instruction.
 * NOTE(review): the switch header, the label in front of the error() call
 * and the lines between that call and the MAD case are missing from this
 * listing; what is returned after the error is reported cannot be confirmed
 * from here.
 */
105 static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler
* c
, rc_opcode opcode
)
108 case RC_OPCODE_CMP
: return R300_ALU_OUTC_CMP
;
109 case RC_OPCODE_CND
: return R300_ALU_OUTC_CND
;
110 case RC_OPCODE_DP3
: return R300_ALU_OUTC_DP3
;
111 case RC_OPCODE_DP4
: return R300_ALU_OUTC_DP4
;
112 case RC_OPCODE_FRC
: return R300_ALU_OUTC_FRC
;
/* Reports the opcode name for an opcode with no RGB encoding listed here. */
114 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode
)->Name
);
118 case RC_OPCODE_MAD
: return R300_ALU_OUTC_MAD
;
119 case RC_OPCODE_MAX
: return R300_ALU_OUTC_MAX
;
120 case RC_OPCODE_MIN
: return R300_ALU_OUTC_MIN
;
121 case RC_OPCODE_REPL_ALPHA
: return R300_ALU_OUTC_REPL_ALPHA
;
125 static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler
* c
, rc_opcode opcode
)
128 case RC_OPCODE_CMP
: return R300_ALU_OUTA_CMP
;
129 case RC_OPCODE_CND
: return R300_ALU_OUTA_CND
;
130 case RC_OPCODE_DP3
: return R300_ALU_OUTA_DP4
;
131 case RC_OPCODE_DP4
: return R300_ALU_OUTA_DP4
;
132 case RC_OPCODE_EX2
: return R300_ALU_OUTA_EX2
;
133 case RC_OPCODE_FRC
: return R300_ALU_OUTA_FRC
;
134 case RC_OPCODE_LG2
: return R300_ALU_OUTA_LG2
;
136 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode
)->Name
);
140 case RC_OPCODE_MAD
: return R300_ALU_OUTA_MAD
;
141 case RC_OPCODE_MAX
: return R300_ALU_OUTA_MAX
;
142 case RC_OPCODE_MIN
: return R300_ALU_OUTA_MIN
;
143 case RC_OPCODE_RCP
: return R300_ALU_OUTA_RCP
;
144 case RC_OPCODE_RSQ
: return R300_ALU_OUTA_RSQ
;
/**
 * Emit one paired ALU instruction.
 */
/*
 * Append one paired (RGB + alpha) ALU instruction to code->alu.inst.
 * Visible steps: compare alu.length with c->Base.max_alu_insts and report an
 * error when the limit is reached; take the next slot; translate both
 * opcodes; pack three source addresses and their swizzle/abs/negate bits;
 * OR in presubtract selectors, clamp flags, destination registers and write
 * masks; record colour and depth output in emit->node_flags.
 * NOTE(review): this listing drops lines inside the body (local
 * declarations, the case labels of both presubtract switches, closing
 * braces, the tail of the alpha destination expression and every return
 * statement). Return values and the conditions guarding some statements
 * cannot be confirmed from here; finish_node() tests the result with a
 * logical not, so non-zero presumably means success - TODO confirm.
 */
151 static int emit_alu(struct r300_emit_state
* emit
, struct rc_pair_instruction
* inst
)
157 if (code
->alu
.length
>= c
->Base
.max_alu_insts
) {
158 error("Too many ALU instructions");
/* Claim the next free slot; alu.length is post-incremented. */
162 ip
= code
->alu
.length
++;
164 code
->alu
.inst
[ip
].rgb_inst
= translate_rgb_opcode(c
, inst
->RGB
.Opcode
);
165 code
->alu
.inst
[ip
].alpha_inst
= translate_alpha_opcode(c
, inst
->Alpha
.Opcode
);
/*
 * Three source slots: addresses are packed 6 bits apart, argument fields
 * 7 bits apart. An index at or above R300_PFS_NUM_TEMP_REGS also sets the
 * per-slot MSB bit in r400_ext_addr.
 */
167 for(j
= 0; j
< 3; ++j
) {
168 /* Set the RGB address */
169 unsigned int src
= use_source(code
, inst
->RGB
.Src
[j
]);
171 if (inst
->RGB
.Src
[j
].Index
>= R300_PFS_NUM_TEMP_REGS
)
172 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDR_EXT_RGB_MSB_BIT(j
);
174 code
->alu
.inst
[ip
].rgb_addr
|= src
<< (6*j
);
176 /* Set the Alpha address */
177 src
= use_source(code
, inst
->Alpha
.Src
[j
]);
178 if (inst
->Alpha
.Src
[j
].Index
>= R300_PFS_NUM_TEMP_REGS
)
179 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDR_EXT_A_MSB_BIT(j
);
181 code
->alu
.inst
[ip
].alpha_addr
|= src
<< (6*j
);
/* RGB argument: translated swizzle, Abs at bit 6, Negate at bit 5. */
183 arg
= r300FPTranslateRGBSwizzle(inst
->RGB
.Arg
[j
].Source
, inst
->RGB
.Arg
[j
].Swizzle
);
184 arg
|= inst
->RGB
.Arg
[j
].Abs
<< 6;
185 arg
|= inst
->RGB
.Arg
[j
].Negate
<< 5;
186 code
->alu
.inst
[ip
].rgb_inst
|= arg
<< (7*j
);
/* Alpha argument: same layout as the RGB argument. */
188 arg
= r300FPTranslateAlphaSwizzle(inst
->Alpha
.Arg
[j
].Source
, inst
->Alpha
.Arg
[j
].Swizzle
);
189 arg
|= inst
->Alpha
.Arg
[j
].Abs
<< 6;
190 arg
|= inst
->Alpha
.Arg
[j
].Negate
<< 5;
191 code
->alu
.inst
[ip
].alpha_inst
|= arg
<< (7*j
);
/*
 * RGB presubtract: the selector comes from Src[RC_PAIR_PRESUB_SRC].Index.
 * NOTE(review): the case labels choosing between the four R300_ALU_SRCP_*
 * values are missing from this listing.
 */
195 if (inst
->RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
196 switch(inst
->RGB
.Src
[RC_PAIR_PRESUB_SRC
].Index
) {
198 code
->alu
.inst
[ip
].rgb_inst
|=
199 R300_ALU_SRCP_1_MINUS_2_SRC0
;
202 code
->alu
.inst
[ip
].rgb_inst
|=
203 R300_ALU_SRCP_SRC1_PLUS_SRC0
;
206 code
->alu
.inst
[ip
].rgb_inst
|=
207 R300_ALU_SRCP_SRC1_MINUS_SRC0
;
210 code
->alu
.inst
[ip
].rgb_inst
|=
211 R300_ALU_SRCP_1_MINUS_SRC0
;
/*
 * Alpha presubtract: same R300_ALU_SRCP_* values, applied to alpha_inst.
 * NOTE(review): case labels missing here as well.
 */
218 if (inst
->Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
219 switch(inst
->Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Index
) {
221 code
->alu
.inst
[ip
].alpha_inst
|=
222 R300_ALU_SRCP_1_MINUS_2_SRC0
;
225 code
->alu
.inst
[ip
].alpha_inst
|=
226 R300_ALU_SRCP_SRC1_PLUS_SRC0
;
229 code
->alu
.inst
[ip
].alpha_inst
|=
230 R300_ALU_SRCP_SRC1_MINUS_SRC0
;
233 code
->alu
.inst
[ip
].alpha_inst
|=
234 R300_ALU_SRCP_1_MINUS_SRC0
;
/* Saturate requests set the clamp bit of the corresponding half. */
241 if (inst
->RGB
.Saturate
)
242 code
->alu
.inst
[ip
].rgb_inst
|= R300_ALU_OUTC_CLAMP
;
243 if (inst
->Alpha
.Saturate
)
244 code
->alu
.inst
[ip
].alpha_inst
|= R300_ALU_OUTA_CLAMP
;
/*
 * RGB register destination: mark it used, set the r400 extension bit for
 * high indices, and store the low 5 index bits plus the write mask.
 */
246 if (inst
->RGB
.WriteMask
) {
247 use_temporary(code
, inst
->RGB
.DestIndex
);
248 if (inst
->RGB
.DestIndex
>= R300_PFS_NUM_TEMP_REGS
)
249 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDRD_EXT_RGB_MSB_BIT
;
250 code
->alu
.inst
[ip
].rgb_addr
|=
251 ((inst
->RGB
.DestIndex
& 0x1f) << R300_ALU_DSTC_SHIFT
) |
252 (inst
->RGB
.WriteMask
<< R300_ALU_DSTC_REG_MASK_SHIFT
);
/* RGB output write: store the output mask and target, flag the node. */
254 if (inst
->RGB
.OutputWriteMask
) {
255 code
->alu
.inst
[ip
].rgb_addr
|=
256 (inst
->RGB
.OutputWriteMask
<< R300_ALU_DSTC_OUTPUT_MASK_SHIFT
) |
257 R300_RGB_TARGET(inst
->RGB
.Target
);
258 emit
->node_flags
|= R300_RGBA_OUT
;
/*
 * Alpha register destination.
 * NOTE(review): the last operand of the OR expression below is on a line
 * missing from this listing.
 */
261 if (inst
->Alpha
.WriteMask
) {
262 use_temporary(code
, inst
->Alpha
.DestIndex
);
263 if (inst
->Alpha
.DestIndex
>= R300_PFS_NUM_TEMP_REGS
)
264 code
->alu
.inst
[ip
].r400_ext_addr
|= R400_ADDRD_EXT_A_MSB_BIT
;
265 code
->alu
.inst
[ip
].alpha_addr
|=
266 ((inst
->Alpha
.DestIndex
& 0x1f) << R300_ALU_DSTA_SHIFT
) |
269 if (inst
->Alpha
.OutputWriteMask
) {
270 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_OUTPUT
|
271 R300_ALPHA_TARGET(inst
->Alpha
.Target
);
272 emit
->node_flags
|= R300_RGBA_OUT
;
/* Depth write: flag the node and the program as writing depth. */
274 if (inst
->Alpha
.DepthWriteMask
) {
275 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_DEPTH
;
276 emit
->node_flags
|= R300_W_OUT
;
277 c
->code
->writes_depth
= 1;
/* NOTE(review): the condition guarding the next statement is not visible. */
280 code
->alu
.inst
[ip
].rgb_inst
|= R300_ALU_INSERT_NOP
;
/**
 * Finish the current node without advancing to the next one.
 */
/*
 * Close the node that is currently being emitted.
 * Visible steps: when the node has no ALU instruction yet, emit one from a
 * zero-filled rc_pair_instruction; compute the ALU and TEX start/size values
 * of the node; store them in code->code_addr[current_node] together with
 * the R400 TEX MSB fields; finally OR the ALU MSBs into
 * code->r400_code_offset_ext through a switch on current_node.
 * NOTE(review): this listing drops lines inside the body (declarations of
 * alu_offset, alu_end, tex_offset and tex_end, return statements, the case
 * labels of the final switch, part of the expression stored in code_addr,
 * and the closing line of the comment that introduces the r400 extended
 * fields). No comments are added after that unterminated comment.
 */
289 static int finish_node(struct r300_emit_state
* emit
)
291 struct r300_fragment_program_compiler
* c
= emit
->compiler
;
292 struct r300_fragment_program_code
*code
= &emit
->compiler
->code
->code
.r300
;
298 unsigned int alu_offset_msbs
, alu_end_msbs
;
/* No ALU instruction was emitted since the node began. */
300 if (code
->alu
.length
== emit
->node_first_alu
) {
301 /* Generate a single NOP for this node */
302 struct rc_pair_instruction inst
;
303 memset(&inst
, 0, sizeof(inst
));
304 if (!emit_alu(emit
, &inst
))
/*
 * Offsets are the positions recorded when the node began; the end values
 * are the instruction counts of the node minus one.
 */
308 alu_offset
= emit
->node_first_alu
;
309 alu_end
= code
->alu
.length
- alu_offset
- 1;
310 tex_offset
= emit
->node_first_tex
;
311 tex_end
= code
->tex
.length
- tex_offset
- 1;
/*
 * A node without TEX instructions is reported as an error unless it is
 * node 0.
 * NOTE(review): the lines between that error and the following test on
 * current_node are missing, so the branch structure cannot be confirmed.
 */
313 if (code
->tex
.length
== emit
->node_first_tex
) {
314 if (emit
->current_node
> 0) {
315 error("Node %i has no TEX instructions", emit
->current_node
);
321 if (emit
->current_node
== 0)
322 code
->config
|= R300_PFS_CNTL_FIRST_NODE_HAS_TEX
;
325 /* Write the config register.
326 * Note: The order in which the words for each node are written
327 * is not correct here and needs to be fixed up once we're entirely
330 * Also note that the register specification from AMD is slightly
331 * incorrect in its description of this register. */
332 code
->code_addr
[emit
->current_node
] =
333 ((alu_offset
<< R300_ALU_START_SHIFT
)
334 & R300_ALU_START_MASK
)
335 | ((alu_end
<< R300_ALU_SIZE_SHIFT
)
336 & R300_ALU_SIZE_MASK
)
337 | ((tex_offset
<< R300_TEX_START_SHIFT
)
338 & R300_TEX_START_MASK
)
339 | ((tex_end
<< R300_TEX_SIZE_SHIFT
)
340 & R300_TEX_SIZE_MASK
)
342 | (get_msbs_tex(tex_offset
, 5)
343 << R400_TEX_START_MSB_SHIFT
)
344 | (get_msbs_tex(tex_end
, 5)
345 << R400_TEX_SIZE_MSB_SHIFT
)
348 /* Write r400 extended instruction fields. These will be ignored on
350 alu_offset_msbs
= get_msbs_alu(alu_offset
);
351 alu_end_msbs
= get_msbs_alu(alu_end
);
352 switch(emit
->current_node
) {
354 code
->r400_code_offset_ext
|=
355 alu_offset_msbs
<< R400_ALU_START3_MSB_SHIFT
356 | alu_end_msbs
<< R400_ALU_SIZE3_MSB_SHIFT
;
359 code
->r400_code_offset_ext
|=
360 alu_offset_msbs
<< R400_ALU_START2_MSB_SHIFT
361 | alu_end_msbs
<< R400_ALU_SIZE2_MSB_SHIFT
;
364 code
->r400_code_offset_ext
|=
365 alu_offset_msbs
<< R400_ALU_START1_MSB_SHIFT
366 | alu_end_msbs
<< R400_ALU_SIZE1_MSB_SHIFT
;
369 code
->r400_code_offset_ext
|=
370 alu_offset_msbs
<< R400_ALU_START0_MSB_SHIFT
371 | alu_end_msbs
<< R400_ALU_SIZE0_MSB_SHIFT
;
/**
 * Begin a block of texture instructions.
 * Create the necessary indirection.
 */
/*
 * Start a new node ahead of a block of texture instructions.
 * Visible steps: test whether the current node is still empty (no ALU and
 * no TEX instruction since it began); report an error when current_node is
 * already 3; otherwise finish the current node, advance current_node and
 * reset the per-node start positions and flags.
 * NOTE(review): the statements executed when the node is empty, and every
 * return statement, are missing from this listing.
 */
382 static int begin_tex(struct r300_emit_state
* emit
)
386 if (code
->alu
.length
== emit
->node_first_alu
&&
387 code
->tex
.length
== emit
->node_first_tex
) {
/* current_node is a 2-bit field, so 3 is the last usable node. */
391 if (emit
->current_node
== 3) {
392 error("Too many texture indirections");
396 if (!finish_node(emit
))
/* The new node starts at the current ends of the TEX and ALU streams. */
399 emit
->current_node
++;
400 emit
->node_first_tex
= code
->tex
.length
;
401 emit
->node_first_alu
= code
->alu
.length
;
402 emit
->node_flags
= 0;
/*
 * Append one texture instruction word to code->tex.inst.
 * Visible steps: compare tex.length with Base.max_tex_insts and report an
 * error when the limit is reached; read the texture unit and destination
 * index; map KIL, TEX, TXB and TXP to R300_TEX_OP_* values and report an
 * error for any other opcode; mark the destination and source registers as
 * used; pack source, destination, unit and opcode fields, adding the R400
 * extension bits for register indices at or above R300_PFS_NUM_TEMP_REGS.
 * NOTE(review): this listing drops the local declarations, the body of the
 * KIL test, the label before the unknown-opcode error and all return
 * statements, so which statements depend on the KIL test cannot be
 * confirmed from here.
 */
407 static int emit_tex(struct r300_emit_state
* emit
, struct rc_instruction
* inst
)
414 if (code
->tex
.length
>= emit
->compiler
->Base
.max_tex_insts
) {
415 error("Too many TEX instructions");
419 unit
= inst
->U
.I
.TexSrcUnit
;
420 dest
= inst
->U
.I
.DstReg
.Index
;
/* Select the hardware texture opcode. */
422 switch(inst
->U
.I
.Opcode
) {
423 case RC_OPCODE_KIL
: opcode
= R300_TEX_OP_KIL
; break;
424 case RC_OPCODE_TEX
: opcode
= R300_TEX_OP_LD
; break;
425 case RC_OPCODE_TXB
: opcode
= R300_TEX_OP_TXB
; break;
426 case RC_OPCODE_TXP
: opcode
= R300_TEX_OP_TXP
; break;
428 error("Unknown texture opcode %s", rc_get_opcode_info(inst
->U
.I
.Opcode
)->Name
);
/* NOTE(review): the lines inside and right after this test are missing. */
432 if (inst
->U
.I
.Opcode
== RC_OPCODE_KIL
) {
436 use_temporary(code
, dest
);
439 use_temporary(code
, inst
->U
.I
.SrcReg
[0].Index
);
/* Store the packed word in the next slot; tex.length is post-incremented. */
441 code
->tex
.inst
[code
->tex
.length
++] =
442 ((inst
->U
.I
.SrcReg
[0].Index
<< R300_SRC_ADDR_SHIFT
)
443 & R300_SRC_ADDR_MASK
)
444 | ((dest
<< R300_DST_ADDR_SHIFT
)
445 & R300_DST_ADDR_MASK
)
446 | (unit
<< R300_TEX_ID_SHIFT
)
447 | (opcode
<< R300_TEX_INST_SHIFT
)
448 | (inst
->U
.I
.SrcReg
[0].Index
>= R300_PFS_NUM_TEMP_REGS
?
449 R400_SRC_ADDR_EXT_BIT
: 0)
450 | (dest
>= R300_PFS_NUM_TEMP_REGS
?
451 R400_DST_ADDR_EXT_BIT
: 0)
/**
 * Final compilation step: Turn the intermediate radeon_program into
 * machine-readable instructions.
 */
461 void r300BuildFragmentProgramHwCode(struct radeon_compiler
*c
, void *user
)
463 struct r300_fragment_program_compiler
*compiler
= (struct r300_fragment_program_compiler
*)c
;
464 struct r300_emit_state emit
;
465 struct r300_fragment_program_code
*code
= &compiler
->code
->code
.r300
;
466 unsigned int tex_end
;
468 memset(&emit
, 0, sizeof(emit
));
469 emit
.compiler
= compiler
;
471 memset(code
, 0, sizeof(struct r300_fragment_program_code
));
473 for(struct rc_instruction
* inst
= compiler
->Base
.Program
.Instructions
.Next
;
474 inst
!= &compiler
->Base
.Program
.Instructions
&& !compiler
->Base
.Error
;
476 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
477 if (inst
->U
.I
.Opcode
== RC_OPCODE_BEGIN_TEX
) {
482 emit_tex(&emit
, inst
);
484 emit_alu(&emit
, &inst
->U
.P
);
488 if (code
->pixsize
>= compiler
->Base
.max_temp_regs
)
489 rc_error(&compiler
->Base
, "Too many hardware temporaries used.\n");
491 if (compiler
->Base
.Error
)
494 /* Finish the program */
497 code
->config
|= emit
.current_node
; /* FIRST_NODE_HAS_TEX set by finish_node */
499 /* Set r400 extended instruction fields. These values will be ignored
501 code
->r400_code_offset_ext
|=
503 << R400_ALU_OFFSET_MSB_SHIFT
)
504 | (get_msbs_alu(code
->alu
.length
- 1)
505 << R400_ALU_SIZE_MSB_SHIFT
);
507 tex_end
= code
->tex
.length
? code
->tex
.length
- 1 : 0;
509 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT
)
510 & R300_PFS_CNTL_ALU_OFFSET_MASK
)
511 | (((code
->alu
.length
- 1) << R300_PFS_CNTL_ALU_END_SHIFT
)
512 & R300_PFS_CNTL_ALU_END_MASK
)
513 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT
)
514 & R300_PFS_CNTL_TEX_OFFSET_MASK
)
515 | ((tex_end
<< R300_PFS_CNTL_TEX_END_SHIFT
)
516 & R300_PFS_CNTL_TEX_END_MASK
)
517 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT
)
518 | (get_msbs_tex(tex_end
, 6) << R400_TEX_SIZE_MSB_SHIFT
)
521 if (emit
.current_node
< 3) {
522 int shift
= 3 - emit
.current_node
;
524 for(i
= emit
.current_node
; i
>= 0; --i
)
525 code
->code_addr
[shift
+ i
] = code
->code_addr
[i
];
526 for(i
= 0; i
< shift
; ++i
)
527 code
->code_addr
[i
] = 0;
530 if (code
->pixsize
>= R300_PFS_NUM_TEMP_REGS
531 || code
->alu
.length
> R300_PFS_MAX_ALU_INST
532 || code
->tex
.length
> R300_PFS_MAX_TEX_INST
) {