/*
 * Copyright (C) 2005 Ben Skeggs.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * \file
 *
 * Emit the r300_fragment_program_code that can be understood by the hardware.
 * Input is a pre-transformed radeon_program.
 *
 * \author Ben Skeggs <darktama@iinet.net.au>
 *
 * \author Jerome Glisse <j.glisse@gmail.com>
 */
#include "r300_fragprog.h"

#include "../r300_reg.h"

#include "radeon_program_pair.h"
#include "r300_fragprog_swizzle.h"
/**
 * Bookkeeping carried through the emission of one fragment program.
 */
struct r300_emit_state {
	/** Compiler whose program is being emitted. */
	struct r300_fragment_program_compiler *compiler;

	/** Index of the node currently being filled; 2 bits wide. */
	unsigned current_node : 2;
	/** Value of tex.length when the current node was started. */
	unsigned node_first_tex : 8;
	/** Value of alu.length when the current node was started. */
	unsigned node_first_alu : 8;
	/**
	 * Output flags (R300_RGBA_OUT, R300_W_OUT) accumulated for the
	 * current node.
	 * NOTE(review): member restored because emit_alu() and begin_tex()
	 * access emit->node_flags; confirm the exact type against upstream.
	 */
	unsigned int node_flags;
};
/**
 * Declare the locals `c` (compiler) and `code` (R300 hardware code) from an
 * in-scope `emit` pointer.
 * NOTE(review): the #define line was missing here; the macro name is
 * restored from upstream Mesa and should be confirmed.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &c->code->code.r300
/**
 * Report a compile error through rc_error(), prefixed with the current file
 * and function name. Requires a local `c` pointing at the compiler.
 */
#define error(fmt, args...) do {			\
		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
	} while(0)
69 * Mark a temporary register as used.
71 static void use_temporary(struct r300_fragment_program_code
*code
, unsigned int index
)
73 if (index
> code
->pixsize
)
74 code
->pixsize
= index
;
77 static unsigned int use_source(struct r300_fragment_program_code
* code
, struct radeon_pair_instruction_source src
)
79 if (src
.File
== RC_FILE_CONSTANT
) {
80 return src
.Index
| (1 << 5);
81 } else if (src
.File
== RC_FILE_TEMPORARY
) {
82 use_temporary(code
, src
.Index
);
90 static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler
* c
, rc_opcode opcode
)
93 case RC_OPCODE_CMP
: return R300_ALU_OUTC_CMP
;
94 case RC_OPCODE_DP3
: return R300_ALU_OUTC_DP3
;
95 case RC_OPCODE_DP4
: return R300_ALU_OUTC_DP4
;
96 case RC_OPCODE_FRC
: return R300_ALU_OUTC_FRC
;
98 error("translate_rgb_opcode(%i): Unknown opcode", opcode
);
102 case RC_OPCODE_MAD
: return R300_ALU_OUTC_MAD
;
103 case RC_OPCODE_MAX
: return R300_ALU_OUTC_MAX
;
104 case RC_OPCODE_MIN
: return R300_ALU_OUTC_MIN
;
105 case RC_OPCODE_REPL_ALPHA
: return R300_ALU_OUTC_REPL_ALPHA
;
109 static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler
* c
, rc_opcode opcode
)
112 case RC_OPCODE_CMP
: return R300_ALU_OUTA_CMP
;
113 case RC_OPCODE_DP3
: return R300_ALU_OUTA_DP4
;
114 case RC_OPCODE_DP4
: return R300_ALU_OUTA_DP4
;
115 case RC_OPCODE_EX2
: return R300_ALU_OUTA_EX2
;
116 case RC_OPCODE_FRC
: return R300_ALU_OUTA_FRC
;
117 case RC_OPCODE_LG2
: return R300_ALU_OUTA_LG2
;
119 error("translate_rgb_opcode(%i): Unknown opcode", opcode
);
123 case RC_OPCODE_MAD
: return R300_ALU_OUTA_MAD
;
124 case RC_OPCODE_MAX
: return R300_ALU_OUTA_MAX
;
125 case RC_OPCODE_MIN
: return R300_ALU_OUTA_MIN
;
126 case RC_OPCODE_RCP
: return R300_ALU_OUTA_RCP
;
127 case RC_OPCODE_RSQ
: return R300_ALU_OUTA_RSQ
;
132 * Emit one paired ALU instruction.
134 static int emit_alu(struct r300_emit_state
* emit
, struct rc_pair_instruction
* inst
)
138 if (code
->alu
.length
>= R300_PFS_MAX_ALU_INST
) {
139 error("Too many ALU instructions");
143 int ip
= code
->alu
.length
++;
146 code
->alu
.inst
[ip
].rgb_inst
= translate_rgb_opcode(c
, inst
->RGB
.Opcode
);
147 code
->alu
.inst
[ip
].alpha_inst
= translate_alpha_opcode(c
, inst
->Alpha
.Opcode
);
149 for(j
= 0; j
< 3; ++j
) {
150 unsigned int src
= use_source(code
, inst
->RGB
.Src
[j
]);
151 code
->alu
.inst
[ip
].rgb_addr
|= src
<< (6*j
);
153 src
= use_source(code
, inst
->Alpha
.Src
[j
]);
154 code
->alu
.inst
[ip
].alpha_addr
|= src
<< (6*j
);
156 unsigned int arg
= r300FPTranslateRGBSwizzle(inst
->RGB
.Arg
[j
].Source
, inst
->RGB
.Arg
[j
].Swizzle
);
157 arg
|= inst
->RGB
.Arg
[j
].Abs
<< 6;
158 arg
|= inst
->RGB
.Arg
[j
].Negate
<< 5;
159 code
->alu
.inst
[ip
].rgb_inst
|= arg
<< (7*j
);
161 arg
= r300FPTranslateAlphaSwizzle(inst
->Alpha
.Arg
[j
].Source
, inst
->Alpha
.Arg
[j
].Swizzle
);
162 arg
|= inst
->Alpha
.Arg
[j
].Abs
<< 6;
163 arg
|= inst
->Alpha
.Arg
[j
].Negate
<< 5;
164 code
->alu
.inst
[ip
].alpha_inst
|= arg
<< (7*j
);
167 if (inst
->RGB
.Saturate
)
168 code
->alu
.inst
[ip
].rgb_inst
|= R300_ALU_OUTC_CLAMP
;
169 if (inst
->Alpha
.Saturate
)
170 code
->alu
.inst
[ip
].alpha_inst
|= R300_ALU_OUTA_CLAMP
;
172 if (inst
->RGB
.WriteMask
) {
173 use_temporary(code
, inst
->RGB
.DestIndex
);
174 code
->alu
.inst
[ip
].rgb_addr
|=
175 (inst
->RGB
.DestIndex
<< R300_ALU_DSTC_SHIFT
) |
176 (inst
->RGB
.WriteMask
<< R300_ALU_DSTC_REG_MASK_SHIFT
);
178 if (inst
->RGB
.OutputWriteMask
) {
179 code
->alu
.inst
[ip
].rgb_addr
|= (inst
->RGB
.OutputWriteMask
<< R300_ALU_DSTC_OUTPUT_MASK_SHIFT
);
180 emit
->node_flags
|= R300_RGBA_OUT
;
183 if (inst
->Alpha
.WriteMask
) {
184 use_temporary(code
, inst
->Alpha
.DestIndex
);
185 code
->alu
.inst
[ip
].alpha_addr
|=
186 (inst
->Alpha
.DestIndex
<< R300_ALU_DSTA_SHIFT
) |
189 if (inst
->Alpha
.OutputWriteMask
) {
190 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_OUTPUT
;
191 emit
->node_flags
|= R300_RGBA_OUT
;
193 if (inst
->Alpha
.DepthWriteMask
) {
194 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_DEPTH
;
195 emit
->node_flags
|= R300_W_OUT
;
196 c
->code
->writes_depth
= 1;
204 * Finish the current node without advancing to the next one.
206 static int finish_node(struct r300_emit_state
* emit
)
208 struct r300_fragment_program_compiler
* c
= emit
->compiler
;
209 struct r300_fragment_program_code
*code
= &emit
->compiler
->code
->code
.r300
;
211 if (code
->alu
.length
== emit
->node_first_alu
) {
212 /* Generate a single NOP for this node */
213 struct rc_pair_instruction inst
;
214 memset(&inst
, 0, sizeof(inst
));
215 if (!emit_alu(emit
, &inst
))
219 unsigned alu_offset
= emit
->node_first_alu
;
220 unsigned alu_end
= code
->alu
.length
- alu_offset
- 1;
221 unsigned tex_offset
= emit
->node_first_tex
;
222 unsigned tex_end
= code
->tex
.length
- tex_offset
- 1;
224 if (code
->tex
.length
== emit
->node_first_tex
) {
225 if (emit
->current_node
> 0) {
226 error("Node %i has no TEX instructions", emit
->current_node
);
232 if (emit
->current_node
== 0)
233 code
->config
|= R300_PFS_CNTL_FIRST_NODE_HAS_TEX
;
236 /* Write the config register.
237 * Note: The order in which the words for each node are written
238 * is not correct here and needs to be fixed up once we're entirely
241 * Also note that the register specification from AMD is slightly
242 * incorrect in its description of this register. */
243 code
->code_addr
[emit
->current_node
] =
244 (alu_offset
<< R300_ALU_START_SHIFT
) |
245 (alu_end
<< R300_ALU_SIZE_SHIFT
) |
246 (tex_offset
<< R300_TEX_START_SHIFT
) |
247 (tex_end
<< R300_TEX_SIZE_SHIFT
) |
255 * Begin a block of texture instructions.
256 * Create the necessary indirection.
258 static int begin_tex(struct r300_emit_state
* emit
)
262 if (code
->alu
.length
== emit
->node_first_alu
&&
263 code
->tex
.length
== emit
->node_first_tex
) {
267 if (emit
->current_node
== 3) {
268 error("Too many texture indirections");
272 if (!finish_node(emit
))
275 emit
->current_node
++;
276 emit
->node_first_tex
= code
->tex
.length
;
277 emit
->node_first_alu
= code
->alu
.length
;
278 emit
->node_flags
= 0;
283 static int emit_tex(struct r300_emit_state
* emit
, struct rc_instruction
* inst
)
287 if (code
->tex
.length
>= R300_PFS_MAX_TEX_INST
) {
288 error("Too many TEX instructions");
292 unsigned int unit
= inst
->U
.I
.TexSrcUnit
;
293 unsigned int dest
= inst
->U
.I
.DstReg
.Index
;
296 switch(inst
->U
.I
.Opcode
) {
297 case RC_OPCODE_KIL
: opcode
= R300_TEX_OP_KIL
; break;
298 case RC_OPCODE_TEX
: opcode
= R300_TEX_OP_LD
; break;
299 case RC_OPCODE_TXB
: opcode
= R300_TEX_OP_TXB
; break;
300 case RC_OPCODE_TXP
: opcode
= R300_TEX_OP_TXP
; break;
302 error("Unknown texture opcode %i", inst
->U
.I
.Opcode
);
306 if (inst
->U
.I
.Opcode
== RC_OPCODE_KIL
) {
310 use_temporary(code
, dest
);
313 use_temporary(code
, inst
->U
.I
.SrcReg
[0].Index
);
315 code
->tex
.inst
[code
->tex
.length
++] =
316 (inst
->U
.I
.SrcReg
[0].Index
<< R300_SRC_ADDR_SHIFT
) |
317 (dest
<< R300_DST_ADDR_SHIFT
) |
318 (unit
<< R300_TEX_ID_SHIFT
) |
319 (opcode
<< R300_TEX_INST_SHIFT
);
325 * Final compilation step: Turn the intermediate radeon_program into
326 * machine-readable instructions.
328 void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
*compiler
)
330 struct r300_emit_state emit
;
331 struct r300_fragment_program_code
*code
= &compiler
->code
->code
.r300
;
333 memset(&emit
, 0, sizeof(emit
));
334 emit
.compiler
= compiler
;
336 memset(code
, 0, sizeof(struct r300_fragment_program_code
));
338 for(struct rc_instruction
* inst
= compiler
->Base
.Program
.Instructions
.Next
;
339 inst
!= &compiler
->Base
.Program
.Instructions
&& !compiler
->Base
.Error
;
341 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
342 if (inst
->U
.I
.Opcode
== RC_OPCODE_BEGIN_TEX
) {
347 emit_tex(&emit
, inst
);
349 emit_alu(&emit
, &inst
->U
.P
);
353 if (code
->pixsize
>= R300_PFS_NUM_TEMP_REGS
)
354 rc_error(&compiler
->Base
, "Too many hardware temporaries used.\n");
356 if (compiler
->Base
.Error
)
359 /* Finish the program */
362 code
->config
|= emit
.current_node
; /* FIRST_NODE_HAS_TEX set by finish_node */
364 (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT
) |
365 ((code
->alu
.length
-1) << R300_PFS_CNTL_ALU_END_SHIFT
) |
366 (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT
) |
367 ((code
->tex
.length
? code
->tex
.length
-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT
);
369 if (emit
.current_node
< 3) {
370 int shift
= 3 - emit
.current_node
;
372 for(i
= emit
.current_node
; i
>= 0; --i
)
373 code
->code_addr
[shift
+ i
] = code
->code_addr
[i
];
374 for(i
= 0; i
< shift
; ++i
)
375 code
->code_addr
[i
] = 0;