2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
34 * \author Ben Skeggs <darktama@iinet.net.au>
36 * \author Jerome Glisse <j.glisse@gmail.com>
41 #include "r300_fragprog.h"
43 #include "../r300_reg.h"
45 #include "radeon_program_pair.h"
46 #include "r300_fragprog_swizzle.h"
/*
 * Emission state shared by the handlers in this file.
 *
 *  - compiler:       the owning compiler; the handlers reach the output
 *                    program through compiler->code->code.r300.
 *  - current_node:   index of the node being built (2-bit field); it indexes
 *                    code->code_addr[] and is incremented by begin_tex().
 *  - node_first_tex: value of code->tex.length when the node was started.
 *  - node_first_alu: value of code->alu.length when the node was started.
 *
 * NOTE(review): the handlers also read and write emit->node_flags, and the
 * closing brace is missing here; that part of the definition is not visible
 * in this listing -- confirm against the full file.
 */
49 struct r300_emit_state
{
50 struct r300_fragment_program_compiler
* compiler
;
52 unsigned current_node
: 2;
53 unsigned node_first_tex
: 8;
54 unsigned node_first_alu
: 8;
/*
 * Continuation lines of a multi-line macro whose #define line is not visible
 * in this listing. They declare three locals from a void pointer named
 * `data`: `emit` (the cast emit state), `c` (emit->compiler) and `code`
 * (&c->code->code.r300).
 * NOTE(review): emit_alu(), begin_tex() and emit_tex() use emit/c/code
 * without visible declarations, so presumably they expand this macro --
 * confirm.
 */
59 struct r300_emit_state * emit = (struct r300_emit_state*)data; \
60 struct r300_fragment_program_compiler *c = emit->compiler; \
61 struct r300_fragment_program_code *code = &c->code->code.r300
/*
 * error(fmt, ...): report a compile error through rc_error() on &c->Base.
 * The message is prefixed with "__FILE__::__FUNCTION__(): " and a newline is
 * appended. A variable named `c` must be in scope at the call site.
 * Uses the GNU named-variadic form (args...) together with ##args.
 * The end of the do-block is not visible in this listing.
 */
63 #define error(fmt, args...) do { \
64 rc_error(&c->Base, "%s::%s(): " fmt "\n", \
65 __FILE__, __FUNCTION__, ##args); \
70 * Mark a temporary register as used.
72 static void use_temporary(struct r300_fragment_program_code
*code
, GLuint index
)
74 if (index
> code
->pixsize
)
75 code
->pixsize
= index
;
/*
 * translate_rgb_opcode: map a program opcode (OPCODE_*) to the R300_ALU_OUTC_*
 * value that emit_alu() stores in the rgb_inst word of a paired instruction.
 *
 * Visible mappings: CMP, DP3, DP4, FRC, MAD, MAX, MIN and REPL_ALPHA each
 * return the R300_ALU_OUTC_* constant of the same name.
 * `c` is only needed by the error() macro, which reports through c->Base.
 *
 * NOTE(review): the switch header and the label in front of the error() call
 * are not visible in this listing (presumably a default label); whether
 * control continues into the MAD case after the error is also not visible --
 * confirm against the full file.
 */
79 static GLuint
translate_rgb_opcode(struct r300_fragment_program_compiler
* c
, GLuint opcode
)
82 case OPCODE_CMP
: return R300_ALU_OUTC_CMP
;
83 case OPCODE_DP3
: return R300_ALU_OUTC_DP3
;
84 case OPCODE_DP4
: return R300_ALU_OUTC_DP4
;
85 case OPCODE_FRC
: return R300_ALU_OUTC_FRC
;
/* Reported for opcodes that have no RGB translation. */
87 error("translate_rgb_opcode(%i): Unknown opcode", opcode
);
91 case OPCODE_MAD
: return R300_ALU_OUTC_MAD
;
92 case OPCODE_MAX
: return R300_ALU_OUTC_MAX
;
93 case OPCODE_MIN
: return R300_ALU_OUTC_MIN
;
94 case OPCODE_REPL_ALPHA
: return R300_ALU_OUTC_REPL_ALPHA
;
98 static GLuint
translate_alpha_opcode(struct r300_fragment_program_compiler
* c
, GLuint opcode
)
101 case OPCODE_CMP
: return R300_ALU_OUTA_CMP
;
102 case OPCODE_DP3
: return R300_ALU_OUTA_DP4
;
103 case OPCODE_DP4
: return R300_ALU_OUTA_DP4
;
104 case OPCODE_EX2
: return R300_ALU_OUTA_EX2
;
105 case OPCODE_FRC
: return R300_ALU_OUTA_FRC
;
106 case OPCODE_LG2
: return R300_ALU_OUTA_LG2
;
108 error("translate_rgb_opcode(%i): Unknown opcode", opcode
);
112 case OPCODE_MAD
: return R300_ALU_OUTA_MAD
;
113 case OPCODE_MAX
: return R300_ALU_OUTA_MAX
;
114 case OPCODE_MIN
: return R300_ALU_OUTA_MIN
;
115 case OPCODE_RCP
: return R300_ALU_OUTA_RCP
;
116 case OPCODE_RSQ
: return R300_ALU_OUTA_RSQ
;
121 * Emit one paired ALU instruction.
/*
 * emit_alu: append one paired RGB/alpha ALU instruction to code->alu.inst[].
 *
 * data: opaque pointer to the emit state; `emit`, `c` and `code` are used
 *       below without a visible declaration (presumably set up from `data`
 *       by a macro on a line missing from this listing -- confirm).
 * inst: the paired instruction to encode.
 *
 * Visible steps:
 *  1. report "Too many ALU instructions" once code->alu.length has reached
 *     R300_PFS_MAX_ALU_INST;
 *  2. take the next slot (ip = code->alu.length++) and store the translated
 *     RGB and alpha opcodes in rgb_inst / alpha_inst;
 *  3. for each of the three sources and arguments, OR the packed source
 *     address and argument selector into the four instruction words;
 *  4. OR in the clamp, destination, write-mask and output bits.
 *
 * rgb_addr and alpha_addr are only ever OR-ed into here, so they rely on the
 * instruction array having been zeroed beforehand
 * (r300BuildFragmentProgramHwCode clears the whole code struct).
 *
 * The return statements and the declaration of `j` are not visible in this
 * listing.
 */
123 static GLboolean
emit_alu(void* data
, struct radeon_pair_instruction
* inst
)
/* Refuse to go past the ALU instruction limit. */
127 if (code
->alu
.length
>= R300_PFS_MAX_ALU_INST
) {
128 error("Too many ALU instructions");
/* Claim the next free instruction slot. */
132 int ip
= code
->alu
.length
++;
135 code
->alu
.inst
[ip
].rgb_inst
= translate_rgb_opcode(c
, inst
->RGB
.Opcode
);
136 code
->alu
.inst
[ip
].alpha_inst
= translate_alpha_opcode(c
, inst
->Alpha
.Opcode
);
/* One pass per source/argument slot (three of each). */
138 for(j
= 0; j
< 3; ++j
) {
/* RGB source address: register index with the Constant flag at bit 5;
 * each source occupies 6 bits of rgb_addr. Only non-constant sources are
 * recorded as used temporaries. */
139 GLuint src
= inst
->RGB
.Src
[j
].Index
| (inst
->RGB
.Src
[j
].Constant
<< 5);
140 if (!inst
->RGB
.Src
[j
].Constant
)
141 use_temporary(code
, inst
->RGB
.Src
[j
].Index
);
142 code
->alu
.inst
[ip
].rgb_addr
|= src
<< (6*j
);
/* Same packing for the alpha source, into alpha_addr. */
144 src
= inst
->Alpha
.Src
[j
].Index
| (inst
->Alpha
.Src
[j
].Constant
<< 5);
145 if (!inst
->Alpha
.Src
[j
].Constant
)
146 use_temporary(code
, inst
->Alpha
.Src
[j
].Index
);
147 code
->alu
.inst
[ip
].alpha_addr
|= src
<< (6*j
);
/* RGB argument selector: translated swizzle, Negate at bit 5, Abs at bit 6;
 * each argument occupies 7 bits of rgb_inst. */
149 GLuint arg
= r300FPTranslateRGBSwizzle(inst
->RGB
.Arg
[j
].Source
, inst
->RGB
.Arg
[j
].Swizzle
);
150 arg
|= inst
->RGB
.Arg
[j
].Abs
<< 6;
151 arg
|= inst
->RGB
.Arg
[j
].Negate
<< 5;
152 code
->alu
.inst
[ip
].rgb_inst
|= arg
<< (7*j
);
/* Same packing for the alpha argument, into alpha_inst. */
154 arg
= r300FPTranslateAlphaSwizzle(inst
->Alpha
.Arg
[j
].Source
, inst
->Alpha
.Arg
[j
].Swizzle
);
155 arg
|= inst
->Alpha
.Arg
[j
].Abs
<< 6;
156 arg
|= inst
->Alpha
.Arg
[j
].Negate
<< 5;
157 code
->alu
.inst
[ip
].alpha_inst
|= arg
<< (7*j
);
/* Saturate requests become the per-unit clamp bits. */
160 if (inst
->RGB
.Saturate
)
161 code
->alu
.inst
[ip
].rgb_inst
|= R300_ALU_OUTC_CLAMP
;
162 if (inst
->Alpha
.Saturate
)
163 code
->alu
.inst
[ip
].alpha_inst
|= R300_ALU_OUTA_CLAMP
;
/* RGB write to a temporary: mark it used, then encode the destination index
 * and the register write mask. */
165 if (inst
->RGB
.WriteMask
) {
166 use_temporary(code
, inst
->RGB
.DestIndex
);
167 code
->alu
.inst
[ip
].rgb_addr
|=
168 (inst
->RGB
.DestIndex
<< R300_ALU_DSTC_SHIFT
) |
169 (inst
->RGB
.WriteMask
<< R300_ALU_DSTC_REG_MASK_SHIFT
);
/* RGB write to the colour output: encode the output mask and flag the node
 * with R300_RGBA_OUT. */
171 if (inst
->RGB
.OutputWriteMask
) {
172 code
->alu
.inst
[ip
].rgb_addr
|= (inst
->RGB
.OutputWriteMask
<< R300_ALU_DSTC_OUTPUT_MASK_SHIFT
);
173 emit
->node_flags
|= R300_RGBA_OUT
;
/* Alpha write to a temporary. The expression below ends in '|': its last
 * operand is on a line missing from this listing. */
176 if (inst
->Alpha
.WriteMask
) {
177 use_temporary(code
, inst
->Alpha
.DestIndex
);
178 code
->alu
.inst
[ip
].alpha_addr
|=
179 (inst
->Alpha
.DestIndex
<< R300_ALU_DSTA_SHIFT
) |
/* Alpha write to the colour output. */
182 if (inst
->Alpha
.OutputWriteMask
) {
183 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_OUTPUT
;
184 emit
->node_flags
|= R300_RGBA_OUT
;
/* Depth write: set the depth destination bit, flag the node with R300_W_OUT
 * and record on the compiled code that the program writes depth. */
186 if (inst
->Alpha
.DepthWriteMask
) {
187 code
->alu
.inst
[ip
].alpha_addr
|= R300_ALU_DSTA_DEPTH
;
188 emit
->node_flags
|= R300_W_OUT
;
189 c
->code
->writes_depth
= GL_TRUE
;
197 * Finish the current node without advancing to the next one.
/*
 * finish_node: close the node currently being built and store its word in
 * code->code_addr[emit->current_node]; emit->current_node itself is not
 * changed here.
 *
 * Visible steps:
 *  1. if no ALU instruction was emitted since the node started
 *     (code->alu.length == emit->node_first_alu), emit a zero-filled pair
 *     instruction through emit_alu() as a NOP;
 *  2. compute the node's ALU/TEX start offsets and "end" values, where end is
 *     (instructions in the node - 1);
 *  3. a node other than node 0 without TEX instructions is reported as an
 *     error; for node 0, R300_PFS_CNTL_FIRST_NODE_HAS_TEX is OR-ed into
 *     code->config;
 *  4. pack start/size fields into code->code_addr[current_node].
 *
 * Return statements and some branch structure are not visible in this
 * listing.
 */
199 static GLboolean
finish_node(struct r300_emit_state
* emit
)
201 struct r300_fragment_program_compiler
* c
= emit
->compiler
;
202 struct r300_fragment_program_code
*code
= &emit
->compiler
->code
->code
.r300
;
204 if (code
->alu
.length
== emit
->node_first_alu
) {
205 /* Generate a single NOP for this node */
206 struct radeon_pair_instruction inst
;
207 _mesa_bzero(&inst
, sizeof(inst
));
208 if (!emit_alu(emit
, &inst
))
/* Offsets are absolute instruction indices; the end values are relative to
 * the node start and inclusive (count - 1). */
212 unsigned alu_offset
= emit
->node_first_alu
;
213 unsigned alu_end
= code
->alu
.length
- alu_offset
- 1;
214 unsigned tex_offset
= emit
->node_first_tex
;
/* NOTE(review): with no TEX instruction in the node this unsigned
 * subtraction wraps; any correction is on lines missing from this listing --
 * confirm. */
215 unsigned tex_end
= code
->tex
.length
- tex_offset
- 1;
217 if (code
->tex
.length
== emit
->node_first_tex
) {
218 if (emit
->current_node
> 0) {
219 error("Node %i has no TEX instructions", emit
->current_node
);
/* NOTE(review): presumably the branch for "node has TEX instructions"; the
 * else line is not visible in this listing -- confirm. */
225 if (emit
->current_node
== 0)
226 code
->config
|= R300_PFS_CNTL_FIRST_NODE_HAS_TEX
;
229 /* Write the config register.
230 * Note: The order in which the words for each node are written
231 * is not correct here and needs to be fixed up once we're entirely
234 * Also note that the register specification from AMD is slightly
235 * incorrect in its description of this register. */
/* The expression below ends in '|': its last operand is on a line missing
 * from this listing. */
236 code
->code_addr
[emit
->current_node
] =
237 (alu_offset
<< R300_ALU_START_SHIFT
) |
238 (alu_end
<< R300_ALU_SIZE_SHIFT
) |
239 (tex_offset
<< R300_TEX_START_SHIFT
) |
240 (tex_end
<< R300_TEX_SIZE_SHIFT
) |
248 * Begin a block of texture instructions.
249 * Create the necessary indirection.
/*
 * begin_tex: start a new block of texture instructions by closing the
 * current node and opening the next one.
 *
 * data: opaque pointer to the emit state; `emit` and `code` are used below
 *       without a visible declaration (presumably set up from `data` by a
 *       macro on a line missing from this listing -- confirm).
 *
 * Visible steps:
 *  1. special-case a node that has neither ALU nor TEX instructions yet
 *     (the body of that branch is not visible here);
 *  2. report "Too many texture indirections" when current_node is already 3;
 *  3. call finish_node() for the current node;
 *  4. advance current_node, restart node_first_tex / node_first_alu at the
 *     current instruction counts and clear node_flags.
 *
 * Return statements are not visible in this listing.
 */
251 static GLboolean
begin_tex(void* data
)
/* True when nothing has been emitted into the current node so far. */
255 if (code
->alu
.length
== emit
->node_first_alu
&&
256 code
->tex
.length
== emit
->node_first_tex
) {
/* current_node is a 2-bit field; 3 is the last usable node index. */
260 if (emit
->current_node
== 3) {
261 error("Too many texture indirections");
265 if (!finish_node(emit
))
/* Open the next node at the current end of both instruction streams. */
268 emit
->current_node
++;
269 emit
->node_first_tex
= code
->tex
.length
;
270 emit
->node_first_alu
= code
->alu
.length
;
271 emit
->node_flags
= 0;
/*
 * emit_tex: append one texture instruction word to code->tex.inst[].
 *
 * data: opaque pointer to the emit state; `code` is used below without a
 *       visible declaration (presumably set up from `data` by a macro on a
 *       line missing from this listing -- confirm).
 * inst: the texture instruction to encode.
 *
 * Visible steps:
 *  1. report "Too many TEX instructions" once code->tex.length has reached
 *     R300_PFS_MAX_TEX_INST;
 *  2. map RADEON_OPCODE_KIL/TEX/TXB/TXP to R300_TEX_OP_KIL/LD/TXB/TXP, and
 *     report "Unknown texture opcode" otherwise;
 *  3. mark the destination and source registers as used temporaries;
 *  4. pack source, destination, texture unit and opcode into the next slot
 *     (code->tex.length is post-incremented).
 *
 * The declaration of `opcode`, the body of the KIL branch and the return
 * statements are not visible in this listing.
 */
276 static GLboolean
emit_tex(void* data
, struct radeon_pair_texture_instruction
* inst
)
/* Refuse to go past the TEX instruction limit. */
280 if (code
->tex
.length
>= R300_PFS_MAX_TEX_INST
) {
281 error("Too many TEX instructions");
285 GLuint unit
= inst
->TexSrcUnit
;
286 GLuint dest
= inst
->DestIndex
;
289 switch(inst
->Opcode
) {
290 case RADEON_OPCODE_KIL
: opcode
= R300_TEX_OP_KIL
; break;
291 case RADEON_OPCODE_TEX
: opcode
= R300_TEX_OP_LD
; break;
292 case RADEON_OPCODE_TXB
: opcode
= R300_TEX_OP_TXB
; break;
293 case RADEON_OPCODE_TXP
: opcode
= R300_TEX_OP_TXP
; break;
295 error("Unknown texture opcode %i", inst
->Opcode
);
/* NOTE(review): KIL is treated specially; what the branch does, and whether
 * the use_temporary(code, dest) below belongs to its else arm, is not
 * visible in this listing -- confirm. */
299 if (inst
->Opcode
== RADEON_OPCODE_KIL
) {
303 use_temporary(code
, dest
);
306 use_temporary(code
, inst
->SrcIndex
);
/* Store the packed instruction and advance the TEX instruction count. */
308 code
->tex
.inst
[code
->tex
.length
++] =
309 (inst
->SrcIndex
<< R300_SRC_ADDR_SHIFT
) |
310 (dest
<< R300_DST_ADDR_SHIFT
) |
311 (unit
<< R300_TEX_ID_SHIFT
) |
312 (opcode
<< R300_TEX_INST_SHIFT
);
317 static const struct radeon_pair_handler pair_handler
= {
318 .EmitPaired
= &emit_alu
,
319 .EmitTex
= &emit_tex
,
320 .BeginTexBlock
= &begin_tex
,
321 .MaxHwTemps
= R300_PFS_NUM_TEMP_REGS
325 * Final compilation step: Turn the intermediate radeon_program into
326 * machine-readable instructions.
/*
 * r300BuildFragmentProgramHwCode: final compilation step; turns the paired
 * intermediate program held by `compiler` into hardware words in
 * compiler->code->code.r300.
 *
 * Visible steps:
 *  1. zero the local emit state and the output code struct;
 *  2. run radeonPairProgram() with pair_handler, which calls back into
 *     emit_alu(), emit_tex() and begin_tex();
 *  3. check compiler->Base.Error after that pass (the statement it guards
 *     is not visible in this listing);
 *  4. OR the index of the last node into code->config and build the
 *     ALU/TEX offset/end word (its assignment target is on a line missing
 *     from this listing);
 *  5. when fewer than four nodes were used, move the node words to the top
 *     of code->code_addr[] and zero the unused leading entries.
 *
 * Fix in step 5: the move used to copy upwards (i = 0 .. current_node) into
 * an overlapping range. With three nodes (current_node == 2, shift == 1)
 * entry 1 was overwritten before it was read, so entries 1..3 all became
 * copies of entry 0. The copy now runs from the highest index down.
 *
 * The declaration of `i`, the call that finishes the last node and the early
 * return are not visible in this listing.
 */
328 void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
*compiler
)
330 struct r300_emit_state emit
;
331 struct r300_fragment_program_code
*code
= &compiler
->code
->code
.r300
;
333 memset(&emit
, 0, sizeof(emit
));
334 emit
.compiler
= compiler
;
/* The emitters OR bits into these words, so the struct must start zeroed. */
336 _mesa_bzero(code
, sizeof(struct r300_fragment_program_code
));
338 radeonPairProgram(compiler
, &pair_handler
, &emit
);
339 if (compiler
->Base
.Error
)
342 /* Finish the program */
345 code
->config
|= emit
.current_node
; /* FIRST_NODE_HAS_TEX set by finish_node */
347 (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT
) |
348 ((code
->alu
.length
-1) << R300_PFS_CNTL_ALU_END_SHIFT
) |
349 (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT
) |
350 ((code
->tex
.length
? code
->tex
.length
-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT
);
/* Fewer than four nodes: shift the used entries so the last node ends up in
 * code_addr[3]. */
352 if (emit
.current_node
< 3) {
353 int shift
= 3 - emit
.current_node
;
/* Copy from the highest used index down so no source entry is overwritten
 * before it has been read. The "i-- > 0" form terminates whether `i` is
 * signed or unsigned. */
355 for(i
= emit
.current_node
+ 1; i
-- > 0; )
356 code
->code_addr
[shift
+ i
] = code
->code_addr
[i
];
/* Clear the entries that no longer hold a node. */
357 for(i
= 0; i
< shift
; ++i
)
358 code
->code_addr
[i
] = 0;