2 * Copyright (C) 2005 Ben Skeggs.
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 * \author Ben Skeggs <darktama@iinet.net.au>
36 * \author Jerome Glisse <j.glisse@gmail.com>
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
40 * \todo Depth write, WPOS/FOGC inputs
46 #include "r500_fragprog.h"
48 #include "radeon_program_pair.h"
52 struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \
53 struct r500_fragment_program_code *code = c->code
55 #define error(fmt, args...) do { \
56 fprintf(stderr, "%s::%s(): " fmt "\n", \
57 __FILE__, __FUNCTION__, ##args); \
62 * Callback to register hardware constants.
64 static GLboolean
emit_const(void *data
, GLuint file
, GLuint idx
, GLuint
*hwindex
)
68 for (*hwindex
= 0; *hwindex
< code
->const_nr
; ++*hwindex
) {
69 if (code
->constant
[*hwindex
].File
== file
&&
70 code
->constant
[*hwindex
].Index
== idx
)
74 if (*hwindex
>= code
->const_nr
) {
75 if (*hwindex
>= PFS_NUM_CONST_REGS
) {
76 error("Out of hw constants!\n");
81 code
->constant
[*hwindex
].File
= file
;
82 code
->constant
[*hwindex
].Index
= idx
;
88 static GLuint
translate_rgb_op(GLuint opcode
)
91 case OPCODE_CMP
: return R500_ALU_RGBA_OP_CMP
;
92 case OPCODE_DDX
: return R500_ALU_RGBA_OP_MDH
;
93 case OPCODE_DDY
: return R500_ALU_RGBA_OP_MDV
;
94 case OPCODE_DP3
: return R500_ALU_RGBA_OP_DP3
;
95 case OPCODE_DP4
: return R500_ALU_RGBA_OP_DP4
;
96 case OPCODE_FRC
: return R500_ALU_RGBA_OP_FRC
;
98 error("translate_rgb_op(%d): unknown opcode\n", opcode
);
102 case OPCODE_MAD
: return R500_ALU_RGBA_OP_MAD
;
103 case OPCODE_MAX
: return R500_ALU_RGBA_OP_MAX
;
104 case OPCODE_MIN
: return R500_ALU_RGBA_OP_MIN
;
105 case OPCODE_REPL_ALPHA
: return R500_ALU_RGBA_OP_SOP
;
109 static GLuint
translate_alpha_op(GLuint opcode
)
112 case OPCODE_CMP
: return R500_ALPHA_OP_CMP
;
113 case OPCODE_COS
: return R500_ALPHA_OP_COS
;
114 case OPCODE_DDX
: return R500_ALPHA_OP_MDH
;
115 case OPCODE_DDY
: return R500_ALPHA_OP_MDV
;
116 case OPCODE_DP3
: return R500_ALPHA_OP_DP
;
117 case OPCODE_DP4
: return R500_ALPHA_OP_DP
;
118 case OPCODE_EX2
: return R500_ALPHA_OP_EX2
;
119 case OPCODE_FRC
: return R500_ALPHA_OP_FRC
;
120 case OPCODE_LG2
: return R500_ALPHA_OP_LN2
;
122 error("translate_alpha_op(%d): unknown opcode\n", opcode
);
126 case OPCODE_MAD
: return R500_ALPHA_OP_MAD
;
127 case OPCODE_MAX
: return R500_ALPHA_OP_MAX
;
128 case OPCODE_MIN
: return R500_ALPHA_OP_MIN
;
129 case OPCODE_RCP
: return R500_ALPHA_OP_RCP
;
130 case OPCODE_RSQ
: return R500_ALPHA_OP_RSQ
;
131 case OPCODE_SIN
: return R500_ALPHA_OP_SIN
;
135 static GLuint
fix_hw_swizzle(GLuint swz
)
137 if (swz
== 5) swz
= 6;
138 if (swz
== SWIZZLE_NIL
) swz
= 4;
142 static GLuint
translate_arg_rgb(struct radeon_pair_instruction
*inst
, int arg
)
144 GLuint t
= inst
->RGB
.Arg
[arg
].Source
;
146 t
|= inst
->RGB
.Arg
[arg
].Negate
<< 11;
147 t
|= inst
->RGB
.Arg
[arg
].Abs
<< 12;
149 for(comp
= 0; comp
< 3; ++comp
)
150 t
|= fix_hw_swizzle(GET_SWZ(inst
->RGB
.Arg
[arg
].Swizzle
, comp
)) << (3*comp
+ 2);
155 static GLuint
translate_arg_alpha(struct radeon_pair_instruction
*inst
, int i
)
157 GLuint t
= inst
->Alpha
.Arg
[i
].Source
;
158 t
|= fix_hw_swizzle(inst
->Alpha
.Arg
[i
].Swizzle
) << 2;
159 t
|= inst
->Alpha
.Arg
[i
].Negate
<< 5;
160 t
|= inst
->Alpha
.Arg
[i
].Abs
<< 6;
164 static void use_temporary(struct r500_fragment_program_code
* code
, GLuint index
)
166 if (index
> code
->max_temp_idx
)
167 code
->max_temp_idx
= index
;
170 static GLuint
use_source(struct r500_fragment_program_code
* code
, struct radeon_pair_instruction_source src
)
173 use_temporary(code
, src
.Index
);
174 return src
.Index
| src
.Constant
<< 8;
179 * Emit a paired ALU instruction.
181 static GLboolean
emit_paired(void *data
, struct radeon_pair_instruction
*inst
)
185 if (code
->inst_end
>= 511) {
186 error("emit_alu: Too many instructions");
190 int ip
= ++code
->inst_end
;
192 code
->inst
[ip
].inst5
= translate_rgb_op(inst
->RGB
.Opcode
);
193 code
->inst
[ip
].inst4
= translate_alpha_op(inst
->Alpha
.Opcode
);
195 if (inst
->RGB
.OutputWriteMask
|| inst
->Alpha
.OutputWriteMask
|| inst
->Alpha
.DepthWriteMask
)
196 code
->inst
[ip
].inst0
= R500_INST_TYPE_OUT
;
198 code
->inst
[ip
].inst0
= R500_INST_TYPE_ALU
;
199 code
->inst
[ip
].inst0
|= R500_INST_TEX_SEM_WAIT
;
201 code
->inst
[ip
].inst0
|= (inst
->RGB
.WriteMask
<< 11) | (inst
->Alpha
.WriteMask
<< 14);
202 code
->inst
[ip
].inst0
|= (inst
->RGB
.OutputWriteMask
<< 15) | (inst
->Alpha
.OutputWriteMask
<< 18);
203 if (inst
->Alpha
.DepthWriteMask
) {
204 code
->inst
[ip
].inst4
|= R500_ALPHA_W_OMASK
;
205 c
->fp
->writes_depth
= GL_TRUE
;
208 code
->inst
[ip
].inst4
|= R500_ALPHA_ADDRD(inst
->Alpha
.DestIndex
);
209 code
->inst
[ip
].inst5
|= R500_ALU_RGBA_ADDRD(inst
->RGB
.DestIndex
);
210 use_temporary(code
, inst
->Alpha
.DestIndex
);
211 use_temporary(code
, inst
->RGB
.DestIndex
);
213 if (inst
->RGB
.Saturate
)
214 code
->inst
[ip
].inst0
|= R500_INST_RGB_CLAMP
;
215 if (inst
->Alpha
.Saturate
)
216 code
->inst
[ip
].inst0
|= R500_INST_ALPHA_CLAMP
;
218 code
->inst
[ip
].inst1
|= R500_RGB_ADDR0(use_source(code
, inst
->RGB
.Src
[0]));
219 code
->inst
[ip
].inst1
|= R500_RGB_ADDR1(use_source(code
, inst
->RGB
.Src
[1]));
220 code
->inst
[ip
].inst1
|= R500_RGB_ADDR2(use_source(code
, inst
->RGB
.Src
[2]));
222 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR0(use_source(code
, inst
->Alpha
.Src
[0]));
223 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR1(use_source(code
, inst
->Alpha
.Src
[1]));
224 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR2(use_source(code
, inst
->Alpha
.Src
[2]));
226 code
->inst
[ip
].inst3
|= translate_arg_rgb(inst
, 0) << R500_ALU_RGB_SEL_A_SHIFT
;
227 code
->inst
[ip
].inst3
|= translate_arg_rgb(inst
, 1) << R500_ALU_RGB_SEL_B_SHIFT
;
228 code
->inst
[ip
].inst5
|= translate_arg_rgb(inst
, 2) << R500_ALU_RGBA_SEL_C_SHIFT
;
230 code
->inst
[ip
].inst4
|= translate_arg_alpha(inst
, 0) << R500_ALPHA_SEL_A_SHIFT
;
231 code
->inst
[ip
].inst4
|= translate_arg_alpha(inst
, 1) << R500_ALPHA_SEL_B_SHIFT
;
232 code
->inst
[ip
].inst5
|= translate_arg_alpha(inst
, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT
;
237 static GLuint
translate_strq_swizzle(struct prog_src_register src
)
241 for (i
= 0; i
< 4; i
++)
242 swiz
|= (GET_SWZ(src
.Swizzle
, i
) & 0x3) << i
*2;
247 * Emit a single TEX instruction
249 static GLboolean
emit_tex(void *data
, struct prog_instruction
*inst
)
253 if (code
->inst_end
>= 511) {
254 error("emit_tex: Too many instructions");
258 int ip
= ++code
->inst_end
;
260 code
->inst
[ip
].inst0
= R500_INST_TYPE_TEX
261 | (inst
->DstReg
.WriteMask
<< 11)
262 | R500_INST_TEX_SEM_WAIT
;
263 code
->inst
[ip
].inst1
= R500_TEX_ID(inst
->TexSrcUnit
)
264 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
266 if (inst
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
267 code
->inst
[ip
].inst1
|= R500_TEX_UNSCALED
;
269 switch (inst
->Opcode
) {
271 code
->inst
[ip
].inst1
|= R500_TEX_INST_TEXKILL
;
274 code
->inst
[ip
].inst1
|= R500_TEX_INST_LD
;
277 code
->inst
[ip
].inst1
|= R500_TEX_INST_LODBIAS
;
280 code
->inst
[ip
].inst1
|= R500_TEX_INST_PROJ
;
283 error("emit_tex can't handle opcode %x\n", inst
->Opcode
);
286 code
->inst
[ip
].inst2
= R500_TEX_SRC_ADDR(inst
->SrcReg
[0].Index
)
287 | (translate_strq_swizzle(inst
->SrcReg
[0]) << 8)
288 | R500_TEX_DST_ADDR(inst
->DstReg
.Index
)
289 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
290 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
295 static const struct radeon_pair_handler pair_handler
= {
296 .EmitConst
= emit_const
,
297 .EmitPaired
= emit_paired
,
302 GLboolean
r500FragmentProgramEmit(struct r500_fragment_program_compiler
*compiler
)
304 struct r500_fragment_program_code
*code
= compiler
->code
;
306 _mesa_bzero(code
, sizeof(*code
));
307 code
->max_temp_idx
= 1;
308 code
->inst_offset
= 0;
311 if (!radeonPairProgram(compiler
->r300
->radeon
.glCtx
, compiler
->program
, &pair_handler
, compiler
))
314 if ((code
->inst
[code
->inst_end
].inst0
& R500_INST_TYPE_MASK
) != R500_INST_TYPE_OUT
) {
315 /* This may happen when dead-code elimination is disabled or
316 * when most of the fragment program logic is leading to a KIL */
317 if (code
->inst_end
>= 511) {
318 error("Introducing fake OUT: Too many instructions");
322 int ip
= ++code
->inst_end
;
323 code
->inst
[ip
].inst0
= R500_INST_TYPE_OUT
| R500_INST_TEX_SEM_WAIT
;