2 * Copyright (C) 2004 David Airlie All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 #include "main/glheader.h"
23 #include "main/colormac.h"
24 #include "main/context.h"
25 #include "main/macros.h"
26 #include "shader/program.h"
27 #include "shader/atifragshader.h"
28 #include "swrast/s_atifragshader.h"
32 * State for executing ATI fragment shader.
36 GLfloat Registers
[6][4]; /** six temporary registers */
37 GLfloat PrevPassRegisters
[6][4];
38 GLfloat Inputs
[2][4]; /** Primary, secondary input colors */
/*
 * fetch_texel: sample texture unit `unit` at `texcoord` through
 * swrast->TextureSample[unit] (requesting 1 texel) and store the result in
 * color[0..3], converting each rgba channel with CHAN_TO_FLOAT.
 *
 * NOTE(review): this text is a lossy extraction.  The leading integers are
 * original line numbers fused into the code, and some lines are absent: the
 * declaration of `rgba` and the trailing arguments of the TextureSample call
 * are not visible here.  Presumably `lambda` and `rgba` are passed there --
 * confirm against the pristine file.
 */
47 fetch_texel(GLcontext
* ctx
, const GLfloat texcoord
[4], GLfloat lambda
,
48 GLuint unit
, GLfloat color
[4])
51 SWcontext
*swrast
= SWRAST_CONTEXT(ctx
);
53 /* XXX use a float-valued TextureSample routine here!!! */
54 swrast
->TextureSample
[unit
](ctx
, ctx
->Texture
.Unit
[unit
]._Current
,
55 1, (const GLfloat(*)[4]) texcoord
,
/* Convert the four sampled channels to floats for the caller. */
57 color
[0] = CHAN_TO_FLOAT(rgba
[0]);
58 color
[1] = CHAN_TO_FLOAT(rgba
[1]);
59 color
[2] = CHAN_TO_FLOAT(rgba
[2]);
60 color
[3] = CHAN_TO_FLOAT(rgba
[3]);
/*
 * apply_swizzle: dispatch on `swizzle` over the four ATI coordinate swizzles
 * (STR, STQ, STR_DR, STQ_DQ) for the 4-component vector `values`.
 *
 * NOTE(review): the bodies of the cases are not visible in this extraction,
 * so the exact values written are not documented here.  Presumably the _DR
 * and _DQ variants divide by r and q respectively (the q == 0 guard below
 * suggests a division by q) -- confirm against the pristine file.
 * NOTE(review): only the STQ_DQ case shows a zero guard; no equivalent guard
 * for r is visible in the STR_DR case.
 */
64 apply_swizzle(GLfloat values
[4], GLuint swizzle
)
74 case GL_SWIZZLE_STR_ATI
:
79 case GL_SWIZZLE_STQ_ATI
:
84 case GL_SWIZZLE_STR_DR_ATI
:
89 case GL_SWIZZLE_STQ_DQ_ATI
:
90 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
/* A q of exactly zero is replaced by a tiny positive value. */
91 if (q
== 0.0F
) q
= 0.000000001;
/*
 * apply_src_rep: walk components [start, end) of `val`, where start is 3
 * when `optype` is non-zero and 0 otherwise.
 *
 * NOTE(review): the assignment of `end` and the whole loop body are not
 * visible in this extraction.  Presumably `rep` selects one channel that is
 * replicated into the visited components -- confirm against the pristine
 * file.
 */
101 apply_src_rep(GLint optype
, GLuint rep
, GLfloat
* val
)
/* Non-zero optype starts at component 3; zero starts at component 0. */
108 start
= optype
? 3 : 0;
111 for (i
= start
; i
< end
; i
++) {
/*
 * apply_src_mod: apply the source-argument modifier bits in `mod` to
 * components [start, end) of `val`, where start is 3 when `optype` is
 * non-zero and 0 otherwise.  The bits are tested in the order COMP, BIAS,
 * 2X, NEGATE, so several modifiers can apply to the same component.
 *
 * NOTE(review): in this extraction only the BIAS action (subtract 0.5) is
 * visible; the statements controlled by the COMP, 2X and NEGATE tests and
 * the assignment of `end` are missing.
 */
130 apply_src_mod(GLint optype
, GLuint mod
, GLfloat
* val
)
/* Non-zero optype starts at component 3; zero starts at component 0. */
138 start
= optype
? 3 : 0;
141 for (i
= start
; i
< end
; i
++) {
142 if (mod
& GL_COMP_BIT_ATI
)
145 if (mod
& GL_BIAS_BIT_ATI
)
146 val
[i
] = val
[i
] - 0.5;
148 if (mod
& GL_2X_BIT_ATI
)
151 if (mod
& GL_NEGATE_BIT_ATI
)
/*
 * apply_dst_mod: apply the destination modifier `mod` to `val` in place.
 * The saturate bit is split off into `has_sat` and cleared from `mod`; the
 * remaining value selects a scale factor.  Components visited are index 3
 * only when `optype` is non-zero, and indices 0..2 otherwise.
 *
 * NOTE(review): in this extraction only the HALF (x0.5), QUARTER (x0.25)
 * and EIGHTH (x0.125) cases are visible; any other scale cases, the
 * enclosing switch and the clamp assignments are missing.  The two visible
 * upper-bound tests (> 1.0 and > 8.0) presumably belong to the saturating
 * and non-saturating clamp respectively -- confirm against the pristine
 * file.
 */
157 apply_dst_mod(GLuint optype
, GLuint mod
, GLfloat
* val
)
/* Remember whether saturation was requested, then strip that bit. */
160 GLint has_sat
= mod
& GL_SATURATE_BIT_ATI
;
163 mod
&= ~GL_SATURATE_BIT_ATI
;
/* Non-zero optype: component 3 only; zero optype: components 0..2. */
165 start
= optype
? 3 : 0;
166 end
= optype
? 4 : 3;
168 for (i
= start
; i
< end
; i
++) {
179 case GL_HALF_BIT_ATI
:
180 val
[i
] = val
[i
] * 0.5;
182 case GL_QUARTER_BIT_ATI
:
183 val
[i
] = val
[i
] * 0.25;
185 case GL_EIGHTH_BIT_ATI
:
186 val
[i
] = val
[i
] * 0.125;
193 else if (val
[i
] > 1.0)
199 else if (val
[i
] > 8.0)
/*
 * write_dst_addr: run apply_dst_mod(optype, mod, src) on the computed result
 * and then, for a color op, test the red/green/blue bits of `mask`.
 *
 * NOTE(review): the final parameter of the signature, the statements
 * controlled by the mask tests, the body of the 0..2 loop and any handling
 * for non-color ops are not visible in this extraction.  Presumably the
 * selected components of `src` are stored to the destination register --
 * confirm against the pristine file.
 */
207 write_dst_addr(GLuint optype
, GLuint mod
, GLuint mask
, GLfloat
* src
,
/* Scale/clamp the result in place before it is written out. */
211 apply_dst_mod(optype
, mod
, src
);
213 if (optype
== ATI_FRAGMENT_SHADER_COLOR_OP
) {
215 if (mask
& GL_RED_BIT_ATI
)
218 if (mask
& GL_GREEN_BIT_ATI
)
221 if (mask
& GL_BLUE_BIT_ATI
)
225 for (i
= 0; i
< 3; i
++)
234 finish_pass(struct atifs_machine
*machine
)
238 for (i
= 0; i
< 6; i
++) {
239 COPY_4V(machine
->PrevPassRegisters
[i
], machine
->Registers
[i
]);
/*
 * Table of ATI fragment shader opcodes.
 *
 * NOTE(review): only the GL_DOT2_ADD_ATI entry is visible in this
 * extraction; the other entries and the closing brace are missing.  The
 * second field (3 here) is presumably the opcode's argument count -- confirm
 * against the definition of struct ati_fs_opcode_st.
 */
243 struct ati_fs_opcode_st ati_fs_opcodes
[] = {
252 {GL_DOT2_ADD_ATI
, 3},
260 handle_pass_op(struct atifs_machine
*machine
, struct atifs_setupinst
*texinst
,
261 const SWspan
*span
, GLuint column
, GLuint idx
)
263 GLuint swizzle
= texinst
->swizzle
;
264 GLuint pass_tex
= texinst
->src
;
266 if (pass_tex
>= GL_TEXTURE0_ARB
&& pass_tex
<= GL_TEXTURE7_ARB
) {
267 pass_tex
-= GL_TEXTURE0_ARB
;
268 COPY_4V(machine
->Registers
[idx
],
269 span
->array
->attribs
[FRAG_ATTRIB_TEX0
+ pass_tex
][column
]);
271 else if (pass_tex
>= GL_REG_0_ATI
&& pass_tex
<= GL_REG_5_ATI
) {
272 pass_tex
-= GL_REG_0_ATI
;
273 COPY_4V(machine
->Registers
[idx
], machine
->PrevPassRegisters
[pass_tex
]);
275 apply_swizzle(machine
->Registers
[idx
], swizzle
);
280 handle_sample_op(GLcontext
* ctx
, struct atifs_machine
*machine
,
281 struct atifs_setupinst
*texinst
, const SWspan
*span
,
282 GLuint column
, GLuint idx
)
284 /* sample from unit idx using texinst->src as coords */
285 GLuint swizzle
= texinst
->swizzle
;
286 GLuint coord_source
= texinst
->src
;
287 GLfloat tex_coords
[4];
289 if (coord_source
>= GL_TEXTURE0_ARB
&& coord_source
<= GL_TEXTURE7_ARB
) {
290 coord_source
-= GL_TEXTURE0_ARB
;
292 span
->array
->attribs
[FRAG_ATTRIB_TEX0
+ coord_source
][column
]);
294 else if (coord_source
>= GL_REG_0_ATI
&& coord_source
<= GL_REG_5_ATI
) {
295 coord_source
-= GL_REG_0_ATI
;
296 COPY_4V(tex_coords
, machine
->PrevPassRegisters
[coord_source
]);
298 apply_swizzle(tex_coords
, swizzle
);
299 fetch_texel(ctx
, tex_coords
, 0.0F
, idx
, machine
->Registers
[idx
]);
/*
 * Copy the four components of x into the source slot src[optype][i] of the
 * enclosing function.  Wrapped in do { } while (0) so that an invocation
 * followed by ';' is a single statement and can be used as the body of an
 * if / else-if chain.
 */
#define SETUP_SRC_REG(optype, i, x)		\
do {						\
   COPY_4V(src[optype][i], x);			\
} while (0)
/**
 * Execute the given fragment shader.
 * NOTE: we do everything in single-precision floating point
 * \param ctx - rendering context
 * \param shader - the shader to execute
 * \param machine - virtual machine state
 * \param span - the SWspan we're operating on
 * \param column - which pixel [i] we're operating on in the span
 */
/*
 * execute_shader: interpret `shader` for one fragment.
 *
 * For each of shader->NumPasses passes the visible code:
 *   1. runs the setup instruction of every register slot (pass op or
 *      sample op),
 *   2. for every arithmetic instruction of the pass, loads the source
 *      operands of the color op (optype 0) and alpha op (optype 1) into
 *      src[optype][arg], applying argRep and argMod,
 *   3. computes the results into dst[optype] (components 0..2 for color,
 *      component 3 for alpha),
 *   4. writes the results to the destination register via write_dst_addr.
 *
 * NOTE(review): this text is a lossy extraction.  The leading integers are
 * original line numbers fused into the code, and a number of lines (braces,
 * some local declarations, most `case` labels, if/else guards and several
 * continuation lines) are absent.  Comments below describe only what the
 * visible fragments show.
 */
319 execute_shader(GLcontext
*ctx
, const struct ati_fragment_shader
*shader
,
320 struct atifs_machine
*machine
, const SWspan
*span
,
324 struct atifs_instruction
*inst
;
325 struct atifs_setupinst
*texinst
;
/* src[optype][argument][component]; dst[optype][component]. */
330 GLfloat src
[2][3][4];
331 GLfloat zeros
[4] = { 0.0, 0.0, 0.0, 0.0 };
332 GLfloat ones
[4] = { 1.0, 1.0, 1.0, 1.0 };
333 GLfloat dst
[2][4], *dstp
;
/* One iteration per shader pass. */
335 for (pass
= 0; pass
< shader
->NumPasses
; pass
++) {
/* NOTE(review): the original line just before this call is missing from the
 * extraction; whether the call is guarded (e.g. skipped on the first pass)
 * cannot be told from here. */
337 finish_pass(machine
);
/* Run the setup instruction, if any, of every register slot j. */
338 for (j
= 0; j
< MAX_NUM_FRAGMENT_REGISTERS_ATI
; j
++) {
339 texinst
= &shader
->SetupInst
[pass
][j
];
340 if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_PASS_OP
)
341 handle_pass_op(machine
, texinst
, span
, column
, j
);
342 else if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_SAMPLE_OP
)
343 handle_sample_op(ctx
, machine
, texinst
, span
, column
, j
);
/* Arithmetic instructions of this pass. */
346 for (pc
= 0; pc
< shader
->numArithInstr
[pass
]; pc
++) {
347 inst
= &shader
->Instructions
[pass
][pc
];
349 /* setup the source registers for color and alpha ops */
350 for (optype
= 0; optype
< 2; optype
++) {
351 for (i
= 0; i
< inst
->ArgCount
[optype
]; i
++) {
352 GLint index
= inst
->SrcReg
[optype
][i
].Index
;
/* The operand is chosen by its Index: a temporary register, a constant,
 * the literals one/zero, or the primary/secondary input color. */
354 if (index
>= GL_REG_0_ATI
&& index
<= GL_REG_5_ATI
)
355 SETUP_SRC_REG(optype
, i
,
356 machine
->Registers
[index
- GL_REG_0_ATI
]);
357 else if (index
>= GL_CON_0_ATI
&& index
<= GL_CON_7_ATI
) {
/* A set bit in LocalConstDef selects the shader's own constant; the
 * following SETUP_SRC_REG reads the context-wide GlobalConstants instead
 * (the `else` joining the two is not visible in this extraction). */
358 if (shader
->LocalConstDef
& (1 << (index
- GL_CON_0_ATI
))) {
359 SETUP_SRC_REG(optype
, i
,
360 shader
->Constants
[index
- GL_CON_0_ATI
]);
362 SETUP_SRC_REG(optype
, i
,
363 ctx
->ATIFragmentShader
.GlobalConstants
[index
- GL_CON_0_ATI
]);
366 else if (index
== GL_ONE
)
367 SETUP_SRC_REG(optype
, i
, ones
);
368 else if (index
== GL_ZERO
)
369 SETUP_SRC_REG(optype
, i
, zeros
);
370 else if (index
== GL_PRIMARY_COLOR_EXT
)
371 SETUP_SRC_REG(optype
, i
,
372 machine
->Inputs
[ATI_FS_INPUT_PRIMARY
]);
373 else if (index
== GL_SECONDARY_INTERPOLATOR_ATI
)
374 SETUP_SRC_REG(optype
, i
,
375 machine
->Inputs
[ATI_FS_INPUT_SECONDARY
]);
/* Apply the argument's replicate and modifier settings.
 * NOTE(review): the final argument of both calls is not visible here. */
377 apply_src_rep(optype
, inst
->SrcReg
[optype
][i
].argRep
,
379 apply_src_mod(optype
, inst
->SrcReg
[optype
][i
].argMod
,
384 /* Execute the operations - color then alpha */
385 for (optype
= 0; optype
< 2; optype
++) {
386 if (inst
->Opcode
[optype
]) {
387 switch (inst
->Opcode
[optype
]) {
/* NOTE(review): the `case` labels of the arithmetic groups below are
 * missing from the extraction (only GL_DOT2_ADD_ATI is visible further
 * down).  Each group shows a color form looping over components 0..2 and
 * an alpha form on component 3; the guard choosing between the two is not
 * visible.  Visible operations, in order: sum, difference and product of
 * arguments 0 and 1; two product-plus-term forms whose trailing operands
 * are cut off; a copy of argument 0; and two selects between arguments 0
 * and 1 (the second tests argument 2 against >= 0). */
390 for (i
= 0; i
< 3; i
++) {
392 src
[optype
][0][i
] + src
[optype
][1][i
];
395 dst
[optype
][3] = src
[optype
][0][3] + src
[optype
][1][3];
399 for (i
= 0; i
< 3; i
++) {
401 src
[optype
][0][i
] - src
[optype
][1][i
];
404 dst
[optype
][3] = src
[optype
][0][3] - src
[optype
][1][3];
408 for (i
= 0; i
< 3; i
++) {
410 src
[optype
][0][i
] * src
[optype
][1][i
];
413 dst
[optype
][3] = src
[optype
][0][3] * src
[optype
][1][3];
417 for (i
= 0; i
< 3; i
++) {
419 src
[optype
][0][i
] * src
[optype
][1][i
] +
424 src
[optype
][0][3] * src
[optype
][1][3] +
429 for (i
= 0; i
< 3; i
++) {
431 src
[optype
][0][i
] * src
[optype
][1][i
] + (1 -
439 src
[optype
][0][3] * src
[optype
][1][3] + (1 -
447 for (i
= 0; i
< 3; i
++) {
448 dst
[optype
][i
] = src
[optype
][0][i
];
451 dst
[optype
][3] = src
[optype
][0][3];
455 for (i
= 0; i
< 3; i
++) {
458 0.5) ? src
[optype
][0][i
] : src
[optype
][1][i
];
464 0.5) ? src
[optype
][0][3] : src
[optype
][1][3];
470 for (i
= 0; i
< 3; i
++) {
472 (src
[optype
][2][i
] >=
473 0) ? src
[optype
][0][i
] : src
[optype
][1][i
];
477 (src
[optype
][2][3] >=
478 0) ? src
[optype
][0][3] : src
[optype
][1][3];
/* Dot products: the scalar `result` is computed from the color op's sources
 * (src[0]) regardless of optype, then broadcast to components 0..2 or
 * stored to component 3. */
481 case GL_DOT2_ADD_ATI
:
485 /* DOT 2 always uses the source from the color op */
486 /* could save recalculation of dot products for alpha inst */
/* Two-component dot product of arguments 0 and 1, plus component 2 of
 * argument 2. */
487 result
= src
[0][0][0] * src
[0][1][0] +
488 src
[0][0][1] * src
[0][1][1] + src
[0][2][2];
490 for (i
= 0; i
< 3; i
++) {
491 dst
[optype
][i
] = result
;
495 dst
[optype
][3] = result
;
502 /* DOT 3 always uses the source from the color op */
503 result
= src
[0][0][0] * src
[0][1][0] +
504 src
[0][0][1] * src
[0][1][1] +
505 src
[0][0][2] * src
[0][1][2];
508 for (i
= 0; i
< 3; i
++) {
509 dst
[optype
][i
] = result
;
513 dst
[optype
][3] = result
;
520 /* DOT 4 always uses the source from the color op */
521 result
= src
[0][0][0] * src
[0][1][0] +
522 src
[0][0][1] * src
[0][1][1] +
523 src
[0][0][2] * src
[0][1][2] +
524 src
[0][0][3] * src
[0][1][3];
526 for (i
= 0; i
< 3; i
++) {
527 dst
[optype
][i
] = result
;
531 dst
[optype
][3] = result
;
539 /* write out the destination registers */
540 for (optype
= 0; optype
< 2; optype
++) {
541 if (inst
->Opcode
[optype
]) {
542 dstreg
= inst
->DstReg
[optype
].Index
;
543 dstp
= machine
->Registers
[dstreg
- GL_REG_0_ATI
];
/* The color op, and an alpha op that is not one of the DOT opcodes, is
 * written with its own dstMod and dstMask. */
545 if ((optype
== 0) || ((inst
->Opcode
[1] != GL_DOT2_ADD_ATI
) &&
546 (inst
->Opcode
[1] != GL_DOT3_ATI
) && (inst
->Opcode
[1] != GL_DOT4_ATI
)))
547 write_dst_addr(optype
, inst
->DstReg
[optype
].dstMod
,
548 inst
->DstReg
[optype
].dstMask
, dst
[optype
],
/* Remaining case (alpha op that is a DOT opcode): the alpha result is
 * written using the color op's dstMod and a zero mask. */
551 write_dst_addr(1, inst
->DstReg
[0].dstMod
, 0, dst
[1], dstp
);
560 * Init fragment shader virtual machine state.
563 init_machine(GLcontext
* ctx
, struct atifs_machine
*machine
,
564 const struct ati_fragment_shader
*shader
,
565 const SWspan
*span
, GLuint col
)
567 GLfloat (*inputs
)[4] = machine
->Inputs
;
570 for (i
= 0; i
< 6; i
++) {
571 for (j
= 0; j
< 4; j
++)
572 machine
->Registers
[i
][j
] = 0.0;
575 COPY_4V(inputs
[ATI_FS_INPUT_PRIMARY
], span
->array
->attribs
[FRAG_ATTRIB_COL0
][col
]);
576 COPY_4V(inputs
[ATI_FS_INPUT_SECONDARY
], span
->array
->attribs
[FRAG_ATTRIB_COL1
][col
]);
582 * Execute the current ATI shader program, operating on the given span.
585 _swrast_exec_fragment_shader(GLcontext
* ctx
, SWspan
*span
)
587 const struct ati_fragment_shader
*shader
= ctx
->ATIFragmentShader
.Current
;
588 struct atifs_machine machine
;
591 /* incoming colors should be floats */
592 ASSERT(span
->array
->ChanType
== GL_FLOAT
);
594 ctx
->_CurrentProgram
= GL_FRAGMENT_SHADER_ATI
;
596 for (i
= 0; i
< span
->end
; i
++) {
597 if (span
->array
->mask
[i
]) {
598 init_machine(ctx
, &machine
, shader
, span
, i
);
600 execute_shader(ctx
, shader
, &machine
, span
, i
);
602 /* store result color */
604 const GLfloat
*colOut
= machine
.Registers
[0];
605 /*fprintf(stderr,"outputs %f %f %f %f\n",
606 colOut[0], colOut[1], colOut[2], colOut[3]); */
607 COPY_4V(span
->array
->attribs
[FRAG_ATTRIB_COL0
][i
], colOut
);
612 ctx
->_CurrentProgram
= 0;