3 * Copyright (C) 2004 David Airlie All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "atifragshader.h"
30 #include "s_atifragshader.h"
34 * State for executing ATI fragment shader.
38 GLfloat Registers
[6][4]; /** six temporary registers */
39 GLfloat PrevPassRegisters
[6][4];
40 GLfloat Inputs
[2][4]; /** Primary, secondary input colors */
49 fetch_texel(GLcontext
* ctx
, const GLfloat texcoord
[4], GLfloat lambda
,
50 GLuint unit
, GLfloat color
[4])
53 SWcontext
*swrast
= SWRAST_CONTEXT(ctx
);
55 /* XXX use a float-valued TextureSample routine here!!! */
56 swrast
->TextureSample
[unit
](ctx
, ctx
->Texture
.Unit
[unit
]._Current
,
57 1, (const GLfloat(*)[4]) texcoord
,
59 color
[0] = CHAN_TO_FLOAT(rgba
[0]);
60 color
[1] = CHAN_TO_FLOAT(rgba
[1]);
61 color
[2] = CHAN_TO_FLOAT(rgba
[2]);
62 color
[3] = CHAN_TO_FLOAT(rgba
[3]);
66 apply_swizzle(GLfloat values
[4], GLuint swizzle
)
76 case GL_SWIZZLE_STR_ATI
:
81 case GL_SWIZZLE_STQ_ATI
:
86 case GL_SWIZZLE_STR_DR_ATI
:
91 case GL_SWIZZLE_STQ_DQ_ATI
:
92 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
93 if (q
== 0.0F
) q
= 0.000000001;
103 apply_src_rep(GLint optype
, GLuint rep
, GLfloat
* val
)
110 start
= optype
? 3 : 0;
113 for (i
= start
; i
< end
; i
++) {
132 apply_src_mod(GLint optype
, GLuint mod
, GLfloat
* val
)
140 start
= optype
? 3 : 0;
143 for (i
= start
; i
< end
; i
++) {
144 if (mod
& GL_COMP_BIT_ATI
)
147 if (mod
& GL_BIAS_BIT_ATI
)
148 val
[i
] = val
[i
] - 0.5;
150 if (mod
& GL_2X_BIT_ATI
)
153 if (mod
& GL_NEGATE_BIT_ATI
)
159 apply_dst_mod(GLuint optype
, GLuint mod
, GLfloat
* val
)
162 GLint has_sat
= mod
& GL_SATURATE_BIT_ATI
;
165 mod
&= ~GL_SATURATE_BIT_ATI
;
167 start
= optype
? 3 : 0;
168 end
= optype
? 4 : 3;
170 for (i
= start
; i
< end
; i
++) {
181 case GL_HALF_BIT_ATI
:
182 val
[i
] = val
[i
] * 0.5;
184 case GL_QUARTER_BIT_ATI
:
185 val
[i
] = val
[i
] * 0.25;
187 case GL_EIGHTH_BIT_ATI
:
188 val
[i
] = val
[i
] * 0.125;
195 else if (val
[i
] > 1.0)
201 else if (val
[i
] > 8.0)
209 write_dst_addr(GLuint optype
, GLuint mod
, GLuint mask
, GLfloat
* src
,
213 apply_dst_mod(optype
, mod
, src
);
215 if (optype
== ATI_FRAGMENT_SHADER_COLOR_OP
) {
217 if (mask
& GL_RED_BIT_ATI
)
220 if (mask
& GL_GREEN_BIT_ATI
)
223 if (mask
& GL_BLUE_BIT_ATI
)
227 for (i
= 0; i
< 3; i
++)
236 finish_pass(struct atifs_machine
*machine
)
240 for (i
= 0; i
< 6; i
++) {
241 COPY_4V(machine
->PrevPassRegisters
[i
], machine
->Registers
[i
]);
/**
 * Execute the given fragment shader.
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param shader - the fragment shader to execute
 * \param maxInst - max number of instructions to execute
 * \param machine - machine state (register file)
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
/*
 * Table of ATI fragment shader opcodes.  Each entry pairs an opcode enum
 * with an integer -- presumably the number of source arguments the op
 * takes (GL_DOT2_ADD_ATI has 3); TODO confirm against the declaration of
 * struct ati_fs_opcode_st.
 */
256 struct ati_fs_opcode_st ati_fs_opcodes
[] = {
265 {GL_DOT2_ADD_ATI
, 3},
273 handle_pass_op(struct atifs_machine
*machine
, struct atifs_setupinst
*texinst
,
274 const SWspan
*span
, GLuint column
, GLuint idx
)
276 GLuint swizzle
= texinst
->swizzle
;
277 GLuint pass_tex
= texinst
->src
;
279 if (pass_tex
>= GL_TEXTURE0_ARB
&& pass_tex
<= GL_TEXTURE7_ARB
) {
280 pass_tex
-= GL_TEXTURE0_ARB
;
281 COPY_4V(machine
->Registers
[idx
],
282 span
->array
->texcoords
[pass_tex
][column
]);
284 else if (pass_tex
>= GL_REG_0_ATI
&& pass_tex
<= GL_REG_5_ATI
) {
285 pass_tex
-= GL_REG_0_ATI
;
286 COPY_4V(machine
->Registers
[idx
], machine
->PrevPassRegisters
[pass_tex
]);
288 apply_swizzle(machine
->Registers
[idx
], swizzle
);
293 handle_sample_op(GLcontext
* ctx
, struct atifs_machine
*machine
,
294 struct atifs_setupinst
*texinst
, const SWspan
*span
,
295 GLuint column
, GLuint idx
)
297 /* sample from unit idx using texinst->src as coords */
298 GLuint swizzle
= texinst
->swizzle
;
299 GLuint coord_source
= texinst
->src
;
300 GLfloat tex_coords
[4];
302 if (coord_source
>= GL_TEXTURE0_ARB
&& coord_source
<= GL_TEXTURE7_ARB
) {
303 coord_source
-= GL_TEXTURE0_ARB
;
304 COPY_4V(tex_coords
, span
->array
->texcoords
[coord_source
][column
]);
306 else if (coord_source
>= GL_REG_0_ATI
&& coord_source
<= GL_REG_5_ATI
) {
307 coord_source
-= GL_REG_0_ATI
;
308 COPY_4V(tex_coords
, machine
->PrevPassRegisters
[coord_source
]);
310 apply_swizzle(tex_coords
, swizzle
);
311 fetch_texel(ctx
, tex_coords
, 0.0F
, idx
, machine
->Registers
[idx
]);
314 #define SETUP_SRC_REG(optype, i, x) \
316 COPY_4V(src[optype][i], x); \
320 execute_shader(GLcontext
* ctx
,
321 const struct ati_fragment_shader
*shader
, GLuint maxInst
,
322 struct atifs_machine
*machine
, const SWspan
*span
,
326 struct atifs_instruction
*inst
;
327 struct atifs_setupinst
*texinst
;
331 GLfloat src
[2][3][4];
332 GLfloat zeros
[4] = { 0.0, 0.0, 0.0, 0.0 };
333 GLfloat ones
[4] = { 1.0, 1.0, 1.0, 1.0 };
334 GLfloat dst
[2][4], *dstp
;
336 for (pass
= 0; pass
< shader
->NumPasses
; pass
++) {
338 finish_pass(machine
);
339 for (j
= 0; j
< MAX_NUM_FRAGMENT_REGISTERS_ATI
; j
++) {
340 texinst
= &shader
->SetupInst
[pass
][j
];
341 if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_PASS_OP
)
342 handle_pass_op(machine
, texinst
, span
, column
, j
);
343 else if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_SAMPLE_OP
)
344 handle_sample_op(ctx
, machine
, texinst
, span
, column
, j
);
347 for (pc
= 0; pc
< shader
->numArithInstr
[pass
]; pc
++) {
348 inst
= &shader
->Instructions
[pass
][pc
];
350 /* setup the source registers for color and alpha ops */
351 for (optype
= 0; optype
< 2; optype
++) {
352 for (i
= 0; i
< inst
->ArgCount
[optype
]; i
++) {
353 GLint index
= inst
->SrcReg
[optype
][i
].Index
;
355 if (index
>= GL_REG_0_ATI
&& index
<= GL_REG_5_ATI
)
356 SETUP_SRC_REG(optype
, i
,
357 machine
->Registers
[index
- GL_REG_0_ATI
]);
358 else if (index
>= GL_CON_0_ATI
&& index
<= GL_CON_7_ATI
) {
359 if (shader
->LocalConstDef
& (1 << (index
- GL_CON_0_ATI
))) {
360 SETUP_SRC_REG(optype
, i
,
361 shader
->Constants
[index
- GL_CON_0_ATI
]);
363 SETUP_SRC_REG(optype
, i
,
364 ctx
->ATIFragmentShader
.GlobalConstants
[index
- GL_CON_0_ATI
]);
367 else if (index
== GL_ONE
)
368 SETUP_SRC_REG(optype
, i
, ones
);
369 else if (index
== GL_ZERO
)
370 SETUP_SRC_REG(optype
, i
, zeros
);
371 else if (index
== GL_PRIMARY_COLOR_EXT
)
372 SETUP_SRC_REG(optype
, i
,
373 machine
->Inputs
[ATI_FS_INPUT_PRIMARY
]);
374 else if (index
== GL_SECONDARY_INTERPOLATOR_ATI
)
375 SETUP_SRC_REG(optype
, i
,
376 machine
->Inputs
[ATI_FS_INPUT_SECONDARY
]);
378 apply_src_rep(optype
, inst
->SrcReg
[optype
][i
].argRep
,
380 apply_src_mod(optype
, inst
->SrcReg
[optype
][i
].argMod
,
385 /* Execute the operations - color then alpha */
386 for (optype
= 0; optype
< 2; optype
++) {
387 if (inst
->Opcode
[optype
]) {
388 switch (inst
->Opcode
[optype
]) {
391 for (i
= 0; i
< 3; i
++) {
393 src
[optype
][0][i
] + src
[optype
][1][i
];
396 dst
[optype
][3] = src
[optype
][0][3] + src
[optype
][1][3];
400 for (i
= 0; i
< 3; i
++) {
402 src
[optype
][0][i
] - src
[optype
][1][i
];
405 dst
[optype
][3] = src
[optype
][0][3] - src
[optype
][1][3];
409 for (i
= 0; i
< 3; i
++) {
411 src
[optype
][0][i
] * src
[optype
][1][i
];
414 dst
[optype
][3] = src
[optype
][0][3] * src
[optype
][1][3];
418 for (i
= 0; i
< 3; i
++) {
420 src
[optype
][0][i
] * src
[optype
][1][i
] +
425 src
[optype
][0][3] * src
[optype
][1][3] +
430 for (i
= 0; i
< 3; i
++) {
432 src
[optype
][0][i
] * src
[optype
][1][i
] + (1 -
440 src
[optype
][0][3] * src
[optype
][1][3] + (1 -
448 for (i
= 0; i
< 3; i
++) {
449 dst
[optype
][i
] = src
[optype
][0][i
];
452 dst
[optype
][3] = src
[optype
][0][3];
456 for (i
= 0; i
< 3; i
++) {
459 0.5) ? src
[optype
][0][i
] : src
[optype
][1][i
];
465 0.5) ? src
[optype
][0][3] : src
[optype
][1][3];
471 for (i
= 0; i
< 3; i
++) {
473 (src
[optype
][2][i
] >=
474 0) ? src
[optype
][0][i
] : src
[optype
][1][i
];
478 (src
[optype
][2][3] >=
479 0) ? src
[optype
][0][3] : src
[optype
][1][3];
482 case GL_DOT2_ADD_ATI
:
486 /* DOT 2 always uses the source from the color op */
487 /* could save recalculation of dot products for alpha inst */
488 result
= src
[0][0][0] * src
[0][1][0] +
489 src
[0][0][1] * src
[0][1][1] + src
[0][2][2];
491 for (i
= 0; i
< 3; i
++) {
492 dst
[optype
][i
] = result
;
496 dst
[optype
][3] = result
;
503 /* DOT 3 always uses the source from the color op */
504 result
= src
[0][0][0] * src
[0][1][0] +
505 src
[0][0][1] * src
[0][1][1] +
506 src
[0][0][2] * src
[0][1][2];
509 for (i
= 0; i
< 3; i
++) {
510 dst
[optype
][i
] = result
;
514 dst
[optype
][3] = result
;
521 /* DOT 4 always uses the source from the color op */
522 result
= src
[0][0][0] * src
[0][1][0] +
523 src
[0][0][1] * src
[0][1][1] +
524 src
[0][0][2] * src
[0][1][2] +
525 src
[0][0][3] * src
[0][1][3];
527 for (i
= 0; i
< 3; i
++) {
528 dst
[optype
][i
] = result
;
532 dst
[optype
][3] = result
;
540 /* write out the destination registers */
541 for (optype
= 0; optype
< 2; optype
++) {
542 if (inst
->Opcode
[optype
]) {
543 dstreg
= inst
->DstReg
[optype
].Index
;
544 dstp
= machine
->Registers
[dstreg
- GL_REG_0_ATI
];
546 if ((optype
== 0) || ((inst
->Opcode
[1] != GL_DOT2_ADD_ATI
) &&
547 (inst
->Opcode
[1] != GL_DOT3_ATI
) && (inst
->Opcode
[1] != GL_DOT4_ATI
)))
548 write_dst_addr(optype
, inst
->DstReg
[optype
].dstMod
,
549 inst
->DstReg
[optype
].dstMask
, dst
[optype
],
552 write_dst_addr(1, inst
->DstReg
[0].dstMod
, 0, dst
[1], dstp
);
562 * Init fragment shader virtual machine state.
565 init_machine(GLcontext
* ctx
, struct atifs_machine
*machine
,
566 const struct ati_fragment_shader
*shader
,
567 const SWspan
*span
, GLuint col
)
569 GLfloat (*inputs
)[4] = machine
->Inputs
;
572 for (i
= 0; i
< 6; i
++) {
573 for (j
= 0; j
< 4; j
++)
574 machine
->Registers
[i
][j
] = 0.0;
577 if (span
->array
->ChanType
== GL_UNSIGNED_BYTE
) {
578 GLubyte (*rgba
)[4] = span
->array
->color
.sz1
.rgba
;
579 GLubyte (*spec
)[4] = span
->array
->color
.sz1
.spec
;
580 inputs
[ATI_FS_INPUT_PRIMARY
][0] = UBYTE_TO_FLOAT(rgba
[col
][0]);
581 inputs
[ATI_FS_INPUT_PRIMARY
][1] = UBYTE_TO_FLOAT(rgba
[col
][1]);
582 inputs
[ATI_FS_INPUT_PRIMARY
][2] = UBYTE_TO_FLOAT(rgba
[col
][2]);
583 inputs
[ATI_FS_INPUT_PRIMARY
][3] = UBYTE_TO_FLOAT(rgba
[col
][3]);
584 inputs
[ATI_FS_INPUT_SECONDARY
][0] = UBYTE_TO_FLOAT(spec
[col
][0]);
585 inputs
[ATI_FS_INPUT_SECONDARY
][1] = UBYTE_TO_FLOAT(spec
[col
][1]);
586 inputs
[ATI_FS_INPUT_SECONDARY
][2] = UBYTE_TO_FLOAT(spec
[col
][2]);
587 inputs
[ATI_FS_INPUT_SECONDARY
][3] = UBYTE_TO_FLOAT(spec
[col
][3]);
589 else if (span
->array
->ChanType
== GL_UNSIGNED_SHORT
) {
590 GLushort (*rgba
)[4] = span
->array
->color
.sz2
.rgba
;
591 GLushort (*spec
)[4] = span
->array
->color
.sz2
.spec
;
592 inputs
[ATI_FS_INPUT_PRIMARY
][0] = USHORT_TO_FLOAT(rgba
[col
][0]);
593 inputs
[ATI_FS_INPUT_PRIMARY
][1] = USHORT_TO_FLOAT(rgba
[col
][1]);
594 inputs
[ATI_FS_INPUT_PRIMARY
][2] = USHORT_TO_FLOAT(rgba
[col
][2]);
595 inputs
[ATI_FS_INPUT_PRIMARY
][3] = USHORT_TO_FLOAT(rgba
[col
][3]);
596 inputs
[ATI_FS_INPUT_SECONDARY
][0] = USHORT_TO_FLOAT(spec
[col
][0]);
597 inputs
[ATI_FS_INPUT_SECONDARY
][1] = USHORT_TO_FLOAT(spec
[col
][1]);
598 inputs
[ATI_FS_INPUT_SECONDARY
][2] = USHORT_TO_FLOAT(spec
[col
][2]);
599 inputs
[ATI_FS_INPUT_SECONDARY
][3] = USHORT_TO_FLOAT(spec
[col
][3]);
602 GLfloat (*rgba
)[4] = span
->array
->color
.sz4
.rgba
;
603 GLfloat (*spec
)[4] = span
->array
->color
.sz4
.spec
;
604 COPY_4V(inputs
[ATI_FS_INPUT_PRIMARY
], rgba
[col
]);
605 COPY_4V(inputs
[ATI_FS_INPUT_SECONDARY
], spec
[col
]);
612 * Execute the current ATI shader program, operating on the given span.
615 _swrast_exec_fragment_shader(GLcontext
* ctx
, SWspan
*span
)
617 const struct ati_fragment_shader
*shader
= ctx
->ATIFragmentShader
.Current
;
618 struct atifs_machine machine
;
621 ctx
->_CurrentProgram
= GL_FRAGMENT_SHADER_ATI
;
623 for (i
= 0; i
< span
->end
; i
++) {
624 if (span
->array
->mask
[i
]) {
625 init_machine(ctx
, &machine
, shader
, span
, i
);
626 /* can't really happen... */
627 if (!execute_shader(ctx
, shader
, ~0, &machine
, span
, i
)) {
628 span
->array
->mask
[i
] = GL_FALSE
;
629 span
->writeAll
= GL_FALSE
;
632 /* store result color */
634 const GLfloat
*colOut
= machine
.Registers
[0];
635 /*fprintf(stderr,"outputs %f %f %f %f\n",
636 colOut[0], colOut[1], colOut[2], colOut[3]); */
637 if (span
->array
->ChanType
== GL_UNSIGNED_BYTE
) {
638 GLubyte (*rgba
)[4] = span
->array
->color
.sz1
.rgba
;
639 UNCLAMPED_FLOAT_TO_UBYTE(rgba
[i
][RCOMP
], colOut
[0]);
640 UNCLAMPED_FLOAT_TO_UBYTE(rgba
[i
][GCOMP
], colOut
[1]);
641 UNCLAMPED_FLOAT_TO_UBYTE(rgba
[i
][BCOMP
], colOut
[2]);
642 UNCLAMPED_FLOAT_TO_UBYTE(rgba
[i
][ACOMP
], colOut
[3]);
644 else if (span
->array
->ChanType
== GL_UNSIGNED_SHORT
) {
645 GLushort (*rgba
)[4] = span
->array
->color
.sz2
.rgba
;
646 UNCLAMPED_FLOAT_TO_USHORT(rgba
[i
][RCOMP
], colOut
[0]);
647 UNCLAMPED_FLOAT_TO_USHORT(rgba
[i
][GCOMP
], colOut
[1]);
648 UNCLAMPED_FLOAT_TO_USHORT(rgba
[i
][BCOMP
], colOut
[2]);
649 UNCLAMPED_FLOAT_TO_USHORT(rgba
[i
][ACOMP
], colOut
[3]);
652 GLfloat (*rgba
)[4] = span
->array
->color
.sz4
.rgba
;
653 COPY_4V(rgba
[i
], colOut
);
659 ctx
->_CurrentProgram
= 0;