/*
 *
 * Copyright (C) 2004 David Airlie All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 #include "atifragshader.h"
30 #include "s_atifragshader.h"
31 #include "s_nvfragprog.h"
33 #include "s_texture.h"
39 fetch_texel(GLcontext
* ctx
, const GLfloat texcoord
[4], GLfloat lambda
,
40 GLuint unit
, GLfloat color
[4])
43 SWcontext
*swrast
= SWRAST_CONTEXT(ctx
);
45 /* XXX use a float-valued TextureSample routine here!!! */
46 swrast
->TextureSample
[unit
] (ctx
, unit
, ctx
->Texture
.Unit
[unit
]._Current
,
47 1, (const GLfloat(*)[4]) texcoord
,
49 color
[0] = CHAN_TO_FLOAT(rgba
[0]);
50 color
[1] = CHAN_TO_FLOAT(rgba
[1]);
51 color
[2] = CHAN_TO_FLOAT(rgba
[2]);
52 color
[3] = CHAN_TO_FLOAT(rgba
[3]);
56 apply_swizzle(struct atifs_machine
*machine
, GLuint reg
, GLuint swizzle
)
60 s
= machine
->Registers
[reg
][0];
61 t
= machine
->Registers
[reg
][1];
62 r
= machine
->Registers
[reg
][2];
63 q
= machine
->Registers
[reg
][3];
66 case GL_SWIZZLE_STR_ATI
:
67 machine
->Registers
[reg
][0] = s
;
68 machine
->Registers
[reg
][1] = t
;
69 machine
->Registers
[reg
][2] = r
;
71 case GL_SWIZZLE_STQ_ATI
:
72 machine
->Registers
[reg
][0] = s
;
73 machine
->Registers
[reg
][1] = t
;
74 machine
->Registers
[reg
][2] = q
;
76 case GL_SWIZZLE_STR_DR_ATI
:
77 machine
->Registers
[reg
][0] = s
/ r
;
78 machine
->Registers
[reg
][1] = t
/ r
;
79 machine
->Registers
[reg
][2] = 1 / r
;
81 case GL_SWIZZLE_STQ_DQ_ATI
:
82 machine
->Registers
[reg
][0] = s
/ q
;
83 machine
->Registers
[reg
][1] = t
/ q
;
84 machine
->Registers
[reg
][2] = 1 / q
;
87 machine
->Registers
[reg
][3] = 0.0;
91 apply_src_rep(GLint optype
, GLuint rep
, GLfloat
* val
)
98 start
= optype
? 3 : 0;
101 for (i
= start
; i
< end
; i
++) {
120 apply_src_mod(GLint optype
, GLuint mod
, GLfloat
* val
)
128 start
= optype
? 3 : 0;
129 end
= optype
? 4 : 3;
131 for (i
= start
; i
< end
; i
++) {
132 if (mod
& GL_COMP_BIT_ATI
)
135 if (mod
& GL_BIAS_BIT_ATI
)
136 val
[i
] = val
[i
] - 0.5;
138 if (mod
& GL_2X_BIT_ATI
)
141 if (mod
& GL_NEGATE_BIT_ATI
)
147 apply_dst_mod(GLuint optype
, GLuint mod
, GLfloat
* val
)
150 GLint has_sat
= mod
& GL_SATURATE_BIT_ATI
;
153 mod
&= ~GL_SATURATE_BIT_ATI
;
155 start
= optype
? 3 : 0;
156 end
= optype
? 4 : 3;
158 for (i
= start
; i
< end
; i
++) {
169 case GL_HALF_BIT_ATI
:
170 val
[i
] = val
[i
] * 0.5;
172 case GL_QUARTER_BIT_ATI
:
173 val
[i
] = val
[i
] * 0.25;
175 case GL_EIGHTH_BIT_ATI
:
176 val
[i
] = val
[i
] * 0.125;
183 else if (val
[i
] > 1.0)
189 else if (val
[i
] > 8.0)
197 write_dst_addr(GLuint optype
, GLuint mod
, GLuint mask
, GLfloat
* src
,
201 apply_dst_mod(optype
, mod
, src
);
203 if (optype
== ATI_FRAGMENT_SHADER_COLOR_OP
) {
205 if (mask
& GL_RED_BIT_ATI
)
208 if (mask
& GL_GREEN_BIT_ATI
)
211 if (mask
& GL_BLUE_BIT_ATI
)
215 for (i
= 0; i
< 3; i
++)
224 finish_pass(struct atifs_machine
*machine
)
228 for (i
= 0; i
< 6; i
++) {
229 COPY_4V(machine
->PrevPassRegisters
[i
], machine
->Registers
[i
]);
/**
 * Execute the given fragment shader
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param shader - the fragment shader to execute
 * \param machine - machine state (register file)
 * \param maxInst - max number of instructions to execute
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
244 struct ati_fs_opcode_st ati_fs_opcodes
[] = {
253 {GL_DOT2_ADD_ATI
, 3},
261 handle_pass_op(struct atifs_machine
*machine
, struct atifs_instruction
*inst
,
262 const struct sw_span
*span
, GLuint column
)
264 GLuint idx
= inst
->DstReg
[0].Index
- GL_REG_0_ATI
;
265 GLuint swizzle
= inst
->DstReg
[0].Swizzle
;
266 GLuint pass_tex
= inst
->SrcReg
[0][0].Index
;
268 /* if we get here after passing pass one then we are starting pass two - backup the registers */
269 if (machine
->pass
== 1) {
270 finish_pass(machine
);
273 if (pass_tex
>= GL_TEXTURE0_ARB
&& pass_tex
<= GL_TEXTURE7_ARB
) {
274 pass_tex
-= GL_TEXTURE0_ARB
;
275 COPY_4V(machine
->Registers
[idx
],
276 span
->array
->texcoords
[pass_tex
][column
]);
278 else if (pass_tex
>= GL_REG_0_ATI
&& pass_tex
<= GL_REG_5_ATI
279 && machine
->pass
== 2) {
280 pass_tex
-= GL_REG_0_ATI
;
281 COPY_4V(machine
->Registers
[idx
], machine
->PrevPassRegisters
[pass_tex
]);
283 apply_swizzle(machine
, idx
, swizzle
);
288 handle_sample_op(GLcontext
* ctx
, struct atifs_machine
*machine
,
289 struct atifs_instruction
*inst
, const struct sw_span
*span
,
292 GLuint idx
= inst
->DstReg
[0].Index
- GL_REG_0_ATI
;
293 GLuint swizzle
= inst
->DstReg
[0].Swizzle
;
294 GLuint sample_tex
= inst
->SrcReg
[0][0].Index
;
296 /* if we get here after passing pass one then we are starting pass two - backup the registers */
297 if (machine
->pass
== 1) {
298 finish_pass(machine
);
302 if (sample_tex
>= GL_TEXTURE0_ARB
&& sample_tex
<= GL_TEXTURE7_ARB
) {
303 sample_tex
-= GL_TEXTURE0_ARB
;
304 fetch_texel(ctx
, span
->array
->texcoords
[sample_tex
][column
], 0.0F
,
305 sample_tex
, machine
->Registers
[idx
]);
307 else if (sample_tex
>= GL_REG_0_ATI
&& sample_tex
<= GL_REG_5_ATI
) {
308 /* this is wrong... */
309 sample_tex
-= GL_REG_0_ATI
;
310 fetch_texel(ctx
, machine
->Registers
[sample_tex
], 0, sample_tex
,
311 machine
->Registers
[idx
]);
314 apply_swizzle(machine
, idx
, swizzle
);
/*
 * Load source operand `i` of op type `optype` from the 4-float vector `x`
 * into the local `src` array of the enclosing function.
 *
 * All four components are copied.  The damaged listing showed one path
 * that set only component 3 and one that copied only components 0..2
 * (the lines choosing between them were not visible).  Copying everything
 * is a superset of both and keeps reads such as the DOT4 term
 * src[0][0][3] * src[0][1][3] from touching uninitialised storage.
 */
#define SETUP_SRC_REG(optype, i, x)             \
   do {                                         \
      COPY_4V(src[optype][i], x);               \
   } while (0)
325 execute_shader(GLcontext
* ctx
,
326 const struct ati_fragment_shader
*shader
, GLuint maxInst
,
327 struct atifs_machine
*machine
, const struct sw_span
*span
,
331 struct atifs_instruction
*inst
;
335 GLfloat src
[2][3][4];
336 GLfloat zeros
[4] = { 0.0, 0.0, 0.0, 0.0 };
337 GLfloat ones
[4] = { 1.0, 1.0, 1.0, 1.0 };
338 GLfloat dst
[2][4], *dstp
;
340 for (pc
= 0; pc
< shader
->Base
.NumInstructions
; pc
++) {
341 inst
= &shader
->Instructions
[pc
];
343 if (inst
->Opcode
[0] == ATI_FRAGMENT_SHADER_PASS_OP
)
344 handle_pass_op(machine
, inst
, span
, column
);
345 else if (inst
->Opcode
[0] == ATI_FRAGMENT_SHADER_SAMPLE_OP
)
346 handle_sample_op(ctx
, machine
, inst
, span
, column
);
348 if (machine
->pass
== 0)
351 /* setup the source registers for color and alpha ops */
352 for (optype
= 0; optype
< 2; optype
++) {
353 for (i
= 0; i
< inst
->ArgCount
[optype
]; i
++) {
354 GLint index
= inst
->SrcReg
[optype
][i
].Index
;
356 if (index
>= GL_REG_0_ATI
&& index
<= GL_REG_5_ATI
)
357 SETUP_SRC_REG(optype
, i
,
358 machine
->Registers
[index
- GL_REG_0_ATI
]);
359 else if (index
>= GL_CON_0_ATI
&& index
<= GL_CON_7_ATI
)
360 SETUP_SRC_REG(optype
, i
,
361 shader
->Constants
[index
- GL_CON_0_ATI
]);
362 else if (index
== GL_ONE
)
363 SETUP_SRC_REG(optype
, i
, ones
);
364 else if (index
== GL_ZERO
)
365 SETUP_SRC_REG(optype
, i
, zeros
);
366 else if (index
== GL_PRIMARY_COLOR_EXT
)
367 SETUP_SRC_REG(optype
, i
,
368 machine
->Inputs
[ATI_FS_INPUT_PRIMARY
]);
369 else if (index
== GL_SECONDARY_INTERPOLATOR_ATI
)
370 SETUP_SRC_REG(optype
, i
,
371 machine
->Inputs
[ATI_FS_INPUT_SECONDARY
]);
373 apply_src_rep(optype
, inst
->SrcReg
[optype
][i
].argRep
,
375 apply_src_mod(optype
, inst
->SrcReg
[optype
][i
].argMod
,
380 /* Execute the operations - color then alpha */
381 for (optype
= 0; optype
< 2; optype
++) {
382 if (inst
->Opcode
[optype
]) {
383 switch (inst
->Opcode
[optype
]) {
386 for (i
= 0; i
< 3; i
++) {
388 src
[optype
][0][i
] + src
[optype
][1][i
];
391 dst
[optype
][3] = src
[optype
][0][3] + src
[optype
][1][3];
395 for (i
= 0; i
< 3; i
++) {
397 src
[optype
][0][i
] - src
[optype
][1][i
];
400 dst
[optype
][3] = src
[optype
][0][3] - src
[optype
][1][3];
404 for (i
= 0; i
< 3; i
++) {
406 src
[optype
][0][i
] * src
[optype
][1][i
];
409 dst
[optype
][3] = src
[optype
][0][3] * src
[optype
][1][3];
413 for (i
= 0; i
< 3; i
++) {
415 src
[optype
][0][i
] * src
[optype
][1][i
] +
420 src
[optype
][0][3] * src
[optype
][1][3] +
425 for (i
= 0; i
< 3; i
++) {
427 src
[optype
][0][i
] * src
[optype
][1][i
] + (1 -
435 src
[optype
][0][3] * src
[optype
][1][3] + (1 -
443 for (i
= 0; i
< 3; i
++) {
444 dst
[optype
][i
] = src
[optype
][0][i
];
447 dst
[optype
][3] = src
[optype
][0][3];
451 for (i
= 0; i
< 3; i
++) {
454 0.5) ? src
[optype
][0][i
] : src
[optype
][1][i
];
460 0.5) ? src
[optype
][0][3] : src
[optype
][1][3];
466 for (i
= 0; i
< 3; i
++) {
468 (src
[optype
][2][i
] >=
469 0) ? src
[optype
][0][i
] : src
[optype
][1][i
];
473 (src
[optype
][2][3] >=
474 0) ? src
[optype
][0][3] : src
[optype
][1][3];
477 case GL_DOT2_ADD_ATI
:
481 /* DOT 2 always uses the source from the color op */
482 result
= src
[0][0][0] * src
[0][1][0] +
483 src
[0][0][1] * src
[0][1][1] + src
[0][2][2];
485 for (i
= 0; i
< 3; i
++) {
486 dst
[optype
][i
] = result
;
490 dst
[optype
][3] = result
;
498 /* DOT 3 always uses the source from the color op */
499 result
= src
[0][0][0] * src
[0][1][0] +
500 src
[0][0][1] * src
[0][1][1] +
501 src
[0][0][2] * src
[0][1][2];
504 for (i
= 0; i
< 3; i
++) {
505 dst
[optype
][i
] = result
;
509 dst
[optype
][3] = result
;
516 /* DOT 4 always uses the source from the color op */
517 result
= src
[optype
][0][0] * src
[0][1][0] +
518 src
[0][0][1] * src
[0][1][1] +
519 src
[0][0][2] * src
[0][1][2] +
520 src
[0][0][3] * src
[0][1][3];
522 for (i
= 0; i
< 3; i
++) {
523 dst
[optype
][i
] = result
;
527 dst
[optype
][3] = result
;
535 /* write out the destination registers */
536 for (optype
= 0; optype
< 2; optype
++) {
537 if (inst
->Opcode
[optype
]) {
538 dstreg
= inst
->DstReg
[optype
].Index
;
539 dstp
= machine
->Registers
[dstreg
- GL_REG_0_ATI
];
541 write_dst_addr(optype
, inst
->DstReg
[optype
].dstMod
,
542 inst
->DstReg
[optype
].dstMask
, dst
[optype
],
552 init_machine(GLcontext
* ctx
, struct atifs_machine
*machine
,
553 const struct ati_fragment_shader
*shader
,
554 const struct sw_span
*span
, GLuint col
)
558 for (i
= 0; i
< 6; i
++) {
559 for (j
= 0; j
< 4; j
++)
560 ctx
->ATIFragmentShader
.Machine
.Registers
[i
][j
] = 0.0;
564 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_PRIMARY
][0] =
565 CHAN_TO_FLOAT(span
->array
->rgba
[col
][0]);
566 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_PRIMARY
][1] =
567 CHAN_TO_FLOAT(span
->array
->rgba
[col
][1]);
568 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_PRIMARY
][2] =
569 CHAN_TO_FLOAT(span
->array
->rgba
[col
][2]);
570 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_PRIMARY
][3] =
571 CHAN_TO_FLOAT(span
->array
->rgba
[col
][3]);
573 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_SECONDARY
][0] =
574 CHAN_TO_FLOAT(span
->array
->spec
[col
][0]);
575 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_SECONDARY
][1] =
576 CHAN_TO_FLOAT(span
->array
->spec
[col
][1]);
577 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_SECONDARY
][2] =
578 CHAN_TO_FLOAT(span
->array
->spec
[col
][2]);
579 ctx
->ATIFragmentShader
.Machine
.Inputs
[ATI_FS_INPUT_SECONDARY
][3] =
580 CHAN_TO_FLOAT(span
->array
->spec
[col
][3]);
582 ctx
->ATIFragmentShader
.Machine
.pass
= 0;
588 * Execute the current fragment program, operating on the given span.
591 _swrast_exec_fragment_shader(GLcontext
* ctx
, struct sw_span
*span
)
593 const struct ati_fragment_shader
*shader
= ctx
->ATIFragmentShader
.Current
;
596 ctx
->_CurrentProgram
= GL_FRAGMENT_SHADER_ATI
;
598 for (i
= 0; i
< span
->end
; i
++) {
599 if (span
->array
->mask
[i
]) {
600 init_machine(ctx
, &ctx
->ATIFragmentShader
.Machine
,
601 ctx
->ATIFragmentShader
.Current
, span
, i
);
603 if (execute_shader(ctx
, shader
, ~0,
604 &ctx
->ATIFragmentShader
.Machine
, span
, i
)) {
605 span
->array
->mask
[i
] = GL_FALSE
;
609 const GLfloat
*colOut
=
610 ctx
->ATIFragmentShader
.Machine
.Registers
[0];
612 /*fprintf(stderr,"outputs %f %f %f %f\n", colOut[0], colOut[1], colOut[2], colOut[3]); */
613 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][RCOMP
], colOut
[0]);
614 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][GCOMP
], colOut
[1]);
615 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][BCOMP
], colOut
[2]);
616 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][ACOMP
], colOut
[3]);
623 ctx
->_CurrentProgram
= 0;