1 /* $Id: s_nvfragprog.c,v 1.1 2003/01/14 04:57:47 brianp Exp $ */
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
32 #include "nvfragprog.h"
36 #include "s_nvfragprog.h"
40 * Fetch a 4-element float vector from the given source register.
41 * Apply swizzling and negating as needed.
44 fetch_vector4( const struct fp_src_register
*source
,
45 const struct fp_machine
*machine
,
51 if (source->RelAddr) {
52 GLint reg = source->Register + machine->AddressReg;
53 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
56 src = machine->Registers[reg];
61 src
= machine
->Registers
[source
->Register
];
63 result
[0] = src
[source
->Swizzle
[0]];
64 result
[1] = src
[source
->Swizzle
[1]];
65 result
[2] = src
[source
->Swizzle
[2]];
66 result
[3] = src
[source
->Swizzle
[3]];
68 if (source
->NegateBase
) {
69 result
[0] = -result
[0];
70 result
[1] = -result
[1];
71 result
[2] = -result
[2];
72 result
[3] = -result
[3];
75 result
[0] = FABSF(result
[0]);
76 result
[1] = FABSF(result
[1]);
77 result
[2] = FABSF(result
[2]);
78 result
[3] = FABSF(result
[3]);
80 if (source
->NegateAbs
) {
81 result
[0] = -result
[0];
82 result
[1] = -result
[1];
83 result
[2] = -result
[2];
84 result
[3] = -result
[3];
90 * As above, but only return result[0] element.
93 fetch_vector1( const struct fp_src_register
*source
,
94 const struct fp_machine
*machine
,
100 if (source->RelAddr) {
101 GLint reg = source->Register + machine->AddressReg;
102 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
105 src = machine->Registers[reg];
110 src
= machine
->Registers
[source
->Register
];
112 result
[0] = src
[source
->Swizzle
[0]];
114 if (source
->NegateBase
) {
115 result
[0] = -result
[0];
118 result
[0] = FABSF(result
[0]);
120 if (source
->NegateAbs
) {
121 result
[0] = -result
[0];
127 * Test value against zero and return GT, LT, EQ or UN if NaN.
130 generate_cc( float value
)
133 return COND_UN
; /* NaN */
142 * Test if the ccMaskRule is satisfied by the given condition code.
143 * Used to mask destination writes according to the current condition codee.
145 static INLINE GLboolean
146 test_cc(GLuint condCode
, GLuint ccMaskRule
)
148 switch (ccMaskRule
) {
149 case COND_EQ
: return (condCode
== COND_EQ
);
150 case COND_NE
: return (condCode
!= COND_EQ
);
151 case COND_LT
: return (condCode
== COND_LT
);
152 case COND_GE
: return (condCode
== COND_GT
|| condCode
== COND_EQ
);
153 case COND_LE
: return (condCode
== COND_LT
|| condCode
== COND_EQ
);
154 case COND_GT
: return (condCode
== COND_GT
);
155 case COND_TR
: return GL_TRUE
;
156 case COND_FL
: return GL_FALSE
;
157 default: return GL_TRUE
;
163 * Store 4 floats into a register.
166 store_vector4( const struct fp_dst_register
*dest
, struct fp_machine
*machine
,
167 const GLfloat value
[4], GLboolean clamp
, GLboolean updateCC
)
169 GLfloat
*dstReg
= machine
->Registers
[dest
->Register
];
170 GLfloat clampedValue
[4];
171 const GLboolean
*writeMask
= dest
->WriteMask
;
172 GLboolean condWriteMask
[4];
175 clampedValue
[0] = CLAMP(value
[0], 0.0F
, 1.0F
);
176 clampedValue
[1] = CLAMP(value
[1], 0.0F
, 1.0F
);
177 clampedValue
[2] = CLAMP(value
[2], 0.0F
, 1.0F
);
178 clampedValue
[3] = CLAMP(value
[3], 0.0F
, 1.0F
);
179 value
= clampedValue
;
182 if (dest
->CondMask
!= COND_TR
) {
183 condWriteMask
[0] = writeMask
[0]
184 && test_cc(machine
->CondCodes
[dest
->CondSwizzle
[0]], dest
->CondMask
);
185 condWriteMask
[1] = writeMask
[1]
186 && test_cc(machine
->CondCodes
[dest
->CondSwizzle
[1]], dest
->CondMask
);
187 condWriteMask
[2] = writeMask
[2]
188 && test_cc(machine
->CondCodes
[dest
->CondSwizzle
[2]], dest
->CondMask
);
189 condWriteMask
[3] = writeMask
[3]
190 && test_cc(machine
->CondCodes
[dest
->CondSwizzle
[3]], dest
->CondMask
);
191 writeMask
= condWriteMask
;
195 dstReg
[0] = value
[0];
197 machine
->CondCodes
[0] = generate_cc(value
[0]);
200 dstReg
[1] = value
[1];
202 machine
->CondCodes
[1] = generate_cc(value
[1]);
205 dstReg
[2] = value
[2];
207 machine
->CondCodes
[2] = generate_cc(value
[2]);
210 dstReg
[3] = value
[3];
212 machine
->CondCodes
[3] = generate_cc(value
[3]);
218 * Execute the given vertex program
221 execute_program(GLcontext
*ctx
, const struct fragment_program
*program
)
223 struct fp_machine
*machine
= &ctx
->FragmentProgram
.Machine
;
224 const struct fp_instruction
*inst
;
226 for (inst
= program
->Instructions
; inst
->Opcode
!= FP_OPCODE_END
; inst
++) {
227 switch (inst
->Opcode
) {
230 GLfloat a
[4], b
[4], result
[4];
231 fetch_vector4( &inst
->SrcReg
[0], machine
, a
);
232 fetch_vector4( &inst
->SrcReg
[1], machine
, b
);
233 result
[0] = a
[0] + b
[0];
234 result
[1] = a
[1] + b
[1];
235 result
[2] = a
[2] + b
[2];
236 result
[3] = a
[3] + b
[3];
237 store_vector4( &inst
->DstReg
, machine
, result
, inst
->Saturate
,
238 inst
->UpdateCondRegister
);
243 GLfloat a
[4], result
[4];
244 fetch_vector1( &inst
->SrcReg
[0], machine
, a
);
245 result
[0] = result
[1] = result
[2] = result
[3] = cos(a
[0]);
246 store_vector4( &inst
->DstReg
, machine
, result
, inst
->Saturate
,
247 inst
->UpdateCondRegister
);
252 GLfloat a
[4], b
[4], result
[4];
253 fetch_vector4( &inst
->SrcReg
[0], machine
, a
);
254 fetch_vector4( &inst
->SrcReg
[1], machine
, b
);
255 result
[0] = result
[1] = result
[2] = result
[3] =
256 a
[0] + b
[0] + a
[1] * b
[1] + a
[2] * b
[2];
257 store_vector4( &inst
->DstReg
, machine
, result
, inst
->Saturate
,
258 inst
->UpdateCondRegister
);
263 GLfloat a
[4], b
[4], result
[4];
264 fetch_vector4( &inst
->SrcReg
[0], machine
, a
);
265 fetch_vector4( &inst
->SrcReg
[1], machine
, b
);
266 result
[0] = result
[1] = result
[2] = result
[3] =
267 a
[0] + b
[0] + a
[1] * b
[1] + a
[2] * b
[2] + a
[3] * b
[3];
268 store_vector4( &inst
->DstReg
, machine
, result
, inst
->Saturate
,
269 inst
->UpdateCondRegister
);
274 const GLuint
*swizzle
= inst
->DstReg
.CondSwizzle
;
275 const GLuint condMask
= inst
->DstReg
.CondMask
;
276 if (test_cc(machine
->CondCodes
[swizzle
[0]], condMask
) ||
277 test_cc(machine
->CondCodes
[swizzle
[1]], condMask
) ||
278 test_cc(machine
->CondCodes
[swizzle
[2]], condMask
) ||
279 test_cc(machine
->CondCodes
[swizzle
[3]], condMask
))
285 GLfloat a
[4], b
[4], c
[4], result
[4];
286 fetch_vector4( &inst
->SrcReg
[0], machine
, a
);
287 fetch_vector4( &inst
->SrcReg
[1], machine
, b
);
288 fetch_vector4( &inst
->SrcReg
[2], machine
, c
);
289 result
[0] = a
[0] * b
[0] + (1.0F
- a
[0]) * c
[0];
290 result
[1] = a
[1] * b
[1] + (1.0F
- a
[1]) * c
[1];
291 result
[2] = a
[2] * b
[2] + (1.0F
- a
[2]) * c
[2];
292 result
[3] = a
[3] * b
[3] + (1.0F
- a
[3]) * c
[3];
293 store_vector4( &inst
->DstReg
, machine
, result
, inst
->Saturate
,
294 inst
->UpdateCondRegister
);
300 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
301 store_vector4( &inst
->DstReg
, machine
, t
, inst
->Saturate
,
302 inst
->UpdateCondRegister
);
307 GLfloat a
[4], b
[4], result
[4];
308 fetch_vector4( &inst
->SrcReg
[0], machine
, a
);
309 fetch_vector4( &inst
->SrcReg
[1], machine
, b
);
310 result
[0] = (a
[0] == b
[0]) ? 1.0F
: 0.0F
;
311 result
[1] = (a
[1] == b
[1]) ? 1.0F
: 0.0F
;
312 result
[2] = (a
[2] == b
[2]) ? 1.0F
: 0.0F
;
313 result
[3] = (a
[3] == b
[3]) ? 1.0F
: 0.0F
;
314 store_vector4( &inst
->DstReg
, machine
, result
, inst
->Saturate
,
315 inst
->UpdateCondRegister
);
319 _mesa_problem(ctx
, "Bad opcode in _mesa_exec_fragment_program");
329 _swrast_exec_nv_fragment_program( GLcontext
*ctx
, struct sw_span
*span
)
333 for (i
= 0; i
< span
->end
; i
++) {
334 GLfloat
*wpos
= ctx
->FragmentProgram
.Machine
.Registers
[0];
335 GLfloat
*col0
= ctx
->FragmentProgram
.Machine
.Registers
[1];
336 GLfloat
*col1
= ctx
->FragmentProgram
.Machine
.Registers
[2];
337 GLfloat
*fogc
= ctx
->FragmentProgram
.Machine
.Registers
[3];
338 const GLfloat
*colOut
= ctx
->FragmentProgram
.Machine
.Registers
[FP_OUTPUT_REG_START
];
341 /* Clear temporary registers */
342 for (j
= 0; j
< MAX_NV_FRAGMENT_PROGRAM_TEMPS
; j
++) {
343 ctx
->FragmentProgram
.Machine
.Registers
[FP_TEMP_REG_START
+j
][0] = 0.0F
;
344 ctx
->FragmentProgram
.Machine
.Registers
[FP_TEMP_REG_START
+j
][1] = 0.0F
;
345 ctx
->FragmentProgram
.Machine
.Registers
[FP_TEMP_REG_START
+j
][2] = 0.0F
;
346 ctx
->FragmentProgram
.Machine
.Registers
[FP_TEMP_REG_START
+j
][3] = 0.0F
;
349 /* Load input registers */
350 wpos
[0] = span
->x
+ i
;
351 wpos
[1] = span
->y
+ i
;
352 wpos
[2] = span
->array
->z
[i
];
355 col0
[0] = CHAN_TO_FLOAT(span
->array
->rgba
[i
][RCOMP
]);
356 col0
[1] = CHAN_TO_FLOAT(span
->array
->rgba
[i
][GCOMP
]);
357 col0
[2] = CHAN_TO_FLOAT(span
->array
->rgba
[i
][BCOMP
]);
358 col0
[3] = CHAN_TO_FLOAT(span
->array
->rgba
[i
][ACOMP
]);
360 col1
[0] = CHAN_TO_FLOAT(span
->array
->spec
[i
][RCOMP
]);
361 col1
[1] = CHAN_TO_FLOAT(span
->array
->spec
[i
][GCOMP
]);
362 col1
[2] = CHAN_TO_FLOAT(span
->array
->spec
[i
][BCOMP
]);
363 col1
[3] = CHAN_TO_FLOAT(span
->array
->spec
[i
][ACOMP
]);
365 fogc
[0] = span
->array
->fog
[i
];
367 execute_program(ctx
, ctx
->FragmentProgram
.Current
);
369 /* Store output registers */
370 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][RCOMP
], colOut
[0]);
371 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][GCOMP
], colOut
[1]);
372 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][BCOMP
], colOut
[2]);
373 UNCLAMPED_FLOAT_TO_CHAN(span
->array
->rgba
[i
][ACOMP
], colOut
[3]);