improved solve_plane_chan() (Evgeny Kotsuba)
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /* $Id: s_nvfragprog.c,v 1.1 2003/01/14 04:57:47 brianp Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 5.1
6 *
7 * Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28
29 #include "glheader.h"
30 #include "colormac.h"
31 #include "context.h"
32 #include "nvfragprog.h"
33 #include "macros.h"
34 #include "mmath.h"
35
36 #include "s_nvfragprog.h"
37
38
39 /**
40 * Fetch a 4-element float vector from the given source register.
41 * Apply swizzling and negating as needed.
42 */
43 static void
44 fetch_vector4( const struct fp_src_register *source,
45 const struct fp_machine *machine,
46 GLfloat result[4] )
47 {
48 const GLfloat *src;
49
50 /*
51 if (source->RelAddr) {
52 GLint reg = source->Register + machine->AddressReg;
53 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
54 src = zero;
55 else
56 src = machine->Registers[reg];
57 }
58 else
59 */
60
61 src = machine->Registers[source->Register];
62
63 result[0] = src[source->Swizzle[0]];
64 result[1] = src[source->Swizzle[1]];
65 result[2] = src[source->Swizzle[2]];
66 result[3] = src[source->Swizzle[3]];
67
68 if (source->NegateBase) {
69 result[0] = -result[0];
70 result[1] = -result[1];
71 result[2] = -result[2];
72 result[3] = -result[3];
73 }
74 if (source->Abs) {
75 result[0] = FABSF(result[0]);
76 result[1] = FABSF(result[1]);
77 result[2] = FABSF(result[2]);
78 result[3] = FABSF(result[3]);
79 }
80 if (source->NegateAbs) {
81 result[0] = -result[0];
82 result[1] = -result[1];
83 result[2] = -result[2];
84 result[3] = -result[3];
85 }
86 }
87
88
89 /**
90 * As above, but only return result[0] element.
91 */
92 static void
93 fetch_vector1( const struct fp_src_register *source,
94 const struct fp_machine *machine,
95 GLfloat result[4] )
96 {
97 const GLfloat *src;
98
99 /*
100 if (source->RelAddr) {
101 GLint reg = source->Register + machine->AddressReg;
102 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
103 src = zero;
104 else
105 src = machine->Registers[reg];
106 }
107 else
108 */
109
110 src = machine->Registers[source->Register];
111
112 result[0] = src[source->Swizzle[0]];
113
114 if (source->NegateBase) {
115 result[0] = -result[0];
116 }
117 if (source->Abs) {
118 result[0] = FABSF(result[0]);
119 }
120 if (source->NegateAbs) {
121 result[0] = -result[0];
122 }
123 }
124
125
126 /*
127 * Test value against zero and return GT, LT, EQ or UN if NaN.
128 */
129 static INLINE GLuint
130 generate_cc( float value )
131 {
132 if (value != value)
133 return COND_UN; /* NaN */
134 if (value > 0.0F)
135 return COND_GT;
136 if (value < 0.0F)
137 return COND_LT;
138 return COND_EQ;
139 }
140
141 /*
142 * Test if the ccMaskRule is satisfied by the given condition code.
143 * Used to mask destination writes according to the current condition codee.
144 */
145 static INLINE GLboolean
146 test_cc(GLuint condCode, GLuint ccMaskRule)
147 {
148 switch (ccMaskRule) {
149 case COND_EQ: return (condCode == COND_EQ);
150 case COND_NE: return (condCode != COND_EQ);
151 case COND_LT: return (condCode == COND_LT);
152 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
153 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
154 case COND_GT: return (condCode == COND_GT);
155 case COND_TR: return GL_TRUE;
156 case COND_FL: return GL_FALSE;
157 default: return GL_TRUE;
158 }
159 }
160
161
162 /**
163 * Store 4 floats into a register.
164 */
165 static void
166 store_vector4( const struct fp_dst_register *dest, struct fp_machine *machine,
167 const GLfloat value[4], GLboolean clamp, GLboolean updateCC )
168 {
169 GLfloat *dstReg = machine->Registers[dest->Register];
170 GLfloat clampedValue[4];
171 const GLboolean *writeMask = dest->WriteMask;
172 GLboolean condWriteMask[4];
173
174 if (clamp) {
175 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
176 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
177 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
178 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
179 value = clampedValue;
180 }
181
182 if (dest->CondMask != COND_TR) {
183 condWriteMask[0] = writeMask[0]
184 && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
185 condWriteMask[1] = writeMask[1]
186 && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
187 condWriteMask[2] = writeMask[2]
188 && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
189 condWriteMask[3] = writeMask[3]
190 && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
191 writeMask = condWriteMask;
192 }
193
194 if (writeMask[0]) {
195 dstReg[0] = value[0];
196 if (updateCC)
197 machine->CondCodes[0] = generate_cc(value[0]);
198 }
199 if (writeMask[1]) {
200 dstReg[1] = value[1];
201 if (updateCC)
202 machine->CondCodes[1] = generate_cc(value[1]);
203 }
204 if (writeMask[2]) {
205 dstReg[2] = value[2];
206 if (updateCC)
207 machine->CondCodes[2] = generate_cc(value[2]);
208 }
209 if (writeMask[3]) {
210 dstReg[3] = value[3];
211 if (updateCC)
212 machine->CondCodes[3] = generate_cc(value[3]);
213 }
214 }
215
216
217 /**
218 * Execute the given vertex program
219 */
220 static void
221 execute_program(GLcontext *ctx, const struct fragment_program *program)
222 {
223 struct fp_machine *machine = &ctx->FragmentProgram.Machine;
224 const struct fp_instruction *inst;
225
226 for (inst = program->Instructions; inst->Opcode != FP_OPCODE_END; inst++) {
227 switch (inst->Opcode) {
228 case FP_OPCODE_ADD:
229 {
230 GLfloat a[4], b[4], result[4];
231 fetch_vector4( &inst->SrcReg[0], machine, a );
232 fetch_vector4( &inst->SrcReg[1], machine, b );
233 result[0] = a[0] + b[0];
234 result[1] = a[1] + b[1];
235 result[2] = a[2] + b[2];
236 result[3] = a[3] + b[3];
237 store_vector4( &inst->DstReg, machine, result, inst->Saturate,
238 inst->UpdateCondRegister );
239 }
240 break;
241 case FP_OPCODE_COS:
242 {
243 GLfloat a[4], result[4];
244 fetch_vector1( &inst->SrcReg[0], machine, a );
245 result[0] = result[1] = result[2] = result[3] = cos(a[0]);
246 store_vector4( &inst->DstReg, machine, result, inst->Saturate,
247 inst->UpdateCondRegister );
248 }
249 break;
250 case FP_OPCODE_DP3:
251 {
252 GLfloat a[4], b[4], result[4];
253 fetch_vector4( &inst->SrcReg[0], machine, a );
254 fetch_vector4( &inst->SrcReg[1], machine, b );
255 result[0] = result[1] = result[2] = result[3] =
256 a[0] + b[0] + a[1] * b[1] + a[2] * b[2];
257 store_vector4( &inst->DstReg, machine, result, inst->Saturate,
258 inst->UpdateCondRegister );
259 }
260 break;
261 case FP_OPCODE_DP4:
262 {
263 GLfloat a[4], b[4], result[4];
264 fetch_vector4( &inst->SrcReg[0], machine, a );
265 fetch_vector4( &inst->SrcReg[1], machine, b );
266 result[0] = result[1] = result[2] = result[3] =
267 a[0] + b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
268 store_vector4( &inst->DstReg, machine, result, inst->Saturate,
269 inst->UpdateCondRegister );
270 }
271 break;
272 case FP_OPCODE_KIL:
273 {
274 const GLuint *swizzle = inst->DstReg.CondSwizzle;
275 const GLuint condMask = inst->DstReg.CondMask;
276 if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
277 test_cc(machine->CondCodes[swizzle[1]], condMask) ||
278 test_cc(machine->CondCodes[swizzle[2]], condMask) ||
279 test_cc(machine->CondCodes[swizzle[3]], condMask))
280 return;
281 }
282 break;
283 case FP_OPCODE_LRP:
284 {
285 GLfloat a[4], b[4], c[4], result[4];
286 fetch_vector4( &inst->SrcReg[0], machine, a );
287 fetch_vector4( &inst->SrcReg[1], machine, b );
288 fetch_vector4( &inst->SrcReg[2], machine, c );
289 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
290 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
291 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
292 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
293 store_vector4( &inst->DstReg, machine, result, inst->Saturate,
294 inst->UpdateCondRegister );
295 }
296 break;
297 case FP_OPCODE_MOV:
298 {
299 GLfloat t[4];
300 fetch_vector4( &inst->SrcReg[0], machine, t );
301 store_vector4( &inst->DstReg, machine, t, inst->Saturate,
302 inst->UpdateCondRegister );
303 }
304 break;
305 case FP_OPCODE_SEQ:
306 {
307 GLfloat a[4], b[4], result[4];
308 fetch_vector4( &inst->SrcReg[0], machine, a );
309 fetch_vector4( &inst->SrcReg[1], machine, b );
310 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
311 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
312 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
313 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
314 store_vector4( &inst->DstReg, machine, result, inst->Saturate,
315 inst->UpdateCondRegister );
316 }
317 break;
318 default:
319 _mesa_problem(ctx, "Bad opcode in _mesa_exec_fragment_program");
320 return;
321 }
322 }
323
324 }
325
326
327
328 void
329 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
330 {
331 GLuint i;
332
333 for (i = 0; i < span->end; i++) {
334 GLfloat *wpos = ctx->FragmentProgram.Machine.Registers[0];
335 GLfloat *col0 = ctx->FragmentProgram.Machine.Registers[1];
336 GLfloat *col1 = ctx->FragmentProgram.Machine.Registers[2];
337 GLfloat *fogc = ctx->FragmentProgram.Machine.Registers[3];
338 const GLfloat *colOut = ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START];
339 GLuint j;
340
341 /* Clear temporary registers */
342 for (j = 0; j < MAX_NV_FRAGMENT_PROGRAM_TEMPS; j++) {
343 ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][0] = 0.0F;
344 ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][1] = 0.0F;
345 ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][2] = 0.0F;
346 ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][3] = 0.0F;
347 }
348
349 /* Load input registers */
350 wpos[0] = span->x + i;
351 wpos[1] = span->y + i;
352 wpos[2] = span->array->z[i];
353 wpos[3] = 1.0;
354
355 col0[0] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
356 col0[1] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
357 col0[2] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
358 col0[3] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
359
360 col1[0] = CHAN_TO_FLOAT(span->array->spec[i][RCOMP]);
361 col1[1] = CHAN_TO_FLOAT(span->array->spec[i][GCOMP]);
362 col1[2] = CHAN_TO_FLOAT(span->array->spec[i][BCOMP]);
363 col1[3] = CHAN_TO_FLOAT(span->array->spec[i][ACOMP]);
364
365 fogc[0] = span->array->fog[i];
366
367 execute_program(ctx, ctx->FragmentProgram.Current);
368
369 /* Store output registers */
370 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
371 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
372 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
373 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
374 }
375 }
376