1 /* $Id: nvvertexec.c,v 1.1 2003/01/14 04:55:46 brianp Exp $ */
4 * Mesa 3-D graphics library
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 * \brief Code to execute vertex programs.
38 #include "nvvertexec.h"
39 #include "nvvertprog.h"
41 #include "math/m_matrix.h"
45 * Load/initialize the vertex program registers.
46 * This needs to be done per vertex.
49 _mesa_init_vp_registers(GLcontext
*ctx
)
51 struct vp_machine
*machine
= &(ctx
->VertexProgram
.Machine
);
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(machine
->Registers
[VP_INPUT_REG_START
],
57 16 * 4 * sizeof(GLfloat
));
59 /* Output and temp regs are initialized to [0,0,0,1] */
60 for (i
= VP_OUTPUT_REG_START
; i
<= VP_OUTPUT_REG_END
; i
++) {
61 machine
->Registers
[i
][0] = 0.0F
;
62 machine
->Registers
[i
][1] = 0.0F
;
63 machine
->Registers
[i
][2] = 0.0F
;
64 machine
->Registers
[i
][3] = 1.0F
;
66 for (i
= VP_TEMP_REG_START
; i
<= VP_TEMP_REG_END
; i
++) {
67 machine
->Registers
[i
][0] = 0.0F
;
68 machine
->Registers
[i
][1] = 0.0F
;
69 machine
->Registers
[i
][2] = 0.0F
;
70 machine
->Registers
[i
][3] = 1.0F
;
73 /* The program regs aren't touched */
79 * Copy the 16 elements of a matrix into four consecutive program
80 * registers starting at 'pos'.
83 load_matrix(GLfloat registers
[][4], GLuint pos
, const GLfloat mat
[16])
86 pos
+= VP_PROG_REG_START
;
87 for (i
= 0; i
< 4; i
++) {
88 registers
[pos
+ i
][0] = mat
[0 + i
];
89 registers
[pos
+ i
][1] = mat
[4 + i
];
90 registers
[pos
+ i
][2] = mat
[8 + i
];
91 registers
[pos
+ i
][3] = mat
[12 + i
];
97 * As above, but transpose the matrix.
100 load_transpose_matrix(GLfloat registers
[][4], GLuint pos
,
101 const GLfloat mat
[16])
103 pos
+= VP_PROG_REG_START
;
104 MEMCPY(registers
[pos
], mat
, 16 * sizeof(GLfloat
));
109 * Load all currently tracked matrices into the program registers.
110 * This needs to be done per glBegin/glEnd.
113 _mesa_init_tracked_matrices(GLcontext
*ctx
)
117 for (i
= 0; i
< VP_NUM_PROG_REGS
/ 4; i
++) {
118 /* point 'mat' at source matrix */
120 if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_MODELVIEW
) {
121 mat
= ctx
->ModelviewMatrixStack
.Top
;
123 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_PROJECTION
) {
124 mat
= ctx
->ProjectionMatrixStack
.Top
;
126 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_TEXTURE
) {
127 mat
= ctx
->TextureMatrixStack
[ctx
->Texture
.CurrentUnit
].Top
;
129 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_COLOR
) {
130 mat
= ctx
->ColorMatrixStack
.Top
;
132 else if (ctx
->VertexProgram
.TrackMatrix
[i
]==GL_MODELVIEW_PROJECTION_NV
) {
133 /* XXX verify the combined matrix is up to date */
134 mat
= &ctx
->_ModelProjectMatrix
;
136 else if (ctx
->VertexProgram
.TrackMatrix
[i
] >= GL_MATRIX0_NV
&&
137 ctx
->VertexProgram
.TrackMatrix
[i
] <= GL_MATRIX7_NV
) {
138 GLuint n
= ctx
->VertexProgram
.TrackMatrix
[i
] - GL_MATRIX0_NV
;
139 ASSERT(n
< MAX_PROGRAM_MATRICES
);
140 mat
= ctx
->ProgramMatrixStack
[n
].Top
;
143 /* no matrix is tracked, but we leave the register values as-is */
144 assert(ctx
->VertexProgram
.TrackMatrix
[i
] == GL_NONE
);
148 /* load the matrix */
149 if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_IDENTITY_NV
) {
150 load_matrix(ctx
->VertexProgram
.Machine
.Registers
, i
*4, mat
->m
);
152 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_INVERSE_NV
) {
153 _math_matrix_analyse(mat
); /* update the inverse */
154 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
155 load_matrix(ctx
->VertexProgram
.Machine
.Registers
, i
*4, mat
->inv
);
157 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_TRANSPOSE_NV
) {
158 load_transpose_matrix(ctx
->VertexProgram
.Machine
.Registers
, i
*4, mat
->m
);
161 assert(ctx
->VertexProgram
.TrackMatrixTransform
[i
]
162 == GL_INVERSE_TRANSPOSE_NV
);
163 _math_matrix_analyse(mat
); /* update the inverse */
164 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
165 load_transpose_matrix(ctx
->VertexProgram
.Machine
.Registers
,
174 * For debugging. Dump the current vertex program machine registers.
177 _mesa_dump_vp_machine( const struct vp_machine
*machine
)
180 _mesa_printf("VertexIn:\n");
181 for (i
= 0; i
< VP_NUM_INPUT_REGS
; i
++) {
182 _mesa_printf("%d: %f %f %f %f ", i
,
183 machine
->Registers
[i
+ VP_INPUT_REG_START
][0],
184 machine
->Registers
[i
+ VP_INPUT_REG_START
][1],
185 machine
->Registers
[i
+ VP_INPUT_REG_START
][2],
186 machine
->Registers
[i
+ VP_INPUT_REG_START
][3]);
190 _mesa_printf("VertexOut:\n");
191 for (i
= 0; i
< VP_NUM_OUTPUT_REGS
; i
++) {
192 _mesa_printf("%d: %f %f %f %f ", i
,
193 machine
->Registers
[i
+ VP_OUTPUT_REG_START
][0],
194 machine
->Registers
[i
+ VP_OUTPUT_REG_START
][1],
195 machine
->Registers
[i
+ VP_OUTPUT_REG_START
][2],
196 machine
->Registers
[i
+ VP_OUTPUT_REG_START
][3]);
200 _mesa_printf("Registers:\n");
201 for (i
= 0; i
< VP_NUM_TEMP_REGS
; i
++) {
202 _mesa_printf("%d: %f %f %f %f ", i
,
203 machine
->Registers
[i
+ VP_TEMP_REG_START
][0],
204 machine
->Registers
[i
+ VP_TEMP_REG_START
][1],
205 machine
->Registers
[i
+ VP_TEMP_REG_START
][2],
206 machine
->Registers
[i
+ VP_TEMP_REG_START
][3]);
210 _mesa_printf("Parameters:\n");
211 for (i
= 0; i
< VP_NUM_PROG_REGS
; i
++) {
212 _mesa_printf("%d: %f %f %f %f ", i
,
213 machine
->Registers
[i
+ VP_PROG_REG_START
][0],
214 machine
->Registers
[i
+ VP_PROG_REG_START
][1],
215 machine
->Registers
[i
+ VP_PROG_REG_START
][2],
216 machine
->Registers
[i
+ VP_PROG_REG_START
][3]);
223 * Fetch a 4-element float vector from the given source register.
224 * Apply swizzling and negating as needed.
227 fetch_vector4( const struct vp_src_register
*source
,
228 const struct vp_machine
*machine
,
231 static const GLfloat zero
[4] = { 0, 0, 0, 0 };
234 if (source
->RelAddr
) {
235 GLint reg
= source
->Register
+ machine
->AddressReg
;
236 if (reg
< VP_PROG_REG_START
|| reg
> VP_PROG_REG_END
)
239 src
= machine
->Registers
[reg
];
242 src
= machine
->Registers
[source
->Register
];
245 if (source
->Negate
) {
246 result
[0] = -src
[source
->Swizzle
[0]];
247 result
[1] = -src
[source
->Swizzle
[1]];
248 result
[2] = -src
[source
->Swizzle
[2]];
249 result
[3] = -src
[source
->Swizzle
[3]];
252 result
[0] = src
[source
->Swizzle
[0]];
253 result
[1] = src
[source
->Swizzle
[1]];
254 result
[2] = src
[source
->Swizzle
[2]];
255 result
[3] = src
[source
->Swizzle
[3]];
261 * As above, but only return result[0] element.
264 fetch_vector1( const struct vp_src_register
*source
,
265 const struct vp_machine
*machine
,
268 static const GLfloat zero
[4] = { 0, 0, 0, 0 };
271 if (source
->RelAddr
) {
272 GLint reg
= source
->Register
+ machine
->AddressReg
;
273 if (reg
< VP_PROG_REG_START
|| reg
> VP_PROG_REG_END
)
276 src
= machine
->Registers
[reg
];
279 src
= machine
->Registers
[source
->Register
];
282 if (source
->Negate
) {
283 result
[0] = -src
[source
->Swizzle
[0]];
286 result
[0] = src
[source
->Swizzle
[0]];
292 * Store 4 floats into a register.
295 store_vector4( const struct vp_dst_register
*dest
, struct vp_machine
*machine
,
296 const GLfloat value
[4] )
298 GLfloat
*dst
= machine
->Registers
[dest
->Register
];
300 if (dest
->WriteMask
[0])
302 if (dest
->WriteMask
[1])
304 if (dest
->WriteMask
[2])
306 if (dest
->WriteMask
[3])
/**
 * Set x to positive or negative infinity.
 *
 * Three alternative definitions are provided: one writing the IEEE-754
 * single-precision bit pattern directly, one using __MAXFLOAT as a stand-in
 * for infinity, and a generic one based on HUGE_VAL.
 *
 * NOTE(review): the preprocessor conditionals selecting between the three
 * variants were lost from the listing this block was recovered from;
 * USE_IEEE and VMS are assumed -- confirm against upstream.
 *
 * Macro arguments and expansions are parenthesized so the macros behave as
 * single expressions regardless of the argument passed.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the storage of float x with the integer bit pattern 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
329 * Execute the given vertex program
332 _mesa_exec_vertex_program(GLcontext
*ctx
, const struct vertex_program
*program
)
334 struct vp_machine
*machine
= &ctx
->VertexProgram
.Machine
;
335 const struct vp_instruction
*inst
;
337 /* XXX load vertex fields into input registers */
338 /* and do other initialization */
341 for (inst
= program
->Instructions
; inst
->Opcode
!= VP_OPCODE_END
; inst
++) {
342 switch (inst
->Opcode
) {
346 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
347 store_vector4( &inst
->DstReg
, machine
, t
);
352 const GLfloat epsilon
= 1.0e-5F
; /* XXX fix? */
353 GLfloat t
[4], lit
[4];
354 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
355 if (t
[3] < -(128.0F
- epsilon
))
356 t
[3] = - (128.0F
- epsilon
);
357 else if (t
[3] > 128.0F
- epsilon
)
358 t
[3] = 128.0F
- epsilon
;
365 lit
[2] = (t
[0] > 0.0) ? (GLfloat
) exp(t
[3] * log(t
[1])) : 0.0F
;
367 store_vector4( &inst
->DstReg
, machine
, lit
);
373 fetch_vector1( &inst
->SrcReg
[0], machine
, t
);
375 t
[0] = 1.0F
/ t
[0]; /* div by zero is infinity! */
376 t
[1] = t
[2] = t
[3] = t
[0];
377 store_vector4( &inst
->DstReg
, machine
, t
);
383 fetch_vector1( &inst
->SrcReg
[0], machine
, t
);
384 t
[0] = (float) (1.0 / sqrt(fabs(t
[0])));
385 t
[1] = t
[2] = t
[3] = t
[0];
386 store_vector4( &inst
->DstReg
, machine
, t
);
391 GLfloat t
[4], q
[4], floor_t0
;
392 fetch_vector1( &inst
->SrcReg
[0], machine
, t
);
393 floor_t0
= (float) floor(t
[0]);
394 if (floor_t0
> FLT_MAX_EXP
) {
395 SET_POS_INFINITY(q
[0]);
397 SET_POS_INFINITY(q
[2]);
400 else if (floor_t0
< FLT_MIN_EXP
) {
408 GLint ii
= (GLint
) floor_t0
;
409 ii
= (ii
< 23) + 0x3f800000;
410 SET_FLOAT_BITS(q
[0], ii
);
411 q
[0] = *((GLfloat
*) &ii
);
413 q
[0] = (GLfloat
) pow(2.0, floor_t0
);
415 q
[1] = t
[0] - floor_t0
;
416 q
[2] = (GLfloat
) (q
[0] * LOG2(q
[1]));
419 store_vector4( &inst
->DstReg
, machine
, t
);
424 GLfloat t
[4], q
[4], abs_t0
;
425 fetch_vector1( &inst
->SrcReg
[0], machine
, t
);
426 abs_t0
= (GLfloat
) fabs(t
[0]);
427 if (abs_t0
!= 0.0F
) {
428 /* Since we really can't handle infinite values on VMS
429 * like other OSes we'll use __MAXFLOAT to represent
430 * infinity. This may need some tweaking.
433 if (abs_t0
== __MAXFLOAT
) {
435 if (IS_INF_OR_NAN(abs_t0
)) {
437 SET_POS_INFINITY(q
[0]);
439 SET_POS_INFINITY(q
[2]);
443 double mantissa
= frexp(t
[0], &exponent
);
444 q
[0] = (GLfloat
) (exponent
- 1);
445 q
[1] = (GLfloat
) (2.0 * mantissa
); /* map [.5, 1) -> [1, 2) */
446 q
[2] = (GLfloat
) (q
[0] + LOG2(q
[1]));
450 SET_NEG_INFINITY(q
[0]);
452 SET_NEG_INFINITY(q
[2]);
455 store_vector4( &inst
->DstReg
, machine
, q
);
460 GLfloat t
[4], u
[4], prod
[4];
461 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
462 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
463 prod
[0] = t
[0] * u
[0];
464 prod
[1] = t
[1] * u
[1];
465 prod
[2] = t
[2] * u
[2];
466 prod
[3] = t
[3] * u
[3];
467 store_vector4( &inst
->DstReg
, machine
, prod
);
472 GLfloat t
[4], u
[4], sum
[4];
473 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
474 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
475 sum
[0] = t
[0] + u
[0];
476 sum
[1] = t
[1] + u
[1];
477 sum
[2] = t
[2] + u
[2];
478 sum
[3] = t
[3] + u
[3];
479 store_vector4( &inst
->DstReg
, machine
, sum
);
484 GLfloat t
[4], u
[4], dot
[4];
485 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
486 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
487 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2];
488 dot
[1] = dot
[2] = dot
[3] = dot
[0];
489 store_vector4( &inst
->DstReg
, machine
, dot
);
494 GLfloat t
[4], u
[4], dot
[4];
495 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
496 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
497 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + t
[3] * u
[3];
498 dot
[1] = dot
[2] = dot
[3] = dot
[0];
499 store_vector4( &inst
->DstReg
, machine
, dot
);
504 GLfloat t
[4], u
[4], dst
[4];
505 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
506 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
508 dst
[1] = t
[1] * u
[1];
511 store_vector4( &inst
->DstReg
, machine
, dst
);
516 GLfloat t
[4], u
[4], min
[4];
517 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
518 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
519 min
[0] = (t
[0] < u
[0]) ? t
[0] : u
[0];
520 min
[1] = (t
[1] < u
[1]) ? t
[1] : u
[1];
521 min
[2] = (t
[2] < u
[2]) ? t
[2] : u
[2];
522 min
[3] = (t
[3] < u
[3]) ? t
[3] : u
[3];
523 store_vector4( &inst
->DstReg
, machine
, min
);
528 GLfloat t
[4], u
[4], max
[4];
529 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
530 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
531 max
[0] = (t
[0] > u
[0]) ? t
[0] : u
[0];
532 max
[1] = (t
[1] > u
[1]) ? t
[1] : u
[1];
533 max
[2] = (t
[2] > u
[2]) ? t
[2] : u
[2];
534 max
[3] = (t
[3] > u
[3]) ? t
[3] : u
[3];
535 store_vector4( &inst
->DstReg
, machine
, max
);
540 GLfloat t
[4], u
[4], slt
[4];
541 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
542 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
543 slt
[0] = (t
[0] < u
[0]) ? 1.0F
: 0.0F
;
544 slt
[1] = (t
[1] < u
[1]) ? 1.0F
: 0.0F
;
545 slt
[2] = (t
[2] < u
[2]) ? 1.0F
: 0.0F
;
546 slt
[3] = (t
[3] < u
[3]) ? 1.0F
: 0.0F
;
547 store_vector4( &inst
->DstReg
, machine
, slt
);
552 GLfloat t
[4], u
[4], sge
[4];
553 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
554 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
555 sge
[0] = (t
[0] >= u
[0]) ? 1.0F
: 0.0F
;
556 sge
[1] = (t
[1] >= u
[1]) ? 1.0F
: 0.0F
;
557 sge
[2] = (t
[2] >= u
[2]) ? 1.0F
: 0.0F
;
558 sge
[3] = (t
[3] >= u
[3]) ? 1.0F
: 0.0F
;
559 store_vector4( &inst
->DstReg
, machine
, sge
);
564 GLfloat t
[4], u
[4], v
[4], sum
[4];
565 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
566 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
567 fetch_vector4( &inst
->SrcReg
[2], machine
, v
);
568 sum
[0] = t
[0] * u
[0] + v
[0];
569 sum
[1] = t
[1] * u
[1] + v
[1];
570 sum
[2] = t
[2] * u
[2] + v
[2];
571 sum
[3] = t
[3] * u
[3] + v
[3];
572 store_vector4( &inst
->DstReg
, machine
, sum
);
578 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
579 machine
->AddressReg
= (GLint
) floor(t
[0]);
584 GLfloat t
[4], u
[4], dot
[4];
585 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
586 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
587 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + u
[3];
588 dot
[1] = dot
[2] = dot
[3] = dot
[0];
589 store_vector4( &inst
->DstReg
, machine
, dot
);
595 fetch_vector1( &inst
->SrcReg
[0], machine
, t
);
601 if (u
> 1.884467e+019F
) {
602 u
= 1.884467e+019F
; /* IEEE 32-bit binary value 0x5F800000 */
604 else if (u
< 5.42101e-020F
) {
605 u
= 5.42101e-020F
; /* IEEE 32-bit binary value 0x1F800000 */
609 if (u
< -1.884467e+019F
) {
610 u
= -1.884467e+019F
; /* IEEE 32-bit binary value 0xDF800000 */
612 else if (u
> -5.42101e-020F
) {
613 u
= -5.42101e-020F
; /* IEEE 32-bit binary value 0x9F800000 */
616 t
[0] = t
[1] = t
[2] = t
[3] = u
;
617 store_vector4( &inst
->DstReg
, machine
, t
);
622 GLfloat t
[4], u
[4], sum
[4];
623 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
624 fetch_vector4( &inst
->SrcReg
[1], machine
, u
);
625 sum
[0] = t
[0] - u
[0];
626 sum
[1] = t
[1] - u
[1];
627 sum
[2] = t
[2] - u
[2];
628 sum
[3] = t
[3] - u
[3];
629 store_vector4( &inst
->DstReg
, machine
, sum
);
635 fetch_vector4( &inst
->SrcReg
[0], machine
, t
);
636 if (t
[0] < 0.0) t
[0] = -t
[0];
637 if (t
[1] < 0.0) t
[1] = -t
[1];
638 if (t
[2] < 0.0) t
[2] = -t
[2];
639 if (t
[3] < 0.0) t
[3] = -t
[3];
640 store_vector4( &inst
->DstReg
, machine
, t
);
647 /* bad instruction opcode */
648 _mesa_problem(ctx
, "Bad VP Opcode in _mesa_exec_vertex_program");
657 Thoughts on vertex program optimization:
659 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
660 assembly code. That will probably be a lot of work.
662 Another approach might be to replace the vp_instruction->Opcode field with
663 a pointer to a specialized C function which executes the instruction.
664 In particular we can write functions which skip swizzling, negating,
665 masking, relative addressing, etc. when they're not needed.
669 void simple_add( struct vp_instruction *inst )
671 GLfloat *sum = machine->Registers[inst->DstReg.Register];
672 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
673 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
674 sum[0] = a[0] + b[0];
675 sum[1] = a[1] + b[1];
676 sum[2] = a[2] + b[2];
677 sum[3] = a[3] + b[3];
686 A first step would be to 'vectorize' the programs in the same way as
687 the normal transformation code in the tnl module. Thus each opcode
688 takes zero or more input vectors (registers) and produces one or more
691 These operations would initially be coded in C, with machine-specific
692 assembly following, as is currently the case for matrix
693 transformations in the math/ directory. The preprocessing scheme for
694 selecting simpler operations Brian describes above would also work
697 This should give reasonable performance without excessive effort.