Merge branch 'i915tex_privbuffers' into softpipe_0_1_branch
[mesa.git] / src / mesa / pipe / softpipe / sp_quad_fs.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /* Vertices are just an array of floats, with all the attributes
26 * packed. We currently assume a layout like:
27 *
28 * attr[0][0..3] - window position
29 * attr[1..n][0..3] - remaining attributes.
30 *
31 * Attributes are assumed to be 4 floats wide but are packed so that
32 * all the enabled attributes run contiguously.
33 */
34
35 #include "glheader.h"
36 #include "imports.h"
37 #include "sp_context.h"
38 #include "sp_headers.h"
39 #include "sp_quad.h"
40 #include "core/tgsi_core.h"
41
/*
 * ALIGNED_ATTRIBS is 1 when building with a GCC-compatible compiler and 0
 * otherwise.  When set, exec_machine::attr is declared with 16-byte
 * alignment and shade_quad() hands that array to the shader as its input
 * registers directly; when clear, shade_quad() copies the attributes into
 * a separately aligned input array first.
 */
#ifdef __GNUC__
#define ALIGNED_ATTRIBS 1
#else
#define ALIGNED_ATTRIBS 0
#endif
47
48 struct exec_machine {
49 const struct setup_coefficient *coef; /**< will point to quad->coef */
50
51 #if ALIGNED_ATTRIBS
52 GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE] __attribute__(( aligned( 16 ) ));
53 #else
54 GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE];
55 #endif
56 };
57
58
59 /**
60 * Compute quad's attributes values, as constants (GL_FLAT shading).
61 */
62 static INLINE void cinterp( struct exec_machine *exec,
63 GLuint attrib,
64 GLuint i )
65 {
66 GLuint j;
67
68 for (j = 0; j < QUAD_SIZE; j++) {
69 exec->attr[attrib][i][j] = exec->coef[attrib].a0[i];
70 }
71 }
72
73
74 /**
75 * Compute quad's attribute values by linear interpolation.
76 *
77 * Push into the fp:
78 *
79 * INPUT[attr] = MAD COEF_A0[attr], COEF_DADX[attr], INPUT_WPOS.xxxx
80 * INPUT[attr] = MAD INPUT[attr], COEF_DADY[attr], INPUT_WPOS.yyyy
81 */
82 static INLINE void linterp( struct exec_machine *exec,
83 GLuint attrib,
84 GLuint i )
85 {
86 GLuint j;
87
88 for (j = 0; j < QUAD_SIZE; j++) {
89 const GLfloat x = exec->attr[FRAG_ATTRIB_WPOS][0][j];
90 const GLfloat y = exec->attr[FRAG_ATTRIB_WPOS][1][j];
91 exec->attr[attrib][i][j] = (exec->coef[attrib].a0[i] +
92 exec->coef[attrib].dadx[i] * x +
93 exec->coef[attrib].dady[i] * y);
94 }
95 }
96
97
98 /**
99 * Compute quad's attribute values by linear interpolation with
100 * perspective correction.
101 *
102 * Push into the fp:
103 *
104 * INPUT[attr] = MAD COEF_DADX[attr], INPUT_WPOS.xxxx, COEF_A0[attr]
105 * INPUT[attr] = MAD COEF_DADY[attr], INPUT_WPOS.yyyy, INPUT[attr]
106 * TMP = RCP INPUT_WPOS.w
107 * INPUT[attr] = MUL INPUT[attr], TMP.xxxx
108 *
109 */
110 static INLINE void pinterp( struct exec_machine *exec,
111 GLuint attrib,
112 GLuint i )
113 {
114 GLuint j;
115
116 for (j = 0; j < QUAD_SIZE; j++) {
117 const GLfloat x = exec->attr[FRAG_ATTRIB_WPOS][0][j];
118 const GLfloat y = exec->attr[FRAG_ATTRIB_WPOS][1][j];
119 /* FRAG_ATTRIB_WPOS.w here is really 1/w */
120 const GLfloat w = 1.0 / exec->attr[FRAG_ATTRIB_WPOS][3][j];
121 exec->attr[attrib][i][j] = ((exec->coef[attrib].a0[i] +
122 exec->coef[attrib].dadx[i] * x +
123 exec->coef[attrib].dady[i] * y) * w);
124 }
125 }
126
127
128
129 /* This should be done by the fragment shader execution unit (code
130 * generated from the decl instructions). Do it here for now.
131 */
132 static void
133 shade_quad( struct quad_stage *qs, struct quad_header *quad )
134 {
135 const struct softpipe_context *softpipe = qs->softpipe;
136 struct exec_machine exec;
137 const GLfloat fx = quad->x0;
138 const GLfloat fy = quad->y0;
139 GLuint attr, i;
140
141 exec.coef = quad->coef;
142
143 /* Position:
144 */
145 exec.attr[FRAG_ATTRIB_WPOS][0][0] = fx;
146 exec.attr[FRAG_ATTRIB_WPOS][0][1] = fx + 1.0;
147 exec.attr[FRAG_ATTRIB_WPOS][0][2] = fx;
148 exec.attr[FRAG_ATTRIB_WPOS][0][3] = fx + 1.0;
149
150 exec.attr[FRAG_ATTRIB_WPOS][1][0] = fy;
151 exec.attr[FRAG_ATTRIB_WPOS][1][1] = fy;
152 exec.attr[FRAG_ATTRIB_WPOS][1][2] = fy + 1.0;
153 exec.attr[FRAG_ATTRIB_WPOS][1][3] = fy + 1.0;
154
155 /* Z and W are done by linear interpolation */
156 if (softpipe->need_z) {
157 linterp(&exec, 0, 2); /* attr[0].z */
158 }
159
160 if (softpipe->need_w) {
161 linterp(&exec, 0, 3); /* attr[0].w */
162 /*invert(&exec, 0, 3);*/
163 }
164
165 /* Interpolate all the remaining attributes. This will get pushed
166 * into the fragment program's responsibilities at some point.
167 * Start at 1 to skip fragment position attribute (computed above).
168 */
169 for (attr = 1; attr < quad->nr_attrs; attr++) {
170 switch (softpipe->interp[attr]) {
171 case INTERP_CONSTANT:
172 for (i = 0; i < NUM_CHANNELS; i++)
173 cinterp(&exec, attr, i);
174 break;
175
176 case INTERP_LINEAR:
177 for (i = 0; i < NUM_CHANNELS; i++)
178 linterp(&exec, attr, i);
179 break;
180
181 case INTERP_PERSPECTIVE:
182 for (i = 0; i < NUM_CHANNELS; i++)
183 pinterp(&exec, attr, i);
184 break;
185 }
186 }
187
188 #if 1
189 /*softpipe->run_fs( tri->fp, quad, &tri->outputs );*/
190
191 {
192 struct tgsi_exec_machine machine;
193 struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX + 1];
194 struct tgsi_exec_vector *aoutputs;
195 GLuint i;
196
197 #if !ALIGNED_ATTRIBS
198 struct tgsi_exec_vector inputs[FRAG_ATTRIB_MAX + 1];
199 struct tgsi_exec_vector *ainputs;
200 #endif
201
202 #ifdef DEBUG
203 memset(&machine, 0, sizeof(machine));
204 #endif
205
206 /* init machine state */
207 tgsi_exec_machine_init(
208 &machine,
209 softpipe->fs.tokens );
210
211 /* Consts does not require 16 byte alignment. */
212 machine.Consts = softpipe->fs.constants->constant;
213
214 aoutputs = (struct tgsi_exec_vector *) tgsi_align_128bit( outputs );
215 machine.Outputs = aoutputs;
216
217 assert( sizeof( struct tgsi_exec_vector ) == sizeof( exec.attr[0] ) );
218
219 #if ALIGNED_ATTRIBS
220 machine.Inputs = (struct tgsi_exec_vector *) exec.attr;
221
222 for (i = 0; i < softpipe->nr_attrs; i++) {
223 /* Make sure fp_attr_to_slot[] is an identity transform. */
224 assert( softpipe->fp_attr_to_slot[i] == i );
225 }
226 #else
227 ainputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs );
228 machine.Inputs = ainputs;
229
230 /* load input registers */
231 for (i = 0; i < softpipe->nr_attrs; i++) {
232 /* Make sure fp_attr_to_slot[] is an identity transform. */
233 assert( softpipe->fp_attr_to_slot[i] == i );
234
235 memcpy(
236 &ainputs[i],
237 exec.attr[i],
238 sizeof( ainputs[0] ) );
239 }
240 #endif
241
242 /* run shader */
243 tgsi_exec_machine_run( &machine );
244
245 /* store result color */
246 memcpy(quad->outputs.color,
247 &aoutputs[FRAG_ATTRIB_COL0].xyzw[0].f[0],
248 sizeof(quad->outputs.color));
249 if (softpipe->need_z) {
250 /* XXX temporary */
251 quad->outputs.depth[0] = exec.attr[0][2][0];
252 quad->outputs.depth[1] = exec.attr[0][2][1];
253 quad->outputs.depth[2] = exec.attr[0][2][2];
254 quad->outputs.depth[3] = exec.attr[0][2][3];
255 }
256 }
257 #else
258 {
259 GLuint attr = softpipe->fp_attr_to_slot[FRAG_ATTRIB_COL0];
260 assert(attr);
261
262 memcpy(quad->outputs.color,
263 exec.attr[attr],
264 sizeof(quad->outputs.color));
265
266 if (softpipe->need_z) {
267 quad->outputs.depth[0] = exec.attr[0][2][0];
268 quad->outputs.depth[1] = exec.attr[0][2][1];
269 quad->outputs.depth[2] = exec.attr[0][2][2];
270 quad->outputs.depth[3] = exec.attr[0][2][3];
271 }
272 }
273 #endif
274
275 /* shader may cull fragments */
276 if (quad->mask)
277 qs->next->run(qs->next, quad);
278 }
279
280
281
282 struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
283 {
284 struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
285
286 stage->softpipe = softpipe;
287 stage->run = shade_quad;
288
289 return stage;
290 }