include surface.offset in address calculations
[mesa.git] / src / mesa / pipe / softpipe / sp_quad_fs.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /* Vertices are just an array of floats, with all the attributes
26 * packed. We currently assume a layout like:
27 *
28 * attr[0][0..3] - window position
29 * attr[1..n][0..3] - remaining attributes.
30 *
31 * Attributes are assumed to be 4 floats wide but are packed so that
32 * all the enabled attributes run contiguously.
33 */
34
35 #include "glheader.h"
36 #include "imports.h"
37 #include "sp_context.h"
38 #include "sp_headers.h"
39 #include "sp_quad.h"
40 #include "sp_tex_sample.h"
41 #include "tgsi/core/tgsi_core.h"
42
43 #if 0
44 #if defined __GNUC__
45 #define ALIGNED_ATTRIBS 1
46 #else
47 #define ALIGNED_ATTRIBS 0
48 #endif
49 #else
50 #define ALIGNED_ATTRIBS 0
51 #endif
52
53
/**
 * Quad fragment-shading stage: runs the fragment shader on each quad.
 * Subclass of quad_stage.
 */
struct quad_shade_stage
{
   struct quad_stage stage;   /**< base class; must be first so the quad_shade_stage() cast works */
   struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];   /**< one texture sampler per unit */
};
59
60
61 /** cast wrapper */
62 static INLINE struct quad_shade_stage *
63 quad_shade_stage(struct quad_stage *qs)
64 {
65 return (struct quad_shade_stage *) qs;
66 }
67
68
69
/**
 * Per-quad interpolation scratch state used while shading.
 * attr[][][] holds one value per attribute, per channel, per fragment.
 */
struct exec_machine {
   const struct setup_coefficient *coef; /**< will point to quad->coef */

#if ALIGNED_ATTRIBS
   /* 16-byte aligned so the TGSI exec machine can read it in place
    * (see the ALIGNED_ATTRIBS path in shade_quad()). */
   GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE] __attribute__(( aligned( 16 ) ));
#else
   GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE];
#endif
};
79
80
81 /**
82 * Compute quad's attributes values, as constants (GL_FLAT shading).
83 */
84 static INLINE void cinterp( struct exec_machine *exec,
85 GLuint attrib,
86 GLuint i )
87 {
88 GLuint j;
89
90 for (j = 0; j < QUAD_SIZE; j++) {
91 exec->attr[attrib][i][j] = exec->coef[attrib].a0[i];
92 }
93 }
94
95
96 /**
97 * Compute quad's attribute values by linear interpolation.
98 *
99 * Push into the fp:
100 *
101 * INPUT[attr] = MAD COEF_A0[attr], COEF_DADX[attr], INPUT_WPOS.xxxx
102 * INPUT[attr] = MAD INPUT[attr], COEF_DADY[attr], INPUT_WPOS.yyyy
103 */
104 static INLINE void linterp( struct exec_machine *exec,
105 GLuint attrib,
106 GLuint i )
107 {
108 GLuint j;
109
110 for (j = 0; j < QUAD_SIZE; j++) {
111 const GLfloat x = exec->attr[FRAG_ATTRIB_WPOS][0][j];
112 const GLfloat y = exec->attr[FRAG_ATTRIB_WPOS][1][j];
113 exec->attr[attrib][i][j] = (exec->coef[attrib].a0[i] +
114 exec->coef[attrib].dadx[i] * x +
115 exec->coef[attrib].dady[i] * y);
116 }
117 }
118
119
120 /**
121 * Compute quad's attribute values by linear interpolation with
122 * perspective correction.
123 *
124 * Push into the fp:
125 *
126 * INPUT[attr] = MAD COEF_DADX[attr], INPUT_WPOS.xxxx, COEF_A0[attr]
127 * INPUT[attr] = MAD COEF_DADY[attr], INPUT_WPOS.yyyy, INPUT[attr]
128 * TMP = RCP INPUT_WPOS.w
129 * INPUT[attr] = MUL INPUT[attr], TMP.xxxx
130 *
131 */
132 static INLINE void pinterp( struct exec_machine *exec,
133 GLuint attrib,
134 GLuint i )
135 {
136 GLuint j;
137
138 for (j = 0; j < QUAD_SIZE; j++) {
139 const GLfloat x = exec->attr[FRAG_ATTRIB_WPOS][0][j];
140 const GLfloat y = exec->attr[FRAG_ATTRIB_WPOS][1][j];
141 /* FRAG_ATTRIB_WPOS.w here is really 1/w */
142 const GLfloat w = 1.0 / exec->attr[FRAG_ATTRIB_WPOS][3][j];
143 exec->attr[attrib][i][j] = ((exec->coef[attrib].a0[i] +
144 exec->coef[attrib].dadx[i] * x +
145 exec->coef[attrib].dady[i] * y) * w);
146 }
147 }
148
149
/* Interpolate the quad's attribute values, then run the fragment
 * shader (TGSI exec machine) on the quad and pass it down the
 * pipeline if any fragments survive.
 *
 * This should be done by the fragment shader execution unit (code
 * generated from the decl instructions).  Do it here for now.
 */
static void
shade_quad( struct quad_stage *qs, struct quad_header *quad )
{
   struct quad_shade_stage *qss = quad_shade_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
   struct exec_machine exec;
   const GLfloat fx = quad->x0;
   const GLfloat fy = quad->y0;
   GLuint attr, i;

   exec.coef = quad->coef;

   /* Position: fragment layout within the 2x2 quad is
    *   [0]=(x,y)  [1]=(x+1,y)  [2]=(x,y+1)  [3]=(x+1,y+1)
    */
   exec.attr[FRAG_ATTRIB_WPOS][0][0] = fx;
   exec.attr[FRAG_ATTRIB_WPOS][0][1] = fx + 1.0;
   exec.attr[FRAG_ATTRIB_WPOS][0][2] = fx;
   exec.attr[FRAG_ATTRIB_WPOS][0][3] = fx + 1.0;

   exec.attr[FRAG_ATTRIB_WPOS][1][0] = fy;
   exec.attr[FRAG_ATTRIB_WPOS][1][1] = fy;
   exec.attr[FRAG_ATTRIB_WPOS][1][2] = fy + 1.0;
   exec.attr[FRAG_ATTRIB_WPOS][1][3] = fy + 1.0;

   /* Z and W are done by linear interpolation */
   if (softpipe->need_z) {
      linterp(&exec, 0, 2);   /* attr[0].z */
   }

   if (softpipe->need_w) {
      linterp(&exec, 0, 3);   /* attr[0].w (actually 1/w, see pinterp) */
      /*invert(&exec, 0, 3);*/
   }

   /* Interpolate all the remaining attributes.  This will get pushed
    * into the fragment program's responsibilities at some point.
    * Start at 1 to skip fragment position attribute (computed above).
    */
   for (attr = 1; attr < quad->nr_attrs; attr++) {
      switch (softpipe->interp[attr]) {
      case INTERP_CONSTANT:
         for (i = 0; i < NUM_CHANNELS; i++)
            cinterp(&exec, attr, i);
         break;

      case INTERP_LINEAR:
         for (i = 0; i < NUM_CHANNELS; i++)
            linterp(&exec, attr, i);
         break;

      case INTERP_PERSPECTIVE:
         for (i = 0; i < NUM_CHANNELS; i++)
            pinterp(&exec, attr, i);
         break;
      }
   }

#if 1
   /*softpipe->run_fs( tri->fp, quad, &tri->outputs );*/

   {
      struct tgsi_exec_machine machine;
      /* over-allocate by one vector so tgsi_align_128bit() can round up */
      struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX + 1];
      struct tgsi_exec_vector *aoutputs;
      GLuint i;

#if !ALIGNED_ATTRIBS
      struct tgsi_exec_vector inputs[FRAG_ATTRIB_MAX + 1];
      struct tgsi_exec_vector *ainputs;
#endif

#ifdef DEBUG
      memset(&machine, 0, sizeof(machine));
#endif

      /* init machine state */
      tgsi_exec_machine_init(
         &machine,
         softpipe->fs.tokens,
         PIPE_MAX_SAMPLERS, qss->samplers);

      /* Consts does not require 16 byte alignment. */
      machine.Consts = softpipe->fs.constants->constant;

      aoutputs = (struct tgsi_exec_vector *) tgsi_align_128bit( outputs );
      machine.Outputs = aoutputs;

      assert( sizeof( struct tgsi_exec_vector ) == sizeof( exec.attr[0] ) );

#if ALIGNED_ATTRIBS
      /* exec.attr is already 16-byte aligned: feed it to the machine
       * directly, no copy needed. */
      machine.Inputs = (struct tgsi_exec_vector *) exec.attr;

      for (i = 0; i < softpipe->nr_attrs; i++) {
         /* Make sure fp_attr_to_slot[] is an identity transform. */
         assert( softpipe->fp_attr_to_slot[i] == i );
      }
#else
      ainputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs );
      machine.Inputs = ainputs;

      /* load input registers */
      for (i = 0; i < softpipe->nr_attrs; i++) {
#if 01
         /* Make sure fp_attr_to_slot[] is an identity transform. */
         /*
         assert( softpipe->fp_attr_to_slot[i] == i );
         */
         memcpy(
            &ainputs[i],
            exec.attr[i],
            sizeof( ainputs[0] ) );
#else
         /* disabled alternative: remap via fp_attr_to_slot[] */
         memcpy(
            &ainputs[i],
            exec.attr[softpipe->fp_attr_to_slot[i]],
            sizeof( ainputs[0] ) );
#endif
      }
#endif

      /* run shader */
      tgsi_exec_machine_run( &machine );

      /* store result color */
      memcpy(quad->outputs.color,
             &aoutputs[FRAG_ATTRIB_COL0].xyzw[0].f[0],
             sizeof(quad->outputs.color));
      if (softpipe->need_z) {
         /* XXX temporary: depth comes from the interpolated attr,
          * not from the shader outputs. */
         quad->outputs.depth[0] = exec.attr[0][2][0];
         quad->outputs.depth[1] = exec.attr[0][2][1];
         quad->outputs.depth[2] = exec.attr[0][2][2];
         quad->outputs.depth[3] = exec.attr[0][2][3];
      }
   }
#else
   /* disabled fallback path: copy interpolated color straight through
    * without running the shader. */
   {
      GLuint attr = softpipe->fp_attr_to_slot[FRAG_ATTRIB_COL0];
      assert(attr);

      memcpy(quad->outputs.color,
             exec.attr[attr],
             sizeof(quad->outputs.color));

      if (softpipe->need_z) {
         quad->outputs.depth[0] = exec.attr[0][2][0];
         quad->outputs.depth[1] = exec.attr[0][2][1];
         quad->outputs.depth[2] = exec.attr[0][2][2];
         quad->outputs.depth[3] = exec.attr[0][2][3];
      }
   }
#endif

   /* shader may cull fragments */
   if (quad->mask)
      qs->next->run(qs->next, quad);
}
310
311
312 /**
313 * Per-primitive (or per-begin?) setup
314 */
315 static void shade_begin(struct quad_stage *qs)
316 {
317 struct quad_shade_stage *qss = quad_shade_stage(qs);
318 struct softpipe_context *softpipe = qs->softpipe;
319 GLuint i;
320 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
321 qss->samplers[i].state = &softpipe->sampler[i];
322 qss->samplers[i].texture = softpipe->texture[i];
323 qss->samplers[i].get_sample = sp_get_sample;
324 qss->samplers[i].pipe = &softpipe->pipe;
325 /* init cache info here */
326 qss->samplers[i].cache_x =
327 qss->samplers[i].cache_y = -1;
328 qss->samplers[i].cache_level = -1;
329 }
330
331 if (qs->next)
332 qs->next->begin(qs->next);
333 }
334
335
336 struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
337 {
338 struct quad_shade_stage *stage = CALLOC_STRUCT(quad_shade_stage);
339
340 stage->stage.softpipe = softpipe;
341 stage->stage.begin = shade_begin;
342 stage->stage.run = shade_quad;
343
344 return &stage->stage;
345 }