Code reorganization: move files into their places.
[mesa.git] / src / gallium / drivers / softpipe / sp_quad_fs.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /* Vertices are just an array of floats, with all the attributes
29 * packed. We currently assume a layout like:
30 *
31 * attr[0][0..3] - window position
32 * attr[1..n][0..3] - remaining attributes.
33 *
34 * Attributes are assumed to be 4 floats wide but are packed so that
35 * all the enabled attributes run contiguously.
36 */
37
38 #include "pipe/p_util.h"
39 #include "pipe/p_defines.h"
40 #include "pipe/p_shader_tokens.h"
41
42 #include "x86/rtasm/x86sse.h"
43
44 #ifdef MESA_LLVM
45 #include "pipe/llvm/gallivm.h"
46 #endif
47
48 #include "sp_context.h"
49 #include "sp_state.h"
50 #include "sp_headers.h"
51 #include "sp_quad.h"
52 #include "sp_texture.h"
53 #include "sp_tex_sample.h"
54
55
56 struct quad_shade_stage
57 {
58 struct quad_stage stage;
59 struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
60 struct tgsi_exec_machine machine;
61 struct tgsi_exec_vector *inputs, *outputs;
62 int colorOutSlot, depthOutSlot;
63 #ifdef MESA_LLVM
64 struct gallivm_prog *llvm_prog;
65 #endif
66 };
67
68
69 /** cast wrapper */
70 static INLINE struct quad_shade_stage *
71 quad_shade_stage(struct quad_stage *qs)
72 {
73 return (struct quad_shade_stage *) qs;
74 }
75
76
77 /**
78 * Compute quad X,Y,Z,W for the four fragments in a quad.
79 * Note that we only need to "compute" X and Y for the upper-left fragment.
80 * We could do less work if we're not depth testing, or there's no
81 * perspective-corrected attributes, but that's seldom.
82 */
83 static void
84 setup_pos_vector(const struct tgsi_interp_coef *coef,
85 float x, float y,
86 struct tgsi_exec_vector *quadpos)
87 {
88 uint chan;
89 /* do X */
90 quadpos->xyzw[0].f[0] = x;
91 /* do Y */
92 quadpos->xyzw[1].f[0] = y;
93 /* do Z and W for all fragments in the quad */
94 for (chan = 2; chan < 4; chan++) {
95 const float dadx = coef->dadx[chan];
96 const float dady = coef->dady[chan];
97 const float a0 = coef->a0[chan] + dadx * x + dady * y;
98 quadpos->xyzw[chan].f[0] = a0;
99 quadpos->xyzw[chan].f[1] = a0 + dadx;
100 quadpos->xyzw[chan].f[2] = a0 + dady;
101 quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
102 }
103 }
104
105
106 typedef void (XSTDCALL *codegen_function)(
107 const struct tgsi_exec_vector *input,
108 struct tgsi_exec_vector *output,
109 float (*constant)[4],
110 struct tgsi_exec_vector *temporary,
111 const struct tgsi_interp_coef *coef
112 #if 0
113 ,const struct tgsi_exec_vector *quadPos
114 #endif
115 );
116
117
118 /**
119 * Execute fragment shader for the four fragments in the quad.
120 */
121 static void
122 shade_quad(
123 struct quad_stage *qs,
124 struct quad_header *quad )
125 {
126 struct quad_shade_stage *qss = quad_shade_stage( qs );
127 struct softpipe_context *softpipe = qs->softpipe;
128 struct tgsi_exec_machine *machine = &qss->machine;
129
130 /* Consts do not require 16 byte alignment. */
131 machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
132
133 machine->InterpCoefs = quad->coef;
134
135 /* Compute X, Y, Z, W vals for this quad */
136 setup_pos_vector(quad->posCoef, (float) quad->x0, (float) quad->y0, &machine->QuadPos);
137
138 /* run shader */
139 #if defined(__i386__) || defined(__386__)
140 if( softpipe->use_sse ) {
141 codegen_function func = (codegen_function) x86_get_func( &softpipe->fs->sse2_program );
142 func(
143 machine->Inputs,
144 machine->Outputs,
145 machine->Consts,
146 machine->Temps,
147 machine->InterpCoefs
148 #if 0
149 ,machine->QuadPos
150 #endif
151 );
152 quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
153 }
154 else
155 #endif
156 {
157 quad->mask &= tgsi_exec_machine_run( machine );
158 }
159
160 /* store result color */
161 if (qss->colorOutSlot >= 0) {
162 /* XXX need to handle multiple color outputs someday */
163 assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
164 == TGSI_SEMANTIC_COLOR);
165 memcpy(
166 quad->outputs.color,
167 &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
168 sizeof( quad->outputs.color ) );
169 }
170
171 /*
172 * XXX the following code for updating quad->outputs.depth
173 * isn't really needed if we did early z testing.
174 */
175
176 /* store result Z */
177 if (qss->depthOutSlot >= 0) {
178 /* output[slot] is new Z */
179 uint i;
180 for (i = 0; i < 4; i++) {
181 quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i];
182 }
183 }
184 else {
185 /* copy input Z (which was interpolated by the executor) to output Z */
186 uint i;
187 for (i = 0; i < 4; i++) {
188 quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i];
189 /* XXX not sure the above line is always correct. The following
190 * might be better:
191 quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i];
192 */
193 }
194 }
195
196 /* shader may cull fragments */
197 if( quad->mask ) {
198 qs->next->run( qs->next, quad );
199 }
200 }
201
/*
 * LLVM-based variant of shade_quad().  The whole section is compiled out
 * by the "#if 0" below; sp_quad_shade_stage() notes that it stays disabled
 * until it is ported to accept x/y and the SOA layout.
 */
#if 0
#ifdef MESA_LLVM
/* Set DLLVM to non-zero to enable the debug_printf() tracing below. */
#define DLLVM 0
/**
 * Execute the fragment shader for a quad through gallivm.
 *
 * Builds per-fragment input arrays (window x/y in attribute 0), has
 * gallivm interpolate the remaining inputs, runs the shader, then copies
 * colour and depth into quad->outputs and forwards the quad to the next
 * stage if any fragment is still live.
 *
 * \param qs    this stage (must be a quad_shade_stage)
 * \param quad  the quad to shade; mask and outputs are modified in place
 */
static void
shade_quad_llvm(struct quad_stage *qs,
                struct quad_header *quad)
{
   struct quad_shade_stage *qss = quad_shade_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
   /* indexed [fragment][attribute][channel] */
   float dests[4][16][4] ALIGN16_ATTRIB;
   float inputs[4][16][4] ALIGN16_ATTRIB;
   const float fx = (float) quad->x0;
   const float fy = (float) quad->y0;
   struct gallivm_prog *llvm = qss->llvm_prog;

   /* window X of the four fragments: left, right, left, right */
   inputs[0][0][0] = fx;
   inputs[1][0][0] = fx + 1.0f;
   inputs[2][0][0] = fx;
   inputs[3][0][0] = fx + 1.0f;

   /* window Y of the four fragments: top row, then the row below */
   inputs[0][0][1] = fy;
   inputs[1][0][1] = fy;
   inputs[2][0][1] = fy + 1.0f;
   inputs[3][0][1] = fy + 1.0f;
#if DLLVM
   debug_printf("MASK = %d\n", quad->mask);
#endif
   gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);
#if DLLVM
   for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
         debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j,
                      inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
      }
   }
#endif

   /* run the shader; its return value is ANDed into the live-fragment mask */
   quad->mask &=
      gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
                                   softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
                                   qss->samplers);
#if DLLVM
   debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
                dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],
                dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);
#endif

   /* store result color */
   if (qss->colorOutSlot >= 0) {
      unsigned i;
      /* XXX need to handle multiple color outputs someday */
      assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
             == TGSI_SEMANTIC_COLOR);
      /* transpose from per-fragment RGBA to per-channel arrays */
      for (i = 0; i < QUAD_SIZE; ++i) {
         quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0];
         quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1];
         quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2];
         quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3];
      }
   }
#if DLLVM
   for (int i = 0; i < QUAD_SIZE; ++i) {
      debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
                   quad->outputs.color[0][i],
                   quad->outputs.color[1][i],
                   quad->outputs.color[2][i],
                   quad->outputs.color[3][i]);
   }
#endif

   /* store result Z */
   if (qss->depthOutSlot >= 0) {
      /* output[slot] is new Z */
      /* NOTE(review): this reads output 0 rather than qss->depthOutSlot;
       * confirm before re-enabling this path.
       */
      uint i;
      for (i = 0; i < 4; i++) {
         quad->outputs.depth[i] = dests[i][0][2];
      }
   }
   else {
      /* copy input Z (which was interpolated by the executor) to output Z */
      uint i;
      for (i = 0; i < 4; i++) {
         quad->outputs.depth[i] = inputs[i][0][2];
      }
   }
#if DLLVM
   debug_printf("D [%f, %f, %f, %f] mask = %d\n",
                quad->outputs.depth[0],
                quad->outputs.depth[1],
                quad->outputs.depth[2],
                quad->outputs.depth[3], quad->mask);
#endif

   /* shader may cull fragments */
   if( quad->mask ) {
      qs->next->run( qs->next, quad );
   }
}
#endif /*MESA_LLVM*/
#endif
302
303 /**
304 * Per-primitive (or per-begin?) setup
305 */
306 static void shade_begin(struct quad_stage *qs)
307 {
308 struct quad_shade_stage *qss = quad_shade_stage(qs);
309 struct softpipe_context *softpipe = qs->softpipe;
310 unsigned i;
311
312 /* set TGSI sampler state that varies */
313 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
314 qss->samplers[i].state = softpipe->sampler[i];
315 qss->samplers[i].texture = &softpipe->texture[i]->base;
316 }
317
318 #ifdef MESA_LLVM
319 qss->llvm_prog = softpipe->fs->llvm_prog;
320 #endif
321 /* XXX only do this if the fragment shader changes... */
322 tgsi_exec_machine_init(&qss->machine,
323 softpipe->fs->shader.tokens,
324 PIPE_MAX_SAMPLERS,
325 qss->samplers );
326
327 /* find output slots for depth, color */
328 qss->colorOutSlot = -1;
329 qss->depthOutSlot = -1;
330 for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) {
331 switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) {
332 case TGSI_SEMANTIC_POSITION:
333 qss->depthOutSlot = i;
334 break;
335 case TGSI_SEMANTIC_COLOR:
336 qss->colorOutSlot = i;
337 break;
338 }
339 }
340
341 qs->next->begin(qs->next);
342 }
343
344
345 static void shade_destroy(struct quad_stage *qs)
346 {
347 struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
348
349 tgsi_exec_machine_free_data(&qss->machine);
350 FREE( qss->inputs );
351 FREE( qss->outputs );
352 FREE( qs );
353 }
354
355
356 struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
357 {
358 struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
359 uint i;
360
361 /* allocate storage for program inputs/outputs, aligned to 16 bytes */
362 qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16);
363 qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16);
364 qss->machine.Inputs = align16(qss->inputs);
365 qss->machine.Outputs = align16(qss->outputs);
366
367 qss->stage.softpipe = softpipe;
368 qss->stage.begin = shade_begin;
369 #ifdef MESA_LLVM
370 /* disable until ported to accept
371 * x/y and soa layout
372 qss->stage.run = shade_quad_llvm;
373 */
374 softpipe->use_sse = FALSE;
375 qss->stage.run = shade_quad;
376 #else
377 qss->stage.run = shade_quad;
378 #endif
379 qss->stage.destroy = shade_destroy;
380
381 /* set TGSI sampler state that's constant */
382 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
383 assert(softpipe->tex_cache[i]);
384 qss->samplers[i].get_samples = sp_get_samples;
385 qss->samplers[i].pipe = &softpipe->pipe;
386 qss->samplers[i].cache = softpipe->tex_cache[i];
387 }
388
389 return &qss->stage;
390 }