ed14dac18e4566487618ee31ee40cdfd2054ad59
[mesa.git] / src / mesa / pipe / softpipe / sp_quad_fs.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /* Vertices are just an array of floats, with all the attributes
29 * packed. We currently assume a layout like:
30 *
31 * attr[0][0..3] - window position
32 * attr[1..n][0..3] - remaining attributes.
33 *
34 * Attributes are assumed to be 4 floats wide but are packed so that
35 * all the enabled attributes run contiguously.
36 */
37
38 #include "pipe/p_util.h"
39 #include "pipe/p_defines.h"
40
41 #include "x86/rtasm/x86sse.h"
42
43 #ifdef MESA_LLVM
44 #include "pipe/llvm/gallivm.h"
45 #endif
46
47 #include "sp_context.h"
48 #include "sp_state.h"
49 #include "sp_headers.h"
50 #include "sp_quad.h"
51 #include "sp_tex_sample.h"
52
53
54 struct quad_shade_stage
55 {
56 struct quad_stage stage;
57 struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
58 struct tgsi_exec_machine machine;
59 struct tgsi_exec_vector *inputs, *outputs;
60 int colorOutSlot, depthOutSlot;
61 #ifdef MESA_LLVM
62 struct gallivm_prog *llvm_prog;
63 #endif
64 };
65
66
67 /** cast wrapper */
68 static INLINE struct quad_shade_stage *
69 quad_shade_stage(struct quad_stage *qs)
70 {
71 return (struct quad_shade_stage *) qs;
72 }
73
74
75 typedef void (XSTDCALL *codegen_function)(
76 const struct tgsi_exec_vector *input,
77 struct tgsi_exec_vector *output,
78 float (*constant)[4],
79 struct tgsi_exec_vector *temporary,
80 const struct tgsi_interp_coef *coef );
81
82 /* This should be done by the fragment shader execution unit (code
83 * generated from the decl instructions). Do it here for now.
84 */
85 static void
86 shade_quad(
87 struct quad_stage *qs,
88 struct quad_header *quad )
89 {
90 struct quad_shade_stage *qss = quad_shade_stage( qs );
91 struct softpipe_context *softpipe = qs->softpipe;
92 const float fx = (float) quad->x0;
93 const float fy = (float) quad->y0;
94 struct tgsi_exec_machine *machine = &qss->machine;
95
96 /* Consts does not require 16 byte alignment. */
97 machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
98
99 machine->SamplerUnits = softpipe->sampler_units;
100 machine->InterpCoefs = quad->coef;
101
102 machine->Inputs[0].xyzw[0].f[0] = fx;
103 machine->Inputs[0].xyzw[0].f[1] = fx + 1.0f;
104 machine->Inputs[0].xyzw[0].f[2] = fx;
105 machine->Inputs[0].xyzw[0].f[3] = fx + 1.0f;
106
107 /* XXX for OpenGL we need to invert the Y pos here (y=0=top).
108 * but that'll mess up linear/perspective interpolation of other
109 * attributes...
110 */
111 machine->Inputs[0].xyzw[1].f[0] = fy;
112 machine->Inputs[0].xyzw[1].f[1] = fy;
113 machine->Inputs[0].xyzw[1].f[2] = fy + 1.0f;
114 machine->Inputs[0].xyzw[1].f[3] = fy + 1.0f;
115
116 /* run shader */
117 #if defined(__i386__) || defined(__386__)
118 if( softpipe->use_sse ) {
119 codegen_function func = (codegen_function) x86_get_func( &softpipe->fs->sse2_program );
120 func(
121 machine->Inputs,
122 machine->Outputs,
123 machine->Consts,
124 machine->Temps,
125 machine->InterpCoefs );
126 quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
127 }
128 else
129 #endif
130 {
131 quad->mask &= tgsi_exec_machine_run( machine );
132 }
133
134 /* store result color */
135 if (qss->colorOutSlot >= 0) {
136 /* XXX need to handle multiple color outputs someday */
137 assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
138 == TGSI_SEMANTIC_COLOR);
139 memcpy(
140 quad->outputs.color,
141 &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
142 sizeof( quad->outputs.color ) );
143 }
144
145 /* store result Z */
146 if (qss->depthOutSlot >= 0) {
147 /* output[slot] is new Z */
148 uint i;
149 for (i = 0; i < 4; i++) {
150 quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i];
151 }
152 }
153 else {
154 /* copy input Z (which was interpolated by the executor) to output Z */
155 uint i;
156 for (i = 0; i < 4; i++) {
157 quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i];
158 }
159 }
160
161 /* shader may cull fragments */
162 if( quad->mask ) {
163 qs->next->run( qs->next, quad );
164 }
165 }
166
#ifdef MESA_LLVM
#define DLLVM 0
/**
 * LLVM variant of the quad shading function: interpolate the inputs,
 * execute the gallivm program for the quad, store the resulting color
 * and depth in the quad, and pass the quad on to the next stage if its
 * mask is non-zero.
 *
 * Set DLLVM to 1 to print the inputs and outputs of every quad.
 *
 * \param qs    this stage (embedded in a struct quad_shade_stage)
 * \param quad  quad to shade; quad->outputs.color and
 *              quad->outputs.depth are updated in place
 */
static void
shade_quad_llvm(struct quad_stage *qs,
                struct quad_header *quad)
{
   struct quad_shade_stage *qss = quad_shade_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
   float dests[4][16][4];
   const float fx = (float) quad->x0;
   const float fy = (float) quad->y0;
   struct gallivm_prog *llvm = qss->llvm_prog;
   float inputs[4][16][4];
   memset(inputs, 0, sizeof(inputs));

   /* Both arrays are indexed [fragment][attribute slot][component].
    * Slot 0 carries the window position: x is x0, x0+1, x0, x0+1 and
    * y is y0, y0, y0+1, y0+1 for fragments 0..3.
    */
   inputs[0][0][0] = fx;
   inputs[1][0][0] = fx + 1.0f;
   inputs[2][0][0] = fx;
   inputs[3][0][0] = fx + 1.0f;

   inputs[0][0][1] = fy;
   inputs[1][0][1] = fy;
   inputs[2][0][1] = fy + 1.0f;
   inputs[3][0][1] = fy + 1.0f;
#if DLLVM
   printf("MASK = %d\n", quad->mask);
#endif
   gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);
#if DLLVM
   for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
         printf("IN(%d,%d) [%f %f %f %f]\n", i, j,
                inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
      }
   }
#endif

   /* Note: quad->mask is not updated from the shader on this path (the
    * "&=" below is disabled).
    */
   /*quad->mask &=*/
   gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
                                softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
                                qss->samplers, softpipe->sampler_units);
#if DLLVM
   printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
          dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],
          dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);
#endif

   /* store result color */
   if (qss->colorOutSlot >= 0) {
      unsigned i;
      /* XXX need to handle multiple color outputs someday */
      assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
             == TGSI_SEMANTIC_COLOR);
      /* dests is per-fragment, quad->outputs.color is per-component */
      for (i = 0; i < QUAD_SIZE; ++i) {
         quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0];
         quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1];
         quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2];
         quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3];
      }
   }
#if DLLVM
   for (int i = 0; i < QUAD_SIZE; ++i) {
      printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
             quad->outputs.color[0][i],
             quad->outputs.color[1][i],
             quad->outputs.color[2][i],
             quad->outputs.color[3][i]);
   }
#endif

   /* store result Z */
   if (qss->depthOutSlot >= 0) {
      /* The shader wrote a position: output[depthOutSlot].z is the new Z.
       * Read the slot found by shade_begin() rather than assuming slot 0.
       */
      uint i;
      for (i = 0; i < 4; i++) {
         quad->outputs.depth[i] = dests[i][qss->depthOutSlot][2];
      }
   }
   else {
      /* copy input Z (which was interpolated by the executor) to output Z */
      uint i;
      for (i = 0; i < 4; i++) {
         quad->outputs.depth[i] = inputs[i][0][2];
      }
   }
#if DLLVM
   printf("D [%f, %f, %f, %f] mask = %d\n",
          quad->outputs.depth[0],
          quad->outputs.depth[1],
          quad->outputs.depth[2],
          quad->outputs.depth[3], quad->mask);
#endif

   /* shader may cull fragments */
   if( quad->mask ) {
      qs->next->run( qs->next, quad );
   }
}
#endif /*MESA_LLVM*/
266
267 /**
268 * Per-primitive (or per-begin?) setup
269 */
270 static void shade_begin(struct quad_stage *qs)
271 {
272 struct quad_shade_stage *qss = quad_shade_stage(qs);
273 struct softpipe_context *softpipe = qs->softpipe;
274 unsigned i;
275
276 /* set TGSI sampler state that varies */
277 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
278 qss->samplers[i].state = softpipe->sampler[i];
279 qss->samplers[i].texture = softpipe->texture[i];
280 }
281
282 #ifdef MESA_LLVM
283 qss->llvm_prog = softpipe->fs->llvm_prog;
284 #endif
285 /* XXX only do this if the fragment shader changes... */
286 tgsi_exec_machine_init(&qss->machine,
287 softpipe->fs->shader.tokens,
288 PIPE_MAX_SAMPLERS,
289 qss->samplers );
290
291 /* find output slots for depth, color */
292 qss->colorOutSlot = -1;
293 qss->depthOutSlot = -1;
294 for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) {
295 switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) {
296 case TGSI_SEMANTIC_POSITION:
297 qss->depthOutSlot = i;
298 break;
299 case TGSI_SEMANTIC_COLOR:
300 qss->colorOutSlot = i;
301 break;
302 }
303 }
304
305 if (qs->next)
306 qs->next->begin(qs->next);
307 }
308
309
310 static void shade_destroy(struct quad_stage *qs)
311 {
312 struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
313
314 FREE( qss->inputs );
315 FREE( qss->outputs );
316 FREE( qs );
317 }
318
319
320 struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
321 {
322 struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
323 uint i;
324
325 /* allocate storage for program inputs/outputs, aligned to 16 bytes */
326 qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16);
327 qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16);
328 qss->machine.Inputs = align16(qss->inputs);
329 qss->machine.Outputs = align16(qss->outputs);
330
331 qss->stage.softpipe = softpipe;
332 qss->stage.begin = shade_begin;
333 #ifdef MESA_LLVM
334 qss->stage.run = shade_quad_llvm;
335 #else
336 qss->stage.run = shade_quad;
337 #endif
338 qss->stage.destroy = shade_destroy;
339
340 /* set TGSI sampler state that's constant */
341 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
342 assert(softpipe->tex_cache[i]);
343 qss->samplers[i].get_samples = sp_get_samples;
344 qss->samplers[i].pipe = &softpipe->pipe;
345 qss->samplers[i].cache = softpipe->tex_cache[i];
346 }
347
348 return &qss->stage;
349 }