1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/* Vertices are just an array of floats, with all the attributes
 * packed.  We currently assume a layout like:
 *
 * attr[0][0..3] - window position
 * attr[1..n][0..3] - remaining attributes.
 *
 * Attributes are assumed to be 4 floats wide but are packed so that
 * all the enabled attributes run contiguously.
 */
#include <string.h>

#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"

#include "x86/rtasm/x86sse.h"

#include "pipe/llvm/gallivm.h"

#include "sp_context.h"
#include "sp_headers.h"
#include "sp_texture.h"
#include "sp_tex_sample.h"
56 struct quad_shade_stage
58 struct quad_stage stage
;
59 struct tgsi_sampler samplers
[PIPE_MAX_SAMPLERS
];
60 struct tgsi_exec_machine machine
;
61 struct tgsi_exec_vector
*inputs
, *outputs
;
62 int colorOutSlot
, depthOutSlot
;
64 struct gallivm_prog
*llvm_prog
;
70 static INLINE
struct quad_shade_stage
*
71 quad_shade_stage(struct quad_stage
*qs
)
73 return (struct quad_shade_stage
*) qs
;
78 * Compute quad X,Y,Z,W for the four fragments in a quad.
79 * Note that we only need to "compute" X and Y for the upper-left fragment.
80 * We could do less work if we're not depth testing, or there's no
81 * perspective-corrected attributes, but that's seldom.
84 setup_pos_vector(const struct tgsi_interp_coef
*coef
,
86 struct tgsi_exec_vector
*quadpos
)
90 quadpos
->xyzw
[0].f
[0] = x
;
92 quadpos
->xyzw
[1].f
[0] = y
;
93 /* do Z and W for all fragments in the quad */
94 for (chan
= 2; chan
< 4; chan
++) {
95 const float dadx
= coef
->dadx
[chan
];
96 const float dady
= coef
->dady
[chan
];
97 const float a0
= coef
->a0
[chan
] + dadx
* x
+ dady
* y
;
98 quadpos
->xyzw
[chan
].f
[0] = a0
;
99 quadpos
->xyzw
[chan
].f
[1] = a0
+ dadx
;
100 quadpos
->xyzw
[chan
].f
[2] = a0
+ dady
;
101 quadpos
->xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
106 typedef void (XSTDCALL
*codegen_function
)(
107 const struct tgsi_exec_vector
*input
,
108 struct tgsi_exec_vector
*output
,
109 float (*constant
)[4],
110 struct tgsi_exec_vector
*temporary
,
111 const struct tgsi_interp_coef
*coef
113 ,const struct tgsi_exec_vector
*quadPos
119 * Execute fragment shader for the four fragments in the quad.
123 struct quad_stage
*qs
,
124 struct quad_header
*quad
)
126 struct quad_shade_stage
*qss
= quad_shade_stage( qs
);
127 struct softpipe_context
*softpipe
= qs
->softpipe
;
128 struct tgsi_exec_machine
*machine
= &qss
->machine
;
130 /* Consts do not require 16 byte alignment. */
131 machine
->Consts
= softpipe
->mapped_constants
[PIPE_SHADER_FRAGMENT
];
133 machine
->InterpCoefs
= quad
->coef
;
135 /* Compute X, Y, Z, W vals for this quad */
136 setup_pos_vector(quad
->posCoef
, (float) quad
->x0
, (float) quad
->y0
, &machine
->QuadPos
);
139 #if defined(__i386__) || defined(__386__)
140 if( softpipe
->use_sse
) {
141 codegen_function func
= (codegen_function
) x86_get_func( &softpipe
->fs
->sse2_program
);
152 quad
->mask
&= ~(machine
->Temps
[TGSI_EXEC_TEMP_KILMASK_I
].xyzw
[TGSI_EXEC_TEMP_KILMASK_C
].u
[0]);
157 quad
->mask
&= tgsi_exec_machine_run( machine
);
160 /* store result color */
161 if (qss
->colorOutSlot
>= 0) {
162 /* XXX need to handle multiple color outputs someday */
163 assert(qss
->stage
.softpipe
->fs
->shader
.output_semantic_name
[qss
->colorOutSlot
]
164 == TGSI_SEMANTIC_COLOR
);
167 &machine
->Outputs
[qss
->colorOutSlot
].xyzw
[0].f
[0],
168 sizeof( quad
->outputs
.color
) );
172 * XXX the following code for updating quad->outputs.depth
173 * isn't really needed if we did early z testing.
177 if (qss
->depthOutSlot
>= 0) {
178 /* output[slot] is new Z */
180 for (i
= 0; i
< 4; i
++) {
181 quad
->outputs
.depth
[i
] = machine
->Outputs
[0].xyzw
[2].f
[i
];
185 /* copy input Z (which was interpolated by the executor) to output Z */
187 for (i
= 0; i
< 4; i
++) {
188 quad
->outputs
.depth
[i
] = machine
->Inputs
[0].xyzw
[2].f
[i
];
189 /* XXX not sure the above line is always correct. The following
191 quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i];
196 /* shader may cull fragments */
198 qs
->next
->run( qs
->next
, quad
);
206 shade_quad_llvm(struct quad_stage
*qs
,
207 struct quad_header
*quad
)
209 struct quad_shade_stage
*qss
= quad_shade_stage(qs
);
210 struct softpipe_context
*softpipe
= qs
->softpipe
;
211 float dests
[4][16][4] ALIGN16_ATTRIB
;
212 float inputs
[4][16][4] ALIGN16_ATTRIB
;
213 const float fx
= (float) quad
->x0
;
214 const float fy
= (float) quad
->y0
;
215 struct gallivm_prog
*llvm
= qss
->llvm_prog
;
217 inputs
[0][0][0] = fx
;
218 inputs
[1][0][0] = fx
+ 1.0f
;
219 inputs
[2][0][0] = fx
;
220 inputs
[3][0][0] = fx
+ 1.0f
;
222 inputs
[0][0][1] = fy
;
223 inputs
[1][0][1] = fy
;
224 inputs
[2][0][1] = fy
+ 1.0f
;
225 inputs
[3][0][1] = fy
+ 1.0f
;
227 debug_printf("MASK = %d\n", quad
->mask
);
229 gallivm_prog_inputs_interpolate(llvm
, inputs
, quad
->coef
);
231 for (int i
= 0; i
< 4; ++i
) {
232 for (int j
= 0; j
< 2; ++j
) {
233 debug_printf("IN(%d,%d) [%f %f %f %f]\n", i
, j
,
234 inputs
[i
][j
][0], inputs
[i
][j
][1], inputs
[i
][j
][2], inputs
[i
][j
][3]);
240 gallivm_fragment_shader_exec(llvm
, fx
, fy
, dests
, inputs
,
241 softpipe
->mapped_constants
[PIPE_SHADER_FRAGMENT
],
244 debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
245 dests
[0][0][0], dests
[0][0][1], dests
[0][0][2], dests
[0][0][3],
246 dests
[0][1][0], dests
[0][1][1], dests
[0][1][2], dests
[0][1][3]);
249 /* store result color */
250 if (qss
->colorOutSlot
>= 0) {
252 /* XXX need to handle multiple color outputs someday */
253 assert(qss
->stage
.softpipe
->fs
->shader
.output_semantic_name
[qss
->colorOutSlot
]
254 == TGSI_SEMANTIC_COLOR
);
255 for (i
= 0; i
< QUAD_SIZE
; ++i
) {
256 quad
->outputs
.color
[0][i
] = dests
[i
][qss
->colorOutSlot
][0];
257 quad
->outputs
.color
[1][i
] = dests
[i
][qss
->colorOutSlot
][1];
258 quad
->outputs
.color
[2][i
] = dests
[i
][qss
->colorOutSlot
][2];
259 quad
->outputs
.color
[3][i
] = dests
[i
][qss
->colorOutSlot
][3];
263 for (int i
= 0; i
< QUAD_SIZE
; ++i
) {
264 debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i
, qss
->colorOutSlot
,
265 quad
->outputs
.color
[0][i
],
266 quad
->outputs
.color
[1][i
],
267 quad
->outputs
.color
[2][i
],
268 quad
->outputs
.color
[3][i
]);
273 if (qss
->depthOutSlot
>= 0) {
274 /* output[slot] is new Z */
276 for (i
= 0; i
< 4; i
++) {
277 quad
->outputs
.depth
[i
] = dests
[i
][0][2];
281 /* copy input Z (which was interpolated by the executor) to output Z */
283 for (i
= 0; i
< 4; i
++) {
284 quad
->outputs
.depth
[i
] = inputs
[i
][0][2];
288 debug_printf("D [%f, %f, %f, %f] mask = %d\n",
289 quad
->outputs
.depth
[0],
290 quad
->outputs
.depth
[1],
291 quad
->outputs
.depth
[2],
292 quad
->outputs
.depth
[3], quad
->mask
);
295 /* shader may cull fragments */
297 qs
->next
->run( qs
->next
, quad
);
304 * Per-primitive (or per-begin?) setup
306 static void shade_begin(struct quad_stage
*qs
)
308 struct quad_shade_stage
*qss
= quad_shade_stage(qs
);
309 struct softpipe_context
*softpipe
= qs
->softpipe
;
312 /* set TGSI sampler state that varies */
313 for (i
= 0; i
< PIPE_MAX_SAMPLERS
; i
++) {
314 qss
->samplers
[i
].state
= softpipe
->sampler
[i
];
315 qss
->samplers
[i
].texture
= &softpipe
->texture
[i
]->base
;
319 qss
->llvm_prog
= softpipe
->fs
->llvm_prog
;
321 /* XXX only do this if the fragment shader changes... */
322 tgsi_exec_machine_init(&qss
->machine
,
323 softpipe
->fs
->shader
.tokens
,
327 /* find output slots for depth, color */
328 qss
->colorOutSlot
= -1;
329 qss
->depthOutSlot
= -1;
330 for (i
= 0; i
< qss
->stage
.softpipe
->fs
->shader
.num_outputs
; i
++) {
331 switch (qss
->stage
.softpipe
->fs
->shader
.output_semantic_name
[i
]) {
332 case TGSI_SEMANTIC_POSITION
:
333 qss
->depthOutSlot
= i
;
335 case TGSI_SEMANTIC_COLOR
:
336 qss
->colorOutSlot
= i
;
341 qs
->next
->begin(qs
->next
);
345 static void shade_destroy(struct quad_stage
*qs
)
347 struct quad_shade_stage
*qss
= (struct quad_shade_stage
*) qs
;
349 tgsi_exec_machine_free_data(&qss
->machine
);
351 FREE( qss
->outputs
);
356 struct quad_stage
*sp_quad_shade_stage( struct softpipe_context
*softpipe
)
358 struct quad_shade_stage
*qss
= CALLOC_STRUCT(quad_shade_stage
);
361 /* allocate storage for program inputs/outputs, aligned to 16 bytes */
362 qss
->inputs
= MALLOC(PIPE_ATTRIB_MAX
* sizeof(*qss
->inputs
) + 16);
363 qss
->outputs
= MALLOC(PIPE_ATTRIB_MAX
* sizeof(*qss
->outputs
) + 16);
364 qss
->machine
.Inputs
= align16(qss
->inputs
);
365 qss
->machine
.Outputs
= align16(qss
->outputs
);
367 qss
->stage
.softpipe
= softpipe
;
368 qss
->stage
.begin
= shade_begin
;
370 /* disable until ported to accept
372 qss->stage.run = shade_quad_llvm;
374 softpipe
->use_sse
= FALSE
;
375 qss
->stage
.run
= shade_quad
;
377 qss
->stage
.run
= shade_quad
;
379 qss
->stage
.destroy
= shade_destroy
;
381 /* set TGSI sampler state that's constant */
382 for (i
= 0; i
< PIPE_MAX_SAMPLERS
; i
++) {
383 assert(softpipe
->tex_cache
[i
]);
384 qss
->samplers
[i
].get_samples
= sp_get_samples
;
385 qss
->samplers
[i
].pipe
= &softpipe
->pipe
;
386 qss
->samplers
[i
].cache
= softpipe
->tex_cache
[i
];