1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 /* Vertices are just an array of floats, with all the attributes
29 * packed. We currently assume a layout like:
31 * attr[0][0..3] - window position
32 * attr[1..n][0..3] - remaining attributes.
34 * Attributes are assumed to be 4 floats wide but are packed so that
35 * all the enabled attributes run contiguously.
38 #include "pipe/p_util.h"
39 #include "pipe/p_defines.h"
41 #include "x86/rtasm/x86sse.h"
44 #include "pipe/llvm/gallivm.h"
47 #include "sp_context.h"
49 #include "sp_headers.h"
51 #include "sp_tex_sample.h"
54 struct quad_shade_stage
56 struct quad_stage stage
;
57 struct tgsi_sampler samplers
[PIPE_MAX_SAMPLERS
];
58 struct tgsi_exec_machine machine
;
59 struct tgsi_exec_vector
*inputs
, *outputs
;
60 int colorOutSlot
, depthOutSlot
;
62 struct gallivm_prog
*llvm_prog
;
68 static INLINE
struct quad_shade_stage
*
69 quad_shade_stage(struct quad_stage
*qs
)
71 return (struct quad_shade_stage
*) qs
;
75 typedef void (XSTDCALL
*codegen_function
)(
76 const struct tgsi_exec_vector
*input
,
77 struct tgsi_exec_vector
*output
,
79 struct tgsi_exec_vector
*temporary
,
80 const struct tgsi_interp_coef
*coef
);
82 /* This should be done by the fragment shader execution unit (code
83 * generated from the decl instructions). Do it here for now.
87 struct quad_stage
*qs
,
88 struct quad_header
*quad
)
90 struct quad_shade_stage
*qss
= quad_shade_stage( qs
);
91 struct softpipe_context
*softpipe
= qs
->softpipe
;
92 const float fx
= (float) quad
->x0
;
93 const float fy
= (float) quad
->y0
;
94 struct tgsi_exec_machine
*machine
= &qss
->machine
;
96 /* Consts does not require 16 byte alignment. */
97 machine
->Consts
= softpipe
->mapped_constants
[PIPE_SHADER_FRAGMENT
];
99 machine
->SamplerUnits
= softpipe
->sampler_units
;
100 machine
->InterpCoefs
= quad
->coef
;
102 machine
->Inputs
[0].xyzw
[0].f
[0] = fx
;
103 machine
->Inputs
[0].xyzw
[0].f
[1] = fx
+ 1.0f
;
104 machine
->Inputs
[0].xyzw
[0].f
[2] = fx
;
105 machine
->Inputs
[0].xyzw
[0].f
[3] = fx
+ 1.0f
;
107 /* XXX for OpenGL we need to invert the Y pos here (y=0=top).
108 * but that'll mess up linear/perspective interpolation of other
111 machine
->Inputs
[0].xyzw
[1].f
[0] = fy
;
112 machine
->Inputs
[0].xyzw
[1].f
[1] = fy
;
113 machine
->Inputs
[0].xyzw
[1].f
[2] = fy
+ 1.0f
;
114 machine
->Inputs
[0].xyzw
[1].f
[3] = fy
+ 1.0f
;
117 #if defined(__i386__) || defined(__386__)
118 if( softpipe
->use_sse
) {
119 codegen_function func
= (codegen_function
) x86_get_func( &softpipe
->fs
->sse2_program
);
125 machine
->InterpCoefs
);
126 quad
->mask
&= ~(machine
->Temps
[TGSI_EXEC_TEMP_KILMASK_I
].xyzw
[TGSI_EXEC_TEMP_KILMASK_C
].u
[0]);
131 quad
->mask
&= tgsi_exec_machine_run( machine
);
134 /* store result color */
135 if (qss
->colorOutSlot
>= 0) {
136 /* XXX need to handle multiple color outputs someday */
137 assert(qss
->stage
.softpipe
->fs
->shader
.output_semantic_name
[qss
->colorOutSlot
]
138 == TGSI_SEMANTIC_COLOR
);
141 &machine
->Outputs
[qss
->colorOutSlot
].xyzw
[0].f
[0],
142 sizeof( quad
->outputs
.color
) );
146 if (qss
->depthOutSlot
>= 0) {
147 /* output[slot] is new Z */
149 for (i
= 0; i
< 4; i
++) {
150 quad
->outputs
.depth
[i
] = machine
->Outputs
[0].xyzw
[2].f
[i
];
154 /* copy input Z (which was interpolated by the executor) to output Z */
156 for (i
= 0; i
< 4; i
++) {
157 quad
->outputs
.depth
[i
] = machine
->Inputs
[0].xyzw
[2].f
[i
];
161 /* shader may cull fragments */
163 qs
->next
->run( qs
->next
, quad
);
#ifdef MESA_LLVM

/* Set to 1 to dump per-quad inputs and results to stdout while debugging. */
#ifndef SP_LLVM_QUAD_DEBUG
#define SP_LLVM_QUAD_DEBUG 0
#endif

/**
 * LLVM variant of shade_quad(): run the current gallivm fragment program
 * on one quad and hand the quad to the next stage.
 *
 * Inputs and results live in local [fragment][attribute][channel] arrays.
 * Input 0 receives the window position of the four fragments; the
 * remaining inputs are filled by gallivm_prog_inputs_interpolate().
 *
 * \param qs    this stage (a struct quad_shade_stage)
 * \param quad  the quad to shade; its outputs are updated in place
 */
static void
shade_quad_llvm(struct quad_stage *qs,
                struct quad_header *quad)
{
   struct quad_shade_stage *qss = quad_shade_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
   float dests[4][16][4];
   float inputs[4][16][4];
   const float fx = (float) quad->x0;
   const float fy = (float) quad->y0;
   struct gallivm_prog *llvm = qss->llvm_prog;
   int i;

   memset(inputs, 0, sizeof(inputs));

   /* X position of the four fragments: left, right, left, right */
   inputs[0][0][0] = fx;
   inputs[1][0][0] = fx + 1.0f;
   inputs[2][0][0] = fx;
   inputs[3][0][0] = fx + 1.0f;

   /* Y position of the four fragments: top, top, bottom, bottom */
   inputs[0][0][1] = fy;
   inputs[1][0][1] = fy;
   inputs[2][0][1] = fy + 1.0f;
   inputs[3][0][1] = fy + 1.0f;

#if SP_LLVM_QUAD_DEBUG
   printf("MASK = %d\n", quad->mask);
#endif

   gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);

#if SP_LLVM_QUAD_DEBUG
   {
      int j;
      for (i = 0; i < 4; ++i) {
         for (j = 0; j < 2; ++j) {
            printf("IN(%d,%d) [%f %f %f %f]\n", i, j,
                   inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
         }
      }
   }
   /* the dump below reads output slot 1, which the shader may not write */
   memset(dests, 0, sizeof(dests));
#endif

   gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
                                softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
                                qss->samplers, softpipe->sampler_units);

#if SP_LLVM_QUAD_DEBUG
   printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
          dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],
          dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);
#endif

   /* store result color */
   if (qss->colorOutSlot >= 0) {
      /* XXX need to handle multiple color outputs someday */
      assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
             == TGSI_SEMANTIC_COLOR);
      /* transpose [fragment][channel] results into [channel][fragment] */
      for (i = 0; i < QUAD_SIZE; ++i) {
         quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0];
         quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1];
         quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2];
         quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3];
      }
   }

#if SP_LLVM_QUAD_DEBUG
   for (i = 0; i < QUAD_SIZE; ++i) {
      printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
             quad->outputs.color[0][i],
             quad->outputs.color[1][i],
             quad->outputs.color[2][i],
             quad->outputs.color[3][i]);
   }
#endif

   /* store result Z */
   if (qss->depthOutSlot >= 0) {
      /* output[slot] is new Z: read the slot shade_begin() found for the
       * POSITION output rather than assuming it is output 0.
       */
      for (i = 0; i < 4; i++) {
         quad->outputs.depth[i] = dests[i][qss->depthOutSlot][2];
      }
   }
   else {
      /* copy input Z (which was interpolated by the executor) to output Z */
      for (i = 0; i < 4; i++) {
         quad->outputs.depth[i] = inputs[i][0][2];
      }
   }

#if SP_LLVM_QUAD_DEBUG
   printf("D [%f, %f, %f, %f] mask = %d\n",
          quad->outputs.depth[0],
          quad->outputs.depth[1],
          quad->outputs.depth[2],
          quad->outputs.depth[3], quad->mask);
#endif

   /* shader may cull fragments; nothing to pass on if none survived */
   if (quad->mask) {
      qs->next->run( qs->next, quad );
   }
}

#endif /* MESA_LLVM */
268 * Per-primitive (or per-begin?) setup
270 static void shade_begin(struct quad_stage
*qs
)
272 struct quad_shade_stage
*qss
= quad_shade_stage(qs
);
273 struct softpipe_context
*softpipe
= qs
->softpipe
;
276 /* set TGSI sampler state that varies */
277 for (i
= 0; i
< PIPE_MAX_SAMPLERS
; i
++) {
278 qss
->samplers
[i
].state
= softpipe
->sampler
[i
];
279 qss
->samplers
[i
].texture
= softpipe
->texture
[i
];
283 qss
->llvm_prog
= softpipe
->fs
->llvm_prog
;
285 /* XXX only do this if the fragment shader changes... */
286 tgsi_exec_machine_init(&qss
->machine
,
287 softpipe
->fs
->shader
.tokens
,
291 /* find output slots for depth, color */
292 qss
->colorOutSlot
= -1;
293 qss
->depthOutSlot
= -1;
294 for (i
= 0; i
< qss
->stage
.softpipe
->fs
->shader
.num_outputs
; i
++) {
295 switch (qss
->stage
.softpipe
->fs
->shader
.output_semantic_name
[i
]) {
296 case TGSI_SEMANTIC_POSITION
:
297 qss
->depthOutSlot
= i
;
299 case TGSI_SEMANTIC_COLOR
:
300 qss
->colorOutSlot
= i
;
306 qs
->next
->begin(qs
->next
);
310 static void shade_destroy(struct quad_stage
*qs
)
312 struct quad_shade_stage
*qss
= (struct quad_shade_stage
*) qs
;
315 FREE( qss
->outputs
);
320 struct quad_stage
*sp_quad_shade_stage( struct softpipe_context
*softpipe
)
322 struct quad_shade_stage
*qss
= CALLOC_STRUCT(quad_shade_stage
);
325 /* allocate storage for program inputs/outputs, aligned to 16 bytes */
326 qss
->inputs
= MALLOC(PIPE_ATTRIB_MAX
* sizeof(*qss
->inputs
) + 16);
327 qss
->outputs
= MALLOC(PIPE_ATTRIB_MAX
* sizeof(*qss
->outputs
) + 16);
328 qss
->machine
.Inputs
= align16(qss
->inputs
);
329 qss
->machine
.Outputs
= align16(qss
->outputs
);
331 qss
->stage
.softpipe
= softpipe
;
332 qss
->stage
.begin
= shade_begin
;
334 qss
->stage
.run
= shade_quad_llvm
;
336 qss
->stage
.run
= shade_quad
;
338 qss
->stage
.destroy
= shade_destroy
;
340 /* set TGSI sampler state that's constant */
341 for (i
= 0; i
< PIPE_MAX_SAMPLERS
; i
++) {
342 assert(softpipe
->tex_cache
[i
]);
343 qss
->samplers
[i
].get_samples
= sp_get_samples
;
344 qss
->samplers
[i
].pipe
= &softpipe
->pipe
;
345 qss
->samplers
[i
].cache
= softpipe
->tex_cache
[i
];