gallium: begin reworking quad stages for multiple color outputs
[mesa.git] / src / gallium / drivers / softpipe / sp_quad_fs.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /* Vertices are just an array of floats, with all the attributes
29 * packed. We currently assume a layout like:
30 *
31 * attr[0][0..3] - window position
32 * attr[1..n][0..3] - remaining attributes.
33 *
34 * Attributes are assumed to be 4 floats wide but are packed so that
35 * all the enabled attributes run contiguously.
36 */
37
38 #include "pipe/p_util.h"
39 #include "pipe/p_defines.h"
40 #include "pipe/p_shader_tokens.h"
41
42 #include "sp_context.h"
43 #include "sp_state.h"
44 #include "sp_headers.h"
45 #include "sp_quad.h"
46 #include "sp_texture.h"
47 #include "sp_tex_sample.h"
48
49
50 struct quad_shade_stage
51 {
52 struct quad_stage stage;
53 struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
54 struct tgsi_exec_machine machine;
55 struct tgsi_exec_vector *inputs, *outputs;
56 int colorOutSlot, depthOutSlot;
57 };
58
59
60 /** cast wrapper */
61 static INLINE struct quad_shade_stage *
62 quad_shade_stage(struct quad_stage *qs)
63 {
64 return (struct quad_shade_stage *) qs;
65 }
66
67
68
69 /**
70 * Execute fragment shader for the four fragments in the quad.
71 */
72 static void
73 shade_quad(
74 struct quad_stage *qs,
75 struct quad_header *quad )
76 {
77 struct quad_shade_stage *qss = quad_shade_stage( qs );
78 struct softpipe_context *softpipe = qs->softpipe;
79 struct tgsi_exec_machine *machine = &qss->machine;
80
81 /* Consts do not require 16 byte alignment. */
82 machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
83
84 machine->InterpCoefs = quad->coef;
85
86 /* run shader */
87 quad->mask &= softpipe->fs->run( softpipe->fs,
88 &qss->machine,
89 quad );
90
91 #if 0 /* XXX multi color outputs - untested */
92 /* store outputs */
93 boolean z_written = FALSE;
94 {
95 const ubyte *sem_name = softpipe->fs->info.output_semantic_name;
96 const ubyte *sem_index = softpipe->fs->info.output_semantic_index;
97 const uint n = qss->stage.softpipe->fs->info.num_outputs;
98 uint i;
99 for (i = 0; i < n; i++) {
100 switch (sem_name[i]) {
101 case TGSI_SEMANTIC_COLOR:
102 {
103 uint cbuf = sem_index[i];
104 memcpy(quad->outputs.color[cbuf],
105 &machine->Outputs[i].xyzw[0].f[0],
106 sizeof(quad->outputs.color[0]) );
107 }
108 break;
109 case TGSI_SEMANTIC_POSITION:
110 {
111 uint j;
112 for (j = 0; j < 4; j++) {
113 quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j];
114 }
115 z_written = TRUE;
116 }
117 break;
118 }
119 }
120 }
121
122 if (!z_written) {
123 /* compute Z values now, as in the quad earlyz stage */
124 /* XXX we should really only do this if the earlyz stage is not used */
125 const float fx = (float) quad->x0;
126 const float fy = (float) quad->y0;
127 const float dzdx = quad->posCoef->dadx[2];
128 const float dzdy = quad->posCoef->dady[2];
129 const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
130
131 quad->outputs.depth[0] = z0;
132 quad->outputs.depth[1] = z0 + dzdx;
133 quad->outputs.depth[2] = z0 + dzdy;
134 quad->outputs.depth[3] = z0 + dzdx + dzdy;
135 }
136 #endif
137
138 /* store result color(s) */
139 if (qss->colorOutSlot >= 0) {
140 /* XXX need to handle multiple color outputs someday */
141 assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
142 == TGSI_SEMANTIC_COLOR);
143 memcpy(
144 quad->outputs.color[0],
145 &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
146 sizeof( quad->outputs.color[0] ) );
147 }
148
149 /* store result Z */
150 if (qss->depthOutSlot >= 0) {
151 /* output[slot] is new Z */
152 uint i;
153 for (i = 0; i < 4; i++) {
154 quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i];
155 }
156 }
157 else {
158 /* compute Z values now, as in the quad earlyz stage */
159 /* XXX we should really only do this if the earlyz stage is not used */
160 const float fx = (float) quad->x0;
161 const float fy = (float) quad->y0;
162 const float dzdx = quad->posCoef->dadx[2];
163 const float dzdy = quad->posCoef->dady[2];
164 const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
165
166 quad->outputs.depth[0] = z0;
167 quad->outputs.depth[1] = z0 + dzdx;
168 quad->outputs.depth[2] = z0 + dzdy;
169 quad->outputs.depth[3] = z0 + dzdx + dzdy;
170 }
171
172 /* shader may cull fragments */
173 if( quad->mask ) {
174 qs->next->run( qs->next, quad );
175 }
176 }
177
178 /**
179 * Per-primitive (or per-begin?) setup
180 */
181 static void shade_begin(struct quad_stage *qs)
182 {
183 struct quad_shade_stage *qss = quad_shade_stage(qs);
184 struct softpipe_context *softpipe = qs->softpipe;
185 unsigned i;
186 unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers);
187
188 /* set TGSI sampler state that varies */
189 for (i = 0; i < num; i++) {
190 qss->samplers[i].state = softpipe->sampler[i];
191 qss->samplers[i].texture = softpipe->texture[i];
192 }
193
194 /* find output slots for depth, color */
195 qss->colorOutSlot = -1;
196 qss->depthOutSlot = -1;
197 for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) {
198 switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) {
199 case TGSI_SEMANTIC_POSITION:
200 qss->depthOutSlot = i;
201 break;
202 case TGSI_SEMANTIC_COLOR:
203 qss->colorOutSlot = i;
204 break;
205 }
206 }
207
208 softpipe->fs->prepare( softpipe->fs,
209 &qss->machine,
210 qss->samplers );
211
212 qs->next->begin(qs->next);
213 }
214
215
216 static void shade_destroy(struct quad_stage *qs)
217 {
218 struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
219
220 tgsi_exec_machine_free_data(&qss->machine);
221 FREE( qss->inputs );
222 FREE( qss->outputs );
223 FREE( qs );
224 }
225
226
227 struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
228 {
229 struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
230 uint i;
231
232 /* allocate storage for program inputs/outputs, aligned to 16 bytes */
233 qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16);
234 qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16);
235 qss->machine.Inputs = align16(qss->inputs);
236 qss->machine.Outputs = align16(qss->outputs);
237
238 qss->stage.softpipe = softpipe;
239 qss->stage.begin = shade_begin;
240 qss->stage.run = shade_quad;
241 qss->stage.destroy = shade_destroy;
242
243 /* set TGSI sampler state that's constant */
244 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
245 assert(softpipe->tex_cache[i]);
246 qss->samplers[i].get_samples = sp_get_samples;
247 qss->samplers[i].pipe = &softpipe->pipe;
248 qss->samplers[i].cache = softpipe->tex_cache[i];
249 }
250
251 tgsi_exec_machine_init( &qss->machine );
252
253 return &qss->stage;
254 }