51d0eb5bacbc2549ab55c5c627d2b44effce4383
[mesa.git] / src / gallium / drivers / svga / svga_state_fs.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 #include "util/u_inlines.h"
27 #include "pipe/p_defines.h"
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "util/u_bitmask.h"
31 #include "tgsi/tgsi_ureg.h"
32
33 #include "svga_context.h"
34 #include "svga_state.h"
35 #include "svga_cmd.h"
36 #include "svga_resource_texture.h"
37 #include "svga_tgsi.h"
38
39 #include "svga_hw_reg.h"
40
41
42
43 static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a,
44 const struct svga_fs_compile_key *b )
45 {
46 unsigned keysize_a = svga_fs_key_size( a );
47 unsigned keysize_b = svga_fs_key_size( b );
48
49 if (keysize_a != keysize_b) {
50 return (int)(keysize_a - keysize_b);
51 }
52 return memcmp( a, b, keysize_a );
53 }
54
55
56 static struct svga_shader_result *search_fs_key( struct svga_fragment_shader *fs,
57 const struct svga_fs_compile_key *key )
58 {
59 struct svga_shader_result *result = fs->base.results;
60
61 assert(key);
62
63 for ( ; result; result = result->next) {
64 if (compare_fs_keys( key, &result->key.fkey ) == 0)
65 return result;
66 }
67
68 return NULL;
69 }
70
71
72 /**
73 * If we fail to compile a fragment shader (because it uses too many
74 * registers, for example) we'll use a dummy/fallback shader that
75 * simply emits a constant color.
76 */
77 static const struct tgsi_token *
78 get_dummy_fragment_shader(void)
79 {
80 static const float red[4] = { 1.0, 0.0, 0.0, 0.0 };
81 struct ureg_program *ureg;
82 const struct tgsi_token *tokens;
83 struct ureg_src src;
84 struct ureg_dst dst;
85 unsigned num_tokens;
86
87 ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
88 if (!ureg)
89 return NULL;
90
91 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
92 src = ureg_DECL_immediate(ureg, red, 4);
93 ureg_MOV(ureg, dst, src);
94 ureg_END(ureg);
95
96 tokens = ureg_get_tokens(ureg, &num_tokens);
97
98 ureg_destroy(ureg);
99
100 return tokens;
101 }
102
103
104 static enum pipe_error compile_fs( struct svga_context *svga,
105 struct svga_fragment_shader *fs,
106 const struct svga_fs_compile_key *key,
107 struct svga_shader_result **out_result )
108 {
109 struct svga_shader_result *result;
110 enum pipe_error ret = PIPE_ERROR;
111
112 result = svga_translate_fragment_program( fs, key );
113 if (result == NULL) {
114 /* some problem during translation, try the dummy shader */
115 const struct tgsi_token *dummy = get_dummy_fragment_shader();
116 if (!dummy) {
117 ret = PIPE_ERROR_OUT_OF_MEMORY;
118 goto fail;
119 }
120 debug_printf("Failed to compile fragment shader, using dummy shader instead.\n");
121 FREE((void *) fs->base.tokens);
122 fs->base.tokens = dummy;
123 result = svga_translate_fragment_program(fs, key);
124 if (result == NULL) {
125 ret = PIPE_ERROR;
126 goto fail;
127 }
128 }
129
130 result->id = util_bitmask_add(svga->fs_bm);
131 if(result->id == UTIL_BITMASK_INVALID_INDEX) {
132 ret = PIPE_ERROR_OUT_OF_MEMORY;
133 goto fail;
134 }
135
136 ret = SVGA3D_DefineShader(svga->swc,
137 result->id,
138 SVGA3D_SHADERTYPE_PS,
139 result->tokens,
140 result->nr_tokens * sizeof result->tokens[0]);
141 if (ret != PIPE_OK)
142 goto fail;
143
144 *out_result = result;
145 result->next = fs->base.results;
146 fs->base.results = result;
147 return PIPE_OK;
148
149 fail:
150 if (result) {
151 if (result->id != UTIL_BITMASK_INVALID_INDEX)
152 util_bitmask_clear( svga->fs_bm, result->id );
153 svga_destroy_shader_result( result );
154 }
155 return ret;
156 }
157
158
159 /* SVGA_NEW_TEXTURE_BINDING
160 * SVGA_NEW_RAST
161 * SVGA_NEW_NEED_SWTNL
162 * SVGA_NEW_SAMPLER
163 */
164 static enum pipe_error
165 make_fs_key(const struct svga_context *svga,
166 struct svga_fragment_shader *fs,
167 struct svga_fs_compile_key *key)
168 {
169 unsigned i;
170 int idx = 0;
171
172 memset(key, 0, sizeof *key);
173
174 /* Only need fragment shader fixup for twoside lighting if doing
175 * hwtnl. Otherwise the draw module does the whole job for us.
176 *
177 * SVGA_NEW_SWTNL
178 */
179 if (!svga->state.sw.need_swtnl) {
180 /* SVGA_NEW_RAST
181 */
182 key->light_twoside = svga->curr.rast->templ.light_twoside;
183 key->front_ccw = svga->curr.rast->templ.front_ccw;
184 }
185
186 /* The blend workaround for simulating logicop xor behaviour
187 * requires that the incoming fragment color be white. This change
188 * achieves that by creating a variant of the current fragment
189 * shader that overrides all output colors with 1,1,1,1
190 *
191 * This will work for most shaders, including those containing
192 * TEXKIL and/or depth-write. However, it will break on the
193 * combination of xor-logicop plus alphatest.
194 *
195 * Ultimately, we could implement alphatest in the shader using
196 * texkil prior to overriding the outgoing fragment color.
197 *
198 * SVGA_NEW_BLEND
199 */
200 if (svga->curr.blend->need_white_fragments) {
201 key->white_fragments = 1;
202 }
203
204 /* XXX: want to limit this to the textures that the shader actually
205 * refers to.
206 *
207 * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
208 */
209 for (i = 0; i < svga->curr.num_sampler_views; i++) {
210 if (svga->curr.sampler_views[i]) {
211 assert(svga->curr.sampler[i]);
212 assert(svga->curr.sampler_views[i]->texture);
213 key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target;
214 if (!svga->curr.sampler[i]->normalized_coords) {
215 key->tex[i].width_height_idx = idx++;
216 key->tex[i].unnormalized = TRUE;
217 ++key->num_unnormalized_coords;
218 }
219
220 key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r;
221 key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g;
222 key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b;
223 key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a;
224 }
225 }
226 key->num_textures = svga->curr.num_sampler_views;
227
228 idx = 0;
229 for (i = 0; i < svga->curr.num_samplers; ++i) {
230 if (svga->curr.sampler_views[i]) {
231 struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
232 struct svga_texture *stex = svga_texture(tex);
233 SVGA3dSurfaceFormat format = stex->key.format;
234
235 if (format == SVGA3D_Z_D16 ||
236 format == SVGA3D_Z_D24X8 ||
237 format == SVGA3D_Z_D24S8) {
238 /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
239 * or SVGA3D_Z_D24S8 surface, we'll automatically get
240 * shadow comparison. But we only get LEQUAL mode.
241 * Set TEX_COMPARE_NONE here so we don't emit the extra FS
242 * code for shadow comparison.
243 */
244 key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
245 key->tex[i].compare_func = PIPE_FUNC_NEVER;
246 /* These depth formats _only_ support comparison mode and
247 * not ordinary sampling so warn if the later is expected.
248 */
249 if (svga->curr.sampler[i]->compare_mode !=
250 PIPE_TEX_COMPARE_R_TO_TEXTURE) {
251 debug_warn_once("Unsupported shadow compare mode");
252 }
253 /* The only supported comparison mode is LEQUAL */
254 if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) {
255 debug_warn_once("Unsupported shadow compare function");
256 }
257 }
258 else {
259 /* For other texture formats, just use the compare func/mode
260 * as-is. Should be no-ops for color textures. For depth
261 * textures, we do not get automatic depth compare. We have
262 * to do it ourselves in the shader. And we don't get PCF.
263 */
264 key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
265 key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
266 }
267 }
268 }
269
270 /* sprite coord gen state */
271 for (i = 0; i < svga->curr.num_samplers; ++i) {
272 key->tex[i].sprite_texgen =
273 svga->curr.rast->templ.sprite_coord_enable & (1 << i);
274 }
275
276 key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
277 == PIPE_SPRITE_COORD_LOWER_LEFT);
278
279 return PIPE_OK;
280 }
281
282
283
284 static enum pipe_error
285 emit_hw_fs(struct svga_context *svga, unsigned dirty)
286 {
287 struct svga_shader_result *result = NULL;
288 unsigned id = SVGA3D_INVALID_ID;
289 enum pipe_error ret = PIPE_OK;
290
291 struct svga_fragment_shader *fs = svga->curr.fs;
292 struct svga_fs_compile_key key;
293
294 /* SVGA_NEW_BLEND
295 * SVGA_NEW_TEXTURE_BINDING
296 * SVGA_NEW_RAST
297 * SVGA_NEW_NEED_SWTNL
298 * SVGA_NEW_SAMPLER
299 */
300 ret = make_fs_key( svga, fs, &key );
301 if (ret != PIPE_OK)
302 return ret;
303
304 result = search_fs_key( fs, &key );
305 if (!result) {
306 ret = compile_fs( svga, fs, &key, &result );
307 if (ret != PIPE_OK)
308 return ret;
309 }
310
311 assert (result);
312 id = result->id;
313
314 assert(id != SVGA3D_INVALID_ID);
315
316 if (result != svga->state.hw_draw.fs) {
317 ret = SVGA3D_SetShader(svga->swc,
318 SVGA3D_SHADERTYPE_PS,
319 id );
320 if (ret != PIPE_OK)
321 return ret;
322
323 svga->dirty |= SVGA_NEW_FS_RESULT;
324 svga->state.hw_draw.fs = result;
325 }
326
327 return PIPE_OK;
328 }
329
330 struct svga_tracked_state svga_hw_fs =
331 {
332 "fragment shader (hwtnl)",
333 (SVGA_NEW_FS |
334 SVGA_NEW_TEXTURE_BINDING |
335 SVGA_NEW_NEED_SWTNL |
336 SVGA_NEW_RAST |
337 SVGA_NEW_SAMPLER |
338 SVGA_NEW_BLEND),
339 emit_hw_fs
340 };
341
342
343