ac/gpu_info: add has_unaligned_shader_loads
[mesa.git] / src / gallium / drivers / svga / svga_pipe_sampler.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 #include "pipe/p_defines.h"
27 #include "util/u_bitmask.h"
28 #include "util/u_format.h"
29 #include "util/u_inlines.h"
30 #include "util/u_math.h"
31 #include "util/u_memory.h"
32 #include "tgsi/tgsi_parse.h"
33
34 #include "svga_context.h"
35 #include "svga_cmd.h"
36 #include "svga_debug.h"
37 #include "svga_resource_texture.h"
38 #include "svga_surface.h"
39 #include "svga_sampler_view.h"
40
41
42 static inline unsigned
43 translate_wrap_mode(unsigned wrap)
44 {
45 switch (wrap) {
46 case PIPE_TEX_WRAP_REPEAT:
47 return SVGA3D_TEX_ADDRESS_WRAP;
48 case PIPE_TEX_WRAP_CLAMP:
49 return SVGA3D_TEX_ADDRESS_CLAMP;
50 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
51 /* Unfortunately SVGA3D_TEX_ADDRESS_EDGE not respected by
52 * hardware.
53 */
54 return SVGA3D_TEX_ADDRESS_CLAMP;
55 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
56 return SVGA3D_TEX_ADDRESS_BORDER;
57 case PIPE_TEX_WRAP_MIRROR_REPEAT:
58 return SVGA3D_TEX_ADDRESS_MIRROR;
59 case PIPE_TEX_WRAP_MIRROR_CLAMP:
60 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
61 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
62 return SVGA3D_TEX_ADDRESS_MIRRORONCE;
63 default:
64 assert(0);
65 return SVGA3D_TEX_ADDRESS_WRAP;
66 }
67 }
68
69
70 static inline unsigned
71 translate_img_filter(unsigned filter)
72 {
73 switch (filter) {
74 case PIPE_TEX_FILTER_NEAREST:
75 return SVGA3D_TEX_FILTER_NEAREST;
76 case PIPE_TEX_FILTER_LINEAR:
77 return SVGA3D_TEX_FILTER_LINEAR;
78 default:
79 assert(0);
80 return SVGA3D_TEX_FILTER_NEAREST;
81 }
82 }
83
84
85 static inline unsigned
86 translate_mip_filter(unsigned filter)
87 {
88 switch (filter) {
89 case PIPE_TEX_MIPFILTER_NONE:
90 return SVGA3D_TEX_FILTER_NONE;
91 case PIPE_TEX_MIPFILTER_NEAREST:
92 return SVGA3D_TEX_FILTER_NEAREST;
93 case PIPE_TEX_MIPFILTER_LINEAR:
94 return SVGA3D_TEX_FILTER_LINEAR;
95 default:
96 assert(0);
97 return SVGA3D_TEX_FILTER_NONE;
98 }
99 }
100
101
102 static uint8
103 translate_comparison_func(unsigned func)
104 {
105 switch (func) {
106 case PIPE_FUNC_NEVER:
107 return SVGA3D_COMPARISON_NEVER;
108 case PIPE_FUNC_LESS:
109 return SVGA3D_COMPARISON_LESS;
110 case PIPE_FUNC_EQUAL:
111 return SVGA3D_COMPARISON_EQUAL;
112 case PIPE_FUNC_LEQUAL:
113 return SVGA3D_COMPARISON_LESS_EQUAL;
114 case PIPE_FUNC_GREATER:
115 return SVGA3D_COMPARISON_GREATER;
116 case PIPE_FUNC_NOTEQUAL:
117 return SVGA3D_COMPARISON_NOT_EQUAL;
118 case PIPE_FUNC_GEQUAL:
119 return SVGA3D_COMPARISON_GREATER_EQUAL;
120 case PIPE_FUNC_ALWAYS:
121 return SVGA3D_COMPARISON_ALWAYS;
122 default:
123 assert(!"Invalid comparison function");
124 return SVGA3D_COMPARISON_ALWAYS;
125 }
126 }
127
128
129 /**
130 * Translate filtering state to vgpu10 format.
131 */
132 static SVGA3dFilter
133 translate_filter_mode(unsigned img_filter,
134 unsigned min_filter,
135 unsigned mag_filter,
136 boolean anisotropic,
137 boolean compare)
138 {
139 SVGA3dFilter mode = 0;
140
141 if (img_filter == PIPE_TEX_FILTER_LINEAR)
142 mode |= SVGA3D_FILTER_MIP_LINEAR;
143 if (min_filter == PIPE_TEX_FILTER_LINEAR)
144 mode |= SVGA3D_FILTER_MIN_LINEAR;
145 if (mag_filter == PIPE_TEX_FILTER_LINEAR)
146 mode |= SVGA3D_FILTER_MAG_LINEAR;
147 if (anisotropic)
148 mode |= SVGA3D_FILTER_ANISOTROPIC;
149 if (compare)
150 mode |= SVGA3D_FILTER_COMPARE;
151
152 return mode;
153 }
154
155
156 /**
157 * Define a vgpu10 sampler state.
158 */
159 static void
160 define_sampler_state_object(struct svga_context *svga,
161 struct svga_sampler_state *ss,
162 const struct pipe_sampler_state *ps)
163 {
164 uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */
165 boolean anisotropic;
166 uint8 compare_func;
167 SVGA3dFilter filter;
168 SVGA3dRGBAFloat bcolor;
169 unsigned try;
170 float min_lod, max_lod;
171
172 assert(svga_have_vgpu10(svga));
173
174 anisotropic = ss->aniso_level > 1.0f;
175
176 filter = translate_filter_mode(ps->min_mip_filter,
177 ps->min_img_filter,
178 ps->mag_img_filter,
179 anisotropic,
180 ss->compare_mode);
181
182 compare_func = translate_comparison_func(ss->compare_func);
183
184 COPY_4V(bcolor.value, ps->border_color.f);
185
186 assert(ps->min_lod <= ps->max_lod);
187
188 if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
189 /* just use the base level image */
190 min_lod = max_lod = 0.0f;
191 }
192 else {
193 min_lod = ps->min_lod;
194 max_lod = ps->max_lod;
195 }
196
197 /* If shadow comparisons are enabled, create two sampler states: one
198 * with the given shadow compare mode, another with shadow comparison off.
199 * We need the later because in some cases, we have to do the shadow
200 * compare in the shader. So, we don't want to do it twice.
201 */
202 STATIC_ASSERT(PIPE_TEX_COMPARE_NONE == 0);
203 STATIC_ASSERT(PIPE_TEX_COMPARE_R_TO_TEXTURE == 1);
204 ss->id[1] = SVGA3D_INVALID_ID;
205
206 unsigned i;
207 for (i = 0; i <= ss->compare_mode; i++) {
208 ss->id[i] = util_bitmask_add(svga->sampler_object_id_bm);
209
210 /* Loop in case command buffer is full and we need to flush and retry */
211 for (try = 0; try < 2; try++) {
212 enum pipe_error ret =
213 SVGA3D_vgpu10_DefineSamplerState(svga->swc,
214 ss->id[i],
215 filter,
216 ss->addressu,
217 ss->addressv,
218 ss->addressw,
219 ss->lod_bias, /* float */
220 max_aniso,
221 compare_func,
222 bcolor,
223 min_lod, /* float */
224 max_lod); /* float */
225 if (ret == PIPE_OK)
226 break;
227 svga_context_flush(svga, NULL);
228 }
229
230 /* turn off the shadow compare option for second iteration */
231 filter &= ~SVGA3D_FILTER_COMPARE;
232 }
233 }
234
235
236 static void *
237 svga_create_sampler_state(struct pipe_context *pipe,
238 const struct pipe_sampler_state *sampler)
239 {
240 struct svga_context *svga = svga_context(pipe);
241 struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state );
242
243 if (!cso)
244 return NULL;
245
246 cso->mipfilter = translate_mip_filter(sampler->min_mip_filter);
247 cso->magfilter = translate_img_filter( sampler->mag_img_filter );
248 cso->minfilter = translate_img_filter( sampler->min_img_filter );
249 cso->aniso_level = MAX2( sampler->max_anisotropy, 1 );
250 if (sampler->max_anisotropy)
251 cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC;
252 cso->lod_bias = sampler->lod_bias;
253 cso->addressu = translate_wrap_mode(sampler->wrap_s);
254 cso->addressv = translate_wrap_mode(sampler->wrap_t);
255 cso->addressw = translate_wrap_mode(sampler->wrap_r);
256 cso->normalized_coords = sampler->normalized_coords;
257 cso->compare_mode = sampler->compare_mode;
258 cso->compare_func = sampler->compare_func;
259
260 {
261 uint32 r = float_to_ubyte(sampler->border_color.f[0]);
262 uint32 g = float_to_ubyte(sampler->border_color.f[1]);
263 uint32 b = float_to_ubyte(sampler->border_color.f[2]);
264 uint32 a = float_to_ubyte(sampler->border_color.f[3]);
265
266 cso->bordercolor = (a << 24) | (r << 16) | (g << 8) | b;
267 }
268
269 /* No SVGA3D support for:
270 * - min/max LOD clamping
271 */
272 cso->min_lod = 0;
273 cso->view_min_lod = MAX2((int) (sampler->min_lod + 0.5), 0);
274 cso->view_max_lod = MAX2((int) (sampler->max_lod + 0.5), 0);
275
276 /* Use min_mipmap */
277 if (svga->debug.use_min_mipmap) {
278 if (cso->view_min_lod == cso->view_max_lod) {
279 cso->min_lod = cso->view_min_lod;
280 cso->view_min_lod = 0;
281 cso->view_max_lod = 1000; /* Just a high number */
282 cso->mipfilter = SVGA3D_TEX_FILTER_NONE;
283 }
284 }
285
286 if (svga_have_vgpu10(svga)) {
287 define_sampler_state_object(svga, cso, sampler);
288 }
289
290 SVGA_DBG(DEBUG_SAMPLERS,
291 "New sampler: min %u, view(min %u, max %u) lod, mipfilter %s\n",
292 cso->min_lod, cso->view_min_lod, cso->view_max_lod,
293 cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
294
295 svga->hud.num_sampler_objects++;
296 SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws,
297 SVGA_STATS_COUNT_SAMPLER);
298
299 return cso;
300 }
301
302
303 static void
304 svga_bind_sampler_states(struct pipe_context *pipe,
305 enum pipe_shader_type shader,
306 unsigned start,
307 unsigned num,
308 void **samplers)
309 {
310 struct svga_context *svga = svga_context(pipe);
311 unsigned i;
312 boolean any_change = FALSE;
313
314 assert(shader < PIPE_SHADER_TYPES);
315 assert(start + num <= PIPE_MAX_SAMPLERS);
316
317 /* Pre-VGPU10 only supports FS textures */
318 if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
319 return;
320
321 for (i = 0; i < num; i++) {
322 if (svga->curr.sampler[shader][start + i] != samplers[i])
323 any_change = TRUE;
324 svga->curr.sampler[shader][start + i] = samplers[i];
325 }
326
327 if (!any_change) {
328 return;
329 }
330
331 /* find highest non-null sampler[] entry */
332 {
333 unsigned j = MAX2(svga->curr.num_samplers[shader], start + num);
334 while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL)
335 j--;
336 svga->curr.num_samplers[shader] = j;
337 }
338
339 svga->dirty |= SVGA_NEW_SAMPLER;
340 }
341
342
343 static void
344 svga_delete_sampler_state(struct pipe_context *pipe, void *sampler)
345 {
346 struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler;
347 struct svga_context *svga = svga_context(pipe);
348
349 if (svga_have_vgpu10(svga)) {
350 unsigned i;
351 for (i = 0; i < 2; i++) {
352 enum pipe_error ret;
353
354 if (ss->id[i] != SVGA3D_INVALID_ID) {
355 svga_hwtnl_flush_retry(svga);
356
357 ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]);
358 if (ret != PIPE_OK) {
359 svga_context_flush(svga, NULL);
360 ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]);
361 }
362 util_bitmask_clear(svga->sampler_object_id_bm, ss->id[i]);
363 }
364 }
365 }
366
367 FREE(sampler);
368 svga->hud.num_sampler_objects--;
369 }
370
371
372 static struct pipe_sampler_view *
373 svga_create_sampler_view(struct pipe_context *pipe,
374 struct pipe_resource *texture,
375 const struct pipe_sampler_view *templ)
376 {
377 struct svga_context *svga = svga_context(pipe);
378 struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view);
379
380 if (!sv) {
381 return NULL;
382 }
383
384 sv->base = *templ;
385 sv->base.reference.count = 1;
386 sv->base.texture = NULL;
387 pipe_resource_reference(&sv->base.texture, texture);
388
389 sv->base.context = pipe;
390 sv->id = SVGA3D_INVALID_ID;
391
392 svga->hud.num_samplerview_objects++;
393 SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws,
394 SVGA_STATS_COUNT_SAMPLERVIEW);
395
396 return &sv->base;
397 }
398
399
400 static void
401 svga_sampler_view_destroy(struct pipe_context *pipe,
402 struct pipe_sampler_view *view)
403 {
404 struct svga_context *svga = svga_context(pipe);
405 struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view);
406
407 if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) {
408 if (view->context != pipe) {
409 /* The SVGA3D device will generate an error (and on Linux, cause
410 * us to abort) if we try to destroy a shader resource view from
411 * a context other than the one it was created with. Skip the
412 * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler
413 * view for now. This should only sometimes happen when a shared
414 * texture is deleted.
415 */
416 _debug_printf("context mismatch in %s\n", __func__);
417 }
418 else {
419 enum pipe_error ret;
420
421 svga_hwtnl_flush_retry(svga); /* XXX is this needed? */
422
423 ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
424 if (ret != PIPE_OK) {
425 svga_context_flush(svga, NULL);
426 ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
427 }
428 util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
429 }
430 }
431
432 pipe_resource_reference(&sv->base.texture, NULL);
433
434 FREE(sv);
435 svga->hud.num_samplerview_objects--;
436 }
437
438
439 static void
440 svga_set_sampler_views(struct pipe_context *pipe,
441 enum pipe_shader_type shader,
442 unsigned start,
443 unsigned num,
444 struct pipe_sampler_view **views)
445 {
446 struct svga_context *svga = svga_context(pipe);
447 unsigned flag_1d = 0;
448 unsigned flag_srgb = 0;
449 unsigned flag_rect = 0;
450 unsigned flag_buf = 0;
451 uint i;
452 boolean any_change = FALSE;
453
454 assert(shader < PIPE_SHADER_TYPES);
455 assert(start + num <= ARRAY_SIZE(svga->curr.sampler_views[shader]));
456
457 /* Pre-VGPU10 only supports FS textures */
458 if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
459 return;
460
461 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_SETSAMPLERVIEWS);
462
463 /* This bit of code works around a quirk in the CSO module.
464 * If start=num=0 it means all sampler views should be released.
465 * Note that the CSO module treats sampler views for fragment shaders
466 * differently than other shader types.
467 */
468 if (start == 0 && num == 0 && svga->curr.num_sampler_views[shader] > 0) {
469 for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
470 pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][i]);
471 }
472 any_change = TRUE;
473 }
474
475 for (i = 0; i < num; i++) {
476 enum pipe_texture_target target;
477
478 if (svga->curr.sampler_views[shader][start + i] != views[i]) {
479 /* Note: we're using pipe_sampler_view_release() here to work around
480 * a possible crash when the old view belongs to another context that
481 * was already destroyed.
482 */
483 pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]);
484 pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
485 views[i]);
486 any_change = TRUE;
487 }
488
489 if (!views[i])
490 continue;
491
492 if (util_format_is_srgb(views[i]->format))
493 flag_srgb |= 1 << (start + i);
494
495 target = views[i]->texture->target;
496 if (target == PIPE_TEXTURE_1D)
497 flag_1d |= 1 << (start + i);
498 else if (target == PIPE_TEXTURE_RECT)
499 flag_rect |= 1 << (start + i);
500 else if (target == PIPE_BUFFER)
501 flag_buf |= 1 << (start + i);
502 }
503
504 if (!any_change) {
505 goto done;
506 }
507
508 /* find highest non-null sampler_views[] entry */
509 {
510 unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
511 while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL)
512 j--;
513 svga->curr.num_sampler_views[shader] = j;
514 }
515
516 svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
517
518 if (flag_srgb != svga->curr.tex_flags.flag_srgb ||
519 flag_1d != svga->curr.tex_flags.flag_1d) {
520 svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
521 svga->curr.tex_flags.flag_1d = flag_1d;
522 svga->curr.tex_flags.flag_srgb = flag_srgb;
523 }
524
525 if (flag_rect != svga->curr.tex_flags.flag_rect ||
526 flag_buf != svga->curr.tex_flags.flag_buf)
527 {
528 /* Need to re-emit texture constants */
529 svga->dirty |= SVGA_NEW_TEXTURE_CONSTS;
530 svga->curr.tex_flags.flag_rect = flag_rect;
531 svga->curr.tex_flags.flag_buf = flag_buf;
532 }
533
534 /* Check if any of the sampler view resources collide with the framebuffer
535 * color buffers or depth stencil resource. If so, set the NEW_FRAME_BUFFER
536 * dirty bit so that emit_framebuffer can be invoked to create backed view
537 * for the conflicted surface view.
538 */
539 if (svga_check_sampler_framebuffer_resource_collision(svga, shader)) {
540 svga->dirty |= SVGA_NEW_FRAME_BUFFER;
541 }
542
543 done:
544 SVGA_STATS_TIME_POP(svga_sws(svga));
545 }
546
547 /**
548 * Clean up sampler, sampler view state at context destruction time
549 */
550 void
551 svga_cleanup_sampler_state(struct svga_context *svga)
552 {
553 enum pipe_shader_type shader;
554
555 for (shader = 0; shader <= PIPE_SHADER_GEOMETRY; shader++) {
556 unsigned i;
557
558 for (i = 0; i < svga->state.hw_draw.num_sampler_views[shader]; i++) {
559 pipe_sampler_view_release(&svga->pipe,
560 &svga->state.hw_draw.sampler_views[shader][i]);
561 }
562 }
563
564 /* free polygon stipple state */
565 if (svga->polygon_stipple.sampler) {
566 svga->pipe.delete_sampler_state(&svga->pipe, svga->polygon_stipple.sampler);
567 }
568
569 if (svga->polygon_stipple.sampler_view) {
570 svga->pipe.sampler_view_destroy(&svga->pipe,
571 &svga->polygon_stipple.sampler_view->base);
572 }
573 pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
574 }
575
576 void
577 svga_init_sampler_functions( struct svga_context *svga )
578 {
579 svga->pipe.create_sampler_state = svga_create_sampler_state;
580 svga->pipe.bind_sampler_states = svga_bind_sampler_states;
581 svga->pipe.delete_sampler_state = svga_delete_sampler_state;
582 svga->pipe.set_sampler_views = svga_set_sampler_views;
583 svga->pipe.create_sampler_view = svga_create_sampler_view;
584 svga->pipe.sampler_view_destroy = svga_sampler_view_destroy;
585 }