i965: Support GL_CLAMP natively on Broadwell.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_sampler_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_state.h"
35 #include "brw_defines.h"
36 #include "intel_mipmap_tree.h"
37
38 #include "main/macros.h"
39 #include "main/samplerobj.h"
40
41
42 /* Samplers aren't strictly wm state from the hardware's perspective,
43 * but that is the only situation in which we use them in this driver.
44 */
45
46
47
48 uint32_t
49 translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
50 {
51 switch( wrap ) {
52 case GL_REPEAT:
53 return BRW_TEXCOORDMODE_WRAP;
54 case GL_CLAMP:
55 /* GL_CLAMP is the weird mode where coordinates are clamped to
56 * [0.0, 1.0], so linear filtering of coordinates outside of
57 * [0.0, 1.0] give you half edge texel value and half border
58 * color.
59 *
60 * Gen8+ supports this natively.
61 */
62 if (brw->gen >= 8)
63 return GEN8_TEXCOORDMODE_HALF_BORDER;
64
65 /* On Gen4-7.5, we clamp the coordinates in the fragment shader
66 * and set clamp_border here, which gets the result desired.
67 * We just use clamp(_to_edge) for nearest, because for nearest
68 * clamping to 1.0 gives border color instead of the desired
69 * edge texels.
70 */
71 if (using_nearest)
72 return BRW_TEXCOORDMODE_CLAMP;
73 else
74 return BRW_TEXCOORDMODE_CLAMP_BORDER;
75 case GL_CLAMP_TO_EDGE:
76 return BRW_TEXCOORDMODE_CLAMP;
77 case GL_CLAMP_TO_BORDER:
78 return BRW_TEXCOORDMODE_CLAMP_BORDER;
79 case GL_MIRRORED_REPEAT:
80 return BRW_TEXCOORDMODE_MIRROR;
81 case GL_MIRROR_CLAMP_TO_EDGE:
82 return BRW_TEXCOORDMODE_MIRROR_ONCE;
83 default:
84 return BRW_TEXCOORDMODE_WRAP;
85 }
86 }
87
88 /**
89 * Upload SAMPLER_BORDER_COLOR_STATE.
90 */
91 void
92 upload_default_color(struct brw_context *brw,
93 struct gl_sampler_object *sampler,
94 int unit,
95 uint32_t *sdc_offset)
96 {
97 struct gl_context *ctx = &brw->ctx;
98 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
99 struct gl_texture_object *texObj = texUnit->_Current;
100 struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel];
101 float color[4];
102
103 switch (firstImage->_BaseFormat) {
104 case GL_DEPTH_COMPONENT:
105 /* GL specs that border color for depth textures is taken from the
106 * R channel, while the hardware uses A. Spam R into all the
107 * channels for safety.
108 */
109 color[0] = sampler->BorderColor.f[0];
110 color[1] = sampler->BorderColor.f[0];
111 color[2] = sampler->BorderColor.f[0];
112 color[3] = sampler->BorderColor.f[0];
113 break;
114 case GL_ALPHA:
115 color[0] = 0.0;
116 color[1] = 0.0;
117 color[2] = 0.0;
118 color[3] = sampler->BorderColor.f[3];
119 break;
120 case GL_INTENSITY:
121 color[0] = sampler->BorderColor.f[0];
122 color[1] = sampler->BorderColor.f[0];
123 color[2] = sampler->BorderColor.f[0];
124 color[3] = sampler->BorderColor.f[0];
125 break;
126 case GL_LUMINANCE:
127 color[0] = sampler->BorderColor.f[0];
128 color[1] = sampler->BorderColor.f[0];
129 color[2] = sampler->BorderColor.f[0];
130 color[3] = 1.0;
131 break;
132 case GL_LUMINANCE_ALPHA:
133 color[0] = sampler->BorderColor.f[0];
134 color[1] = sampler->BorderColor.f[0];
135 color[2] = sampler->BorderColor.f[0];
136 color[3] = sampler->BorderColor.f[3];
137 break;
138 default:
139 color[0] = sampler->BorderColor.f[0];
140 color[1] = sampler->BorderColor.f[1];
141 color[2] = sampler->BorderColor.f[2];
142 color[3] = sampler->BorderColor.f[3];
143 break;
144 }
145
146 /* In some cases we use an RGBA surface format for GL RGB textures,
147 * where we've initialized the A channel to 1.0. We also have to set
148 * the border color alpha to 1.0 in that case.
149 */
150 if (firstImage->_BaseFormat == GL_RGB)
151 color[3] = 1.0;
152
153 if (brw->gen >= 8) {
154 /* On Broadwell, the border color is represented as four 32-bit floats,
155 * integers, or unsigned values, interpreted according to the surface
156 * format. This matches the sampler->BorderColor union exactly. Since
157 * we use floats both here and in the above reswizzling code, we preserve
158 * the original bit pattern. So we actually handle all three formats.
159 */
160 float *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
161 4 * 4, 64, sdc_offset);
162 COPY_4FV(sdc, color);
163 } else if (brw->gen == 5 || brw->gen == 6) {
164 struct gen5_sampler_default_color *sdc;
165
166 sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
167 sizeof(*sdc), 32, sdc_offset);
168
169 memset(sdc, 0, sizeof(*sdc));
170
171 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]);
172 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]);
173 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]);
174 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]);
175
176 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]);
177 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]);
178 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]);
179 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]);
180
181 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]);
182 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]);
183 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]);
184 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]);
185
186 sdc->hf[0] = _mesa_float_to_half(color[0]);
187 sdc->hf[1] = _mesa_float_to_half(color[1]);
188 sdc->hf[2] = _mesa_float_to_half(color[2]);
189 sdc->hf[3] = _mesa_float_to_half(color[3]);
190
191 sdc->b[0] = sdc->s[0] >> 8;
192 sdc->b[1] = sdc->s[1] >> 8;
193 sdc->b[2] = sdc->s[2] >> 8;
194 sdc->b[3] = sdc->s[3] >> 8;
195
196 sdc->f[0] = color[0];
197 sdc->f[1] = color[1];
198 sdc->f[2] = color[2];
199 sdc->f[3] = color[3];
200 } else {
201 struct brw_sampler_default_color *sdc;
202
203 sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
204 sizeof(*sdc), 32, sdc_offset);
205
206 COPY_4V(sdc->color, color);
207 }
208 }
209
210 /**
211 * Sets the sampler state for a single unit based off of the sampler key
212 * entry.
213 */
214 static void brw_update_sampler_state(struct brw_context *brw,
215 int unit,
216 int ss_index,
217 struct brw_sampler_state *sampler,
218 uint32_t sampler_state_table_offset,
219 uint32_t *sdc_offset)
220 {
221 struct gl_context *ctx = &brw->ctx;
222 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
223 struct gl_texture_object *texObj = texUnit->_Current;
224 struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
225 bool using_nearest = false;
226
227 /* These don't use samplers at all. */
228 if (texObj->Target == GL_TEXTURE_BUFFER)
229 return;
230
231 switch (gl_sampler->MinFilter) {
232 case GL_NEAREST:
233 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
234 sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
235 using_nearest = true;
236 break;
237 case GL_LINEAR:
238 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
239 sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
240 break;
241 case GL_NEAREST_MIPMAP_NEAREST:
242 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
243 sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
244 break;
245 case GL_LINEAR_MIPMAP_NEAREST:
246 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
247 sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
248 break;
249 case GL_NEAREST_MIPMAP_LINEAR:
250 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
251 sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
252 break;
253 case GL_LINEAR_MIPMAP_LINEAR:
254 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
255 sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
256 break;
257 default:
258 break;
259 }
260
261 /* Set Anisotropy:
262 */
263 if (gl_sampler->MaxAnisotropy > 1.0) {
264 sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
265 sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
266
267 if (gl_sampler->MaxAnisotropy > 2.0) {
268 sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2,
269 BRW_ANISORATIO_16);
270 }
271 }
272 else {
273 switch (gl_sampler->MagFilter) {
274 case GL_NEAREST:
275 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
276 using_nearest = true;
277 break;
278 case GL_LINEAR:
279 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
280 break;
281 default:
282 break;
283 }
284 }
285
286 sampler->ss1.r_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapR,
287 using_nearest);
288 sampler->ss1.s_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapS,
289 using_nearest);
290 sampler->ss1.t_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapT,
291 using_nearest);
292
293 if (brw->gen >= 6 &&
294 sampler->ss0.min_filter != sampler->ss0.mag_filter)
295 sampler->ss0.min_mag_neq = 1;
296
297 /* Cube-maps on 965 and later must use the same wrap mode for all 3
298 * coordinate dimensions. Futher, only CUBE and CLAMP are valid.
299 */
300 if (texObj->Target == GL_TEXTURE_CUBE_MAP ||
301 texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
302 if ((ctx->Texture.CubeMapSeamless || gl_sampler->CubeMapSeamless) &&
303 (gl_sampler->MinFilter != GL_NEAREST ||
304 gl_sampler->MagFilter != GL_NEAREST)) {
305 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
306 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
307 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
308 } else {
309 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
310 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
311 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
312 }
313 } else if (texObj->Target == GL_TEXTURE_1D) {
314 /* There's a bug in 1D texture sampling - it actually pays
315 * attention to the wrap_t value, though it should not.
316 * Override the wrap_t value here to GL_REPEAT to keep
317 * any nonexistent border pixels from floating in.
318 */
319 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
320 }
321
322
323 /* Set shadow function:
324 */
325 if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
326 /* Shadowing is "enabled" by emitting a particular sampler
327 * message (sample_c). So need to recompile WM program when
328 * shadow comparison is enabled on each/any texture unit.
329 */
330 sampler->ss0.shadow_function =
331 intel_translate_shadow_compare_func(gl_sampler->CompareFunc);
332 }
333
334 /* Set LOD bias:
335 */
336 sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias +
337 gl_sampler->LodBias, -16, 15), 6);
338
339 sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
340 sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
341
342 sampler->ss0.base_level = U_FIXED(0, 1);
343
344 sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
345 sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
346
347 /* On Gen6+, the sampler can handle non-normalized texture
348 * rectangle coordinates natively
349 */
350 if (brw->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
351 sampler->ss3.non_normalized_coord = 1;
352 }
353
354 upload_default_color(brw, gl_sampler, unit, sdc_offset);
355
356 if (brw->gen >= 6) {
357 sampler->ss2.default_color_pointer = *sdc_offset >> 5;
358 } else {
359 /* reloc */
360 sampler->ss2.default_color_pointer = (brw->batch.bo->offset64 +
361 *sdc_offset) >> 5;
362
363 drm_intel_bo_emit_reloc(brw->batch.bo,
364 sampler_state_table_offset +
365 ss_index * sizeof(struct brw_sampler_state) +
366 offsetof(struct brw_sampler_state, ss2),
367 brw->batch.bo, *sdc_offset,
368 I915_GEM_DOMAIN_SAMPLER, 0);
369 }
370
371 if (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST)
372 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
373 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
374 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
375 if (sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)
376 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
377 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
378 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
379 }
380
381
382 static void
383 brw_upload_sampler_state_table(struct brw_context *brw,
384 struct gl_program *prog,
385 struct brw_stage_state *stage_state)
386 {
387 struct gl_context *ctx = &brw->ctx;
388 struct brw_sampler_state *samplers;
389 uint32_t sampler_count = stage_state->sampler_count;
390
391 GLbitfield SamplersUsed = prog->SamplersUsed;
392
393 if (sampler_count == 0)
394 return;
395
396 samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
397 sampler_count * sizeof(*samplers),
398 32, &stage_state->sampler_offset);
399 memset(samplers, 0, sampler_count * sizeof(*samplers));
400
401 for (unsigned s = 0; s < sampler_count; s++) {
402 if (SamplersUsed & (1 << s)) {
403 const unsigned unit = prog->SamplerUnits[s];
404 if (ctx->Texture.Unit[unit]._Current)
405 brw_update_sampler_state(brw, unit, s, &samplers[s],
406 stage_state->sampler_offset,
407 &stage_state->sdc_offset[s]);
408 }
409 }
410
411 brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
412 }
413
414 static void
415 brw_upload_fs_samplers(struct brw_context *brw)
416 {
417 /* BRW_NEW_FRAGMENT_PROGRAM */
418 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
419 brw->vtbl.upload_sampler_state_table(brw, fs, &brw->wm.base);
420 }
421
422 const struct brw_tracked_state brw_fs_samplers = {
423 .dirty = {
424 .mesa = _NEW_TEXTURE,
425 .brw = BRW_NEW_BATCH |
426 BRW_NEW_FRAGMENT_PROGRAM,
427 .cache = 0
428 },
429 .emit = brw_upload_fs_samplers,
430 };
431
432 static void
433 brw_upload_vs_samplers(struct brw_context *brw)
434 {
435 /* BRW_NEW_VERTEX_PROGRAM */
436 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
437 brw->vtbl.upload_sampler_state_table(brw, vs, &brw->vs.base);
438 }
439
440
441 const struct brw_tracked_state brw_vs_samplers = {
442 .dirty = {
443 .mesa = _NEW_TEXTURE,
444 .brw = BRW_NEW_BATCH |
445 BRW_NEW_VERTEX_PROGRAM,
446 .cache = 0
447 },
448 .emit = brw_upload_vs_samplers,
449 };
450
451
452 static void
453 brw_upload_gs_samplers(struct brw_context *brw)
454 {
455 /* BRW_NEW_GEOMETRY_PROGRAM */
456 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
457 if (!gs)
458 return;
459
460 brw->vtbl.upload_sampler_state_table(brw, gs, &brw->gs.base);
461 }
462
463
464 const struct brw_tracked_state brw_gs_samplers = {
465 .dirty = {
466 .mesa = _NEW_TEXTURE,
467 .brw = BRW_NEW_BATCH |
468 BRW_NEW_GEOMETRY_PROGRAM,
469 .cache = 0
470 },
471 .emit = brw_upload_gs_samplers,
472 };
473
474
475 void
476 gen4_init_vtable_sampler_functions(struct brw_context *brw)
477 {
478 brw->vtbl.upload_sampler_state_table = brw_upload_sampler_state_table;
479 }