i965: Push computation for sampler state batch offsets up a level.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_sampler_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 /**
33 * @file brw_sampler_state.c
34 *
35 * This file contains code for emitting SAMPLER_STATE structures, which
36 * specify filter modes, wrap modes, border color, and so on.
37 */
38
39 #include "brw_context.h"
40 #include "brw_state.h"
41 #include "brw_defines.h"
42 #include "intel_mipmap_tree.h"
43
44 #include "main/macros.h"
45 #include "main/samplerobj.h"
46
47 uint32_t
48 translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
49 {
50 switch( wrap ) {
51 case GL_REPEAT:
52 return BRW_TEXCOORDMODE_WRAP;
53 case GL_CLAMP:
54 /* GL_CLAMP is the weird mode where coordinates are clamped to
55 * [0.0, 1.0], so linear filtering of coordinates outside of
56 * [0.0, 1.0] give you half edge texel value and half border
57 * color.
58 *
59 * Gen8+ supports this natively.
60 */
61 if (brw->gen >= 8)
62 return GEN8_TEXCOORDMODE_HALF_BORDER;
63
64 /* On Gen4-7.5, we clamp the coordinates in the fragment shader
65 * and set clamp_border here, which gets the result desired.
66 * We just use clamp(_to_edge) for nearest, because for nearest
67 * clamping to 1.0 gives border color instead of the desired
68 * edge texels.
69 */
70 if (using_nearest)
71 return BRW_TEXCOORDMODE_CLAMP;
72 else
73 return BRW_TEXCOORDMODE_CLAMP_BORDER;
74 case GL_CLAMP_TO_EDGE:
75 return BRW_TEXCOORDMODE_CLAMP;
76 case GL_CLAMP_TO_BORDER:
77 return BRW_TEXCOORDMODE_CLAMP_BORDER;
78 case GL_MIRRORED_REPEAT:
79 return BRW_TEXCOORDMODE_MIRROR;
80 case GL_MIRROR_CLAMP_TO_EDGE:
81 return BRW_TEXCOORDMODE_MIRROR_ONCE;
82 default:
83 return BRW_TEXCOORDMODE_WRAP;
84 }
85 }
86
87 /**
88 * Upload SAMPLER_BORDER_COLOR_STATE.
89 */
90 void
91 upload_default_color(struct brw_context *brw,
92 struct gl_sampler_object *sampler,
93 int unit,
94 uint32_t *sdc_offset)
95 {
96 struct gl_context *ctx = &brw->ctx;
97 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
98 struct gl_texture_object *texObj = texUnit->_Current;
99 struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel];
100 float color[4];
101
102 switch (firstImage->_BaseFormat) {
103 case GL_DEPTH_COMPONENT:
104 /* GL specs that border color for depth textures is taken from the
105 * R channel, while the hardware uses A. Spam R into all the
106 * channels for safety.
107 */
108 color[0] = sampler->BorderColor.f[0];
109 color[1] = sampler->BorderColor.f[0];
110 color[2] = sampler->BorderColor.f[0];
111 color[3] = sampler->BorderColor.f[0];
112 break;
113 case GL_ALPHA:
114 color[0] = 0.0;
115 color[1] = 0.0;
116 color[2] = 0.0;
117 color[3] = sampler->BorderColor.f[3];
118 break;
119 case GL_INTENSITY:
120 color[0] = sampler->BorderColor.f[0];
121 color[1] = sampler->BorderColor.f[0];
122 color[2] = sampler->BorderColor.f[0];
123 color[3] = sampler->BorderColor.f[0];
124 break;
125 case GL_LUMINANCE:
126 color[0] = sampler->BorderColor.f[0];
127 color[1] = sampler->BorderColor.f[0];
128 color[2] = sampler->BorderColor.f[0];
129 color[3] = 1.0;
130 break;
131 case GL_LUMINANCE_ALPHA:
132 color[0] = sampler->BorderColor.f[0];
133 color[1] = sampler->BorderColor.f[0];
134 color[2] = sampler->BorderColor.f[0];
135 color[3] = sampler->BorderColor.f[3];
136 break;
137 default:
138 color[0] = sampler->BorderColor.f[0];
139 color[1] = sampler->BorderColor.f[1];
140 color[2] = sampler->BorderColor.f[2];
141 color[3] = sampler->BorderColor.f[3];
142 break;
143 }
144
145 /* In some cases we use an RGBA surface format for GL RGB textures,
146 * where we've initialized the A channel to 1.0. We also have to set
147 * the border color alpha to 1.0 in that case.
148 */
149 if (firstImage->_BaseFormat == GL_RGB)
150 color[3] = 1.0;
151
152 if (brw->gen >= 8) {
153 /* On Broadwell, the border color is represented as four 32-bit floats,
154 * integers, or unsigned values, interpreted according to the surface
155 * format. This matches the sampler->BorderColor union exactly. Since
156 * we use floats both here and in the above reswizzling code, we preserve
157 * the original bit pattern. So we actually handle all three formats.
158 */
159 float *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
160 4 * 4, 64, sdc_offset);
161 COPY_4FV(sdc, color);
162 } else if (brw->gen == 5 || brw->gen == 6) {
163 struct gen5_sampler_default_color *sdc;
164
165 sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
166 sizeof(*sdc), 32, sdc_offset);
167
168 memset(sdc, 0, sizeof(*sdc));
169
170 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]);
171 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]);
172 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]);
173 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]);
174
175 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]);
176 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]);
177 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]);
178 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]);
179
180 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]);
181 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]);
182 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]);
183 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]);
184
185 sdc->hf[0] = _mesa_float_to_half(color[0]);
186 sdc->hf[1] = _mesa_float_to_half(color[1]);
187 sdc->hf[2] = _mesa_float_to_half(color[2]);
188 sdc->hf[3] = _mesa_float_to_half(color[3]);
189
190 sdc->b[0] = sdc->s[0] >> 8;
191 sdc->b[1] = sdc->s[1] >> 8;
192 sdc->b[2] = sdc->s[2] >> 8;
193 sdc->b[3] = sdc->s[3] >> 8;
194
195 sdc->f[0] = color[0];
196 sdc->f[1] = color[1];
197 sdc->f[2] = color[2];
198 sdc->f[3] = color[3];
199 } else {
200 float *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
201 4 * 4, 32, sdc_offset);
202 memcpy(sdc, color, 4 * 4);
203 }
204 }
205
206 /**
207 * Sets the sampler state for a single unit based off of the sampler key
208 * entry.
209 */
210 static void
211 brw_update_sampler_state(struct brw_context *brw,
212 int unit,
213 struct brw_sampler_state *sampler,
214 uint32_t batch_offset_for_sampler_state)
215 {
216 struct gl_context *ctx = &brw->ctx;
217 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
218 struct gl_texture_object *texObj = texUnit->_Current;
219 struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
220 bool using_nearest = false;
221
222 /* These don't use samplers at all. */
223 if (texObj->Target == GL_TEXTURE_BUFFER)
224 return;
225
226 switch (gl_sampler->MinFilter) {
227 case GL_NEAREST:
228 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
229 sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
230 using_nearest = true;
231 break;
232 case GL_LINEAR:
233 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
234 sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
235 break;
236 case GL_NEAREST_MIPMAP_NEAREST:
237 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
238 sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
239 break;
240 case GL_LINEAR_MIPMAP_NEAREST:
241 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
242 sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
243 break;
244 case GL_NEAREST_MIPMAP_LINEAR:
245 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
246 sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
247 break;
248 case GL_LINEAR_MIPMAP_LINEAR:
249 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
250 sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
251 break;
252 default:
253 break;
254 }
255
256 /* Set Anisotropy:
257 */
258 if (gl_sampler->MaxAnisotropy > 1.0) {
259 sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
260 sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
261
262 if (gl_sampler->MaxAnisotropy > 2.0) {
263 sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2,
264 BRW_ANISORATIO_16);
265 }
266 }
267 else {
268 switch (gl_sampler->MagFilter) {
269 case GL_NEAREST:
270 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
271 using_nearest = true;
272 break;
273 case GL_LINEAR:
274 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
275 break;
276 default:
277 break;
278 }
279 }
280
281 sampler->ss1.r_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapR,
282 using_nearest);
283 sampler->ss1.s_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapS,
284 using_nearest);
285 sampler->ss1.t_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapT,
286 using_nearest);
287
288 if (brw->gen >= 6 &&
289 sampler->ss0.min_filter != sampler->ss0.mag_filter)
290 sampler->ss0.min_mag_neq = 1;
291
292 /* Cube-maps on 965 and later must use the same wrap mode for all 3
293 * coordinate dimensions. Futher, only CUBE and CLAMP are valid.
294 */
295 if (texObj->Target == GL_TEXTURE_CUBE_MAP ||
296 texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
297 if ((ctx->Texture.CubeMapSeamless || gl_sampler->CubeMapSeamless) &&
298 (gl_sampler->MinFilter != GL_NEAREST ||
299 gl_sampler->MagFilter != GL_NEAREST)) {
300 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
301 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
302 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
303 } else {
304 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
305 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
306 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
307 }
308 } else if (texObj->Target == GL_TEXTURE_1D) {
309 /* There's a bug in 1D texture sampling - it actually pays
310 * attention to the wrap_t value, though it should not.
311 * Override the wrap_t value here to GL_REPEAT to keep
312 * any nonexistent border pixels from floating in.
313 */
314 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
315 }
316
317
318 /* Set shadow function:
319 */
320 if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
321 /* Shadowing is "enabled" by emitting a particular sampler
322 * message (sample_c). So need to recompile WM program when
323 * shadow comparison is enabled on each/any texture unit.
324 */
325 sampler->ss0.shadow_function =
326 intel_translate_shadow_compare_func(gl_sampler->CompareFunc);
327 }
328
329 /* Set LOD bias:
330 */
331 sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias +
332 gl_sampler->LodBias, -16, 15), 6);
333
334 sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
335 sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
336
337 sampler->ss0.base_level = U_FIXED(0, 1);
338
339 sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
340 sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
341
342 /* On Gen6+, the sampler can handle non-normalized texture
343 * rectangle coordinates natively
344 */
345 if (brw->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
346 sampler->ss3.non_normalized_coord = 1;
347 }
348
349 uint32_t sdc_offset;
350 upload_default_color(brw, gl_sampler, unit, &sdc_offset);
351
352 if (brw->gen >= 6) {
353 sampler->ss2.default_color_pointer = sdc_offset >> 5;
354 } else {
355 /* reloc */
356 sampler->ss2.default_color_pointer =
357 (brw->batch.bo->offset64 + sdc_offset) >> 5;
358
359 drm_intel_bo_emit_reloc(brw->batch.bo,
360 batch_offset_for_sampler_state +
361 offsetof(struct brw_sampler_state, ss2),
362 brw->batch.bo, sdc_offset,
363 I915_GEM_DOMAIN_SAMPLER, 0);
364 }
365
366 if (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST)
367 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
368 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
369 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
370 if (sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)
371 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
372 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
373 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
374 }
375
376
377 static void
378 brw_upload_sampler_state_table(struct brw_context *brw,
379 struct gl_program *prog,
380 struct brw_stage_state *stage_state)
381 {
382 struct gl_context *ctx = &brw->ctx;
383 struct brw_sampler_state *samplers;
384 uint32_t sampler_count = stage_state->sampler_count;
385
386 GLbitfield SamplersUsed = prog->SamplersUsed;
387
388 if (sampler_count == 0)
389 return;
390
391 samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
392 sampler_count * sizeof(*samplers),
393 32, &stage_state->sampler_offset);
394 memset(samplers, 0, sampler_count * sizeof(*samplers));
395
396 for (unsigned s = 0; s < sampler_count; s++) {
397 if (SamplersUsed & (1 << s)) {
398 const unsigned unit = prog->SamplerUnits[s];
399 if (ctx->Texture.Unit[unit]._Current) {
400 uint32_t batch_offset_for_sampler_state =
401 stage_state->sampler_offset + s * sizeof(*samplers);
402 brw_update_sampler_state(brw, unit, &samplers[s],
403 batch_offset_for_sampler_state);
404 }
405 }
406 }
407
408 brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
409 }
410
411 static void
412 brw_upload_fs_samplers(struct brw_context *brw)
413 {
414 /* BRW_NEW_FRAGMENT_PROGRAM */
415 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
416 brw->vtbl.upload_sampler_state_table(brw, fs, &brw->wm.base);
417 }
418
419 const struct brw_tracked_state brw_fs_samplers = {
420 .dirty = {
421 .mesa = _NEW_TEXTURE,
422 .brw = BRW_NEW_BATCH |
423 BRW_NEW_FRAGMENT_PROGRAM,
424 .cache = 0
425 },
426 .emit = brw_upload_fs_samplers,
427 };
428
429 static void
430 brw_upload_vs_samplers(struct brw_context *brw)
431 {
432 /* BRW_NEW_VERTEX_PROGRAM */
433 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
434 brw->vtbl.upload_sampler_state_table(brw, vs, &brw->vs.base);
435 }
436
437
438 const struct brw_tracked_state brw_vs_samplers = {
439 .dirty = {
440 .mesa = _NEW_TEXTURE,
441 .brw = BRW_NEW_BATCH |
442 BRW_NEW_VERTEX_PROGRAM,
443 .cache = 0
444 },
445 .emit = brw_upload_vs_samplers,
446 };
447
448
449 static void
450 brw_upload_gs_samplers(struct brw_context *brw)
451 {
452 /* BRW_NEW_GEOMETRY_PROGRAM */
453 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
454 if (!gs)
455 return;
456
457 brw->vtbl.upload_sampler_state_table(brw, gs, &brw->gs.base);
458 }
459
460
461 const struct brw_tracked_state brw_gs_samplers = {
462 .dirty = {
463 .mesa = _NEW_TEXTURE,
464 .brw = BRW_NEW_BATCH |
465 BRW_NEW_GEOMETRY_PROGRAM,
466 .cache = 0
467 },
468 .emit = brw_upload_gs_samplers,
469 };
470
471
472 void
473 gen4_init_vtable_sampler_functions(struct brw_context *brw)
474 {
475 brw->vtbl.upload_sampler_state_table = brw_upload_sampler_state_table;
476 }