i965: Shift brw_upload_sampler_state_table away from structures.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_sampler_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 /**
33 * @file brw_sampler_state.c
34 *
35 * This file contains code for emitting SAMPLER_STATE structures, which
36 * specify filter modes, wrap modes, border color, and so on.
37 */
38
39 #include "brw_context.h"
40 #include "brw_state.h"
41 #include "brw_defines.h"
42 #include "intel_mipmap_tree.h"
43
44 #include "main/macros.h"
45 #include "main/samplerobj.h"
46
47 uint32_t
48 translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
49 {
50 switch( wrap ) {
51 case GL_REPEAT:
52 return BRW_TEXCOORDMODE_WRAP;
53 case GL_CLAMP:
54 /* GL_CLAMP is the weird mode where coordinates are clamped to
55 * [0.0, 1.0], so linear filtering of coordinates outside of
56 * [0.0, 1.0] give you half edge texel value and half border
57 * color.
58 *
59 * Gen8+ supports this natively.
60 */
61 if (brw->gen >= 8)
62 return GEN8_TEXCOORDMODE_HALF_BORDER;
63
64 /* On Gen4-7.5, we clamp the coordinates in the fragment shader
65 * and set clamp_border here, which gets the result desired.
66 * We just use clamp(_to_edge) for nearest, because for nearest
67 * clamping to 1.0 gives border color instead of the desired
68 * edge texels.
69 */
70 if (using_nearest)
71 return BRW_TEXCOORDMODE_CLAMP;
72 else
73 return BRW_TEXCOORDMODE_CLAMP_BORDER;
74 case GL_CLAMP_TO_EDGE:
75 return BRW_TEXCOORDMODE_CLAMP;
76 case GL_CLAMP_TO_BORDER:
77 return BRW_TEXCOORDMODE_CLAMP_BORDER;
78 case GL_MIRRORED_REPEAT:
79 return BRW_TEXCOORDMODE_MIRROR;
80 case GL_MIRROR_CLAMP_TO_EDGE:
81 return BRW_TEXCOORDMODE_MIRROR_ONCE;
82 default:
83 return BRW_TEXCOORDMODE_WRAP;
84 }
85 }
86
87 /**
88 * Upload SAMPLER_BORDER_COLOR_STATE.
89 */
90 void
91 upload_default_color(struct brw_context *brw,
92 struct gl_sampler_object *sampler,
93 int unit,
94 uint32_t *sdc_offset)
95 {
96 struct gl_context *ctx = &brw->ctx;
97 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
98 struct gl_texture_object *texObj = texUnit->_Current;
99 struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel];
100 float color[4];
101
102 switch (firstImage->_BaseFormat) {
103 case GL_DEPTH_COMPONENT:
104 /* GL specs that border color for depth textures is taken from the
105 * R channel, while the hardware uses A. Spam R into all the
106 * channels for safety.
107 */
108 color[0] = sampler->BorderColor.f[0];
109 color[1] = sampler->BorderColor.f[0];
110 color[2] = sampler->BorderColor.f[0];
111 color[3] = sampler->BorderColor.f[0];
112 break;
113 case GL_ALPHA:
114 color[0] = 0.0;
115 color[1] = 0.0;
116 color[2] = 0.0;
117 color[3] = sampler->BorderColor.f[3];
118 break;
119 case GL_INTENSITY:
120 color[0] = sampler->BorderColor.f[0];
121 color[1] = sampler->BorderColor.f[0];
122 color[2] = sampler->BorderColor.f[0];
123 color[3] = sampler->BorderColor.f[0];
124 break;
125 case GL_LUMINANCE:
126 color[0] = sampler->BorderColor.f[0];
127 color[1] = sampler->BorderColor.f[0];
128 color[2] = sampler->BorderColor.f[0];
129 color[3] = 1.0;
130 break;
131 case GL_LUMINANCE_ALPHA:
132 color[0] = sampler->BorderColor.f[0];
133 color[1] = sampler->BorderColor.f[0];
134 color[2] = sampler->BorderColor.f[0];
135 color[3] = sampler->BorderColor.f[3];
136 break;
137 default:
138 color[0] = sampler->BorderColor.f[0];
139 color[1] = sampler->BorderColor.f[1];
140 color[2] = sampler->BorderColor.f[2];
141 color[3] = sampler->BorderColor.f[3];
142 break;
143 }
144
145 /* In some cases we use an RGBA surface format for GL RGB textures,
146 * where we've initialized the A channel to 1.0. We also have to set
147 * the border color alpha to 1.0 in that case.
148 */
149 if (firstImage->_BaseFormat == GL_RGB)
150 color[3] = 1.0;
151
152 if (brw->gen >= 8) {
153 /* On Broadwell, the border color is represented as four 32-bit floats,
154 * integers, or unsigned values, interpreted according to the surface
155 * format. This matches the sampler->BorderColor union exactly. Since
156 * we use floats both here and in the above reswizzling code, we preserve
157 * the original bit pattern. So we actually handle all three formats.
158 */
159 float *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
160 4 * 4, 64, sdc_offset);
161 COPY_4FV(sdc, color);
162 } else if (brw->gen == 5 || brw->gen == 6) {
163 struct gen5_sampler_default_color *sdc;
164
165 sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
166 sizeof(*sdc), 32, sdc_offset);
167
168 memset(sdc, 0, sizeof(*sdc));
169
170 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]);
171 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]);
172 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]);
173 UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]);
174
175 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]);
176 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]);
177 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]);
178 UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]);
179
180 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]);
181 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]);
182 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]);
183 UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]);
184
185 sdc->hf[0] = _mesa_float_to_half(color[0]);
186 sdc->hf[1] = _mesa_float_to_half(color[1]);
187 sdc->hf[2] = _mesa_float_to_half(color[2]);
188 sdc->hf[3] = _mesa_float_to_half(color[3]);
189
190 sdc->b[0] = sdc->s[0] >> 8;
191 sdc->b[1] = sdc->s[1] >> 8;
192 sdc->b[2] = sdc->s[2] >> 8;
193 sdc->b[3] = sdc->s[3] >> 8;
194
195 sdc->f[0] = color[0];
196 sdc->f[1] = color[1];
197 sdc->f[2] = color[2];
198 sdc->f[3] = color[3];
199 } else {
200 float *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
201 4 * 4, 32, sdc_offset);
202 memcpy(sdc, color, 4 * 4);
203 }
204 }
205
206 /**
207 * Sets the sampler state for a single unit based off of the sampler key
208 * entry.
209 */
210 static void
211 brw_update_sampler_state(struct brw_context *brw,
212 int unit,
213 struct brw_sampler_state *sampler,
214 uint32_t batch_offset_for_sampler_state)
215 {
216 struct gl_context *ctx = &brw->ctx;
217 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
218 struct gl_texture_object *texObj = texUnit->_Current;
219 struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
220 bool using_nearest = false;
221
222 /* These don't use samplers at all. */
223 if (texObj->Target == GL_TEXTURE_BUFFER)
224 return;
225
226 switch (gl_sampler->MinFilter) {
227 case GL_NEAREST:
228 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
229 sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
230 using_nearest = true;
231 break;
232 case GL_LINEAR:
233 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
234 sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
235 break;
236 case GL_NEAREST_MIPMAP_NEAREST:
237 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
238 sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
239 break;
240 case GL_LINEAR_MIPMAP_NEAREST:
241 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
242 sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
243 break;
244 case GL_NEAREST_MIPMAP_LINEAR:
245 sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
246 sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
247 break;
248 case GL_LINEAR_MIPMAP_LINEAR:
249 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
250 sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
251 break;
252 default:
253 break;
254 }
255
256 /* Set Anisotropy:
257 */
258 if (gl_sampler->MaxAnisotropy > 1.0) {
259 sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
260 sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
261
262 if (gl_sampler->MaxAnisotropy > 2.0) {
263 sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2,
264 BRW_ANISORATIO_16);
265 }
266 }
267 else {
268 switch (gl_sampler->MagFilter) {
269 case GL_NEAREST:
270 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
271 using_nearest = true;
272 break;
273 case GL_LINEAR:
274 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
275 break;
276 default:
277 break;
278 }
279 }
280
281 sampler->ss1.r_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapR,
282 using_nearest);
283 sampler->ss1.s_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapS,
284 using_nearest);
285 sampler->ss1.t_wrap_mode = translate_wrap_mode(brw, gl_sampler->WrapT,
286 using_nearest);
287
288 if (brw->gen >= 6 &&
289 sampler->ss0.min_filter != sampler->ss0.mag_filter)
290 sampler->ss0.min_mag_neq = 1;
291
292 /* Cube-maps on 965 and later must use the same wrap mode for all 3
293 * coordinate dimensions. Futher, only CUBE and CLAMP are valid.
294 */
295 if (texObj->Target == GL_TEXTURE_CUBE_MAP ||
296 texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
297 if ((ctx->Texture.CubeMapSeamless || gl_sampler->CubeMapSeamless) &&
298 (gl_sampler->MinFilter != GL_NEAREST ||
299 gl_sampler->MagFilter != GL_NEAREST)) {
300 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
301 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
302 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
303 } else {
304 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
305 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
306 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
307 }
308 } else if (texObj->Target == GL_TEXTURE_1D) {
309 /* There's a bug in 1D texture sampling - it actually pays
310 * attention to the wrap_t value, though it should not.
311 * Override the wrap_t value here to GL_REPEAT to keep
312 * any nonexistent border pixels from floating in.
313 */
314 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
315 }
316
317
318 /* Set shadow function:
319 */
320 if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
321 /* Shadowing is "enabled" by emitting a particular sampler
322 * message (sample_c). So need to recompile WM program when
323 * shadow comparison is enabled on each/any texture unit.
324 */
325 sampler->ss0.shadow_function =
326 intel_translate_shadow_compare_func(gl_sampler->CompareFunc);
327 }
328
329 /* Set LOD bias:
330 */
331 sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias +
332 gl_sampler->LodBias, -16, 15), 6);
333
334 sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
335 sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
336
337 sampler->ss0.base_level = U_FIXED(0, 1);
338
339 sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
340 sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
341
342 /* On Gen6+, the sampler can handle non-normalized texture
343 * rectangle coordinates natively
344 */
345 if (brw->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
346 sampler->ss3.non_normalized_coord = 1;
347 }
348
349 uint32_t sdc_offset;
350 upload_default_color(brw, gl_sampler, unit, &sdc_offset);
351
352 if (brw->gen >= 6) {
353 sampler->ss2.default_color_pointer = sdc_offset >> 5;
354 } else {
355 /* reloc */
356 sampler->ss2.default_color_pointer =
357 (brw->batch.bo->offset64 + sdc_offset) >> 5;
358
359 drm_intel_bo_emit_reloc(brw->batch.bo,
360 batch_offset_for_sampler_state +
361 offsetof(struct brw_sampler_state, ss2),
362 brw->batch.bo, sdc_offset,
363 I915_GEM_DOMAIN_SAMPLER, 0);
364 }
365
366 if (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST)
367 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
368 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
369 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
370 if (sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)
371 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
372 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
373 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
374 }
375
376
377 static void
378 brw_upload_sampler_state_table(struct brw_context *brw,
379 struct gl_program *prog,
380 struct brw_stage_state *stage_state)
381 {
382 struct gl_context *ctx = &brw->ctx;
383 uint32_t sampler_count = stage_state->sampler_count;
384
385 GLbitfield SamplersUsed = prog->SamplersUsed;
386
387 if (sampler_count == 0)
388 return;
389
390 /* SAMPLER_STATE is 4 DWords on all platforms. */
391 const int dwords = 4;
392 const int size_in_bytes = dwords * sizeof(uint32_t);
393
394 uint32_t *sampler_state = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
395 sampler_count * size_in_bytes,
396 32, &stage_state->sampler_offset);
397 memset(sampler_state, 0, sampler_count * size_in_bytes);
398
399 uint32_t batch_offset_for_sampler_state = stage_state->sampler_offset;
400
401 for (unsigned s = 0; s < sampler_count; s++) {
402 if (SamplersUsed & (1 << s)) {
403 const unsigned unit = prog->SamplerUnits[s];
404 if (ctx->Texture.Unit[unit]._Current) {
405 brw_update_sampler_state(brw, unit,
406 (struct brw_sampler_state *) sampler_state,
407 batch_offset_for_sampler_state);
408 }
409 }
410
411 sampler_state += dwords;
412 batch_offset_for_sampler_state += size_in_bytes;
413 }
414
415 brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
416 }
417
418 static void
419 brw_upload_fs_samplers(struct brw_context *brw)
420 {
421 /* BRW_NEW_FRAGMENT_PROGRAM */
422 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
423 brw->vtbl.upload_sampler_state_table(brw, fs, &brw->wm.base);
424 }
425
426 const struct brw_tracked_state brw_fs_samplers = {
427 .dirty = {
428 .mesa = _NEW_TEXTURE,
429 .brw = BRW_NEW_BATCH |
430 BRW_NEW_FRAGMENT_PROGRAM,
431 .cache = 0
432 },
433 .emit = brw_upload_fs_samplers,
434 };
435
436 static void
437 brw_upload_vs_samplers(struct brw_context *brw)
438 {
439 /* BRW_NEW_VERTEX_PROGRAM */
440 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
441 brw->vtbl.upload_sampler_state_table(brw, vs, &brw->vs.base);
442 }
443
444
445 const struct brw_tracked_state brw_vs_samplers = {
446 .dirty = {
447 .mesa = _NEW_TEXTURE,
448 .brw = BRW_NEW_BATCH |
449 BRW_NEW_VERTEX_PROGRAM,
450 .cache = 0
451 },
452 .emit = brw_upload_vs_samplers,
453 };
454
455
456 static void
457 brw_upload_gs_samplers(struct brw_context *brw)
458 {
459 /* BRW_NEW_GEOMETRY_PROGRAM */
460 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
461 if (!gs)
462 return;
463
464 brw->vtbl.upload_sampler_state_table(brw, gs, &brw->gs.base);
465 }
466
467
468 const struct brw_tracked_state brw_gs_samplers = {
469 .dirty = {
470 .mesa = _NEW_TEXTURE,
471 .brw = BRW_NEW_BATCH |
472 BRW_NEW_GEOMETRY_PROGRAM,
473 .cache = 0
474 },
475 .emit = brw_upload_gs_samplers,
476 };
477
478
479 void
480 gen4_init_vtable_sampler_functions(struct brw_context *brw)
481 {
482 brw->vtbl.upload_sampler_state_table = brw_upload_sampler_state_table;
483 }