i965: Check the INTEL_USE_NIR environment variable once at context creation
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"

#include "glsl/nir/nir.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}
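
/* For example, on a Gen7 part this fills samples[] with {8, 4} and returns
 * 2; core Mesa then reports those counts back to applications through
 * GL_NUM_SAMPLE_COUNTS / GL_SAMPLES queries such as glGetInternalformativ().
 */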

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}
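
/* The switch above is an X-macro expansion: i965_pci_ids.h is a list of
 * CHIPSET(id, symbol, str) entries, so each entry expands into a case label
 * roughly like
 *
 *    case 0x0162: chipset = "Intel(R) Ivybridge Desktop"; break;
 *
 * (illustrative values only; the real id/string pairs live in the header).
 */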

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      dri2InvalidateDrawable(driContext->driDrawablePriv);
      dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   _mesa_lock_context_textures(ctx);
}

#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
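
/* flushFront(screen) evaluates to the loader's flushFrontBuffer callback,
 * preferring the image loader when one is bound and falling back to the
 * DRI2 loader.  It yields a function pointer, which callers NULL-check
 * before invoking, as intel_flush_front() does below.
 */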

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   if (brw->gen >= 6)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   if (_mesa_extension_override_enables.ARB_compute_shader) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += 12;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxTextureMbytes = 1536;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
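
   /* With the definitions in brw_context.h at the time of writing
    * (BRW_MAX_SOL_BINDINGS == 64, BRW_MAX_SOL_BUFFERS == 4), this
    * advertises 64 interleaved components and 64 / 4 = 16 separate
    * components per buffer.
    */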

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }
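
   /* msaa_modes is sorted in decreasing order and zero-terminated, so the
    * first mode not exceeding the clamp is also the largest such mode.
    * For example, with msaa_modes = {8, 4, 0} and clamp_max_samples = 6,
    * the loop skips 8 and settles on 4; a clamp below every supported mode
    * leaves max_samples at 0, disabling multisampling entirely.
    */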

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 9 || brw->is_cherryview) {
      ctx->Const.MaxLineWidth = 40.0;
      ctx->Const.MaxLineWidthAA = 40.0;
      ctx->Const.LineWidthGranularity = 0.125;
   } else if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
   }

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking-vertex decision. Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
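
   /* Concretely: on Gen4-5 only the LSB of a CMP result is meaningful, so a
    * "true" may come back as, say, 0x???????1.  Masking with & 1 yields 1,
    * and negating that gives 0xFFFFFFFF (~0), matching the boolean
    * representation chosen above; a false result becomes 0 either way.
    */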

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;
   ctx->Const.TextureBufferOffsetAlignment = 16;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   static const nir_shader_compiler_options nir_options = {
      .native_integers = true,
      /* In order to help allow for better CSE at the NIR level we tell NIR
       * to split all ffma instructions during opt_algebraic and we then
       * re-combine them as a later step.
       */
      .lower_ffma = true,
   };

   bool use_nir_default[MESA_SHADER_STAGES];
   use_nir_default[MESA_SHADER_VERTEX] = false;
   use_nir_default[MESA_SHADER_GEOMETRY] = false;
   use_nir_default[MESA_SHADER_FRAGMENT] = false;
   use_nir_default[MESA_SHADER_COMPUTE] = false;

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
      ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
         (i == MESA_SHADER_FRAGMENT);
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
         (i == MESA_SHADER_FRAGMENT);
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
      ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;

      if (brw_env_var_as_boolean("INTEL_USE_NIR", use_nir_default[i]))
         ctx->Const.ShaderCompilerOptions[i].NirOptions = &nir_options;
   }
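
   /* INTEL_USE_NIR is read once here, at context creation, rather than at
    * every shader compile.  brw_env_var_as_boolean() is a small getenv()
    * wrapper defined elsewhere in the driver; a rough sketch of its
    * behavior (not a verbatim copy of the implementation):
    *
    *    bool brw_env_var_as_boolean(const char *var_name, bool dflt)
    *    {
    *       const char *str = getenv(var_name);
    *       if (!str)
    *          return dflt;
    *       return strcmp(str, "0") != 0 && strcasecmp(str, "false") != 0;
    *    }
    */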

   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
   ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;

   if (brw->scalar_vs) {
      /* If we're using the scalar backend for vertex shaders, we need to
       * configure these accordingly.
       */
      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
   }

   /* ARB_viewport_array */
   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HiZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
}

/* drop when libdrm 2.4.61 is released */
#ifndef I915_PARAM_REVISION
#define I915_PARAM_REVISION 32
#endif

static int
brw_get_revision(int fd)
{
   struct drm_i915_getparam gp;
   int revision;
   int ret;

   memset(&gp, 0, sizeof(gp));
   gp.param = I915_PARAM_REVISION;
   gp.value = &revision;

   ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
   if (ret)
      revision = -1;

   return revision;
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;
   brw->revision = brw_get_revision(sPriv->fd);

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);
   brw_process_intel_debug_variable(brw);

   if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
      brw->scalar_vs = true;

   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;
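
   /* With the 256MB estimate above, this caps individually mappable objects
    * at 64MB: half the aperture for the two sides of a memcpy, then half
    * again as headroom for things like scanout and ring buffers.
    */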

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   assert(brw); /* should never be null */
   if (!brw)
      return;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES 2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport.
       */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }
}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((brw_is_front_buffer_drawing(fb) ||
        brw_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));
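
   /* attachments[] is packed as (attachment, bits-per-pixel) pairs, which is
    * why the call below passes i / 2 as the pair count.  For example, a
    * double-buffered 32-bpp drawable that only needs a back buffer ends up
    * with attachments = { __DRI_BUFFER_BACK_LEFT, 32 } and i / 2 == 1.
    */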
   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   intel_miptree_release(&rb->mt);
   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (brw_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable  Drawable whose buffers are queried.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (brw_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (brw_is_front_buffer_drawing(fb) ||
                    brw_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}