2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **********************************************************************/
30 * Keith Whitwell <keithw@vmware.com>
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
52 #include "drivers/common/driverfuncs.h"
53 #include "drivers/common/meta.h"
56 #include "brw_context.h"
57 #include "brw_defines.h"
58 #include "brw_blorp.h"
60 #include "brw_state.h"
62 #include "intel_batchbuffer.h"
63 #include "intel_buffer_objects.h"
64 #include "intel_buffers.h"
65 #include "intel_fbo.h"
66 #include "intel_mipmap_tree.h"
67 #include "intel_pixel.h"
68 #include "intel_image.h"
69 #include "intel_tex.h"
70 #include "intel_tex_obj.h"
72 #include "swrast_setup/swrast_setup.h"
74 #include "tnl/t_pipeline.h"
75 #include "util/ralloc.h"
76 #include "util/debug.h"
77 #include "util/disk_cache.h"
80 #include "common/gen_defines.h"
82 #include "compiler/spirv/nir_spirv.h"
83 /***************************************
84 * Mesa's Driver Functions
85 ***************************************/
/** GL_VENDOR string returned by intel_get_string(). */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
90 get_bsw_model(const struct intel_screen
*screen
)
92 switch (screen
->eu_total
) {
103 brw_get_renderer_string(const struct intel_screen
*screen
)
106 static char buffer
[128];
109 switch (screen
->deviceID
) {
111 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
112 #include "pci_ids/i965_pci_ids.h"
114 chipset
= "Unknown Intel Chipset";
118 /* Braswell branding is funny, so we have to fix it up here */
119 if (screen
->deviceID
== 0x22B1) {
120 bsw
= strdup(chipset
);
121 char *needle
= strstr(bsw
, "XXX");
123 memcpy(needle
, get_bsw_model(screen
), 3);
128 (void) driGetRendererString(buffer
, chipset
, 0);
133 static const GLubyte
*
134 intel_get_string(struct gl_context
* ctx
, GLenum name
)
136 const struct brw_context
*const brw
= brw_context(ctx
);
140 return (GLubyte
*) brw_vendor_string
;
144 (GLubyte
*) brw_get_renderer_string(brw
->screen
);
152 brw_set_background_context(struct gl_context
*ctx
,
153 struct util_queue_monitoring
*queue_info
)
155 struct brw_context
*brw
= brw_context(ctx
);
156 __DRIcontext
*driContext
= brw
->driContext
;
157 __DRIscreen
*driScreen
= driContext
->driScreenPriv
;
158 const __DRIbackgroundCallableExtension
*backgroundCallable
=
159 driScreen
->dri2
.backgroundCallable
;
161 /* Note: Mesa will only call this function if we've called
162 * _mesa_enable_multithreading(). We only do that if the loader exposed
163 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
164 * backgroundCallable is not NULL.
166 backgroundCallable
->setBackgroundContext(driContext
->loaderPrivate
);
170 intel_viewport(struct gl_context
*ctx
)
172 struct brw_context
*brw
= brw_context(ctx
);
173 __DRIcontext
*driContext
= brw
->driContext
;
175 if (_mesa_is_winsys_fbo(ctx
->DrawBuffer
)) {
176 if (driContext
->driDrawablePriv
)
177 dri2InvalidateDrawable(driContext
->driDrawablePriv
);
178 if (driContext
->driReadablePriv
)
179 dri2InvalidateDrawable(driContext
->driReadablePriv
);
184 intel_update_framebuffer(struct gl_context
*ctx
,
185 struct gl_framebuffer
*fb
)
187 struct brw_context
*brw
= brw_context(ctx
);
189 /* Quantize the derived default number of samples
191 fb
->DefaultGeometry
._NumSamples
=
192 intel_quantize_num_samples(brw
->screen
,
193 fb
->DefaultGeometry
.NumSamples
);
197 intel_update_state(struct gl_context
* ctx
)
199 GLuint new_state
= ctx
->NewState
;
200 struct brw_context
*brw
= brw_context(ctx
);
202 if (ctx
->swrast_context
)
203 _swrast_InvalidateState(ctx
, new_state
);
205 brw
->NewGLState
|= new_state
;
207 if (new_state
& (_NEW_SCISSOR
| _NEW_BUFFERS
| _NEW_VIEWPORT
))
208 _mesa_update_draw_buffer_bounds(ctx
, ctx
->DrawBuffer
);
210 if (new_state
& (_NEW_STENCIL
| _NEW_BUFFERS
)) {
211 brw
->stencil_enabled
= _mesa_stencil_is_enabled(ctx
);
212 brw
->stencil_two_sided
= _mesa_stencil_is_two_sided(ctx
);
213 brw
->stencil_write_enabled
=
214 _mesa_stencil_is_write_enabled(ctx
, brw
->stencil_two_sided
);
217 if (new_state
& _NEW_POLYGON
)
218 brw
->polygon_front_bit
= _mesa_polygon_get_front_bit(ctx
);
220 if (new_state
& _NEW_BUFFERS
) {
221 intel_update_framebuffer(ctx
, ctx
->DrawBuffer
);
222 if (ctx
->DrawBuffer
!= ctx
->ReadBuffer
)
223 intel_update_framebuffer(ctx
, ctx
->ReadBuffer
);
227 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
230 intel_flush_front(struct gl_context
*ctx
)
232 struct brw_context
*brw
= brw_context(ctx
);
233 __DRIcontext
*driContext
= brw
->driContext
;
234 __DRIdrawable
*driDrawable
= driContext
->driDrawablePriv
;
235 __DRIscreen
*const dri_screen
= brw
->screen
->driScrnPriv
;
237 if (brw
->front_buffer_dirty
&& _mesa_is_winsys_fbo(ctx
->DrawBuffer
)) {
238 if (flushFront(dri_screen
) && driDrawable
&&
239 driDrawable
->loaderPrivate
) {
241 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
243 * This potentially resolves both front and back buffer. It
244 * is unnecessary to resolve the back, but harms nothing except
245 * performance. And no one cares about front-buffer render
248 intel_resolve_for_dri2_flush(brw
, driDrawable
);
249 intel_batchbuffer_flush(brw
);
251 flushFront(dri_screen
)(driDrawable
, driDrawable
->loaderPrivate
);
253 /* We set the dirty bit in intel_prepare_render() if we're
254 * front buffer rendering once we get there.
256 brw
->front_buffer_dirty
= false;
262 brw_display_shared_buffer(struct brw_context
*brw
)
264 __DRIcontext
*dri_context
= brw
->driContext
;
265 __DRIdrawable
*dri_drawable
= dri_context
->driDrawablePriv
;
266 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
269 if (!brw
->is_shared_buffer_bound
)
272 if (!brw
->is_shared_buffer_dirty
)
275 if (brw
->screen
->has_exec_fence
) {
276 /* This function is always called during a flush operation, so there is
277 * no need to flush again here. But we want to provide a fence_fd to the
278 * loader, and a redundant flush is the easiest way to acquire one.
280 if (intel_batchbuffer_flush_fence(brw
, -1, &fence_fd
))
284 dri_screen
->mutableRenderBuffer
.loader
285 ->displaySharedBuffer(dri_drawable
, fence_fd
,
286 dri_drawable
->loaderPrivate
);
287 brw
->is_shared_buffer_dirty
= false;
291 intel_glFlush(struct gl_context
*ctx
)
293 struct brw_context
*brw
= brw_context(ctx
);
295 intel_batchbuffer_flush(brw
);
296 intel_flush_front(ctx
);
297 brw_display_shared_buffer(brw
);
298 brw
->need_flush_throttle
= true;
302 intel_finish(struct gl_context
* ctx
)
304 struct brw_context
*brw
= brw_context(ctx
);
308 if (brw
->batch
.last_bo
)
309 brw_bo_wait_rendering(brw
->batch
.last_bo
);
313 brw_init_driver_functions(struct brw_context
*brw
,
314 struct dd_function_table
*functions
)
316 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
318 _mesa_init_driver_functions(functions
);
320 /* GLX uses DRI2 invalidate events to handle window resizing.
321 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
322 * which doesn't provide a mechanism for snooping the event queues.
324 * So EGL still relies on viewport hacks to handle window resizing.
325 * This should go away with DRI3000.
327 if (!brw
->driContext
->driScreenPriv
->dri2
.useInvalidate
)
328 functions
->Viewport
= intel_viewport
;
330 functions
->Flush
= intel_glFlush
;
331 functions
->Finish
= intel_finish
;
332 functions
->GetString
= intel_get_string
;
333 functions
->UpdateState
= intel_update_state
;
335 brw_init_draw_functions(functions
);
336 intelInitTextureFuncs(functions
);
337 intelInitTextureImageFuncs(functions
);
338 intelInitTextureCopyImageFuncs(functions
);
339 intelInitCopyImageFuncs(functions
);
340 intelInitClearFuncs(functions
);
341 intelInitBufferFuncs(functions
);
342 intelInitPixelFuncs(functions
);
343 intelInitBufferObjectFuncs(functions
);
344 brw_init_syncobj_functions(functions
);
345 brw_init_object_purgeable_functions(functions
);
347 brwInitFragProgFuncs( functions
);
348 brw_init_common_queryobj_functions(functions
);
349 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
)
350 hsw_init_queryobj_functions(functions
);
351 else if (devinfo
->gen
>= 6)
352 gen6_init_queryobj_functions(functions
);
354 gen4_init_queryobj_functions(functions
);
355 brw_init_compute_functions(functions
);
356 brw_init_conditional_render_functions(functions
);
358 functions
->GenerateMipmap
= brw_generate_mipmap
;
360 functions
->QueryInternalFormat
= brw_query_internal_format
;
362 functions
->NewTransformFeedback
= brw_new_transform_feedback
;
363 functions
->DeleteTransformFeedback
= brw_delete_transform_feedback
;
364 if (can_do_mi_math_and_lrr(brw
->screen
)) {
365 functions
->BeginTransformFeedback
= hsw_begin_transform_feedback
;
366 functions
->EndTransformFeedback
= hsw_end_transform_feedback
;
367 functions
->PauseTransformFeedback
= hsw_pause_transform_feedback
;
368 functions
->ResumeTransformFeedback
= hsw_resume_transform_feedback
;
369 } else if (devinfo
->gen
>= 7) {
370 functions
->BeginTransformFeedback
= gen7_begin_transform_feedback
;
371 functions
->EndTransformFeedback
= gen7_end_transform_feedback
;
372 functions
->PauseTransformFeedback
= gen7_pause_transform_feedback
;
373 functions
->ResumeTransformFeedback
= gen7_resume_transform_feedback
;
374 functions
->GetTransformFeedbackVertexCount
=
375 brw_get_transform_feedback_vertex_count
;
377 functions
->BeginTransformFeedback
= brw_begin_transform_feedback
;
378 functions
->EndTransformFeedback
= brw_end_transform_feedback
;
379 functions
->PauseTransformFeedback
= brw_pause_transform_feedback
;
380 functions
->ResumeTransformFeedback
= brw_resume_transform_feedback
;
381 functions
->GetTransformFeedbackVertexCount
=
382 brw_get_transform_feedback_vertex_count
;
385 if (devinfo
->gen
>= 6)
386 functions
->GetSamplePosition
= gen6_get_sample_position
;
388 /* GL_ARB_get_program_binary */
389 brw_program_binary_init(brw
->screen
->deviceID
);
390 functions
->GetProgramBinaryDriverSHA1
= brw_get_program_binary_driver_sha1
;
391 functions
->ProgramBinarySerializeDriverBlob
= brw_serialize_program_binary
;
392 functions
->ProgramBinaryDeserializeDriverBlob
=
393 brw_deserialize_program_binary
;
395 if (brw
->screen
->disk_cache
) {
396 functions
->ShaderCacheSerializeDriverBlob
= brw_program_serialize_nir
;
399 functions
->SetBackgroundContext
= brw_set_background_context
;
403 brw_initialize_spirv_supported_capabilities(struct brw_context
*brw
)
405 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
406 struct gl_context
*ctx
= &brw
->ctx
;
408 /* The following SPIR-V capabilities are only supported on gen7+. In theory
409 * you should enable the extension only on gen7+, but just in case let's
412 assert(devinfo
->gen
>= 7);
414 ctx
->Const
.SpirVCapabilities
.atomic_storage
= devinfo
->gen
>= 7;
415 ctx
->Const
.SpirVCapabilities
.draw_parameters
= true;
416 ctx
->Const
.SpirVCapabilities
.float64
= devinfo
->gen
>= 8;
417 ctx
->Const
.SpirVCapabilities
.geometry_streams
= devinfo
->gen
>= 7;
418 ctx
->Const
.SpirVCapabilities
.image_write_without_format
= true;
419 ctx
->Const
.SpirVCapabilities
.int64
= devinfo
->gen
>= 8;
420 ctx
->Const
.SpirVCapabilities
.tessellation
= true;
421 ctx
->Const
.SpirVCapabilities
.transform_feedback
= devinfo
->gen
>= 7;
422 ctx
->Const
.SpirVCapabilities
.variable_pointers
= true;
426 brw_initialize_context_constants(struct brw_context
*brw
)
428 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
429 struct gl_context
*ctx
= &brw
->ctx
;
430 const struct brw_compiler
*compiler
= brw
->screen
->compiler
;
432 const bool stage_exists
[MESA_SHADER_STAGES
] = {
433 [MESA_SHADER_VERTEX
] = true,
434 [MESA_SHADER_TESS_CTRL
] = devinfo
->gen
>= 7,
435 [MESA_SHADER_TESS_EVAL
] = devinfo
->gen
>= 7,
436 [MESA_SHADER_GEOMETRY
] = devinfo
->gen
>= 6,
437 [MESA_SHADER_FRAGMENT
] = true,
438 [MESA_SHADER_COMPUTE
] =
439 (_mesa_is_desktop_gl(ctx
) &&
440 ctx
->Const
.MaxComputeWorkGroupSize
[0] >= 1024) ||
441 (ctx
->API
== API_OPENGLES2
&&
442 ctx
->Const
.MaxComputeWorkGroupSize
[0] >= 128),
445 unsigned num_stages
= 0;
446 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
451 unsigned max_samplers
=
452 devinfo
->gen
>= 8 || devinfo
->is_haswell
? BRW_MAX_TEX_UNIT
: 16;
454 ctx
->Const
.MaxDualSourceDrawBuffers
= 1;
455 ctx
->Const
.MaxDrawBuffers
= BRW_MAX_DRAW_BUFFERS
;
456 ctx
->Const
.MaxCombinedShaderOutputResources
=
457 MAX_IMAGE_UNITS
+ BRW_MAX_DRAW_BUFFERS
;
459 /* The timestamp register we can read for glGetTimestamp() is
460 * sometimes only 32 bits, before scaling to nanoseconds (depending
463 * Once scaled to nanoseconds the timestamp would roll over at a
464 * non-power-of-two, so an application couldn't use
465 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
466 * report 36 bits and truncate at that (rolling over 5 times as
467 * often as the HW counter), and when the 32-bit counter rolls
468 * over, it happens to also be at a rollover in the reported value
469 * from near (1<<36) to 0.
471 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
472 * rolls over every ~69 seconds.
474 ctx
->Const
.QueryCounterBits
.Timestamp
= 36;
476 ctx
->Const
.MaxTextureCoordUnits
= 8; /* Mesa limit */
477 ctx
->Const
.MaxImageUnits
= MAX_IMAGE_UNITS
;
478 if (devinfo
->gen
>= 7) {
479 ctx
->Const
.MaxRenderbufferSize
= 16384;
480 ctx
->Const
.MaxTextureSize
= 16384;
481 ctx
->Const
.MaxCubeTextureLevels
= 15; /* 16384 */
483 ctx
->Const
.MaxRenderbufferSize
= 8192;
484 ctx
->Const
.MaxTextureSize
= 8192;
485 ctx
->Const
.MaxCubeTextureLevels
= 14; /* 8192 */
487 ctx
->Const
.Max3DTextureLevels
= 12; /* 2048 */
488 ctx
->Const
.MaxArrayTextureLayers
= devinfo
->gen
>= 7 ? 2048 : 512;
489 ctx
->Const
.MaxTextureMbytes
= 1536;
490 ctx
->Const
.MaxTextureRectSize
= devinfo
->gen
>= 7 ? 16384 : 8192;
491 ctx
->Const
.MaxTextureMaxAnisotropy
= 16.0;
492 ctx
->Const
.MaxTextureLodBias
= 15.0;
493 ctx
->Const
.StripTextureBorder
= true;
494 if (devinfo
->gen
>= 7) {
495 ctx
->Const
.MaxProgramTextureGatherComponents
= 4;
496 ctx
->Const
.MinProgramTextureGatherOffset
= -32;
497 ctx
->Const
.MaxProgramTextureGatherOffset
= 31;
498 } else if (devinfo
->gen
== 6) {
499 ctx
->Const
.MaxProgramTextureGatherComponents
= 1;
500 ctx
->Const
.MinProgramTextureGatherOffset
= -8;
501 ctx
->Const
.MaxProgramTextureGatherOffset
= 7;
504 ctx
->Const
.MaxUniformBlockSize
= 65536;
506 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
507 struct gl_program_constants
*prog
= &ctx
->Const
.Program
[i
];
509 if (!stage_exists
[i
])
512 prog
->MaxTextureImageUnits
= max_samplers
;
514 prog
->MaxUniformBlocks
= BRW_MAX_UBO
;
515 prog
->MaxCombinedUniformComponents
=
516 prog
->MaxUniformComponents
+
517 ctx
->Const
.MaxUniformBlockSize
/ 4 * prog
->MaxUniformBlocks
;
519 prog
->MaxAtomicCounters
= MAX_ATOMIC_COUNTERS
;
520 prog
->MaxAtomicBuffers
= BRW_MAX_ABO
;
521 prog
->MaxImageUniforms
= compiler
->scalar_stage
[i
] ? BRW_MAX_IMAGES
: 0;
522 prog
->MaxShaderStorageBlocks
= BRW_MAX_SSBO
;
525 ctx
->Const
.MaxTextureUnits
=
526 MIN2(ctx
->Const
.MaxTextureCoordUnits
,
527 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxTextureImageUnits
);
529 ctx
->Const
.MaxUniformBufferBindings
= num_stages
* BRW_MAX_UBO
;
530 ctx
->Const
.MaxCombinedUniformBlocks
= num_stages
* BRW_MAX_UBO
;
531 ctx
->Const
.MaxCombinedAtomicBuffers
= num_stages
* BRW_MAX_ABO
;
532 ctx
->Const
.MaxCombinedShaderStorageBlocks
= num_stages
* BRW_MAX_SSBO
;
533 ctx
->Const
.MaxShaderStorageBufferBindings
= num_stages
* BRW_MAX_SSBO
;
534 ctx
->Const
.MaxCombinedTextureImageUnits
= num_stages
* max_samplers
;
535 ctx
->Const
.MaxCombinedImageUniforms
= num_stages
* BRW_MAX_IMAGES
;
538 /* Hardware only supports a limited number of transform feedback buffers.
539 * So we need to override the Mesa default (which is based only on software
542 ctx
->Const
.MaxTransformFeedbackBuffers
= BRW_MAX_SOL_BUFFERS
;
544 /* On Gen6, in the worst case, we use up one binding table entry per
545 * transform feedback component (see comments above the definition of
546 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
547 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
548 * BRW_MAX_SOL_BINDINGS.
550 * In "separate components" mode, we need to divide this value by
551 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
552 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
554 ctx
->Const
.MaxTransformFeedbackInterleavedComponents
= BRW_MAX_SOL_BINDINGS
;
555 ctx
->Const
.MaxTransformFeedbackSeparateComponents
=
556 BRW_MAX_SOL_BINDINGS
/ BRW_MAX_SOL_BUFFERS
;
558 ctx
->Const
.AlwaysUseGetTransformFeedbackVertexCount
=
559 !can_do_mi_math_and_lrr(brw
->screen
);
562 const int *msaa_modes
= intel_supported_msaa_modes(brw
->screen
);
563 const int clamp_max_samples
=
564 driQueryOptioni(&brw
->optionCache
, "clamp_max_samples");
566 if (clamp_max_samples
< 0) {
567 max_samples
= msaa_modes
[0];
569 /* Select the largest supported MSAA mode that does not exceed
573 for (int i
= 0; msaa_modes
[i
] != 0; ++i
) {
574 if (msaa_modes
[i
] <= clamp_max_samples
) {
575 max_samples
= msaa_modes
[i
];
581 ctx
->Const
.MaxSamples
= max_samples
;
582 ctx
->Const
.MaxColorTextureSamples
= max_samples
;
583 ctx
->Const
.MaxDepthTextureSamples
= max_samples
;
584 ctx
->Const
.MaxIntegerSamples
= max_samples
;
585 ctx
->Const
.MaxImageSamples
= 0;
587 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
588 * to map indices of rectangular grid to sample numbers within a pixel.
589 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
590 * extension implementation. For more details see the comment above
591 * gen6_set_sample_maps() definition.
593 gen6_set_sample_maps(ctx
);
595 ctx
->Const
.MinLineWidth
= 1.0;
596 ctx
->Const
.MinLineWidthAA
= 1.0;
597 if (devinfo
->gen
>= 6) {
598 ctx
->Const
.MaxLineWidth
= 7.375;
599 ctx
->Const
.MaxLineWidthAA
= 7.375;
600 ctx
->Const
.LineWidthGranularity
= 0.125;
602 ctx
->Const
.MaxLineWidth
= 7.0;
603 ctx
->Const
.MaxLineWidthAA
= 7.0;
604 ctx
->Const
.LineWidthGranularity
= 0.5;
607 /* For non-antialiased lines, we have to round the line width to the
608 * nearest whole number. Make sure that we don't advertise a line
609 * width that, when rounded, will be beyond the actual hardware
612 assert(roundf(ctx
->Const
.MaxLineWidth
) <= ctx
->Const
.MaxLineWidth
);
614 ctx
->Const
.MinPointSize
= 1.0;
615 ctx
->Const
.MinPointSizeAA
= 1.0;
616 ctx
->Const
.MaxPointSize
= 255.0;
617 ctx
->Const
.MaxPointSizeAA
= 255.0;
618 ctx
->Const
.PointSizeGranularity
= 1.0;
620 if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
621 ctx
->Const
.MaxClipPlanes
= 8;
623 ctx
->Const
.GLSLTessLevelsAsInputs
= true;
624 ctx
->Const
.PrimitiveRestartForPatches
= true;
626 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeInstructions
= 16 * 1024;
627 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxAluInstructions
= 0;
628 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxTexInstructions
= 0;
629 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxTexIndirections
= 0;
630 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAluInstructions
= 0;
631 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTexInstructions
= 0;
632 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTexIndirections
= 0;
633 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAttribs
= 16;
634 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTemps
= 256;
635 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAddressRegs
= 1;
636 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeParameters
= 1024;
637 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxEnvParams
=
638 MIN2(ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeParameters
,
639 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxEnvParams
);
641 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeInstructions
= 1024;
642 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAluInstructions
= 1024;
643 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTexInstructions
= 1024;
644 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTexIndirections
= 1024;
645 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAttribs
= 12;
646 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTemps
= 256;
647 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAddressRegs
= 0;
648 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeParameters
= 1024;
649 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxEnvParams
=
650 MIN2(ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeParameters
,
651 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxEnvParams
);
653 /* Fragment shaders use real, 32-bit twos-complement integers for all
656 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.RangeMin
= 31;
657 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.RangeMax
= 30;
658 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.Precision
= 0;
659 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].HighInt
= ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
;
660 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MediumInt
= ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
;
662 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.RangeMin
= 31;
663 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.RangeMax
= 30;
664 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.Precision
= 0;
665 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].HighInt
= ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
;
666 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MediumInt
= ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
;
668 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
669 * but we're not sure how it's actually done for vertex order,
670 * that affect provoking vertex decision. Always use last vertex
671 * convention for quad primitive which works as expected for now.
673 if (devinfo
->gen
>= 6)
674 ctx
->Const
.QuadsFollowProvokingVertexConvention
= false;
676 ctx
->Const
.NativeIntegers
= true;
678 /* Regarding the CMP instruction, the Ivybridge PRM says:
680 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
681 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
682 * 0xFFFFFFFF) is assigned to dst."
684 * but PRMs for earlier generations say
686 * "In dword format, one GRF may store up to 8 results. When the register
687 * is used later as a vector of Booleans, as only LSB at each channel
688 * contains meaning [sic] data, software should make sure all higher bits
689 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
691 * We select the representation of a true boolean uniform to be ~0, and fix
692 * the results of Gen <= 5 CMP instruction's with -(result & 1).
694 ctx
->Const
.UniformBooleanTrue
= ~0;
696 /* From the gen4 PRM, volume 4 page 127:
698 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
699 * the base address of the first element of the surface, computed in
700 * software by adding the surface base address to the byte offset of
701 * the element in the buffer."
703 * However, unaligned accesses are slower, so enforce buffer alignment.
705 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
706 * restriction: the start of the buffer needs to be 32B aligned.
708 ctx
->Const
.UniformBufferOffsetAlignment
= 32;
710 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
711 * that we can safely have the CPU and GPU writing the same SSBO on
712 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
713 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
714 * be updating disjoint regions of the buffer simultaneously and that will
715 * break if the regions overlap the same cacheline.
717 ctx
->Const
.ShaderStorageBufferOffsetAlignment
= 64;
718 ctx
->Const
.TextureBufferOffsetAlignment
= 16;
719 ctx
->Const
.MaxTextureBufferSize
= 128 * 1024 * 1024;
721 if (devinfo
->gen
>= 6) {
722 ctx
->Const
.MaxVarying
= 32;
723 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxOutputComponents
= 128;
724 ctx
->Const
.Program
[MESA_SHADER_GEOMETRY
].MaxInputComponents
=
725 compiler
->scalar_stage
[MESA_SHADER_GEOMETRY
] ? 128 : 64;
726 ctx
->Const
.Program
[MESA_SHADER_GEOMETRY
].MaxOutputComponents
= 128;
727 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxInputComponents
= 128;
728 ctx
->Const
.Program
[MESA_SHADER_TESS_CTRL
].MaxInputComponents
= 128;
729 ctx
->Const
.Program
[MESA_SHADER_TESS_CTRL
].MaxOutputComponents
= 128;
730 ctx
->Const
.Program
[MESA_SHADER_TESS_EVAL
].MaxInputComponents
= 128;
731 ctx
->Const
.Program
[MESA_SHADER_TESS_EVAL
].MaxOutputComponents
= 128;
734 /* We want the GLSL compiler to emit code that uses condition codes */
735 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
736 ctx
->Const
.ShaderCompilerOptions
[i
] =
737 brw
->screen
->compiler
->glsl_compiler_options
[i
];
740 if (devinfo
->gen
>= 7) {
741 ctx
->Const
.MaxViewportWidth
= 32768;
742 ctx
->Const
.MaxViewportHeight
= 32768;
745 /* ARB_viewport_array, OES_viewport_array */
746 if (devinfo
->gen
>= 6) {
747 ctx
->Const
.MaxViewports
= GEN6_NUM_VIEWPORTS
;
748 ctx
->Const
.ViewportSubpixelBits
= 8;
750 /* Cast to float before negating because MaxViewportWidth is unsigned.
752 ctx
->Const
.ViewportBounds
.Min
= -(float)ctx
->Const
.MaxViewportWidth
;
753 ctx
->Const
.ViewportBounds
.Max
= ctx
->Const
.MaxViewportWidth
;
756 /* ARB_gpu_shader5 */
757 if (devinfo
->gen
>= 7)
758 ctx
->Const
.MaxVertexStreams
= MIN2(4, MAX_VERTEX_STREAMS
);
760 /* ARB_framebuffer_no_attachments */
761 ctx
->Const
.MaxFramebufferWidth
= 16384;
762 ctx
->Const
.MaxFramebufferHeight
= 16384;
763 ctx
->Const
.MaxFramebufferLayers
= ctx
->Const
.MaxArrayTextureLayers
;
764 ctx
->Const
.MaxFramebufferSamples
= max_samples
;
766 /* OES_primitive_bounding_box */
767 ctx
->Const
.NoPrimitiveBoundingBoxOutput
= true;
769 /* TODO: We should be able to use STD430 packing by default on all hardware
770 * but some piglit tests [1] currently fail on SNB when this is enabled.
771 * The problem is the messages we're using for doing uniform pulls
772 * in the vec4 back-end on SNB is the OWORD block load instruction, which
773 * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the
774 * sampler which doesn't have these restrictions.
776 * In the scalar back-end, we use the sampler for dynamic uniform loads and
777 * pull an entire cache line at a time for constant offset loads both of
778 * which support almost any alignment.
780 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
782 if (devinfo
->gen
>= 7)
783 ctx
->Const
.UseSTD430AsDefaultPacking
= true;
785 if (!(ctx
->Const
.ContextFlags
& GL_CONTEXT_FLAG_DEBUG_BIT
))
786 ctx
->Const
.AllowMappedBuffersDuringExecution
= true;
788 /* GL_ARB_get_program_binary */
789 ctx
->Const
.NumProgramBinaryFormats
= 1;
793 brw_initialize_cs_context_constants(struct brw_context
*brw
)
795 struct gl_context
*ctx
= &brw
->ctx
;
796 const struct intel_screen
*screen
= brw
->screen
;
797 struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
799 /* FINISHME: Do this for all platforms that the kernel supports */
800 if (devinfo
->is_cherryview
&&
801 screen
->subslice_total
> 0 && screen
->eu_total
> 0) {
802 /* Logical CS threads = EUs per subslice * 7 threads per EU */
803 uint32_t max_cs_threads
= screen
->eu_total
/ screen
->subslice_total
* 7;
805 /* Fuse configurations may give more threads than expected, never less. */
806 if (max_cs_threads
> devinfo
->max_cs_threads
)
807 devinfo
->max_cs_threads
= max_cs_threads
;
810 /* Maximum number of scalar compute shader invocations that can be run in
811 * parallel in the same subslice assuming SIMD32 dispatch.
813 * We don't advertise more than 64 threads, because we are limited to 64 by
814 * our usage of thread_width_max in the gpgpu walker command. This only
815 * currently impacts Haswell, which otherwise might be able to advertise 70
816 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
817 * required the number of invocation needed for ARB_compute_shader.
819 const unsigned max_threads
= MIN2(64, devinfo
->max_cs_threads
);
820 const uint32_t max_invocations
= 32 * max_threads
;
821 ctx
->Const
.MaxComputeWorkGroupSize
[0] = max_invocations
;
822 ctx
->Const
.MaxComputeWorkGroupSize
[1] = max_invocations
;
823 ctx
->Const
.MaxComputeWorkGroupSize
[2] = max_invocations
;
824 ctx
->Const
.MaxComputeWorkGroupInvocations
= max_invocations
;
825 ctx
->Const
.MaxComputeSharedMemorySize
= 64 * 1024;
829 * Process driconf (drirc) options, setting appropriate context flags.
831 * intelInitExtensions still pokes at optionCache directly, in order to
832 * avoid advertising various extensions. No flags are set, so it makes
833 * sense to continue doing that there.
836 brw_process_driconf_options(struct brw_context
*brw
)
838 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
839 struct gl_context
*ctx
= &brw
->ctx
;
841 driOptionCache
*options
= &brw
->optionCache
;
842 driParseConfigFiles(options
, &brw
->screen
->optionCache
,
843 brw
->driContext
->driScreenPriv
->myNum
,
846 int bo_reuse_mode
= driQueryOptioni(options
, "bo_reuse");
847 switch (bo_reuse_mode
) {
848 case DRI_CONF_BO_REUSE_DISABLED
:
850 case DRI_CONF_BO_REUSE_ALL
:
851 brw_bufmgr_enable_reuse(brw
->bufmgr
);
855 if (INTEL_DEBUG
& DEBUG_NO_HIZ
) {
856 brw
->has_hiz
= false;
857 /* On gen6, you can only do separate stencil with HIZ. */
858 if (devinfo
->gen
== 6)
859 brw
->has_separate_stencil
= false;
862 if (driQueryOptionb(options
, "mesa_no_error"))
863 ctx
->Const
.ContextFlags
|= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR
;
865 if (driQueryOptionb(options
, "always_flush_batch")) {
866 fprintf(stderr
, "flushing batchbuffer before/after each draw call\n");
867 brw
->always_flush_batch
= true;
870 if (driQueryOptionb(options
, "always_flush_cache")) {
871 fprintf(stderr
, "flushing GPU caches before/after each draw call\n");
872 brw
->always_flush_cache
= true;
875 if (driQueryOptionb(options
, "disable_throttling")) {
876 fprintf(stderr
, "disabling flush throttling\n");
877 brw
->disable_throttling
= true;
880 brw
->precompile
= driQueryOptionb(&brw
->optionCache
, "shader_precompile");
882 if (driQueryOptionb(&brw
->optionCache
, "precise_trig"))
883 brw
->screen
->compiler
->precise_trig
= true;
885 ctx
->Const
.ForceGLSLExtensionsWarn
=
886 driQueryOptionb(options
, "force_glsl_extensions_warn");
888 ctx
->Const
.ForceGLSLVersion
=
889 driQueryOptioni(options
, "force_glsl_version");
891 ctx
->Const
.DisableGLSLLineContinuations
=
892 driQueryOptionb(options
, "disable_glsl_line_continuations");
894 ctx
->Const
.AllowGLSLExtensionDirectiveMidShader
=
895 driQueryOptionb(options
, "allow_glsl_extension_directive_midshader");
897 ctx
->Const
.AllowGLSLBuiltinVariableRedeclaration
=
898 driQueryOptionb(options
, "allow_glsl_builtin_variable_redeclaration");
900 ctx
->Const
.AllowHigherCompatVersion
=
901 driQueryOptionb(options
, "allow_higher_compat_version");
903 ctx
->Const
.ForceGLSLAbsSqrt
=
904 driQueryOptionb(options
, "force_glsl_abs_sqrt");
906 ctx
->Const
.GLSLZeroInit
= driQueryOptionb(options
, "glsl_zero_init");
908 brw
->dual_color_blend_by_location
=
909 driQueryOptionb(options
, "dual_color_blend_by_location");
911 ctx
->Const
.AllowGLSLCrossStageInterpolationMismatch
=
912 driQueryOptionb(options
, "allow_glsl_cross_stage_interpolation_mismatch");
914 ctx
->Const
.dri_config_options_sha1
= ralloc_array(brw
, unsigned char, 20);
915 driComputeOptionsSha1(&brw
->screen
->optionCache
,
916 ctx
->Const
.dri_config_options_sha1
);
920 brwCreateContext(gl_api api
,
921 const struct gl_config
*mesaVis
,
922 __DRIcontext
*driContextPriv
,
923 const struct __DriverContextConfig
*ctx_config
,
924 unsigned *dri_ctx_error
,
925 void *sharedContextPrivate
)
927 struct gl_context
*shareCtx
= (struct gl_context
*) sharedContextPrivate
;
928 struct intel_screen
*screen
= driContextPriv
->driScreenPriv
->driverPrivate
;
929 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
930 struct dd_function_table functions
;
932 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
933 * provides us with context reset notifications.
935 uint32_t allowed_flags
= __DRI_CTX_FLAG_DEBUG
|
936 __DRI_CTX_FLAG_FORWARD_COMPATIBLE
|
937 __DRI_CTX_FLAG_NO_ERROR
;
939 if (screen
->has_context_reset_notification
)
940 allowed_flags
|= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS
;
942 if (ctx_config
->flags
& ~allowed_flags
) {
943 *dri_ctx_error
= __DRI_CTX_ERROR_UNKNOWN_FLAG
;
947 if (ctx_config
->attribute_mask
&
948 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY
|
949 __DRIVER_CONTEXT_ATTRIB_PRIORITY
)) {
950 *dri_ctx_error
= __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE
;
955 ((ctx_config
->attribute_mask
& __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY
) &&
956 ctx_config
->reset_strategy
!= __DRI_CTX_RESET_NO_NOTIFICATION
);
958 struct brw_context
*brw
= rzalloc(NULL
, struct brw_context
);
960 fprintf(stderr
, "%s: failed to alloc context\n", __func__
);
961 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
965 driContextPriv
->driverPrivate
= brw
;
966 brw
->driContext
= driContextPriv
;
967 brw
->screen
= screen
;
968 brw
->bufmgr
= screen
->bufmgr
;
970 brw
->has_hiz
= devinfo
->has_hiz_and_separate_stencil
;
971 brw
->has_separate_stencil
= devinfo
->has_hiz_and_separate_stencil
;
973 brw
->has_swizzling
= screen
->hw_has_swizzling
;
975 brw
->isl_dev
= screen
->isl_dev
;
977 brw
->vs
.base
.stage
= MESA_SHADER_VERTEX
;
978 brw
->tcs
.base
.stage
= MESA_SHADER_TESS_CTRL
;
979 brw
->tes
.base
.stage
= MESA_SHADER_TESS_EVAL
;
980 brw
->gs
.base
.stage
= MESA_SHADER_GEOMETRY
;
981 brw
->wm
.base
.stage
= MESA_SHADER_FRAGMENT
;
982 brw
->cs
.base
.stage
= MESA_SHADER_COMPUTE
;
984 brw_init_driver_functions(brw
, &functions
);
987 functions
.GetGraphicsResetStatus
= brw_get_graphics_reset_status
;
989 struct gl_context
*ctx
= &brw
->ctx
;
991 if (!_mesa_initialize_context(ctx
, api
, mesaVis
, shareCtx
, &functions
)) {
992 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
993 fprintf(stderr
, "%s: failed to init mesa context\n", __func__
);
994 intelDestroyContext(driContextPriv
);
998 driContextSetFlags(ctx
, ctx_config
->flags
);
1000 /* Initialize the software rasterizer and helper modules.
1002 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1003 * software fallbacks (which we have to support on legacy GL to do weird
1004 * glDrawPixels(), glBitmap(), and other functions).
1006 if (api
!= API_OPENGL_CORE
&& api
!= API_OPENGLES2
) {
1007 _swrast_CreateContext(ctx
);
1010 _vbo_CreateContext(ctx
);
1011 if (ctx
->swrast_context
) {
1012 _tnl_CreateContext(ctx
);
1013 TNL_CONTEXT(ctx
)->Driver
.RunPipeline
= _tnl_run_pipeline
;
1014 _swsetup_CreateContext(ctx
);
1016 /* Configure swrast to match hardware characteristics: */
1017 _swrast_allow_pixel_fog(ctx
, false);
1018 _swrast_allow_vertex_fog(ctx
, true);
1021 _mesa_meta_init(ctx
);
1023 brw_process_driconf_options(brw
);
1025 if (INTEL_DEBUG
& DEBUG_PERF
)
1026 brw
->perf_debug
= true;
1028 brw_initialize_cs_context_constants(brw
);
1029 brw_initialize_context_constants(brw
);
1031 ctx
->Const
.ResetStrategy
= notify_reset
1032 ? GL_LOSE_CONTEXT_ON_RESET_ARB
: GL_NO_RESET_NOTIFICATION_ARB
;
1034 /* Reinitialize the context point state. It depends on ctx->Const values. */
1035 _mesa_init_point(ctx
);
1037 intel_fbo_init(brw
);
1039 intel_batchbuffer_init(brw
);
1041 /* Create a new hardware context. Using a hardware context means that
1042 * our GPU state will be saved/restored on context switch, allowing us
1043 * to assume that the GPU is in the same state we left it in.
1045 * This is required for transform feedback buffer offsets, query objects,
1046 * and also allows us to reduce how much state we have to emit.
1048 brw
->hw_ctx
= brw_create_hw_context(brw
->bufmgr
);
1049 if (!brw
->hw_ctx
&& devinfo
->gen
>= 6) {
1050 fprintf(stderr
, "Failed to create hardware context.\n");
1051 intelDestroyContext(driContextPriv
);
1056 int hw_priority
= GEN_CONTEXT_MEDIUM_PRIORITY
;
1057 if (ctx_config
->attribute_mask
& __DRIVER_CONTEXT_ATTRIB_PRIORITY
) {
1058 switch (ctx_config
->priority
) {
1059 case __DRI_CTX_PRIORITY_LOW
:
1060 hw_priority
= GEN_CONTEXT_LOW_PRIORITY
;
1062 case __DRI_CTX_PRIORITY_HIGH
:
1063 hw_priority
= GEN_CONTEXT_HIGH_PRIORITY
;
1067 if (hw_priority
!= I915_CONTEXT_DEFAULT_PRIORITY
&&
1068 brw_hw_context_set_priority(brw
->bufmgr
, brw
->hw_ctx
, hw_priority
)) {
1070 "Failed to set priority [%d:%d] for hardware context.\n",
1071 ctx_config
->priority
, hw_priority
);
1072 intelDestroyContext(driContextPriv
);
1077 if (brw_init_pipe_control(brw
, devinfo
)) {
1078 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
1079 intelDestroyContext(driContextPriv
);
1083 brw_upload_init(&brw
->upload
, brw
->bufmgr
, 65536);
1085 brw_init_state(brw
);
1087 intelInitExtensions(ctx
);
1089 brw_init_surface_formats(brw
);
1091 brw_blorp_init(brw
);
1093 brw
->urb
.size
= devinfo
->urb
.size
;
1095 if (devinfo
->gen
== 6)
1096 brw
->urb
.gs_present
= false;
1098 brw
->prim_restart
.in_progress
= false;
1099 brw
->prim_restart
.enable_cut_index
= false;
1100 brw
->gs
.enabled
= false;
1101 brw
->clip
.viewport_count
= 1;
1103 brw
->predicate
.state
= BRW_PREDICATE_STATE_RENDER
;
1105 brw
->max_gtt_map_object_size
= screen
->max_gtt_map_object_size
;
1107 ctx
->VertexProgram
._MaintainTnlProgram
= true;
1108 ctx
->FragmentProgram
._MaintainTexEnvProgram
= true;
1110 brw_draw_init( brw
);
1112 if ((ctx_config
->flags
& __DRI_CTX_FLAG_DEBUG
) != 0) {
1113 /* Turn on some extra GL_ARB_debug_output generation. */
1114 brw
->perf_debug
= true;
1117 if ((ctx_config
->flags
& __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS
) != 0) {
1118 ctx
->Const
.ContextFlags
|= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB
;
1119 ctx
->Const
.RobustAccess
= GL_TRUE
;
1122 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
1123 brw_init_shader_time(brw
);
1125 _mesa_override_extensions(ctx
);
1126 _mesa_compute_version(ctx
);
1128 /* GL_ARB_gl_spirv */
1129 if (ctx
->Extensions
.ARB_gl_spirv
)
1130 brw_initialize_spirv_supported_capabilities(brw
);
1132 _mesa_initialize_dispatch_tables(ctx
);
1133 _mesa_initialize_vbo_vtxfmt(ctx
);
1135 if (ctx
->Extensions
.INTEL_performance_query
)
1136 brw_init_performance_queries(brw
);
1138 vbo_use_buffer_objects(ctx
);
1139 vbo_always_unmap_buffers(ctx
);
1141 brw
->ctx
.Cache
= brw
->screen
->disk_cache
;
1143 if (driContextPriv
->driScreenPriv
->dri2
.backgroundCallable
&&
1144 driQueryOptionb(&screen
->optionCache
, "mesa_glthread")) {
1145 /* Loader supports multithreading, and so do we. */
1146 _mesa_glthread_init(ctx
);
1153 intelDestroyContext(__DRIcontext
* driContextPriv
)
1155 struct brw_context
*brw
=
1156 (struct brw_context
*) driContextPriv
->driverPrivate
;
1157 struct gl_context
*ctx
= &brw
->ctx
;
1159 GET_CURRENT_CONTEXT(curctx
);
1161 if (curctx
== NULL
) {
1162 /* No current context, but we need one to release
1163 * renderbuffer surface when we release framebuffer.
1164 * So temporarily bind the context.
1166 _mesa_make_current(ctx
, NULL
, NULL
);
1169 _mesa_glthread_destroy(&brw
->ctx
);
1171 _mesa_meta_free(&brw
->ctx
);
1173 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
) {
1174 /* Force a report. */
1175 brw
->shader_time
.report_time
= 0;
1177 brw_collect_and_report_shader_time(brw
);
1178 brw_destroy_shader_time(brw
);
1181 blorp_finish(&brw
->blorp
);
1183 brw_destroy_state(brw
);
1184 brw_draw_destroy(brw
);
1186 brw_bo_unreference(brw
->curbe
.curbe_bo
);
1188 brw_bo_unreference(brw
->vs
.base
.scratch_bo
);
1189 brw_bo_unreference(brw
->tcs
.base
.scratch_bo
);
1190 brw_bo_unreference(brw
->tes
.base
.scratch_bo
);
1191 brw_bo_unreference(brw
->gs
.base
.scratch_bo
);
1192 brw_bo_unreference(brw
->wm
.base
.scratch_bo
);
1194 brw_bo_unreference(brw
->vs
.base
.push_const_bo
);
1195 brw_bo_unreference(brw
->tcs
.base
.push_const_bo
);
1196 brw_bo_unreference(brw
->tes
.base
.push_const_bo
);
1197 brw_bo_unreference(brw
->gs
.base
.push_const_bo
);
1198 brw_bo_unreference(brw
->wm
.base
.push_const_bo
);
1200 brw_destroy_hw_context(brw
->bufmgr
, brw
->hw_ctx
);
1202 if (ctx
->swrast_context
) {
1203 _swsetup_DestroyContext(&brw
->ctx
);
1204 _tnl_DestroyContext(&brw
->ctx
);
1206 _vbo_DestroyContext(&brw
->ctx
);
1208 if (ctx
->swrast_context
)
1209 _swrast_DestroyContext(&brw
->ctx
);
1211 brw_fini_pipe_control(brw
);
1212 intel_batchbuffer_free(&brw
->batch
);
1214 brw_bo_unreference(brw
->throttle_batch
[1]);
1215 brw_bo_unreference(brw
->throttle_batch
[0]);
1216 brw
->throttle_batch
[1] = NULL
;
1217 brw
->throttle_batch
[0] = NULL
;
1219 driDestroyOptionCache(&brw
->optionCache
);
1221 /* free the Mesa context */
1222 _mesa_free_context_data(&brw
->ctx
, true);
1225 driContextPriv
->driverPrivate
= NULL
;
1229 intelUnbindContext(__DRIcontext
* driContextPriv
)
1231 GET_CURRENT_CONTEXT(ctx
);
1232 _mesa_glthread_finish(ctx
);
1234 /* Unset current context and dispath table */
1235 _mesa_make_current(NULL
, NULL
, NULL
);
1241 * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
1242 * on window system framebuffers.
1244 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1245 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1246 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1247 * for a visual where you're guaranteed to be capable, but it turns out that
1248 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1249 * incapable ones, because there's no difference between the two in resources
1250 * used. Applications thus get built that accidentally rely on the default
1251 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1254 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1255 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1256 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1257 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1258 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1259 * and get no sRGB encode (assuming that both kinds of visual are available).
1260 * Thus our choice to support sRGB by default on our visuals for desktop would
1261 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1263 * Unfortunately, renderbuffer setup happens before a context is created. So
1264 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1265 * context (without an sRGB visual), we go turn that back off before anyone
1269 intel_gles3_srgb_workaround(struct brw_context
*brw
,
1270 struct gl_framebuffer
*fb
)
1272 struct gl_context
*ctx
= &brw
->ctx
;
1274 if (_mesa_is_desktop_gl(ctx
) || !fb
->Visual
.sRGBCapable
)
1277 for (int i
= 0; i
< BUFFER_COUNT
; i
++) {
1278 struct gl_renderbuffer
*rb
= fb
->Attachment
[i
].Renderbuffer
;
1280 /* Check if sRGB was specifically asked for. */
1281 struct intel_renderbuffer
*irb
= intel_get_renderbuffer(fb
, i
);
1282 if (irb
&& irb
->need_srgb
)
1286 rb
->Format
= _mesa_get_srgb_format_linear(rb
->Format
);
1288 /* Disable sRGB from framebuffers that are not compatible. */
1289 fb
->Visual
.sRGBCapable
= false;
1293 intelMakeCurrent(__DRIcontext
* driContextPriv
,
1294 __DRIdrawable
* driDrawPriv
,
1295 __DRIdrawable
* driReadPriv
)
1297 struct brw_context
*brw
;
1300 brw
= (struct brw_context
*) driContextPriv
->driverPrivate
;
1304 if (driContextPriv
) {
1305 struct gl_context
*ctx
= &brw
->ctx
;
1306 struct gl_framebuffer
*fb
, *readFb
;
1308 if (driDrawPriv
== NULL
) {
1309 fb
= _mesa_get_incomplete_framebuffer();
1311 fb
= driDrawPriv
->driverPrivate
;
1312 driContextPriv
->dri2
.draw_stamp
= driDrawPriv
->dri2
.stamp
- 1;
1315 if (driReadPriv
== NULL
) {
1316 readFb
= _mesa_get_incomplete_framebuffer();
1318 readFb
= driReadPriv
->driverPrivate
;
1319 driContextPriv
->dri2
.read_stamp
= driReadPriv
->dri2
.stamp
- 1;
1322 /* The sRGB workaround changes the renderbuffer's format. We must change
1323 * the format before the renderbuffer's miptree get's allocated, otherwise
1324 * the formats of the renderbuffer and its miptree will differ.
1326 intel_gles3_srgb_workaround(brw
, fb
);
1327 intel_gles3_srgb_workaround(brw
, readFb
);
1329 /* If the context viewport hasn't been initialized, force a call out to
1330 * the loader to get buffers so we have a drawable size for the initial
1332 if (!brw
->ctx
.ViewportInitialized
)
1333 intel_prepare_render(brw
);
1335 _mesa_make_current(ctx
, fb
, readFb
);
1337 GET_CURRENT_CONTEXT(ctx
);
1338 _mesa_glthread_finish(ctx
);
1339 _mesa_make_current(NULL
, NULL
, NULL
);
1346 intel_resolve_for_dri2_flush(struct brw_context
*brw
,
1347 __DRIdrawable
*drawable
)
1349 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1351 if (devinfo
->gen
< 6) {
1352 /* MSAA and fast color clear are not supported, so don't waste time
1353 * checking whether a resolve is needed.
1358 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1359 struct intel_renderbuffer
*rb
;
1361 /* Usually, only the back buffer will need to be downsampled. However,
1362 * the front buffer will also need it if the user has rendered into it.
1364 static const gl_buffer_index buffers
[2] = {
1369 for (int i
= 0; i
< 2; ++i
) {
1370 rb
= intel_get_renderbuffer(fb
, buffers
[i
]);
1371 if (rb
== NULL
|| rb
->mt
== NULL
)
1373 if (rb
->mt
->surf
.samples
== 1) {
1374 assert(rb
->mt_layer
== 0 && rb
->mt_level
== 0 &&
1375 rb
->layer_count
== 1);
1376 intel_miptree_prepare_external(brw
, rb
->mt
);
1378 intel_renderbuffer_downsample(brw
, rb
);
1380 /* Call prepare_external on the single-sample miptree to do any
1381 * needed resolves prior to handing it off to the window system.
1382 * This is needed in the case that rb->singlesample_mt is Y-tiled
1383 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1384 * this case, the MSAA resolve above will write compressed data into
1385 * rb->singlesample_mt.
1387 * TODO: Some day, if we decide to care about the tiny performance
1388 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1389 * we could detect this case and just allocate the single-sampled
1390 * miptree without aux. However, that would be a lot of plumbing and
1391 * this is a rather exotic case so it's not really worth it.
1393 intel_miptree_prepare_external(brw
, rb
->singlesample_mt
);
/** Return the renderbuffer's color depth in bits per pixel. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}
1405 intel_query_dri2_buffers(struct brw_context
*brw
,
1406 __DRIdrawable
*drawable
,
1407 __DRIbuffer
**buffers
,
1411 intel_process_dri2_buffer(struct brw_context
*brw
,
1412 __DRIdrawable
*drawable
,
1413 __DRIbuffer
*buffer
,
1414 struct intel_renderbuffer
*rb
,
1415 const char *buffer_name
);
1418 intel_update_image_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
);
1421 intel_update_dri2_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
)
1423 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1424 struct intel_renderbuffer
*rb
;
1425 __DRIbuffer
*buffers
= NULL
;
1427 const char *region_name
;
1429 /* Set this up front, so that in case our buffers get invalidated
1430 * while we're getting new buffers, we don't clobber the stamp and
1431 * thus ignore the invalidate. */
1432 drawable
->lastStamp
= drawable
->dri2
.stamp
;
1434 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
))
1435 fprintf(stderr
, "enter %s, drawable %p\n", __func__
, drawable
);
1437 intel_query_dri2_buffers(brw
, drawable
, &buffers
, &count
);
1439 if (buffers
== NULL
)
1442 for (int i
= 0; i
< count
; i
++) {
1443 switch (buffers
[i
].attachment
) {
1444 case __DRI_BUFFER_FRONT_LEFT
:
1445 rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1446 region_name
= "dri2 front buffer";
1449 case __DRI_BUFFER_FAKE_FRONT_LEFT
:
1450 rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1451 region_name
= "dri2 fake front buffer";
1454 case __DRI_BUFFER_BACK_LEFT
:
1455 rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1456 region_name
= "dri2 back buffer";
1459 case __DRI_BUFFER_DEPTH
:
1460 case __DRI_BUFFER_HIZ
:
1461 case __DRI_BUFFER_DEPTH_STENCIL
:
1462 case __DRI_BUFFER_STENCIL
:
1463 case __DRI_BUFFER_ACCUM
:
1466 "unhandled buffer attach event, attachment type %d\n",
1467 buffers
[i
].attachment
);
1471 intel_process_dri2_buffer(brw
, drawable
, &buffers
[i
], rb
, region_name
);
1477 intel_update_renderbuffers(__DRIcontext
*context
, __DRIdrawable
*drawable
)
1479 struct brw_context
*brw
= context
->driverPrivate
;
1480 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1482 /* Set this up front, so that in case our buffers get invalidated
1483 * while we're getting new buffers, we don't clobber the stamp and
1484 * thus ignore the invalidate. */
1485 drawable
->lastStamp
= drawable
->dri2
.stamp
;
1487 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
))
1488 fprintf(stderr
, "enter %s, drawable %p\n", __func__
, drawable
);
1490 if (dri_screen
->image
.loader
)
1491 intel_update_image_buffers(brw
, drawable
);
1493 intel_update_dri2_buffers(brw
, drawable
);
1495 driUpdateFramebufferSize(&brw
->ctx
, drawable
);
1499 * intel_prepare_render should be called anywhere that curent read/drawbuffer
1500 * state is required.
1503 intel_prepare_render(struct brw_context
*brw
)
1505 struct gl_context
*ctx
= &brw
->ctx
;
1506 __DRIcontext
*driContext
= brw
->driContext
;
1507 __DRIdrawable
*drawable
;
1509 drawable
= driContext
->driDrawablePriv
;
1510 if (drawable
&& drawable
->dri2
.stamp
!= driContext
->dri2
.draw_stamp
) {
1511 if (drawable
->lastStamp
!= drawable
->dri2
.stamp
)
1512 intel_update_renderbuffers(driContext
, drawable
);
1513 driContext
->dri2
.draw_stamp
= drawable
->dri2
.stamp
;
1516 drawable
= driContext
->driReadablePriv
;
1517 if (drawable
&& drawable
->dri2
.stamp
!= driContext
->dri2
.read_stamp
) {
1518 if (drawable
->lastStamp
!= drawable
->dri2
.stamp
)
1519 intel_update_renderbuffers(driContext
, drawable
);
1520 driContext
->dri2
.read_stamp
= drawable
->dri2
.stamp
;
1523 /* If we're currently rendering to the front buffer, the rendering
1524 * that will happen next will probably dirty the front buffer. So
1525 * mark it as dirty here.
1527 if (_mesa_is_front_buffer_drawing(ctx
->DrawBuffer
))
1528 brw
->front_buffer_dirty
= true;
1530 if (brw
->is_shared_buffer_bound
) {
1531 /* Subsequent rendering will probably dirty the shared buffer. */
1532 brw
->is_shared_buffer_dirty
= true;
1537 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1539 * To determine which DRI buffers to request, examine the renderbuffers
1540 * attached to the drawable's framebuffer. Then request the buffers with
1541 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1543 * This is called from intel_update_renderbuffers().
1545 * \param drawable Drawable whose buffers are queried.
1546 * \param buffers [out] List of buffers returned by DRI2 query.
1547 * \param buffer_count [out] Number of buffers returned.
1549 * \see intel_update_renderbuffers()
1550 * \see DRI2GetBuffers()
1551 * \see DRI2GetBuffersWithFormat()
1554 intel_query_dri2_buffers(struct brw_context
*brw
,
1555 __DRIdrawable
*drawable
,
1556 __DRIbuffer
**buffers
,
1559 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1560 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1562 unsigned attachments
[8];
1564 struct intel_renderbuffer
*front_rb
;
1565 struct intel_renderbuffer
*back_rb
;
1567 front_rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1568 back_rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1570 memset(attachments
, 0, sizeof(attachments
));
1571 if ((_mesa_is_front_buffer_drawing(fb
) ||
1572 _mesa_is_front_buffer_reading(fb
) ||
1573 !back_rb
) && front_rb
) {
1574 /* If a fake front buffer is in use, then querying for
1575 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1576 * the real front buffer to the fake front buffer. So before doing the
1577 * query, we need to make sure all the pending drawing has landed in the
1578 * real front buffer.
1580 intel_batchbuffer_flush(brw
);
1581 intel_flush_front(&brw
->ctx
);
1583 attachments
[i
++] = __DRI_BUFFER_FRONT_LEFT
;
1584 attachments
[i
++] = intel_bits_per_pixel(front_rb
);
1585 } else if (front_rb
&& brw
->front_buffer_dirty
) {
1586 /* We have pending front buffer rendering, but we aren't querying for a
1587 * front buffer. If the front buffer we have is a fake front buffer,
1588 * the X server is going to throw it away when it processes the query.
1589 * So before doing the query, make sure all the pending drawing has
1590 * landed in the real front buffer.
1592 intel_batchbuffer_flush(brw
);
1593 intel_flush_front(&brw
->ctx
);
1597 attachments
[i
++] = __DRI_BUFFER_BACK_LEFT
;
1598 attachments
[i
++] = intel_bits_per_pixel(back_rb
);
1601 assert(i
<= ARRAY_SIZE(attachments
));
1604 dri_screen
->dri2
.loader
->getBuffersWithFormat(drawable
,
1609 drawable
->loaderPrivate
);
1613 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1615 * This is called from intel_update_renderbuffers().
1618 * DRI buffers whose attachment point is DRI2BufferStencil or
1619 * DRI2BufferDepthStencil are handled as special cases.
1621 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1622 * that is passed to brw_bo_gem_create_from_name().
1624 * \see intel_update_renderbuffers()
1627 intel_process_dri2_buffer(struct brw_context
*brw
,
1628 __DRIdrawable
*drawable
,
1629 __DRIbuffer
*buffer
,
1630 struct intel_renderbuffer
*rb
,
1631 const char *buffer_name
)
1633 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1639 unsigned num_samples
= rb
->Base
.Base
.NumSamples
;
1641 /* We try to avoid closing and reopening the same BO name, because the first
1642 * use of a mapping of the buffer involves a bunch of page faulting which is
1643 * moderately expensive.
1645 struct intel_mipmap_tree
*last_mt
;
1646 if (num_samples
== 0)
1649 last_mt
= rb
->singlesample_mt
;
1651 uint32_t old_name
= 0;
1653 /* The bo already has a name because the miptree was created by a
1654 * previous call to intel_process_dri2_buffer(). If a bo already has a
1655 * name, then brw_bo_flink() is a low-cost getter. It does not
1656 * create a new name.
1658 brw_bo_flink(last_mt
->bo
, &old_name
);
1661 if (old_name
== buffer
->name
)
1664 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
)) {
1666 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1667 buffer
->name
, buffer
->attachment
,
1668 buffer
->cpp
, buffer
->pitch
);
1671 bo
= brw_bo_gem_create_from_name(brw
->bufmgr
, buffer_name
,
1675 "Failed to open BO for returned DRI2 buffer "
1676 "(%dx%d, %s, named %d).\n"
1677 "This is likely a bug in the X Server that will lead to a "
1679 drawable
->w
, drawable
->h
, buffer_name
, buffer
->name
);
1683 uint32_t tiling
, swizzle
;
1684 brw_bo_get_tiling(bo
, &tiling
, &swizzle
);
1686 struct intel_mipmap_tree
*mt
=
1687 intel_miptree_create_for_bo(brw
,
1689 intel_rb_format(rb
),
1695 isl_tiling_from_i915_tiling(tiling
),
1696 MIPTREE_CREATE_DEFAULT
);
1698 brw_bo_unreference(bo
);
1702 /* We got this BO from X11. We cana't assume that we have coherent texture
1703 * access because X may suddenly decide to use it for scan-out which would
1704 * destroy coherency.
1706 bo
->cache_coherent
= false;
1708 if (!intel_update_winsys_renderbuffer_miptree(brw
, rb
, mt
,
1709 drawable
->w
, drawable
->h
,
1711 brw_bo_unreference(bo
);
1712 intel_miptree_release(&mt
);
1716 if (_mesa_is_front_buffer_drawing(fb
) &&
1717 (buffer
->attachment
== __DRI_BUFFER_FRONT_LEFT
||
1718 buffer
->attachment
== __DRI_BUFFER_FAKE_FRONT_LEFT
) &&
1719 rb
->Base
.Base
.NumSamples
> 1) {
1720 intel_renderbuffer_upsample(brw
, rb
);
1725 brw_bo_unreference(bo
);
1729 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1731 * To determine which DRI buffers to request, examine the renderbuffers
1732 * attached to the drawable's framebuffer. Then request the buffers from
1735 * This is called from intel_update_renderbuffers().
1737 * \param drawable Drawable whose buffers are queried.
1738 * \param buffers [out] List of buffers returned by DRI2 query.
1739 * \param buffer_count [out] Number of buffers returned.
1741 * \see intel_update_renderbuffers()
1745 intel_update_image_buffer(struct brw_context
*intel
,
1746 __DRIdrawable
*drawable
,
1747 struct intel_renderbuffer
*rb
,
1749 enum __DRIimageBufferMask buffer_type
)
1751 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1753 if (!rb
|| !buffer
->bo
)
1756 unsigned num_samples
= rb
->Base
.Base
.NumSamples
;
1758 /* Check and see if we're already bound to the right
1761 struct intel_mipmap_tree
*last_mt
;
1762 if (num_samples
== 0)
1765 last_mt
= rb
->singlesample_mt
;
1767 if (last_mt
&& last_mt
->bo
== buffer
->bo
) {
1768 if (buffer_type
== __DRI_IMAGE_BUFFER_SHARED
) {
1769 intel_miptree_make_shareable(intel
, last_mt
);
1774 /* Only allow internal compression if samples == 0. For multisampled
1775 * window system buffers, the only thing the single-sampled buffer is used
1776 * for is as a resolve target. If we do any compression beyond what is
1777 * supported by the window system, we will just have to resolve so it's
1778 * probably better to just not bother.
1780 const bool allow_internal_aux
= (num_samples
== 0);
1782 struct intel_mipmap_tree
*mt
=
1783 intel_miptree_create_for_dri_image(intel
, buffer
, GL_TEXTURE_2D
,
1784 intel_rb_format(rb
),
1785 allow_internal_aux
);
1789 if (!intel_update_winsys_renderbuffer_miptree(intel
, rb
, mt
,
1790 buffer
->width
, buffer
->height
,
1792 intel_miptree_release(&mt
);
1796 if (_mesa_is_front_buffer_drawing(fb
) &&
1797 buffer_type
== __DRI_IMAGE_BUFFER_FRONT
&&
1798 rb
->Base
.Base
.NumSamples
> 1) {
1799 intel_renderbuffer_upsample(intel
, rb
);
1802 if (buffer_type
== __DRI_IMAGE_BUFFER_SHARED
) {
1803 /* The compositor and the application may access this image
1804 * concurrently. The display hardware may even scanout the image while
1805 * the GPU is rendering to it. Aux surfaces cause difficulty with
1806 * concurrent access, so permanently disable aux for this miptree.
1808 * Perhaps we could improve overall application performance by
1809 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1810 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1811 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1812 * approach to be highly dependent on the application's GL usage.
1814 * I [chadv] expect clever disabling/reenabling to be counterproductive
1815 * in the use cases I care about: applications that render nearly
1816 * realtime handwriting to the surface while possibly undergiong
1817 * simultaneously scanout as a display plane. The app requires low
1818 * render latency. Even though the app spends most of its time in
1819 * shared-buffer mode, it also frequently transitions between
1820 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1821 * mode. Visual sutter during the transitions should be avoided.
1823 * In this case, I [chadv] believe reducing the GPU workload at
1824 * shared-buffer/double-buffer transitions would offer a smoother app
1825 * experience than any savings due to aux compression. But I've
1826 * collected no data to prove my theory.
1828 intel_miptree_make_shareable(intel
, mt
);
1833 intel_update_image_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
)
1835 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1836 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1837 struct intel_renderbuffer
*front_rb
;
1838 struct intel_renderbuffer
*back_rb
;
1839 struct __DRIimageList images
;
1841 uint32_t buffer_mask
= 0;
1844 front_rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1845 back_rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1848 format
= intel_rb_format(back_rb
);
1850 format
= intel_rb_format(front_rb
);
1854 if (front_rb
&& (_mesa_is_front_buffer_drawing(fb
) ||
1855 _mesa_is_front_buffer_reading(fb
) || !back_rb
)) {
1856 buffer_mask
|= __DRI_IMAGE_BUFFER_FRONT
;
1860 buffer_mask
|= __DRI_IMAGE_BUFFER_BACK
;
1862 ret
= dri_screen
->image
.loader
->getBuffers(drawable
,
1863 driGLFormatToImageFormat(format
),
1864 &drawable
->dri2
.stamp
,
1865 drawable
->loaderPrivate
,
1871 if (images
.image_mask
& __DRI_IMAGE_BUFFER_FRONT
) {
1872 drawable
->w
= images
.front
->width
;
1873 drawable
->h
= images
.front
->height
;
1874 intel_update_image_buffer(brw
,
1878 __DRI_IMAGE_BUFFER_FRONT
);
1881 if (images
.image_mask
& __DRI_IMAGE_BUFFER_BACK
) {
1882 drawable
->w
= images
.back
->width
;
1883 drawable
->h
= images
.back
->height
;
1884 intel_update_image_buffer(brw
,
1888 __DRI_IMAGE_BUFFER_BACK
);
1891 if (images
.image_mask
& __DRI_IMAGE_BUFFER_SHARED
) {
1892 assert(images
.image_mask
== __DRI_IMAGE_BUFFER_SHARED
);
1893 drawable
->w
= images
.back
->width
;
1894 drawable
->h
= images
.back
->height
;
1895 intel_update_image_buffer(brw
,
1899 __DRI_IMAGE_BUFFER_SHARED
);
1900 brw
->is_shared_buffer_bound
= true;
1902 brw
->is_shared_buffer_bound
= false;
1903 brw
->is_shared_buffer_dirty
= false;