/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "compiler/nir/nir.h"
#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_blorp.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"
/***************************************
 * Mesa's Driver Functions
 ***************************************/
/** Vendor string returned for GL_VENDOR queries. */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
83 get_bsw_model(const struct intel_screen
*screen
)
85 switch (screen
->eu_total
) {
96 brw_get_renderer_string(const struct intel_screen
*screen
)
99 static char buffer
[128];
102 switch (screen
->deviceID
) {
104 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
105 #include "pci_ids/i965_pci_ids.h"
107 chipset
= "Unknown Intel Chipset";
111 /* Braswell branding is funny, so we have to fix it up here */
112 if (screen
->deviceID
== 0x22B1) {
113 bsw
= strdup(chipset
);
114 char *needle
= strstr(bsw
, "XXX");
116 memcpy(needle
, get_bsw_model(screen
), 3);
121 (void) driGetRendererString(buffer
, chipset
, 0);
126 static const GLubyte
*
127 intel_get_string(struct gl_context
* ctx
, GLenum name
)
129 const struct brw_context
*const brw
= brw_context(ctx
);
133 return (GLubyte
*) brw_vendor_string
;
137 (GLubyte
*) brw_get_renderer_string(brw
->screen
);
145 intel_viewport(struct gl_context
*ctx
)
147 struct brw_context
*brw
= brw_context(ctx
);
148 __DRIcontext
*driContext
= brw
->driContext
;
150 if (_mesa_is_winsys_fbo(ctx
->DrawBuffer
)) {
151 if (driContext
->driDrawablePriv
)
152 dri2InvalidateDrawable(driContext
->driDrawablePriv
);
153 if (driContext
->driReadablePriv
)
154 dri2InvalidateDrawable(driContext
->driReadablePriv
);
159 intel_update_framebuffer(struct gl_context
*ctx
,
160 struct gl_framebuffer
*fb
)
162 struct brw_context
*brw
= brw_context(ctx
);
164 /* Quantize the derived default number of samples
166 fb
->DefaultGeometry
._NumSamples
=
167 intel_quantize_num_samples(brw
->screen
,
168 fb
->DefaultGeometry
.NumSamples
);
172 intel_disable_rb_aux_buffer(struct brw_context
*brw
, const struct brw_bo
*bo
)
174 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
177 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
178 const struct intel_renderbuffer
*irb
=
179 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
181 if (irb
&& irb
->mt
->bo
== bo
) {
182 found
= brw
->draw_aux_buffer_disabled
[i
] = true;
190 intel_update_state(struct gl_context
* ctx
, GLuint new_state
)
192 struct brw_context
*brw
= brw_context(ctx
);
193 struct intel_texture_object
*tex_obj
;
194 struct intel_renderbuffer
*depth_irb
;
196 if (ctx
->swrast_context
)
197 _swrast_InvalidateState(ctx
, new_state
);
198 _vbo_InvalidateState(ctx
, new_state
);
200 brw
->NewGLState
|= new_state
;
202 _mesa_unlock_context_textures(ctx
);
204 /* Resolve the depth buffer's HiZ buffer. */
205 depth_irb
= intel_get_renderbuffer(ctx
->DrawBuffer
, BUFFER_DEPTH
);
206 if (depth_irb
&& depth_irb
->mt
) {
207 intel_miptree_slice_resolve_hiz(brw
, depth_irb
->mt
,
209 depth_irb
->mt_layer
);
212 memset(brw
->draw_aux_buffer_disabled
, 0,
213 sizeof(brw
->draw_aux_buffer_disabled
));
215 /* Resolve depth buffer and render cache of each enabled texture. */
216 int maxEnabledUnit
= ctx
->Texture
._MaxEnabledTexImageUnit
;
217 for (int i
= 0; i
<= maxEnabledUnit
; i
++) {
218 if (!ctx
->Texture
.Unit
[i
]._Current
)
220 tex_obj
= intel_texture_object(ctx
->Texture
.Unit
[i
]._Current
);
221 if (!tex_obj
|| !tex_obj
->mt
)
224 /* We need inte_texture_object::_Format to be valid */
225 intel_finalize_mipmap_tree(brw
, i
);
228 intel_miptree_prepare_texture(brw
, tex_obj
->mt
, tex_obj
->_Format
,
231 if (!aux_supported
&& brw
->gen
>= 9 &&
232 intel_disable_rb_aux_buffer(brw
, tex_obj
->mt
->bo
)) {
233 perf_debug("Sampling renderbuffer with non-compressible format - "
234 "turning off compression");
237 brw_render_cache_set_check_flush(brw
, tex_obj
->mt
->bo
);
239 if (tex_obj
->base
.StencilSampling
||
240 tex_obj
->mt
->format
== MESA_FORMAT_S_UINT8
) {
241 intel_update_r8stencil(brw
, tex_obj
->mt
);
245 /* Resolve color for each active shader image. */
246 for (unsigned i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
247 const struct gl_program
*prog
= ctx
->_Shader
->CurrentProgram
[i
];
249 if (unlikely(prog
&& prog
->info
.num_images
)) {
250 for (unsigned j
= 0; j
< prog
->info
.num_images
; j
++) {
251 struct gl_image_unit
*u
=
252 &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[j
]];
253 tex_obj
= intel_texture_object(u
->TexObj
);
255 if (tex_obj
&& tex_obj
->mt
) {
256 /* Access to images is implemented using indirect messages
257 * against data port. Normal render target write understands
258 * lossless compression but unfortunately the typed/untyped
259 * read/write interface doesn't. Therefore even lossless
260 * compressed surfaces need to be resolved prior to accessing
261 * them. Hence skip setting INTEL_MIPTREE_IGNORE_CCS_E.
263 intel_miptree_all_slices_resolve_color(brw
, tex_obj
->mt
, 0);
265 if (intel_miptree_is_lossless_compressed(brw
, tex_obj
->mt
) &&
266 intel_disable_rb_aux_buffer(brw
, tex_obj
->mt
->bo
)) {
267 perf_debug("Using renderbuffer as shader image - turning "
268 "off lossless compression");
271 brw_render_cache_set_check_flush(brw
, tex_obj
->mt
->bo
);
277 /* Resolve color buffers for non-coherent framebuffer fetch. */
278 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
279 ctx
->FragmentProgram
._Current
&&
280 ctx
->FragmentProgram
._Current
->info
.outputs_read
) {
281 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
283 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
284 const struct intel_renderbuffer
*irb
=
285 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
288 intel_miptree_prepare_fb_fetch(brw
, irb
->mt
, irb
->mt_level
,
289 irb
->mt_layer
, irb
->layer_count
);
294 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
295 for (int i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
296 struct intel_renderbuffer
*irb
=
297 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
299 if (irb
== NULL
|| irb
->mt
== NULL
)
302 intel_miptree_prepare_render(brw
, irb
->mt
, irb
->mt_level
,
303 irb
->mt_layer
, irb
->layer_count
,
304 ctx
->Color
.sRGBEnabled
);
307 _mesa_lock_context_textures(ctx
);
309 if (new_state
& _NEW_BUFFERS
) {
310 intel_update_framebuffer(ctx
, ctx
->DrawBuffer
);
311 if (ctx
->DrawBuffer
!= ctx
->ReadBuffer
)
312 intel_update_framebuffer(ctx
, ctx
->ReadBuffer
);
/* Select the front-buffer flush hook from whichever DRI loader interface
 * this screen was created with (image loader vs. classic DRI2 loader).
 */
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
319 intel_flush_front(struct gl_context
*ctx
)
321 struct brw_context
*brw
= brw_context(ctx
);
322 __DRIcontext
*driContext
= brw
->driContext
;
323 __DRIdrawable
*driDrawable
= driContext
->driDrawablePriv
;
324 __DRIscreen
*const dri_screen
= brw
->screen
->driScrnPriv
;
326 if (brw
->front_buffer_dirty
&& _mesa_is_winsys_fbo(ctx
->DrawBuffer
)) {
327 if (flushFront(dri_screen
) && driDrawable
&&
328 driDrawable
->loaderPrivate
) {
330 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
332 * This potentially resolves both front and back buffer. It
333 * is unnecessary to resolve the back, but harms nothing except
334 * performance. And no one cares about front-buffer render
337 intel_resolve_for_dri2_flush(brw
, driDrawable
);
338 intel_batchbuffer_flush(brw
);
340 flushFront(dri_screen
)(driDrawable
, driDrawable
->loaderPrivate
);
342 /* We set the dirty bit in intel_prepare_render() if we're
343 * front buffer rendering once we get there.
345 brw
->front_buffer_dirty
= false;
351 intel_glFlush(struct gl_context
*ctx
)
353 struct brw_context
*brw
= brw_context(ctx
);
355 intel_batchbuffer_flush(brw
);
356 intel_flush_front(ctx
);
358 brw
->need_flush_throttle
= true;
362 intel_finish(struct gl_context
* ctx
)
364 struct brw_context
*brw
= brw_context(ctx
);
368 if (brw
->batch
.last_bo
)
369 brw_bo_wait_rendering(brw
, brw
->batch
.last_bo
);
373 brw_init_driver_functions(struct brw_context
*brw
,
374 struct dd_function_table
*functions
)
376 _mesa_init_driver_functions(functions
);
378 /* GLX uses DRI2 invalidate events to handle window resizing.
379 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
380 * which doesn't provide a mechanism for snooping the event queues.
382 * So EGL still relies on viewport hacks to handle window resizing.
383 * This should go away with DRI3000.
385 if (!brw
->driContext
->driScreenPriv
->dri2
.useInvalidate
)
386 functions
->Viewport
= intel_viewport
;
388 functions
->Flush
= intel_glFlush
;
389 functions
->Finish
= intel_finish
;
390 functions
->GetString
= intel_get_string
;
391 functions
->UpdateState
= intel_update_state
;
393 intelInitTextureFuncs(functions
);
394 intelInitTextureImageFuncs(functions
);
395 intelInitTextureSubImageFuncs(functions
);
396 intelInitTextureCopyImageFuncs(functions
);
397 intelInitCopyImageFuncs(functions
);
398 intelInitClearFuncs(functions
);
399 intelInitBufferFuncs(functions
);
400 intelInitPixelFuncs(functions
);
401 intelInitBufferObjectFuncs(functions
);
402 brw_init_syncobj_functions(functions
);
403 brw_init_object_purgeable_functions(functions
);
405 brwInitFragProgFuncs( functions
);
406 brw_init_common_queryobj_functions(functions
);
407 if (brw
->gen
>= 8 || brw
->is_haswell
)
408 hsw_init_queryobj_functions(functions
);
409 else if (brw
->gen
>= 6)
410 gen6_init_queryobj_functions(functions
);
412 gen4_init_queryobj_functions(functions
);
413 brw_init_compute_functions(functions
);
415 brw_init_conditional_render_functions(functions
);
417 functions
->QueryInternalFormat
= brw_query_internal_format
;
419 functions
->NewTransformFeedback
= brw_new_transform_feedback
;
420 functions
->DeleteTransformFeedback
= brw_delete_transform_feedback
;
421 if (can_do_mi_math_and_lrr(brw
->screen
)) {
422 functions
->BeginTransformFeedback
= hsw_begin_transform_feedback
;
423 functions
->EndTransformFeedback
= hsw_end_transform_feedback
;
424 functions
->PauseTransformFeedback
= hsw_pause_transform_feedback
;
425 functions
->ResumeTransformFeedback
= hsw_resume_transform_feedback
;
426 } else if (brw
->gen
>= 7) {
427 functions
->BeginTransformFeedback
= gen7_begin_transform_feedback
;
428 functions
->EndTransformFeedback
= gen7_end_transform_feedback
;
429 functions
->PauseTransformFeedback
= gen7_pause_transform_feedback
;
430 functions
->ResumeTransformFeedback
= gen7_resume_transform_feedback
;
431 functions
->GetTransformFeedbackVertexCount
=
432 brw_get_transform_feedback_vertex_count
;
434 functions
->BeginTransformFeedback
= brw_begin_transform_feedback
;
435 functions
->EndTransformFeedback
= brw_end_transform_feedback
;
436 functions
->PauseTransformFeedback
= brw_pause_transform_feedback
;
437 functions
->ResumeTransformFeedback
= brw_resume_transform_feedback
;
438 functions
->GetTransformFeedbackVertexCount
=
439 brw_get_transform_feedback_vertex_count
;
443 functions
->GetSamplePosition
= gen6_get_sample_position
;
447 brw_initialize_context_constants(struct brw_context
*brw
)
449 struct gl_context
*ctx
= &brw
->ctx
;
450 const struct brw_compiler
*compiler
= brw
->screen
->compiler
;
452 const bool stage_exists
[MESA_SHADER_STAGES
] = {
453 [MESA_SHADER_VERTEX
] = true,
454 [MESA_SHADER_TESS_CTRL
] = brw
->gen
>= 7,
455 [MESA_SHADER_TESS_EVAL
] = brw
->gen
>= 7,
456 [MESA_SHADER_GEOMETRY
] = brw
->gen
>= 6,
457 [MESA_SHADER_FRAGMENT
] = true,
458 [MESA_SHADER_COMPUTE
] =
459 ((ctx
->API
== API_OPENGL_COMPAT
|| ctx
->API
== API_OPENGL_CORE
) &&
460 ctx
->Const
.MaxComputeWorkGroupSize
[0] >= 1024) ||
461 (ctx
->API
== API_OPENGLES2
&&
462 ctx
->Const
.MaxComputeWorkGroupSize
[0] >= 128) ||
463 _mesa_extension_override_enables
.ARB_compute_shader
,
466 unsigned num_stages
= 0;
467 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
472 unsigned max_samplers
=
473 brw
->gen
>= 8 || brw
->is_haswell
? BRW_MAX_TEX_UNIT
: 16;
475 ctx
->Const
.MaxDualSourceDrawBuffers
= 1;
476 ctx
->Const
.MaxDrawBuffers
= BRW_MAX_DRAW_BUFFERS
;
477 ctx
->Const
.MaxCombinedShaderOutputResources
=
478 MAX_IMAGE_UNITS
+ BRW_MAX_DRAW_BUFFERS
;
480 /* The timestamp register we can read for glGetTimestamp() is
481 * sometimes only 32 bits, before scaling to nanoseconds (depending
484 * Once scaled to nanoseconds the timestamp would roll over at a
485 * non-power-of-two, so an application couldn't use
486 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
487 * report 36 bits and truncate at that (rolling over 5 times as
488 * often as the HW counter), and when the 32-bit counter rolls
489 * over, it happens to also be at a rollover in the reported value
490 * from near (1<<36) to 0.
492 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
493 * rolls over every ~69 seconds.
495 ctx
->Const
.QueryCounterBits
.Timestamp
= 36;
497 ctx
->Const
.MaxTextureCoordUnits
= 8; /* Mesa limit */
498 ctx
->Const
.MaxImageUnits
= MAX_IMAGE_UNITS
;
500 ctx
->Const
.MaxRenderbufferSize
= 16384;
501 ctx
->Const
.MaxTextureLevels
= MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS
);
502 ctx
->Const
.MaxCubeTextureLevels
= 15; /* 16384 */
504 ctx
->Const
.MaxRenderbufferSize
= 8192;
505 ctx
->Const
.MaxTextureLevels
= MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS
);
506 ctx
->Const
.MaxCubeTextureLevels
= 14; /* 8192 */
508 ctx
->Const
.Max3DTextureLevels
= 12; /* 2048 */
509 ctx
->Const
.MaxArrayTextureLayers
= brw
->gen
>= 7 ? 2048 : 512;
510 ctx
->Const
.MaxTextureMbytes
= 1536;
511 ctx
->Const
.MaxTextureRectSize
= 1 << 12;
512 ctx
->Const
.MaxTextureMaxAnisotropy
= 16.0;
513 ctx
->Const
.MaxTextureLodBias
= 15.0;
514 ctx
->Const
.StripTextureBorder
= true;
516 ctx
->Const
.MaxProgramTextureGatherComponents
= 4;
517 ctx
->Const
.MinProgramTextureGatherOffset
= -32;
518 ctx
->Const
.MaxProgramTextureGatherOffset
= 31;
519 } else if (brw
->gen
== 6) {
520 ctx
->Const
.MaxProgramTextureGatherComponents
= 1;
521 ctx
->Const
.MinProgramTextureGatherOffset
= -8;
522 ctx
->Const
.MaxProgramTextureGatherOffset
= 7;
525 ctx
->Const
.MaxUniformBlockSize
= 65536;
527 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
528 struct gl_program_constants
*prog
= &ctx
->Const
.Program
[i
];
530 if (!stage_exists
[i
])
533 prog
->MaxTextureImageUnits
= max_samplers
;
535 prog
->MaxUniformBlocks
= BRW_MAX_UBO
;
536 prog
->MaxCombinedUniformComponents
=
537 prog
->MaxUniformComponents
+
538 ctx
->Const
.MaxUniformBlockSize
/ 4 * prog
->MaxUniformBlocks
;
540 prog
->MaxAtomicCounters
= MAX_ATOMIC_COUNTERS
;
541 prog
->MaxAtomicBuffers
= BRW_MAX_ABO
;
542 prog
->MaxImageUniforms
= compiler
->scalar_stage
[i
] ? BRW_MAX_IMAGES
: 0;
543 prog
->MaxShaderStorageBlocks
= BRW_MAX_SSBO
;
546 ctx
->Const
.MaxTextureUnits
=
547 MIN2(ctx
->Const
.MaxTextureCoordUnits
,
548 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxTextureImageUnits
);
550 ctx
->Const
.MaxUniformBufferBindings
= num_stages
* BRW_MAX_UBO
;
551 ctx
->Const
.MaxCombinedUniformBlocks
= num_stages
* BRW_MAX_UBO
;
552 ctx
->Const
.MaxCombinedAtomicBuffers
= num_stages
* BRW_MAX_ABO
;
553 ctx
->Const
.MaxCombinedShaderStorageBlocks
= num_stages
* BRW_MAX_SSBO
;
554 ctx
->Const
.MaxShaderStorageBufferBindings
= num_stages
* BRW_MAX_SSBO
;
555 ctx
->Const
.MaxCombinedTextureImageUnits
= num_stages
* max_samplers
;
556 ctx
->Const
.MaxCombinedImageUniforms
= num_stages
* BRW_MAX_IMAGES
;
559 /* Hardware only supports a limited number of transform feedback buffers.
560 * So we need to override the Mesa default (which is based only on software
563 ctx
->Const
.MaxTransformFeedbackBuffers
= BRW_MAX_SOL_BUFFERS
;
565 /* On Gen6, in the worst case, we use up one binding table entry per
566 * transform feedback component (see comments above the definition of
567 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
568 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
569 * BRW_MAX_SOL_BINDINGS.
571 * In "separate components" mode, we need to divide this value by
572 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
573 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
575 ctx
->Const
.MaxTransformFeedbackInterleavedComponents
= BRW_MAX_SOL_BINDINGS
;
576 ctx
->Const
.MaxTransformFeedbackSeparateComponents
=
577 BRW_MAX_SOL_BINDINGS
/ BRW_MAX_SOL_BUFFERS
;
579 ctx
->Const
.AlwaysUseGetTransformFeedbackVertexCount
=
580 !can_do_mi_math_and_lrr(brw
->screen
);
583 const int *msaa_modes
= intel_supported_msaa_modes(brw
->screen
);
584 const int clamp_max_samples
=
585 driQueryOptioni(&brw
->optionCache
, "clamp_max_samples");
587 if (clamp_max_samples
< 0) {
588 max_samples
= msaa_modes
[0];
590 /* Select the largest supported MSAA mode that does not exceed
594 for (int i
= 0; msaa_modes
[i
] != 0; ++i
) {
595 if (msaa_modes
[i
] <= clamp_max_samples
) {
596 max_samples
= msaa_modes
[i
];
602 ctx
->Const
.MaxSamples
= max_samples
;
603 ctx
->Const
.MaxColorTextureSamples
= max_samples
;
604 ctx
->Const
.MaxDepthTextureSamples
= max_samples
;
605 ctx
->Const
.MaxIntegerSamples
= max_samples
;
606 ctx
->Const
.MaxImageSamples
= 0;
608 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
609 * to map indices of rectangular grid to sample numbers within a pixel.
610 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
611 * extension implementation. For more details see the comment above
612 * gen6_set_sample_maps() definition.
614 gen6_set_sample_maps(ctx
);
616 ctx
->Const
.MinLineWidth
= 1.0;
617 ctx
->Const
.MinLineWidthAA
= 1.0;
619 ctx
->Const
.MaxLineWidth
= 7.375;
620 ctx
->Const
.MaxLineWidthAA
= 7.375;
621 ctx
->Const
.LineWidthGranularity
= 0.125;
623 ctx
->Const
.MaxLineWidth
= 7.0;
624 ctx
->Const
.MaxLineWidthAA
= 7.0;
625 ctx
->Const
.LineWidthGranularity
= 0.5;
628 /* For non-antialiased lines, we have to round the line width to the
629 * nearest whole number. Make sure that we don't advertise a line
630 * width that, when rounded, will be beyond the actual hardware
633 assert(roundf(ctx
->Const
.MaxLineWidth
) <= ctx
->Const
.MaxLineWidth
);
635 ctx
->Const
.MinPointSize
= 1.0;
636 ctx
->Const
.MinPointSizeAA
= 1.0;
637 ctx
->Const
.MaxPointSize
= 255.0;
638 ctx
->Const
.MaxPointSizeAA
= 255.0;
639 ctx
->Const
.PointSizeGranularity
= 1.0;
641 if (brw
->gen
>= 5 || brw
->is_g4x
)
642 ctx
->Const
.MaxClipPlanes
= 8;
644 ctx
->Const
.GLSLTessLevelsAsInputs
= true;
645 ctx
->Const
.LowerTCSPatchVerticesIn
= brw
->gen
>= 8;
646 ctx
->Const
.LowerTESPatchVerticesIn
= true;
647 ctx
->Const
.PrimitiveRestartForPatches
= true;
649 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeInstructions
= 16 * 1024;
650 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxAluInstructions
= 0;
651 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxTexInstructions
= 0;
652 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxTexIndirections
= 0;
653 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAluInstructions
= 0;
654 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTexInstructions
= 0;
655 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTexIndirections
= 0;
656 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAttribs
= 16;
657 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTemps
= 256;
658 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAddressRegs
= 1;
659 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeParameters
= 1024;
660 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxEnvParams
=
661 MIN2(ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeParameters
,
662 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxEnvParams
);
664 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeInstructions
= 1024;
665 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAluInstructions
= 1024;
666 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTexInstructions
= 1024;
667 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTexIndirections
= 1024;
668 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAttribs
= 12;
669 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTemps
= 256;
670 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAddressRegs
= 0;
671 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeParameters
= 1024;
672 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxEnvParams
=
673 MIN2(ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeParameters
,
674 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxEnvParams
);
676 /* Fragment shaders use real, 32-bit twos-complement integers for all
679 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.RangeMin
= 31;
680 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.RangeMax
= 30;
681 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.Precision
= 0;
682 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].HighInt
= ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
;
683 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MediumInt
= ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
;
685 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.RangeMin
= 31;
686 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.RangeMax
= 30;
687 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.Precision
= 0;
688 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].HighInt
= ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
;
689 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MediumInt
= ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
;
691 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
692 * but we're not sure how it's actually done for vertex order,
693 * that affect provoking vertex decision. Always use last vertex
694 * convention for quad primitive which works as expected for now.
697 ctx
->Const
.QuadsFollowProvokingVertexConvention
= false;
699 ctx
->Const
.NativeIntegers
= true;
700 ctx
->Const
.VertexID_is_zero_based
= true;
702 /* Regarding the CMP instruction, the Ivybridge PRM says:
704 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
705 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
706 * 0xFFFFFFFF) is assigned to dst."
708 * but PRMs for earlier generations say
710 * "In dword format, one GRF may store up to 8 results. When the register
711 * is used later as a vector of Booleans, as only LSB at each channel
712 * contains meaning [sic] data, software should make sure all higher bits
713 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
715 * We select the representation of a true boolean uniform to be ~0, and fix
716 * the results of Gen <= 5 CMP instruction's with -(result & 1).
718 ctx
->Const
.UniformBooleanTrue
= ~0;
720 /* From the gen4 PRM, volume 4 page 127:
722 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
723 * the base address of the first element of the surface, computed in
724 * software by adding the surface base address to the byte offset of
725 * the element in the buffer."
727 * However, unaligned accesses are slower, so enforce buffer alignment.
729 ctx
->Const
.UniformBufferOffsetAlignment
= 16;
731 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
732 * that we can safely have the CPU and GPU writing the same SSBO on
733 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
734 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
735 * be updating disjoint regions of the buffer simultaneously and that will
736 * break if the regions overlap the same cacheline.
738 ctx
->Const
.ShaderStorageBufferOffsetAlignment
= 64;
739 ctx
->Const
.TextureBufferOffsetAlignment
= 16;
740 ctx
->Const
.MaxTextureBufferSize
= 128 * 1024 * 1024;
743 ctx
->Const
.MaxVarying
= 32;
744 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxOutputComponents
= 128;
745 ctx
->Const
.Program
[MESA_SHADER_GEOMETRY
].MaxInputComponents
= 64;
746 ctx
->Const
.Program
[MESA_SHADER_GEOMETRY
].MaxOutputComponents
= 128;
747 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxInputComponents
= 128;
748 ctx
->Const
.Program
[MESA_SHADER_TESS_CTRL
].MaxInputComponents
= 128;
749 ctx
->Const
.Program
[MESA_SHADER_TESS_CTRL
].MaxOutputComponents
= 128;
750 ctx
->Const
.Program
[MESA_SHADER_TESS_EVAL
].MaxInputComponents
= 128;
751 ctx
->Const
.Program
[MESA_SHADER_TESS_EVAL
].MaxOutputComponents
= 128;
754 /* We want the GLSL compiler to emit code that uses condition codes */
755 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
756 ctx
->Const
.ShaderCompilerOptions
[i
] =
757 brw
->screen
->compiler
->glsl_compiler_options
[i
];
761 ctx
->Const
.MaxViewportWidth
= 32768;
762 ctx
->Const
.MaxViewportHeight
= 32768;
765 /* ARB_viewport_array, OES_viewport_array */
767 ctx
->Const
.MaxViewports
= GEN6_NUM_VIEWPORTS
;
768 ctx
->Const
.ViewportSubpixelBits
= 0;
770 /* Cast to float before negating because MaxViewportWidth is unsigned.
772 ctx
->Const
.ViewportBounds
.Min
= -(float)ctx
->Const
.MaxViewportWidth
;
773 ctx
->Const
.ViewportBounds
.Max
= ctx
->Const
.MaxViewportWidth
;
776 /* ARB_gpu_shader5 */
778 ctx
->Const
.MaxVertexStreams
= MIN2(4, MAX_VERTEX_STREAMS
);
780 /* ARB_framebuffer_no_attachments */
781 ctx
->Const
.MaxFramebufferWidth
= 16384;
782 ctx
->Const
.MaxFramebufferHeight
= 16384;
783 ctx
->Const
.MaxFramebufferLayers
= ctx
->Const
.MaxArrayTextureLayers
;
784 ctx
->Const
.MaxFramebufferSamples
= max_samples
;
786 /* OES_primitive_bounding_box */
787 ctx
->Const
.NoPrimitiveBoundingBoxOutput
= true;
791 brw_initialize_cs_context_constants(struct brw_context
*brw
)
793 struct gl_context
*ctx
= &brw
->ctx
;
794 const struct intel_screen
*screen
= brw
->screen
;
795 struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
797 /* FINISHME: Do this for all platforms that the kernel supports */
798 if (brw
->is_cherryview
&&
799 screen
->subslice_total
> 0 && screen
->eu_total
> 0) {
800 /* Logical CS threads = EUs per subslice * 7 threads per EU */
801 uint32_t max_cs_threads
= screen
->eu_total
/ screen
->subslice_total
* 7;
803 /* Fuse configurations may give more threads than expected, never less. */
804 if (max_cs_threads
> devinfo
->max_cs_threads
)
805 devinfo
->max_cs_threads
= max_cs_threads
;
808 /* Maximum number of scalar compute shader invocations that can be run in
809 * parallel in the same subslice assuming SIMD32 dispatch.
811 * We don't advertise more than 64 threads, because we are limited to 64 by
812 * our usage of thread_width_max in the gpgpu walker command. This only
813 * currently impacts Haswell, which otherwise might be able to advertise 70
814 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
815 * required the number of invocation needed for ARB_compute_shader.
817 const unsigned max_threads
= MIN2(64, devinfo
->max_cs_threads
);
818 const uint32_t max_invocations
= 32 * max_threads
;
819 ctx
->Const
.MaxComputeWorkGroupSize
[0] = max_invocations
;
820 ctx
->Const
.MaxComputeWorkGroupSize
[1] = max_invocations
;
821 ctx
->Const
.MaxComputeWorkGroupSize
[2] = max_invocations
;
822 ctx
->Const
.MaxComputeWorkGroupInvocations
= max_invocations
;
823 ctx
->Const
.MaxComputeSharedMemorySize
= 64 * 1024;
827 * Process driconf (drirc) options, setting appropriate context flags.
829 * intelInitExtensions still pokes at optionCache directly, in order to
830 * avoid advertising various extensions. No flags are set, so it makes
831 * sense to continue doing that there.
834 brw_process_driconf_options(struct brw_context
*brw
)
836 struct gl_context
*ctx
= &brw
->ctx
;
838 driOptionCache
*options
= &brw
->optionCache
;
839 driParseConfigFiles(options
, &brw
->screen
->optionCache
,
840 brw
->driContext
->driScreenPriv
->myNum
, "i965");
842 int bo_reuse_mode
= driQueryOptioni(options
, "bo_reuse");
843 switch (bo_reuse_mode
) {
844 case DRI_CONF_BO_REUSE_DISABLED
:
846 case DRI_CONF_BO_REUSE_ALL
:
847 brw_bufmgr_enable_reuse(brw
->bufmgr
);
851 if (INTEL_DEBUG
& DEBUG_NO_HIZ
) {
852 brw
->has_hiz
= false;
853 /* On gen6, you can only do separate stencil with HIZ. */
855 brw
->has_separate_stencil
= false;
858 if (driQueryOptionb(options
, "always_flush_batch")) {
859 fprintf(stderr
, "flushing batchbuffer before/after each draw call\n");
860 brw
->always_flush_batch
= true;
863 if (driQueryOptionb(options
, "always_flush_cache")) {
864 fprintf(stderr
, "flushing GPU caches before/after each draw call\n");
865 brw
->always_flush_cache
= true;
868 if (driQueryOptionb(options
, "disable_throttling")) {
869 fprintf(stderr
, "disabling flush throttling\n");
870 brw
->disable_throttling
= true;
873 brw
->precompile
= driQueryOptionb(&brw
->optionCache
, "shader_precompile");
875 if (driQueryOptionb(&brw
->optionCache
, "precise_trig"))
876 brw
->screen
->compiler
->precise_trig
= true;
878 ctx
->Const
.ForceGLSLExtensionsWarn
=
879 driQueryOptionb(options
, "force_glsl_extensions_warn");
881 ctx
->Const
.ForceGLSLVersion
=
882 driQueryOptioni(options
, "force_glsl_version");
884 ctx
->Const
.DisableGLSLLineContinuations
=
885 driQueryOptionb(options
, "disable_glsl_line_continuations");
887 ctx
->Const
.AllowGLSLExtensionDirectiveMidShader
=
888 driQueryOptionb(options
, "allow_glsl_extension_directive_midshader");
890 ctx
->Const
.AllowGLSLBuiltinVariableRedeclaration
=
891 driQueryOptionb(options
, "allow_glsl_builtin_variable_redeclaration");
893 ctx
->Const
.AllowHigherCompatVersion
=
894 driQueryOptionb(options
, "allow_higher_compat_version");
896 ctx
->Const
.ForceGLSLAbsSqrt
=
897 driQueryOptionb(options
, "force_glsl_abs_sqrt");
899 ctx
->Const
.GLSLZeroInit
= driQueryOptionb(options
, "glsl_zero_init");
901 brw
->dual_color_blend_by_location
=
902 driQueryOptionb(options
, "dual_color_blend_by_location");
906 brwCreateContext(gl_api api
,
907 const struct gl_config
*mesaVis
,
908 __DRIcontext
*driContextPriv
,
909 unsigned major_version
,
910 unsigned minor_version
,
913 unsigned *dri_ctx_error
,
914 void *sharedContextPrivate
)
916 struct gl_context
*shareCtx
= (struct gl_context
*) sharedContextPrivate
;
917 struct intel_screen
*screen
= driContextPriv
->driScreenPriv
->driverPrivate
;
918 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
919 struct dd_function_table functions
;
921 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
922 * provides us with context reset notifications.
924 uint32_t allowed_flags
= __DRI_CTX_FLAG_DEBUG
925 | __DRI_CTX_FLAG_FORWARD_COMPATIBLE
;
927 if (screen
->has_context_reset_notification
)
928 allowed_flags
|= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS
;
930 if (flags
& ~allowed_flags
) {
931 *dri_ctx_error
= __DRI_CTX_ERROR_UNKNOWN_FLAG
;
935 struct brw_context
*brw
= rzalloc(NULL
, struct brw_context
);
937 fprintf(stderr
, "%s: failed to alloc context\n", __func__
);
938 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
942 driContextPriv
->driverPrivate
= brw
;
943 brw
->driContext
= driContextPriv
;
944 brw
->screen
= screen
;
945 brw
->bufmgr
= screen
->bufmgr
;
947 brw
->gen
= devinfo
->gen
;
948 brw
->gt
= devinfo
->gt
;
949 brw
->is_g4x
= devinfo
->is_g4x
;
950 brw
->is_baytrail
= devinfo
->is_baytrail
;
951 brw
->is_haswell
= devinfo
->is_haswell
;
952 brw
->is_cherryview
= devinfo
->is_cherryview
;
953 brw
->is_broxton
= devinfo
->is_broxton
;
954 brw
->has_llc
= devinfo
->has_llc
;
955 brw
->has_hiz
= devinfo
->has_hiz_and_separate_stencil
;
956 brw
->has_separate_stencil
= devinfo
->has_hiz_and_separate_stencil
;
957 brw
->has_pln
= devinfo
->has_pln
;
958 brw
->has_compr4
= devinfo
->has_compr4
;
959 brw
->has_surface_tile_offset
= devinfo
->has_surface_tile_offset
;
960 brw
->has_negative_rhw_bug
= devinfo
->has_negative_rhw_bug
;
961 brw
->needs_unlit_centroid_workaround
=
962 devinfo
->needs_unlit_centroid_workaround
;
964 brw
->must_use_separate_stencil
= devinfo
->must_use_separate_stencil
;
965 brw
->has_swizzling
= screen
->hw_has_swizzling
;
967 isl_device_init(&brw
->isl_dev
, devinfo
, screen
->hw_has_swizzling
);
969 brw
->vs
.base
.stage
= MESA_SHADER_VERTEX
;
970 brw
->tcs
.base
.stage
= MESA_SHADER_TESS_CTRL
;
971 brw
->tes
.base
.stage
= MESA_SHADER_TESS_EVAL
;
972 brw
->gs
.base
.stage
= MESA_SHADER_GEOMETRY
;
973 brw
->wm
.base
.stage
= MESA_SHADER_FRAGMENT
;
975 gen8_init_vtable_surface_functions(brw
);
976 brw
->vtbl
.emit_depth_stencil_hiz
= gen8_emit_depth_stencil_hiz
;
977 } else if (brw
->gen
>= 7) {
978 gen7_init_vtable_surface_functions(brw
);
979 brw
->vtbl
.emit_depth_stencil_hiz
= gen7_emit_depth_stencil_hiz
;
980 } else if (brw
->gen
>= 6) {
981 gen6_init_vtable_surface_functions(brw
);
982 brw
->vtbl
.emit_depth_stencil_hiz
= gen6_emit_depth_stencil_hiz
;
984 gen4_init_vtable_surface_functions(brw
);
985 brw
->vtbl
.emit_depth_stencil_hiz
= brw_emit_depth_stencil_hiz
;
988 brw_init_driver_functions(brw
, &functions
);
991 functions
.GetGraphicsResetStatus
= brw_get_graphics_reset_status
;
993 struct gl_context
*ctx
= &brw
->ctx
;
995 if (!_mesa_initialize_context(ctx
, api
, mesaVis
, shareCtx
, &functions
)) {
996 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
997 fprintf(stderr
, "%s: failed to init mesa context\n", __func__
);
998 intelDestroyContext(driContextPriv
);
1002 driContextSetFlags(ctx
, flags
);
1004 /* Initialize the software rasterizer and helper modules.
1006 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1007 * software fallbacks (which we have to support on legacy GL to do weird
1008 * glDrawPixels(), glBitmap(), and other functions).
1010 if (api
!= API_OPENGL_CORE
&& api
!= API_OPENGLES2
) {
1011 _swrast_CreateContext(ctx
);
1014 _vbo_CreateContext(ctx
);
1015 if (ctx
->swrast_context
) {
1016 _tnl_CreateContext(ctx
);
1017 TNL_CONTEXT(ctx
)->Driver
.RunPipeline
= _tnl_run_pipeline
;
1018 _swsetup_CreateContext(ctx
);
1020 /* Configure swrast to match hardware characteristics: */
1021 _swrast_allow_pixel_fog(ctx
, false);
1022 _swrast_allow_vertex_fog(ctx
, true);
1025 _mesa_meta_init(ctx
);
1027 brw_process_driconf_options(brw
);
1029 if (INTEL_DEBUG
& DEBUG_PERF
)
1030 brw
->perf_debug
= true;
1032 brw_initialize_cs_context_constants(brw
);
1033 brw_initialize_context_constants(brw
);
1035 ctx
->Const
.ResetStrategy
= notify_reset
1036 ? GL_LOSE_CONTEXT_ON_RESET_ARB
: GL_NO_RESET_NOTIFICATION_ARB
;
1038 /* Reinitialize the context point state. It depends on ctx->Const values. */
1039 _mesa_init_point(ctx
);
1041 intel_fbo_init(brw
);
1043 intel_batchbuffer_init(&brw
->batch
, brw
->bufmgr
, brw
->has_llc
);
1045 if (brw
->gen
>= 6) {
1046 /* Create a new hardware context. Using a hardware context means that
1047 * our GPU state will be saved/restored on context switch, allowing us
1048 * to assume that the GPU is in the same state we left it in.
1050 * This is required for transform feedback buffer offsets, query objects,
1051 * and also allows us to reduce how much state we have to emit.
1053 brw
->hw_ctx
= brw_create_hw_context(brw
->bufmgr
);
1056 fprintf(stderr
, "Failed to create hardware context.\n");
1057 intelDestroyContext(driContextPriv
);
1062 if (brw_init_pipe_control(brw
, devinfo
)) {
1063 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
1064 intelDestroyContext(driContextPriv
);
1068 brw_init_state(brw
);
1070 intelInitExtensions(ctx
);
1072 brw_init_surface_formats(brw
);
1074 brw_blorp_init(brw
);
1076 brw
->urb
.size
= devinfo
->urb
.size
;
1079 brw
->urb
.gs_present
= false;
1081 brw
->prim_restart
.in_progress
= false;
1082 brw
->prim_restart
.enable_cut_index
= false;
1083 brw
->gs
.enabled
= false;
1084 brw
->clip
.viewport_count
= 1;
1086 brw
->predicate
.state
= BRW_PREDICATE_STATE_RENDER
;
1088 brw
->max_gtt_map_object_size
= screen
->max_gtt_map_object_size
;
1090 ctx
->VertexProgram
._MaintainTnlProgram
= true;
1091 ctx
->FragmentProgram
._MaintainTexEnvProgram
= true;
1093 brw_draw_init( brw
);
1095 if ((flags
& __DRI_CTX_FLAG_DEBUG
) != 0) {
1096 /* Turn on some extra GL_ARB_debug_output generation. */
1097 brw
->perf_debug
= true;
1100 if ((flags
& __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS
) != 0) {
1101 ctx
->Const
.ContextFlags
|= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB
;
1102 ctx
->Const
.RobustAccess
= GL_TRUE
;
1105 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
1106 brw_init_shader_time(brw
);
1108 _mesa_compute_version(ctx
);
1110 _mesa_initialize_dispatch_tables(ctx
);
1111 _mesa_initialize_vbo_vtxfmt(ctx
);
1113 if (ctx
->Extensions
.INTEL_performance_query
)
1114 brw_init_performance_queries(brw
);
1116 vbo_use_buffer_objects(ctx
);
1117 vbo_always_unmap_buffers(ctx
);
1123 intelDestroyContext(__DRIcontext
* driContextPriv
)
1125 struct brw_context
*brw
=
1126 (struct brw_context
*) driContextPriv
->driverPrivate
;
1127 struct gl_context
*ctx
= &brw
->ctx
;
1129 _mesa_meta_free(&brw
->ctx
);
1131 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
) {
1132 /* Force a report. */
1133 brw
->shader_time
.report_time
= 0;
1135 brw_collect_and_report_shader_time(brw
);
1136 brw_destroy_shader_time(brw
);
1140 blorp_finish(&brw
->blorp
);
1142 brw_destroy_state(brw
);
1143 brw_draw_destroy(brw
);
1145 brw_bo_unreference(brw
->curbe
.curbe_bo
);
1146 if (brw
->vs
.base
.scratch_bo
)
1147 brw_bo_unreference(brw
->vs
.base
.scratch_bo
);
1148 if (brw
->tcs
.base
.scratch_bo
)
1149 brw_bo_unreference(brw
->tcs
.base
.scratch_bo
);
1150 if (brw
->tes
.base
.scratch_bo
)
1151 brw_bo_unreference(brw
->tes
.base
.scratch_bo
);
1152 if (brw
->gs
.base
.scratch_bo
)
1153 brw_bo_unreference(brw
->gs
.base
.scratch_bo
);
1154 if (brw
->wm
.base
.scratch_bo
)
1155 brw_bo_unreference(brw
->wm
.base
.scratch_bo
);
1157 brw_destroy_hw_context(brw
->bufmgr
, brw
->hw_ctx
);
1159 if (ctx
->swrast_context
) {
1160 _swsetup_DestroyContext(&brw
->ctx
);
1161 _tnl_DestroyContext(&brw
->ctx
);
1163 _vbo_DestroyContext(&brw
->ctx
);
1165 if (ctx
->swrast_context
)
1166 _swrast_DestroyContext(&brw
->ctx
);
1168 brw_fini_pipe_control(brw
);
1169 intel_batchbuffer_free(&brw
->batch
);
1171 brw_bo_unreference(brw
->throttle_batch
[1]);
1172 brw_bo_unreference(brw
->throttle_batch
[0]);
1173 brw
->throttle_batch
[1] = NULL
;
1174 brw
->throttle_batch
[0] = NULL
;
1176 driDestroyOptionCache(&brw
->optionCache
);
1178 /* free the Mesa context */
1179 _mesa_free_context_data(&brw
->ctx
);
1182 driContextPriv
->driverPrivate
= NULL
;
1186 intelUnbindContext(__DRIcontext
* driContextPriv
)
1188 /* Unset current context and dispath table */
1189 _mesa_make_current(NULL
, NULL
, NULL
);
1195 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1196 * on window system framebuffers.
1198 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1199 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1200 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1201 * for a visual where you're guaranteed to be capable, but it turns out that
1202 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1203 * incapable ones, because there's no difference between the two in resources
1204 * used. Applications thus get built that accidentally rely on the default
1205 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1208 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1209 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1210 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1211 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1212 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1213 * and get no sRGB encode (assuming that both kinds of visual are available).
1214 * Thus our choice to support sRGB by default on our visuals for desktop would
1215 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1217 * Unfortunately, renderbuffer setup happens before a context is created. So
1218 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1219 * context (without an sRGB visual, though we don't have sRGB visuals exposed
1220 * yet), we go turn that back off before anyone finds out.
1223 intel_gles3_srgb_workaround(struct brw_context
*brw
,
1224 struct gl_framebuffer
*fb
)
1226 struct gl_context
*ctx
= &brw
->ctx
;
1228 if (_mesa_is_desktop_gl(ctx
) || !fb
->Visual
.sRGBCapable
)
1231 /* Some day when we support the sRGB capable bit on visuals available for
1232 * GLES, we'll need to respect that and not disable things here.
1234 fb
->Visual
.sRGBCapable
= false;
1235 for (int i
= 0; i
< BUFFER_COUNT
; i
++) {
1236 struct gl_renderbuffer
*rb
= fb
->Attachment
[i
].Renderbuffer
;
1238 rb
->Format
= _mesa_get_srgb_format_linear(rb
->Format
);
1243 intelMakeCurrent(__DRIcontext
* driContextPriv
,
1244 __DRIdrawable
* driDrawPriv
,
1245 __DRIdrawable
* driReadPriv
)
1247 struct brw_context
*brw
;
1248 GET_CURRENT_CONTEXT(curCtx
);
1251 brw
= (struct brw_context
*) driContextPriv
->driverPrivate
;
1255 /* According to the glXMakeCurrent() man page: "Pending commands to
1256 * the previous context, if any, are flushed before it is released."
1257 * But only flush if we're actually changing contexts.
1259 if (brw_context(curCtx
) && brw_context(curCtx
) != brw
) {
1260 _mesa_flush(curCtx
);
1263 if (driContextPriv
) {
1264 struct gl_context
*ctx
= &brw
->ctx
;
1265 struct gl_framebuffer
*fb
, *readFb
;
1267 if (driDrawPriv
== NULL
) {
1268 fb
= _mesa_get_incomplete_framebuffer();
1270 fb
= driDrawPriv
->driverPrivate
;
1271 driContextPriv
->dri2
.draw_stamp
= driDrawPriv
->dri2
.stamp
- 1;
1274 if (driReadPriv
== NULL
) {
1275 readFb
= _mesa_get_incomplete_framebuffer();
1277 readFb
= driReadPriv
->driverPrivate
;
1278 driContextPriv
->dri2
.read_stamp
= driReadPriv
->dri2
.stamp
- 1;
1281 /* The sRGB workaround changes the renderbuffer's format. We must change
1282 * the format before the renderbuffer's miptree get's allocated, otherwise
1283 * the formats of the renderbuffer and its miptree will differ.
1285 intel_gles3_srgb_workaround(brw
, fb
);
1286 intel_gles3_srgb_workaround(brw
, readFb
);
1288 /* If the context viewport hasn't been initialized, force a call out to
1289 * the loader to get buffers so we have a drawable size for the initial
1291 if (!brw
->ctx
.ViewportInitialized
)
1292 intel_prepare_render(brw
);
1294 _mesa_make_current(ctx
, fb
, readFb
);
1296 _mesa_make_current(NULL
, NULL
, NULL
);
1303 intel_resolve_for_dri2_flush(struct brw_context
*brw
,
1304 __DRIdrawable
*drawable
)
1307 /* MSAA and fast color clear are not supported, so don't waste time
1308 * checking whether a resolve is needed.
1313 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1314 struct intel_renderbuffer
*rb
;
1316 /* Usually, only the back buffer will need to be downsampled. However,
1317 * the front buffer will also need it if the user has rendered into it.
1319 static const gl_buffer_index buffers
[2] = {
1324 for (int i
= 0; i
< 2; ++i
) {
1325 rb
= intel_get_renderbuffer(fb
, buffers
[i
]);
1326 if (rb
== NULL
|| rb
->mt
== NULL
)
1328 if (rb
->mt
->num_samples
<= 1) {
1329 assert(rb
->mt_layer
== 0 && rb
->mt_level
== 0 &&
1330 rb
->layer_count
== 1);
1331 intel_miptree_prepare_access(brw
, rb
->mt
, 0, 1, 0, 1, false, false);
1333 intel_renderbuffer_downsample(brw
, rb
);
/* Bits per pixel of the renderbuffer's format (format bytes * 8). */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}
1345 intel_query_dri2_buffers(struct brw_context
*brw
,
1346 __DRIdrawable
*drawable
,
1347 __DRIbuffer
**buffers
,
1351 intel_process_dri2_buffer(struct brw_context
*brw
,
1352 __DRIdrawable
*drawable
,
1353 __DRIbuffer
*buffer
,
1354 struct intel_renderbuffer
*rb
,
1355 const char *buffer_name
);
1358 intel_update_image_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
);
1361 intel_update_dri2_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
)
1363 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1364 struct intel_renderbuffer
*rb
;
1365 __DRIbuffer
*buffers
= NULL
;
1367 const char *region_name
;
1369 /* Set this up front, so that in case our buffers get invalidated
1370 * while we're getting new buffers, we don't clobber the stamp and
1371 * thus ignore the invalidate. */
1372 drawable
->lastStamp
= drawable
->dri2
.stamp
;
1374 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
))
1375 fprintf(stderr
, "enter %s, drawable %p\n", __func__
, drawable
);
1377 intel_query_dri2_buffers(brw
, drawable
, &buffers
, &count
);
1379 if (buffers
== NULL
)
1382 for (int i
= 0; i
< count
; i
++) {
1383 switch (buffers
[i
].attachment
) {
1384 case __DRI_BUFFER_FRONT_LEFT
:
1385 rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1386 region_name
= "dri2 front buffer";
1389 case __DRI_BUFFER_FAKE_FRONT_LEFT
:
1390 rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1391 region_name
= "dri2 fake front buffer";
1394 case __DRI_BUFFER_BACK_LEFT
:
1395 rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1396 region_name
= "dri2 back buffer";
1399 case __DRI_BUFFER_DEPTH
:
1400 case __DRI_BUFFER_HIZ
:
1401 case __DRI_BUFFER_DEPTH_STENCIL
:
1402 case __DRI_BUFFER_STENCIL
:
1403 case __DRI_BUFFER_ACCUM
:
1406 "unhandled buffer attach event, attachment type %d\n",
1407 buffers
[i
].attachment
);
1411 intel_process_dri2_buffer(brw
, drawable
, &buffers
[i
], rb
, region_name
);
1417 intel_update_renderbuffers(__DRIcontext
*context
, __DRIdrawable
*drawable
)
1419 struct brw_context
*brw
= context
->driverPrivate
;
1420 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1422 /* Set this up front, so that in case our buffers get invalidated
1423 * while we're getting new buffers, we don't clobber the stamp and
1424 * thus ignore the invalidate. */
1425 drawable
->lastStamp
= drawable
->dri2
.stamp
;
1427 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
))
1428 fprintf(stderr
, "enter %s, drawable %p\n", __func__
, drawable
);
1430 if (dri_screen
->image
.loader
)
1431 intel_update_image_buffers(brw
, drawable
);
1433 intel_update_dri2_buffers(brw
, drawable
);
1435 driUpdateFramebufferSize(&brw
->ctx
, drawable
);
1439 * intel_prepare_render should be called anywhere that curent read/drawbuffer
1440 * state is required.
1443 intel_prepare_render(struct brw_context
*brw
)
1445 struct gl_context
*ctx
= &brw
->ctx
;
1446 __DRIcontext
*driContext
= brw
->driContext
;
1447 __DRIdrawable
*drawable
;
1449 drawable
= driContext
->driDrawablePriv
;
1450 if (drawable
&& drawable
->dri2
.stamp
!= driContext
->dri2
.draw_stamp
) {
1451 if (drawable
->lastStamp
!= drawable
->dri2
.stamp
)
1452 intel_update_renderbuffers(driContext
, drawable
);
1453 driContext
->dri2
.draw_stamp
= drawable
->dri2
.stamp
;
1456 drawable
= driContext
->driReadablePriv
;
1457 if (drawable
&& drawable
->dri2
.stamp
!= driContext
->dri2
.read_stamp
) {
1458 if (drawable
->lastStamp
!= drawable
->dri2
.stamp
)
1459 intel_update_renderbuffers(driContext
, drawable
);
1460 driContext
->dri2
.read_stamp
= drawable
->dri2
.stamp
;
1463 /* If we're currently rendering to the front buffer, the rendering
1464 * that will happen next will probably dirty the front buffer. So
1465 * mark it as dirty here.
1467 if (_mesa_is_front_buffer_drawing(ctx
->DrawBuffer
))
1468 brw
->front_buffer_dirty
= true;
1472 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1474 * To determine which DRI buffers to request, examine the renderbuffers
1475 * attached to the drawable's framebuffer. Then request the buffers with
1476 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1478 * This is called from intel_update_renderbuffers().
1480 * \param drawable Drawable whose buffers are queried.
1481 * \param buffers [out] List of buffers returned by DRI2 query.
1482 * \param buffer_count [out] Number of buffers returned.
1484 * \see intel_update_renderbuffers()
1485 * \see DRI2GetBuffers()
1486 * \see DRI2GetBuffersWithFormat()
1489 intel_query_dri2_buffers(struct brw_context
*brw
,
1490 __DRIdrawable
*drawable
,
1491 __DRIbuffer
**buffers
,
1494 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1495 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1497 unsigned attachments
[8];
1499 struct intel_renderbuffer
*front_rb
;
1500 struct intel_renderbuffer
*back_rb
;
1502 front_rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1503 back_rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1505 memset(attachments
, 0, sizeof(attachments
));
1506 if ((_mesa_is_front_buffer_drawing(fb
) ||
1507 _mesa_is_front_buffer_reading(fb
) ||
1508 !back_rb
) && front_rb
) {
1509 /* If a fake front buffer is in use, then querying for
1510 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1511 * the real front buffer to the fake front buffer. So before doing the
1512 * query, we need to make sure all the pending drawing has landed in the
1513 * real front buffer.
1515 intel_batchbuffer_flush(brw
);
1516 intel_flush_front(&brw
->ctx
);
1518 attachments
[i
++] = __DRI_BUFFER_FRONT_LEFT
;
1519 attachments
[i
++] = intel_bits_per_pixel(front_rb
);
1520 } else if (front_rb
&& brw
->front_buffer_dirty
) {
1521 /* We have pending front buffer rendering, but we aren't querying for a
1522 * front buffer. If the front buffer we have is a fake front buffer,
1523 * the X server is going to throw it away when it processes the query.
1524 * So before doing the query, make sure all the pending drawing has
1525 * landed in the real front buffer.
1527 intel_batchbuffer_flush(brw
);
1528 intel_flush_front(&brw
->ctx
);
1532 attachments
[i
++] = __DRI_BUFFER_BACK_LEFT
;
1533 attachments
[i
++] = intel_bits_per_pixel(back_rb
);
1536 assert(i
<= ARRAY_SIZE(attachments
));
1539 dri_screen
->dri2
.loader
->getBuffersWithFormat(drawable
,
1544 drawable
->loaderPrivate
);
1548 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1550 * This is called from intel_update_renderbuffers().
1553 * DRI buffers whose attachment point is DRI2BufferStencil or
1554 * DRI2BufferDepthStencil are handled as special cases.
1556 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1557 * that is passed to brw_bo_gem_create_from_name().
1559 * \see intel_update_renderbuffers()
1562 intel_process_dri2_buffer(struct brw_context
*brw
,
1563 __DRIdrawable
*drawable
,
1564 __DRIbuffer
*buffer
,
1565 struct intel_renderbuffer
*rb
,
1566 const char *buffer_name
)
1568 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1574 unsigned num_samples
= rb
->Base
.Base
.NumSamples
;
1576 /* We try to avoid closing and reopening the same BO name, because the first
1577 * use of a mapping of the buffer involves a bunch of page faulting which is
1578 * moderately expensive.
1580 struct intel_mipmap_tree
*last_mt
;
1581 if (num_samples
== 0)
1584 last_mt
= rb
->singlesample_mt
;
1586 uint32_t old_name
= 0;
1588 /* The bo already has a name because the miptree was created by a
1589 * previous call to intel_process_dri2_buffer(). If a bo already has a
1590 * name, then brw_bo_flink() is a low-cost getter. It does not
1591 * create a new name.
1593 brw_bo_flink(last_mt
->bo
, &old_name
);
1596 if (old_name
== buffer
->name
)
1599 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
)) {
1601 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1602 buffer
->name
, buffer
->attachment
,
1603 buffer
->cpp
, buffer
->pitch
);
1606 bo
= brw_bo_gem_create_from_name(brw
->bufmgr
, buffer_name
,
1610 "Failed to open BO for returned DRI2 buffer "
1611 "(%dx%d, %s, named %d).\n"
1612 "This is likely a bug in the X Server that will lead to a "
1614 drawable
->w
, drawable
->h
, buffer_name
, buffer
->name
);
1618 intel_update_winsys_renderbuffer_miptree(brw
, rb
, bo
,
1619 drawable
->w
, drawable
->h
,
1622 if (_mesa_is_front_buffer_drawing(fb
) &&
1623 (buffer
->attachment
== __DRI_BUFFER_FRONT_LEFT
||
1624 buffer
->attachment
== __DRI_BUFFER_FAKE_FRONT_LEFT
) &&
1625 rb
->Base
.Base
.NumSamples
> 1) {
1626 intel_renderbuffer_upsample(brw
, rb
);
1631 brw_bo_unreference(bo
);
1635 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1637 * To determine which DRI buffers to request, examine the renderbuffers
1638 * attached to the drawable's framebuffer. Then request the buffers from
1641 * This is called from intel_update_renderbuffers().
1643 * \param drawable Drawable whose buffers are queried.
1644 * \param buffers [out] List of buffers returned by DRI2 query.
1645 * \param buffer_count [out] Number of buffers returned.
1647 * \see intel_update_renderbuffers()
1651 intel_update_image_buffer(struct brw_context
*intel
,
1652 __DRIdrawable
*drawable
,
1653 struct intel_renderbuffer
*rb
,
1655 enum __DRIimageBufferMask buffer_type
)
1657 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1659 if (!rb
|| !buffer
->bo
)
1662 unsigned num_samples
= rb
->Base
.Base
.NumSamples
;
1664 /* Check and see if we're already bound to the right
1667 struct intel_mipmap_tree
*last_mt
;
1668 if (num_samples
== 0)
1671 last_mt
= rb
->singlesample_mt
;
1673 if (last_mt
&& last_mt
->bo
== buffer
->bo
)
1676 intel_update_winsys_renderbuffer_miptree(intel
, rb
, buffer
->bo
,
1677 buffer
->width
, buffer
->height
,
1680 if (_mesa_is_front_buffer_drawing(fb
) &&
1681 buffer_type
== __DRI_IMAGE_BUFFER_FRONT
&&
1682 rb
->Base
.Base
.NumSamples
> 1) {
1683 intel_renderbuffer_upsample(intel
, rb
);
1688 intel_update_image_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
)
1690 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1691 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1692 struct intel_renderbuffer
*front_rb
;
1693 struct intel_renderbuffer
*back_rb
;
1694 struct __DRIimageList images
;
1696 uint32_t buffer_mask
= 0;
1699 front_rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1700 back_rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1703 format
= intel_rb_format(back_rb
);
1705 format
= intel_rb_format(front_rb
);
1709 if (front_rb
&& (_mesa_is_front_buffer_drawing(fb
) ||
1710 _mesa_is_front_buffer_reading(fb
) || !back_rb
)) {
1711 buffer_mask
|= __DRI_IMAGE_BUFFER_FRONT
;
1715 buffer_mask
|= __DRI_IMAGE_BUFFER_BACK
;
1717 ret
= dri_screen
->image
.loader
->getBuffers(drawable
,
1718 driGLFormatToImageFormat(format
),
1719 &drawable
->dri2
.stamp
,
1720 drawable
->loaderPrivate
,
1726 if (images
.image_mask
& __DRI_IMAGE_BUFFER_FRONT
) {
1727 drawable
->w
= images
.front
->width
;
1728 drawable
->h
= images
.front
->height
;
1729 intel_update_image_buffer(brw
,
1733 __DRI_IMAGE_BUFFER_FRONT
);
1736 if (images
.image_mask
& __DRI_IMAGE_BUFFER_BACK
) {
1737 drawable
->w
= images
.back
->width
;
1738 drawable
->h
= images
.back
->height
;
1739 intel_update_image_buffer(brw
,
1743 __DRI_IMAGE_BUFFER_BACK
);