2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **********************************************************************/
30 * Keith Whitwell <keithw@vmware.com>
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/imports.h"
40 #include "main/macros.h"
41 #include "main/points.h"
42 #include "main/version.h"
43 #include "main/vtxfmt.h"
44 #include "main/texobj.h"
45 #include "main/framebuffer.h"
46 #include "main/stencil.h"
47 #include "main/state.h"
49 #include "vbo/vbo_context.h"
51 #include "drivers/common/driverfuncs.h"
52 #include "drivers/common/meta.h"
55 #include "brw_context.h"
56 #include "brw_defines.h"
57 #include "brw_blorp.h"
59 #include "brw_state.h"
61 #include "intel_batchbuffer.h"
62 #include "intel_buffer_objects.h"
63 #include "intel_buffers.h"
64 #include "intel_fbo.h"
65 #include "intel_mipmap_tree.h"
66 #include "intel_pixel.h"
67 #include "intel_image.h"
68 #include "intel_tex.h"
69 #include "intel_tex_obj.h"
71 #include "swrast_setup/swrast_setup.h"
73 #include "tnl/t_pipeline.h"
74 #include "util/ralloc.h"
75 #include "util/debug.h"
78 /***************************************
79 * Mesa's Driver Functions
80 ***************************************/
/* Vendor string reported by glGetString(GL_VENDOR); see intel_get_string(). */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
85 get_bsw_model(const struct intel_screen
*screen
)
87 switch (screen
->eu_total
) {
98 brw_get_renderer_string(const struct intel_screen
*screen
)
101 static char buffer
[128];
104 switch (screen
->deviceID
) {
106 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
107 #include "pci_ids/i965_pci_ids.h"
109 chipset
= "Unknown Intel Chipset";
113 /* Braswell branding is funny, so we have to fix it up here */
114 if (screen
->deviceID
== 0x22B1) {
115 bsw
= strdup(chipset
);
116 char *needle
= strstr(bsw
, "XXX");
118 memcpy(needle
, get_bsw_model(screen
), 3);
123 (void) driGetRendererString(buffer
, chipset
, 0);
128 static const GLubyte
*
129 intel_get_string(struct gl_context
* ctx
, GLenum name
)
131 const struct brw_context
*const brw
= brw_context(ctx
);
135 return (GLubyte
*) brw_vendor_string
;
139 (GLubyte
*) brw_get_renderer_string(brw
->screen
);
147 intel_viewport(struct gl_context
*ctx
)
149 struct brw_context
*brw
= brw_context(ctx
);
150 __DRIcontext
*driContext
= brw
->driContext
;
152 if (_mesa_is_winsys_fbo(ctx
->DrawBuffer
)) {
153 if (driContext
->driDrawablePriv
)
154 dri2InvalidateDrawable(driContext
->driDrawablePriv
);
155 if (driContext
->driReadablePriv
)
156 dri2InvalidateDrawable(driContext
->driReadablePriv
);
161 intel_update_framebuffer(struct gl_context
*ctx
,
162 struct gl_framebuffer
*fb
)
164 struct brw_context
*brw
= brw_context(ctx
);
166 /* Quantize the derived default number of samples
168 fb
->DefaultGeometry
._NumSamples
=
169 intel_quantize_num_samples(brw
->screen
,
170 fb
->DefaultGeometry
.NumSamples
);
174 intel_disable_rb_aux_buffer(struct brw_context
*brw
, const struct brw_bo
*bo
)
176 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
179 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
180 const struct intel_renderbuffer
*irb
=
181 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
183 if (irb
&& irb
->mt
->bo
== bo
) {
184 found
= brw
->draw_aux_buffer_disabled
[i
] = true;
192 intel_update_state(struct gl_context
* ctx
)
194 GLuint new_state
= ctx
->NewState
;
195 struct brw_context
*brw
= brw_context(ctx
);
196 struct intel_texture_object
*tex_obj
;
197 struct intel_renderbuffer
*depth_irb
;
199 if (ctx
->swrast_context
)
200 _swrast_InvalidateState(ctx
, new_state
);
202 brw
->NewGLState
|= new_state
;
204 _mesa_unlock_context_textures(ctx
);
206 if (new_state
& (_NEW_SCISSOR
| _NEW_BUFFERS
| _NEW_VIEWPORT
))
207 _mesa_update_draw_buffer_bounds(ctx
, ctx
->DrawBuffer
);
209 if (new_state
& (_NEW_STENCIL
| _NEW_BUFFERS
)) {
210 brw
->stencil_enabled
= _mesa_stencil_is_enabled(ctx
);
211 brw
->stencil_two_sided
= _mesa_stencil_is_two_sided(ctx
);
212 brw
->stencil_write_enabled
=
213 _mesa_stencil_is_write_enabled(ctx
, brw
->stencil_two_sided
);
216 if (new_state
& _NEW_POLYGON
)
217 brw
->polygon_front_bit
= _mesa_polygon_get_front_bit(ctx
);
219 intel_prepare_render(brw
);
221 /* Resolve the depth buffer's HiZ buffer. */
222 depth_irb
= intel_get_renderbuffer(ctx
->DrawBuffer
, BUFFER_DEPTH
);
223 if (depth_irb
&& depth_irb
->mt
) {
224 intel_miptree_prepare_depth(brw
, depth_irb
->mt
,
227 depth_irb
->layer_count
);
230 memset(brw
->draw_aux_buffer_disabled
, 0,
231 sizeof(brw
->draw_aux_buffer_disabled
));
233 /* Resolve depth buffer and render cache of each enabled texture. */
234 int maxEnabledUnit
= ctx
->Texture
._MaxEnabledTexImageUnit
;
235 for (int i
= 0; i
<= maxEnabledUnit
; i
++) {
236 if (!ctx
->Texture
.Unit
[i
]._Current
)
238 tex_obj
= intel_texture_object(ctx
->Texture
.Unit
[i
]._Current
);
239 if (!tex_obj
|| !tex_obj
->mt
)
242 /* We need inte_texture_object::_Format to be valid */
243 intel_finalize_mipmap_tree(brw
, i
);
246 intel_miptree_prepare_texture(brw
, tex_obj
->mt
, tex_obj
->_Format
,
249 if (!aux_supported
&& brw
->gen
>= 9 &&
250 intel_disable_rb_aux_buffer(brw
, tex_obj
->mt
->bo
)) {
251 perf_debug("Sampling renderbuffer with non-compressible format - "
252 "turning off compression");
255 brw_render_cache_set_check_flush(brw
, tex_obj
->mt
->bo
);
257 if (tex_obj
->base
.StencilSampling
||
258 tex_obj
->mt
->format
== MESA_FORMAT_S_UINT8
) {
259 intel_update_r8stencil(brw
, tex_obj
->mt
);
263 /* Resolve color for each active shader image. */
264 for (unsigned i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
265 const struct gl_program
*prog
= ctx
->_Shader
->CurrentProgram
[i
];
267 if (unlikely(prog
&& prog
->info
.num_images
)) {
268 for (unsigned j
= 0; j
< prog
->info
.num_images
; j
++) {
269 struct gl_image_unit
*u
=
270 &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[j
]];
271 tex_obj
= intel_texture_object(u
->TexObj
);
273 if (tex_obj
&& tex_obj
->mt
) {
274 intel_miptree_prepare_image(brw
, tex_obj
->mt
);
276 if (intel_miptree_is_lossless_compressed(brw
, tex_obj
->mt
) &&
277 intel_disable_rb_aux_buffer(brw
, tex_obj
->mt
->bo
)) {
278 perf_debug("Using renderbuffer as shader image - turning "
279 "off lossless compression");
282 brw_render_cache_set_check_flush(brw
, tex_obj
->mt
->bo
);
288 /* Resolve color buffers for non-coherent framebuffer fetch. */
289 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
290 ctx
->FragmentProgram
._Current
&&
291 ctx
->FragmentProgram
._Current
->info
.outputs_read
) {
292 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
294 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
295 const struct intel_renderbuffer
*irb
=
296 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
299 intel_miptree_prepare_fb_fetch(brw
, irb
->mt
, irb
->mt_level
,
300 irb
->mt_layer
, irb
->layer_count
);
305 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
306 for (int i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
307 struct intel_renderbuffer
*irb
=
308 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
310 if (irb
== NULL
|| irb
->mt
== NULL
)
313 intel_miptree_prepare_render(brw
, irb
->mt
, irb
->mt_level
,
314 irb
->mt_layer
, irb
->layer_count
,
315 ctx
->Color
.sRGBEnabled
);
318 _mesa_lock_context_textures(ctx
);
320 if (new_state
& _NEW_BUFFERS
) {
321 intel_update_framebuffer(ctx
, ctx
->DrawBuffer
);
322 if (ctx
->DrawBuffer
!= ctx
->ReadBuffer
)
323 intel_update_framebuffer(ctx
, ctx
->ReadBuffer
);
/* Pick the front-buffer flush hook from whichever DRI loader extension the
 * screen was created with (image loader vs. classic DRI2 loader).
 */
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
330 intel_flush_front(struct gl_context
*ctx
)
332 struct brw_context
*brw
= brw_context(ctx
);
333 __DRIcontext
*driContext
= brw
->driContext
;
334 __DRIdrawable
*driDrawable
= driContext
->driDrawablePriv
;
335 __DRIscreen
*const dri_screen
= brw
->screen
->driScrnPriv
;
337 if (brw
->front_buffer_dirty
&& _mesa_is_winsys_fbo(ctx
->DrawBuffer
)) {
338 if (flushFront(dri_screen
) && driDrawable
&&
339 driDrawable
->loaderPrivate
) {
341 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
343 * This potentially resolves both front and back buffer. It
344 * is unnecessary to resolve the back, but harms nothing except
345 * performance. And no one cares about front-buffer render
348 intel_resolve_for_dri2_flush(brw
, driDrawable
);
349 intel_batchbuffer_flush(brw
);
351 flushFront(dri_screen
)(driDrawable
, driDrawable
->loaderPrivate
);
353 /* We set the dirty bit in intel_prepare_render() if we're
354 * front buffer rendering once we get there.
356 brw
->front_buffer_dirty
= false;
362 intel_glFlush(struct gl_context
*ctx
)
364 struct brw_context
*brw
= brw_context(ctx
);
366 intel_batchbuffer_flush(brw
);
367 intel_flush_front(ctx
);
369 brw
->need_flush_throttle
= true;
373 intel_finish(struct gl_context
* ctx
)
375 struct brw_context
*brw
= brw_context(ctx
);
379 if (brw
->batch
.last_bo
)
380 brw_bo_wait_rendering(brw
, brw
->batch
.last_bo
);
384 brw_init_driver_functions(struct brw_context
*brw
,
385 struct dd_function_table
*functions
)
387 _mesa_init_driver_functions(functions
);
389 /* GLX uses DRI2 invalidate events to handle window resizing.
390 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
391 * which doesn't provide a mechanism for snooping the event queues.
393 * So EGL still relies on viewport hacks to handle window resizing.
394 * This should go away with DRI3000.
396 if (!brw
->driContext
->driScreenPriv
->dri2
.useInvalidate
)
397 functions
->Viewport
= intel_viewport
;
399 functions
->Flush
= intel_glFlush
;
400 functions
->Finish
= intel_finish
;
401 functions
->GetString
= intel_get_string
;
402 functions
->UpdateState
= intel_update_state
;
404 intelInitTextureFuncs(functions
);
405 intelInitTextureImageFuncs(functions
);
406 intelInitTextureSubImageFuncs(functions
);
407 intelInitTextureCopyImageFuncs(functions
);
408 intelInitCopyImageFuncs(functions
);
409 intelInitClearFuncs(functions
);
410 intelInitBufferFuncs(functions
);
411 intelInitPixelFuncs(functions
);
412 intelInitBufferObjectFuncs(functions
);
413 brw_init_syncobj_functions(functions
);
414 brw_init_object_purgeable_functions(functions
);
416 brwInitFragProgFuncs( functions
);
417 brw_init_common_queryobj_functions(functions
);
418 if (brw
->gen
>= 8 || brw
->is_haswell
)
419 hsw_init_queryobj_functions(functions
);
420 else if (brw
->gen
>= 6)
421 gen6_init_queryobj_functions(functions
);
423 gen4_init_queryobj_functions(functions
);
424 brw_init_compute_functions(functions
);
425 brw_init_conditional_render_functions(functions
);
427 functions
->QueryInternalFormat
= brw_query_internal_format
;
429 functions
->NewTransformFeedback
= brw_new_transform_feedback
;
430 functions
->DeleteTransformFeedback
= brw_delete_transform_feedback
;
431 if (can_do_mi_math_and_lrr(brw
->screen
)) {
432 functions
->BeginTransformFeedback
= hsw_begin_transform_feedback
;
433 functions
->EndTransformFeedback
= hsw_end_transform_feedback
;
434 functions
->PauseTransformFeedback
= hsw_pause_transform_feedback
;
435 functions
->ResumeTransformFeedback
= hsw_resume_transform_feedback
;
436 } else if (brw
->gen
>= 7) {
437 functions
->BeginTransformFeedback
= gen7_begin_transform_feedback
;
438 functions
->EndTransformFeedback
= gen7_end_transform_feedback
;
439 functions
->PauseTransformFeedback
= gen7_pause_transform_feedback
;
440 functions
->ResumeTransformFeedback
= gen7_resume_transform_feedback
;
441 functions
->GetTransformFeedbackVertexCount
=
442 brw_get_transform_feedback_vertex_count
;
444 functions
->BeginTransformFeedback
= brw_begin_transform_feedback
;
445 functions
->EndTransformFeedback
= brw_end_transform_feedback
;
446 functions
->PauseTransformFeedback
= brw_pause_transform_feedback
;
447 functions
->ResumeTransformFeedback
= brw_resume_transform_feedback
;
448 functions
->GetTransformFeedbackVertexCount
=
449 brw_get_transform_feedback_vertex_count
;
453 functions
->GetSamplePosition
= gen6_get_sample_position
;
457 brw_initialize_context_constants(struct brw_context
*brw
)
459 struct gl_context
*ctx
= &brw
->ctx
;
460 const struct brw_compiler
*compiler
= brw
->screen
->compiler
;
462 const bool stage_exists
[MESA_SHADER_STAGES
] = {
463 [MESA_SHADER_VERTEX
] = true,
464 [MESA_SHADER_TESS_CTRL
] = brw
->gen
>= 7,
465 [MESA_SHADER_TESS_EVAL
] = brw
->gen
>= 7,
466 [MESA_SHADER_GEOMETRY
] = brw
->gen
>= 6,
467 [MESA_SHADER_FRAGMENT
] = true,
468 [MESA_SHADER_COMPUTE
] =
469 ((ctx
->API
== API_OPENGL_COMPAT
|| ctx
->API
== API_OPENGL_CORE
) &&
470 ctx
->Const
.MaxComputeWorkGroupSize
[0] >= 1024) ||
471 (ctx
->API
== API_OPENGLES2
&&
472 ctx
->Const
.MaxComputeWorkGroupSize
[0] >= 128) ||
473 _mesa_extension_override_enables
.ARB_compute_shader
,
476 unsigned num_stages
= 0;
477 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
482 unsigned max_samplers
=
483 brw
->gen
>= 8 || brw
->is_haswell
? BRW_MAX_TEX_UNIT
: 16;
485 ctx
->Const
.MaxDualSourceDrawBuffers
= 1;
486 ctx
->Const
.MaxDrawBuffers
= BRW_MAX_DRAW_BUFFERS
;
487 ctx
->Const
.MaxCombinedShaderOutputResources
=
488 MAX_IMAGE_UNITS
+ BRW_MAX_DRAW_BUFFERS
;
490 /* The timestamp register we can read for glGetTimestamp() is
491 * sometimes only 32 bits, before scaling to nanoseconds (depending
494 * Once scaled to nanoseconds the timestamp would roll over at a
495 * non-power-of-two, so an application couldn't use
496 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
497 * report 36 bits and truncate at that (rolling over 5 times as
498 * often as the HW counter), and when the 32-bit counter rolls
499 * over, it happens to also be at a rollover in the reported value
500 * from near (1<<36) to 0.
502 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
503 * rolls over every ~69 seconds.
505 ctx
->Const
.QueryCounterBits
.Timestamp
= 36;
507 ctx
->Const
.MaxTextureCoordUnits
= 8; /* Mesa limit */
508 ctx
->Const
.MaxImageUnits
= MAX_IMAGE_UNITS
;
510 ctx
->Const
.MaxRenderbufferSize
= 16384;
511 ctx
->Const
.MaxTextureLevels
= MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS
);
512 ctx
->Const
.MaxCubeTextureLevels
= 15; /* 16384 */
514 ctx
->Const
.MaxRenderbufferSize
= 8192;
515 ctx
->Const
.MaxTextureLevels
= MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS
);
516 ctx
->Const
.MaxCubeTextureLevels
= 14; /* 8192 */
518 ctx
->Const
.Max3DTextureLevels
= 12; /* 2048 */
519 ctx
->Const
.MaxArrayTextureLayers
= brw
->gen
>= 7 ? 2048 : 512;
520 ctx
->Const
.MaxTextureMbytes
= 1536;
521 ctx
->Const
.MaxTextureRectSize
= brw
->gen
>= 7 ? 16384 : 8192;
522 ctx
->Const
.MaxTextureMaxAnisotropy
= 16.0;
523 ctx
->Const
.MaxTextureLodBias
= 15.0;
524 ctx
->Const
.StripTextureBorder
= true;
526 ctx
->Const
.MaxProgramTextureGatherComponents
= 4;
527 ctx
->Const
.MinProgramTextureGatherOffset
= -32;
528 ctx
->Const
.MaxProgramTextureGatherOffset
= 31;
529 } else if (brw
->gen
== 6) {
530 ctx
->Const
.MaxProgramTextureGatherComponents
= 1;
531 ctx
->Const
.MinProgramTextureGatherOffset
= -8;
532 ctx
->Const
.MaxProgramTextureGatherOffset
= 7;
535 ctx
->Const
.MaxUniformBlockSize
= 65536;
537 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
538 struct gl_program_constants
*prog
= &ctx
->Const
.Program
[i
];
540 if (!stage_exists
[i
])
543 prog
->MaxTextureImageUnits
= max_samplers
;
545 prog
->MaxUniformBlocks
= BRW_MAX_UBO
;
546 prog
->MaxCombinedUniformComponents
=
547 prog
->MaxUniformComponents
+
548 ctx
->Const
.MaxUniformBlockSize
/ 4 * prog
->MaxUniformBlocks
;
550 prog
->MaxAtomicCounters
= MAX_ATOMIC_COUNTERS
;
551 prog
->MaxAtomicBuffers
= BRW_MAX_ABO
;
552 prog
->MaxImageUniforms
= compiler
->scalar_stage
[i
] ? BRW_MAX_IMAGES
: 0;
553 prog
->MaxShaderStorageBlocks
= BRW_MAX_SSBO
;
556 ctx
->Const
.MaxTextureUnits
=
557 MIN2(ctx
->Const
.MaxTextureCoordUnits
,
558 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxTextureImageUnits
);
560 ctx
->Const
.MaxUniformBufferBindings
= num_stages
* BRW_MAX_UBO
;
561 ctx
->Const
.MaxCombinedUniformBlocks
= num_stages
* BRW_MAX_UBO
;
562 ctx
->Const
.MaxCombinedAtomicBuffers
= num_stages
* BRW_MAX_ABO
;
563 ctx
->Const
.MaxCombinedShaderStorageBlocks
= num_stages
* BRW_MAX_SSBO
;
564 ctx
->Const
.MaxShaderStorageBufferBindings
= num_stages
* BRW_MAX_SSBO
;
565 ctx
->Const
.MaxCombinedTextureImageUnits
= num_stages
* max_samplers
;
566 ctx
->Const
.MaxCombinedImageUniforms
= num_stages
* BRW_MAX_IMAGES
;
569 /* Hardware only supports a limited number of transform feedback buffers.
570 * So we need to override the Mesa default (which is based only on software
573 ctx
->Const
.MaxTransformFeedbackBuffers
= BRW_MAX_SOL_BUFFERS
;
575 /* On Gen6, in the worst case, we use up one binding table entry per
576 * transform feedback component (see comments above the definition of
577 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
578 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
579 * BRW_MAX_SOL_BINDINGS.
581 * In "separate components" mode, we need to divide this value by
582 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
583 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
585 ctx
->Const
.MaxTransformFeedbackInterleavedComponents
= BRW_MAX_SOL_BINDINGS
;
586 ctx
->Const
.MaxTransformFeedbackSeparateComponents
=
587 BRW_MAX_SOL_BINDINGS
/ BRW_MAX_SOL_BUFFERS
;
589 ctx
->Const
.AlwaysUseGetTransformFeedbackVertexCount
=
590 !can_do_mi_math_and_lrr(brw
->screen
);
593 const int *msaa_modes
= intel_supported_msaa_modes(brw
->screen
);
594 const int clamp_max_samples
=
595 driQueryOptioni(&brw
->optionCache
, "clamp_max_samples");
597 if (clamp_max_samples
< 0) {
598 max_samples
= msaa_modes
[0];
600 /* Select the largest supported MSAA mode that does not exceed
604 for (int i
= 0; msaa_modes
[i
] != 0; ++i
) {
605 if (msaa_modes
[i
] <= clamp_max_samples
) {
606 max_samples
= msaa_modes
[i
];
612 ctx
->Const
.MaxSamples
= max_samples
;
613 ctx
->Const
.MaxColorTextureSamples
= max_samples
;
614 ctx
->Const
.MaxDepthTextureSamples
= max_samples
;
615 ctx
->Const
.MaxIntegerSamples
= max_samples
;
616 ctx
->Const
.MaxImageSamples
= 0;
618 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
619 * to map indices of rectangular grid to sample numbers within a pixel.
620 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
621 * extension implementation. For more details see the comment above
622 * gen6_set_sample_maps() definition.
624 gen6_set_sample_maps(ctx
);
626 ctx
->Const
.MinLineWidth
= 1.0;
627 ctx
->Const
.MinLineWidthAA
= 1.0;
629 ctx
->Const
.MaxLineWidth
= 7.375;
630 ctx
->Const
.MaxLineWidthAA
= 7.375;
631 ctx
->Const
.LineWidthGranularity
= 0.125;
633 ctx
->Const
.MaxLineWidth
= 7.0;
634 ctx
->Const
.MaxLineWidthAA
= 7.0;
635 ctx
->Const
.LineWidthGranularity
= 0.5;
638 /* For non-antialiased lines, we have to round the line width to the
639 * nearest whole number. Make sure that we don't advertise a line
640 * width that, when rounded, will be beyond the actual hardware
643 assert(roundf(ctx
->Const
.MaxLineWidth
) <= ctx
->Const
.MaxLineWidth
);
645 ctx
->Const
.MinPointSize
= 1.0;
646 ctx
->Const
.MinPointSizeAA
= 1.0;
647 ctx
->Const
.MaxPointSize
= 255.0;
648 ctx
->Const
.MaxPointSizeAA
= 255.0;
649 ctx
->Const
.PointSizeGranularity
= 1.0;
651 if (brw
->gen
>= 5 || brw
->is_g4x
)
652 ctx
->Const
.MaxClipPlanes
= 8;
654 ctx
->Const
.GLSLTessLevelsAsInputs
= true;
655 ctx
->Const
.LowerTCSPatchVerticesIn
= brw
->gen
>= 8;
656 ctx
->Const
.LowerTESPatchVerticesIn
= true;
657 ctx
->Const
.PrimitiveRestartForPatches
= true;
659 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeInstructions
= 16 * 1024;
660 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxAluInstructions
= 0;
661 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxTexInstructions
= 0;
662 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxTexIndirections
= 0;
663 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAluInstructions
= 0;
664 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTexInstructions
= 0;
665 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTexIndirections
= 0;
666 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAttribs
= 16;
667 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeTemps
= 256;
668 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeAddressRegs
= 1;
669 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeParameters
= 1024;
670 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxEnvParams
=
671 MIN2(ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxNativeParameters
,
672 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxEnvParams
);
674 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeInstructions
= 1024;
675 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAluInstructions
= 1024;
676 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTexInstructions
= 1024;
677 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTexIndirections
= 1024;
678 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAttribs
= 12;
679 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeTemps
= 256;
680 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeAddressRegs
= 0;
681 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeParameters
= 1024;
682 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxEnvParams
=
683 MIN2(ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxNativeParameters
,
684 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxEnvParams
);
686 /* Fragment shaders use real, 32-bit twos-complement integers for all
689 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.RangeMin
= 31;
690 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.RangeMax
= 30;
691 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
.Precision
= 0;
692 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].HighInt
= ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
;
693 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MediumInt
= ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].LowInt
;
695 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.RangeMin
= 31;
696 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.RangeMax
= 30;
697 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
.Precision
= 0;
698 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].HighInt
= ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
;
699 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MediumInt
= ctx
->Const
.Program
[MESA_SHADER_VERTEX
].LowInt
;
701 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
702 * but we're not sure how it's actually done for vertex order,
703 * that affect provoking vertex decision. Always use last vertex
704 * convention for quad primitive which works as expected for now.
707 ctx
->Const
.QuadsFollowProvokingVertexConvention
= false;
709 ctx
->Const
.NativeIntegers
= true;
710 ctx
->Const
.VertexID_is_zero_based
= true;
712 /* Regarding the CMP instruction, the Ivybridge PRM says:
714 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
715 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
716 * 0xFFFFFFFF) is assigned to dst."
718 * but PRMs for earlier generations say
720 * "In dword format, one GRF may store up to 8 results. When the register
721 * is used later as a vector of Booleans, as only LSB at each channel
722 * contains meaning [sic] data, software should make sure all higher bits
723 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
725 * We select the representation of a true boolean uniform to be ~0, and fix
726 * the results of Gen <= 5 CMP instruction's with -(result & 1).
728 ctx
->Const
.UniformBooleanTrue
= ~0;
730 /* From the gen4 PRM, volume 4 page 127:
732 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
733 * the base address of the first element of the surface, computed in
734 * software by adding the surface base address to the byte offset of
735 * the element in the buffer."
737 * However, unaligned accesses are slower, so enforce buffer alignment.
739 ctx
->Const
.UniformBufferOffsetAlignment
= 16;
741 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
742 * that we can safely have the CPU and GPU writing the same SSBO on
743 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
744 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
745 * be updating disjoint regions of the buffer simultaneously and that will
746 * break if the regions overlap the same cacheline.
748 ctx
->Const
.ShaderStorageBufferOffsetAlignment
= 64;
749 ctx
->Const
.TextureBufferOffsetAlignment
= 16;
750 ctx
->Const
.MaxTextureBufferSize
= 128 * 1024 * 1024;
753 ctx
->Const
.MaxVarying
= 32;
754 ctx
->Const
.Program
[MESA_SHADER_VERTEX
].MaxOutputComponents
= 128;
755 ctx
->Const
.Program
[MESA_SHADER_GEOMETRY
].MaxInputComponents
= 64;
756 ctx
->Const
.Program
[MESA_SHADER_GEOMETRY
].MaxOutputComponents
= 128;
757 ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxInputComponents
= 128;
758 ctx
->Const
.Program
[MESA_SHADER_TESS_CTRL
].MaxInputComponents
= 128;
759 ctx
->Const
.Program
[MESA_SHADER_TESS_CTRL
].MaxOutputComponents
= 128;
760 ctx
->Const
.Program
[MESA_SHADER_TESS_EVAL
].MaxInputComponents
= 128;
761 ctx
->Const
.Program
[MESA_SHADER_TESS_EVAL
].MaxOutputComponents
= 128;
764 /* We want the GLSL compiler to emit code that uses condition codes */
765 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
766 ctx
->Const
.ShaderCompilerOptions
[i
] =
767 brw
->screen
->compiler
->glsl_compiler_options
[i
];
771 ctx
->Const
.MaxViewportWidth
= 32768;
772 ctx
->Const
.MaxViewportHeight
= 32768;
775 /* ARB_viewport_array, OES_viewport_array */
777 ctx
->Const
.MaxViewports
= GEN6_NUM_VIEWPORTS
;
778 ctx
->Const
.ViewportSubpixelBits
= 0;
780 /* Cast to float before negating because MaxViewportWidth is unsigned.
782 ctx
->Const
.ViewportBounds
.Min
= -(float)ctx
->Const
.MaxViewportWidth
;
783 ctx
->Const
.ViewportBounds
.Max
= ctx
->Const
.MaxViewportWidth
;
786 /* ARB_gpu_shader5 */
788 ctx
->Const
.MaxVertexStreams
= MIN2(4, MAX_VERTEX_STREAMS
);
790 /* ARB_framebuffer_no_attachments */
791 ctx
->Const
.MaxFramebufferWidth
= 16384;
792 ctx
->Const
.MaxFramebufferHeight
= 16384;
793 ctx
->Const
.MaxFramebufferLayers
= ctx
->Const
.MaxArrayTextureLayers
;
794 ctx
->Const
.MaxFramebufferSamples
= max_samples
;
796 /* OES_primitive_bounding_box */
797 ctx
->Const
.NoPrimitiveBoundingBoxOutput
= true;
801 brw_initialize_cs_context_constants(struct brw_context
*brw
)
803 struct gl_context
*ctx
= &brw
->ctx
;
804 const struct intel_screen
*screen
= brw
->screen
;
805 struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
807 /* FINISHME: Do this for all platforms that the kernel supports */
808 if (brw
->is_cherryview
&&
809 screen
->subslice_total
> 0 && screen
->eu_total
> 0) {
810 /* Logical CS threads = EUs per subslice * 7 threads per EU */
811 uint32_t max_cs_threads
= screen
->eu_total
/ screen
->subslice_total
* 7;
813 /* Fuse configurations may give more threads than expected, never less. */
814 if (max_cs_threads
> devinfo
->max_cs_threads
)
815 devinfo
->max_cs_threads
= max_cs_threads
;
818 /* Maximum number of scalar compute shader invocations that can be run in
819 * parallel in the same subslice assuming SIMD32 dispatch.
821 * We don't advertise more than 64 threads, because we are limited to 64 by
822 * our usage of thread_width_max in the gpgpu walker command. This only
823 * currently impacts Haswell, which otherwise might be able to advertise 70
824 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
825 * required the number of invocation needed for ARB_compute_shader.
827 const unsigned max_threads
= MIN2(64, devinfo
->max_cs_threads
);
828 const uint32_t max_invocations
= 32 * max_threads
;
829 ctx
->Const
.MaxComputeWorkGroupSize
[0] = max_invocations
;
830 ctx
->Const
.MaxComputeWorkGroupSize
[1] = max_invocations
;
831 ctx
->Const
.MaxComputeWorkGroupSize
[2] = max_invocations
;
832 ctx
->Const
.MaxComputeWorkGroupInvocations
= max_invocations
;
833 ctx
->Const
.MaxComputeSharedMemorySize
= 64 * 1024;
837 * Process driconf (drirc) options, setting appropriate context flags.
839 * intelInitExtensions still pokes at optionCache directly, in order to
840 * avoid advertising various extensions. No flags are set, so it makes
841 * sense to continue doing that there.
844 brw_process_driconf_options(struct brw_context
*brw
)
846 struct gl_context
*ctx
= &brw
->ctx
;
848 driOptionCache
*options
= &brw
->optionCache
;
849 driParseConfigFiles(options
, &brw
->screen
->optionCache
,
850 brw
->driContext
->driScreenPriv
->myNum
, "i965");
852 int bo_reuse_mode
= driQueryOptioni(options
, "bo_reuse");
853 switch (bo_reuse_mode
) {
854 case DRI_CONF_BO_REUSE_DISABLED
:
856 case DRI_CONF_BO_REUSE_ALL
:
857 brw_bufmgr_enable_reuse(brw
->bufmgr
);
861 if (INTEL_DEBUG
& DEBUG_NO_HIZ
) {
862 brw
->has_hiz
= false;
863 /* On gen6, you can only do separate stencil with HIZ. */
865 brw
->has_separate_stencil
= false;
868 if (driQueryOptionb(options
, "always_flush_batch")) {
869 fprintf(stderr
, "flushing batchbuffer before/after each draw call\n");
870 brw
->always_flush_batch
= true;
873 if (driQueryOptionb(options
, "always_flush_cache")) {
874 fprintf(stderr
, "flushing GPU caches before/after each draw call\n");
875 brw
->always_flush_cache
= true;
878 if (driQueryOptionb(options
, "disable_throttling")) {
879 fprintf(stderr
, "disabling flush throttling\n");
880 brw
->disable_throttling
= true;
883 brw
->precompile
= driQueryOptionb(&brw
->optionCache
, "shader_precompile");
885 if (driQueryOptionb(&brw
->optionCache
, "precise_trig"))
886 brw
->screen
->compiler
->precise_trig
= true;
888 ctx
->Const
.ForceGLSLExtensionsWarn
=
889 driQueryOptionb(options
, "force_glsl_extensions_warn");
891 ctx
->Const
.ForceGLSLVersion
=
892 driQueryOptioni(options
, "force_glsl_version");
894 ctx
->Const
.DisableGLSLLineContinuations
=
895 driQueryOptionb(options
, "disable_glsl_line_continuations");
897 ctx
->Const
.AllowGLSLExtensionDirectiveMidShader
=
898 driQueryOptionb(options
, "allow_glsl_extension_directive_midshader");
900 ctx
->Const
.AllowGLSLBuiltinVariableRedeclaration
=
901 driQueryOptionb(options
, "allow_glsl_builtin_variable_redeclaration");
903 ctx
->Const
.AllowHigherCompatVersion
=
904 driQueryOptionb(options
, "allow_higher_compat_version");
906 ctx
->Const
.ForceGLSLAbsSqrt
=
907 driQueryOptionb(options
, "force_glsl_abs_sqrt");
909 ctx
->Const
.GLSLZeroInit
= driQueryOptionb(options
, "glsl_zero_init");
911 brw
->dual_color_blend_by_location
=
912 driQueryOptionb(options
, "dual_color_blend_by_location");
916 brwCreateContext(gl_api api
,
917 const struct gl_config
*mesaVis
,
918 __DRIcontext
*driContextPriv
,
919 unsigned major_version
,
920 unsigned minor_version
,
923 unsigned *dri_ctx_error
,
924 void *sharedContextPrivate
)
926 struct gl_context
*shareCtx
= (struct gl_context
*) sharedContextPrivate
;
927 struct intel_screen
*screen
= driContextPriv
->driScreenPriv
->driverPrivate
;
928 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
929 struct dd_function_table functions
;
931 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
932 * provides us with context reset notifications.
934 uint32_t allowed_flags
= __DRI_CTX_FLAG_DEBUG
935 | __DRI_CTX_FLAG_FORWARD_COMPATIBLE
;
937 if (screen
->has_context_reset_notification
)
938 allowed_flags
|= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS
;
940 if (flags
& ~allowed_flags
) {
941 *dri_ctx_error
= __DRI_CTX_ERROR_UNKNOWN_FLAG
;
945 struct brw_context
*brw
= rzalloc(NULL
, struct brw_context
);
947 fprintf(stderr
, "%s: failed to alloc context\n", __func__
);
948 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
952 driContextPriv
->driverPrivate
= brw
;
953 brw
->driContext
= driContextPriv
;
954 brw
->screen
= screen
;
955 brw
->bufmgr
= screen
->bufmgr
;
957 brw
->gen
= devinfo
->gen
;
958 brw
->gt
= devinfo
->gt
;
959 brw
->is_g4x
= devinfo
->is_g4x
;
960 brw
->is_baytrail
= devinfo
->is_baytrail
;
961 brw
->is_haswell
= devinfo
->is_haswell
;
962 brw
->is_cherryview
= devinfo
->is_cherryview
;
963 brw
->is_broxton
= devinfo
->is_broxton
|| devinfo
->is_geminilake
;
964 brw
->has_llc
= devinfo
->has_llc
;
965 brw
->has_hiz
= devinfo
->has_hiz_and_separate_stencil
;
966 brw
->has_separate_stencil
= devinfo
->has_hiz_and_separate_stencil
;
967 brw
->has_pln
= devinfo
->has_pln
;
968 brw
->has_compr4
= devinfo
->has_compr4
;
969 brw
->has_surface_tile_offset
= devinfo
->has_surface_tile_offset
;
970 brw
->has_negative_rhw_bug
= devinfo
->has_negative_rhw_bug
;
971 brw
->needs_unlit_centroid_workaround
=
972 devinfo
->needs_unlit_centroid_workaround
;
974 brw
->must_use_separate_stencil
= devinfo
->must_use_separate_stencil
;
975 brw
->has_swizzling
= screen
->hw_has_swizzling
;
977 isl_device_init(&brw
->isl_dev
, devinfo
, screen
->hw_has_swizzling
);
979 brw
->vs
.base
.stage
= MESA_SHADER_VERTEX
;
980 brw
->tcs
.base
.stage
= MESA_SHADER_TESS_CTRL
;
981 brw
->tes
.base
.stage
= MESA_SHADER_TESS_EVAL
;
982 brw
->gs
.base
.stage
= MESA_SHADER_GEOMETRY
;
983 brw
->wm
.base
.stage
= MESA_SHADER_FRAGMENT
;
985 gen8_init_vtable_surface_functions(brw
);
986 brw
->vtbl
.emit_depth_stencil_hiz
= gen8_emit_depth_stencil_hiz
;
987 } else if (brw
->gen
>= 7) {
988 gen7_init_vtable_surface_functions(brw
);
989 brw
->vtbl
.emit_depth_stencil_hiz
= gen7_emit_depth_stencil_hiz
;
990 } else if (brw
->gen
>= 6) {
991 gen6_init_vtable_surface_functions(brw
);
992 brw
->vtbl
.emit_depth_stencil_hiz
= gen6_emit_depth_stencil_hiz
;
994 gen4_init_vtable_surface_functions(brw
);
995 brw
->vtbl
.emit_depth_stencil_hiz
= brw_emit_depth_stencil_hiz
;
998 brw_init_driver_functions(brw
, &functions
);
1001 functions
.GetGraphicsResetStatus
= brw_get_graphics_reset_status
;
1003 struct gl_context
*ctx
= &brw
->ctx
;
1005 if (!_mesa_initialize_context(ctx
, api
, mesaVis
, shareCtx
, &functions
)) {
1006 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
1007 fprintf(stderr
, "%s: failed to init mesa context\n", __func__
);
1008 intelDestroyContext(driContextPriv
);
1012 driContextSetFlags(ctx
, flags
);
1014 /* Initialize the software rasterizer and helper modules.
1016 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1017 * software fallbacks (which we have to support on legacy GL to do weird
1018 * glDrawPixels(), glBitmap(), and other functions).
1020 if (api
!= API_OPENGL_CORE
&& api
!= API_OPENGLES2
) {
1021 _swrast_CreateContext(ctx
);
1024 _vbo_CreateContext(ctx
);
1025 if (ctx
->swrast_context
) {
1026 _tnl_CreateContext(ctx
);
1027 TNL_CONTEXT(ctx
)->Driver
.RunPipeline
= _tnl_run_pipeline
;
1028 _swsetup_CreateContext(ctx
);
1030 /* Configure swrast to match hardware characteristics: */
1031 _swrast_allow_pixel_fog(ctx
, false);
1032 _swrast_allow_vertex_fog(ctx
, true);
1035 _mesa_meta_init(ctx
);
1037 brw_process_driconf_options(brw
);
1039 if (INTEL_DEBUG
& DEBUG_PERF
)
1040 brw
->perf_debug
= true;
1042 brw_initialize_cs_context_constants(brw
);
1043 brw_initialize_context_constants(brw
);
1045 ctx
->Const
.ResetStrategy
= notify_reset
1046 ? GL_LOSE_CONTEXT_ON_RESET_ARB
: GL_NO_RESET_NOTIFICATION_ARB
;
1048 /* Reinitialize the context point state. It depends on ctx->Const values. */
1049 _mesa_init_point(ctx
);
1051 intel_fbo_init(brw
);
1053 intel_batchbuffer_init(&brw
->batch
, brw
->bufmgr
, brw
->has_llc
);
1055 if (brw
->gen
>= 6) {
1056 /* Create a new hardware context. Using a hardware context means that
1057 * our GPU state will be saved/restored on context switch, allowing us
1058 * to assume that the GPU is in the same state we left it in.
1060 * This is required for transform feedback buffer offsets, query objects,
1061 * and also allows us to reduce how much state we have to emit.
1063 brw
->hw_ctx
= brw_create_hw_context(brw
->bufmgr
);
1066 fprintf(stderr
, "Failed to create hardware context.\n");
1067 intelDestroyContext(driContextPriv
);
1072 if (brw_init_pipe_control(brw
, devinfo
)) {
1073 *dri_ctx_error
= __DRI_CTX_ERROR_NO_MEMORY
;
1074 intelDestroyContext(driContextPriv
);
1078 brw_init_state(brw
);
1080 intelInitExtensions(ctx
);
1082 brw_init_surface_formats(brw
);
1084 brw_blorp_init(brw
);
1086 brw
->urb
.size
= devinfo
->urb
.size
;
1089 brw
->urb
.gs_present
= false;
1091 brw
->prim_restart
.in_progress
= false;
1092 brw
->prim_restart
.enable_cut_index
= false;
1093 brw
->gs
.enabled
= false;
1094 brw
->clip
.viewport_count
= 1;
1096 brw
->predicate
.state
= BRW_PREDICATE_STATE_RENDER
;
1098 brw
->max_gtt_map_object_size
= screen
->max_gtt_map_object_size
;
1100 ctx
->VertexProgram
._MaintainTnlProgram
= true;
1101 ctx
->FragmentProgram
._MaintainTexEnvProgram
= true;
1103 brw_draw_init( brw
);
1105 if ((flags
& __DRI_CTX_FLAG_DEBUG
) != 0) {
1106 /* Turn on some extra GL_ARB_debug_output generation. */
1107 brw
->perf_debug
= true;
1110 if ((flags
& __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS
) != 0) {
1111 ctx
->Const
.ContextFlags
|= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB
;
1112 ctx
->Const
.RobustAccess
= GL_TRUE
;
1115 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
1116 brw_init_shader_time(brw
);
1118 _mesa_compute_version(ctx
);
1120 _mesa_initialize_dispatch_tables(ctx
);
1121 _mesa_initialize_vbo_vtxfmt(ctx
);
1123 if (ctx
->Extensions
.INTEL_performance_query
)
1124 brw_init_performance_queries(brw
);
1126 vbo_use_buffer_objects(ctx
);
1127 vbo_always_unmap_buffers(ctx
);
1133 intelDestroyContext(__DRIcontext
* driContextPriv
)
1135 struct brw_context
*brw
=
1136 (struct brw_context
*) driContextPriv
->driverPrivate
;
1137 struct gl_context
*ctx
= &brw
->ctx
;
1139 _mesa_meta_free(&brw
->ctx
);
1141 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
) {
1142 /* Force a report. */
1143 brw
->shader_time
.report_time
= 0;
1145 brw_collect_and_report_shader_time(brw
);
1146 brw_destroy_shader_time(brw
);
1150 blorp_finish(&brw
->blorp
);
1152 brw_destroy_state(brw
);
1153 brw_draw_destroy(brw
);
1155 brw_bo_unreference(brw
->curbe
.curbe_bo
);
1156 if (brw
->vs
.base
.scratch_bo
)
1157 brw_bo_unreference(brw
->vs
.base
.scratch_bo
);
1158 if (brw
->tcs
.base
.scratch_bo
)
1159 brw_bo_unreference(brw
->tcs
.base
.scratch_bo
);
1160 if (brw
->tes
.base
.scratch_bo
)
1161 brw_bo_unreference(brw
->tes
.base
.scratch_bo
);
1162 if (brw
->gs
.base
.scratch_bo
)
1163 brw_bo_unreference(brw
->gs
.base
.scratch_bo
);
1164 if (brw
->wm
.base
.scratch_bo
)
1165 brw_bo_unreference(brw
->wm
.base
.scratch_bo
);
1167 brw_destroy_hw_context(brw
->bufmgr
, brw
->hw_ctx
);
1169 if (ctx
->swrast_context
) {
1170 _swsetup_DestroyContext(&brw
->ctx
);
1171 _tnl_DestroyContext(&brw
->ctx
);
1173 _vbo_DestroyContext(&brw
->ctx
);
1175 if (ctx
->swrast_context
)
1176 _swrast_DestroyContext(&brw
->ctx
);
1178 brw_fini_pipe_control(brw
);
1179 intel_batchbuffer_free(&brw
->batch
);
1181 brw_bo_unreference(brw
->throttle_batch
[1]);
1182 brw_bo_unreference(brw
->throttle_batch
[0]);
1183 brw
->throttle_batch
[1] = NULL
;
1184 brw
->throttle_batch
[0] = NULL
;
1186 driDestroyOptionCache(&brw
->optionCache
);
1188 /* free the Mesa context */
1189 _mesa_free_context_data(&brw
->ctx
);
1192 driContextPriv
->driverPrivate
= NULL
;
1196 intelUnbindContext(__DRIcontext
* driContextPriv
)
1198 /* Unset current context and dispath table */
1199 _mesa_make_current(NULL
, NULL
, NULL
);
1205 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1206 * on window system framebuffers.
1208 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1209 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1210 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1211 * for a visual where you're guaranteed to be capable, but it turns out that
1212 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1213 * incapable ones, because there's no difference between the two in resources
1214 * used. Applications thus get built that accidentally rely on the default
1215 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1218 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1219 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1220 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1221 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1222 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1223 * and get no sRGB encode (assuming that both kinds of visual are available).
1224 * Thus our choice to support sRGB by default on our visuals for desktop would
1225 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1227 * Unfortunately, renderbuffer setup happens before a context is created. So
1228 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1229 * context (without an sRGB visual, though we don't have sRGB visuals exposed
1230 * yet), we go turn that back off before anyone finds out.
1233 intel_gles3_srgb_workaround(struct brw_context
*brw
,
1234 struct gl_framebuffer
*fb
)
1236 struct gl_context
*ctx
= &brw
->ctx
;
1238 if (_mesa_is_desktop_gl(ctx
) || !fb
->Visual
.sRGBCapable
)
1241 /* Some day when we support the sRGB capable bit on visuals available for
1242 * GLES, we'll need to respect that and not disable things here.
1244 fb
->Visual
.sRGBCapable
= false;
1245 for (int i
= 0; i
< BUFFER_COUNT
; i
++) {
1246 struct gl_renderbuffer
*rb
= fb
->Attachment
[i
].Renderbuffer
;
1248 rb
->Format
= _mesa_get_srgb_format_linear(rb
->Format
);
1253 intelMakeCurrent(__DRIcontext
* driContextPriv
,
1254 __DRIdrawable
* driDrawPriv
,
1255 __DRIdrawable
* driReadPriv
)
1257 struct brw_context
*brw
;
1258 GET_CURRENT_CONTEXT(curCtx
);
1261 brw
= (struct brw_context
*) driContextPriv
->driverPrivate
;
1265 /* According to the glXMakeCurrent() man page: "Pending commands to
1266 * the previous context, if any, are flushed before it is released."
1267 * But only flush if we're actually changing contexts.
1269 if (brw_context(curCtx
) && brw_context(curCtx
) != brw
) {
1270 _mesa_flush(curCtx
);
1273 if (driContextPriv
) {
1274 struct gl_context
*ctx
= &brw
->ctx
;
1275 struct gl_framebuffer
*fb
, *readFb
;
1277 if (driDrawPriv
== NULL
) {
1278 fb
= _mesa_get_incomplete_framebuffer();
1280 fb
= driDrawPriv
->driverPrivate
;
1281 driContextPriv
->dri2
.draw_stamp
= driDrawPriv
->dri2
.stamp
- 1;
1284 if (driReadPriv
== NULL
) {
1285 readFb
= _mesa_get_incomplete_framebuffer();
1287 readFb
= driReadPriv
->driverPrivate
;
1288 driContextPriv
->dri2
.read_stamp
= driReadPriv
->dri2
.stamp
- 1;
1291 /* The sRGB workaround changes the renderbuffer's format. We must change
1292 * the format before the renderbuffer's miptree get's allocated, otherwise
1293 * the formats of the renderbuffer and its miptree will differ.
1295 intel_gles3_srgb_workaround(brw
, fb
);
1296 intel_gles3_srgb_workaround(brw
, readFb
);
1298 /* If the context viewport hasn't been initialized, force a call out to
1299 * the loader to get buffers so we have a drawable size for the initial
1301 if (!brw
->ctx
.ViewportInitialized
)
1302 intel_prepare_render(brw
);
1304 _mesa_make_current(ctx
, fb
, readFb
);
1306 _mesa_make_current(NULL
, NULL
, NULL
);
1313 intel_resolve_for_dri2_flush(struct brw_context
*brw
,
1314 __DRIdrawable
*drawable
)
1317 /* MSAA and fast color clear are not supported, so don't waste time
1318 * checking whether a resolve is needed.
1323 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1324 struct intel_renderbuffer
*rb
;
1326 /* Usually, only the back buffer will need to be downsampled. However,
1327 * the front buffer will also need it if the user has rendered into it.
1329 static const gl_buffer_index buffers
[2] = {
1334 for (int i
= 0; i
< 2; ++i
) {
1335 rb
= intel_get_renderbuffer(fb
, buffers
[i
]);
1336 if (rb
== NULL
|| rb
->mt
== NULL
)
1338 if (rb
->mt
->num_samples
<= 1) {
1339 assert(rb
->mt_layer
== 0 && rb
->mt_level
== 0 &&
1340 rb
->layer_count
== 1);
1341 intel_miptree_prepare_access(brw
, rb
->mt
, 0, 1, 0, 1, false, false);
1343 intel_renderbuffer_downsample(brw
, rb
);
/** Return the renderbuffer's color depth in bits per pixel. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}
1355 intel_query_dri2_buffers(struct brw_context
*brw
,
1356 __DRIdrawable
*drawable
,
1357 __DRIbuffer
**buffers
,
1361 intel_process_dri2_buffer(struct brw_context
*brw
,
1362 __DRIdrawable
*drawable
,
1363 __DRIbuffer
*buffer
,
1364 struct intel_renderbuffer
*rb
,
1365 const char *buffer_name
);
1368 intel_update_image_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
);
1371 intel_update_dri2_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
)
1373 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1374 struct intel_renderbuffer
*rb
;
1375 __DRIbuffer
*buffers
= NULL
;
1377 const char *region_name
;
1379 /* Set this up front, so that in case our buffers get invalidated
1380 * while we're getting new buffers, we don't clobber the stamp and
1381 * thus ignore the invalidate. */
1382 drawable
->lastStamp
= drawable
->dri2
.stamp
;
1384 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
))
1385 fprintf(stderr
, "enter %s, drawable %p\n", __func__
, drawable
);
1387 intel_query_dri2_buffers(brw
, drawable
, &buffers
, &count
);
1389 if (buffers
== NULL
)
1392 for (int i
= 0; i
< count
; i
++) {
1393 switch (buffers
[i
].attachment
) {
1394 case __DRI_BUFFER_FRONT_LEFT
:
1395 rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1396 region_name
= "dri2 front buffer";
1399 case __DRI_BUFFER_FAKE_FRONT_LEFT
:
1400 rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1401 region_name
= "dri2 fake front buffer";
1404 case __DRI_BUFFER_BACK_LEFT
:
1405 rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1406 region_name
= "dri2 back buffer";
1409 case __DRI_BUFFER_DEPTH
:
1410 case __DRI_BUFFER_HIZ
:
1411 case __DRI_BUFFER_DEPTH_STENCIL
:
1412 case __DRI_BUFFER_STENCIL
:
1413 case __DRI_BUFFER_ACCUM
:
1416 "unhandled buffer attach event, attachment type %d\n",
1417 buffers
[i
].attachment
);
1421 intel_process_dri2_buffer(brw
, drawable
, &buffers
[i
], rb
, region_name
);
1427 intel_update_renderbuffers(__DRIcontext
*context
, __DRIdrawable
*drawable
)
1429 struct brw_context
*brw
= context
->driverPrivate
;
1430 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1432 /* Set this up front, so that in case our buffers get invalidated
1433 * while we're getting new buffers, we don't clobber the stamp and
1434 * thus ignore the invalidate. */
1435 drawable
->lastStamp
= drawable
->dri2
.stamp
;
1437 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
))
1438 fprintf(stderr
, "enter %s, drawable %p\n", __func__
, drawable
);
1440 if (dri_screen
->image
.loader
)
1441 intel_update_image_buffers(brw
, drawable
);
1443 intel_update_dri2_buffers(brw
, drawable
);
1445 driUpdateFramebufferSize(&brw
->ctx
, drawable
);
1449 * intel_prepare_render should be called anywhere that current read/drawbuffer
1450 * state is required.
1453 intel_prepare_render(struct brw_context
*brw
)
1455 struct gl_context
*ctx
= &brw
->ctx
;
1456 __DRIcontext
*driContext
= brw
->driContext
;
1457 __DRIdrawable
*drawable
;
1459 drawable
= driContext
->driDrawablePriv
;
1460 if (drawable
&& drawable
->dri2
.stamp
!= driContext
->dri2
.draw_stamp
) {
1461 if (drawable
->lastStamp
!= drawable
->dri2
.stamp
)
1462 intel_update_renderbuffers(driContext
, drawable
);
1463 driContext
->dri2
.draw_stamp
= drawable
->dri2
.stamp
;
1466 drawable
= driContext
->driReadablePriv
;
1467 if (drawable
&& drawable
->dri2
.stamp
!= driContext
->dri2
.read_stamp
) {
1468 if (drawable
->lastStamp
!= drawable
->dri2
.stamp
)
1469 intel_update_renderbuffers(driContext
, drawable
);
1470 driContext
->dri2
.read_stamp
= drawable
->dri2
.stamp
;
1473 /* If we're currently rendering to the front buffer, the rendering
1474 * that will happen next will probably dirty the front buffer. So
1475 * mark it as dirty here.
1477 if (_mesa_is_front_buffer_drawing(ctx
->DrawBuffer
))
1478 brw
->front_buffer_dirty
= true;
1482 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1484 * To determine which DRI buffers to request, examine the renderbuffers
1485 * attached to the drawable's framebuffer. Then request the buffers with
1486 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1488 * This is called from intel_update_renderbuffers().
1490 * \param drawable Drawable whose buffers are queried.
1491 * \param buffers [out] List of buffers returned by DRI2 query.
1492 * \param buffer_count [out] Number of buffers returned.
1494 * \see intel_update_renderbuffers()
1495 * \see DRI2GetBuffers()
1496 * \see DRI2GetBuffersWithFormat()
1499 intel_query_dri2_buffers(struct brw_context
*brw
,
1500 __DRIdrawable
*drawable
,
1501 __DRIbuffer
**buffers
,
1504 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1505 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1507 unsigned attachments
[8];
1509 struct intel_renderbuffer
*front_rb
;
1510 struct intel_renderbuffer
*back_rb
;
1512 front_rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1513 back_rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1515 memset(attachments
, 0, sizeof(attachments
));
1516 if ((_mesa_is_front_buffer_drawing(fb
) ||
1517 _mesa_is_front_buffer_reading(fb
) ||
1518 !back_rb
) && front_rb
) {
1519 /* If a fake front buffer is in use, then querying for
1520 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1521 * the real front buffer to the fake front buffer. So before doing the
1522 * query, we need to make sure all the pending drawing has landed in the
1523 * real front buffer.
1525 intel_batchbuffer_flush(brw
);
1526 intel_flush_front(&brw
->ctx
);
1528 attachments
[i
++] = __DRI_BUFFER_FRONT_LEFT
;
1529 attachments
[i
++] = intel_bits_per_pixel(front_rb
);
1530 } else if (front_rb
&& brw
->front_buffer_dirty
) {
1531 /* We have pending front buffer rendering, but we aren't querying for a
1532 * front buffer. If the front buffer we have is a fake front buffer,
1533 * the X server is going to throw it away when it processes the query.
1534 * So before doing the query, make sure all the pending drawing has
1535 * landed in the real front buffer.
1537 intel_batchbuffer_flush(brw
);
1538 intel_flush_front(&brw
->ctx
);
1542 attachments
[i
++] = __DRI_BUFFER_BACK_LEFT
;
1543 attachments
[i
++] = intel_bits_per_pixel(back_rb
);
1546 assert(i
<= ARRAY_SIZE(attachments
));
1549 dri_screen
->dri2
.loader
->getBuffersWithFormat(drawable
,
1554 drawable
->loaderPrivate
);
1558 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1560 * This is called from intel_update_renderbuffers().
1563 * DRI buffers whose attachment point is DRI2BufferStencil or
1564 * DRI2BufferDepthStencil are handled as special cases.
1566 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1567 * that is passed to brw_bo_gem_create_from_name().
1569 * \see intel_update_renderbuffers()
1572 intel_process_dri2_buffer(struct brw_context
*brw
,
1573 __DRIdrawable
*drawable
,
1574 __DRIbuffer
*buffer
,
1575 struct intel_renderbuffer
*rb
,
1576 const char *buffer_name
)
1578 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1584 unsigned num_samples
= rb
->Base
.Base
.NumSamples
;
1586 /* We try to avoid closing and reopening the same BO name, because the first
1587 * use of a mapping of the buffer involves a bunch of page faulting which is
1588 * moderately expensive.
1590 struct intel_mipmap_tree
*last_mt
;
1591 if (num_samples
== 0)
1594 last_mt
= rb
->singlesample_mt
;
1596 uint32_t old_name
= 0;
1598 /* The bo already has a name because the miptree was created by a
1599 * previous call to intel_process_dri2_buffer(). If a bo already has a
1600 * name, then brw_bo_flink() is a low-cost getter. It does not
1601 * create a new name.
1603 brw_bo_flink(last_mt
->bo
, &old_name
);
1606 if (old_name
== buffer
->name
)
1609 if (unlikely(INTEL_DEBUG
& DEBUG_DRI
)) {
1611 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1612 buffer
->name
, buffer
->attachment
,
1613 buffer
->cpp
, buffer
->pitch
);
1616 bo
= brw_bo_gem_create_from_name(brw
->bufmgr
, buffer_name
,
1620 "Failed to open BO for returned DRI2 buffer "
1621 "(%dx%d, %s, named %d).\n"
1622 "This is likely a bug in the X Server that will lead to a "
1624 drawable
->w
, drawable
->h
, buffer_name
, buffer
->name
);
1628 intel_update_winsys_renderbuffer_miptree(brw
, rb
, bo
,
1629 drawable
->w
, drawable
->h
,
1632 if (_mesa_is_front_buffer_drawing(fb
) &&
1633 (buffer
->attachment
== __DRI_BUFFER_FRONT_LEFT
||
1634 buffer
->attachment
== __DRI_BUFFER_FAKE_FRONT_LEFT
) &&
1635 rb
->Base
.Base
.NumSamples
> 1) {
1636 intel_renderbuffer_upsample(brw
, rb
);
1641 brw_bo_unreference(bo
);
1645 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1647 * To determine which DRI buffers to request, examine the renderbuffers
1648 * attached to the drawable's framebuffer. Then request the buffers from
1651 * This is called from intel_update_renderbuffers().
1653 * \param drawable Drawable whose buffers are queried.
1654 * \param buffers [out] List of buffers returned by DRI2 query.
1655 * \param buffer_count [out] Number of buffers returned.
1657 * \see intel_update_renderbuffers()
1661 intel_update_image_buffer(struct brw_context
*intel
,
1662 __DRIdrawable
*drawable
,
1663 struct intel_renderbuffer
*rb
,
1665 enum __DRIimageBufferMask buffer_type
)
1667 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1669 if (!rb
|| !buffer
->bo
)
1672 unsigned num_samples
= rb
->Base
.Base
.NumSamples
;
1674 /* Check and see if we're already bound to the right
1677 struct intel_mipmap_tree
*last_mt
;
1678 if (num_samples
== 0)
1681 last_mt
= rb
->singlesample_mt
;
1683 if (last_mt
&& last_mt
->bo
== buffer
->bo
)
1686 intel_update_winsys_renderbuffer_miptree(intel
, rb
, buffer
->bo
,
1687 buffer
->width
, buffer
->height
,
1690 if (_mesa_is_front_buffer_drawing(fb
) &&
1691 buffer_type
== __DRI_IMAGE_BUFFER_FRONT
&&
1692 rb
->Base
.Base
.NumSamples
> 1) {
1693 intel_renderbuffer_upsample(intel
, rb
);
1698 intel_update_image_buffers(struct brw_context
*brw
, __DRIdrawable
*drawable
)
1700 struct gl_framebuffer
*fb
= drawable
->driverPrivate
;
1701 __DRIscreen
*dri_screen
= brw
->screen
->driScrnPriv
;
1702 struct intel_renderbuffer
*front_rb
;
1703 struct intel_renderbuffer
*back_rb
;
1704 struct __DRIimageList images
;
1706 uint32_t buffer_mask
= 0;
1709 front_rb
= intel_get_renderbuffer(fb
, BUFFER_FRONT_LEFT
);
1710 back_rb
= intel_get_renderbuffer(fb
, BUFFER_BACK_LEFT
);
1713 format
= intel_rb_format(back_rb
);
1715 format
= intel_rb_format(front_rb
);
1719 if (front_rb
&& (_mesa_is_front_buffer_drawing(fb
) ||
1720 _mesa_is_front_buffer_reading(fb
) || !back_rb
)) {
1721 buffer_mask
|= __DRI_IMAGE_BUFFER_FRONT
;
1725 buffer_mask
|= __DRI_IMAGE_BUFFER_BACK
;
1727 ret
= dri_screen
->image
.loader
->getBuffers(drawable
,
1728 driGLFormatToImageFormat(format
),
1729 &drawable
->dri2
.stamp
,
1730 drawable
->loaderPrivate
,
1736 if (images
.image_mask
& __DRI_IMAGE_BUFFER_FRONT
) {
1737 drawable
->w
= images
.front
->width
;
1738 drawable
->h
= images
.front
->height
;
1739 intel_update_image_buffer(brw
,
1743 __DRI_IMAGE_BUFFER_FRONT
);
1746 if (images
.image_mask
& __DRI_IMAGE_BUFFER_BACK
) {
1747 drawable
->w
= images
.back
->width
;
1748 drawable
->h
= images
.back
->height
;
1749 intel_update_image_buffer(brw
,
1753 __DRI_IMAGE_BUFFER_BACK
);