5494fcd632420e882b5f26da84812af20de39405
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/api_exec.h"
34 #include "main/imports.h"
35 #include "main/macros.h"
36 #include "main/points.h"
37 #include "main/simple_list.h"
38 #include "main/version.h"
39 #include "main/vtxfmt.h"
40
41 #include "vbo/vbo_context.h"
42
43 #include "brw_context.h"
44 #include "brw_defines.h"
45 #include "brw_draw.h"
46 #include "brw_state.h"
47
48 #include "intel_fbo.h"
49 #include "intel_mipmap_tree.h"
50 #include "intel_regions.h"
51 #include "intel_tex.h"
52 #include "intel_tex_obj.h"
53
54 #include "tnl/t_pipeline.h"
55 #include "glsl/ralloc.h"
56
57 /***************************************
58 * Mesa's Driver Functions
59 ***************************************/
60
61 static size_t
62 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
63 GLenum internalFormat, int samples[16])
64 {
65 struct brw_context *brw = brw_context(ctx);
66
67 (void) target;
68
69 switch (brw->gen) {
70 case 7:
71 samples[0] = 8;
72 samples[1] = 4;
73 return 2;
74
75 case 6:
76 samples[0] = 4;
77 return 1;
78
79 default:
80 samples[0] = 1;
81 return 1;
82 }
83 }
84
85 static void brwInitDriverFunctions(struct intel_screen *screen,
86 struct dd_function_table *functions)
87 {
88 intelInitDriverFunctions( functions );
89
90 brwInitFragProgFuncs( functions );
91 brw_init_common_queryobj_functions(functions);
92 if (screen->gen >= 6)
93 gen6_init_queryobj_functions(functions);
94 else
95 gen4_init_queryobj_functions(functions);
96
97 functions->QuerySamplesForFormat = brw_query_samples_for_format;
98
99 if (screen->gen >= 7) {
100 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
101 functions->EndTransformFeedback = gen7_end_transform_feedback;
102 } else {
103 functions->BeginTransformFeedback = brw_begin_transform_feedback;
104 functions->EndTransformFeedback = brw_end_transform_feedback;
105 }
106
107 if (screen->gen >= 6)
108 functions->GetSamplePosition = gen6_get_sample_position;
109 }
110
111 static void
112 brw_initialize_context_constants(struct brw_context *brw)
113 {
114 struct gl_context *ctx = &brw->ctx;
115
116 ctx->Const.QueryCounterBits.Timestamp = 36;
117
118 ctx->Const.StripTextureBorder = true;
119
120 ctx->Const.MaxDualSourceDrawBuffers = 1;
121 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
122 ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
123 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
124 ctx->Const.MaxTextureUnits =
125 MIN2(ctx->Const.MaxTextureCoordUnits,
126 ctx->Const.FragmentProgram.MaxTextureImageUnits);
127 ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
128 ctx->Const.MaxCombinedTextureImageUnits =
129 ctx->Const.VertexProgram.MaxTextureImageUnits +
130 ctx->Const.FragmentProgram.MaxTextureImageUnits;
131
132 ctx->Const.MaxTextureLevels = 14; /* 8192 */
133 if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
134 ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
135 ctx->Const.Max3DTextureLevels = 9;
136 ctx->Const.MaxCubeTextureLevels = 12;
137
138 if (brw->gen >= 7)
139 ctx->Const.MaxArrayTextureLayers = 2048;
140 else
141 ctx->Const.MaxArrayTextureLayers = 512;
142
143 ctx->Const.MaxTextureRectSize = 1 << 12;
144
145 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
146
147 ctx->Const.MaxRenderbufferSize = 8192;
148
149 /* Hardware only supports a limited number of transform feedback buffers.
150 * So we need to override the Mesa default (which is based only on software
151 * limits).
152 */
153 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
154
155 /* On Gen6, in the worst case, we use up one binding table entry per
156 * transform feedback component (see comments above the definition of
157 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
158 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
159 * BRW_MAX_SOL_BINDINGS.
160 *
161 * In "separate components" mode, we need to divide this value by
162 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
163 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
164 */
165 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
166 ctx->Const.MaxTransformFeedbackSeparateComponents =
167 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
168
169 if (brw->gen == 6) {
170 ctx->Const.MaxSamples = 4;
171 ctx->Const.MaxColorTextureSamples = 4;
172 ctx->Const.MaxDepthTextureSamples = 4;
173 ctx->Const.MaxIntegerSamples = 4;
174 } else if (brw->gen >= 7) {
175 ctx->Const.MaxSamples = 8;
176 ctx->Const.MaxColorTextureSamples = 8;
177 ctx->Const.MaxDepthTextureSamples = 8;
178 ctx->Const.MaxIntegerSamples = 8;
179 ctx->Const.MaxProgramTextureGatherComponents = 4;
180 }
181
182 ctx->Const.MinLineWidth = 1.0;
183 ctx->Const.MinLineWidthAA = 1.0;
184 ctx->Const.MaxLineWidth = 5.0;
185 ctx->Const.MaxLineWidthAA = 5.0;
186 ctx->Const.LineWidthGranularity = 0.5;
187
188 ctx->Const.MinPointSize = 1.0;
189 ctx->Const.MinPointSizeAA = 1.0;
190 ctx->Const.MaxPointSize = 255.0;
191 ctx->Const.MaxPointSizeAA = 255.0;
192 ctx->Const.PointSizeGranularity = 1.0;
193
194 if (brw->gen >= 5 || brw->is_g4x)
195 ctx->Const.MaxClipPlanes = 8;
196
197 ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
198 ctx->Const.VertexProgram.MaxAluInstructions = 0;
199 ctx->Const.VertexProgram.MaxTexInstructions = 0;
200 ctx->Const.VertexProgram.MaxTexIndirections = 0;
201 ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
202 ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
203 ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
204 ctx->Const.VertexProgram.MaxNativeAttribs = 16;
205 ctx->Const.VertexProgram.MaxNativeTemps = 256;
206 ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
207 ctx->Const.VertexProgram.MaxNativeParameters = 1024;
208 ctx->Const.VertexProgram.MaxEnvParams =
209 MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
210 ctx->Const.VertexProgram.MaxEnvParams);
211
212 ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
213 ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
214 ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
215 ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
216 ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
217 ctx->Const.FragmentProgram.MaxNativeTemps = 256;
218 ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
219 ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
220 ctx->Const.FragmentProgram.MaxEnvParams =
221 MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
222 ctx->Const.FragmentProgram.MaxEnvParams);
223
224 /* Fragment shaders use real, 32-bit twos-complement integers for all
225 * integer types.
226 */
227 ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
228 ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
229 ctx->Const.FragmentProgram.LowInt.Precision = 0;
230 ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
231 ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;
232
233 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
234 * but we're not sure how it's actually done for vertex order,
235 * that affect provoking vertex decision. Always use last vertex
236 * convention for quad primitive which works as expected for now.
237 */
238 if (brw->gen >= 6)
239 ctx->Const.QuadsFollowProvokingVertexConvention = false;
240
241 ctx->Const.NativeIntegers = true;
242 ctx->Const.UniformBooleanTrue = 1;
243 ctx->Const.UniformBufferOffsetAlignment = 16;
244
245 ctx->Const.ForceGLSLExtensionsWarn =
246 driQueryOptionb(&brw->optionCache, "force_glsl_extensions_warn");
247
248 ctx->Const.DisableGLSLLineContinuations =
249 driQueryOptionb(&brw->optionCache, "disable_glsl_line_continuations");
250
251 if (brw->gen >= 6) {
252 ctx->Const.MaxVarying = 32;
253 ctx->Const.VertexProgram.MaxOutputComponents = 128;
254 ctx->Const.GeometryProgram.MaxInputComponents = 128;
255 ctx->Const.GeometryProgram.MaxOutputComponents = 128;
256 ctx->Const.FragmentProgram.MaxInputComponents = 128;
257 }
258
259 /* We want the GLSL compiler to emit code that uses condition codes */
260 for (int i = 0; i < MESA_SHADER_TYPES; i++) {
261 ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
262 ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
263 ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
264 ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
265 ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
266 ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
267
268 ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
269 (i == MESA_SHADER_FRAGMENT);
270 ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
271 (i == MESA_SHADER_FRAGMENT);
272 ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
273 }
274
275 ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
276 }
277
278 bool
279 brwCreateContext(gl_api api,
280 const struct gl_config *mesaVis,
281 __DRIcontext *driContextPriv,
282 unsigned major_version,
283 unsigned minor_version,
284 uint32_t flags,
285 unsigned *dri_ctx_error,
286 void *sharedContextPrivate)
287 {
288 __DRIscreen *sPriv = driContextPriv->driScreenPriv;
289 struct intel_screen *screen = sPriv->driverPrivate;
290 struct dd_function_table functions;
291
292 struct brw_context *brw = rzalloc(NULL, struct brw_context);
293 if (!brw) {
294 printf("%s: failed to alloc context\n", __FUNCTION__);
295 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
296 return false;
297 }
298
299 /* brwInitVtbl needs to know the chipset generation so that it can set the
300 * right pointers.
301 */
302 brw->gen = screen->gen;
303
304 brwInitVtbl( brw );
305
306 brwInitDriverFunctions(screen, &functions);
307
308 struct gl_context *ctx = &brw->ctx;
309
310 if (!intelInitContext( brw, api, major_version, minor_version,
311 mesaVis, driContextPriv,
312 sharedContextPrivate, &functions,
313 dri_ctx_error)) {
314 intelDestroyContext(driContextPriv);
315 return false;
316 }
317
318 brw_initialize_context_constants(brw);
319
320 /* Reinitialize the context point state. It depends on ctx->Const values. */
321 _mesa_init_point(ctx);
322
323 if (brw->gen >= 6) {
324 /* Create a new hardware context. Using a hardware context means that
325 * our GPU state will be saved/restored on context switch, allowing us
326 * to assume that the GPU is in the same state we left it in.
327 *
328 * This is required for transform feedback buffer offsets, query objects,
329 * and also allows us to reduce how much state we have to emit.
330 */
331 brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
332
333 if (!brw->hw_ctx) {
334 fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
335 intelDestroyContext(driContextPriv);
336 return false;
337 }
338 }
339
340 brw_init_surface_formats(brw);
341
342 /* Initialize swrast, tnl driver tables: */
343 TNLcontext *tnl = TNL_CONTEXT(ctx);
344 if (tnl)
345 tnl->Driver.RunPipeline = _tnl_run_pipeline;
346
347 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
348 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
349 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
350
351 if (brw->is_g4x || brw->gen >= 5) {
352 brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
353 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
354 brw->has_surface_tile_offset = true;
355 if (brw->gen < 6)
356 brw->has_compr4 = true;
357 brw->has_aa_line_parameters = true;
358 brw->has_pln = true;
359 } else {
360 brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
361 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
362 }
363
364 /* WM maximum threads is number of EUs times number of threads per EU. */
365 assert(brw->gen <= 7);
366
367 if (brw->is_haswell) {
368 if (brw->gt == 1) {
369 brw->max_wm_threads = 102;
370 brw->max_vs_threads = 70;
371 brw->max_gs_threads = 70;
372 brw->urb.size = 128;
373 brw->urb.min_vs_entries = 32;
374 brw->urb.max_vs_entries = 640;
375 brw->urb.max_gs_entries = 256;
376 } else if (brw->gt == 2) {
377 brw->max_wm_threads = 204;
378 brw->max_vs_threads = 280;
379 brw->max_gs_threads = 256;
380 brw->urb.size = 256;
381 brw->urb.min_vs_entries = 64;
382 brw->urb.max_vs_entries = 1664;
383 brw->urb.max_gs_entries = 640;
384 } else if (brw->gt == 3) {
385 brw->max_wm_threads = 408;
386 brw->max_vs_threads = 280;
387 brw->max_gs_threads = 256;
388 brw->urb.size = 512;
389 brw->urb.min_vs_entries = 64;
390 brw->urb.max_vs_entries = 1664;
391 brw->urb.max_gs_entries = 640;
392 }
393 } else if (brw->gen == 7) {
394 if (brw->gt == 1) {
395 brw->max_wm_threads = 48;
396 brw->max_vs_threads = 36;
397 brw->max_gs_threads = 36;
398 brw->urb.size = 128;
399 brw->urb.min_vs_entries = 32;
400 brw->urb.max_vs_entries = 512;
401 brw->urb.max_gs_entries = 192;
402 } else if (brw->gt == 2) {
403 brw->max_wm_threads = 172;
404 brw->max_vs_threads = 128;
405 brw->max_gs_threads = 128;
406 brw->urb.size = 256;
407 brw->urb.min_vs_entries = 32;
408 brw->urb.max_vs_entries = 704;
409 brw->urb.max_gs_entries = 320;
410 } else {
411 assert(!"Unknown gen7 device.");
412 }
413 } else if (brw->gen == 6) {
414 if (brw->gt == 2) {
415 brw->max_wm_threads = 80;
416 brw->max_vs_threads = 60;
417 brw->max_gs_threads = 60;
418 brw->urb.size = 64; /* volume 5c.5 section 5.1 */
419 brw->urb.min_vs_entries = 24;
420 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
421 brw->urb.max_gs_entries = 256;
422 } else {
423 brw->max_wm_threads = 40;
424 brw->max_vs_threads = 24;
425 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
426 brw->urb.size = 32; /* volume 5c.5 section 5.1 */
427 brw->urb.min_vs_entries = 24;
428 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
429 brw->urb.max_gs_entries = 256;
430 }
431 brw->urb.gen6_gs_previously_active = false;
432 } else if (brw->gen == 5) {
433 brw->urb.size = 1024;
434 brw->max_vs_threads = 72;
435 brw->max_gs_threads = 32;
436 brw->max_wm_threads = 12 * 6;
437 } else if (brw->is_g4x) {
438 brw->urb.size = 384;
439 brw->max_vs_threads = 32;
440 brw->max_gs_threads = 2;
441 brw->max_wm_threads = 10 * 5;
442 } else if (brw->gen < 6) {
443 brw->urb.size = 256;
444 brw->max_vs_threads = 16;
445 brw->max_gs_threads = 2;
446 brw->max_wm_threads = 8 * 4;
447 brw->has_negative_rhw_bug = true;
448 }
449
450 if (brw->gen <= 7) {
451 brw->needs_unlit_centroid_workaround = true;
452 }
453
454 brw->prim_restart.in_progress = false;
455 brw->prim_restart.enable_cut_index = false;
456
457 brw_init_state( brw );
458
459 if (brw->gen < 6) {
460 brw->curbe.last_buf = calloc(1, 4096);
461 brw->curbe.next_buf = calloc(1, 4096);
462 }
463
464 brw->state.dirty.mesa = ~0;
465 brw->state.dirty.brw = ~0;
466
467 /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
468 * dirty flags.
469 */
470 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));
471
472 brw->emit_state_always = 0;
473
474 brw->batch.need_workaround_flush = true;
475
476 ctx->VertexProgram._MaintainTnlProgram = true;
477 ctx->FragmentProgram._MaintainTexEnvProgram = true;
478
479 brw_draw_init( brw );
480
481 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
482 brw->disable_derivative_optimization =
483 driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
484
485 if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
486 /* Turn on some extra GL_ARB_debug_output generation. */
487 brw->perf_debug = true;
488 }
489
490 brw_fs_alloc_reg_sets(brw);
491 brw_vec4_alloc_reg_set(brw);
492
493 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
494 brw_init_shader_time(brw);
495
496 _mesa_compute_version(ctx);
497
498 _mesa_initialize_dispatch_tables(ctx);
499 _mesa_initialize_vbo_vtxfmt(ctx);
500
501 if (ctx->Extensions.AMD_performance_monitor) {
502 brw_init_performance_monitors(brw);
503 }
504
505 return true;
506 }
507