f0f472349d0d7c1b61cb7750141a70e9c05bec25
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/api_exec.h"
34 #include "main/imports.h"
35 #include "main/macros.h"
36 #include "main/points.h"
37 #include "main/simple_list.h"
38 #include "main/version.h"
39 #include "main/vtxfmt.h"
40
41 #include "vbo/vbo_context.h"
42
43 #include "brw_context.h"
44 #include "brw_defines.h"
45 #include "brw_draw.h"
46 #include "brw_state.h"
47
48 #include "intel_fbo.h"
49 #include "intel_mipmap_tree.h"
50 #include "intel_regions.h"
51 #include "intel_tex.h"
52 #include "intel_tex_obj.h"
53
54 #include "tnl/t_pipeline.h"
55 #include "glsl/ralloc.h"
56
57 /***************************************
58 * Mesa's Driver Functions
59 ***************************************/
60
61 static size_t
62 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
63 GLenum internalFormat, int samples[16])
64 {
65 struct intel_context *intel = intel_context(ctx);
66
67 (void) target;
68
69 switch (intel->gen) {
70 case 7:
71 samples[0] = 8;
72 samples[1] = 4;
73 return 2;
74
75 case 6:
76 samples[0] = 4;
77 return 1;
78
79 default:
80 samples[0] = 1;
81 return 1;
82 }
83 }
84
85 static void brwInitDriverFunctions(struct intel_screen *screen,
86 struct dd_function_table *functions)
87 {
88 intelInitDriverFunctions( functions );
89
90 brwInitFragProgFuncs( functions );
91 brw_init_common_queryobj_functions(functions);
92 if (screen->gen >= 6)
93 gen6_init_queryobj_functions(functions);
94 else
95 gen4_init_queryobj_functions(functions);
96
97 functions->QuerySamplesForFormat = brw_query_samples_for_format;
98
99 if (screen->gen >= 7) {
100 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
101 functions->EndTransformFeedback = gen7_end_transform_feedback;
102 } else {
103 functions->BeginTransformFeedback = brw_begin_transform_feedback;
104 functions->EndTransformFeedback = brw_end_transform_feedback;
105 }
106
107 if (screen->gen >= 6)
108 functions->GetSamplePosition = gen6_get_sample_position;
109 }
110
111 static void
112 brw_initialize_context_constants(struct brw_context *brw)
113 {
114 struct intel_context *intel = &brw->intel;
115 struct gl_context *ctx = &intel->ctx;
116
117 ctx->Const.QueryCounterBits.Timestamp = 36;
118
119 ctx->Const.StripTextureBorder = true;
120
121 ctx->Const.MaxDualSourceDrawBuffers = 1;
122 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
123 ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
124 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
125 ctx->Const.MaxTextureUnits =
126 MIN2(ctx->Const.MaxTextureCoordUnits,
127 ctx->Const.FragmentProgram.MaxTextureImageUnits);
128 ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
129 ctx->Const.MaxCombinedTextureImageUnits =
130 ctx->Const.VertexProgram.MaxTextureImageUnits +
131 ctx->Const.FragmentProgram.MaxTextureImageUnits;
132
133 ctx->Const.MaxTextureLevels = 14; /* 8192 */
134 if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
135 ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
136 ctx->Const.Max3DTextureLevels = 9;
137 ctx->Const.MaxCubeTextureLevels = 12;
138
139 if (intel->gen >= 7)
140 ctx->Const.MaxArrayTextureLayers = 2048;
141 else
142 ctx->Const.MaxArrayTextureLayers = 512;
143
144 ctx->Const.MaxTextureRectSize = 1 << 12;
145
146 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
147
148 ctx->Const.MaxRenderbufferSize = 8192;
149
150 /* Hardware only supports a limited number of transform feedback buffers.
151 * So we need to override the Mesa default (which is based only on software
152 * limits).
153 */
154 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
155
156 /* On Gen6, in the worst case, we use up one binding table entry per
157 * transform feedback component (see comments above the definition of
158 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
159 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
160 * BRW_MAX_SOL_BINDINGS.
161 *
162 * In "separate components" mode, we need to divide this value by
163 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
164 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
165 */
166 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
167 ctx->Const.MaxTransformFeedbackSeparateComponents =
168 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
169
170 if (intel->gen == 6) {
171 ctx->Const.MaxSamples = 4;
172 ctx->Const.MaxColorTextureSamples = 4;
173 ctx->Const.MaxDepthTextureSamples = 4;
174 ctx->Const.MaxIntegerSamples = 4;
175 } else if (intel->gen >= 7) {
176 ctx->Const.MaxSamples = 8;
177 ctx->Const.MaxColorTextureSamples = 8;
178 ctx->Const.MaxDepthTextureSamples = 8;
179 ctx->Const.MaxIntegerSamples = 8;
180 }
181
182 ctx->Const.MinLineWidth = 1.0;
183 ctx->Const.MinLineWidthAA = 1.0;
184 ctx->Const.MaxLineWidth = 5.0;
185 ctx->Const.MaxLineWidthAA = 5.0;
186 ctx->Const.LineWidthGranularity = 0.5;
187
188 ctx->Const.MinPointSize = 1.0;
189 ctx->Const.MinPointSizeAA = 1.0;
190 ctx->Const.MaxPointSize = 255.0;
191 ctx->Const.MaxPointSizeAA = 255.0;
192 ctx->Const.PointSizeGranularity = 1.0;
193
194 if (intel->gen >= 6)
195 ctx->Const.MaxClipPlanes = 8;
196
197 ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
198 ctx->Const.VertexProgram.MaxAluInstructions = 0;
199 ctx->Const.VertexProgram.MaxTexInstructions = 0;
200 ctx->Const.VertexProgram.MaxTexIndirections = 0;
201 ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
202 ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
203 ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
204 ctx->Const.VertexProgram.MaxNativeAttribs = 16;
205 ctx->Const.VertexProgram.MaxNativeTemps = 256;
206 ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
207 ctx->Const.VertexProgram.MaxNativeParameters = 1024;
208 ctx->Const.VertexProgram.MaxEnvParams =
209 MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
210 ctx->Const.VertexProgram.MaxEnvParams);
211
212 ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
213 ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
214 ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
215 ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
216 ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
217 ctx->Const.FragmentProgram.MaxNativeTemps = 256;
218 ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
219 ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
220 ctx->Const.FragmentProgram.MaxEnvParams =
221 MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
222 ctx->Const.FragmentProgram.MaxEnvParams);
223
224 /* Fragment shaders use real, 32-bit twos-complement integers for all
225 * integer types.
226 */
227 ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
228 ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
229 ctx->Const.FragmentProgram.LowInt.Precision = 0;
230 ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
231 ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;
232
233 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
234 * but we're not sure how it's actually done for vertex order,
235 * that affect provoking vertex decision. Always use last vertex
236 * convention for quad primitive which works as expected for now.
237 */
238 if (intel->gen >= 6)
239 ctx->Const.QuadsFollowProvokingVertexConvention = false;
240
241 ctx->Const.NativeIntegers = true;
242 ctx->Const.UniformBooleanTrue = 1;
243 ctx->Const.UniformBufferOffsetAlignment = 16;
244
245 ctx->Const.ForceGLSLExtensionsWarn =
246 driQueryOptionb(&brw->optionCache, "force_glsl_extensions_warn");
247
248 ctx->Const.DisableGLSLLineContinuations =
249 driQueryOptionb(&brw->optionCache, "disable_glsl_line_continuations");
250
251 /* We want the GLSL compiler to emit code that uses condition codes */
252 for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
253 ctx->ShaderCompilerOptions[i].MaxIfDepth = intel->gen < 6 ? 16 : UINT_MAX;
254 ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
255 ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
256 ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
257 ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
258 ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
259
260 ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
261 (i == MESA_SHADER_FRAGMENT);
262 ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
263 (i == MESA_SHADER_FRAGMENT);
264 ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
265 }
266
267 ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
268 }
269
270 bool
271 brwCreateContext(int api,
272 const struct gl_config *mesaVis,
273 __DRIcontext *driContextPriv,
274 unsigned major_version,
275 unsigned minor_version,
276 uint32_t flags,
277 unsigned *error,
278 void *sharedContextPrivate)
279 {
280 __DRIscreen *sPriv = driContextPriv->driScreenPriv;
281 struct intel_screen *screen = sPriv->driverPrivate;
282 struct dd_function_table functions;
283
284 struct brw_context *brw = rzalloc(NULL, struct brw_context);
285 if (!brw) {
286 printf("%s: failed to alloc context\n", __FUNCTION__);
287 *error = __DRI_CTX_ERROR_NO_MEMORY;
288 return false;
289 }
290
291 /* brwInitVtbl needs to know the chipset generation so that it can set the
292 * right pointers.
293 */
294 brw->intel.gen = screen->gen;
295
296 brwInitVtbl( brw );
297
298 brwInitDriverFunctions(screen, &functions);
299
300 struct intel_context *intel = &brw->intel;
301 struct gl_context *ctx = &intel->ctx;
302
303 if (!intelInitContext( brw, api, major_version, minor_version,
304 mesaVis, driContextPriv,
305 sharedContextPrivate, &functions,
306 error)) {
307 ralloc_free(brw);
308 return false;
309 }
310
311 brw_initialize_context_constants(brw);
312
313 /* Reinitialize the context point state. It depends on ctx->Const values. */
314 _mesa_init_point(ctx);
315
316 if (intel->gen >= 6) {
317 /* Create a new hardware context. Using a hardware context means that
318 * our GPU state will be saved/restored on context switch, allowing us
319 * to assume that the GPU is in the same state we left it in.
320 *
321 * This is required for transform feedback buffer offsets, query objects,
322 * and also allows us to reduce how much state we have to emit.
323 */
324 brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
325
326 if (!brw->hw_ctx) {
327 fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
328 ralloc_free(brw);
329 return false;
330 }
331 }
332
333 brw_init_surface_formats(brw);
334
335 /* Initialize swrast, tnl driver tables: */
336 TNLcontext *tnl = TNL_CONTEXT(ctx);
337 if (tnl)
338 tnl->Driver.RunPipeline = _tnl_run_pipeline;
339
340 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
341 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
342 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
343
344 if (intel->is_g4x || intel->gen >= 5) {
345 brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
346 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
347 brw->has_surface_tile_offset = true;
348 if (intel->gen < 6)
349 brw->has_compr4 = true;
350 brw->has_aa_line_parameters = true;
351 brw->has_pln = true;
352 } else {
353 brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
354 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
355 }
356
357 /* WM maximum threads is number of EUs times number of threads per EU. */
358 assert(intel->gen <= 7);
359
360 if (intel->is_haswell) {
361 if (intel->gt == 1) {
362 brw->max_wm_threads = 102;
363 brw->max_vs_threads = 70;
364 brw->urb.size = 128;
365 brw->urb.max_vs_entries = 640;
366 brw->urb.max_gs_entries = 256;
367 } else if (intel->gt == 2) {
368 brw->max_wm_threads = 204;
369 brw->max_vs_threads = 280;
370 brw->urb.size = 256;
371 brw->urb.max_vs_entries = 1664;
372 brw->urb.max_gs_entries = 640;
373 } else if (intel->gt == 3) {
374 brw->max_wm_threads = 408;
375 brw->max_vs_threads = 280;
376 brw->urb.size = 512;
377 brw->urb.max_vs_entries = 1664;
378 brw->urb.max_gs_entries = 640;
379 }
380 } else if (intel->gen == 7) {
381 if (intel->gt == 1) {
382 brw->max_wm_threads = 48;
383 brw->max_vs_threads = 36;
384 brw->max_gs_threads = 36;
385 brw->urb.size = 128;
386 brw->urb.max_vs_entries = 512;
387 brw->urb.max_gs_entries = 192;
388 } else if (intel->gt == 2) {
389 brw->max_wm_threads = 172;
390 brw->max_vs_threads = 128;
391 brw->max_gs_threads = 128;
392 brw->urb.size = 256;
393 brw->urb.max_vs_entries = 704;
394 brw->urb.max_gs_entries = 320;
395 } else {
396 assert(!"Unknown gen7 device.");
397 }
398 } else if (intel->gen == 6) {
399 if (intel->gt == 2) {
400 brw->max_wm_threads = 80;
401 brw->max_vs_threads = 60;
402 brw->max_gs_threads = 60;
403 brw->urb.size = 64; /* volume 5c.5 section 5.1 */
404 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
405 brw->urb.max_gs_entries = 256;
406 } else {
407 brw->max_wm_threads = 40;
408 brw->max_vs_threads = 24;
409 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
410 brw->urb.size = 32; /* volume 5c.5 section 5.1 */
411 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
412 brw->urb.max_gs_entries = 256;
413 }
414 brw->urb.gen6_gs_previously_active = false;
415 } else if (intel->gen == 5) {
416 brw->urb.size = 1024;
417 brw->max_vs_threads = 72;
418 brw->max_gs_threads = 32;
419 brw->max_wm_threads = 12 * 6;
420 } else if (intel->is_g4x) {
421 brw->urb.size = 384;
422 brw->max_vs_threads = 32;
423 brw->max_gs_threads = 2;
424 brw->max_wm_threads = 10 * 5;
425 } else if (intel->gen < 6) {
426 brw->urb.size = 256;
427 brw->max_vs_threads = 16;
428 brw->max_gs_threads = 2;
429 brw->max_wm_threads = 8 * 4;
430 brw->has_negative_rhw_bug = true;
431 }
432
433 if (intel->gen <= 7) {
434 brw->needs_unlit_centroid_workaround = true;
435 }
436
437 brw->prim_restart.in_progress = false;
438 brw->prim_restart.enable_cut_index = false;
439
440 brw_init_state( brw );
441
442 brw->curbe.last_buf = calloc(1, 4096);
443 brw->curbe.next_buf = calloc(1, 4096);
444
445 brw->state.dirty.mesa = ~0;
446 brw->state.dirty.brw = ~0;
447
448 brw->emit_state_always = 0;
449
450 brw->batch.need_workaround_flush = true;
451
452 ctx->VertexProgram._MaintainTnlProgram = true;
453 ctx->FragmentProgram._MaintainTexEnvProgram = true;
454
455 brw_draw_init( brw );
456
457 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
458
459 ctx->Const.ContextFlags = 0;
460 if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
461 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
462
463 if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
464 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
465
466 /* Turn on some extra GL_ARB_debug_output generation. */
467 intel->perf_debug = true;
468 }
469
470 brw_fs_alloc_reg_sets(brw);
471
472 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
473 brw_init_shader_time(brw);
474
475 _mesa_compute_version(ctx);
476
477 _mesa_initialize_dispatch_tables(ctx);
478 _mesa_initialize_vbo_vtxfmt(ctx);
479
480 return true;
481 }
482