nvfx: rewrite draw code and buffer code
[mesa.git] / src / gallium / drivers / nvfx / nvfx_screen.c
1 #include "pipe/p_screen.h"
2 #include "pipe/p_state.h"
3 #include "util/u_format_s3tc.h"
4 #include "util/u_simple_screen.h"
5
6 #include "nouveau/nouveau_screen.h"
7
8 #include "nvfx_context.h"
9 #include "nvfx_screen.h"
10 #include "nvfx_resource.h"
11 #include "nvfx_tex.h"
12
/*
 * NV3x 3D object class selection masks.
 * Each mask is tested against (1 << (chipset & 0x0f)), so bit N set means
 * "chipset nv3N uses this class".
 */
#define NV30TCL_CHIPSET_3X_MASK ((1 << 0) | (1 << 1))                       /* nv30, nv31 */
#define NV34TCL_CHIPSET_3X_MASK (1 << 4)                                    /* nv34 */
#define NV35TCL_CHIPSET_3X_MASK ((1 << 5) | (1 << 6) | (1 << 7) | (1 << 8)) /* nv35 .. nv38 */
16
17 /* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h
18 * to get the pointer to the context front buffer, so I copied nouveau_winsys here.
19 * nv30_screen_surface_format_supported() can then use it to enforce creating fbo
20 * with same number of bits everywhere.
21 */
22 struct nouveau_winsys {
23 struct pipe_winsys base;
24
25 struct pipe_screen *pscreen;
26
27 struct pipe_surface *front;
28 };
/*
 * NV4x/NV6x 3D object class selection masks.
 * Each mask is tested against (1 << (chipset & 0x0f)); the shift counts below
 * are therefore the low nibble of the chipset id.
 */
#define NV4X_GRCLASS4097_CHIPSETS \
	((1 << 0x0) | (1 << 0x1) | (1 << 0x2) | (1 << 0x3) | (1 << 0x5) | \
	 (1 << 0x7) | (1 << 0x8) | (1 << 0x9) | (1 << 0xb))
#define NV4X_GRCLASS4497_CHIPSETS \
	((1 << 0x4) | (1 << 0x6) | (1 << 0xa) | (1 << 0xc) | (1 << 0xe))
#define NV6X_GRCLASS4497_CHIPSETS \
	((1 << 0x3) | (1 << 0x7))
32
33 static int
34 nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
35 {
36 struct nvfx_screen *screen = nvfx_screen(pscreen);
37
38 switch (param) {
39 case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
40 /* TODO: check this */
41 return screen->is_nv4x ? 16 : 8;
42 case PIPE_CAP_NPOT_TEXTURES:
43 return !!screen->is_nv4x;
44 case PIPE_CAP_TWO_SIDED_STENCIL:
45 return 1;
46 case PIPE_CAP_GLSL:
47 return 0;
48 case PIPE_CAP_ANISOTROPIC_FILTER:
49 return 1;
50 case PIPE_CAP_POINT_SPRITE:
51 return 1;
52 case PIPE_CAP_MAX_RENDER_TARGETS:
53 return screen->is_nv4x ? 4 : 2;
54 case PIPE_CAP_OCCLUSION_QUERY:
55 return 1;
56 case PIPE_CAP_TIMER_QUERY:
57 return 0;
58 case PIPE_CAP_TEXTURE_SHADOW_MAP:
59 return 1;
60 case PIPE_CAP_TEXTURE_SWIZZLE:
61 return 1;
62 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
63 return 13;
64 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
65 return 10;
66 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
67 return 13;
68 case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
69 return !!screen->is_nv4x;
70 case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
71 return 1;
72 case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
73 return 0; /* We have 4 on nv40 - but unsupported currently */
74 case PIPE_CAP_TGSI_CONT_SUPPORTED:
75 return 0;
76 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
77 return !!screen->is_nv4x;
78 case PIPE_CAP_MAX_COMBINED_SAMPLERS:
79 return 16;
80 case PIPE_CAP_INDEP_BLEND_ENABLE:
81 /* TODO: on nv40 we have separate color masks */
82 /* TODO: nv40 mrt blending is probably broken */
83 return 0;
84 case PIPE_CAP_INDEP_BLEND_FUNC:
85 return 0;
86 case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
87 return 0;
88 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
89 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
90 return 1;
91 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
92 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
93 return 0;
94 case PIPE_CAP_MAX_FS_INSTRUCTIONS:
95 case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
96 case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
97 case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
98 return 4096;
99 case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
100 /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static
101 value (nv30:0/nv40:4) ? */
102 return screen->is_nv4x ? 4 : 0;
103 case PIPE_CAP_MAX_FS_INPUTS:
104 return 10;
105 case PIPE_CAP_MAX_FS_CONSTS:
106 return screen->is_nv4x ? 224 : 32;
107 case PIPE_CAP_MAX_FS_TEMPS:
108 return 32;
109 case PIPE_CAP_MAX_FS_ADDRS:
110 return screen->is_nv4x ? 1 : 0;
111 case PIPE_CAP_MAX_FS_PREDS:
112 return screen->is_nv4x ? 1 : 0;
113 case PIPE_CAP_MAX_VS_INSTRUCTIONS:
114 case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
115 return screen->is_nv4x ? 512 : 256;
116 case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
117 case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
118 return screen->is_nv4x ? 512 : 0;
119 case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
120 /* FIXME: is it the dynamic (nv30:24/nv40:24) or the static
121 value (nv30:1/nv40:4) ? */
122 return screen->is_nv4x ? 4 : 1;
123 case PIPE_CAP_MAX_VS_INPUTS:
124 return 16;
125 case PIPE_CAP_MAX_VS_CONSTS:
126 return 256;
127 case PIPE_CAP_MAX_VS_TEMPS:
128 return screen->is_nv4x ? 32 : 13;
129 case PIPE_CAP_MAX_VS_ADDRS:
130 return 2;
131 case PIPE_CAP_MAX_VS_PREDS:
132 return screen->is_nv4x ? 1 : 0;
133 case PIPE_CAP_GEOMETRY_SHADER4:
134 return 0;
135 case PIPE_CAP_DEPTH_CLAMP:
136 return 0; // TODO: implement depth clamp
137 default:
138 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
139 return 0;
140 }
141 }
142
143 static float
144 nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
145 {
146 struct nvfx_screen *screen = nvfx_screen(pscreen);
147
148 switch (param) {
149 case PIPE_CAP_MAX_LINE_WIDTH:
150 case PIPE_CAP_MAX_LINE_WIDTH_AA:
151 return 10.0;
152 case PIPE_CAP_MAX_POINT_WIDTH:
153 case PIPE_CAP_MAX_POINT_WIDTH_AA:
154 return 64.0;
155 case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
156 return screen->is_nv4x ? 16.0 : 8.0;
157 case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
158 return screen->is_nv4x ? 16.0 : 4.0;
159 default:
160 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
161 return 0.0;
162 }
163 }
164
165 static boolean
166 nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
167 enum pipe_format format,
168 enum pipe_texture_target target,
169 unsigned sample_count,
170 unsigned bind, unsigned geom_flags)
171 {
172 struct nvfx_screen *screen = nvfx_screen(pscreen);
173 struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
174
175 if (sample_count > 1)
176 return FALSE;
177
178 if (bind & PIPE_BIND_RENDER_TARGET) {
179 switch (format) {
180 case PIPE_FORMAT_B8G8R8A8_UNORM:
181 case PIPE_FORMAT_B8G8R8X8_UNORM:
182 case PIPE_FORMAT_B5G6R5_UNORM:
183 break;
184 default:
185 return FALSE;
186 }
187 }
188
189 if (bind & PIPE_BIND_DEPTH_STENCIL) {
190 switch (format) {
191 case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
192 case PIPE_FORMAT_X8Z24_UNORM:
193 break;
194 case PIPE_FORMAT_Z16_UNORM:
195 /* TODO: this nv30 limitation probably does not exist */
196 if (!screen->is_nv4x && front && front->format != PIPE_FORMAT_B5G6R5_UNORM)
197 return FALSE;
198 break;
199 default:
200 return FALSE;
201 }
202 }
203
204 if (bind & PIPE_BIND_SAMPLER_VIEW) {
205 struct nvfx_texture_format* tf = &nvfx_texture_formats[format];
206 if(util_format_is_s3tc(format) && !util_format_s3tc_enabled)
207 return FALSE;
208
209 if(screen->is_nv4x)
210 {
211 if(tf->fmt[4] < 0)
212 return FALSE;
213 }
214 else
215 {
216 if(tf->fmt[0] < 0)
217 return FALSE;
218 }
219 }
220
221 // note that we do actually support everything through translate
222 if (bind & PIPE_BIND_VERTEX_BUFFER) {
223 unsigned type = nvfx_vertex_formats[format];
224 if(!type)
225 return FALSE;
226 }
227
228 if (bind & PIPE_BIND_INDEX_BUFFER) {
229 // 8-bit indices supported, but not in hardware index buffer
230 if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED)
231 return FALSE;
232 }
233
234 if(bind & PIPE_BIND_STREAM_OUTPUT)
235 return FALSE;
236
237 return TRUE;
238 }
239
240 static void
241 nvfx_screen_destroy(struct pipe_screen *pscreen)
242 {
243 struct nvfx_screen *screen = nvfx_screen(pscreen);
244
245 nouveau_resource_destroy(&screen->vp_exec_heap);
246 nouveau_resource_destroy(&screen->vp_data_heap);
247 nouveau_resource_destroy(&screen->query_heap);
248 nouveau_notifier_free(&screen->query);
249 nouveau_notifier_free(&screen->sync);
250 nouveau_grobj_free(&screen->eng3d);
251 nvfx_screen_surface_takedown(pscreen);
252
253 nouveau_screen_fini(&screen->base);
254
255 FREE(pscreen);
256 }
257
258 static void nv30_screen_init(struct nvfx_screen *screen)
259 {
260 struct nouveau_channel *chan = screen->base.channel;
261 int i;
262
263 /* TODO: perhaps we should do some of this on nv40 too? */
264 for (i=1; i<8; i++) {
265 OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1));
266 OUT_RING(chan, 0);
267 OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_VERT(i), 1));
268 OUT_RING(chan, 0);
269 }
270
271 OUT_RING(chan, RING_3D(0x220, 1));
272 OUT_RING(chan, 1);
273
274 OUT_RING(chan, RING_3D(0x03b0, 1));
275 OUT_RING(chan, 0x00100000);
276 OUT_RING(chan, RING_3D(0x1454, 1));
277 OUT_RING(chan, 0);
278 OUT_RING(chan, RING_3D(0x1d80, 1));
279 OUT_RING(chan, 3);
280 OUT_RING(chan, RING_3D(0x1450, 1));
281 OUT_RING(chan, 0x00030004);
282
283 /* NEW */
284 OUT_RING(chan, RING_3D(0x1e98, 1));
285 OUT_RING(chan, 0);
286 OUT_RING(chan, RING_3D(0x17e0, 3));
287 OUT_RING(chan, fui(0.0));
288 OUT_RING(chan, fui(0.0));
289 OUT_RING(chan, fui(1.0));
290 OUT_RING(chan, RING_3D(0x1f80, 16));
291 for (i=0; i<16; i++) {
292 OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
293 }
294
295 OUT_RING(chan, RING_3D(0x120, 3));
296 OUT_RING(chan, 0);
297 OUT_RING(chan, 1);
298 OUT_RING(chan, 2);
299
300 OUT_RING(chan, RING_3D(0x1d88, 1));
301 OUT_RING(chan, 0x00001200);
302
303 OUT_RING(chan, RING_3D(NV34TCL_RC_ENABLE, 1));
304 OUT_RING(chan, 0);
305
306 OUT_RING(chan, RING_3D(NV34TCL_DEPTH_RANGE_NEAR, 2));
307 OUT_RING(chan, fui(0.0));
308 OUT_RING(chan, fui(1.0));
309
310 OUT_RING(chan, RING_3D(NV34TCL_MULTISAMPLE_CONTROL, 1));
311 OUT_RING(chan, 0xffff0000);
312
313 /* enables use of vp rather than fixed-function somehow */
314 OUT_RING(chan, RING_3D(0x1e94, 1));
315 OUT_RING(chan, 0x13);
316 }
317
318 static void nv40_screen_init(struct nvfx_screen *screen)
319 {
320 struct nouveau_channel *chan = screen->base.channel;
321
322 OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 2));
323 OUT_RING(chan, screen->base.channel->vram->handle);
324 OUT_RING(chan, screen->base.channel->vram->handle);
325
326 OUT_RING(chan, RING_3D(0x1ea4, 3));
327 OUT_RING(chan, 0x00000010);
328 OUT_RING(chan, 0x01000100);
329 OUT_RING(chan, 0xff800006);
330
331 /* vtxprog output routing */
332 OUT_RING(chan, RING_3D(0x1fc4, 1));
333 OUT_RING(chan, 0x06144321);
334 OUT_RING(chan, RING_3D(0x1fc8, 2));
335 OUT_RING(chan, 0xedcba987);
336 OUT_RING(chan, 0x00000021);
337 OUT_RING(chan, RING_3D(0x1fd0, 1));
338 OUT_RING(chan, 0x00171615);
339 OUT_RING(chan, RING_3D(0x1fd4, 1));
340 OUT_RING(chan, 0x001b1a19);
341
342 OUT_RING(chan, RING_3D(0x1ef8, 1));
343 OUT_RING(chan, 0x0020ffff);
344 OUT_RING(chan, RING_3D(0x1d64, 1));
345 OUT_RING(chan, 0x00d30000);
346 OUT_RING(chan, RING_3D(0x1e94, 1));
347 OUT_RING(chan, 0x00000001);
348 }
349
350 static unsigned
351 nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
352 {
353 int vram_hack_default = 0;
354 int vram_hack;
355 // TODO: this is a bit of a guess; also add other cards that may need this hack.
356 // It may also depend on the specific card or the AGP/PCIe chipset.
357 if(screen->base.device->chipset == 0x47 /* G70 */
358 || screen->base.device->chipset == 0x49 /* G71 */
359 || screen->base.device->chipset == 0x46 /* G72 */
360 )
361 vram_hack_default = 1;
362 vram_hack = debug_get_bool_option("NOUVEAU_VTXIDX_IN_VRAM", vram_hack_default);
363
364 #ifdef DEBUG
365 if(!vram_hack)
366 {
367 fprintf(stderr, "Some systems may experience graphics corruption due to randomly misplaced vertices.\n"
368 "If this is happening, export NOUVEAU_VTXIDX_IN_VRAM=1 may reduce or eliminate the problem\n");
369 }
370 else
371 {
372 fprintf(stderr, "A performance reducing hack is being used to help avoid graphics corruption.\n"
373 "You can try export NOUVEAU_VTXIDX_IN_VRAM=0 to disable it.\n");
374 }
375 #endif
376
377 return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
378 }
379
380 struct pipe_screen *
381 nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
382 {
383 static const unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32};
384 struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen);
385 struct nouveau_channel *chan;
386 struct pipe_screen *pscreen;
387 unsigned eng3d_class = 0;
388 int ret, i;
389
390 if (!screen)
391 return NULL;
392
393 pscreen = &screen->base.base;
394
395 ret = nouveau_screen_init(&screen->base, dev);
396 if (ret) {
397 nvfx_screen_destroy(pscreen);
398 return NULL;
399 }
400 chan = screen->base.channel;
401
402 pscreen->winsys = ws;
403 pscreen->destroy = nvfx_screen_destroy;
404 pscreen->get_param = nvfx_screen_get_param;
405 pscreen->get_paramf = nvfx_screen_get_paramf;
406 pscreen->is_format_supported = nvfx_screen_is_format_supported;
407 pscreen->context_create = nvfx_create;
408
409 switch (dev->chipset & 0xf0) {
410 case 0x30:
411 if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
412 eng3d_class = 0x0397;
413 else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
414 eng3d_class = 0x0697;
415 else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
416 eng3d_class = 0x0497;
417 break;
418 case 0x40:
419 if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
420 eng3d_class = NV40TCL;
421 else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
422 eng3d_class = NV44TCL;
423 screen->is_nv4x = ~0;
424 break;
425 case 0x60:
426 if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
427 eng3d_class = NV44TCL;
428 screen->is_nv4x = ~0;
429 break;
430 }
431
432 if (!eng3d_class) {
433 NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset);
434 return NULL;
435 }
436
437 screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
438 screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
439
440 screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
441 screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
442 screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
443
444 screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
445
446 /* surely both nv3x and nv44 support index buffers too: find out how and test that */
447 if(eng3d_class == NV40TCL)
448 screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags;
449
450 if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags)
451 screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags;
452
453 nvfx_screen_init_resource_functions(pscreen);
454
455 ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d);
456 if (ret) {
457 NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
458 return FALSE;
459 }
460
461 /* 2D engine setup */
462 nvfx_screen_surface_init(pscreen);
463
464 /* Notifier for sync purposes */
465 ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
466 if (ret) {
467 NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
468 nvfx_screen_destroy(pscreen);
469 return NULL;
470 }
471
472 /* Query objects */
473 for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i)
474 {
475 ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query);
476 if(!ret)
477 break;
478 }
479
480 if (ret) {
481 NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
482 nvfx_screen_destroy(pscreen);
483 return NULL;
484 }
485
486 ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]);
487 if (ret) {
488 NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
489 nvfx_screen_destroy(pscreen);
490 return NULL;
491 }
492
493 LIST_INITHEAD(&screen->query_list);
494
495 /* Vtxprog resources */
496 if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) ||
497 nouveau_resource_init(&screen->vp_data_heap, 0, 256)) {
498 nvfx_screen_destroy(pscreen);
499 return NULL;
500 }
501
502 BIND_RING(chan, screen->eng3d, 7);
503
504 /* Static eng3d initialisation */
505 /* note that we just started using the channel, so we must have space in the pushbuffer */
506 OUT_RING(chan, RING_3D(NV34TCL_DMA_NOTIFY, 1));
507 OUT_RING(chan, screen->sync->handle);
508 OUT_RING(chan, RING_3D(NV34TCL_DMA_TEXTURE0, 2));
509 OUT_RING(chan, chan->vram->handle);
510 OUT_RING(chan, chan->gart->handle);
511 OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1));
512 OUT_RING(chan, chan->vram->handle);
513 OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 2));
514 OUT_RING(chan, chan->vram->handle);
515 OUT_RING(chan, chan->vram->handle);
516 OUT_RING(chan, RING_3D(NV34TCL_DMA_VTXBUF0, 2));
517 OUT_RING(chan, chan->vram->handle);
518 OUT_RING(chan, chan->gart->handle);
519
520 OUT_RING(chan, RING_3D(NV34TCL_DMA_FENCE, 2));
521 OUT_RING(chan, 0);
522 OUT_RING(chan, screen->query->handle);
523
524 OUT_RING(chan, RING_3D(NV34TCL_DMA_IN_MEMORY7, 2));
525 OUT_RING(chan, chan->vram->handle);
526 OUT_RING(chan, chan->vram->handle);
527
528 if(!screen->is_nv4x)
529 nv30_screen_init(screen);
530 else
531 nv40_screen_init(screen);
532
533 return pscreen;
534 }