1 #include "pipe/p_screen.h"
2 #include "pipe/p_state.h"
3 #include "util/u_format_s3tc.h"
4 #include "util/u_simple_screen.h"
6 #include "nouveau/nouveau_screen.h"
7 #include "nouveau/nv_object.xml.h"
8 #include "nvfx_context.h"
9 #include "nvfx_video_context.h"
10 #include "nvfx_screen.h"
11 #include "nvfx_resource.h"
14 #define NV30_3D_CHIPSET_3X_MASK 0x00000003
15 #define NV34_3D_CHIPSET_3X_MASK 0x00000010
16 #define NV35_3D_CHIPSET_3X_MASK 0x000001e0
18 #define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
19 #define NV4X_GRCLASS4497_CHIPSETS 0x00005450
20 #define NV6X_GRCLASS4497_CHIPSETS 0x00000088
23 nvfx_screen_get_param(struct pipe_screen
*pscreen
, enum pipe_cap param
)
25 struct nvfx_screen
*screen
= nvfx_screen(pscreen
);
28 case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS
:
30 case PIPE_CAP_NPOT_TEXTURES
:
31 return screen
->advertise_npot
;
32 case PIPE_CAP_TWO_SIDED_STENCIL
:
36 case PIPE_CAP_ANISOTROPIC_FILTER
:
38 case PIPE_CAP_POINT_SPRITE
:
40 case PIPE_CAP_MAX_RENDER_TARGETS
:
41 return screen
->use_nv4x
? 4 : 2;
42 case PIPE_CAP_OCCLUSION_QUERY
:
44 case PIPE_CAP_TIMER_QUERY
:
46 case PIPE_CAP_TEXTURE_SHADOW_MAP
:
48 case PIPE_CAP_TEXTURE_SWIZZLE
:
50 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS
:
52 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS
:
54 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS
:
56 case PIPE_CAP_TEXTURE_MIRROR_CLAMP
:
57 return !!screen
->use_nv4x
;
58 case PIPE_CAP_TEXTURE_MIRROR_REPEAT
:
60 case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS
:
61 return 0; /* We have 4 on nv40 - but unsupported currently */
62 case PIPE_CAP_BLEND_EQUATION_SEPARATE
:
63 return screen
->advertise_blend_equation_separate
;
64 case PIPE_CAP_MAX_COMBINED_SAMPLERS
:
66 case PIPE_CAP_INDEP_BLEND_ENABLE
:
67 /* TODO: on nv40 we have separate color masks */
68 /* TODO: nv40 mrt blending is probably broken */
70 case PIPE_CAP_INDEP_BLEND_FUNC
:
72 case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE
:
74 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT
:
75 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
:
76 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT
:
77 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER
:
79 case PIPE_CAP_DEPTH_CLAMP
:
80 return 0; // TODO: implement depth clamp
81 case PIPE_CAP_PRIMITIVE_RESTART
:
82 return 0; // TODO: implement primitive restart
83 case PIPE_CAP_SHADER_STENCIL_EXPORT
:
86 NOUVEAU_ERR("Warning: unknown PIPE_CAP %d\n", param
);
92 nvfx_screen_get_shader_param(struct pipe_screen
*pscreen
, unsigned shader
, enum pipe_shader_cap param
)
94 struct nvfx_screen
*screen
= nvfx_screen(pscreen
);
97 case PIPE_SHADER_FRAGMENT
:
99 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS
:
100 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS
:
101 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS
:
102 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS
:
104 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH
:
105 /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static
106 value (nv30:0/nv40:4) ? */
107 return screen
->use_nv4x
? 4 : 0;
108 case PIPE_SHADER_CAP_MAX_INPUTS
:
109 return screen
->use_nv4x
? 12 : 10;
110 case PIPE_SHADER_CAP_MAX_CONSTS
:
111 return screen
->use_nv4x
? 224 : 32;
112 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS
:
114 case PIPE_SHADER_CAP_MAX_TEMPS
:
116 case PIPE_SHADER_CAP_MAX_ADDRS
:
117 return screen
->use_nv4x
? 1 : 0;
118 case PIPE_SHADER_CAP_MAX_PREDS
:
119 return 0; /* we could expose these, but nothing uses them */
120 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED
:
126 case PIPE_SHADER_VERTEX
:
128 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS
:
129 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS
:
130 return screen
->use_nv4x
? 512 : 256;
131 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS
:
132 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS
:
133 return screen
->use_nv4x
? 512 : 0;
134 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH
:
135 /* FIXME: is it the dynamic (nv30:24/nv40:24) or the static
136 value (nv30:1/nv40:4) ? */
137 return screen
->use_nv4x
? 4 : 1;
138 case PIPE_SHADER_CAP_MAX_INPUTS
:
140 case PIPE_SHADER_CAP_MAX_CONSTS
:
141 /* - 6 is for clip planes; Gallium should be fixed to put
142 * them in the vertex shader itself, so we don't need to reserve these */
143 return (screen
->use_nv4x
? 468 : 256) - 6;
144 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS
:
146 case PIPE_SHADER_CAP_MAX_TEMPS
:
147 return screen
->use_nv4x
? 32 : 13;
148 case PIPE_SHADER_CAP_MAX_ADDRS
:
150 case PIPE_SHADER_CAP_MAX_PREDS
:
151 return 0; /* we could expose these, but nothing uses them */
152 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED
:
165 nvfx_screen_get_paramf(struct pipe_screen
*pscreen
, enum pipe_cap param
)
167 struct nvfx_screen
*screen
= nvfx_screen(pscreen
);
170 case PIPE_CAP_MAX_LINE_WIDTH
:
171 case PIPE_CAP_MAX_LINE_WIDTH_AA
:
173 case PIPE_CAP_MAX_POINT_WIDTH
:
174 case PIPE_CAP_MAX_POINT_WIDTH_AA
:
176 case PIPE_CAP_MAX_TEXTURE_ANISOTROPY
:
177 return screen
->use_nv4x
? 16.0 : 8.0;
178 case PIPE_CAP_MAX_TEXTURE_LOD_BIAS
:
181 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param
);
187 nvfx_screen_is_format_supported(struct pipe_screen
*pscreen
,
188 enum pipe_format format
,
189 enum pipe_texture_target target
,
190 unsigned sample_count
,
191 unsigned bind
, unsigned geom_flags
)
193 struct nvfx_screen
*screen
= nvfx_screen(pscreen
);
195 if (sample_count
> 1)
198 if (bind
& PIPE_BIND_RENDER_TARGET
) {
200 case PIPE_FORMAT_B8G8R8A8_UNORM
:
201 case PIPE_FORMAT_B8G8R8X8_UNORM
:
202 case PIPE_FORMAT_R8G8B8A8_UNORM
:
203 case PIPE_FORMAT_R8G8B8X8_UNORM
:
204 case PIPE_FORMAT_B5G6R5_UNORM
:
206 case PIPE_FORMAT_R16G16B16A16_FLOAT
:
207 if(!screen
->advertise_fp16
)
210 case PIPE_FORMAT_R32G32B32A32_FLOAT
:
211 if(!screen
->advertise_fp32
)
219 if (bind
& PIPE_BIND_DEPTH_STENCIL
) {
221 case PIPE_FORMAT_S8_USCALED_Z24_UNORM
:
222 case PIPE_FORMAT_X8Z24_UNORM
:
223 case PIPE_FORMAT_Z16_UNORM
:
230 if (bind
& PIPE_BIND_SAMPLER_VIEW
) {
231 struct nvfx_texture_format
* tf
= &nvfx_texture_formats
[format
];
232 if(util_format_is_s3tc(format
) && !util_format_s3tc_enabled
)
234 if(format
== PIPE_FORMAT_R16G16B16A16_FLOAT
&& !screen
->advertise_fp16
)
236 if(format
== PIPE_FORMAT_R32G32B32A32_FLOAT
&& !screen
->advertise_fp32
)
250 // note that we do actually support everything through translate
251 if (bind
& PIPE_BIND_VERTEX_BUFFER
) {
252 unsigned type
= nvfx_vertex_formats
[format
];
257 if (bind
& PIPE_BIND_INDEX_BUFFER
) {
258 // 8-bit indices supported, but not in hardware index buffer
259 if(format
!= PIPE_FORMAT_R16_USCALED
&& format
!= PIPE_FORMAT_R32_USCALED
)
263 if(bind
& PIPE_BIND_STREAM_OUTPUT
)
270 nvfx_screen_destroy(struct pipe_screen
*pscreen
)
272 struct nvfx_screen
*screen
= nvfx_screen(pscreen
);
274 nouveau_resource_destroy(&screen
->vp_exec_heap
);
275 nouveau_resource_destroy(&screen
->vp_data_heap
);
276 nouveau_resource_destroy(&screen
->query_heap
);
277 nouveau_notifier_free(&screen
->query
);
278 nouveau_notifier_free(&screen
->sync
);
279 nouveau_grobj_free(&screen
->eng3d
);
280 nvfx_screen_surface_takedown(pscreen
);
282 nouveau_screen_fini(&screen
->base
);
287 static void nv30_screen_init(struct nvfx_screen
*screen
)
289 struct nouveau_channel
*chan
= screen
->base
.channel
;
292 /* TODO: perhaps we should do some of this on nv40 too? */
293 for (i
=1; i
<8; i
++) {
294 OUT_RING(chan
, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i
), 1));
296 OUT_RING(chan
, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i
), 1));
300 OUT_RING(chan
, RING_3D(0x220, 1));
303 OUT_RING(chan
, RING_3D(0x03b0, 1));
304 OUT_RING(chan
, 0x00100000);
305 OUT_RING(chan
, RING_3D(0x1454, 1));
307 OUT_RING(chan
, RING_3D(0x1d80, 1));
309 OUT_RING(chan
, RING_3D(0x1450, 1));
310 OUT_RING(chan
, 0x00030004);
313 OUT_RING(chan
, RING_3D(0x1e98, 1));
315 OUT_RING(chan
, RING_3D(0x17e0, 3));
316 OUT_RING(chan
, fui(0.0));
317 OUT_RING(chan
, fui(0.0));
318 OUT_RING(chan
, fui(1.0));
319 OUT_RING(chan
, RING_3D(0x1f80, 16));
320 for (i
=0; i
<16; i
++) {
321 OUT_RING(chan
, (i
==8) ? 0x0000ffff : 0);
324 OUT_RING(chan
, RING_3D(0x120, 3));
329 OUT_RING(chan
, RING_3D(0x1d88, 1));
330 OUT_RING(chan
, 0x00001200);
332 OUT_RING(chan
, RING_3D(NV30_3D_RC_ENABLE
, 1));
335 OUT_RING(chan
, RING_3D(NV30_3D_DEPTH_RANGE_NEAR
, 2));
336 OUT_RING(chan
, fui(0.0));
337 OUT_RING(chan
, fui(1.0));
339 OUT_RING(chan
, RING_3D(NV30_3D_MULTISAMPLE_CONTROL
, 1));
340 OUT_RING(chan
, 0xffff0000);
342 /* enables use of vp rather than fixed-function somehow */
343 OUT_RING(chan
, RING_3D(0x1e94, 1));
344 OUT_RING(chan
, 0x13);
347 static void nv40_screen_init(struct nvfx_screen
*screen
)
349 struct nouveau_channel
*chan
= screen
->base
.channel
;
351 OUT_RING(chan
, RING_3D(NV40_3D_DMA_COLOR2
, 2));
352 OUT_RING(chan
, screen
->base
.channel
->vram
->handle
);
353 OUT_RING(chan
, screen
->base
.channel
->vram
->handle
);
355 OUT_RING(chan
, RING_3D(0x1450, 1));
356 OUT_RING(chan
, 0x00000004);
358 OUT_RING(chan
, RING_3D(0x1ea4, 3));
359 OUT_RING(chan
, 0x00000010);
360 OUT_RING(chan
, 0x01000100);
361 OUT_RING(chan
, 0xff800006);
363 /* vtxprog output routing */
364 OUT_RING(chan
, RING_3D(0x1fc4, 1));
365 OUT_RING(chan
, 0x06144321);
366 OUT_RING(chan
, RING_3D(0x1fc8, 2));
367 OUT_RING(chan
, 0xedcba987);
368 OUT_RING(chan
, 0x0000006f);
369 OUT_RING(chan
, RING_3D(0x1fd0, 1));
370 OUT_RING(chan
, 0x00171615);
371 OUT_RING(chan
, RING_3D(0x1fd4, 1));
372 OUT_RING(chan
, 0x001b1a19);
374 OUT_RING(chan
, RING_3D(0x1ef8, 1));
375 OUT_RING(chan
, 0x0020ffff);
376 OUT_RING(chan
, RING_3D(0x1d64, 1));
377 OUT_RING(chan
, 0x01d300d4);
378 OUT_RING(chan
, RING_3D(0x1e94, 1));
379 OUT_RING(chan
, 0x00000001);
381 OUT_RING(chan
, RING_3D(NV40_3D_MIPMAP_ROUNDING
, 1));
382 OUT_RING(chan
, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN
);
386 nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen
* screen
)
388 int vram_hack_default
= 0;
390 // TODO: this is a bit of a guess; also add other cards that may need this hack.
391 // It may also depend on the specific card or the AGP/PCIe chipset.
392 if(screen
->base
.device
->chipset
== 0x47 /* G70 */
393 || screen
->base
.device
->chipset
== 0x49 /* G71 */
394 || screen
->base
.device
->chipset
== 0x46 /* G72 */
396 vram_hack_default
= 1;
397 vram_hack
= debug_get_bool_option("NOUVEAU_VTXIDX_IN_VRAM", vram_hack_default
);
399 return vram_hack
? NOUVEAU_BO_VRAM
: NOUVEAU_BO_GART
;
402 static void nvfx_channel_flush_notify(struct nouveau_channel
* chan
)
404 struct nvfx_screen
* screen
= chan
->user_private
;
405 struct nvfx_context
* nvfx
= screen
->cur_ctx
;
407 nvfx
->relocs_needed
= NVFX_RELOCATE_ALL
;
411 nvfx_screen_create(struct pipe_winsys
*ws
, struct nouveau_device
*dev
)
413 static const unsigned query_sizes
[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32};
414 struct nvfx_screen
*screen
= CALLOC_STRUCT(nvfx_screen
);
415 struct nouveau_channel
*chan
;
416 struct pipe_screen
*pscreen
;
417 unsigned eng3d_class
= 0;
423 pscreen
= &screen
->base
.base
;
425 ret
= nouveau_screen_init(&screen
->base
, dev
);
427 nvfx_screen_destroy(pscreen
);
430 chan
= screen
->base
.channel
;
431 screen
->cur_ctx
= NULL
;
432 chan
->user_private
= screen
;
433 chan
->flush_notify
= nvfx_channel_flush_notify
;
435 pscreen
->winsys
= ws
;
436 pscreen
->destroy
= nvfx_screen_destroy
;
437 pscreen
->get_param
= nvfx_screen_get_param
;
438 pscreen
->get_shader_param
= nvfx_screen_get_shader_param
;
439 pscreen
->get_paramf
= nvfx_screen_get_paramf
;
440 pscreen
->is_format_supported
= nvfx_screen_is_format_supported
;
441 pscreen
->context_create
= nvfx_create
;
442 pscreen
->video_context_create
= nvfx_video_create
;
444 switch (dev
->chipset
& 0xf0) {
446 if (NV30_3D_CHIPSET_3X_MASK
& (1 << (dev
->chipset
& 0x0f)))
447 eng3d_class
= NV30_3D
;
448 else if (NV34_3D_CHIPSET_3X_MASK
& (1 << (dev
->chipset
& 0x0f)))
449 eng3d_class
= NV34_3D
;
450 else if (NV35_3D_CHIPSET_3X_MASK
& (1 << (dev
->chipset
& 0x0f)))
451 eng3d_class
= NV35_3D
;
454 if (NV4X_GRCLASS4097_CHIPSETS
& (1 << (dev
->chipset
& 0x0f)))
455 eng3d_class
= NV40_3D
;
456 else if (NV4X_GRCLASS4497_CHIPSETS
& (1 << (dev
->chipset
& 0x0f)))
457 eng3d_class
= NV44_3D
;
458 screen
->is_nv4x
= ~0;
461 if (NV6X_GRCLASS4497_CHIPSETS
& (1 << (dev
->chipset
& 0x0f)))
462 eng3d_class
= NV44_3D
;
463 screen
->is_nv4x
= ~0;
468 NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev
->chipset
);
472 screen
->advertise_npot
= !!screen
->is_nv4x
;
473 screen
->advertise_blend_equation_separate
= !!screen
->is_nv4x
;
474 screen
->use_nv4x
= screen
->is_nv4x
;
476 if(screen
->is_nv4x
) {
477 if(debug_get_bool_option("NVFX_SIMULATE_NV30", FALSE
))
478 screen
->use_nv4x
= 0;
479 if(!debug_get_bool_option("NVFX_NPOT", TRUE
))
480 screen
->advertise_npot
= 0;
481 if(!debug_get_bool_option("NVFX_BLEND_EQ_SEP", TRUE
))
482 screen
->advertise_blend_equation_separate
= 0;
485 screen
->force_swtnl
= debug_get_bool_option("NVFX_SWTNL", FALSE
);
486 screen
->trace_draw
= debug_get_bool_option("NVFX_TRACE_DRAW", FALSE
);
488 screen
->buffer_allocation_cost
= debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
489 screen
->inline_cost_per_hardware_cost
= atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
490 screen
->static_reuse_threshold
= atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
492 /* We don't advertise these by default because filtering and blending doesn't work as
493 * it should, due to several restrictions.
494 * The only exception is fp16 on nv40.
496 screen
->advertise_fp16
= debug_get_bool_option("NVFX_FP16", !!screen
->use_nv4x
);
497 screen
->advertise_fp32
= debug_get_bool_option("NVFX_FP32", 0);
499 screen
->vertex_buffer_reloc_flags
= nvfx_screen_get_vertex_buffer_flags(screen
);
501 /* surely both nv3x and nv44 support index buffers too: find out how and test that */
502 if(eng3d_class
== NV40_3D
)
503 screen
->index_buffer_reloc_flags
= screen
->vertex_buffer_reloc_flags
;
505 if(!screen
->force_swtnl
&& screen
->vertex_buffer_reloc_flags
== screen
->index_buffer_reloc_flags
)
506 screen
->base
.vertex_buffer_flags
= screen
->base
.index_buffer_flags
= screen
->vertex_buffer_reloc_flags
;
508 nvfx_screen_init_resource_functions(pscreen
);
510 ret
= nouveau_grobj_alloc(chan
, 0xbeef3097, eng3d_class
, &screen
->eng3d
);
512 NOUVEAU_ERR("Error creating 3D object: %d\n", ret
);
516 /* 2D engine setup */
517 nvfx_screen_surface_init(pscreen
);
519 /* Notifier for sync purposes */
520 ret
= nouveau_notifier_alloc(chan
, 0xbeef0301, 1, &screen
->sync
);
522 NOUVEAU_ERR("Error creating notifier object: %d\n", ret
);
523 nvfx_screen_destroy(pscreen
);
528 for(i
= 0; i
< sizeof(query_sizes
) / sizeof(query_sizes
[0]); ++i
)
530 ret
= nouveau_notifier_alloc(chan
, 0xbeef0302, query_sizes
[i
], &screen
->query
);
536 NOUVEAU_ERR("Error initialising query objects: %d\n", ret
);
537 nvfx_screen_destroy(pscreen
);
541 ret
= nouveau_resource_init(&screen
->query_heap
, 0, query_sizes
[i
]);
543 NOUVEAU_ERR("Error initialising query object heap: %d\n", ret
);
544 nvfx_screen_destroy(pscreen
);
548 LIST_INITHEAD(&screen
->query_list
);
550 /* Vtxprog resources */
551 if (nouveau_resource_init(&screen
->vp_exec_heap
, 0, screen
->use_nv4x
? 512 : 256) ||
552 nouveau_resource_init(&screen
->vp_data_heap
, 0, screen
->use_nv4x
? 468 : 256)) {
553 nvfx_screen_destroy(pscreen
);
557 BIND_RING(chan
, screen
->eng3d
, 7);
559 /* Static eng3d initialisation */
560 /* note that we just started using the channel, so we must have space in the pushbuffer */
561 OUT_RING(chan
, RING_3D(NV30_3D_DMA_NOTIFY
, 1));
562 OUT_RING(chan
, screen
->sync
->handle
);
563 OUT_RING(chan
, RING_3D(NV30_3D_DMA_TEXTURE0
, 2));
564 OUT_RING(chan
, chan
->vram
->handle
);
565 OUT_RING(chan
, chan
->gart
->handle
);
566 OUT_RING(chan
, RING_3D(NV30_3D_DMA_COLOR1
, 1));
567 OUT_RING(chan
, chan
->vram
->handle
);
568 OUT_RING(chan
, RING_3D(NV30_3D_DMA_COLOR0
, 2));
569 OUT_RING(chan
, chan
->vram
->handle
);
570 OUT_RING(chan
, chan
->vram
->handle
);
571 OUT_RING(chan
, RING_3D(NV30_3D_DMA_VTXBUF0
, 2));
572 OUT_RING(chan
, chan
->vram
->handle
);
573 OUT_RING(chan
, chan
->gart
->handle
);
575 OUT_RING(chan
, RING_3D(NV30_3D_DMA_FENCE
, 2));
577 OUT_RING(chan
, screen
->query
->handle
);
579 OUT_RING(chan
, RING_3D(NV30_3D_DMA_UNK1AC
, 2));
580 OUT_RING(chan
, chan
->vram
->handle
);
581 OUT_RING(chan
, chan
->vram
->handle
);
584 nv30_screen_init(screen
);
586 nv40_screen_init(screen
);