2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 #include "r600_pipe.h"
24 #include "r600_public.h"
26 #include "evergreen_compute.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "util/u_blitter.h"
32 #include "util/u_debug.h"
33 #include "util/u_format_s3tc.h"
34 #include "util/u_memory.h"
35 #include "util/u_simple_shaders.h"
36 #include "util/u_upload_mgr.h"
37 #include "util/u_math.h"
38 #include "vl/vl_decoder.h"
39 #include "vl/vl_video_buffer.h"
40 #include "os/os_time.h"
42 static const struct debug_named_value debug_options
[] = {
44 { "texdepth", DBG_TEX_DEPTH
, "Print texture depth info" },
45 { "compute", DBG_COMPUTE
, "Print compute info" },
48 { "fs", DBG_FS
, "Print fetch shaders" },
49 { "vs", DBG_VS
, "Print vertex shaders" },
50 { "gs", DBG_GS
, "Print geometry shaders" },
51 { "ps", DBG_PS
, "Print pixel shaders" },
52 { "cs", DBG_CS
, "Print compute shaders" },
55 { "nohyperz", DBG_NO_HYPERZ
, "Disable Hyper-Z" },
56 #if defined(R600_USE_LLVM)
57 { "nollvm", DBG_NO_LLVM
, "Disable the LLVM shader compiler" },
59 { "nocpdma", DBG_NO_CP_DMA
, "Disable CP DMA" },
60 { "nodma", DBG_NO_ASYNC_DMA
, "Disable asynchronous DMA" },
61 /* GL uses the word INVALIDATE, gallium uses the word DISCARD */
62 { "noinvalrange", DBG_NO_DISCARD_RANGE
, "Disable handling of INVALIDATE_RANGE map flags" },
64 DEBUG_NAMED_VALUE_END
/* must be last */
70 static struct r600_fence
*r600_create_fence(struct r600_context
*rctx
)
72 struct r600_screen
*rscreen
= rctx
->screen
;
73 struct r600_fence
*fence
= NULL
;
75 pipe_mutex_lock(rscreen
->fences
.mutex
);
77 if (!rscreen
->fences
.bo
) {
78 /* Create the shared buffer object */
79 rscreen
->fences
.bo
= (struct r600_resource
*)
80 pipe_buffer_create(&rscreen
->screen
, PIPE_BIND_CUSTOM
,
81 PIPE_USAGE_STAGING
, 4096);
82 if (!rscreen
->fences
.bo
) {
83 R600_ERR("r600: failed to create bo for fence objects\n");
86 rscreen
->fences
.data
= r600_buffer_mmap_sync_with_rings(rctx
, rscreen
->fences
.bo
, PIPE_TRANSFER_READ_WRITE
);
89 if (!LIST_IS_EMPTY(&rscreen
->fences
.pool
)) {
90 struct r600_fence
*entry
;
92 /* Try to find a freed fence that has been signalled */
93 LIST_FOR_EACH_ENTRY(entry
, &rscreen
->fences
.pool
, head
) {
94 if (rscreen
->fences
.data
[entry
->index
] != 0) {
95 LIST_DELINIT(&entry
->head
);
103 /* Allocate a new fence */
104 struct r600_fence_block
*block
;
107 if ((rscreen
->fences
.next_index
+ 1) >= 1024) {
108 R600_ERR("r600: too many concurrent fences\n");
112 index
= rscreen
->fences
.next_index
++;
114 if (!(index
% FENCE_BLOCK_SIZE
)) {
115 /* Allocate a new block */
116 block
= CALLOC_STRUCT(r600_fence_block
);
120 LIST_ADD(&block
->head
, &rscreen
->fences
.blocks
);
122 block
= LIST_ENTRY(struct r600_fence_block
, rscreen
->fences
.blocks
.next
, head
);
125 fence
= &block
->fences
[index
% FENCE_BLOCK_SIZE
];
126 fence
->index
= index
;
129 pipe_reference_init(&fence
->reference
, 1);
131 rscreen
->fences
.data
[fence
->index
] = 0;
132 r600_context_emit_fence(rctx
, rscreen
->fences
.bo
, fence
->index
, 1);
134 /* Create a dummy BO so that fence_finish without a timeout can sleep waiting for completion */
135 fence
->sleep_bo
= (struct r600_resource
*)
136 pipe_buffer_create(&rctx
->screen
->screen
, PIPE_BIND_CUSTOM
,
137 PIPE_USAGE_STAGING
, 1);
138 /* Add the fence as a dummy relocation. */
139 r600_context_bo_reloc(rctx
, &rctx
->rings
.gfx
, fence
->sleep_bo
, RADEON_USAGE_READWRITE
);
142 pipe_mutex_unlock(rscreen
->fences
.mutex
);
146 static void r600_flush(struct pipe_context
*ctx
, unsigned flags
)
148 struct r600_context
*rctx
= (struct r600_context
*)ctx
;
149 struct pipe_query
*render_cond
= NULL
;
150 unsigned render_cond_mode
= 0;
152 rctx
->rings
.gfx
.flushing
= true;
153 /* Disable render condition. */
154 if (rctx
->current_render_cond
) {
155 render_cond
= rctx
->current_render_cond
;
156 render_cond_mode
= rctx
->current_render_cond_mode
;
157 ctx
->render_condition(ctx
, NULL
, 0);
160 r600_context_flush(rctx
, flags
);
161 rctx
->rings
.gfx
.flushing
= false;
162 r600_begin_new_cs(rctx
);
164 /* Re-enable render condition. */
166 ctx
->render_condition(ctx
, render_cond
, render_cond_mode
);
170 static void r600_flush_from_st(struct pipe_context
*ctx
,
171 struct pipe_fence_handle
**fence
,
172 enum pipe_flush_flags flags
)
174 struct r600_context
*rctx
= (struct r600_context
*)ctx
;
175 struct r600_fence
**rfence
= (struct r600_fence
**)fence
;
178 fflags
= flags
& PIPE_FLUSH_END_OF_FRAME
? RADEON_FLUSH_END_OF_FRAME
: 0;
180 *rfence
= r600_create_fence(rctx
);
182 /* flush gfx & dma ring, order does not matter as only one can be live */
183 if (rctx
->rings
.dma
.cs
) {
184 rctx
->rings
.dma
.flush(rctx
, fflags
);
186 rctx
->rings
.gfx
.flush(rctx
, fflags
);
/**
 * Flush callback for the GFX ring.
 *
 * \param ctx    opaque context pointer; it is handed to r600_flush() as a
 *               struct pipe_context pointer.
 * \param flags  flush flags, forwarded unchanged.
 */
static void r600_flush_gfx_ring(void *ctx, unsigned flags)
{
	struct pipe_context *pipe = (struct pipe_context *)ctx;

	r600_flush(pipe, flags);
}
194 static void r600_flush_dma_ring(void *ctx
, unsigned flags
)
196 struct r600_context
*rctx
= (struct r600_context
*)ctx
;
197 struct radeon_winsys_cs
*cs
= rctx
->rings
.dma
.cs
;
198 unsigned padding_dw
, i
;
204 /* Pad the DMA CS to a multiple of 8 dwords. */
205 padding_dw
= 8 - cs
->cdw
% 8;
206 if (padding_dw
< 8) {
207 for (i
= 0; i
< padding_dw
; i
++) {
208 cs
->buf
[cs
->cdw
++] = DMA_PACKET(DMA_PACKET_NOP
, 0, 0, 0);
212 rctx
->rings
.dma
.flushing
= true;
213 rctx
->ws
->cs_flush(cs
, flags
);
214 rctx
->rings
.dma
.flushing
= false;
217 boolean
r600_rings_is_buffer_referenced(struct r600_context
*ctx
,
218 struct radeon_winsys_cs_handle
*buf
,
219 enum radeon_bo_usage usage
)
221 if (ctx
->ws
->cs_is_buffer_referenced(ctx
->rings
.gfx
.cs
, buf
, usage
)) {
224 if (ctx
->rings
.dma
.cs
) {
225 if (ctx
->ws
->cs_is_buffer_referenced(ctx
->rings
.dma
.cs
, buf
, usage
)) {
232 void *r600_buffer_mmap_sync_with_rings(struct r600_context
*ctx
,
233 struct r600_resource
*resource
,
236 enum radeon_bo_usage rusage
= RADEON_USAGE_READWRITE
;
238 bool sync_flush
= TRUE
;
240 if (usage
& PIPE_TRANSFER_UNSYNCHRONIZED
) {
241 return ctx
->ws
->buffer_map(resource
->cs_buf
, NULL
, usage
);
244 if (!(usage
& PIPE_TRANSFER_WRITE
)) {
245 /* have to wait for pending read */
246 rusage
= RADEON_USAGE_WRITE
;
248 if (usage
& PIPE_TRANSFER_DONTBLOCK
) {
249 flags
|= RADEON_FLUSH_ASYNC
;
252 if (ctx
->ws
->cs_is_buffer_referenced(ctx
->rings
.gfx
.cs
, resource
->cs_buf
, rusage
) && ctx
->rings
.gfx
.cs
->cdw
) {
253 ctx
->rings
.gfx
.flush(ctx
, flags
);
254 if (usage
& PIPE_TRANSFER_DONTBLOCK
) {
258 if (ctx
->rings
.dma
.cs
) {
259 if (ctx
->ws
->cs_is_buffer_referenced(ctx
->rings
.dma
.cs
, resource
->cs_buf
, rusage
) && ctx
->rings
.dma
.cs
->cdw
) {
260 ctx
->rings
.dma
.flush(ctx
, flags
);
261 if (usage
& PIPE_TRANSFER_DONTBLOCK
) {
267 if (usage
& PIPE_TRANSFER_DONTBLOCK
) {
268 if (ctx
->ws
->buffer_is_busy(resource
->buf
, rusage
)) {
273 /* Try to avoid busy-waiting in radeon_bo_wait. */
274 ctx
->ws
->cs_sync_flush(ctx
->rings
.gfx
.cs
);
275 if (ctx
->rings
.dma
.cs
) {
276 ctx
->ws
->cs_sync_flush(ctx
->rings
.dma
.cs
);
279 ctx
->ws
->buffer_wait(resource
->buf
, rusage
);
281 /* at this point everything is synchronized */
282 return ctx
->ws
->buffer_map(resource
->cs_buf
, NULL
, usage
| PIPE_TRANSFER_UNSYNCHRONIZED
);
285 static void r600_flush_from_winsys(void *ctx
, unsigned flags
)
287 struct r600_context
*rctx
= (struct r600_context
*)ctx
;
289 rctx
->rings
.gfx
.flush(rctx
, flags
);
292 static void r600_flush_dma_from_winsys(void *ctx
, unsigned flags
)
294 struct r600_context
*rctx
= (struct r600_context
*)ctx
;
296 rctx
->rings
.dma
.flush(rctx
, flags
);
299 static void r600_destroy_context(struct pipe_context
*context
)
301 struct r600_context
*rctx
= (struct r600_context
*)context
;
303 r600_isa_destroy(rctx
->isa
);
305 pipe_resource_reference((struct pipe_resource
**)&rctx
->dummy_cmask
, NULL
);
306 pipe_resource_reference((struct pipe_resource
**)&rctx
->dummy_fmask
, NULL
);
308 if (rctx
->dummy_pixel_shader
) {
309 rctx
->context
.delete_fs_state(&rctx
->context
, rctx
->dummy_pixel_shader
);
311 if (rctx
->custom_dsa_flush
) {
312 rctx
->context
.delete_depth_stencil_alpha_state(&rctx
->context
, rctx
->custom_dsa_flush
);
314 if (rctx
->custom_blend_resolve
) {
315 rctx
->context
.delete_blend_state(&rctx
->context
, rctx
->custom_blend_resolve
);
317 if (rctx
->custom_blend_decompress
) {
318 rctx
->context
.delete_blend_state(&rctx
->context
, rctx
->custom_blend_decompress
);
320 if (rctx
->custom_blend_fmask_decompress
) {
321 rctx
->context
.delete_blend_state(&rctx
->context
, rctx
->custom_blend_fmask_decompress
);
323 util_unreference_framebuffer_state(&rctx
->framebuffer
.state
);
326 util_blitter_destroy(rctx
->blitter
);
328 if (rctx
->uploader
) {
329 u_upload_destroy(rctx
->uploader
);
331 if (rctx
->allocator_so_filled_size
) {
332 u_suballocator_destroy(rctx
->allocator_so_filled_size
);
334 if (rctx
->allocator_fetch_shader
) {
335 u_suballocator_destroy(rctx
->allocator_fetch_shader
);
337 util_slab_destroy(&rctx
->pool_transfers
);
339 r600_release_command_buffer(&rctx
->start_cs_cmd
);
341 if (rctx
->rings
.gfx
.cs
) {
342 rctx
->ws
->cs_destroy(rctx
->rings
.gfx
.cs
);
344 if (rctx
->rings
.dma
.cs
) {
345 rctx
->ws
->cs_destroy(rctx
->rings
.dma
.cs
);
351 static struct pipe_context
*r600_create_context(struct pipe_screen
*screen
, void *priv
)
353 struct r600_context
*rctx
= CALLOC_STRUCT(r600_context
);
354 struct r600_screen
* rscreen
= (struct r600_screen
*)screen
;
359 util_slab_create(&rctx
->pool_transfers
,
360 sizeof(struct r600_transfer
), 64,
361 UTIL_SLAB_SINGLETHREADED
);
363 rctx
->context
.screen
= screen
;
364 rctx
->context
.priv
= priv
;
365 rctx
->context
.destroy
= r600_destroy_context
;
366 rctx
->context
.flush
= r600_flush_from_st
;
368 /* Easy accessing of screen/winsys. */
369 rctx
->screen
= rscreen
;
370 rctx
->ws
= rscreen
->ws
;
371 rctx
->family
= rscreen
->family
;
372 rctx
->chip_class
= rscreen
->chip_class
;
373 rctx
->keep_tiling_flags
= rscreen
->info
.drm_minor
>= 12;
375 LIST_INITHEAD(&rctx
->active_nontimer_queries
);
377 r600_init_blit_functions(rctx
);
378 r600_init_query_functions(rctx
);
379 r600_init_context_resource_functions(rctx
);
380 r600_init_surface_functions(rctx
);
382 rctx
->context
.create_video_decoder
= vl_create_decoder
;
383 rctx
->context
.create_video_buffer
= vl_video_buffer_create
;
385 r600_init_common_state_functions(rctx
);
387 switch (rctx
->chip_class
) {
390 r600_init_state_functions(rctx
);
391 r600_init_atom_start_cs(rctx
);
393 rctx
->custom_dsa_flush
= r600_create_db_flush_dsa(rctx
);
394 rctx
->custom_blend_resolve
= rctx
->chip_class
== R700
? r700_create_resolve_blend(rctx
)
395 : r600_create_resolve_blend(rctx
);
396 rctx
->custom_blend_decompress
= r600_create_decompress_blend(rctx
);
397 rctx
->has_vertex_cache
= !(rctx
->family
== CHIP_RV610
||
398 rctx
->family
== CHIP_RV620
||
399 rctx
->family
== CHIP_RS780
||
400 rctx
->family
== CHIP_RS880
||
401 rctx
->family
== CHIP_RV710
);
405 evergreen_init_state_functions(rctx
);
406 evergreen_init_atom_start_cs(rctx
);
407 evergreen_init_atom_start_compute_cs(rctx
);
409 rctx
->custom_dsa_flush
= evergreen_create_db_flush_dsa(rctx
);
410 rctx
->custom_blend_resolve
= evergreen_create_resolve_blend(rctx
);
411 rctx
->custom_blend_decompress
= evergreen_create_decompress_blend(rctx
);
412 rctx
->custom_blend_fmask_decompress
= evergreen_create_fmask_decompress_blend(rctx
);
413 rctx
->has_vertex_cache
= !(rctx
->family
== CHIP_CEDAR
||
414 rctx
->family
== CHIP_PALM
||
415 rctx
->family
== CHIP_SUMO
||
416 rctx
->family
== CHIP_SUMO2
||
417 rctx
->family
== CHIP_CAICOS
||
418 rctx
->family
== CHIP_CAYMAN
||
419 rctx
->family
== CHIP_ARUBA
);
422 R600_ERR("Unsupported chip class %d.\n", rctx
->chip_class
);
426 rctx
->rings
.gfx
.cs
= rctx
->ws
->cs_create(rctx
->ws
, RING_GFX
);
427 rctx
->rings
.gfx
.flush
= r600_flush_gfx_ring
;
428 rctx
->ws
->cs_set_flush_callback(rctx
->rings
.gfx
.cs
, r600_flush_from_winsys
, rctx
);
429 rctx
->rings
.gfx
.flushing
= false;
431 rctx
->rings
.dma
.cs
= NULL
;
432 if (rscreen
->info
.r600_has_dma
&& !(rscreen
->debug_flags
& DBG_NO_ASYNC_DMA
)) {
433 rctx
->rings
.dma
.cs
= rctx
->ws
->cs_create(rctx
->ws
, RING_DMA
);
434 rctx
->rings
.dma
.flush
= r600_flush_dma_ring
;
435 rctx
->ws
->cs_set_flush_callback(rctx
->rings
.dma
.cs
, r600_flush_dma_from_winsys
, rctx
);
436 rctx
->rings
.dma
.flushing
= false;
439 rctx
->uploader
= u_upload_create(&rctx
->context
, 1024 * 1024, 256,
440 PIPE_BIND_INDEX_BUFFER
|
441 PIPE_BIND_CONSTANT_BUFFER
);
445 rctx
->allocator_fetch_shader
= u_suballocator_create(&rctx
->context
, 64 * 1024, 256,
446 0, PIPE_USAGE_STATIC
, FALSE
);
447 if (!rctx
->allocator_fetch_shader
)
450 rctx
->allocator_so_filled_size
= u_suballocator_create(&rctx
->context
, 4096, 4,
451 0, PIPE_USAGE_STATIC
, TRUE
);
452 if (!rctx
->allocator_so_filled_size
)
455 rctx
->isa
= calloc(1, sizeof(struct r600_isa
));
456 if (!rctx
->isa
|| r600_isa_init(rctx
, rctx
->isa
))
459 rctx
->blitter
= util_blitter_create(&rctx
->context
);
460 if (rctx
->blitter
== NULL
)
462 util_blitter_set_texture_multisample(rctx
->blitter
, rscreen
->has_msaa
);
463 rctx
->blitter
->draw_rectangle
= r600_draw_rectangle
;
465 r600_begin_new_cs(rctx
);
466 r600_get_backend_mask(rctx
); /* this emits commands and must be last */
468 rctx
->dummy_pixel_shader
=
469 util_make_fragment_cloneinput_shader(&rctx
->context
, 0,
470 TGSI_SEMANTIC_GENERIC
,
471 TGSI_INTERPOLATE_CONSTANT
);
472 rctx
->context
.bind_fs_state(&rctx
->context
, rctx
->dummy_pixel_shader
);
474 return &rctx
->context
;
477 r600_destroy_context(&rctx
->context
);
484 static const char* r600_get_vendor(struct pipe_screen
* pscreen
)
489 static const char *r600_get_family_name(enum radeon_family family
)
492 case CHIP_R600
: return "AMD R600";
493 case CHIP_RV610
: return "AMD RV610";
494 case CHIP_RV630
: return "AMD RV630";
495 case CHIP_RV670
: return "AMD RV670";
496 case CHIP_RV620
: return "AMD RV620";
497 case CHIP_RV635
: return "AMD RV635";
498 case CHIP_RS780
: return "AMD RS780";
499 case CHIP_RS880
: return "AMD RS880";
500 case CHIP_RV770
: return "AMD RV770";
501 case CHIP_RV730
: return "AMD RV730";
502 case CHIP_RV710
: return "AMD RV710";
503 case CHIP_RV740
: return "AMD RV740";
504 case CHIP_CEDAR
: return "AMD CEDAR";
505 case CHIP_REDWOOD
: return "AMD REDWOOD";
506 case CHIP_JUNIPER
: return "AMD JUNIPER";
507 case CHIP_CYPRESS
: return "AMD CYPRESS";
508 case CHIP_HEMLOCK
: return "AMD HEMLOCK";
509 case CHIP_PALM
: return "AMD PALM";
510 case CHIP_SUMO
: return "AMD SUMO";
511 case CHIP_SUMO2
: return "AMD SUMO2";
512 case CHIP_BARTS
: return "AMD BARTS";
513 case CHIP_TURKS
: return "AMD TURKS";
514 case CHIP_CAICOS
: return "AMD CAICOS";
515 case CHIP_CAYMAN
: return "AMD CAYMAN";
516 case CHIP_ARUBA
: return "AMD ARUBA";
517 default: return "AMD unknown";
521 static const char* r600_get_name(struct pipe_screen
* pscreen
)
523 struct r600_screen
*rscreen
= (struct r600_screen
*)pscreen
;
525 return r600_get_family_name(rscreen
->family
);
528 static int r600_get_param(struct pipe_screen
* pscreen
, enum pipe_cap param
)
530 struct r600_screen
*rscreen
= (struct r600_screen
*)pscreen
;
531 enum radeon_family family
= rscreen
->family
;
534 /* Supported features (boolean caps). */
535 case PIPE_CAP_NPOT_TEXTURES
:
536 case PIPE_CAP_TWO_SIDED_STENCIL
:
537 case PIPE_CAP_ANISOTROPIC_FILTER
:
538 case PIPE_CAP_POINT_SPRITE
:
539 case PIPE_CAP_OCCLUSION_QUERY
:
540 case PIPE_CAP_TEXTURE_SHADOW_MAP
:
541 case PIPE_CAP_TEXTURE_MIRROR_CLAMP
:
542 case PIPE_CAP_BLEND_EQUATION_SEPARATE
:
543 case PIPE_CAP_TEXTURE_SWIZZLE
:
544 case PIPE_CAP_DEPTH_CLIP_DISABLE
:
545 case PIPE_CAP_SHADER_STENCIL_EXPORT
:
546 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR
:
547 case PIPE_CAP_MIXED_COLORBUFFER_FORMATS
:
548 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT
:
549 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
:
551 case PIPE_CAP_SEAMLESS_CUBE_MAP
:
552 case PIPE_CAP_PRIMITIVE_RESTART
:
553 case PIPE_CAP_CONDITIONAL_RENDER
:
554 case PIPE_CAP_TEXTURE_BARRIER
:
555 case PIPE_CAP_VERTEX_COLOR_UNCLAMPED
:
556 case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION
:
557 case PIPE_CAP_TGSI_INSTANCEID
:
558 case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY
:
559 case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY
:
560 case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY
:
561 case PIPE_CAP_USER_INDEX_BUFFERS
:
562 case PIPE_CAP_USER_CONSTANT_BUFFERS
:
563 case PIPE_CAP_COMPUTE
:
564 case PIPE_CAP_START_INSTANCE
:
565 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS
:
566 case PIPE_CAP_TEXTURE_BUFFER_OBJECTS
:
567 case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER
:
569 case PIPE_CAP_TGSI_TEXCOORD
:
572 case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT
:
573 return R600_MAP_BUFFER_ALIGNMENT
;
575 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT
:
578 case PIPE_CAP_GLSL_FEATURE_LEVEL
:
581 case PIPE_CAP_TEXTURE_MULTISAMPLE
:
582 return rscreen
->msaa_texture_support
!= MSAA_TEXTURE_SAMPLE_ZERO
;
584 /* Supported except the original R600. */
585 case PIPE_CAP_INDEP_BLEND_ENABLE
:
586 case PIPE_CAP_INDEP_BLEND_FUNC
:
587 /* R600 doesn't support per-MRT blends */
588 return family
== CHIP_R600
? 0 : 1;
590 /* Supported on Evergreen. */
591 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE
:
592 case PIPE_CAP_CUBE_MAP_ARRAY
:
593 return family
>= CHIP_CEDAR
? 1 : 0;
595 /* Unsupported features. */
596 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT
:
597 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER
:
598 case PIPE_CAP_SCALED_RESOLVE
:
599 case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS
:
600 case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS
:
601 case PIPE_CAP_FRAGMENT_COLOR_CLAMPED
:
602 case PIPE_CAP_VERTEX_COLOR_CLAMPED
:
603 case PIPE_CAP_USER_VERTEX_BUFFERS
:
604 case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
:
605 case PIPE_CAP_QUERY_PIPELINE_STATISTICS
:
609 case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS
:
610 return rscreen
->has_streamout
? 4 : 0;
611 case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME
:
612 return rscreen
->has_streamout
? 1 : 0;
613 case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS
:
614 case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS
:
618 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS
:
619 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS
:
620 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS
:
621 if (family
>= CHIP_CEDAR
)
625 case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS
:
626 return rscreen
->info
.drm_minor
>= 9 ?
627 (family
>= CHIP_CEDAR
? 16384 : 8192) : 0;
628 case PIPE_CAP_MAX_COMBINED_SAMPLERS
:
631 /* Render targets. */
632 case PIPE_CAP_MAX_RENDER_TARGETS
:
633 /* XXX some r6xx are buggy and can only do 4 */
636 /* Timer queries, present when the clock frequency is non zero. */
637 case PIPE_CAP_QUERY_TIME_ELAPSED
:
638 return rscreen
->info
.r600_clock_crystal_freq
!= 0;
639 case PIPE_CAP_QUERY_TIMESTAMP
:
640 return rscreen
->info
.drm_minor
>= 20 &&
641 rscreen
->info
.r600_clock_crystal_freq
!= 0;
643 case PIPE_CAP_MIN_TEXEL_OFFSET
:
646 case PIPE_CAP_MAX_TEXEL_OFFSET
:
652 static float r600_get_paramf(struct pipe_screen
* pscreen
,
653 enum pipe_capf param
)
655 struct r600_screen
*rscreen
= (struct r600_screen
*)pscreen
;
656 enum radeon_family family
= rscreen
->family
;
659 case PIPE_CAPF_MAX_LINE_WIDTH
:
660 case PIPE_CAPF_MAX_LINE_WIDTH_AA
:
661 case PIPE_CAPF_MAX_POINT_WIDTH
:
662 case PIPE_CAPF_MAX_POINT_WIDTH_AA
:
663 if (family
>= CHIP_CEDAR
)
667 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY
:
669 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS
:
671 case PIPE_CAPF_GUARD_BAND_LEFT
:
672 case PIPE_CAPF_GUARD_BAND_TOP
:
673 case PIPE_CAPF_GUARD_BAND_RIGHT
:
674 case PIPE_CAPF_GUARD_BAND_BOTTOM
:
680 static int r600_get_shader_param(struct pipe_screen
* pscreen
, unsigned shader
, enum pipe_shader_cap param
)
684 case PIPE_SHADER_FRAGMENT
:
685 case PIPE_SHADER_VERTEX
:
686 case PIPE_SHADER_COMPUTE
:
688 case PIPE_SHADER_GEOMETRY
:
689 /* XXX: support and enable geometry programs */
692 /* XXX: support tessellation on Evergreen */
697 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS
:
698 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS
:
699 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS
:
700 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS
:
702 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH
:
704 case PIPE_SHADER_CAP_MAX_INPUTS
:
706 case PIPE_SHADER_CAP_MAX_TEMPS
:
707 return 256; /* Max native temporaries. */
708 case PIPE_SHADER_CAP_MAX_ADDRS
:
709 /* XXX Isn't this equal to TEMPS? */
710 return 1; /* Max native address registers */
711 case PIPE_SHADER_CAP_MAX_CONSTS
:
712 return R600_MAX_CONST_BUFFER_SIZE
;
713 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS
:
714 return R600_MAX_USER_CONST_BUFFERS
;
715 case PIPE_SHADER_CAP_MAX_PREDS
:
716 return 0; /* nothing uses this */
717 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED
:
719 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED
:
721 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR
:
722 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR
:
723 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR
:
724 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR
:
726 case PIPE_SHADER_CAP_SUBROUTINES
:
728 case PIPE_SHADER_CAP_INTEGERS
:
730 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS
:
732 case PIPE_SHADER_CAP_PREFERRED_IR
:
733 if (shader
== PIPE_SHADER_COMPUTE
) {
734 return PIPE_SHADER_IR_LLVM
;
736 return PIPE_SHADER_IR_TGSI
;
742 static int r600_get_video_param(struct pipe_screen
*screen
,
743 enum pipe_video_profile profile
,
744 enum pipe_video_cap param
)
747 case PIPE_VIDEO_CAP_SUPPORTED
:
748 return vl_profile_supported(screen
, profile
);
749 case PIPE_VIDEO_CAP_NPOT_TEXTURES
:
751 case PIPE_VIDEO_CAP_MAX_WIDTH
:
752 case PIPE_VIDEO_CAP_MAX_HEIGHT
:
753 return vl_video_buffer_max_size(screen
);
754 case PIPE_VIDEO_CAP_PREFERED_FORMAT
:
755 return PIPE_FORMAT_NV12
;
756 case PIPE_VIDEO_CAP_PREFERS_INTERLACED
:
758 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED
:
760 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE
:
767 static int r600_get_compute_param(struct pipe_screen
*screen
,
768 enum pipe_compute_cap param
,
771 //TODO: select these params by asic
773 case PIPE_COMPUTE_CAP_IR_TARGET
:
775 strcpy(ret
, "r600--");
777 return 7 * sizeof(char);
779 case PIPE_COMPUTE_CAP_GRID_DIMENSION
:
781 uint64_t * grid_dimension
= ret
;
782 grid_dimension
[0] = 3;
784 return 1 * sizeof(uint64_t);
786 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE
:
788 uint64_t * grid_size
= ret
;
789 grid_size
[0] = 65535;
790 grid_size
[1] = 65535;
793 return 3 * sizeof(uint64_t) ;
795 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE
:
797 uint64_t * block_size
= ret
;
802 return 3 * sizeof(uint64_t);
804 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK
:
806 uint64_t * max_threads_per_block
= ret
;
807 *max_threads_per_block
= 256;
809 return sizeof(uint64_t);
811 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE
:
813 uint64_t * max_global_size
= ret
;
814 /* XXX: This is what the proprietary driver reports, we
815 * may want to use a different value. */
816 *max_global_size
= 201326592;
818 return sizeof(uint64_t);
820 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE
:
822 uint64_t * max_input_size
= ret
;
823 *max_input_size
= 1024;
825 return sizeof(uint64_t);
827 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE
:
829 uint64_t * max_local_size
= ret
;
830 /* XXX: This is what the proprietary driver reports, we
831 * may want to use a different value. */
832 *max_local_size
= 32768;
834 return sizeof(uint64_t);
836 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE
:
838 uint64_t max_global_size
;
839 uint64_t * max_mem_alloc_size
= ret
;
840 r600_get_compute_param(screen
,
841 PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE
,
843 /* OpenCL requires this value be at least
844 * max(MAX_GLOBAL_SIZE / 4, 128 * 1024 *1024)
845 * I'm really not sure what value to report here, but
846 * MAX_GLOBAL_SIZE / 4 seems reasonable.
848 *max_mem_alloc_size
= max_global_size
/ 4;
850 return sizeof(uint64_t);
853 fprintf(stderr
, "unknown PIPE_COMPUTE_CAP %d\n", param
);
858 static void r600_destroy_screen(struct pipe_screen
* pscreen
)
860 struct r600_screen
*rscreen
= (struct r600_screen
*)pscreen
;
865 if (rscreen
->global_pool
) {
866 compute_memory_pool_delete(rscreen
->global_pool
);
869 if (rscreen
->fences
.bo
) {
870 struct r600_fence_block
*entry
, *tmp
;
872 LIST_FOR_EACH_ENTRY_SAFE(entry
, tmp
, &rscreen
->fences
.blocks
, head
) {
873 LIST_DEL(&entry
->head
);
877 rscreen
->ws
->buffer_unmap(rscreen
->fences
.bo
->cs_buf
);
878 pipe_resource_reference((struct pipe_resource
**)&rscreen
->fences
.bo
, NULL
);
881 if (rscreen
->trace_bo
) {
882 rscreen
->ws
->buffer_unmap(rscreen
->trace_bo
->cs_buf
);
883 pipe_resource_reference((struct pipe_resource
**)&rscreen
->trace_bo
, NULL
);
886 pipe_mutex_destroy(rscreen
->fences
.mutex
);
888 rscreen
->ws
->destroy(rscreen
->ws
);
892 static void r600_fence_reference(struct pipe_screen
*pscreen
,
893 struct pipe_fence_handle
**ptr
,
894 struct pipe_fence_handle
*fence
)
896 struct r600_fence
**oldf
= (struct r600_fence
**)ptr
;
897 struct r600_fence
*newf
= (struct r600_fence
*)fence
;
899 if (pipe_reference(&(*oldf
)->reference
, &newf
->reference
)) {
900 struct r600_screen
*rscreen
= (struct r600_screen
*)pscreen
;
901 pipe_mutex_lock(rscreen
->fences
.mutex
);
902 pipe_resource_reference((struct pipe_resource
**)&(*oldf
)->sleep_bo
, NULL
);
903 LIST_ADDTAIL(&(*oldf
)->head
, &rscreen
->fences
.pool
);
904 pipe_mutex_unlock(rscreen
->fences
.mutex
);
910 static boolean
r600_fence_signalled(struct pipe_screen
*pscreen
,
911 struct pipe_fence_handle
*fence
)
913 struct r600_screen
*rscreen
= (struct r600_screen
*)pscreen
;
914 struct r600_fence
*rfence
= (struct r600_fence
*)fence
;
916 return rscreen
->fences
.data
[rfence
->index
] != 0;
919 static boolean
r600_fence_finish(struct pipe_screen
*pscreen
,
920 struct pipe_fence_handle
*fence
,
923 struct r600_screen
*rscreen
= (struct r600_screen
*)pscreen
;
924 struct r600_fence
*rfence
= (struct r600_fence
*)fence
;
925 int64_t start_time
= 0;
928 if (timeout
!= PIPE_TIMEOUT_INFINITE
) {
929 start_time
= os_time_get();
931 /* Convert to microseconds. */
935 while (rscreen
->fences
.data
[rfence
->index
] == 0) {
936 /* Special-case infinite timeout - wait for the dummy BO to become idle */
937 if (timeout
== PIPE_TIMEOUT_INFINITE
) {
938 rscreen
->ws
->buffer_wait(rfence
->sleep_bo
->buf
, RADEON_USAGE_READWRITE
);
942 /* The dummy BO will be busy until the CS including the fence has completed, or
943 * the GPU is reset. Don't bother continuing to spin when the BO is idle. */
944 if (!rscreen
->ws
->buffer_is_busy(rfence
->sleep_bo
->buf
, RADEON_USAGE_READWRITE
))
954 if (timeout
!= PIPE_TIMEOUT_INFINITE
&&
955 os_time_get() - start_time
>= timeout
) {
960 return rscreen
->fences
.data
[rfence
->index
] != 0;
963 static int r600_interpret_tiling(struct r600_screen
*rscreen
, uint32_t tiling_config
)
965 switch ((tiling_config
& 0xe) >> 1) {
967 rscreen
->tiling_info
.num_channels
= 1;
970 rscreen
->tiling_info
.num_channels
= 2;
973 rscreen
->tiling_info
.num_channels
= 4;
976 rscreen
->tiling_info
.num_channels
= 8;
982 switch ((tiling_config
& 0x30) >> 4) {
984 rscreen
->tiling_info
.num_banks
= 4;
987 rscreen
->tiling_info
.num_banks
= 8;
993 switch ((tiling_config
& 0xc0) >> 6) {
995 rscreen
->tiling_info
.group_bytes
= 256;
998 rscreen
->tiling_info
.group_bytes
= 512;
1006 static int evergreen_interpret_tiling(struct r600_screen
*rscreen
, uint32_t tiling_config
)
1008 switch (tiling_config
& 0xf) {
1010 rscreen
->tiling_info
.num_channels
= 1;
1013 rscreen
->tiling_info
.num_channels
= 2;
1016 rscreen
->tiling_info
.num_channels
= 4;
1019 rscreen
->tiling_info
.num_channels
= 8;
1025 switch ((tiling_config
& 0xf0) >> 4) {
1027 rscreen
->tiling_info
.num_banks
= 4;
1030 rscreen
->tiling_info
.num_banks
= 8;
1033 rscreen
->tiling_info
.num_banks
= 16;
1039 switch ((tiling_config
& 0xf00) >> 8) {
1041 rscreen
->tiling_info
.group_bytes
= 256;
1044 rscreen
->tiling_info
.group_bytes
= 512;
1052 static int r600_init_tiling(struct r600_screen
*rscreen
)
1054 uint32_t tiling_config
= rscreen
->info
.r600_tiling_config
;
1056 /* set default group bytes, overridden by tiling info ioctl */
1057 if (rscreen
->chip_class
<= R700
) {
1058 rscreen
->tiling_info
.group_bytes
= 256;
1060 rscreen
->tiling_info
.group_bytes
= 512;
1066 if (rscreen
->chip_class
<= R700
) {
1067 return r600_interpret_tiling(rscreen
, tiling_config
);
1069 return evergreen_interpret_tiling(rscreen
, tiling_config
);
1073 static uint64_t r600_get_timestamp(struct pipe_screen
*screen
)
1075 struct r600_screen
*rscreen
= (struct r600_screen
*)screen
;
1077 return 1000000 * rscreen
->ws
->query_timestamp(rscreen
->ws
) /
1078 rscreen
->info
.r600_clock_crystal_freq
;
1081 static int r600_get_driver_query_info(struct pipe_screen
*screen
,
1083 struct pipe_driver_query_info
*info
)
1085 struct r600_screen
*rscreen
= (struct r600_screen
*)screen
;
1086 struct pipe_driver_query_info list
[] = {
1087 {"draw-calls", R600_QUERY_DRAW_CALLS
, 0},
1088 {"requested-VRAM", R600_QUERY_REQUESTED_VRAM
, rscreen
->info
.vram_size
, TRUE
},
1089 {"requested-GTT", R600_QUERY_REQUESTED_GTT
, rscreen
->info
.gart_size
, TRUE
},
1093 return Elements(list
);
1095 if (index
>= Elements(list
))
1098 *info
= list
[index
];
1102 struct pipe_screen
*r600_screen_create(struct radeon_winsys
*ws
)
1104 struct r600_screen
*rscreen
= CALLOC_STRUCT(r600_screen
);
1106 if (rscreen
== NULL
) {
1111 ws
->query_info(ws
, &rscreen
->info
);
1113 rscreen
->debug_flags
= debug_get_flags_option("R600_DEBUG", debug_options
, 0);
1114 if (debug_get_bool_option("R600_DEBUG_COMPUTE", FALSE
))
1115 rscreen
->debug_flags
|= DBG_COMPUTE
;
1116 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE
))
1117 rscreen
->debug_flags
|= DBG_FS
| DBG_VS
| DBG_GS
| DBG_PS
| DBG_CS
;
1118 if (!debug_get_bool_option("R600_HYPERZ", TRUE
))
1119 rscreen
->debug_flags
|= DBG_NO_HYPERZ
;
1120 if (!debug_get_bool_option("R600_LLVM", TRUE
))
1121 rscreen
->debug_flags
|= DBG_NO_LLVM
;
1122 if (debug_get_bool_option("R600_PRINT_TEXDEPTH", FALSE
))
1123 rscreen
->debug_flags
|= DBG_TEX_DEPTH
;
1124 rscreen
->family
= rscreen
->info
.family
;
1125 rscreen
->chip_class
= rscreen
->info
.chip_class
;
1127 if (rscreen
->family
== CHIP_UNKNOWN
) {
1128 fprintf(stderr
, "r600: Unknown chipset 0x%04X\n", rscreen
->info
.pci_id
);
1133 /* Figure out streamout kernel support. */
1134 switch (rscreen
->chip_class
) {
1136 if (rscreen
->family
< CHIP_RS780
) {
1137 rscreen
->has_streamout
= rscreen
->info
.drm_minor
>= 14;
1139 rscreen
->has_streamout
= rscreen
->info
.drm_minor
>= 23;
1143 rscreen
->has_streamout
= rscreen
->info
.drm_minor
>= 17;
1147 rscreen
->has_streamout
= rscreen
->info
.drm_minor
>= 14;
1150 rscreen
->has_streamout
= FALSE
;
1155 switch (rscreen
->chip_class
) {
1158 rscreen
->has_msaa
= rscreen
->info
.drm_minor
>= 22;
1159 rscreen
->msaa_texture_support
= MSAA_TEXTURE_DECOMPRESSED
;
1162 rscreen
->has_msaa
= rscreen
->info
.drm_minor
>= 19;
1163 rscreen
->msaa_texture_support
=
1164 rscreen
->info
.drm_minor
>= 24 ? MSAA_TEXTURE_COMPRESSED
:
1165 MSAA_TEXTURE_DECOMPRESSED
;
1168 rscreen
->has_msaa
= rscreen
->info
.drm_minor
>= 19;
1169 /* We should be able to read compressed MSAA textures, but it doesn't work. */
1170 rscreen
->msaa_texture_support
= MSAA_TEXTURE_SAMPLE_ZERO
;
1173 rscreen
->has_msaa
= FALSE
;
1174 rscreen
->msaa_texture_support
= 0;
1178 rscreen
->has_cp_dma
= rscreen
->info
.drm_minor
>= 27 &&
1179 !(rscreen
->debug_flags
& DBG_NO_CP_DMA
);
1181 if (r600_init_tiling(rscreen
)) {
1186 rscreen
->screen
.destroy
= r600_destroy_screen
;
1187 rscreen
->screen
.get_name
= r600_get_name
;
1188 rscreen
->screen
.get_vendor
= r600_get_vendor
;
1189 rscreen
->screen
.get_param
= r600_get_param
;
1190 rscreen
->screen
.get_shader_param
= r600_get_shader_param
;
1191 rscreen
->screen
.get_paramf
= r600_get_paramf
;
1192 rscreen
->screen
.get_video_param
= r600_get_video_param
;
1193 rscreen
->screen
.get_compute_param
= r600_get_compute_param
;
1194 rscreen
->screen
.get_timestamp
= r600_get_timestamp
;
1196 if (rscreen
->chip_class
>= EVERGREEN
) {
1197 rscreen
->screen
.is_format_supported
= evergreen_is_format_supported
;
1198 rscreen
->dma_blit
= &evergreen_dma_blit
;
1200 rscreen
->screen
.is_format_supported
= r600_is_format_supported
;
1201 rscreen
->dma_blit
= &r600_dma_blit
;
1203 rscreen
->screen
.is_video_format_supported
= vl_video_buffer_is_format_supported
;
1204 rscreen
->screen
.context_create
= r600_create_context
;
1205 rscreen
->screen
.fence_reference
= r600_fence_reference
;
1206 rscreen
->screen
.fence_signalled
= r600_fence_signalled
;
1207 rscreen
->screen
.fence_finish
= r600_fence_finish
;
1208 rscreen
->screen
.get_driver_query_info
= r600_get_driver_query_info
;
1209 r600_init_screen_resource_functions(&rscreen
->screen
);
1211 util_format_s3tc_init();
1213 rscreen
->fences
.bo
= NULL
;
1214 rscreen
->fences
.data
= NULL
;
1215 rscreen
->fences
.next_index
= 0;
1216 LIST_INITHEAD(&rscreen
->fences
.pool
);
1217 LIST_INITHEAD(&rscreen
->fences
.blocks
);
1218 pipe_mutex_init(rscreen
->fences
.mutex
);
1220 rscreen
->global_pool
= compute_memory_pool_new(rscreen
);
1223 rscreen
->cs_count
= 0;
1224 if (rscreen
->info
.drm_minor
>= 28) {
1225 rscreen
->trace_bo
= (struct r600_resource
*)pipe_buffer_create(&rscreen
->screen
,
1229 if (rscreen
->trace_bo
) {
1230 rscreen
->trace_ptr
= rscreen
->ws
->buffer_map(rscreen
->trace_bo
->cs_buf
, NULL
,
1231 PIPE_TRANSFER_UNSYNCHRONIZED
);
1236 return &rscreen
->screen
;