X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fsvga%2Fsvga_pipe_query.c;h=77be3692ba06c6176cc36905205bd554ba488256;hb=a03d17ede778610f2c66099d0d5342cf09ef12a2;hp=11e69edce82958ae111d31b2a27594e4529bff5f;hpb=79e343b36a729afb8086b99e4bf15d8c444887c1;p=mesa.git diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 11e69edce82..77be3692ba0 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -50,6 +50,7 @@ struct svga_query { SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */ unsigned id; /** Per-context query identifier */ + boolean active; /** TRUE if query is active */ struct pipe_fence_handle *fence; @@ -72,16 +73,19 @@ struct svga_query { /** cast wrapper */ static inline struct svga_query * -svga_query( struct pipe_query *q ) +svga_query(struct pipe_query *q) { return (struct svga_query *)q; } +/** + * VGPU9 + */ -static boolean +static bool svga_get_query_result(struct pipe_context *pipe, struct pipe_query *q, - boolean wait, + bool wait, union pipe_query_result *result); static enum pipe_error @@ -115,11 +119,10 @@ define_query_vgpu9(struct svga_context *svga, return PIPE_OK; } -static enum pipe_error +static void begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq) { struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret = PIPE_OK; if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { /* The application doesn't care for the pending query result. @@ -137,36 +140,23 @@ begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq) sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; sws->fence_reference(sws, &sq->fence, NULL); - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - } - return ret; + SVGA_RETRY(svga, SVGA3D_BeginQuery(svga->swc, sq->svga_type)); } -static enum pipe_error +static void end_query_vgpu9(struct svga_context *svga, struct svga_query *sq) { - enum pipe_error ret = PIPE_OK; - /* Set to PENDING before sending EndQuery. */ sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; - ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); - } - return ret; + SVGA_RETRY(svga, SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf)); } -static boolean +static bool get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, - boolean wait, uint64_t *result) + bool wait, uint64_t *result) { struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret; SVGA3dQueryState state; if (!sq->fence) { @@ -174,12 +164,8 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause * a synchronous wait on the host. */ - ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); - } - assert (ret == PIPE_OK); + SVGA_RETRY(svga, SVGA3D_WaitForQuery(svga->swc, sq->svga_type, + sq->hwbuf)); svga_context_flush(svga, &sq->fence); assert(sq->fence); } @@ -187,8 +173,9 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, state = sq->queryResult->state; if (state == SVGA3D_QUERYSTATE_PENDING) { if (!wait) - return FALSE; - sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + return false; + sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE, + SVGA_FENCE_FLAG_QUERY); state = sq->queryResult->state; } @@ -196,7 +183,7 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, state == SVGA3D_QUERYSTATE_FAILED); *result = (uint64_t)sq->queryResult->result32; - return TRUE; + return true; } @@ -210,10 +197,10 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, * will hold queries of the same type. Multiple memory blocks can be allocated * for a particular query type. * - * Currently each memory block is of 184 bytes. We support up to 128 + * Currently each memory block is of 184 bytes. We support up to 512 * memory blocks. The query memory size is arbitrary right now. * Each occlusion query takes about 8 bytes. One memory block can accomodate - * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion + * 23 occlusion queries. 512 of those blocks can support up to 11K occlusion * queries. That seems reasonable for now. If we think this limit is * not enough, we can increase the limit or try to grow the mob in runtime. * Note, SVGA device does not impose one mob per context for queries, @@ -224,7 +211,7 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery. */ #define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2) -#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE) +#define SVGA_QUERY_MEM_SIZE (512 * SVGA_QUERY_MEM_BLOCK_SIZE) struct svga_qmem_alloc_entry { @@ -239,31 +226,34 @@ struct svga_qmem_alloc_entry /** * Allocate a memory block from the query object memory - * \return -1 if out of memory, else index of the query memory block + * \return NULL if out of memory, else pointer to the query memory block */ -static int +static struct svga_qmem_alloc_entry * allocate_query_block(struct svga_context *svga) { int index; unsigned offset; + struct svga_qmem_alloc_entry *alloc_entry = NULL; /* Find the next available query block */ index = util_bitmask_add(svga->gb_query_alloc_mask); if (index == UTIL_BITMASK_INVALID_INDEX) - return -1; + return NULL; offset = index * SVGA_QUERY_MEM_BLOCK_SIZE; if (offset >= svga->gb_query_len) { unsigned i; + /* Deallocate the out-of-range index */ + util_bitmask_clear(svga->gb_query_alloc_mask, index); + index = -1; + /** * All the memory blocks are allocated, lets see if there is * any empty memory block around that can be freed up. */ - index = -1; - for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) { - struct svga_qmem_alloc_entry *alloc_entry; + for (i = 0; i < SVGA3D_QUERYTYPE_MAX && index == -1; i++) { struct svga_qmem_alloc_entry *prev_alloc_entry = NULL; alloc_entry = svga->gb_query_map[i]; @@ -282,9 +272,20 @@ allocate_query_block(struct svga_context *svga) } } } + + if (index == -1) { + debug_printf("Query memory object is full\n"); + return NULL; + } } - return index; + if (!alloc_entry) { + assert(index != -1); + alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry); + alloc_entry->block_index = index; + } + + return alloc_entry; } /** @@ -342,17 +343,14 @@ allocate_query_block_entry(struct svga_context *svga, unsigned len) { struct svga_qmem_alloc_entry *alloc_entry; - int block_index = -1; - block_index = allocate_query_block(svga); - if (block_index == -1) - return NULL; - alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry); + alloc_entry = allocate_query_block(svga); if (!alloc_entry) return NULL; - alloc_entry->block_index = block_index; - alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE; + assert(alloc_entry->block_index != -1); + alloc_entry->start_offset = + alloc_entry->block_index * SVGA_QUERY_MEM_BLOCK_SIZE; alloc_entry->nquery = 0; alloc_entry->alloc_mask = util_bitmask_create(); alloc_entry->next = NULL; @@ -377,7 +375,7 @@ allocate_query(struct svga_context *svga, int slot_index = -1; unsigned offset; - assert(type < SVGA_QUERY_MAX); + assert(type < SVGA3D_QUERYTYPE_MAX); alloc_entry = svga->gb_query_map[type]; @@ -450,7 +448,7 @@ destroy_gb_query_obj(struct svga_context *svga) struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; unsigned i; - for (i = 0; i < SVGA_QUERY_MAX; i++) { + for (i = 0; i < SVGA3D_QUERYTYPE_MAX; i++) { struct svga_qmem_alloc_entry *alloc_entry, *next; alloc_entry = svga->gb_query_map[i]; while (alloc_entry) { @@ -494,69 +492,48 @@ define_query_vgpu10(struct svga_context *svga, svga->gb_query_alloc_mask = util_bitmask_create(); /* Bind the query object to the context */ - if (svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_SET) != PIPE_OK) { - svga_context_flush(svga, NULL); - svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_SET); - } + SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_SET)); } sq->gb_query = svga->gb_query; - /* Allocate an integer ID for this query */ - sq->id = util_bitmask_add(svga->query_id_bm); - if (sq->id == UTIL_BITMASK_INVALID_INDEX) - return PIPE_ERROR_OUT_OF_MEMORY; + /* Make sure query length is in multiples of 8 bytes */ + qlen = align(resultLen + sizeof(SVGA3dQueryState), 8); /* Find a slot for this query in the gb object */ - qlen = resultLen + sizeof(SVGA3dQueryState); sq->offset = allocate_query(svga, sq->svga_type, qlen); if (sq->offset == -1) return PIPE_ERROR_OUT_OF_MEMORY; + assert((sq->offset & 7) == 0); + SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n", sq->svga_type, sq->id, sq->offset); /** * Send SVGA3D commands to define the query */ - ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); - } + SVGA_RETRY_OOM(svga, ret, SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, + sq->svga_type, + sq->flags)); if (ret != PIPE_OK) return PIPE_ERROR_OUT_OF_MEMORY; - ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); - } - assert(ret == PIPE_OK); - - ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); - } - assert(ret == PIPE_OK); + SVGA_RETRY(svga, SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id)); + SVGA_RETRY(svga, SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, + sq->offset)); return PIPE_OK; } -static enum pipe_error +static void destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq) { - enum pipe_error ret; - - ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id); + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id)); /* Deallocate the memory slot allocated for this query */ deallocate_query(svga, sq); - - return ret; } @@ -566,13 +543,8 @@ destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq) static void rebind_vgpu10_query(struct svga_context *svga) { - if (svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_REF) != PIPE_OK) { - svga_context_flush(svga, NULL); - svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_REF); - } - + SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF)); svga->rebind.flags.query = FALSE; } @@ -581,7 +553,6 @@ static enum pipe_error begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq) { struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret = PIPE_OK; int status = 0; sws->fence_reference(sws, &sq->fence, NULL); @@ -596,45 +567,23 @@ begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq) } /* Send the BeginQuery command to the device */ - ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); - } - return ret; + SVGA_RETRY(svga, SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id)); + return PIPE_OK; } -static enum pipe_error +static void end_query_vgpu10(struct svga_context *svga, struct svga_query *sq) { - struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret = PIPE_OK; - if (svga->rebind.flags.query) { rebind_vgpu10_query(svga); } - ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); - } - - /* Finish fence is copied here from get_query_result_vgpu10. This helps - * with cases where svga_begin_query might be called again before - * svga_get_query_result, such as GL_TIME_ELAPSED. - */ - if (!sq->fence) { - svga_context_flush(svga, &sq->fence); - } - sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); - - return ret; + SVGA_RETRY(svga, SVGA3D_vgpu10_EndQuery(svga->swc, sq->id)); } -static boolean +static bool get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq, - boolean wait, void *result, int resultLen) + bool wait, void *result, int resultLen) { struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; SVGA3dQueryState queryState; @@ -645,17 +594,28 @@ get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq, sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); - if (queryState == SVGA3D_QUERYSTATE_PENDING) { + if (queryState != SVGA3D_QUERYSTATE_SUCCEEDED && !sq->fence) { + /* We don't have the query result yet, and the query hasn't been + * submitted. We need to submit it now since the GL spec says + * "Querying the state for a given occlusion query forces that + * occlusion query to complete within a finite amount of time." + */ + svga_context_flush(svga, &sq->fence); + } + + if (queryState == SVGA3D_QUERYSTATE_PENDING || + queryState == SVGA3D_QUERYSTATE_NEW) { if (!wait) - return FALSE; - sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + return false; + sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE, + SVGA_FENCE_FLAG_QUERY); sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); } assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED || queryState == SVGA3D_QUERYSTATE_FAILED); - return TRUE; + return true; } static struct pipe_query * @@ -665,6 +625,7 @@ svga_create_query(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_query *sq; + enum pipe_error ret; assert(query_type < SVGA_QUERY_MAX); @@ -684,7 +645,10 @@ svga_create_query(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_COUNTER: sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION; if (svga_have_vgpu10(svga)) { - define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult)); + ret = define_query_vgpu10(svga, sq, + sizeof(SVGADXOcclusionQueryResult)); + if (ret != PIPE_OK) + goto fail; /** * In OpenGL, occlusion counter query can be used in conditional @@ -698,37 +662,65 @@ svga_create_query(struct pipe_context *pipe, sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index); } else { - define_query_vgpu9(svga, sq); + ret = define_query_vgpu9(svga, sq); + if (ret != PIPE_OK) + goto fail; } break; case PIPE_QUERY_OCCLUSION_PREDICATE: - assert(svga_have_vgpu10(svga)); - sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE; - define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult)); + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (svga_have_vgpu10(svga)) { + sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE; + ret = define_query_vgpu10(svga, sq, + sizeof(SVGADXOcclusionPredicateQueryResult)); + if (ret != PIPE_OK) + goto fail; + } else { + sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION; + ret = define_query_vgpu9(svga, sq); + if (ret != PIPE_OK) + goto fail; + } break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: assert(svga_have_vgpu10(svga)); - sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS; - define_query_vgpu10(svga, sq, - sizeof(SVGADXStreamOutStatisticsQueryResult)); + + /* Until the device supports the new query type for multiple streams, + * we will use the single stream query type for stream 0. + */ + if (svga_have_sm5(svga) && index > 0) { + assert(index < 4); + + sq->svga_type = SVGA3D_QUERYTYPE_SOSTATS_STREAM0 + index; + } + else { + assert(index == 0); + sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS; + } + ret = define_query_vgpu10(svga, sq, + sizeof(SVGADXStreamOutStatisticsQueryResult)); + if (ret != PIPE_OK) + goto fail; break; case PIPE_QUERY_TIMESTAMP: assert(svga_have_vgpu10(svga)); sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP; - define_query_vgpu10(svga, sq, - sizeof(SVGADXTimestampQueryResult)); + ret = define_query_vgpu10(svga, sq, + sizeof(SVGADXTimestampQueryResult)); + if (ret != PIPE_OK) + goto fail; break; case SVGA_QUERY_NUM_DRAW_CALLS: case SVGA_QUERY_NUM_FALLBACKS: case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_NUM_VALIDATIONS: - case SVGA_QUERY_MAP_BUFFER_TIME: - case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + case SVGA_QUERY_NUM_TEXTURES_MAPPED: case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_COMMAND_BUFFERS: case SVGA_QUERY_COMMAND_BUFFER_SIZE: - case SVGA_QUERY_FLUSH_TIME: case SVGA_QUERY_SURFACE_WRITE_FLUSHES: case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: @@ -737,7 +729,22 @@ svga_create_query(struct pipe_context *pipe, case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_GENERATE_MIPMAP: case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + case SVGA_QUERY_SHADER_MEM_USED: break; + case SVGA_QUERY_FLUSH_TIME: + case SVGA_QUERY_MAP_BUFFER_TIME: + /* These queries need os_time_get() */ + svga->hud.uses_time = TRUE; + break; + default: assert(!"unexpected query type in svga_create_query()"); } @@ -770,6 +777,8 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: if (svga_have_vgpu10(svga)) { /* make sure to also destroy any associated predicate query */ if (sq->predicate) @@ -780,11 +789,6 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) } sws->fence_reference(sws, &sq->fence, NULL); break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - assert(svga_have_vgpu10(svga)); - destroy_query_vgpu10(svga, sq); - sws->fence_reference(sws, &sq->fence, NULL); - break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: @@ -798,8 +802,10 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_NUM_VALIDATIONS: case SVGA_QUERY_MAP_BUFFER_TIME: - case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + case SVGA_QUERY_NUM_TEXTURES_MAPPED: case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_COMMAND_BUFFERS: case SVGA_QUERY_COMMAND_BUFFER_SIZE: case SVGA_QUERY_FLUSH_TIME: case SVGA_QUERY_SURFACE_WRITE_FLUSHES: @@ -810,6 +816,15 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_GENERATE_MIPMAP: case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + case SVGA_QUERY_SHADER_MEM_USED: /* nothing */ break; default: @@ -823,19 +838,16 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) } -static boolean +static bool svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) { struct svga_context *svga = svga_context(pipe); struct svga_query *sq = svga_query(q); - enum pipe_error ret; + enum pipe_error ret = PIPE_OK; assert(sq); assert(sq->type < SVGA_QUERY_MAX); - SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, - sq, sq->id); - /* Need to flush out buffered drawing commands so that they don't * get counted in the query results. */ @@ -843,6 +855,8 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: if (svga_have_vgpu10(svga)) { ret = begin_query_vgpu10(svga, sq); /* also need to start the associated occlusion predicate query */ @@ -853,16 +867,11 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) (void) status; } } else { - ret = begin_query_vgpu9(svga, sq); + begin_query_vgpu9(svga, sq); } assert(ret == PIPE_OK); (void) ret; break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - assert(svga_have_vgpu10(svga)); - ret = begin_query_vgpu10(svga, sq); - assert(ret == PIPE_OK); - break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: @@ -886,12 +895,18 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_MAP_BUFFER_TIME: sq->begin_count = svga->hud.map_buffer_time; break; - case SVGA_QUERY_NUM_RESOURCES_MAPPED: - sq->begin_count = svga->hud.num_resources_mapped; + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + sq->begin_count = svga->hud.num_buffers_mapped; + break; + case SVGA_QUERY_NUM_TEXTURES_MAPPED: + sq->begin_count = svga->hud.num_textures_mapped; break; case SVGA_QUERY_NUM_BYTES_UPLOADED: sq->begin_count = svga->hud.num_bytes_uploaded; break; + case SVGA_QUERY_NUM_COMMAND_BUFFERS: + sq->begin_count = svga->swc->num_command_buffers; + break; case SVGA_QUERY_COMMAND_BUFFER_SIZE: sq->begin_count = svga->hud.command_buffer_size; break; @@ -904,78 +919,90 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_READBACKS: sq->begin_count = svga->hud.num_readbacks; break; + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + sq->begin_count = svga->hud.num_resource_updates; + break; + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + sq->begin_count = svga->hud.num_buffer_uploads; + break; + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + sq->begin_count = svga->hud.num_const_buf_updates; + break; + case SVGA_QUERY_NUM_CONST_UPDATES: + sq->begin_count = svga->hud.num_const_updates; + break; + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + sq->begin_count = svga->swc->num_shader_reloc; + break; + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + sq->begin_count = svga->swc->num_surf_reloc; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: case SVGA_QUERY_NUM_STATE_OBJECTS: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_SHADER_MEM_USED: /* nothing */ break; default: assert(!"unexpected query type in svga_begin_query()"); } - svga->sq[sq->type] = sq; + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n", + __FUNCTION__, sq, sq->id, sq->type, sq->svga_type); + + sq->active = TRUE; return true; } -static void +static bool svga_end_query(struct pipe_context *pipe, struct pipe_query *q) { struct svga_context *svga = svga_context(pipe); struct svga_query *sq = svga_query(q); - enum pipe_error ret; assert(sq); assert(sq->type < SVGA_QUERY_MAX); - SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, - sq, sq->id); + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x type=%d\n", + __FUNCTION__, sq, sq->type); - if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq) + if (sq->type == PIPE_QUERY_TIMESTAMP && !sq->active) svga_begin_query(pipe, q); + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n", + __FUNCTION__, sq, sq->id, sq->type, sq->svga_type); + svga_hwtnl_flush_retry(svga); - assert(svga->sq[sq->type] == sq); + assert(sq->active); switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: if (svga_have_vgpu10(svga)) { - ret = end_query_vgpu10(svga, sq); + end_query_vgpu10(svga, sq); /* also need to end the associated occlusion predicate query */ if (sq->predicate) { - enum pipe_error status; - status = end_query_vgpu10(svga, svga_query(sq->predicate)); - assert(status == PIPE_OK); - (void) status; + end_query_vgpu10(svga, svga_query(sq->predicate)); } } else { - ret = end_query_vgpu9(svga, sq); + end_query_vgpu9(svga, sq); } - assert(ret == PIPE_OK); - (void) ret; - /* TODO: Delay flushing. We don't really need to flush here, just ensure - * that there is one flush before svga_get_query_result attempts to get - * the result. - */ - svga_context_flush(svga, NULL); - break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - assert(svga_have_vgpu10(svga)); - ret = end_query_vgpu10(svga, sq); - assert(ret == PIPE_OK); break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_TIMESTAMP: assert(svga_have_vgpu10(svga)); - ret = end_query_vgpu10(svga, sq); - assert(ret == PIPE_OK); + end_query_vgpu10(svga, sq); break; case SVGA_QUERY_NUM_DRAW_CALLS: sq->end_count = svga->hud.num_draw_calls; @@ -992,12 +1019,18 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_MAP_BUFFER_TIME: sq->end_count = svga->hud.map_buffer_time; break; - case SVGA_QUERY_NUM_RESOURCES_MAPPED: - sq->end_count = svga->hud.num_resources_mapped; + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + sq->end_count = svga->hud.num_buffers_mapped; + break; + case SVGA_QUERY_NUM_TEXTURES_MAPPED: + sq->end_count = svga->hud.num_textures_mapped; break; case SVGA_QUERY_NUM_BYTES_UPLOADED: sq->end_count = svga->hud.num_bytes_uploaded; break; + case SVGA_QUERY_NUM_COMMAND_BUFFERS: + sq->end_count = svga->swc->num_command_buffers; + break; case SVGA_QUERY_COMMAND_BUFFER_SIZE: sq->end_count = svga->hud.command_buffer_size; break; @@ -1010,32 +1043,54 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_READBACKS: sq->end_count = svga->hud.num_readbacks; break; + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + sq->end_count = svga->hud.num_resource_updates; + break; + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + sq->end_count = svga->hud.num_buffer_uploads; + break; + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + sq->end_count = svga->hud.num_const_buf_updates; + break; + case SVGA_QUERY_NUM_CONST_UPDATES: + sq->end_count = svga->hud.num_const_updates; + break; + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + sq->end_count = svga->swc->num_shader_reloc; + break; + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + sq->end_count = svga->swc->num_surf_reloc; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: case SVGA_QUERY_NUM_STATE_OBJECTS: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_SHADER_MEM_USED: /* nothing */ break; default: assert(!"unexpected query type in svga_end_query()"); } - svga->sq[sq->type] = NULL; + sq->active = FALSE; + return true; } -static boolean +static bool svga_get_query_result(struct pipe_context *pipe, struct pipe_query *q, - boolean wait, + bool wait, union pipe_query_result *vresult) { struct svga_screen *svgascreen = svga_screen(pipe->screen); struct svga_context *svga = svga_context(pipe); struct svga_query *sq = svga_query(q); uint64_t *result = (uint64_t *)vresult; - boolean ret = TRUE; + bool ret = true; assert(sq); @@ -1050,15 +1105,21 @@ svga_get_query_result(struct pipe_context *pipe, (void *)&occResult, sizeof(occResult)); *result = (uint64_t)occResult.samplesRendered; } else { - ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result); + ret = get_query_result_vgpu9(svga, sq, wait, result); } break; - case PIPE_QUERY_OCCLUSION_PREDICATE: { - SVGADXOcclusionPredicateQueryResult occResult; - assert(svga_have_vgpu10(svga)); - ret = get_query_result_vgpu10(svga, sq, wait, - (void *)&occResult, sizeof(occResult)); - vresult->b = occResult.anySamplesRendered != 0; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { + if (svga_have_vgpu10(svga)) { + SVGADXOcclusionPredicateQueryResult occResult; + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&occResult, sizeof(occResult)); + vresult->b = occResult.anySamplesRendered != 0; + } else { + uint64_t count = 0; + ret = get_query_result_vgpu9(svga, sq, wait, &count); + vresult->b = count != 0; + } break; } case PIPE_QUERY_SO_STATISTICS: { @@ -1106,12 +1167,20 @@ svga_get_query_result(struct pipe_context *pipe, case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_NUM_VALIDATIONS: case SVGA_QUERY_MAP_BUFFER_TIME: - case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + case SVGA_QUERY_NUM_TEXTURES_MAPPED: case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_COMMAND_BUFFERS: case SVGA_QUERY_COMMAND_BUFFER_SIZE: case SVGA_QUERY_FLUSH_TIME: case SVGA_QUERY_SURFACE_WRITE_FLUSHES: case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: vresult->u64 = sq->end_count - sq->begin_count; break; /* These are running total counters */ @@ -1125,7 +1194,12 @@ svga_get_query_result(struct pipe_context *pipe, vresult->u64 = svgascreen->hud.num_resources; break; case SVGA_QUERY_NUM_STATE_OBJECTS: - vresult->u64 = svga->hud.num_state_objects; + vresult->u64 = (svga->hud.num_blend_objects + + svga->hud.num_depthstencil_objects + + svga->hud.num_rasterizer_objects + + svga->hud.num_sampler_objects + + svga->hud.num_samplerview_objects + + svga->hud.num_vertexelement_objects); break; case SVGA_QUERY_NUM_SURFACE_VIEWS: vresult->u64 = svga->hud.num_surface_views; @@ -1133,6 +1207,16 @@ svga_get_query_result(struct pipe_context *pipe, case SVGA_QUERY_NUM_GENERATE_MIPMAP: vresult->u64 = svga->hud.num_generate_mipmap; break; + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + vresult->u64 = svgascreen->hud.num_failed_allocations; + break; + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + vresult->f = (float) svga->swc->num_commands + / (float) svga->swc->num_draw_commands; + break; + case SVGA_QUERY_SHADER_MEM_USED: + vresult->u64 = svga->hud.shader_mem_used; + break; default: assert(!"unexpected query type in svga_get_query_result"); } @@ -1144,13 +1228,12 @@ svga_get_query_result(struct pipe_context *pipe, static void svga_render_condition(struct pipe_context *pipe, struct pipe_query *q, - boolean condition, uint mode) + bool condition, enum pipe_render_cond_flag mode) { struct svga_context *svga = svga_context(pipe); struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; struct svga_query *sq = svga_query(q); SVGA3dQueryId queryId; - enum pipe_error ret; SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); @@ -1174,17 +1257,23 @@ svga_render_condition(struct pipe_context *pipe, struct pipe_query *q, if ((mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) { - sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE, + SVGA_FENCE_FLAG_QUERY); } } - - ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, - (uint32) condition); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, - (uint32) condition); + /* + * if the kernel module doesn't support the predication command, + * we'll just render unconditionally. + * This is probably acceptable for the typical case of occlusion culling. + */ + if (sws->have_set_predication_cmd) { + SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition)); + svga->pred.query_id = queryId; + svga->pred.cond = condition; } + + svga->render_condition = (sq != NULL); } @@ -1207,6 +1296,45 @@ svga_get_timestamp(struct pipe_context *pipe) } +static void +svga_set_active_query_state(struct pipe_context *pipe, bool enable) +{ +} + + +/** + * \brief Toggle conditional rendering if already enabled + * + * \param svga[in] The svga context + * \param render_condition_enabled[in] Whether to ignore requests to turn + * conditional rendering off + * \param on[in] Whether to turn conditional rendering on or off + */ +void +svga_toggle_render_condition(struct svga_context *svga, + boolean render_condition_enabled, + boolean on) +{ + SVGA3dQueryId query_id; + + if (render_condition_enabled || + svga->pred.query_id == SVGA3D_INVALID_ID) { + return; + } + + /* + * If we get here, it means that the system supports + * conditional rendering since svga->pred.query_id has already been + * modified for this context and thus support has already been + * verified. + */ + query_id = on ? svga->pred.query_id : SVGA3D_INVALID_ID; + + SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, query_id, + (uint32) svga->pred.cond)); +} + + void svga_init_query_functions(struct svga_context *svga) { @@ -1215,6 +1343,7 @@ svga_init_query_functions(struct svga_context *svga) svga->pipe.begin_query = svga_begin_query; svga->pipe.end_query = svga_end_query; svga->pipe.get_query_result = svga_get_query_result; + svga->pipe.set_active_query_state = svga_set_active_query_state; svga->pipe.render_condition = svga_render_condition; svga->pipe.get_timestamp = svga_get_timestamp; }