* Chia-I Wu <olv@lunarg.com>
*/
+#include "util/u_prim.h"
#include "intel_winsys.h"
#include "ilo_3d_pipeline.h"
+#include "ilo_blit.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_query.h"
/* in pairs */
assert(q->reg_read % 2 == 0);
- q->bo->map(q->bo, false);
- vals = q->bo->get_virtual(q->bo);
+ vals = intel_bo_map(q->bo, false);
for (i = 1; i < q->reg_read; i += 2)
depth_count += vals[i] - vals[i - 1];
- q->bo->unmap(q->bo);
+ intel_bo_unmap(q->bo);
/* accumulate so that the query can be resumed if wanted */
q->data.u64 += depth_count;
assert(q->reg_read == 1);
- q->bo->map(q->bo, false);
- vals = q->bo->get_virtual(q->bo);
+ vals = intel_bo_map(q->bo, false);
timestamp = vals[0];
- q->bo->unmap(q->bo);
+ intel_bo_unmap(q->bo);
q->data.u64 = timestamp_to_ns(timestamp);
q->reg_read = 0;
/* in pairs */
assert(q->reg_read % 2 == 0);
- q->bo->map(q->bo, false);
- vals = q->bo->get_virtual(q->bo);
+ vals = intel_bo_map(q->bo, false);
for (i = 1; i < q->reg_read; i += 2)
elapsed += vals[i] - vals[i - 1];
- q->bo->unmap(q->bo);
+ intel_bo_unmap(q->bo);
/* accumulate so that the query can be resumed if wanted */
q->data.u64 += timestamp_to_ns(elapsed);
q->reg_read = 0;
}
+static void
+process_query_for_pipeline_statistics(struct ilo_3d *hw3d,
+ struct ilo_query *q)
+{
+ const uint64_t *vals;
+ int i;
+
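+ /* the counters are written in begin/end pairs of 11 registers each, hence 22 */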
+ assert(q->reg_read % 22 == 0);
+
+ vals = intel_bo_map(q->bo, false);
+
+ for (i = 0; i < q->reg_read; i += 22) {
+ struct pipe_query_data_pipeline_statistics *stats =
+ &q->data.pipeline_statistics;
+ const uint64_t *begin = vals + i;
+ const uint64_t *end = begin + 11;
+
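+ /* accumulate the begin-to-end delta of each statistics counter */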
+ stats->ia_vertices += end[0] - begin[0];
+ stats->ia_primitives += end[1] - begin[1];
+ stats->vs_invocations += end[2] - begin[2];
+ stats->gs_invocations += end[3] - begin[3];
+ stats->gs_primitives += end[4] - begin[4];
+ stats->c_invocations += end[5] - begin[5];
+ stats->c_primitives += end[6] - begin[6];
+ stats->ps_invocations += end[7] - begin[7];
+ stats->hs_invocations += end[8] - begin[8];
+ stats->ds_invocations += end[9] - begin[9];
+ stats->cs_invocations += end[10] - begin[10];
+ }
+
+ intel_bo_unmap(q->bo);
+
+ q->reg_read = 0;
+}
+
static void
ilo_3d_resume_queries(struct ilo_3d *hw3d)
{
ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
q->bo, q->reg_read++);
}
+
+ /* resume pipeline statistics queries */
+ LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) {
+ /* accumulate the result if the bo is already full */
+ if (q->reg_read >= q->reg_total)
+ process_query_for_pipeline_statistics(hw3d, q);
+
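+ /* write a new begin-of-batch snapshot */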
+ ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+ q->bo, q->reg_read);
+ q->reg_read += 11;
+ }
}
static void
ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
q->bo, q->reg_read++);
}
+
+ /* pause pipeline statistics queries */
+ LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) {
+ assert(q->reg_read < q->reg_total);
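+ /* write the end-of-batch snapshot to complete the pair */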
+ ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+ q->bo, q->reg_read);
+ q->reg_read += 11;
+ }
}
static void
ilo_3d_pause_queries(hw3d);
}
-static void
+void
ilo_3d_own_render_ring(struct ilo_3d *hw3d)
{
- ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);
+ ilo_cp_set_ring(hw3d->cp, INTEL_RING_RENDER);
if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve))
ilo_3d_resume_queries(hw3d);
q->data.u64 = 0;
list_add(&q->list, &hw3d->prim_emitted_queries);
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ /* reserve some space for pausing the query */
+ q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
+ ILO_3D_PIPELINE_WRITE_STATISTICS, NULL);
+ hw3d->owner_reserve += q->reg_cmd_size;
+ ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+
+ memset(&q->data.pipeline_statistics, 0,
+ sizeof(q->data.pipeline_statistics));
+
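+ /* the bo stores begin/end pairs of the 11 statistics counters */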
+ if (ilo_query_alloc_bo(q, 11 * 2, -1, hw3d->cp->winsys)) {
+ /* XXX we should check the aperture size */
+ ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+ q->bo, q->reg_read);
+ q->reg_read += 11;
+
+ list_add(&q->list, &hw3d->pipeline_statistics_queries);
+ }
+ break;
default:
assert(!"unknown query type");
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
list_del(&q->list);
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ list_del(&q->list);
+
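+ /* the space reserved at query begin guarantees room for this final snapshot */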
+ assert(q->reg_read + 11 <= q->reg_total);
+ hw3d->owner_reserve -= q->reg_cmd_size;
+ ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+ ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+ q->bo, q->reg_read);
+ q->reg_read += 11;
+ break;
default:
assert(!"unknown query type");
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ if (q->bo)
+ process_query_for_pipeline_statistics(hw3d, q);
+ break;
default:
assert(!"unknown query type");
break;
ilo_3d_pipeline_invalidate(hw3d->pipeline,
ILO_3D_PIPELINE_INVALIDATE_BATCH_BO |
ILO_3D_PIPELINE_INVALIDATE_STATE_BO);
- if (!hw3d->cp->render_ctx) {
- ilo_3d_pipeline_invalidate(hw3d->pipeline,
- ILO_3D_PIPELINE_INVALIDATE_HW);
- }
hw3d->new_batch = true;
}
list_inithead(&hw3d->time_elapsed_queries);
list_inithead(&hw3d->prim_generated_queries);
list_inithead(&hw3d->prim_emitted_queries);
+ list_inithead(&hw3d->pipeline_statistics_queries);
hw3d->pipeline = ilo_3d_pipeline_create(cp, dev);
if (!hw3d->pipeline) {
ilo_3d_destroy(struct ilo_3d *hw3d)
{
ilo_3d_pipeline_destroy(hw3d->pipeline);
+
+ if (hw3d->kernel.bo)
+ intel_bo_unreference(hw3d->kernel.bo);
+
FREE(hw3d);
}
static bool
draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
- const struct pipe_draw_info *info,
int *prim_generated, int *prim_emitted)
{
bool need_flush = false;
* happens in the middle of a batch buffer, we need to insert manual
* flushes.
*/
- need_flush = (ilo->dirty & ILO_DIRTY_FRAMEBUFFER);
+ need_flush = (ilo->dirty & ILO_DIRTY_FB);
/* same to SO target changes */
- need_flush |= (ilo->dirty & ILO_DIRTY_STREAM_OUTPUT_TARGETS);
+ need_flush |= (ilo->dirty & ILO_DIRTY_SO);
}
/* make sure there is enough room first */
}
if (max_len > ilo_cp_space(hw3d->cp)) {
- ilo_cp_flush(hw3d->cp);
+ ilo_cp_flush(hw3d->cp, "out of space");
need_flush = false;
assert(max_len <= ilo_cp_space(hw3d->cp));
}
if (need_flush)
ilo_3d_pipeline_emit_flush(hw3d->pipeline);
- return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo, info,
+ return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo,
prim_generated, prim_emitted);
}
q->data.u64 += emitted;
}
-static bool
-pass_render_condition(struct ilo_3d *hw3d, struct pipe_context *pipe)
+bool
+ilo_3d_pass_render_condition(struct ilo_context *ilo)
{
+ struct ilo_3d *hw3d = ilo->hw3d;
uint64_t result;
bool wait;
break;
}
- if (pipe->get_query_result(pipe, hw3d->render_condition.query,
- wait, (union pipe_query_result *) &result)) {
- return (result > 0);
- }
- else {
+ if (ilo->base.get_query_result(&ilo->base, hw3d->render_condition.query,
+ wait, (union pipe_query_result *) &result))
+ return (!result == hw3d->render_condition.cond);
+ else
return true;
- }
}
#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
}
static inline bool
-ilo_check_restart_index(struct ilo_context *ilo,
- const struct pipe_draw_info *info)
+ilo_check_restart_index(const struct ilo_context *ilo, unsigned restart_index)
{
/*
* Haswell (GEN(7.5)) supports an arbitrary cut index, check everything
return true;
/* Note: indices must be unsigned byte, unsigned short or unsigned int */
- switch (ilo->ib.state.index_size) {
+ switch (ilo->ib.index_size) {
case 1:
- return ((info->restart_index & 0xff) == 0xff);
+ return ((restart_index & 0xff) == 0xff);
break;
case 2:
- return ((info->restart_index & 0xffff) == 0xffff);
+ return ((restart_index & 0xffff) == 0xffff);
break;
case 4:
- return (info->restart_index == 0xffffffff);
+ return (restart_index == 0xffffffff);
break;
}
return false;
}
static inline bool
-ilo_check_restart_prim_type(struct ilo_context *ilo,
- const struct pipe_draw_info *info)
+ilo_check_restart_prim_type(const struct ilo_context *ilo, unsigned prim)
{
- switch (info->mode) {
+ switch (prim) {
case PIPE_PRIM_POINTS:
case PIPE_PRIM_LINES:
case PIPE_PRIM_LINE_STRIP:
return;
}
- struct pipe_transfer *transfer = NULL;
- const void *map = NULL;
- map = pipe_buffer_map(pipe, ilo->ib.state.buffer,
- PIPE_TRANSFER_READ, &transfer);
+ if (ilo->ib.buffer) {
+ struct pipe_transfer *transfer;
+ const void *map;
+
+ map = pipe_buffer_map(pipe, ilo->ib.buffer,
+ PIPE_TRANSFER_READ, &transfer);
- sub_prim_count = ilo_find_sub_primitives(map + ilo->ib.state.offset,
- ilo->ib.state.index_size, info, restart_info);
+ sub_prim_count = ilo_find_sub_primitives(map + ilo->ib.offset,
+ ilo->ib.index_size, info, restart_info);
- pipe_buffer_unmap(pipe, transfer);
+ pipe_buffer_unmap(pipe, transfer);
+ }
+ else {
+ sub_prim_count = ilo_find_sub_primitives(ilo->ib.user_buffer,
+ ilo->ib.index_size, info, restart_info);
+ }
info = restart_info;
FREE(restart_info);
}
+static bool
+upload_shaders(struct ilo_3d *hw3d, struct ilo_shader_cache *shc)
+{
+ bool incremental = true;
+ int upload;
+
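+ /* with a NULL bo, ilo_shader_cache_upload() only returns the space needed */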
+ upload = ilo_shader_cache_upload(shc,
+ NULL, hw3d->kernel.used, incremental);
+ if (!upload)
+ return true;
+
+ /*
+ * Allocate a new bo. When this is a new batch, assume the bo is still in
+ * use by the previous batch and force allocation.
+ *
+ * Would it help to upload the shader cache with an unsynchronized
+ * mapping, and to remove the new-batch check here?
+ */
+ if (hw3d->kernel.used + upload > hw3d->kernel.size || hw3d->new_batch) {
+ unsigned new_size = (hw3d->kernel.size) ?
+ hw3d->kernel.size : (8 * 1024);
+
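+ /* double the size until the pending upload fits */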
+ while (hw3d->kernel.used + upload > new_size)
+ new_size *= 2;
+
+ if (hw3d->kernel.bo)
+ intel_bo_unreference(hw3d->kernel.bo);
+
+ hw3d->kernel.bo = intel_winsys_alloc_buffer(hw3d->cp->winsys,
+ "kernel bo", new_size, INTEL_DOMAIN_CPU);
+ if (!hw3d->kernel.bo) {
+ ilo_err("failed to allocate kernel bo\n");
+ return false;
+ }
+
+ hw3d->kernel.used = 0;
+ hw3d->kernel.size = new_size;
+ incremental = false;
+
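+ /* sanity check: a full (non-incremental) upload must fit in the new bo */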
+ assert(new_size >= ilo_shader_cache_upload(shc,
+ NULL, hw3d->kernel.used, incremental));
+
+ ilo_3d_pipeline_invalidate(hw3d->pipeline,
+ ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
+ }
+
+ upload = ilo_shader_cache_upload(shc,
+ hw3d->kernel.bo, hw3d->kernel.used, incremental);
+ if (upload < 0) {
+ ilo_err("failed to upload shaders\n");
+ return false;
+ }
+
+ hw3d->kernel.used += upload;
+
+ assert(hw3d->kernel.used <= hw3d->kernel.size);
+
+ return true;
+}
+
static void
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct ilo_3d *hw3d = ilo->hw3d;
int prim_generated, prim_emitted;
- if (!pass_render_condition(hw3d, pipe))
+ if (ilo_debug & ILO_DEBUG_DRAW) {
+ if (info->indexed) {
+ ilo_printf("indexed draw %s: "
+ "index start %d, count %d, vertex range [%d, %d]\n",
+ u_prim_name(info->mode), info->start, info->count,
+ info->min_index, info->max_index);
+ }
+ else {
+ ilo_printf("draw %s: vertex start %d, count %d\n",
+ u_prim_name(info->mode), info->start, info->count);
+ }
+
+ ilo_dump_dirty_flags(ilo->dirty);
+ }
+
+ if (!ilo_3d_pass_render_condition(ilo))
return;
if (info->primitive_restart && info->indexed) {
* Want to draw an indexed primitive using primitive restart
* Check that HW can handle the request and fall to SW if not.
*/
- if (!ilo_check_restart_index(ilo, info) ||
- !ilo_check_restart_prim_type(ilo, info)) {
+ if (!ilo_check_restart_index(ilo, info->restart_index) ||
+ !ilo_check_restart_prim_type(ilo, info->mode)) {
ilo_draw_vbo_with_sw_restart(pipe, info);
return;
}
}
- /* assume the cache is still in use by the previous batch */
- if (hw3d->new_batch)
- ilo_shader_cache_mark_busy(ilo->shader_cache);
-
- ilo_finalize_states(ilo);
+ ilo_finalize_3d_states(ilo, info);
- /* the shaders may be uploaded to a new shader cache */
- if (hw3d->shader_cache_seqno != ilo->shader_cache->seqno) {
- ilo_3d_pipeline_invalidate(hw3d->pipeline,
- ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
- }
+ if (!upload_shaders(hw3d, ilo->shader_cache))
+ return;
- /*
- * The VBs and/or IB may have different BOs due to being mapped with
- * PIPE_TRANSFER_DISCARD_x. We should track that instead of setting the
- * dirty flags for the performance reason.
- */
- ilo->dirty |= ILO_DIRTY_VERTEX_BUFFERS | ILO_DIRTY_INDEX_BUFFER;
+ ilo_blit_resolve_framebuffer(ilo);
/* If draw_vbo ever fails, return immediately. */
- if (!draw_vbo(hw3d, ilo, info, &prim_generated, &prim_emitted))
+ if (!draw_vbo(hw3d, ilo, &prim_generated, &prim_emitted))
return;
/* clear dirty status */
ilo->dirty = 0x0;
hw3d->new_batch = false;
- hw3d->shader_cache_seqno = ilo->shader_cache->seqno;
+
+ /* avoid dangling pointer reference */
+ ilo->draw = NULL;
update_prim_count(hw3d, prim_generated, prim_emitted);
static void
ilo_render_condition(struct pipe_context *pipe,
struct pipe_query *query,
+ boolean condition,
uint mode)
{
struct ilo_context *ilo = ilo_context(pipe);
/* reference count? */
hw3d->render_condition.query = query;
hw3d->render_condition.mode = mode;
+ hw3d->render_condition.cond = condition;
}
static void
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_3d *hw3d = ilo->hw3d;
- if (ilo->cp->ring != ILO_CP_RING_RENDER)
+ if (ilo->cp->ring != INTEL_RING_RENDER)
return;
ilo_3d_pipeline_emit_flush(hw3d->pipeline);
/* don't know why */
if (ilo->dev->gen >= ILO_GEN(7))
- ilo_cp_flush(hw3d->cp);
+ ilo_cp_flush(hw3d->cp, "texture barrier");
}
static void