#ifdef DEBUG
#define BEGIN_CS(size) do { \
- assert(size <= (cs_copy->max_dw - cs_copy->cdw)); \
+ assert(size <= (cs_copy->current.max_dw - cs_copy->current.cdw)); \
cs_count = size; \
} while (0)
#define OUT_CS(value) do { \
- cs_copy->buf[cs_copy->cdw++] = (value); \
+ cs_copy->current.buf[cs_copy->current.cdw++] = (value); \
CS_USED_DW(1); \
} while (0)
OUT_CS(CP_PACKET3(op, count))
#define OUT_CS_TABLE(values, count) do { \
- memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \
- cs_copy->cdw += (count); \
+ memcpy(cs_copy->current.buf + cs_copy->current.cdw, (values), (count) * 4); \
+ cs_copy->current.cdw += (count); \
CS_USED_DW(count); \
} while (0)
#define WRITE_CS_TABLE(values, count) do { \
assert(cs_count == 0); \
- memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \
- cs_copy->cdw += (count); \
+ memcpy(cs_copy->current.buf + cs_copy->current.cdw, (values), (count) * 4); \
+ cs_copy->current.cdw += (count); \
} while (0)
#endif /* R300_CS_H */
ctx->last_primitive_type = -1;
ctx->last_start_instance = -1;
- ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;
+ assert(!ctx->b.gfx.cs->prev_dw);
+ ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
}
/* The max number of bytes to copy per packet. */
static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
struct r600_command_buffer *cb)
{
- assert(cs->cdw + cb->num_dw <= cs->max_dw);
- memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw);
- cs->cdw += cb->num_dw;
+ assert(cs->current.cdw + cb->num_dw <= cs->current.max_dw);
+ memcpy(cs->current.buf + cs->current.cdw, cb->buf, 4 * cb->num_dw);
+ cs->current.cdw += cb->num_dw;
}
static inline void r600_set_atom_dirty(struct r600_context *rctx,
{
radeon_set_context_reg_seq(cs, reg, num);
/* Set the compute bit on the packet header */
- cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
+ cs->current.buf[cs->current.cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
}
static inline void radeon_set_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
- assert(cs->cdw+2+num <= cs->max_dw);
+ assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CTL_CONST, num, 0));
radeon_emit(cs, (reg - R600_CTL_CONST_OFFSET) >> 2);
}
static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
- assert(cs->cdw+2+num <= cs->max_dw);
+ assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
- assert(cs->cdw+2+num <= cs->max_dw);
+ assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw+2+num <= cs->max_dw);
+ assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw+2+num <= cs->max_dw);
+ assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
!ctx->ws->cs_memory_below_limit(ctx->dma.cs, vram, gtt)) {
ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
- assert((num_dw + ctx->dma.cs->cdw) <= ctx->dma.cs->max_dw);
+ assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
}
/* If GPUVM is not supported, the CS checker needs 2 entries
#include "util/list.h"
-#define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value))
-#define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd)
+#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RVCE_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+ RVCE_CS(cmd)
#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
-#define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
+#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
#define RVCE_MAX_AUX_BUFFER_NUM 4
RVCE_BEGIN(0x00000002); // task info
if (op == 0x3) {
if (enc->task_info_idx) {
- uint32_t offs = enc->cs->cdw - enc->task_info_idx + 3;
+ uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3;
// Update offsetOfNextTaskInfo
- enc->cs->buf[enc->task_info_idx] = offs;
+ enc->cs->current.buf[enc->task_info_idx] = offs;
}
- enc->task_info_idx = enc->cs->cdw;
+ enc->task_info_idx = enc->cs->current.cdw;
}
RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
RVCE_CS(op); // taskOperation
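RVCE_BEGIN() reserves one dword for the command's byte size and RVCE_END() back-patches it through a pointer difference, so an entire BEGIN/END region must land in a single chunk: `begin` points directly into current.buf, and a chunk switch in between would leave it dangling. Hand-expanding the macros for a command with two payload dwords (a sketch; values mirror the task-info excerpt above):

uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; /* RVCE_BEGIN: reserve size dword */
enc->cs->current.buf[enc->cs->current.cdw++] = 0x00000002;       /* command id (task info) */
enc->cs->current.buf[enc->cs->current.cdw++] = 0xffffffff;       /* offsetOfNextTaskInfo */
enc->cs->current.buf[enc->cs->current.cdw++] = op;               /* taskOperation */
*begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; /* RVCE_END: 4 dwords -> 16 bytes */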
struct winsys_handle;
struct radeon_winsys_ctx;
+struct radeon_winsys_cs_chunk {
+ unsigned cdw; /* Number of used dwords. */
+ unsigned max_dw; /* Maximum number of dwords. */
+ uint32_t *buf; /* The base pointer of the chunk. */
+};
+
struct radeon_winsys_cs {
- unsigned cdw; /* Number of used dwords. */
- unsigned max_dw; /* Maximum number of dwords. */
- uint32_t *buf; /* The command buffer. */
+ struct radeon_winsys_cs_chunk current;
+ struct radeon_winsys_cs_chunk *prev;
+ unsigned num_prev; /* Number of previous chunks. */
+ unsigned max_prev; /* Space in array pointed to by prev. */
+ unsigned prev_dw; /* Total number of dwords in previous chunks. */
};
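With the buffer split into chunks, current.cdw alone no longer gives the IB size; prev_dw caches the sum of all prev[i].cdw so the total stays O(1) to compute. A hypothetical helper (not part of this patch) that radeon_emitted() below and the debug-IB copy effectively open-code:

static inline unsigned radeon_cs_total_dw(struct radeon_winsys_cs *cs)
{
    return cs->prev_dw + cs->current.cdw;
}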
struct radeon_info {
static inline bool radeon_emitted(struct radeon_winsys_cs *cs, unsigned num_dw)
{
- return cs && cs->cdw > num_dw;
+ return cs && (cs->prev_dw + cs->current.cdw > num_dw);
}
static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
{
- cs->buf[cs->cdw++] = value;
+ cs->current.buf[cs->current.cdw++] = value;
}
static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
const uint32_t *values, unsigned count)
{
- memcpy(cs->buf+cs->cdw, values, count * 4);
- cs->cdw += count;
+ memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
+ cs->current.cdw += count;
}
#endif
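radeon_emit() and radeon_emit_array() write into the current chunk with no bounds checking; callers must reserve space for a whole packet up front so it lands in one chunk. A minimal caller sketch (hypothetical helper), using the cs_check_space entry point the DMA-space check above already relies on:

static void emit_gfx_nop(struct radeon_winsys *ws, struct radeon_winsys_cs *cs)
{
    if (!ws->cs_check_space(cs, 1))
        return; /* a real caller would flush and retry */
    radeon_emit(cs, 0xffff1000); /* type-3 NOP, the same value the GFX padding uses */
}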
si_trace_emit(ctx);
if (ctx->is_debug) {
+ uint32_t *buf;
unsigned i;
/* Save the IB for debug contexts. */
free(ctx->last_ib);
- ctx->last_ib_dw_size = cs->cdw;
- ctx->last_ib = malloc(cs->cdw * 4);
- memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
+ ctx->last_ib_dw_size = cs->prev_dw + cs->current.cdw;
+ ctx->last_ib = malloc(ctx->last_ib_dw_size * 4);
+ buf = ctx->last_ib;
+ for (i = 0; i < cs->num_prev; ++i) {
+ memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
+ buf += cs->prev[i].cdw;
+ }
+ memcpy(buf, cs->current.buf, cs->current.cdw * 4);
r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
r600_resource_reference(&ctx->trace_buf, NULL);
r600_postflush_resume_features(&ctx->b);
- ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;
+ assert(!ctx->b.gfx.cs->prev_dw);
+ ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
/* Invalidate various draw states so that they are emitted before
* the first draw call. */
4 * MIN2(util_next_power_of_two(ib->max_ib_size),
amdgpu_ib_max_submit_dwords(ib_type)));
- ib->base.cdw = 0;
- ib->base.buf = NULL;
+ ib->base.prev_dw = 0;
+ ib->base.num_prev = 0;
+ ib->base.current.cdw = 0;
+ ib->base.current.buf = NULL;
/* Allocate a new buffer for IBs if the current buffer is all used. */
if (!ib->big_ib_buffer ||
amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
- ib->base.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
+ ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
ib_size = ib->big_ib_buffer->size - ib->used_ib_space;
- ib->base.max_dw = ib_size / 4;
+ ib->base.current.max_dw = ib_size / 4;
return true;
}
return &cs->const_preamble_ib.base;
}
-#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
+#define OUT_CS(cs, value) (cs)->current.buf[(cs)->current.cdw++] = (value)
static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
struct pb_buffer *buf)
{
struct amdgpu_ib *ib = amdgpu_ib(rcs);
struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
- unsigned requested_size = rcs->cdw + dw;
+ unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw;
- assert(rcs->cdw <= rcs->max_dw);
+ assert(rcs->current.cdw <= rcs->current.max_dw);
if (requested_size > amdgpu_ib_max_submit_dwords(ib->ib_type))
return false;
ib->max_ib_size = MAX2(ib->max_ib_size, requested_size);
- return rcs->max_dw - rcs->cdw >= dw;
+ return rcs->current.max_dw - rcs->current.cdw >= dw;
}
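To put hypothetical numbers on it: with prev_dw = 0, current.cdw = 4000 and current.max_dw = 4096, a request for dw = 200 still raises max_ib_size to 4200, so the next IB buffer is allocated larger, but the function returns false because only 96 dwords remain in the current chunk; the caller is then expected to flush, as the DMA-space check further up does.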
static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
switch (cs->ring_type) {
case RING_DMA:
/* pad DMA ring to 8 DWs */
- while (rcs->cdw & 7)
+ while (rcs->current.cdw & 7)
OUT_CS(rcs, 0x00000000); /* NOP packet */
break;
case RING_GFX:
/* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
- while (rcs->cdw & 7)
+ while (rcs->current.cdw & 7)
OUT_CS(rcs, 0xffff1000); /* type3 nop packet */
/* Also pad the const IB. */
if (cs->const_ib.ib_mapped)
- while (!cs->const_ib.base.cdw || (cs->const_ib.base.cdw & 7))
+ while (!cs->const_ib.base.current.cdw || (cs->const_ib.base.current.cdw & 7))
OUT_CS(&cs->const_ib.base, 0xffff1000); /* type3 nop packet */
if (cs->const_preamble_ib.ib_mapped)
- while (!cs->const_preamble_ib.base.cdw || (cs->const_preamble_ib.base.cdw & 7))
+ while (!cs->const_preamble_ib.base.current.cdw || (cs->const_preamble_ib.base.current.cdw & 7))
OUT_CS(&cs->const_preamble_ib.base, 0xffff1000);
break;
case RING_UVD:
- while (rcs->cdw & 15)
+ while (rcs->current.cdw & 15)
OUT_CS(rcs, 0x80000000); /* type2 nop packet */
break;
default:
break;
}
- if (rcs->cdw > rcs->max_dw) {
+ if (rcs->current.cdw > rcs->current.max_dw) {
fprintf(stderr, "amdgpu: command stream overflowed\n");
}
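All of these padding loops share one shape: emit NOPs until current.cdw reaches the ring's fetch alignment. A hypothetical helper equivalent to the loops above (e.g. cdw == 13 with an 8-dword alignment emits NOPs at 13, 14 and 15, stopping at 16):

static void pad_cs(struct radeon_winsys_cs *rcs, unsigned alignment, uint32_t nop)
{
    /* alignment must be a power of two: 8 for the DMA/GFX rings, 16 for UVD */
    while (rcs->current.cdw & (alignment - 1))
        radeon_emit(rcs, nop);
}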
/* If the CS is not empty or overflowed.... */
- if (cs->main.base.cdw && cs->main.base.cdw <= cs->main.base.max_dw &&
+ if (radeon_emitted(&cs->main.base, 0) &&
+ cs->main.base.current.cdw <= cs->main.base.current.max_dw &&
!debug_get_option_noop()) {
struct amdgpu_cs_context *cur = cs->csc;
unsigned i, num_buffers = cur->num_buffers;
/* Set IB sizes. */
- cur->ib[IB_MAIN].size = cs->main.base.cdw;
- cs->main.used_ib_space += cs->main.base.cdw * 4;
- cs->main.max_ib_size = MAX2(cs->main.max_ib_size, cs->main.base.cdw);
+ cur->ib[IB_MAIN].size = cs->main.base.current.cdw;
+ cs->main.used_ib_space += cs->main.base.current.cdw * 4;
+ cs->main.max_ib_size = MAX2(cs->main.max_ib_size, cs->main.base.prev_dw + cs->main.base.current.cdw);
if (cs->const_ib.ib_mapped) {
- cur->ib[IB_CONST].size = cs->const_ib.base.cdw;
- cs->const_ib.used_ib_space += cs->const_ib.base.cdw * 4;
- cs->const_ib.max_ib_size = MAX2(cs->const_ib.max_ib_size, cs->const_ib.base.cdw);
+ cur->ib[IB_CONST].size = cs->const_ib.base.current.cdw;
+ cs->const_ib.used_ib_space += cs->const_ib.base.current.cdw * 4;
+ cs->const_ib.max_ib_size =
+ MAX2(cs->const_ib.max_ib_size, cs->const_ib.base.prev_dw + cs->const_ib.base.current.cdw);
}
if (cs->const_preamble_ib.ib_mapped) {
- cur->ib[IB_CONST_PREAMBLE].size = cs->const_preamble_ib.base.cdw;
- cs->const_preamble_ib.used_ib_space += cs->const_preamble_ib.base.cdw * 4;
+ cur->ib[IB_CONST_PREAMBLE].size = cs->const_preamble_ib.base.current.cdw;
+ cs->const_preamble_ib.used_ib_space += cs->const_preamble_ib.base.current.cdw * 4;
cs->const_preamble_ib.max_ib_size =
- MAX2(cs->const_preamble_ib.max_ib_size, cs->const_preamble_ib.base.cdw);
+ MAX2(cs->const_preamble_ib.max_ib_size,
+ cs->const_preamble_ib.base.prev_dw + cs->const_preamble_ib.base.current.cdw);
}
/* Create a fence. */
/* Set the first command buffer as current. */
cs->csc = &cs->csc1;
cs->cst = &cs->csc2;
- cs->base.buf = cs->csc->buf;
- cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);
+ cs->base.current.buf = cs->csc->buf;
+ cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
cs->ring_type = ring_type;
p_atomic_inc(&ws->num_cs);
return &cs->base;
}
-#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
+#define OUT_CS(cs, value) (cs)->current.buf[(cs)->current.cdw++] = (value)
static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
enum radeon_bo_domain rd,
} else {
radeon_cs_context_cleanup(cs->csc);
- assert(cs->base.cdw == 0);
- if (cs->base.cdw != 0) {
+ assert(cs->base.current.cdw == 0);
+ if (cs->base.current.cdw != 0) {
fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
}
}
static bool radeon_drm_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
- assert(rcs->cdw <= rcs->max_dw);
- return rcs->max_dw - rcs->cdw >= dw;
+ assert(rcs->current.cdw <= rcs->current.max_dw);
+ return rcs->current.max_dw - rcs->current.cdw >= dw;
}
static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
case RING_DMA:
/* pad DMA ring to 8 DWs */
if (cs->ws->info.chip_class <= SI) {
- while (rcs->cdw & 7)
+ while (rcs->current.cdw & 7)
OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
} else {
- while (rcs->cdw & 7)
+ while (rcs->current.cdw & 7)
OUT_CS(&cs->base, 0x00000000); /* NOP packet */
}
break;
* r6xx, requires at least 4 dw alignment to avoid a hw bug.
*/
if (cs->ws->info.gfx_ib_pad_with_type2) {
- while (rcs->cdw & 7)
+ while (rcs->current.cdw & 7)
OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
} else {
- while (rcs->cdw & 7)
+ while (rcs->current.cdw & 7)
OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
}
break;
case RING_UVD:
- while (rcs->cdw & 15)
+ while (rcs->current.cdw & 15)
OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
break;
default:
break;
}
- if (rcs->cdw > rcs->max_dw) {
+ if (rcs->current.cdw > rcs->current.max_dw) {
fprintf(stderr, "radeon: command stream overflowed\n");
}
cs->cst = tmp;
/* If the CS is not empty or overflowed, emit it in a separate thread. */
- if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
+ if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw &&
+     !debug_get_option_noop()) {
unsigned i, crelocs;
crelocs = cs->cst->crelocs;
- cs->cst->chunks[0].length_dw = cs->base.cdw;
+ cs->cst->chunks[0].length_dw = cs->base.current.cdw;
for (i = 0; i < crelocs; i++) {
/* Update the number of active asynchronous CS ioctls for the buffer. */
}
/* Prepare a new CS. */
- cs->base.buf = cs->csc->buf;
- cs->base.cdw = 0;
+ cs->base.current.buf = cs->csc->buf;
+ cs->base.current.cdw = 0;
cs->ws->num_cs_flushes++;
}