r600g: build packet header once
authorJerome Glisse <jglisse@redhat.com>
Mon, 27 Sep 2010 15:53:34 +0000 (11:53 -0400)
committerJerome Glisse <jglisse@redhat.com>
Mon, 27 Sep 2010 15:53:34 +0000 (11:53 -0400)
Build the packet header once and allow adding fake register support so
we can handle things like indexed sets of registers (the evergreen sampler
border registers, for instance).

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600.h
src/gallium/winsys/r600/drm/evergreen_state.c
src/gallium/winsys/r600/drm/r600_state2.c

index 0a0a91eea2aa76d3c28a95203f36e2faedf353c5..486cb29005d9216c39842053ce6047c284c14276 100644 (file)
 #define   S_03000C_DST_SEL_W(x)                        (((x) & 0x7) << 12)
 #define   G_03000C_DST_SEL_W(x)                        (((x) >> 12) & 0x7)
 
+#define R_00A400_TD_PS_SAMPLER0_BORDER_INDEX         0x00A400
+#define R_00A404_TD_PS_SAMPLER0_BORDER_RED           0x00A404
+#define R_00A408_TD_PS_SAMPLER0_BORDER_GREEN         0x00A408
+#define R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE          0x00A40C
+#define R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA         0x00A410
+#define R_00A414_TD_VS_SAMPLER0_BORDER_INDEX         0x00A414
+#define R_00A418_TD_VS_SAMPLER0_BORDER_RED           0x00A418
+#define R_00A41C_TD_VS_SAMPLER0_BORDER_GREEN         0x00A41C
+#define R_00A420_TD_VS_SAMPLER0_BORDER_BLUE          0x00A420
+#define R_00A424_TD_VS_SAMPLER0_BORDER_ALPHA         0x00A424
+#define R_00A428_TD_GS_SAMPLER0_BORDER_INDEX         0x00A428
+#define R_00A42C_TD_GS_SAMPLER0_BORDER_RED           0x00A42C
+#define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN         0x00A430
+#define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE          0x00A434
+#define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA         0x00A438
+
 #define R_03C000_SQ_TEX_SAMPLER_WORD0_0              0x03C000
 #define   S_03C000_CLAMP_X(x)                          (((x) & 0x7) << 0)
 #define   G_03C000_CLAMP_X(x)                          (((x) >> 0) & 0x7)
index 44fae4bcef6f2cc2c987da08d86705fab6f11a87..b8c74675e60b417e5684e8d0863d8805b3a1bb5c 100644 (file)
@@ -137,6 +137,7 @@ enum evergreen_group_id {
        EVERGREEN_GROUP_CTL_CONST,
        EVERGREEN_GROUP_LOOP_CONST,
        EVERGREEN_GROUP_BOOL_CONST,
+       EVERGREEN_GROUP_SAMPLER_BORDER,
        EVERGREEN_NGROUPS
 };
 
@@ -183,6 +184,7 @@ struct r600_group_block {
        unsigned                pm4_ndwords;
        unsigned                nbo;
        unsigned                nreg;
+       u32                     *reg;
        u32                     pm4[R600_BLOCK_MAX_REG];
        unsigned                pm4_bo_index[R600_BLOCK_MAX_REG];
        struct r600_block_reloc reloc[R600_BLOCK_MAX_BO];
index b9d333060fcb6f16c4b5dea7dfce7950c2056857..c2455e3a490442491e6986e3a948b78f9d7893c2 100644 (file)
@@ -53,13 +53,13 @@ struct radeon_ws_bo {
 struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
 
 struct radeon_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned group_id, unsigned offset);
-void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group, unsigned opcode);
+void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group);
 void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct radeon_bo *bo);
-int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg);
+int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode);
 int r600_group_init(struct r600_group *group, unsigned start_offset, unsigned end_offset);
 
 #define GROUP_FORCE_NEW_BLOCK  0
-static const struct r600_reg evergreen_reg_list[] = {
+static const struct r600_reg evergreen_config_reg_list[] = {
        {0, 0, R_008958_VGT_PRIMITIVE_TYPE},
        {0, 0, R_008A14_PA_CL_ENHANCE},
        {0, 0, R_008C00_SQ_CONFIG},
@@ -74,6 +74,9 @@ static const struct r600_reg evergreen_reg_list[] = {
        {0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ},
        {0, 0, R_009100_SPI_CONFIG_CNTL},
        {0, 0, R_00913C_SPI_CONFIG_CNTL_1},
+};
+
+static const struct r600_reg evergreen_context_reg_list[] = {
        {0, 0, R_028000_DB_RENDER_CONTROL},
        {0, 0, R_028004_DB_COUNT_CONTROL},
        {0, 0, R_028008_DB_DEPTH_VIEW},
@@ -450,7 +453,7 @@ static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset)
        for (int i = 0; i < nreg; i++) {
                r600_shader_resource[i].offset += offset;
        }
-       return r600_context_add_block(ctx, r600_shader_resource, nreg);
+       return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE);
 }
 
 /* SHADER SAMPLER R600/R700 */
@@ -466,7 +469,39 @@ static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
        for (int i = 0; i < nreg; i++) {
                r600_shader_sampler[i].offset += offset;
        }
-       return r600_context_add_block(ctx, r600_shader_sampler, nreg);
+       return r600_context_add_block(ctx, r600_shader_sampler, nreg, PKT3_SET_SAMPLER);
+}
+
+/* SHADER SAMPLER BORDER EVERGREEN */
+static int evergreen_state_sampler_border_init(struct r600_context *ctx, u32 offset, unsigned id)
+{
+       struct r600_reg r600_shader_sampler_border[] = {
+               {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX},
+               {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_RED},
+               {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN},
+               {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE},
+               {0, 0, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA},
+       };
+       unsigned nreg = sizeof(r600_shader_sampler_border)/sizeof(struct r600_reg);
+       unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x10 + 0x40000 + id * 0x1C;
+       struct r600_group_block *block;
+       struct r600_group *group;
+       int r;
+
+       for (int i = 0; i < nreg; i++) {
+               r600_shader_sampler_border[i].offset -= R_00A400_TD_PS_SAMPLER0_BORDER_INDEX;
+               r600_shader_sampler_border[i].offset += fake_offset;
+       }
+       r = r600_context_add_block(ctx, r600_shader_sampler_border, nreg, PKT3_SET_CONFIG_REG);
+       if (r) {
+               return r;
+       }
+       /* set proper offset */
+       group = &ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER];
+       id = group->offset_block_id[((fake_offset - group->start_offset) >> 2)];
+       block = &group->blocks[id];
+       block->pm4[1] = (offset - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
+       return 0;
 }
 
 int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
@@ -502,10 +537,22 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
        if (r) {
                goto out_err;
        }
+       /* we use unassigned range of GPU reg to fake border color register */
+       r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER], 0x40000, 0x41000);
+       if (r) {
+               goto out_err;
+       }
        ctx->ngroups = EVERGREEN_NGROUPS;
 
        /* add blocks */
-       r = r600_context_add_block(ctx, evergreen_reg_list, sizeof(evergreen_reg_list)/sizeof(struct r600_reg));
+       r = r600_context_add_block(ctx, evergreen_config_reg_list,
+                                sizeof(evergreen_config_reg_list)/sizeof(struct r600_reg),
+                                       PKT3_SET_CONFIG_REG);
+       if (r)
+               goto out_err;
+       r = r600_context_add_block(ctx, evergreen_context_reg_list,
+                                sizeof(evergreen_context_reg_list)/sizeof(struct r600_reg),
+                                       PKT3_SET_CONTEXT_REG);
        if (r)
                goto out_err;
 
@@ -521,6 +568,18 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
                if (r)
                        goto out_err;
        }
+       /* PS SAMPLER BORDER */
+       for (int j = 0; j < 18; j++) {
+               r = evergreen_state_sampler_border_init(ctx, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, j);
+               if (r)
+                       goto out_err;
+       }
+       /* VS SAMPLER BORDER */
+       for (int j = 0; j < 18; j++) {
+               r = evergreen_state_sampler_border_init(ctx, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, j);
+               if (r)
+                       goto out_err;
+       }
        /* PS RESOURCE */
        for (int j = 0, offset = 0; j < 176; j++, offset += 0x20) {
                r = evergreen_state_resource_init(ctx, offset);
@@ -566,14 +625,14 @@ static inline void evergreen_context_pipe_state_set_resource(struct r600_context
        offset -= ctx->groups[EVERGREEN_GROUP_RESOURCE].start_offset;
        id = ctx->groups[EVERGREEN_GROUP_RESOURCE].offset_block_id[offset >> 2];
        block = &ctx->groups[EVERGREEN_GROUP_RESOURCE].blocks[id];
-       block->pm4[0] = state->regs[0].value;
-       block->pm4[1] = state->regs[1].value;
-       block->pm4[2] = state->regs[2].value;
-       block->pm4[3] = state->regs[3].value;
-       block->pm4[4] = state->regs[4].value;
-       block->pm4[5] = state->regs[5].value;
-       block->pm4[6] = state->regs[6].value;
-       block->pm4[7] = state->regs[7].value;
+       block->reg[0] = state->regs[0].value;
+       block->reg[1] = state->regs[1].value;
+       block->reg[2] = state->regs[2].value;
+       block->reg[3] = state->regs[3].value;
+       block->reg[4] = state->regs[4].value;
+       block->reg[5] = state->regs[5].value;
+       block->reg[6] = state->regs[6].value;
+       block->reg[7] = state->regs[7].value;
        radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
        radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
        if (state->regs[0].bo) {
@@ -589,7 +648,7 @@ static inline void evergreen_context_pipe_state_set_resource(struct r600_context
        }
        block->status |= R600_BLOCK_STATUS_ENABLED;
        block->status |= R600_BLOCK_STATUS_DIRTY;
-       ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+       ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 }
 
 void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
@@ -614,29 +673,29 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context
        offset -= ctx->groups[EVERGREEN_GROUP_SAMPLER].start_offset;
        id = ctx->groups[EVERGREEN_GROUP_SAMPLER].offset_block_id[offset >> 2];
        block = &ctx->groups[EVERGREEN_GROUP_SAMPLER].blocks[id];
-       block->pm4[0] = state->regs[0].value;
-       block->pm4[1] = state->regs[1].value;
-       block->pm4[2] = state->regs[2].value;
+       block->reg[0] = state->regs[0].value;
+       block->reg[1] = state->regs[1].value;
+       block->reg[2] = state->regs[2].value;
        block->status |= R600_BLOCK_STATUS_ENABLED;
        block->status |= R600_BLOCK_STATUS_DIRTY;
-       ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+       ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 }
 
-static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
+static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
 {
        struct r600_group_block *block;
-       unsigned id;
-
-       offset -= ctx->groups[EVERGREEN_GROUP_CONFIG].start_offset;
-       id = ctx->groups[EVERGREEN_GROUP_CONFIG].offset_block_id[offset >> 2];
-       block = &ctx->groups[EVERGREEN_GROUP_CONFIG].blocks[id];
-       block->pm4[0] = state->regs[3].value;
-       block->pm4[1] = state->regs[4].value;
-       block->pm4[2] = state->regs[5].value;
-       block->pm4[3] = state->regs[6].value;
+       unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x10 + 0x40000 + id * 0x1C;
+
+       fake_offset -= ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER].start_offset;
+       id = ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER].offset_block_id[fake_offset >> 2];
+       block = &ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER].blocks[id];
+       block->reg[0] = id;
+       block->reg[1] = state->regs[3].value;
+       block->reg[2] = state->regs[4].value;
+       block->reg[3] = state->regs[5].value;
        block->status |= R600_BLOCK_STATUS_ENABLED;
        block->status |= R600_BLOCK_STATUS_DIRTY;
-       ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+       ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 }
 
 void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
@@ -646,8 +705,7 @@ void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struc
        offset = 0x0003C000 + id * 0xc;
        evergreen_context_pipe_state_set_sampler(ctx, state, offset);
        if (state->nregs > 3) {
-               offset = 0x0000A400 + id * 0x10;
-               //              evergreen_context_pipe_state_set_sampler_border(ctx, state, offset);
+               evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, id);
        }
 }
 
@@ -658,8 +716,7 @@ void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struc
        offset = 0x0003C0D8 + id * 0xc;
        evergreen_context_pipe_state_set_sampler(ctx, state, offset);
        if (state->nregs > 3) {
-               offset = 0x0000A600 + id * 0x10;
-               //              evergreen_context_pipe_state_set_sampler_border(ctx, state, offset);
+               evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, id);
        }
 }
 
@@ -719,10 +776,11 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
        }
 
        /* enough room to copy packet */
-       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_CONFIG], PKT3_SET_CONFIG_REG);
-       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_CONTEXT], PKT3_SET_CONTEXT_REG);
-       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_RESOURCE], PKT3_SET_RESOURCE);
-       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_SAMPLER], PKT3_SET_SAMPLER);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_CONFIG]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_CONTEXT]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_RESOURCE]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_SAMPLER]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER]);
 
        /* draw packet */
        ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
@@ -773,14 +831,14 @@ static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_
        offset -= ctx->groups[EVERGREEN_GROUP_RESOURCE].start_offset;
        id = ctx->groups[EVERGREEN_GROUP_RESOURCE].offset_block_id[offset >> 2];
        block = &ctx->groups[EVERGREEN_GROUP_RESOURCE].blocks[id];
-       block->pm4[0] = state->regs[0].value;
-       block->pm4[1] = state->regs[1].value;
-       block->pm4[2] = state->regs[2].value;
-       block->pm4[3] = state->regs[3].value;
-       block->pm4[4] = state->regs[4].value;
-       block->pm4[5] = state->regs[5].value;
-       block->pm4[6] = state->regs[6].value;
-       block->pm4[7] = state->regs[7].value;
+       block->reg[0] = state->regs[0].value;
+       block->reg[1] = state->regs[1].value;
+       block->reg[2] = state->regs[2].value;
+       block->reg[3] = state->regs[3].value;
+       block->reg[4] = state->regs[4].value;
+       block->reg[5] = state->regs[5].value;
+       block->reg[6] = state->regs[6].value;
+       block->reg[7] = state->regs[7].value;
        radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
        radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
        if (state->regs[0].bo) {
@@ -796,7 +854,7 @@ static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_
        }
        block->status |= R600_BLOCK_STATUS_ENABLED;
        block->status |= R600_BLOCK_STATUS_DIRTY;
-       ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+       ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 }
 
 void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
index d60c37fc90da9ab4d1d4cdf6f8e021e331b6929a..c4b45e565c5d83626f1db8aae65d5d3797910bfc 100644 (file)
@@ -75,7 +75,7 @@ static int r600_group_id_register_offset(struct r600_context *ctx, unsigned offs
        return -1;
 }
 
-int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg)
+int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode)
 {
        struct r600_group_block *block, *tmp;
        struct r600_group *group;
@@ -109,6 +109,12 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
                if (tmp == NULL) {
                        return -ENOMEM;
                }
+               /* update reg pointer */
+               if (tmp != group->blocks) {
+                       for (int j = 0; j < group->nblocks; j++) {
+                               tmp[j].reg = &tmp[j].pm4[2];
+                       }
+               }
                group->blocks = tmp;
                block = &group->blocks[group->nblocks++];
                for (int j = 0; j < n; j++) {
@@ -118,7 +124,10 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
                /* initialize block */
                memset(block, 0, sizeof(struct r600_group_block));
                block->start_offset = reg[i].offset;
-               block->pm4_ndwords = n;
+               block->pm4[block->pm4_ndwords++] = PKT3(opcode, n);
+               block->pm4[block->pm4_ndwords++] = (block->start_offset - group->start_offset) >> 2;
+               block->reg = &block->pm4[block->pm4_ndwords];
+               block->pm4_ndwords += n;
                block->nreg = n;
                for (j = 0; j < n; j++) {
                        if (reg[i+j].need_bo) {
@@ -168,7 +177,8 @@ static void r600_group_fini(struct r600_group *group)
 }
 
 /* R600/R700 configuration */
-static const struct r600_reg r600_reg_list[] = {
+static const struct r600_reg r600_config_reg_list[] = {
+       {0, 0, R_008958_VGT_PRIMITIVE_TYPE},
        {0, 0, R_008C00_SQ_CONFIG},
        {0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1},
        {0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2},
@@ -180,6 +190,9 @@ static const struct r600_reg r600_reg_list[] = {
        {0, 0, R_009714_VC_ENHANCE},
        {0, 0, R_009830_DB_DEBUG},
        {0, 0, R_009838_DB_WATERMARKS},
+};
+
+static const struct r600_reg r600_context_reg_list[] = {
        {0, 0, R_028350_SX_MISC},
        {0, 0, R_0286C8_SPI_THREAD_GROUPING},
        {0, 0, R_0288A8_SQ_ESGS_RING_ITEMSIZE},
@@ -509,7 +522,6 @@ static const struct r600_reg r600_reg_list[] = {
        {0, 0, R_028850_SQ_PGM_RESOURCES_PS},
        {0, 0, R_028854_SQ_PGM_EXPORTS_PS},
        {0, 0, R_0288CC_SQ_PGM_CF_OFFSET_PS},
-       {0, 0, R_008958_VGT_PRIMITIVE_TYPE},
        {0, 0, R_028400_VGT_MAX_VTX_INDX},
        {0, 0, R_028404_VGT_MIN_VTX_INDX},
        {0, 0, R_028408_VGT_INDX_OFFSET},
@@ -534,7 +546,7 @@ static int r600_state_constant_init(struct r600_context *ctx, u32 offset)
        for (int i = 0; i < nreg; i++) {
                r600_shader_constant[i].offset += offset;
        }
-       return r600_context_add_block(ctx, r600_shader_constant, nreg);
+       return r600_context_add_block(ctx, r600_shader_constant, nreg, PKT3_SET_ALU_CONST);
 }
 
 /* SHADER RESOURCE R600/R700 */
@@ -554,7 +566,7 @@ static int r600_state_resource_init(struct r600_context *ctx, u32 offset)
        for (int i = 0; i < nreg; i++) {
                r600_shader_resource[i].offset += offset;
        }
-       return r600_context_add_block(ctx, r600_shader_resource, nreg);
+       return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE);
 }
 
 /* SHADER SAMPLER R600/R700 */
@@ -570,7 +582,7 @@ static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
        for (int i = 0; i < nreg; i++) {
                r600_shader_sampler[i].offset += offset;
        }
-       return r600_context_add_block(ctx, r600_shader_sampler, nreg);
+       return r600_context_add_block(ctx, r600_shader_sampler, nreg, PKT3_SET_SAMPLER);
 }
 
 /* SHADER SAMPLER BORDER R600/R700 */
@@ -587,7 +599,7 @@ static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset)
        for (int i = 0; i < nreg; i++) {
                r600_shader_sampler_border[i].offset += offset;
        }
-       return r600_context_add_block(ctx, r600_shader_sampler_border, nreg);
+       return r600_context_add_block(ctx, r600_shader_sampler_border, nreg, PKT3_SET_CONFIG_REG);
 }
 
 /* initialize */
@@ -644,7 +656,14 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
        ctx->ngroups = R600_NGROUPS;
 
        /* add blocks */
-       r = r600_context_add_block(ctx, r600_reg_list, sizeof(r600_reg_list)/sizeof(struct r600_reg));
+       r = r600_context_add_block(ctx, r600_config_reg_list,
+                               sizeof(r600_config_reg_list)/sizeof(struct r600_reg),
+                               PKT3_SET_CONFIG_REG);
+       if (r)
+               goto out_err;
+       r = r600_context_add_block(ctx, r600_context_reg_list,
+                               sizeof(r600_context_reg_list)/sizeof(struct r600_reg),
+                               PKT3_SET_CONTEXT_REG);
        if (r)
                goto out_err;
 
@@ -762,8 +781,8 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
                id = group->offset_block_id[(state->regs[i].offset - group->start_offset) >> 2];
                block = &group->blocks[id];
                id = (state->regs[i].offset - block->start_offset) >> 2;
-               block->pm4[id] &= ~state->regs[i].mask;
-               block->pm4[id] |= state->regs[i].value;
+               block->reg[id] &= ~state->regs[i].mask;
+               block->reg[id] |= state->regs[i].value;
                if (block->pm4_bo_index[id]) {
                        /* find relocation */
                        id = block->pm4_bo_index[id];
@@ -771,7 +790,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
                }
                block->status |= R600_BLOCK_STATUS_ENABLED;
                block->status |= R600_BLOCK_STATUS_DIRTY;
-               ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+               ctx->pm4_dirty_cdwords += block->pm4_ndwords;
        }
 }
 
@@ -783,13 +802,13 @@ static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx
        offset -= ctx->groups[R600_GROUP_RESOURCE].start_offset;
        id = ctx->groups[R600_GROUP_RESOURCE].offset_block_id[offset >> 2];
        block = &ctx->groups[R600_GROUP_RESOURCE].blocks[id];
-       block->pm4[0] = state->regs[0].value;
-       block->pm4[1] = state->regs[1].value;
-       block->pm4[2] = state->regs[2].value;
-       block->pm4[3] = state->regs[3].value;
-       block->pm4[4] = state->regs[4].value;
-       block->pm4[5] = state->regs[5].value;
-       block->pm4[6] = state->regs[6].value;
+       block->reg[0] = state->regs[0].value;
+       block->reg[1] = state->regs[1].value;
+       block->reg[2] = state->regs[2].value;
+       block->reg[3] = state->regs[3].value;
+       block->reg[4] = state->regs[4].value;
+       block->reg[5] = state->regs[5].value;
+       block->reg[6] = state->regs[6].value;
        radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
        radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
        if (state->regs[0].bo) {
@@ -805,7 +824,7 @@ static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx
        }
        block->status |= R600_BLOCK_STATUS_ENABLED;
        block->status |= R600_BLOCK_STATUS_DIRTY;
-       ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+       ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 }
 
 void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
@@ -830,12 +849,12 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx,
        offset -= ctx->groups[R600_GROUP_SAMPLER].start_offset;
        id = ctx->groups[R600_GROUP_SAMPLER].offset_block_id[offset >> 2];
        block = &ctx->groups[R600_GROUP_SAMPLER].blocks[id];
-       block->pm4[0] = state->regs[0].value;
-       block->pm4[1] = state->regs[1].value;
-       block->pm4[2] = state->regs[2].value;
+       block->reg[0] = state->regs[0].value;
+       block->reg[1] = state->regs[1].value;
+       block->reg[2] = state->regs[2].value;
        block->status |= R600_BLOCK_STATUS_ENABLED;
        block->status |= R600_BLOCK_STATUS_DIRTY;
-       ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+       ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 }
 
 static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
@@ -846,13 +865,13 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex
        offset -= ctx->groups[R600_GROUP_CONFIG].start_offset;
        id = ctx->groups[R600_GROUP_CONFIG].offset_block_id[offset >> 2];
        block = &ctx->groups[R600_GROUP_CONFIG].blocks[id];
-       block->pm4[0] = state->regs[3].value;
-       block->pm4[1] = state->regs[4].value;
-       block->pm4[2] = state->regs[5].value;
-       block->pm4[3] = state->regs[6].value;
+       block->reg[0] = state->regs[3].value;
+       block->reg[1] = state->regs[4].value;
+       block->reg[2] = state->regs[5].value;
+       block->reg[3] = state->regs[6].value;
        block->status |= R600_BLOCK_STATUS_ENABLED;
        block->status |= R600_BLOCK_STATUS_DIRTY;
-       ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+       ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 }
 
 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
@@ -879,7 +898,7 @@ void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r60
        }
 }
 
-void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group, unsigned opcode)
+void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group)
 {
        struct radeon_bo *bo;
        int id;
@@ -898,8 +917,6 @@ void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *
                                }
                        }
 
-                       ctx->pm4[ctx->pm4_cdwords++] = PKT3(opcode, block->nreg);
-                       ctx->pm4[ctx->pm4_cdwords++] = (block->start_offset - group->start_offset) >> 2;
                        memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4);
                        ctx->pm4_cdwords += block->pm4_ndwords;
                        block->status ^= R600_BLOCK_STATUS_DIRTY;
@@ -968,11 +985,11 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
        }
 
        /* enough room to copy packet */
-       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG], PKT3_SET_CONFIG_REG);
-       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT], PKT3_SET_CONTEXT_REG);
-       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST], PKT3_SET_ALU_CONST);
-       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_RESOURCE], PKT3_SET_RESOURCE);
-       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_SAMPLER], PKT3_SET_SAMPLER);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_RESOURCE]);
+       r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_SAMPLER]);
 
        /* draw packet */
        ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
@@ -1063,7 +1080,7 @@ void r600_context_flush(struct r600_context *ctx)
                        /* mark enabled block as dirty */
                        block = &ctx->groups[i].blocks[j];
                        if (block->status & R600_BLOCK_STATUS_ENABLED) {
-                               ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
+                               ctx->pm4_dirty_cdwords += block->pm4_ndwords;
                                block->status |= R600_BLOCK_STATUS_DIRTY;
                        }
                }