/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include "radeon/r600_cs.h"
25 #include "util/u_memory.h"
29 void si_pm4_cmd_begin(struct si_pm4_state
*state
, unsigned opcode
)
31 state
->last_opcode
= opcode
;
32 state
->last_pm4
= state
->ndw
++;
35 void si_pm4_cmd_add(struct si_pm4_state
*state
, uint32_t dw
)
37 state
->pm4
[state
->ndw
++] = dw
;
40 void si_pm4_cmd_end(struct si_pm4_state
*state
, bool predicate
)
43 count
= state
->ndw
- state
->last_pm4
- 2;
44 state
->pm4
[state
->last_pm4
] =
45 PKT3(state
->last_opcode
, count
, predicate
);
47 assert(state
->ndw
<= SI_PM4_MAX_DW
);
/*
 * Queue a write of "val" to the register at offset "reg" in the PM4
 * buffer of "state".
 *
 * The offset is compared against the config, SH, context and uconfig
 * register ranges in turn. The first matching range selects the
 * PKT3_SET_*_REG opcode, and "reg" is rebased to the start of that range.
 * An offset outside every range is reported through R600_ERR.
 *
 * A new packet (header slot followed by the register dword) is begun when
 * the opcode differs from state->last_opcode or when "reg" does not directly
 * follow state->last_reg. The value is then appended and si_pm4_cmd_end()
 * rewrites the header, so consecutive registers appear to share one packet.
 *
 * NOTE(review): several lines of this function (braces, the declaration of
 * "opcode", the remainder of the error branch and anything between the
 * range checks and the packet logic) are not visible in this view - confirm
 * against the complete file before relying on this description.
 */
50 void si_pm4_set_reg(struct si_pm4_state
*state
, unsigned reg
, uint32_t val
)
54 if (reg
>= SI_CONFIG_REG_OFFSET
&& reg
< SI_CONFIG_REG_END
) {
55 opcode
= PKT3_SET_CONFIG_REG
;
56 reg
-= SI_CONFIG_REG_OFFSET
;
58 } else if (reg
>= SI_SH_REG_OFFSET
&& reg
< SI_SH_REG_END
) {
59 opcode
= PKT3_SET_SH_REG
;
60 reg
-= SI_SH_REG_OFFSET
;
62 } else if (reg
>= SI_CONTEXT_REG_OFFSET
&& reg
< SI_CONTEXT_REG_END
) {
63 opcode
= PKT3_SET_CONTEXT_REG
;
64 reg
-= SI_CONTEXT_REG_OFFSET
;
66 } else if (reg
>= CIK_UCONFIG_REG_OFFSET
&& reg
< CIK_UCONFIG_REG_END
) {
67 opcode
= PKT3_SET_UCONFIG_REG
;
68 reg
-= CIK_UCONFIG_REG_OFFSET
;
71 R600_ERR("Invalid register offset %08x!\n", reg
);
77 if (opcode
!= state
->last_opcode
|| reg
!= (state
->last_reg
+ 1)) {
78 si_pm4_cmd_begin(state
, opcode
);
79 si_pm4_cmd_add(state
, reg
);
82 state
->last_reg
= reg
;
83 si_pm4_cmd_add(state
, val
);
84 si_pm4_cmd_end(state
, false);
87 void si_pm4_add_bo(struct si_pm4_state
*state
,
88 struct r600_resource
*bo
,
89 enum radeon_bo_usage usage
,
90 enum radeon_bo_priority priority
)
92 unsigned idx
= state
->nbo
++;
93 assert(idx
< SI_PM4_MAX_BO
);
95 r600_resource_reference(&state
->bo
[idx
], bo
);
96 state
->bo_usage
[idx
] = usage
;
97 state
->bo_priority
[idx
] = priority
;
/*
 * Release the resources referenced by "state".
 *
 * Each of the first state->nbo entries of state->bo, and then
 * state->indirect_buffer, is passed to r600_resource_reference() with a
 * NULL replacement.
 *
 * NOTE(review): the end of this function is not visible in this view;
 * whether counters such as nbo/ndw are reset afterwards must be confirmed
 * against the complete file.
 */
100 void si_pm4_clear_state(struct si_pm4_state
*state
)
102 for (int i
= 0; i
< state
->nbo
; ++i
)
103 r600_resource_reference(&state
->bo
[i
], NULL
);
104 r600_resource_reference(&state
->indirect_buffer
, NULL
);
/*
 * Tear down a PM4 state object.
 *
 * When "idx" is not ~0 and sctx->emitted.array[idx] still points at
 * "state", that entry is set to NULL. The state's references are then
 * dropped through si_pm4_clear_state().
 *
 * NOTE(review): the parameter list is truncated in this view ("idx" is used
 * but its declaration is not shown), and statements before the idx check and
 * after si_pm4_clear_state() may be missing - confirm against the complete
 * file.
 */
109 void si_pm4_free_state(struct si_context
*sctx
,
110 struct si_pm4_state
*state
,
116 if (idx
!= ~0 && sctx
->emitted
.array
[idx
] == state
) {
117 sctx
->emitted
.array
[idx
] = NULL
;
120 si_pm4_clear_state(state
);
/*
 * Emit the commands of "state" into the context's gfx command stream
 * (sctx->b.gfx_cs).
 *
 * Every buffer recorded in state->bo is first added to the buffer list
 * with its stored usage and priority.
 *
 * If the state has no indirect buffer, the state->ndw dwords of state->pm4
 * are copied into the command stream with radeon_emit_array().
 *
 * Otherwise the indirect buffer is added to the buffer list and a
 * PKT3_INDIRECT_BUFFER_CIK packet is emitted: the GPU address (as passed to
 * radeon_emit, then shifted right by 32 for the upper part) followed by the
 * buffer size in dwords (width0 >> 2) masked to 20 bits.
 *
 * NOTE(review): the "else" line and the trailing arguments of the second
 * radeon_add_to_buffer_list() call are not visible in this view - confirm
 * against the complete file.
 */
124 void si_pm4_emit(struct si_context
*sctx
, struct si_pm4_state
*state
)
126 struct radeon_winsys_cs
*cs
= sctx
->b
.gfx_cs
;
128 for (int i
= 0; i
< state
->nbo
; ++i
) {
129 radeon_add_to_buffer_list(&sctx
->b
, sctx
->b
.gfx_cs
, state
->bo
[i
],
130 state
->bo_usage
[i
], state
->bo_priority
[i
]);
133 if (!state
->indirect_buffer
) {
134 radeon_emit_array(cs
, state
->pm4
, state
->ndw
);
136 struct r600_resource
*ib
= state
->indirect_buffer
;
138 radeon_add_to_buffer_list(&sctx
->b
, sctx
->b
.gfx_cs
, ib
,
142 radeon_emit(cs
, PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0));
143 radeon_emit(cs
, ib
->gpu_address
);
144 radeon_emit(cs
, ib
->gpu_address
>> 32);
145 radeon_emit(cs
, (ib
->b
.b
.width0
>> 2) & 0xfffff);
149 void si_pm4_reset_emitted(struct si_context
*sctx
)
151 memset(&sctx
->emitted
, 0, sizeof(sctx
->emitted
));
152 sctx
->dirty_states
|= u_bit_consecutive(0, SI_NUM_STATES
);
/*
 * Copy the PM4 commands of "state" into a freshly created buffer stored in
 * state->indirect_buffer.
 *
 * Steps visible here:
 *  - the dword count is rounded up with align(state->ndw, 8);
 *  - the chip class is compared against CIK (see the in-line comment);
 *  - the padded size is asserted to fit in SI_PM4_MAX_DW;
 *  - any previous indirect buffer reference is dropped and a new buffer of
 *    aligned_ndw * 4 bytes is created with si_aligned_buffer_create();
 *  - state->pm4 is padded from ndw up to aligned_ndw with either
 *    0x80000000 or 0xffff1000, chosen by info.gfx_ib_pad_with_type2;
 *  - aligned_ndw * 4 bytes of state->pm4 are written to offset 0 of the
 *    new buffer with pipe_buffer_write().
 *
 * NOTE(review): the statements guarded by the chip-class check and by the
 * allocation-failure check, the final argument of
 * si_aligned_buffer_create(), and the "else" line between the two padding
 * loops are not visible in this view - confirm against the complete file.
 */
155 void si_pm4_upload_indirect_buffer(struct si_context
*sctx
,
156 struct si_pm4_state
*state
)
158 struct pipe_screen
*screen
= sctx
->b
.b
.screen
;
159 unsigned aligned_ndw
= align(state
->ndw
, 8);
161 /* only supported on CIK and later */
162 if (sctx
->b
.chip_class
< CIK
)
166 assert(aligned_ndw
<= SI_PM4_MAX_DW
);
168 r600_resource_reference(&state
->indirect_buffer
, NULL
);
169 /* TODO: this hangs with 1024 or higher alignment on GFX9. */
170 state
->indirect_buffer
= (struct r600_resource
*)
171 si_aligned_buffer_create(screen
, 0,
172 PIPE_USAGE_DEFAULT
, aligned_ndw
* 4,
174 if (!state
->indirect_buffer
)
177 /* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */
178 if (sctx
->screen
->info
.gfx_ib_pad_with_type2
) {
179 for (int i
= state
->ndw
; i
< aligned_ndw
; i
++)
180 state
->pm4
[i
] = 0x80000000; /* type2 nop packet */
182 for (int i
= state
->ndw
; i
< aligned_ndw
; i
++)
183 state
->pm4
[i
] = 0xffff1000; /* type3 nop packet */
186 pipe_buffer_write(&sctx
->b
.b
, &state
->indirect_buffer
->b
.b
,
187 0, aligned_ndw
*4, state
->pm4
);