2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Christian König <christian.koenig@amd.com>
27 #include "../radeon/r600_cs.h"
28 #include "util/u_memory.h"
/* Number of state pointers that fit in union si_state, i.e. the length of
 * the queued/emitted state arrays iterated over in this file. */
#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
35 void si_pm4_cmd_begin(struct si_pm4_state
*state
, unsigned opcode
)
37 state
->last_opcode
= opcode
;
38 state
->last_pm4
= state
->ndw
++;
41 void si_pm4_cmd_add(struct si_pm4_state
*state
, uint32_t dw
)
43 state
->pm4
[state
->ndw
++] = dw
;
46 void si_pm4_cmd_end(struct si_pm4_state
*state
, bool predicate
)
49 count
= state
->ndw
- state
->last_pm4
- 2;
50 state
->pm4
[state
->last_pm4
] =
51 PKT3(state
->last_opcode
, count
, predicate
)
52 | PKT3_SHADER_TYPE_S(state
->compute_pkt
);
54 assert(state
->ndw
<= SI_PM4_MAX_DW
);
57 void si_pm4_set_reg(struct si_pm4_state
*state
, unsigned reg
, uint32_t val
)
61 if (reg
>= SI_CONFIG_REG_OFFSET
&& reg
< SI_CONFIG_REG_END
) {
62 opcode
= PKT3_SET_CONFIG_REG
;
63 reg
-= SI_CONFIG_REG_OFFSET
;
65 } else if (reg
>= SI_SH_REG_OFFSET
&& reg
< SI_SH_REG_END
) {
66 opcode
= PKT3_SET_SH_REG
;
67 reg
-= SI_SH_REG_OFFSET
;
69 } else if (reg
>= SI_CONTEXT_REG_OFFSET
&& reg
< SI_CONTEXT_REG_END
) {
70 opcode
= PKT3_SET_CONTEXT_REG
;
71 reg
-= SI_CONTEXT_REG_OFFSET
;
73 } else if (reg
>= CIK_UCONFIG_REG_OFFSET
&& reg
< CIK_UCONFIG_REG_END
) {
74 opcode
= PKT3_SET_UCONFIG_REG
;
75 reg
-= CIK_UCONFIG_REG_OFFSET
;
78 R600_ERR("Invalid register offset %08x!\n", reg
);
84 if (opcode
!= state
->last_opcode
|| reg
!= (state
->last_reg
+ 1)) {
85 si_pm4_cmd_begin(state
, opcode
);
86 si_pm4_cmd_add(state
, reg
);
89 state
->last_reg
= reg
;
90 si_pm4_cmd_add(state
, val
);
91 si_pm4_cmd_end(state
, false);
94 void si_pm4_add_bo(struct si_pm4_state
*state
,
95 struct r600_resource
*bo
,
96 enum radeon_bo_usage usage
)
98 unsigned idx
= state
->nbo
++;
99 assert(idx
< SI_PM4_MAX_BO
);
101 r600_resource_reference(&state
->bo
[idx
], bo
);
102 state
->bo_usage
[idx
] = usage
;
105 void si_pm4_sh_data_begin(struct si_pm4_state
*state
)
107 si_pm4_cmd_begin(state
, PKT3_NOP
);
/**
 * Append one dword to the inline shader data block opened by
 * si_pm4_sh_data_begin().
 *
 * \param state  PM4 state being built
 * \param dw     data dword to append
 */
void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw)
{
	si_pm4_cmd_add(state, dw);
}
115 void si_pm4_sh_data_end(struct si_pm4_state
*state
, unsigned base
, unsigned idx
)
117 unsigned offs
= state
->last_pm4
+ 1;
118 unsigned reg
= base
+ idx
* 4;
120 /* Bail if no data was added */
121 if (state
->ndw
== offs
) {
126 si_pm4_cmd_end(state
, false);
128 si_pm4_cmd_begin(state
, PKT3_SET_SH_REG_OFFSET
);
129 si_pm4_cmd_add(state
, (reg
- SI_SH_REG_OFFSET
) >> 2);
130 state
->relocs
[state
->nrelocs
++] = state
->ndw
;
131 si_pm4_cmd_add(state
, offs
<< 2);
132 si_pm4_cmd_add(state
, 0);
133 si_pm4_cmd_end(state
, false);
136 void si_pm4_inval_shader_cache(struct si_pm4_state
*state
)
138 state
->cp_coher_cntl
|= S_0085F0_SH_ICACHE_ACTION_ENA(1);
139 state
->cp_coher_cntl
|= S_0085F0_SH_KCACHE_ACTION_ENA(1);
142 void si_pm4_inval_texture_cache(struct si_pm4_state
*state
)
144 state
->cp_coher_cntl
|= S_0085F0_TC_ACTION_ENA(1);
145 state
->cp_coher_cntl
|= S_0085F0_TCL1_ACTION_ENA(1);
148 void si_pm4_free_state(struct si_context
*sctx
,
149 struct si_pm4_state
*state
,
155 if (idx
!= ~0 && sctx
->emitted
.array
[idx
] == state
) {
156 sctx
->emitted
.array
[idx
] = NULL
;
159 for (int i
= 0; i
< state
->nbo
; ++i
) {
160 r600_resource_reference(&state
->bo
[i
], NULL
);
165 struct si_pm4_state
* si_pm4_alloc_state(struct si_context
*sctx
)
167 struct si_pm4_state
*pm4
= CALLOC_STRUCT(si_pm4_state
);
172 pm4
->chip_class
= sctx
->b
.chip_class
;
177 uint32_t si_pm4_sync_flags(struct si_context
*sctx
)
179 uint32_t cp_coher_cntl
= 0;
181 for (int i
= 0; i
< NUMBER_OF_STATES
; ++i
) {
182 struct si_pm4_state
*state
= sctx
->queued
.array
[i
];
184 if (!state
|| sctx
->emitted
.array
[i
] == state
)
187 cp_coher_cntl
|= state
->cp_coher_cntl
;
189 return cp_coher_cntl
;
192 unsigned si_pm4_dirty_dw(struct si_context
*sctx
)
196 for (int i
= 0; i
< NUMBER_OF_STATES
; ++i
) {
197 struct si_pm4_state
*state
= sctx
->queued
.array
[i
];
199 if (!state
|| sctx
->emitted
.array
[i
] == state
)
204 /* for tracing each states */
205 if (sctx
->screen
->b
.trace_bo
) {
206 count
+= SI_TRACE_CS_DWORDS
;
214 void si_pm4_emit(struct si_context
*sctx
, struct si_pm4_state
*state
)
216 struct radeon_winsys_cs
*cs
= sctx
->b
.rings
.gfx
.cs
;
217 for (int i
= 0; i
< state
->nbo
; ++i
) {
218 r600_context_bo_reloc(&sctx
->b
, &sctx
->b
.rings
.gfx
, state
->bo
[i
],
222 memcpy(&cs
->buf
[cs
->cdw
], state
->pm4
, state
->ndw
* 4);
224 for (int i
= 0; i
< state
->nrelocs
; ++i
) {
225 cs
->buf
[cs
->cdw
+ state
->relocs
[i
]] += cs
->cdw
<< 2;
228 cs
->cdw
+= state
->ndw
;
231 if (sctx
->screen
->b
.trace_bo
) {
237 void si_pm4_emit_dirty(struct si_context
*sctx
)
239 for (int i
= 0; i
< NUMBER_OF_STATES
; ++i
) {
240 struct si_pm4_state
*state
= sctx
->queued
.array
[i
];
242 if (!state
|| sctx
->emitted
.array
[i
] == state
)
245 assert(state
!= sctx
->queued
.named
.init
);
246 si_pm4_emit(sctx
, state
);
247 sctx
->emitted
.array
[i
] = state
;
251 void si_pm4_reset_emitted(struct si_context
*sctx
)
253 memset(&sctx
->emitted
, 0, sizeof(sctx
->emitted
));