/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Christian König <christian.koenig@amd.com>
 */
27 #include "util/u_memory.h"
28 #include "radeonsi_pipe.h"
29 #include "radeonsi_pm4.h"
31 #include "r600_hw_context_priv.h"
/*
 * Number of struct si_pm4_state pointers that fit into union si_state.
 * Used as the loop bound when walking rctx->queued.array and
 * rctx->emitted.array.
 */
#define NUMBER_OF_STATES \
	(sizeof(union si_state) / sizeof(struct si_pm4_state *))
35 void si_pm4_set_reg(struct si_pm4_state
*state
, unsigned reg
, uint32_t val
)
37 unsigned opcode
, count
;
39 if (reg
>= SI_CONFIG_REG_OFFSET
&& reg
<= SI_CONFIG_REG_END
) {
40 opcode
= PKT3_SET_CONFIG_REG
;
41 reg
-= SI_CONFIG_REG_OFFSET
;
43 } else if (reg
>= SI_SH_REG_OFFSET
&& reg
<= SI_SH_REG_END
) {
44 opcode
= PKT3_SET_SH_REG
;
45 reg
-= SI_SH_REG_OFFSET
;
47 } else if (reg
>= SI_CONTEXT_REG_OFFSET
&& reg
<= SI_CONTEXT_REG_END
) {
48 opcode
= PKT3_SET_CONTEXT_REG
;
49 reg
-= SI_CONTEXT_REG_OFFSET
;
51 R600_ERR("Invalid register offset %08x!\n", reg
);
57 if (opcode
!= state
->last_opcode
|| reg
!= (state
->last_reg
+ 1)) {
58 state
->last_opcode
= opcode
;
59 state
->last_pm4
= state
->ndw
++;
60 state
->pm4
[state
->ndw
++] = reg
;
63 state
->last_reg
= reg
;
64 count
= state
->ndw
- state
->last_pm4
- 1;
65 state
->pm4
[state
->last_pm4
] = PKT3(opcode
, count
, 0);
66 state
->pm4
[state
->ndw
++] = val
;
68 assert(state
->ndw
<= SI_PM4_MAX_DW
);
71 void si_pm4_add_bo(struct si_pm4_state
*state
,
72 struct si_resource
*bo
,
73 enum radeon_bo_usage usage
)
75 unsigned idx
= state
->nbo
++;
76 assert(idx
< SI_PM4_MAX_BO
);
78 si_resource_reference(&state
->bo
[idx
], bo
);
79 state
->bo_usage
[idx
] = usage
;
82 void si_pm4_inval_shader_cache(struct si_pm4_state
*state
)
84 state
->cp_coher_cntl
|= S_0085F0_SH_ICACHE_ACTION_ENA(1);
85 state
->cp_coher_cntl
|= S_0085F0_SH_KCACHE_ACTION_ENA(1);
88 void si_pm4_inval_texture_cache(struct si_pm4_state
*state
)
90 state
->cp_coher_cntl
|= S_0085F0_TC_ACTION_ENA(1);
93 void si_pm4_inval_vertex_cache(struct si_pm4_state
*state
)
95 /* Some GPUs don't have the vertex cache and must use the texture cache instead. */
96 state
->cp_coher_cntl
|= S_0085F0_TC_ACTION_ENA(1);
99 void si_pm4_inval_fb_cache(struct si_pm4_state
*state
, unsigned nr_cbufs
)
101 state
->cp_coher_cntl
|= S_0085F0_CB_ACTION_ENA(1);
102 state
->cp_coher_cntl
|= ((1 << nr_cbufs
) - 1) << S_0085F0_CB0_DEST_BASE_ENA_SHIFT
;
105 void si_pm4_inval_zsbuf_cache(struct si_pm4_state
*state
)
107 state
->cp_coher_cntl
|= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1);
110 void si_pm4_free_state(struct r600_context
*rctx
,
111 struct si_pm4_state
*state
,
117 if (rctx
->emitted
.array
[idx
] == state
) {
118 rctx
->emitted
.array
[idx
] = NULL
;
121 for (int i
= 0; i
< state
->nbo
; ++i
) {
122 si_resource_reference(&state
->bo
[idx
], NULL
);
127 unsigned si_pm4_dirty_dw(struct r600_context
*rctx
)
130 uint32_t cp_coher_cntl
= 0;
132 for (int i
= 0; i
< NUMBER_OF_STATES
; ++i
) {
133 struct si_pm4_state
*state
= rctx
->queued
.array
[i
];
135 if (!state
|| rctx
->emitted
.array
[i
] == state
)
139 cp_coher_cntl
|= state
->cp_coher_cntl
;
143 rctx
->atom_surface_sync
.flush_flags
|= cp_coher_cntl
;
144 r600_atom_dirty(rctx
, &rctx
->atom_surface_sync
.atom
);
148 void si_pm4_emit_dirty(struct r600_context
*rctx
)
150 struct radeon_winsys_cs
*cs
= rctx
->cs
;
152 for (int i
= 0; i
< NUMBER_OF_STATES
; ++i
) {
153 struct si_pm4_state
*state
= rctx
->queued
.array
[i
];
155 if (!state
|| rctx
->emitted
.array
[i
] == state
)
158 for (int j
= 0; j
< state
->nbo
; ++j
) {
159 r600_context_bo_reloc(rctx
, state
->bo
[j
],
163 memcpy(&cs
->buf
[cs
->cdw
], state
->pm4
, state
->ndw
* 4);
164 cs
->cdw
+= state
->ndw
;
166 rctx
->emitted
.array
[i
] = state
;
170 void si_pm4_reset_emitted(struct r600_context
*rctx
)
172 memset(&rctx
->emitted
, 0, sizeof(rctx
->emitted
));