2 * Copyright 2012 Francisco Jerez
3 * Copyright 2015 Samuel Pitoiset
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "nv50/nv50_context.h"
28 #include "nv50/nv50_compute.xml.h"
30 #include "codegen/nv50_ir_driver.h"
/*
 * nv50_screen_compute_setup: choose the compute object class from the device
 * chipset, create that object on the screen's channel via
 * nouveau_object_new(), and emit the initial compute (CP) method stream
 * into `push`.
 *
 * NOTE(review): this listing is lossy. The embedded source line numbers skip
 * (33 -> 36, 42 -> 46, 73 -> 75, ...), so the return type, braces, case
 * labels, several PUSH_DATA payloads and the return statements are not
 * visible here. Check the complete file before changing any logic.
 */
33 nv50_screen_compute_setup(struct nv50_screen
*screen
,
34 struct nouveau_pushbuf
*push
)
36 struct nouveau_device
*dev
= screen
->base
.device
;
37 struct nouveau_object
*chan
= screen
->base
.channel
;
/* The channel's private data is reinterpreted as an nv04_fifo; only its
 * `vram` member is read below, as the payload of every DMA_* method. */
38 struct nv04_fifo
*fifo
= (struct nv04_fifo
*)chan
->data
;
/* Outer dispatch on bits 4-7 of the chipset id. */
42 switch (dev
->chipset
& 0xf0) {
46 obj_class
= NV50_COMPUTE_CLASS
;
/* Nested dispatch on the full chipset id: one arm selects
 * NVA3_COMPUTE_CLASS, the other NV50_COMPUTE_CLASS. */
49 switch (dev
->chipset
) {
53 obj_class
= NVA3_COMPUTE_CLASS
;
56 obj_class
= NV50_COMPUTE_CLASS
;
/* Error path: logs the chipset id (presumably the outer default arm;
 * the label and the return that follows are on missing lines). */
61 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev
->chipset
);
/* Create the compute object on the channel with the selected class. */
65 ret
= nouveau_object_new(chan
, 0xbeef50c0, obj_class
, NULL
, 0,
/* Emit the new object's handle through NV01_SUBCHAN_OBJECT on the CP
 * subchannel. */
70 BEGIN_NV04(push
, SUBC_CP(NV01_SUBCHAN_OBJECT
), 1);
71 PUSH_DATA (push
, screen
->compute
->handle
);
73 BEGIN_NV04(push
, NV50_CP(UNK02A0
), 1);
/* Stack: DMA object, then the offset of screen->stack_bo as a
 * PUSH_DATAh/PUSH_DATA pair (presumably high then low 32 bits, matching
 * the STACK_ADDRESS_HIGH method with a count of 2). */
75 BEGIN_NV04(push
, NV50_CP(DMA_STACK
), 1);
76 PUSH_DATA (push
, fifo
->vram
);
77 BEGIN_NV04(push
, NV50_CP(STACK_ADDRESS_HIGH
), 2);
78 PUSH_DATAh(push
, screen
->stack_bo
->offset
);
79 PUSH_DATA (push
, screen
->stack_bo
->offset
);
80 BEGIN_NV04(push
, NV50_CP(STACK_SIZE_LOG
), 1);
83 BEGIN_NV04(push
, NV50_CP(UNK0290
), 1);
85 BEGIN_NV04(push
, NV50_CP(LANES32_ENABLE
), 1);
87 BEGIN_NV04(push
, NV50_CP(REG_MODE
), 1);
88 PUSH_DATA (push
, NV50_COMPUTE_REG_MODE_STRIPED
);
89 BEGIN_NV04(push
, NV50_CP(UNK0384
), 1);
90 PUSH_DATA (push
, 0x100);
91 BEGIN_NV04(push
, NV50_CP(DMA_GLOBAL
), 1);
92 PUSH_DATA (push
, fifo
->vram
);
/* GLOBAL_* slots 0..14 are programmed in a loop, each set to linear mode.
 * The address and limit payloads are on missing lines. */
94 for (i
= 0; i
< 15; i
++) {
95 BEGIN_NV04(push
, NV50_CP(GLOBAL_ADDRESS_HIGH(i
)), 2);
98 BEGIN_NV04(push
, NV50_CP(GLOBAL_LIMIT(i
)), 1);
100 BEGIN_NV04(push
, NV50_CP(GLOBAL_MODE(i
)), 1);
101 PUSH_DATA (push
, NV50_COMPUTE_GLOBAL_MODE_LINEAR
);
/* Slot 15 is programmed separately: limit ~0, linear mode. */
104 BEGIN_NV04(push
, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
107 BEGIN_NV04(push
, NV50_CP(GLOBAL_LIMIT(15)), 1);
108 PUSH_DATA (push
, ~0);
109 BEGIN_NV04(push
, NV50_CP(GLOBAL_MODE(15)), 1);
110 PUSH_DATA (push
, NV50_COMPUTE_GLOBAL_MODE_LINEAR
);
/* Warp allocation and user-parameter methods; their payloads are on
 * missing lines. */
112 BEGIN_NV04(push
, NV50_CP(LOCAL_WARPS_LOG_ALLOC
), 1);
114 BEGIN_NV04(push
, NV50_CP(LOCAL_WARPS_NO_CLAMP
), 1);
116 BEGIN_NV04(push
, NV50_CP(STACK_WARPS_LOG_ALLOC
), 1);
118 BEGIN_NV04(push
, NV50_CP(STACK_WARPS_NO_CLAMP
), 1);
120 BEGIN_NV04(push
, NV50_CP(USER_PARAM_COUNT
), 1);
/* Texture setup. */
123 BEGIN_NV04(push
, NV50_CP(DMA_TEXTURE
), 1);
124 PUSH_DATA (push
, fifo
->vram
);
125 BEGIN_NV04(push
, NV50_CP(TEX_LIMITS
), 1);
126 PUSH_DATA (push
, 0x54);
127 BEGIN_NV04(push
, NV50_CP(LINKED_TSC
), 1);
/* TIC: offset of screen->txc, followed by the last valid entry index. */
130 BEGIN_NV04(push
, NV50_CP(DMA_TIC
), 1);
131 PUSH_DATA (push
, fifo
->vram
);
132 BEGIN_NV04(push
, NV50_CP(TIC_ADDRESS_HIGH
), 3);
133 PUSH_DATAh(push
, screen
->txc
->offset
);
134 PUSH_DATA (push
, screen
->txc
->offset
);
135 PUSH_DATA (push
, NV50_TIC_MAX_ENTRIES
- 1);
/* TSC: same buffer as the TIC, at offset + 65536. */
137 BEGIN_NV04(push
, NV50_CP(DMA_TSC
), 1);
138 PUSH_DATA (push
, fifo
->vram
);
139 BEGIN_NV04(push
, NV50_CP(TSC_ADDRESS_HIGH
), 3);
140 PUSH_DATAh(push
, screen
->txc
->offset
+ 65536);
141 PUSH_DATA (push
, screen
->txc
->offset
+ 65536);
142 PUSH_DATA (push
, NV50_TSC_MAX_ENTRIES
- 1);
144 BEGIN_NV04(push
, NV50_CP(DMA_CODE_CB
), 1);
145 PUSH_DATA (push
, fifo
->vram
);
/* Local memory: base is the tls_bo offset + 65536; LOCAL_SIZE_LOG is
 * util_logbase2() of (max_tls_space / ONE_TEMP_SIZE) * 2. */
147 BEGIN_NV04(push
, NV50_CP(DMA_LOCAL
), 1);
148 PUSH_DATA (push
, fifo
->vram
);
149 BEGIN_NV04(push
, NV50_CP(LOCAL_ADDRESS_HIGH
), 2);
150 PUSH_DATAh(push
, screen
->tls_bo
->offset
+ 65536);
151 PUSH_DATA (push
, screen
->tls_bo
->offset
+ 65536);
152 BEGIN_NV04(push
, NV50_CP(LOCAL_SIZE_LOG
), 1);
153 PUSH_DATA (push
, util_logbase2((screen
->max_tls_space
/ ONE_TEMP_SIZE
) * 2));
/*
 * nv50_compute_validate_globals: treat nv50->global_residents as a dynamic
 * array of struct pipe_resource pointers and add each element to the
 * compute bufctx under NV50_BIND_CP_GLOBAL with NOUVEAU_BO_RDWR access.
 *
 * NOTE(review): the loop increment, the braces and the declaration of `i`
 * are on lines missing from this listing.
 */
159 nv50_compute_validate_globals(struct nv50_context
*nv50
)
/* Element count = byte size of the array / size of one pointer. */
163 for (i
= 0; i
< nv50
->global_residents
.size
/ sizeof(struct pipe_resource
*);
165 struct pipe_resource
*res
= *util_dynarray_element(
166 &nv50
->global_residents
, struct pipe_resource
*, i
);
168 nv50_add_bufctx_resident(nv50
->bufctx_cp
, NV50_BIND_CP_GLOBAL
,
169 nv04_resource(res
), NOUVEAU_BO_RDWR
);
/*
 * Compute validation table: each entry pairs a validation callback with an
 * NV50_NEW_CP_* flag (presumably the dirty bit that triggers the callback;
 * confirm against nv50_state_validate()).
 */
173 static struct nv50_state_validate
174 validate_list_cp
[] = {
175 { nv50_compprog_validate
, NV50_NEW_CP_PROGRAM
},
176 { nv50_compute_validate_globals
, NV50_NEW_CP_GLOBALS
},
/*
 * nv50_state_validate_cp: run nv50_state_validate() over validate_list_cp
 * with the caller's `mask` and nv50->dirty_cp, then fence the compute
 * bufctx if nv50->state.flushed is set.
 *
 * NOTE(review): the last argument of the nv50_state_validate() call and the
 * return statement are on lines missing from this listing.
 */
180 nv50_state_validate_cp(struct nv50_context
*nv50
, uint32_t mask
)
184 /* TODO: validate textures, samplers, surfaces */
185 ret
= nv50_state_validate(nv50
, mask
, validate_list_cp
,
186 ARRAY_SIZE(validate_list_cp
), &nv50
->dirty_cp
,
/* Marked unlikely(): the flushed case is expected to be rare. */
189 if (unlikely(nv50
->state
.flushed
))
190 nv50_bufctx_fence(nv50
->bufctx_cp
, true);
/*
 * nv50_compute_upload_input: program USER_PARAM_COUNT from the compute
 * program's parm_size (rounded up to a multiple of 4), then copy `input`
 * into a GART allocation and submit that buffer as the data for
 * USER_PARAM(0).
 *
 * NOTE(review): source lines 203-204, 207-208 and 210-211 are missing from
 * this listing; any guard on `size`, the declaration of `offset`, and any
 * check of the allocation result are therefore not visible here.
 */
195 nv50_compute_upload_input(struct nv50_context
*nv50
, const uint32_t *input
)
197 struct nv50_screen
*screen
= nv50
->screen
;
198 struct nouveau_pushbuf
*push
= screen
->base
.pushbuf
;
199 unsigned size
= align(nv50
->compprog
->parm_size
, 0x4);
/* `size` is used as a byte count by the memcpy below, so size / 4 is the
 * number of 32-bit words; it is placed in bits 8 and up of the method. */
201 BEGIN_NV04(push
, NV50_CP(USER_PARAM_COUNT
), 1);
202 PUSH_DATA (push
, (size
/ 4) << 8);
205 struct nouveau_mm_allocation
*mm
;
206 struct nouveau_bo
*bo
= NULL
;
/* Sub-allocate `size` bytes from the GART allocator; `bo` and `offset`
 * receive the backing buffer and the position inside it. */
209 mm
= nouveau_mm_allocate(screen
->base
.mm_GART
, size
, &bo
, &offset
);
/* NOTE(review): the return value of nouveau_bo_map() appears to be
 * discarded on this line -- confirm against the complete file. */
212 nouveau_bo_map(bo
, 0, screen
->base
.client
);
213 memcpy(bo
->map
+ offset
, input
, size
);
/* Reference the buffer for GART reads, attach the bufctx to the push
 * buffer and validate before emitting the upload. */
215 nouveau_bufctx_refn(nv50
->bufctx
, 0, bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_RD
);
216 nouveau_pushbuf_bufctx(push
, nv50
->bufctx
);
217 nouveau_pushbuf_validate(push
);
/* The method header announces size / 4 words; the payload is taken from
 * the staged buffer rather than written inline. */
219 BEGIN_NV04(push
, NV50_CP(USER_PARAM(0)), size
/ 4);
220 nouveau_pushbuf_data(push
, bo
, offset
, size
);
/* Register nouveau_mm_free_work(mm) as work on the current fence, then
 * drop the local buffer reference and reset bufctx bin 0. */
222 nouveau_fence_work(screen
->base
.fence
.current
, nouveau_mm_free_work
, mm
);
223 nouveau_bo_ref(NULL
, &bo
);
224 nouveau_bufctx_reset(nv50
->bufctx
, 0);
/*
 * nv50_compute_find_symbol: linear search of the bound compute program's
 * symbol table for `label`.
 *
 * Returns prog->code_base plus the matching symbol's offset, or
 * prog->code_base alone when the table is empty or no symbol matches.
 *
 * NOTE(review): the return type, the declaration of `i` and the braces are
 * on lines missing from this listing.
 */
229 nv50_compute_find_symbol(struct nv50_context
*nv50
, uint32_t label
)
231 struct nv50_program
*prog
= nv50
->compprog
;
/* cp.syms is viewed as an array of nv50_ir_prog_symbol for the search. */
232 const struct nv50_ir_prog_symbol
*syms
=
233 (const struct nv50_ir_prog_symbol
*)prog
->cp
.syms
;
236 for (i
= 0; i
< prog
->cp
.num_syms
; ++i
) {
237 if (syms
[i
].label
== label
)
238 return prog
->code_base
+ syms
[i
].offset
;
240 return prog
->code_base
; /* no symbols or symbol not found */
/*
 * nv50_launch_grid: validate compute state, upload the kernel input, and
 * emit the CP methods that configure and launch a grid described by `info`.
 *
 * NOTE(review): several lines are missing from this listing (the `if`
 * around the error message, the early return, and the payloads of
 * BLOCKDIM_LATCH, GRIDID, LAUNCH and the serialize method). Check the
 * complete file before changing any logic.
 */
244 nv50_launch_grid(struct pipe_context
*pipe
, const struct pipe_grid_info
*info
)
246 struct nv50_context
*nv50
= nv50_context(pipe
);
247 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
/* Total threads per block: product of the three block dimensions. */
248 unsigned block_size
= info
->block
[0] * info
->block
[1] * info
->block
[2];
249 struct nv50_program
*cp
= nv50
->compprog
;
/* The validation result is logically negated before being stored in ret. */
252 ret
= !nv50_state_validate_cp(nv50
, ~0);
254 NOUVEAU_ERR("Failed to launch grid !\n");
258 nv50_compute_upload_input(nv50
, info
->input
);
/* Entry point: code address resolved from info->pc via the symbol table. */
260 BEGIN_NV04(push
, NV50_CP(CP_START_ID
), 1);
261 PUSH_DATA (push
, nv50_compute_find_symbol(nv50
, info
->pc
));
/* Shared size = smem_size + parm_size + 0x10, rounded up to 0x40. */
263 BEGIN_NV04(push
, NV50_CP(SHARED_SIZE
), 1);
264 PUSH_DATA (push
, align(cp
->cp
.smem_size
+ cp
->parm_size
+ 0x10, 0x40));
265 BEGIN_NV04(push
, NV50_CP(CP_REG_ALLOC_TEMP
), 1);
266 PUSH_DATA (push
, cp
->max_gpr
);
268 /* grid/block setup */
/* block[1] goes in the upper 16 bits and block[0] in the lower; block[2]
 * is sent as the second word. */
269 BEGIN_NV04(push
, NV50_CP(BLOCKDIM_XY
), 2);
270 PUSH_DATA (push
, info
->block
[1] << 16 | info
->block
[0]);
271 PUSH_DATA (push
, info
->block
[2]);
272 BEGIN_NV04(push
, NV50_CP(BLOCK_ALLOC
), 1);
273 PUSH_DATA (push
, 1 << 16 | block_size
);
274 BEGIN_NV04(push
, NV50_CP(BLOCKDIM_LATCH
), 1);
/* NOTE(review): only grid[0] and grid[1] appear on the visible lines;
 * grid[2] is not referenced here -- confirm how a third grid dimension is
 * handled in the complete file. */
276 BEGIN_NV04(push
, NV50_CP(GRIDDIM
), 1);
277 PUSH_DATA (push
, info
->grid
[1] << 16 | info
->grid
[0]);
278 BEGIN_NV04(push
, NV50_CP(GRIDID
), 1);
281 /* kernel launching */
282 BEGIN_NV04(push
, NV50_CP(LAUNCH
), 1);
284 BEGIN_NV04(push
, SUBC_CP(NV50_GRAPH_SERIALIZE
), 1);
287 /* binding a compute shader clobbers fragment shader state */
288 nv50
->dirty_3d
|= NV50_NEW_3D_FRAGPROG
;