/*
 * Copyright 2016 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
23 #include "util/u_inlines.h"
24 #include "util/u_math.h"
25 #include "util/u_memory.h"
26 #include "util/u_pstipple.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "draw/draw_context.h"
29 #include "draw/draw_vertex.h"
30 #include "sp_context.h"
31 #include "sp_screen.h"
33 #include "sp_texture.h"
34 #include "sp_tex_sample.h"
35 #include "sp_tex_tile_cache.h"
36 #include "tgsi/tgsi_parse.h"
39 cs_prepare(const struct sp_compute_shader
*cs
,
40 struct tgsi_exec_machine
*machine
,
42 int g_w
, int g_h
, int g_d
,
43 int b_w
, int b_h
, int b_d
,
44 struct tgsi_sampler
*sampler
,
45 struct tgsi_image
*image
,
46 struct tgsi_buffer
*buffer
)
50 * Bind tokens/shader to the interpreter's machine state.
52 tgsi_exec_machine_bind_shader(machine
,
54 sampler
, image
, buffer
);
56 if (machine
->SysSemanticToIndex
[TGSI_SEMANTIC_THREAD_ID
] != -1) {
57 unsigned i
= machine
->SysSemanticToIndex
[TGSI_SEMANTIC_THREAD_ID
];
58 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
59 machine
->SystemValue
[i
].xyzw
[0].i
[j
] = w
;
60 machine
->SystemValue
[i
].xyzw
[1].i
[j
] = h
;
61 machine
->SystemValue
[i
].xyzw
[2].i
[j
] = d
;
65 if (machine
->SysSemanticToIndex
[TGSI_SEMANTIC_GRID_SIZE
] != -1) {
66 unsigned i
= machine
->SysSemanticToIndex
[TGSI_SEMANTIC_GRID_SIZE
];
67 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
68 machine
->SystemValue
[i
].xyzw
[0].i
[j
] = g_w
;
69 machine
->SystemValue
[i
].xyzw
[1].i
[j
] = g_h
;
70 machine
->SystemValue
[i
].xyzw
[2].i
[j
] = g_d
;
74 if (machine
->SysSemanticToIndex
[TGSI_SEMANTIC_BLOCK_SIZE
] != -1) {
75 unsigned i
= machine
->SysSemanticToIndex
[TGSI_SEMANTIC_BLOCK_SIZE
];
76 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
77 machine
->SystemValue
[i
].xyzw
[0].i
[j
] = b_w
;
78 machine
->SystemValue
[i
].xyzw
[1].i
[j
] = b_h
;
79 machine
->SystemValue
[i
].xyzw
[2].i
[j
] = b_d
;
85 cs_run(const struct sp_compute_shader
*cs
,
86 int g_w
, int g_h
, int g_d
,
87 struct tgsi_exec_machine
*machine
, bool restart
)
90 if (machine
->SysSemanticToIndex
[TGSI_SEMANTIC_BLOCK_ID
] != -1) {
91 unsigned i
= machine
->SysSemanticToIndex
[TGSI_SEMANTIC_BLOCK_ID
];
93 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
94 machine
->SystemValue
[i
].xyzw
[0].i
[j
] = g_w
;
95 machine
->SystemValue
[i
].xyzw
[1].i
[j
] = g_h
;
96 machine
->SystemValue
[i
].xyzw
[2].i
[j
] = g_d
;
99 machine
->NonHelperMask
= (1 << 1) - 1;
102 tgsi_exec_machine_run(machine
, restart
? machine
->pc
: 0);
104 if (machine
->pc
!= -1)
110 run_workgroup(const struct sp_compute_shader
*cs
,
111 int g_w
, int g_h
, int g_d
, int num_threads
,
112 struct tgsi_exec_machine
**machines
)
115 bool grp_hit_barrier
, restart_threads
= false;
118 grp_hit_barrier
= false;
119 for (i
= 0; i
< num_threads
; i
++) {
120 grp_hit_barrier
|= cs_run(cs
, g_w
, g_h
, g_d
, machines
[i
], restart_threads
);
122 restart_threads
= false;
123 if (grp_hit_barrier
) {
124 grp_hit_barrier
= false;
125 restart_threads
= true;
127 } while (restart_threads
);
131 cs_delete(const struct sp_compute_shader
*cs
,
132 struct tgsi_exec_machine
*machine
)
134 if (machine
->Tokens
== cs
->tokens
) {
135 tgsi_exec_machine_bind_shader(machine
, NULL
, NULL
, NULL
, NULL
);
140 fill_grid_size(struct pipe_context
*context
,
141 const struct pipe_grid_info
*info
,
142 uint32_t grid_size
[3])
144 struct pipe_transfer
*transfer
;
146 if (!info
->indirect
) {
147 grid_size
[0] = info
->grid
[0];
148 grid_size
[1] = info
->grid
[1];
149 grid_size
[2] = info
->grid
[2];
152 params
= pipe_buffer_map_range(context
, info
->indirect
,
153 info
->indirect_offset
,
154 3 * sizeof(uint32_t),
161 grid_size
[0] = params
[0];
162 grid_size
[1] = params
[1];
163 grid_size
[2] = params
[2];
164 pipe_buffer_unmap(context
, transfer
);
168 softpipe_launch_grid(struct pipe_context
*context
,
169 const struct pipe_grid_info
*info
)
171 struct softpipe_context
*softpipe
= softpipe_context(context
);
172 struct sp_compute_shader
*cs
= softpipe
->cs
;
173 int num_threads_in_group
;
174 struct tgsi_exec_machine
**machines
;
175 int bwidth
, bheight
, bdepth
;
178 uint32_t grid_size
[3];
179 void *local_mem
= NULL
;
181 softpipe_update_compute_samplers(softpipe
);
182 bwidth
= cs
->info
.properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH
];
183 bheight
= cs
->info
.properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT
];
184 bdepth
= cs
->info
.properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH
];
185 num_threads_in_group
= bwidth
* bheight
* bdepth
;
187 fill_grid_size(context
, info
, grid_size
);
189 if (cs
->shader
.req_local_mem
) {
190 local_mem
= CALLOC(1, cs
->shader
.req_local_mem
);
193 machines
= CALLOC(sizeof(struct tgsi_exec_machine
*), num_threads_in_group
);
197 /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */
198 for (d
= 0; d
< bdepth
; d
++) {
199 for (h
= 0; h
< bheight
; h
++) {
200 for (w
= 0; w
< bwidth
; w
++) {
201 int idx
= w
+ (h
* bwidth
) + (d
* bheight
* bwidth
);
202 machines
[idx
] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE
);
204 machines
[idx
]->LocalMem
= local_mem
;
205 machines
[idx
]->LocalMemSize
= cs
->shader
.req_local_mem
;
206 cs_prepare(cs
, machines
[idx
],
208 grid_size
[0], grid_size
[1], grid_size
[2],
209 bwidth
, bheight
, bdepth
,
210 (struct tgsi_sampler
*)softpipe
->tgsi
.sampler
[PIPE_SHADER_COMPUTE
],
211 (struct tgsi_image
*)softpipe
->tgsi
.image
[PIPE_SHADER_COMPUTE
],
212 (struct tgsi_buffer
*)softpipe
->tgsi
.buffer
[PIPE_SHADER_COMPUTE
]);
213 tgsi_exec_set_constant_buffers(machines
[idx
], PIPE_MAX_CONSTANT_BUFFERS
,
214 softpipe
->mapped_constants
[PIPE_SHADER_COMPUTE
],
215 softpipe
->const_buffer_size
[PIPE_SHADER_COMPUTE
]);
220 for (g_d
= 0; g_d
< grid_size
[2]; g_d
++) {
221 for (g_h
= 0; g_h
< grid_size
[1]; g_h
++) {
222 for (g_w
= 0; g_w
< grid_size
[0]; g_w
++) {
223 run_workgroup(cs
, g_w
, g_h
, g_d
, num_threads_in_group
, machines
);
228 for (i
= 0; i
< num_threads_in_group
; i
++) {
229 cs_delete(cs
, machines
[i
]);
230 tgsi_exec_machine_destroy(machines
[i
]);