radeonsi: use ac_shader_config
[mesa.git] / src / amd / common / ac_binary.c
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ac_binary.h"
25
26 #include "util/u_math.h"
27 #include "util/u_memory.h"
28
29 #include <gelf.h>
30 #include <libelf.h>
31 #include <stdio.h>
32
33 #include <sid.h>
34
/* Keys that appear in the register slot of a .AMDGPU.config entry but carry
 * spill counts: ac_parse_shader_binary_config() stores their values in
 * ac_shader_config::spilled_sgprs and ::spilled_vgprs respectively.
 */
enum {
	SPILLED_SGPRS = 0x4,
	SPILLED_VGPRS = 0x8,
};
37
38 static void parse_symbol_table(Elf_Data *symbol_table_data,
39 const GElf_Shdr *symbol_table_header,
40 struct ac_shader_binary *binary)
41 {
42 GElf_Sym symbol;
43 unsigned i = 0;
44 unsigned symbol_count =
45 symbol_table_header->sh_size / symbol_table_header->sh_entsize;
46
47 /* We are over allocating this list, because symbol_count gives the
48 * total number of symbols, and we will only be filling the list
49 * with offsets of global symbols. The memory savings from
50 * allocating the correct size of this list will be small, and
51 * I don't think it is worth the cost of pre-computing the number
52 * of global symbols.
53 */
54 binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));
55
56 while (gelf_getsym(symbol_table_data, i++, &symbol)) {
57 unsigned i;
58 if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
59 symbol.st_shndx == 0 /* Undefined symbol */) {
60 continue;
61 }
62
63 binary->global_symbol_offsets[binary->global_symbol_count] =
64 symbol.st_value;
65
66 /* Sort the list using bubble sort. This list will usually
67 * be small. */
68 for (i = binary->global_symbol_count; i > 0; --i) {
69 uint64_t lhs = binary->global_symbol_offsets[i - 1];
70 uint64_t rhs = binary->global_symbol_offsets[i];
71 if (lhs < rhs) {
72 break;
73 }
74 binary->global_symbol_offsets[i] = lhs;
75 binary->global_symbol_offsets[i - 1] = rhs;
76 }
77 ++binary->global_symbol_count;
78 }
79 }
80
81 static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
82 unsigned symbol_sh_link,
83 struct ac_shader_binary *binary)
84 {
85 unsigned i;
86
87 if (!relocs || !symbols || !binary->reloc_count) {
88 return;
89 }
90 binary->relocs = CALLOC(binary->reloc_count,
91 sizeof(struct ac_shader_reloc));
92 for (i = 0; i < binary->reloc_count; i++) {
93 GElf_Sym symbol;
94 GElf_Rel rel;
95 char *symbol_name;
96 struct ac_shader_reloc *reloc = &binary->relocs[i];
97
98 gelf_getrel(relocs, i, &rel);
99 gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
100 symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
101
102 reloc->offset = rel.r_offset;
103 strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
104 reloc->name[sizeof(reloc->name)-1] = 0;
105 }
106 }
107
108 bool ac_elf_read(const char *elf_data, unsigned elf_size,
109 struct ac_shader_binary *binary)
110 {
111 char *elf_buffer;
112 Elf *elf;
113 Elf_Scn *section = NULL;
114 Elf_Data *symbols = NULL, *relocs = NULL;
115 size_t section_str_index;
116 unsigned symbol_sh_link = 0;
117 bool success = true;
118
119 /* One of the libelf implementations
120 * (http://www.mr511.de/software/english.htm) requires calling
121 * elf_version() before elf_memory().
122 */
123 elf_version(EV_CURRENT);
124 elf_buffer = MALLOC(elf_size);
125 memcpy(elf_buffer, elf_data, elf_size);
126
127 elf = elf_memory(elf_buffer, elf_size);
128
129 elf_getshdrstrndx(elf, &section_str_index);
130
131 while ((section = elf_nextscn(elf, section))) {
132 const char *name;
133 Elf_Data *section_data = NULL;
134 GElf_Shdr section_header;
135 if (gelf_getshdr(section, &section_header) != &section_header) {
136 fprintf(stderr, "Failed to read ELF section header\n");
137 success = false;
138 break;
139 }
140 name = elf_strptr(elf, section_str_index, section_header.sh_name);
141 if (!strcmp(name, ".text")) {
142 section_data = elf_getdata(section, section_data);
143 binary->code_size = section_data->d_size;
144 binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
145 memcpy(binary->code, section_data->d_buf, binary->code_size);
146 } else if (!strcmp(name, ".AMDGPU.config")) {
147 section_data = elf_getdata(section, section_data);
148 binary->config_size = section_data->d_size;
149 if (!binary->config_size) {
150 fprintf(stderr, ".AMDGPU.config is empty!\n");
151 success = false;
152 break;
153 }
154 binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
155 memcpy(binary->config, section_data->d_buf, binary->config_size);
156 } else if (!strcmp(name, ".AMDGPU.disasm")) {
157 /* Always read disassembly if it's available. */
158 section_data = elf_getdata(section, section_data);
159 binary->disasm_string = strndup(section_data->d_buf,
160 section_data->d_size);
161 } else if (!strncmp(name, ".rodata", 7)) {
162 section_data = elf_getdata(section, section_data);
163 binary->rodata_size = section_data->d_size;
164 binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
165 memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
166 } else if (!strncmp(name, ".symtab", 7)) {
167 symbols = elf_getdata(section, section_data);
168 symbol_sh_link = section_header.sh_link;
169 parse_symbol_table(symbols, &section_header, binary);
170 } else if (!strcmp(name, ".rel.text")) {
171 relocs = elf_getdata(section, section_data);
172 binary->reloc_count = section_header.sh_size /
173 section_header.sh_entsize;
174 }
175 }
176
177 parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
178
179 if (elf){
180 elf_end(elf);
181 }
182 FREE(elf_buffer);
183
184 /* Cache the config size per symbol */
185 if (binary->global_symbol_count) {
186 binary->config_size_per_symbol =
187 binary->config_size / binary->global_symbol_count;
188 } else {
189 binary->global_symbol_count = 1;
190 binary->config_size_per_symbol = binary->config_size;
191 }
192 return success;
193 }
194
195 const unsigned char *ac_shader_binary_config_start(
196 const struct ac_shader_binary *binary,
197 uint64_t symbol_offset)
198 {
199 unsigned i;
200 for (i = 0; i < binary->global_symbol_count; ++i) {
201 if (binary->global_symbol_offsets[i] == symbol_offset) {
202 unsigned offset = i * binary->config_size_per_symbol;
203 return binary->config + offset;
204 }
205 }
206 return binary->config;
207 }
208
209 /* Parse configuration data in .AMDGPU.config section format. */
210 void ac_parse_shader_binary_config(const char *data, size_t nbytes,
211 bool really_needs_scratch,
212 struct ac_shader_config *conf)
213 {
214 uint32_t wavesize = 0;
215
216 for (size_t i = 0; i < nbytes; i += 8) {
217 unsigned reg = util_le32_to_cpu(*(uint32_t*)(data + i));
218 unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i + 4));
219 switch (reg) {
220 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
221 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
222 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
223 case R_00B848_COMPUTE_PGM_RSRC1:
224 case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
225 conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
226 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
227 conf->float_mode = G_00B028_FLOAT_MODE(value);
228 conf->rsrc1 = value;
229 break;
230 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
231 conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
232 break;
233 case R_00B84C_COMPUTE_PGM_RSRC2:
234 conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
235 conf->rsrc2 = value;
236 break;
237 case R_0286CC_SPI_PS_INPUT_ENA:
238 conf->spi_ps_input_ena = value;
239 break;
240 case R_0286D0_SPI_PS_INPUT_ADDR:
241 conf->spi_ps_input_addr = value;
242 break;
243 case R_0286E8_SPI_TMPRING_SIZE:
244 case R_00B860_COMPUTE_TMPRING_SIZE:
245 /* WAVESIZE is in units of 256 dwords. */
246 wavesize = value;
247 break;
248 case SPILLED_SGPRS:
249 conf->spilled_sgprs = value;
250 break;
251 case SPILLED_VGPRS:
252 conf->spilled_vgprs = value;
253 break;
254 default:
255 {
256 static bool printed;
257
258 if (!printed) {
259 fprintf(stderr, "Warning: LLVM emitted unknown "
260 "config register: 0x%x\n", reg);
261 printed = true;
262 }
263 }
264 break;
265 }
266 }
267
268 if (!conf->spi_ps_input_addr)
269 conf->spi_ps_input_addr = conf->spi_ps_input_ena;
270
271 if (really_needs_scratch) {
272 /* sgprs spills aren't spilling */
273 conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4;
274 }
275 }
276
/* Relocation symbol names that ac_shader_binary_read_config() searches for
 * to decide whether a shader really needs a scratch buffer.
 */
static const char *scratch_rsrc_dword0_symbol = "SCRATCH_RSRC_DWORD0";
static const char *scratch_rsrc_dword1_symbol = "SCRATCH_RSRC_DWORD1";
282
283 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
284 struct ac_shader_config *conf,
285 unsigned symbol_offset,
286 bool supports_spill)
287 {
288 unsigned i;
289 const char *config =
290 (const char *)ac_shader_binary_config_start(binary, symbol_offset);
291 bool really_needs_scratch = false;
292 /* LLVM adds SGPR spills to the scratch size.
293 * Find out if we really need the scratch buffer.
294 */
295 if (supports_spill) {
296 really_needs_scratch = true;
297 } else {
298 for (i = 0; i < binary->reloc_count; i++) {
299 const struct ac_shader_reloc *reloc = &binary->relocs[i];
300
301 if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
302 !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
303 really_needs_scratch = true;
304 break;
305 }
306 }
307 }
308
309 ac_parse_shader_binary_config(config, binary->config_size_per_symbol,
310 really_needs_scratch, conf);
311 }
312
313 void ac_shader_binary_clean(struct ac_shader_binary *b)
314 {
315 if (!b)
316 return;
317 FREE(b->code);
318 FREE(b->config);
319 FREE(b->rodata);
320 FREE(b->global_symbol_offsets);
321 FREE(b->relocs);
322 FREE(b->disasm_string);
323 FREE(b->llvm_ir_string);
324 }