/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ac_binary.h"

#include "util/u_math.h"
#include "util/u_memory.h"

#include <gelf.h>
#include <libelf.h>
#include <stdio.h>
#include <string.h>

#include <sid.h>

/* Pseudo "register" ids found in the .AMDGPU.config section; the config
 * parser in this file stores their values as the spilled SGPR and VGPR
 * figures respectively.
 */
#define SPILLED_SGPRS 0x4
#define SPILLED_VGPRS 0x8

38 static void parse_symbol_table(Elf_Data
*symbol_table_data
,
39 const GElf_Shdr
*symbol_table_header
,
40 struct ac_shader_binary
*binary
)
44 unsigned symbol_count
=
45 symbol_table_header
->sh_size
/ symbol_table_header
->sh_entsize
;
47 /* We are over allocating this list, because symbol_count gives the
48 * total number of symbols, and we will only be filling the list
49 * with offsets of global symbols. The memory savings from
50 * allocating the correct size of this list will be small, and
51 * I don't think it is worth the cost of pre-computing the number
54 binary
->global_symbol_offsets
= CALLOC(symbol_count
, sizeof(uint64_t));
56 while (gelf_getsym(symbol_table_data
, i
++, &symbol
)) {
58 if (GELF_ST_BIND(symbol
.st_info
) != STB_GLOBAL
||
59 symbol
.st_shndx
== 0 /* Undefined symbol */) {
63 binary
->global_symbol_offsets
[binary
->global_symbol_count
] =
66 /* Sort the list using bubble sort. This list will usually
68 for (i
= binary
->global_symbol_count
; i
> 0; --i
) {
69 uint64_t lhs
= binary
->global_symbol_offsets
[i
- 1];
70 uint64_t rhs
= binary
->global_symbol_offsets
[i
];
74 binary
->global_symbol_offsets
[i
] = lhs
;
75 binary
->global_symbol_offsets
[i
- 1] = rhs
;
77 ++binary
->global_symbol_count
;
81 static void parse_relocs(Elf
*elf
, Elf_Data
*relocs
, Elf_Data
*symbols
,
82 unsigned symbol_sh_link
,
83 struct ac_shader_binary
*binary
)
87 if (!relocs
|| !symbols
|| !binary
->reloc_count
) {
90 binary
->relocs
= CALLOC(binary
->reloc_count
,
91 sizeof(struct ac_shader_reloc
));
92 for (i
= 0; i
< binary
->reloc_count
; i
++) {
96 struct ac_shader_reloc
*reloc
= &binary
->relocs
[i
];
98 gelf_getrel(relocs
, i
, &rel
);
99 gelf_getsym(symbols
, GELF_R_SYM(rel
.r_info
), &symbol
);
100 symbol_name
= elf_strptr(elf
, symbol_sh_link
, symbol
.st_name
);
102 reloc
->offset
= rel
.r_offset
;
103 strncpy(reloc
->name
, symbol_name
, sizeof(reloc
->name
)-1);
104 reloc
->name
[sizeof(reloc
->name
)-1] = 0;
108 bool ac_elf_read(const char *elf_data
, unsigned elf_size
,
109 struct ac_shader_binary
*binary
)
113 Elf_Scn
*section
= NULL
;
114 Elf_Data
*symbols
= NULL
, *relocs
= NULL
;
115 size_t section_str_index
;
116 unsigned symbol_sh_link
= 0;
119 /* One of the libelf implementations
120 * (http://www.mr511.de/software/english.htm) requires calling
121 * elf_version() before elf_memory().
123 elf_version(EV_CURRENT
);
124 elf_buffer
= MALLOC(elf_size
);
125 memcpy(elf_buffer
, elf_data
, elf_size
);
127 elf
= elf_memory(elf_buffer
, elf_size
);
129 elf_getshdrstrndx(elf
, §ion_str_index
);
131 while ((section
= elf_nextscn(elf
, section
))) {
133 Elf_Data
*section_data
= NULL
;
134 GElf_Shdr section_header
;
135 if (gelf_getshdr(section
, §ion_header
) != §ion_header
) {
136 fprintf(stderr
, "Failed to read ELF section header\n");
140 name
= elf_strptr(elf
, section_str_index
, section_header
.sh_name
);
141 if (!strcmp(name
, ".text")) {
142 section_data
= elf_getdata(section
, section_data
);
143 binary
->code_size
= section_data
->d_size
;
144 binary
->code
= MALLOC(binary
->code_size
* sizeof(unsigned char));
145 memcpy(binary
->code
, section_data
->d_buf
, binary
->code_size
);
146 } else if (!strcmp(name
, ".AMDGPU.config")) {
147 section_data
= elf_getdata(section
, section_data
);
148 binary
->config_size
= section_data
->d_size
;
149 if (!binary
->config_size
) {
150 fprintf(stderr
, ".AMDGPU.config is empty!\n");
154 binary
->config
= MALLOC(binary
->config_size
* sizeof(unsigned char));
155 memcpy(binary
->config
, section_data
->d_buf
, binary
->config_size
);
156 } else if (!strcmp(name
, ".AMDGPU.disasm")) {
157 /* Always read disassembly if it's available. */
158 section_data
= elf_getdata(section
, section_data
);
159 binary
->disasm_string
= strndup(section_data
->d_buf
,
160 section_data
->d_size
);
161 } else if (!strncmp(name
, ".rodata", 7)) {
162 section_data
= elf_getdata(section
, section_data
);
163 binary
->rodata_size
= section_data
->d_size
;
164 binary
->rodata
= MALLOC(binary
->rodata_size
* sizeof(unsigned char));
165 memcpy(binary
->rodata
, section_data
->d_buf
, binary
->rodata_size
);
166 } else if (!strncmp(name
, ".symtab", 7)) {
167 symbols
= elf_getdata(section
, section_data
);
168 symbol_sh_link
= section_header
.sh_link
;
169 parse_symbol_table(symbols
, §ion_header
, binary
);
170 } else if (!strcmp(name
, ".rel.text")) {
171 relocs
= elf_getdata(section
, section_data
);
172 binary
->reloc_count
= section_header
.sh_size
/
173 section_header
.sh_entsize
;
177 parse_relocs(elf
, relocs
, symbols
, symbol_sh_link
, binary
);
184 /* Cache the config size per symbol */
185 if (binary
->global_symbol_count
) {
186 binary
->config_size_per_symbol
=
187 binary
->config_size
/ binary
->global_symbol_count
;
189 binary
->global_symbol_count
= 1;
190 binary
->config_size_per_symbol
= binary
->config_size
;
195 const unsigned char *ac_shader_binary_config_start(
196 const struct ac_shader_binary
*binary
,
197 uint64_t symbol_offset
)
200 for (i
= 0; i
< binary
->global_symbol_count
; ++i
) {
201 if (binary
->global_symbol_offsets
[i
] == symbol_offset
) {
202 unsigned offset
= i
* binary
->config_size_per_symbol
;
203 return binary
->config
+ offset
;
206 return binary
->config
;
209 /* Parse configuration data in .AMDGPU.config section format. */
210 void ac_parse_shader_binary_config(const char *data
, size_t nbytes
,
211 bool really_needs_scratch
,
212 struct ac_shader_config
*conf
)
214 uint32_t wavesize
= 0;
216 for (size_t i
= 0; i
< nbytes
; i
+= 8) {
217 unsigned reg
= util_le32_to_cpu(*(uint32_t*)(data
+ i
));
218 unsigned value
= util_le32_to_cpu(*(uint32_t*)(data
+ i
+ 4));
220 case R_00B028_SPI_SHADER_PGM_RSRC1_PS
:
221 case R_00B128_SPI_SHADER_PGM_RSRC1_VS
:
222 case R_00B228_SPI_SHADER_PGM_RSRC1_GS
:
223 case R_00B848_COMPUTE_PGM_RSRC1
:
224 case R_00B428_SPI_SHADER_PGM_RSRC1_HS
:
225 conf
->num_sgprs
= MAX2(conf
->num_sgprs
, (G_00B028_SGPRS(value
) + 1) * 8);
226 conf
->num_vgprs
= MAX2(conf
->num_vgprs
, (G_00B028_VGPRS(value
) + 1) * 4);
227 conf
->float_mode
= G_00B028_FLOAT_MODE(value
);
229 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS
:
230 conf
->lds_size
= MAX2(conf
->lds_size
, G_00B02C_EXTRA_LDS_SIZE(value
));
232 case R_00B84C_COMPUTE_PGM_RSRC2
:
233 conf
->lds_size
= MAX2(conf
->lds_size
, G_00B84C_LDS_SIZE(value
));
235 case R_0286CC_SPI_PS_INPUT_ENA
:
236 conf
->spi_ps_input_ena
= value
;
238 case R_0286D0_SPI_PS_INPUT_ADDR
:
239 conf
->spi_ps_input_addr
= value
;
241 case R_0286E8_SPI_TMPRING_SIZE
:
242 case R_00B860_COMPUTE_TMPRING_SIZE
:
243 /* WAVESIZE is in units of 256 dwords. */
247 conf
->spilled_sgprs
= value
;
250 conf
->spilled_vgprs
= value
;
257 fprintf(stderr
, "Warning: LLVM emitted unknown "
258 "config register: 0x%x\n", reg
);
266 if (!conf
->spi_ps_input_addr
)
267 conf
->spi_ps_input_addr
= conf
->spi_ps_input_ena
;
269 if (really_needs_scratch
) {
270 /* sgprs spills aren't spilling */
271 conf
->scratch_bytes_per_wave
= G_00B860_WAVESIZE(wavesize
) * 256 * 4;
/* Names of the relocation symbols whose presence shows that a shader
 * really uses the scratch buffer.
 */
static const char *scratch_rsrc_dword0_symbol =
	"SCRATCH_RSRC_DWORD0";

static const char *scratch_rsrc_dword1_symbol =
	"SCRATCH_RSRC_DWORD1";

281 void ac_shader_binary_read_config(struct ac_shader_binary
*binary
,
282 struct ac_shader_config
*conf
,
283 unsigned symbol_offset
,
288 (const char *)ac_shader_binary_config_start(binary
, symbol_offset
);
289 bool really_needs_scratch
= false;
290 /* LLVM adds SGPR spills to the scratch size.
291 * Find out if we really need the scratch buffer.
293 if (supports_spill
) {
294 really_needs_scratch
= true;
296 for (i
= 0; i
< binary
->reloc_count
; i
++) {
297 const struct ac_shader_reloc
*reloc
= &binary
->relocs
[i
];
299 if (!strcmp(scratch_rsrc_dword0_symbol
, reloc
->name
) ||
300 !strcmp(scratch_rsrc_dword1_symbol
, reloc
->name
)) {
301 really_needs_scratch
= true;
307 ac_parse_shader_binary_config(config
, binary
->config_size_per_symbol
,
308 really_needs_scratch
, conf
);
311 void ac_shader_binary_clean(struct ac_shader_binary
*b
)
318 FREE(b
->global_symbol_offsets
);
320 FREE(b
->disasm_string
);
321 FREE(b
->llvm_ir_string
);