elf.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
  2. #ifndef _GNU_SOURCE
  3. #define _GNU_SOURCE
  4. #endif
  5. #include <libelf.h>
  6. #include <gelf.h>
  7. #include <fcntl.h>
  8. #include <linux/kernel.h>
  9. #include "libbpf_internal.h"
  10. #include "str_error.h"
  11. /* A SHT_GNU_versym section holds 16-bit words. This bit is set if
  12. * the symbol is hidden and can only be seen when referenced using an
  13. * explicit version number. This is a GNU extension.
  14. */
  15. #define VERSYM_HIDDEN 0x8000
  16. /* This is the mask for the rest of the data in a word read from a
  17. * SHT_GNU_versym section.
  18. */
  19. #define VERSYM_VERSION 0x7fff
  20. int elf_open(const char *binary_path, struct elf_fd *elf_fd)
  21. {
  22. char errmsg[STRERR_BUFSIZE];
  23. int fd, ret;
  24. Elf *elf;
  25. elf_fd->elf = NULL;
  26. elf_fd->fd = -1;
  27. if (elf_version(EV_CURRENT) == EV_NONE) {
  28. pr_warn("elf: failed to init libelf for %s\n", binary_path);
  29. return -LIBBPF_ERRNO__LIBELF;
  30. }
  31. fd = open(binary_path, O_RDONLY | O_CLOEXEC);
  32. if (fd < 0) {
  33. ret = -errno;
  34. pr_warn("elf: failed to open %s: %s\n", binary_path,
  35. libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
  36. return ret;
  37. }
  38. elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
  39. if (!elf) {
  40. pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
  41. close(fd);
  42. return -LIBBPF_ERRNO__FORMAT;
  43. }
  44. elf_fd->fd = fd;
  45. elf_fd->elf = elf;
  46. return 0;
  47. }
  48. void elf_close(struct elf_fd *elf_fd)
  49. {
  50. if (!elf_fd)
  51. return;
  52. elf_end(elf_fd->elf);
  53. close(elf_fd->fd);
  54. }
  55. /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
  56. static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
  57. {
  58. while ((scn = elf_nextscn(elf, scn)) != NULL) {
  59. GElf_Shdr sh;
  60. if (!gelf_getshdr(scn, &sh))
  61. continue;
  62. if (sh.sh_type == sh_type)
  63. return scn;
  64. }
  65. return NULL;
  66. }
  67. struct elf_sym {
  68. const char *name;
  69. GElf_Sym sym;
  70. GElf_Shdr sh;
  71. int ver;
  72. bool hidden;
  73. };
  74. struct elf_sym_iter {
  75. Elf *elf;
  76. Elf_Data *syms;
  77. Elf_Data *versyms;
  78. Elf_Data *verdefs;
  79. size_t nr_syms;
  80. size_t strtabidx;
  81. size_t verdef_strtabidx;
  82. size_t next_sym_idx;
  83. struct elf_sym sym;
  84. int st_type;
  85. };
  86. static int elf_sym_iter_new(struct elf_sym_iter *iter,
  87. Elf *elf, const char *binary_path,
  88. int sh_type, int st_type)
  89. {
  90. Elf_Scn *scn = NULL;
  91. GElf_Ehdr ehdr;
  92. GElf_Shdr sh;
  93. memset(iter, 0, sizeof(*iter));
  94. if (!gelf_getehdr(elf, &ehdr)) {
  95. pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
  96. return -EINVAL;
  97. }
  98. scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
  99. if (!scn) {
  100. pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
  101. binary_path);
  102. return -ENOENT;
  103. }
  104. if (!gelf_getshdr(scn, &sh))
  105. return -EINVAL;
  106. iter->strtabidx = sh.sh_link;
  107. iter->syms = elf_getdata(scn, 0);
  108. if (!iter->syms) {
  109. pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
  110. binary_path, elf_errmsg(-1));
  111. return -EINVAL;
  112. }
  113. iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
  114. iter->elf = elf;
  115. iter->st_type = st_type;
  116. /* Version symbol table is meaningful to dynsym only */
  117. if (sh_type != SHT_DYNSYM)
  118. return 0;
  119. scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL);
  120. if (!scn)
  121. return 0;
  122. iter->versyms = elf_getdata(scn, 0);
  123. scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL);
  124. if (!scn)
  125. return 0;
  126. iter->verdefs = elf_getdata(scn, 0);
  127. if (!iter->verdefs || !gelf_getshdr(scn, &sh)) {
  128. pr_warn("elf: failed to get verdef ELF section in '%s'\n", binary_path);
  129. return -EINVAL;
  130. }
  131. iter->verdef_strtabidx = sh.sh_link;
  132. return 0;
  133. }
  134. static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
  135. {
  136. struct elf_sym *ret = &iter->sym;
  137. GElf_Sym *sym = &ret->sym;
  138. const char *name = NULL;
  139. GElf_Versym versym;
  140. Elf_Scn *sym_scn;
  141. size_t idx;
  142. for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
  143. if (!gelf_getsym(iter->syms, idx, sym))
  144. continue;
  145. if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
  146. continue;
  147. name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
  148. if (!name)
  149. continue;
  150. sym_scn = elf_getscn(iter->elf, sym->st_shndx);
  151. if (!sym_scn)
  152. continue;
  153. if (!gelf_getshdr(sym_scn, &ret->sh))
  154. continue;
  155. iter->next_sym_idx = idx + 1;
  156. ret->name = name;
  157. ret->ver = 0;
  158. ret->hidden = false;
  159. if (iter->versyms) {
  160. if (!gelf_getversym(iter->versyms, idx, &versym))
  161. continue;
  162. ret->ver = versym & VERSYM_VERSION;
  163. ret->hidden = versym & VERSYM_HIDDEN;
  164. }
  165. return ret;
  166. }
  167. return NULL;
  168. }
  169. static const char *elf_get_vername(struct elf_sym_iter *iter, int ver)
  170. {
  171. GElf_Verdaux verdaux;
  172. GElf_Verdef verdef;
  173. int offset;
  174. if (!iter->verdefs)
  175. return NULL;
  176. offset = 0;
  177. while (gelf_getverdef(iter->verdefs, offset, &verdef)) {
  178. if (verdef.vd_ndx != ver) {
  179. if (!verdef.vd_next)
  180. break;
  181. offset += verdef.vd_next;
  182. continue;
  183. }
  184. if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux))
  185. break;
  186. return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name);
  187. }
  188. return NULL;
  189. }
  190. static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym,
  191. const char *name, size_t name_len, const char *lib_ver)
  192. {
  193. const char *ver_name;
  194. /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER
  195. * make sure the func part matches the user specified name
  196. */
  197. if (strncmp(sym->name, name, name_len) != 0)
  198. return false;
  199. /* ...but we don't want a search for "foo" to match 'foo2" also, so any
  200. * additional characters in sname should be of the form "@@LIB".
  201. */
  202. if (sym->name[name_len] != '\0' && sym->name[name_len] != '@')
  203. return false;
  204. /* If user does not specify symbol version, then we got a match */
  205. if (!lib_ver)
  206. return true;
  207. /* If user specifies symbol version, for dynamic symbols,
  208. * get version name from ELF verdef section for comparison.
  209. */
  210. if (sh_type == SHT_DYNSYM) {
  211. ver_name = elf_get_vername(iter, sym->ver);
  212. if (!ver_name)
  213. return false;
  214. return strcmp(ver_name, lib_ver) == 0;
  215. }
  216. /* For normal symbols, it is already in form of func@LIB_VER */
  217. return strcmp(sym->name, name) == 0;
  218. }
  219. /* Transform symbol's virtual address (absolute for binaries and relative
  220. * for shared libs) into file offset, which is what kernel is expecting
  221. * for uprobe/uretprobe attachment.
  222. * See Documentation/trace/uprobetracer.rst for more details. This is done
  223. * by looking up symbol's containing section's header and using iter's virtual
  224. * address (sh_addr) and corresponding file offset (sh_offset) to transform
  225. * sym.st_value (virtual address) into desired final file offset.
  226. */
  227. static unsigned long elf_sym_offset(struct elf_sym *sym)
  228. {
  229. return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
  230. }
  231. /* Find offset of function name in the provided ELF object. "binary_path" is
  232. * the path to the ELF binary represented by "elf", and only used for error
  233. * reporting matters. "name" matches symbol name or name@@LIB for library
  234. * functions.
  235. */
  236. long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
  237. {
  238. int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
  239. const char *at_symbol, *lib_ver;
  240. bool is_shared_lib;
  241. long ret = -ENOENT;
  242. size_t name_len;
  243. GElf_Ehdr ehdr;
  244. if (!gelf_getehdr(elf, &ehdr)) {
  245. pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
  246. ret = -LIBBPF_ERRNO__FORMAT;
  247. goto out;
  248. }
  249. /* for shared lib case, we do not need to calculate relative offset */
  250. is_shared_lib = ehdr.e_type == ET_DYN;
  251. /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */
  252. at_symbol = strchr(name, '@');
  253. if (at_symbol) {
  254. name_len = at_symbol - name;
  255. /* skip second @ if it's @@LIB_VER case */
  256. if (at_symbol[1] == '@')
  257. at_symbol++;
  258. lib_ver = at_symbol + 1;
  259. } else {
  260. name_len = strlen(name);
  261. lib_ver = NULL;
  262. }
  263. /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
  264. * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
  265. * linked binary may not have SHT_DYMSYM, so absence of a section should not be
  266. * reported as a warning/error.
  267. */
  268. for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
  269. struct elf_sym_iter iter;
  270. struct elf_sym *sym;
  271. int last_bind = -1;
  272. int cur_bind;
  273. ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
  274. if (ret == -ENOENT)
  275. continue;
  276. if (ret)
  277. goto out;
  278. while ((sym = elf_sym_iter_next(&iter))) {
  279. if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver))
  280. continue;
  281. cur_bind = GELF_ST_BIND(sym->sym.st_info);
  282. if (ret > 0) {
  283. /* handle multiple matches */
  284. if (elf_sym_offset(sym) == ret) {
  285. /* same offset, no problem */
  286. continue;
  287. } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
  288. /* Only accept one non-weak bind. */
  289. pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
  290. sym->name, name, binary_path);
  291. ret = -LIBBPF_ERRNO__FORMAT;
  292. goto out;
  293. } else if (cur_bind == STB_WEAK) {
  294. /* already have a non-weak bind, and
  295. * this is a weak bind, so ignore.
  296. */
  297. continue;
  298. }
  299. }
  300. ret = elf_sym_offset(sym);
  301. last_bind = cur_bind;
  302. }
  303. if (ret > 0)
  304. break;
  305. }
  306. if (ret > 0) {
  307. pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
  308. ret);
  309. } else {
  310. if (ret == 0) {
  311. pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
  312. is_shared_lib ? "should not be 0 in a shared library" :
  313. "try using shared library path instead");
  314. ret = -ENOENT;
  315. } else {
  316. pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
  317. }
  318. }
  319. out:
  320. return ret;
  321. }
  322. /* Find offset of function name in ELF object specified by path. "name" matches
  323. * symbol name or name@@LIB for library functions.
  324. */
  325. long elf_find_func_offset_from_file(const char *binary_path, const char *name)
  326. {
  327. struct elf_fd elf_fd;
  328. long ret = -ENOENT;
  329. ret = elf_open(binary_path, &elf_fd);
  330. if (ret)
  331. return ret;
  332. ret = elf_find_func_offset(elf_fd.elf, binary_path, name);
  333. elf_close(&elf_fd);
  334. return ret;
  335. }
  336. struct symbol {
  337. const char *name;
  338. int bind;
  339. int idx;
  340. };
  341. static int symbol_cmp(const void *a, const void *b)
  342. {
  343. const struct symbol *sym_a = a;
  344. const struct symbol *sym_b = b;
  345. return strcmp(sym_a->name, sym_b->name);
  346. }
  347. /*
  348. * Return offsets in @poffsets for symbols specified in @syms array argument.
  349. * On success returns 0 and offsets are returned in allocated array with @cnt
  350. * size, that needs to be released by the caller.
  351. */
  352. int elf_resolve_syms_offsets(const char *binary_path, int cnt,
  353. const char **syms, unsigned long **poffsets,
  354. int st_type)
  355. {
  356. int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
  357. int err = 0, i, cnt_done = 0;
  358. unsigned long *offsets;
  359. struct symbol *symbols;
  360. struct elf_fd elf_fd;
  361. err = elf_open(binary_path, &elf_fd);
  362. if (err)
  363. return err;
  364. offsets = calloc(cnt, sizeof(*offsets));
  365. symbols = calloc(cnt, sizeof(*symbols));
  366. if (!offsets || !symbols) {
  367. err = -ENOMEM;
  368. goto out;
  369. }
  370. for (i = 0; i < cnt; i++) {
  371. symbols[i].name = syms[i];
  372. symbols[i].idx = i;
  373. }
  374. qsort(symbols, cnt, sizeof(*symbols), symbol_cmp);
  375. for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
  376. struct elf_sym_iter iter;
  377. struct elf_sym *sym;
  378. err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], st_type);
  379. if (err == -ENOENT)
  380. continue;
  381. if (err)
  382. goto out;
  383. while ((sym = elf_sym_iter_next(&iter))) {
  384. unsigned long sym_offset = elf_sym_offset(sym);
  385. int bind = GELF_ST_BIND(sym->sym.st_info);
  386. struct symbol *found, tmp = {
  387. .name = sym->name,
  388. };
  389. unsigned long *offset;
  390. found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp);
  391. if (!found)
  392. continue;
  393. offset = &offsets[found->idx];
  394. if (*offset > 0) {
  395. /* same offset, no problem */
  396. if (*offset == sym_offset)
  397. continue;
  398. /* handle multiple matches */
  399. if (found->bind != STB_WEAK && bind != STB_WEAK) {
  400. /* Only accept one non-weak bind. */
  401. pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n",
  402. sym->name, sym_offset, binary_path, *offset);
  403. err = -ESRCH;
  404. goto out;
  405. } else if (bind == STB_WEAK) {
  406. /* already have a non-weak bind, and
  407. * this is a weak bind, so ignore.
  408. */
  409. continue;
  410. }
  411. } else {
  412. cnt_done++;
  413. }
  414. *offset = sym_offset;
  415. found->bind = bind;
  416. }
  417. }
  418. if (cnt != cnt_done) {
  419. err = -ENOENT;
  420. goto out;
  421. }
  422. *poffsets = offsets;
  423. out:
  424. free(symbols);
  425. if (err)
  426. free(offsets);
  427. elf_close(&elf_fd);
  428. return err;
  429. }
  430. /*
  431. * Return offsets in @poffsets for symbols specified by @pattern argument.
  432. * On success returns 0 and offsets are returned in allocated @poffsets
  433. * array with the @pctn size, that needs to be released by the caller.
  434. */
  435. int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
  436. unsigned long **poffsets, size_t *pcnt)
  437. {
  438. int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM };
  439. unsigned long *offsets = NULL;
  440. size_t cap = 0, cnt = 0;
  441. struct elf_fd elf_fd;
  442. int err = 0, i;
  443. err = elf_open(binary_path, &elf_fd);
  444. if (err)
  445. return err;
  446. for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
  447. struct elf_sym_iter iter;
  448. struct elf_sym *sym;
  449. err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
  450. if (err == -ENOENT)
  451. continue;
  452. if (err)
  453. goto out;
  454. while ((sym = elf_sym_iter_next(&iter))) {
  455. if (!glob_match(sym->name, pattern))
  456. continue;
  457. err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets),
  458. cnt + 1);
  459. if (err)
  460. goto out;
  461. offsets[cnt++] = elf_sym_offset(sym);
  462. }
  463. /* If we found anything in the first symbol section,
  464. * do not search others to avoid duplicates.
  465. */
  466. if (cnt)
  467. break;
  468. }
  469. if (cnt) {
  470. *poffsets = offsets;
  471. *pcnt = cnt;
  472. } else {
  473. err = -ENOENT;
  474. }
  475. out:
  476. if (err)
  477. free(offsets);
  478. elf_close(&elf_fd);
  479. return err;
  480. }