elf_kexec.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Load ELF vmlinux file for the kexec_file_load syscall.
  4. *
  5. * Copyright (C) 2021 Huawei Technologies Co, Ltd.
  6. *
  7. * Author: Liao Chang (liaochang1@huawei.com)
  8. *
  9. * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
  10. * for kernel.
  11. */
  12. #define pr_fmt(fmt) "kexec_image: " fmt
  13. #include <linux/elf.h>
  14. #include <linux/kexec.h>
  15. #include <linux/slab.h>
  16. #include <linux/of.h>
  17. #include <linux/libfdt.h>
  18. #include <linux/types.h>
  19. #include <linux/memblock.h>
  20. #include <linux/vmalloc.h>
  21. #include <asm/setup.h>
  22. int arch_kimage_file_post_load_cleanup(struct kimage *image)
  23. {
  24. kvfree(image->arch.fdt);
  25. image->arch.fdt = NULL;
  26. vfree(image->elf_headers);
  27. image->elf_headers = NULL;
  28. image->elf_headers_sz = 0;
  29. return kexec_image_post_load_cleanup_default(image);
  30. }
  31. static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
  32. struct kexec_elf_info *elf_info, unsigned long old_pbase,
  33. unsigned long new_pbase)
  34. {
  35. int i;
  36. int ret = 0;
  37. size_t size;
  38. struct kexec_buf kbuf;
  39. const struct elf_phdr *phdr;
  40. kbuf.image = image;
  41. for (i = 0; i < ehdr->e_phnum; i++) {
  42. phdr = &elf_info->proghdrs[i];
  43. if (phdr->p_type != PT_LOAD)
  44. continue;
  45. size = phdr->p_filesz;
  46. if (size > phdr->p_memsz)
  47. size = phdr->p_memsz;
  48. kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
  49. kbuf.bufsz = size;
  50. kbuf.buf_align = phdr->p_align;
  51. kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
  52. kbuf.memsz = phdr->p_memsz;
  53. kbuf.top_down = false;
  54. ret = kexec_add_buffer(&kbuf);
  55. if (ret)
  56. break;
  57. }
  58. return ret;
  59. }
  60. /*
  61. * Go through the available phsyical memory regions and find one that hold
  62. * an image of the specified size.
  63. */
  64. static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
  65. struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
  66. unsigned long *old_pbase, unsigned long *new_pbase)
  67. {
  68. int i;
  69. int ret;
  70. struct kexec_buf kbuf;
  71. const struct elf_phdr *phdr;
  72. unsigned long lowest_paddr = ULONG_MAX;
  73. unsigned long lowest_vaddr = ULONG_MAX;
  74. for (i = 0; i < ehdr->e_phnum; i++) {
  75. phdr = &elf_info->proghdrs[i];
  76. if (phdr->p_type != PT_LOAD)
  77. continue;
  78. if (lowest_paddr > phdr->p_paddr)
  79. lowest_paddr = phdr->p_paddr;
  80. if (lowest_vaddr > phdr->p_vaddr)
  81. lowest_vaddr = phdr->p_vaddr;
  82. }
  83. kbuf.image = image;
  84. kbuf.buf_min = lowest_paddr;
  85. kbuf.buf_max = ULONG_MAX;
  86. /*
  87. * Current riscv boot protocol requires 2MB alignment for
  88. * RV64 and 4MB alignment for RV32
  89. *
  90. */
  91. kbuf.buf_align = PMD_SIZE;
  92. kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
  93. kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
  94. kbuf.top_down = false;
  95. ret = arch_kexec_locate_mem_hole(&kbuf);
  96. if (!ret) {
  97. *old_pbase = lowest_paddr;
  98. *new_pbase = kbuf.mem;
  99. image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
  100. }
  101. return ret;
  102. }
  103. #ifdef CONFIG_CRASH_DUMP
  /*
   * Resource-walk callback: bump the unsigned int counter that @arg points
   * to by one.  @res is not inspected.  Always returns 0.
   */
  static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
  {
      unsigned int *counter = arg;

      *counter += 1;

      return 0;
  }
  110. static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
  111. {
  112. struct crash_mem *cmem = arg;
  113. cmem->ranges[cmem->nr_ranges].start = res->start;
  114. cmem->ranges[cmem->nr_ranges].end = res->end;
  115. cmem->nr_ranges++;
  116. return 0;
  117. }
  118. static int prepare_elf_headers(void **addr, unsigned long *sz)
  119. {
  120. struct crash_mem *cmem;
  121. unsigned int nr_ranges;
  122. int ret;
  123. nr_ranges = 1; /* For exclusion of crashkernel region */
  124. walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
  125. cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
  126. if (!cmem)
  127. return -ENOMEM;
  128. cmem->max_nr_ranges = nr_ranges;
  129. cmem->nr_ranges = 0;
  130. ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
  131. if (ret)
  132. goto out;
  133. /* Exclude crashkernel region */
  134. ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
  135. if (!ret)
  136. ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
  137. out:
  138. kfree(cmem);
  139. return ret;
  140. }
  141. static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
  142. unsigned long cmdline_len)
  143. {
  144. int elfcorehdr_strlen;
  145. char *cmdline_ptr;
  146. cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
  147. if (!cmdline_ptr)
  148. return NULL;
  149. elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
  150. image->elf_load_addr);
  151. if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
  152. pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
  153. kfree(cmdline_ptr);
  154. return NULL;
  155. }
  156. memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
  157. /* Ensure it's nul terminated */
  158. cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
  159. return cmdline_ptr;
  160. }
  161. #endif
  162. static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
  163. unsigned long kernel_len, char *initrd,
  164. unsigned long initrd_len, char *cmdline,
  165. unsigned long cmdline_len)
  166. {
  167. int ret;
  168. void *fdt;
  169. unsigned long old_kernel_pbase = ULONG_MAX;
  170. unsigned long new_kernel_pbase = 0UL;
  171. unsigned long initrd_pbase = 0UL;
  172. unsigned long kernel_start;
  173. struct elfhdr ehdr;
  174. struct kexec_buf kbuf;
  175. struct kexec_elf_info elf_info;
  176. char *modified_cmdline = NULL;
  177. ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
  178. if (ret)
  179. return ERR_PTR(ret);
  180. ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
  181. &old_kernel_pbase, &new_kernel_pbase);
  182. if (ret)
  183. goto out;
  184. kernel_start = image->start;
  185. /* Add the kernel binary to the image */
  186. ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
  187. old_kernel_pbase, new_kernel_pbase);
  188. if (ret)
  189. goto out;
  190. kbuf.image = image;
  191. kbuf.buf_min = new_kernel_pbase + kernel_len;
  192. kbuf.buf_max = ULONG_MAX;
  193. #ifdef CONFIG_CRASH_DUMP
  194. /* Add elfcorehdr */
  195. if (image->type == KEXEC_TYPE_CRASH) {
  196. void *headers;
  197. unsigned long headers_sz;
  198. ret = prepare_elf_headers(&headers, &headers_sz);
  199. if (ret) {
  200. pr_err("Preparing elf core header failed\n");
  201. goto out;
  202. }
  203. kbuf.buffer = headers;
  204. kbuf.bufsz = headers_sz;
  205. kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
  206. kbuf.memsz = headers_sz;
  207. kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
  208. kbuf.top_down = true;
  209. ret = kexec_add_buffer(&kbuf);
  210. if (ret) {
  211. vfree(headers);
  212. goto out;
  213. }
  214. image->elf_headers = headers;
  215. image->elf_load_addr = kbuf.mem;
  216. image->elf_headers_sz = headers_sz;
  217. kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
  218. image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
  219. /* Setup cmdline for kdump kernel case */
  220. modified_cmdline = setup_kdump_cmdline(image, cmdline,
  221. cmdline_len);
  222. if (!modified_cmdline) {
  223. pr_err("Setting up cmdline for kdump kernel failed\n");
  224. ret = -EINVAL;
  225. goto out;
  226. }
  227. cmdline = modified_cmdline;
  228. }
  229. #endif
  230. #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
  231. /* Add purgatory to the image */
  232. kbuf.top_down = true;
  233. kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
  234. ret = kexec_load_purgatory(image, &kbuf);
  235. if (ret) {
  236. pr_err("Error loading purgatory ret=%d\n", ret);
  237. goto out;
  238. }
  239. kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
  240. ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
  241. &kernel_start,
  242. sizeof(kernel_start), 0);
  243. if (ret)
  244. pr_err("Error update purgatory ret=%d\n", ret);
  245. #endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
  246. /* Add the initrd to the image */
  247. if (initrd != NULL) {
  248. kbuf.buffer = initrd;
  249. kbuf.bufsz = kbuf.memsz = initrd_len;
  250. kbuf.buf_align = PAGE_SIZE;
  251. kbuf.top_down = true;
  252. kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
  253. ret = kexec_add_buffer(&kbuf);
  254. if (ret)
  255. goto out;
  256. initrd_pbase = kbuf.mem;
  257. kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
  258. }
  259. /* Add the DTB to the image */
  260. fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
  261. initrd_len, cmdline, 0);
  262. if (!fdt) {
  263. pr_err("Error setting up the new device tree.\n");
  264. ret = -EINVAL;
  265. goto out;
  266. }
  267. fdt_pack(fdt);
  268. kbuf.buffer = fdt;
  269. kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
  270. kbuf.buf_align = PAGE_SIZE;
  271. kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
  272. kbuf.top_down = true;
  273. ret = kexec_add_buffer(&kbuf);
  274. if (ret) {
  275. pr_err("Error add DTB kbuf ret=%d\n", ret);
  276. goto out_free_fdt;
  277. }
  278. /* Cache the fdt buffer address for memory cleanup */
  279. image->arch.fdt = fdt;
  280. kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
  281. goto out;
  282. out_free_fdt:
  283. kvfree(fdt);
  284. out:
  285. kfree(modified_cmdline);
  286. kexec_free_elf_info(&elf_info);
  287. return ret ? ERR_PTR(ret) : NULL;
  288. }
  /* Extract the n-bit wide field of x that starts at bit position s. */
  #define RV_X(x, s, n)  (((x) >> (s)) & ((1 << (n)) - 1))

  /* Width and reach (1 << width) of the 12-bit immediate. */
  #define RISCV_IMM_BITS   12
  #define RISCV_IMM_REACH  (1LL << RISCV_IMM_BITS)

  /*
   * Split x into a multiple of RISCV_IMM_REACH (rounded to nearest) and the
   * remainder, so that HIGH_PART(x) + LOW_PART(x) == x.
   */
  #define RISCV_CONST_HIGH_PART(x) \
      (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
  #define RISCV_CONST_LOW_PART(x)  ((x) - RISCV_CONST_HIGH_PART(x))

  /* x[11:0] -> bits 31:20 */
  #define ENCODE_ITYPE_IMM(x) \
      (RV_X(x, 0, 12) << 20)

  /* x[4:1] -> bits 11:8, x[10:5] -> bits 30:25, x[11] -> bit 7, x[12] -> bit 31 */
  #define ENCODE_BTYPE_IMM(x) \
      ((RV_X(x, 1, 4) << 8) | \
       (RV_X(x, 5, 6) << 25) | \
       (RV_X(x, 11, 1) << 7) | \
       (RV_X(x, 12, 1) << 31))

  /* x[31:12] -> bits 31:12 */
  #define ENCODE_UTYPE_IMM(x) \
      (RV_X(x, 12, 20) << 12)

  /* x[10:1] -> bits 30:21, x[11] -> bit 20, x[19:12] -> bits 19:12, x[20] -> bit 31 */
  #define ENCODE_JTYPE_IMM(x) \
      ((RV_X(x, 1, 10) << 21) | \
       (RV_X(x, 11, 1) << 20) | \
       (RV_X(x, 12, 8) << 12) | \
       (RV_X(x, 20, 1) << 31))

  /* Immediate scatter used for R_RISCV_RVC_BRANCH; all target bits are below bit 16. */
  #define ENCODE_CBTYPE_IMM(x) \
      ((RV_X(x, 1, 2) << 3) | \
       (RV_X(x, 3, 2) << 10) | \
       (RV_X(x, 5, 1) << 2) | \
       (RV_X(x, 6, 2) << 5) | \
       (RV_X(x, 8, 1) << 12))

  /* Immediate scatter used for R_RISCV_RVC_JUMP; all target bits are below bit 16. */
  #define ENCODE_CJTYPE_IMM(x) \
      ((RV_X(x, 1, 3) << 3) | \
       (RV_X(x, 4, 1) << 11) | \
       (RV_X(x, 5, 1) << 2) | \
       (RV_X(x, 6, 1) << 7) | \
       (RV_X(x, 7, 1) << 6) | \
       (RV_X(x, 8, 2) << 9) | \
       (RV_X(x, 10, 1) << 8) | \
       (RV_X(x, 11, 1) << 12))

  /*
   * 64-bit pair: U-type encoding of the high part of x in the low word and
   * I-type encoding of the low part of x in the high word.
   */
  #define ENCODE_UJTYPE_IMM(x) \
      (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
       (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))

  /* Same layout as above, but encoding x itself in both words (no split). */
  #define ENCODE_UITYPE_IMM(x) \
      (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))

  /* Clear in x every bit that ENCODE_<type>_IMM() is able to set. */
  #define CLEAN_IMM(type, x) \
      ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
  319. int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
  320. Elf_Shdr *section,
  321. const Elf_Shdr *relsec,
  322. const Elf_Shdr *symtab)
  323. {
  324. const char *strtab, *name, *shstrtab;
  325. const Elf_Shdr *sechdrs;
  326. Elf64_Rela *relas;
  327. int i, r_type;
  328. /* String & section header string table */
  329. sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
  330. strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
  331. shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
  332. relas = (void *)pi->ehdr + relsec->sh_offset;
  333. for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
  334. const Elf_Sym *sym; /* symbol to relocate */
  335. unsigned long addr; /* final location after relocation */
  336. unsigned long val; /* relocated symbol value */
  337. unsigned long sec_base; /* relocated symbol value */
  338. void *loc; /* tmp location to modify */
  339. sym = (void *)pi->ehdr + symtab->sh_offset;
  340. sym += ELF64_R_SYM(relas[i].r_info);
  341. if (sym->st_name)
  342. name = strtab + sym->st_name;
  343. else
  344. name = shstrtab + sechdrs[sym->st_shndx].sh_name;
  345. loc = pi->purgatory_buf;
  346. loc += section->sh_offset;
  347. loc += relas[i].r_offset;
  348. if (sym->st_shndx == SHN_ABS)
  349. sec_base = 0;
  350. else if (sym->st_shndx >= pi->ehdr->e_shnum) {
  351. pr_err("Invalid section %d for symbol %s\n",
  352. sym->st_shndx, name);
  353. return -ENOEXEC;
  354. } else
  355. sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
  356. val = sym->st_value;
  357. val += sec_base;
  358. val += relas[i].r_addend;
  359. addr = section->sh_addr + relas[i].r_offset;
  360. r_type = ELF64_R_TYPE(relas[i].r_info);
  361. switch (r_type) {
  362. case R_RISCV_BRANCH:
  363. *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
  364. ENCODE_BTYPE_IMM(val - addr);
  365. break;
  366. case R_RISCV_JAL:
  367. *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
  368. ENCODE_JTYPE_IMM(val - addr);
  369. break;
  370. /*
  371. * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
  372. * sym is expected to be next to R_RISCV_PCREL_HI20
  373. * in purgatory relsec. Handle it like R_RISCV_CALL
  374. * sym, instead of searching the whole relsec.
  375. */
  376. case R_RISCV_PCREL_HI20:
  377. case R_RISCV_CALL_PLT:
  378. case R_RISCV_CALL:
  379. *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
  380. ENCODE_UJTYPE_IMM(val - addr);
  381. break;
  382. case R_RISCV_RVC_BRANCH:
  383. *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
  384. ENCODE_CBTYPE_IMM(val - addr);
  385. break;
  386. case R_RISCV_RVC_JUMP:
  387. *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
  388. ENCODE_CJTYPE_IMM(val - addr);
  389. break;
  390. case R_RISCV_ADD16:
  391. *(u16 *)loc += val;
  392. break;
  393. case R_RISCV_SUB16:
  394. *(u16 *)loc -= val;
  395. break;
  396. case R_RISCV_ADD32:
  397. *(u32 *)loc += val;
  398. break;
  399. case R_RISCV_SUB32:
  400. *(u32 *)loc -= val;
  401. break;
  402. /* It has been applied by R_RISCV_PCREL_HI20 sym */
  403. case R_RISCV_PCREL_LO12_I:
  404. case R_RISCV_ALIGN:
  405. case R_RISCV_RELAX:
  406. break;
  407. case R_RISCV_64:
  408. *(u64 *)loc = val;
  409. break;
  410. default:
  411. pr_err("Unknown rela relocation: %d\n", r_type);
  412. return -ENOEXEC;
  413. }
  414. }
  415. return 0;
  416. }
  417. const struct kexec_file_ops elf_kexec_ops = {
  418. .probe = kexec_elf_probe,
  419. .load = elf_kexec_load,
  420. };