e820.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280
  1. /*
  2. * Low level x86 E820 memory map handling functions.
  3. *
  4. * The firmware and bootloader pass us the "E820 table", which is the primary
  5. * physical memory layout description available about x86 systems.
  6. *
  7. * The kernel takes the E820 memory layout and optionally modifies it with
  8. * quirks and other tweaks, and feeds that into the generic Linux memory
  9. * allocation code routines via a platform independent interface (memblock, etc.).
  10. */
  11. #include <linux/crash_dump.h>
  12. #include <linux/bootmem.h>
  13. #include <linux/suspend.h>
  14. #include <linux/acpi.h>
  15. #include <linux/firmware-map.h>
  16. #include <linux/memblock.h>
  17. #include <linux/sort.h>
  18. #include <asm/e820/api.h>
  19. #include <asm/setup.h>
  20. /*
  21. * We organize the E820 table into three main data structures:
  22. *
  23. * - 'e820_table_firmware': the original firmware version passed to us by the
  24. * bootloader - not modified by the kernel. It is composed of two parts:
  25. * the first 128 E820 memory entries in boot_params.e820_table and the remaining
  26. * (if any) entries of the SETUP_E820_EXT nodes. We use this to:
  27. *
  28. * - inform the user about the firmware's notion of memory layout
  29. * via /sys/firmware/memmap
  30. *
  31. * - the hibernation code uses it to generate a kernel-independent MD5
  32. * fingerprint of the physical memory layout of a system.
  33. *
  34. * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version
  35. * passed to us by the bootloader - the major difference between
  36. * e820_table_firmware[] and this one is that, the latter marks the setup_data
  37. * list created by the EFI boot stub as reserved, so that kexec can reuse the
  38. * setup_data information in the second kernel. Besides, e820_table_kexec[]
  39. * might also be modified by the kexec itself to fake a mptable.
  40. * We use this to:
  41. *
  42. * - kexec, which is a bootloader in disguise, uses the original E820
  43. * layout to pass to the kexec-ed kernel. This way the original kernel
  44. * can have a restricted E820 map while the kexec()-ed kexec-kernel
  45. * can have access to full memory - etc.
  46. *
  47. * - 'e820_table': this is the main E820 table that is massaged by the
  48. * low level x86 platform code, or modified by boot parameters, before
  49. * passed on to higher level MM layers.
  50. *
  51. * Once the E820 map has been converted to the standard Linux memory layout
  52. * information its role stops - modifying it has no effect and does not get
  53. * re-propagated. So its main role is a temporary bootstrap storage of firmware
  54. * specific memory layout data during early bootup.
  55. */
  56. static struct e820_table e820_table_init __initdata;
  57. static struct e820_table e820_table_kexec_init __initdata;
  58. static struct e820_table e820_table_firmware_init __initdata;
  59. struct e820_table *e820_table __refdata = &e820_table_init;
  60. struct e820_table *e820_table_kexec __refdata = &e820_table_kexec_init;
  61. struct e820_table *e820_table_firmware __refdata = &e820_table_firmware_init;
  62. /* For PCI or other memory-mapped resources */
  63. unsigned long pci_mem_start = 0xaeedbabe;
  64. #ifdef CONFIG_PCI
  65. EXPORT_SYMBOL(pci_mem_start);
  66. #endif
  67. /*
  68. * This function checks if any part of the range <start,end> is mapped
  69. * with type.
  70. */
  71. bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
  72. {
  73. int i;
  74. for (i = 0; i < e820_table->nr_entries; i++) {
  75. struct e820_entry *entry = &e820_table->entries[i];
  76. if (type && entry->type != type)
  77. continue;
  78. if (entry->addr >= end || entry->addr + entry->size <= start)
  79. continue;
  80. return 1;
  81. }
  82. return 0;
  83. }
  84. EXPORT_SYMBOL_GPL(e820__mapped_any);
  85. /*
  86. * This function checks if the entire <start,end> range is mapped with 'type'.
  87. *
  88. * Note: this function only works correctly once the E820 table is sorted and
  89. * not-overlapping (at least for the range specified), which is the case normally.
  90. */
  91. static struct e820_entry *__e820__mapped_all(u64 start, u64 end,
  92. enum e820_type type)
  93. {
  94. int i;
  95. for (i = 0; i < e820_table->nr_entries; i++) {
  96. struct e820_entry *entry = &e820_table->entries[i];
  97. if (type && entry->type != type)
  98. continue;
  99. /* Is the region (part) in overlap with the current region? */
  100. if (entry->addr >= end || entry->addr + entry->size <= start)
  101. continue;
  102. /*
  103. * If the region is at the beginning of <start,end> we move
  104. * 'start' to the end of the region since it's ok until there
  105. */
  106. if (entry->addr <= start)
  107. start = entry->addr + entry->size;
  108. /*
  109. * If 'start' is now at or beyond 'end', we're done, full
  110. * coverage of the desired range exists:
  111. */
  112. if (start >= end)
  113. return entry;
  114. }
  115. return NULL;
  116. }
  117. /*
  118. * This function checks if the entire range <start,end> is mapped with type.
  119. */
  120. bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
  121. {
  122. return __e820__mapped_all(start, end, type);
  123. }
  124. /*
  125. * This function returns the type associated with the range <start,end>.
  126. */
  127. int e820__get_entry_type(u64 start, u64 end)
  128. {
  129. struct e820_entry *entry = __e820__mapped_all(start, end, 0);
  130. return entry ? entry->type : -EINVAL;
  131. }
  132. /*
  133. * Add a memory region to the kernel E820 map.
  134. */
  135. static void __init __e820__range_add(struct e820_table *table, u64 start, u64 size, enum e820_type type)
  136. {
  137. int x = table->nr_entries;
  138. if (x >= ARRAY_SIZE(table->entries)) {
  139. pr_err("too many entries; ignoring [mem %#010llx-%#010llx]\n",
  140. start, start + size - 1);
  141. return;
  142. }
  143. table->entries[x].addr = start;
  144. table->entries[x].size = size;
  145. table->entries[x].type = type;
  146. table->nr_entries++;
  147. }
  148. void __init e820__range_add(u64 start, u64 size, enum e820_type type)
  149. {
  150. __e820__range_add(e820_table, start, size, type);
  151. }
  152. static void __init e820_print_type(enum e820_type type)
  153. {
  154. switch (type) {
  155. case E820_TYPE_RAM: /* Fall through: */
  156. case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break;
  157. case E820_TYPE_RESERVED: pr_cont("reserved"); break;
  158. case E820_TYPE_ACPI: pr_cont("ACPI data"); break;
  159. case E820_TYPE_NVS: pr_cont("ACPI NVS"); break;
  160. case E820_TYPE_UNUSABLE: pr_cont("unusable"); break;
  161. case E820_TYPE_PMEM: /* Fall through: */
  162. case E820_TYPE_PRAM: pr_cont("persistent (type %u)", type); break;
  163. default: pr_cont("type %u", type); break;
  164. }
  165. }
  166. void __init e820__print_table(char *who)
  167. {
  168. int i;
  169. for (i = 0; i < e820_table->nr_entries; i++) {
  170. pr_info("%s: [mem %#018Lx-%#018Lx] ",
  171. who,
  172. e820_table->entries[i].addr,
  173. e820_table->entries[i].addr + e820_table->entries[i].size - 1);
  174. e820_print_type(e820_table->entries[i].type);
  175. pr_cont("\n");
  176. }
  177. }
  178. /*
  179. * Sanitize an E820 map.
  180. *
  181. * Some E820 layouts include overlapping entries. The following
  182. * replaces the original E820 map with a new one, removing overlaps,
  183. * and resolving conflicting memory types in favor of highest
  184. * numbered type.
  185. *
  186. * The input parameter 'entries' points to an array of 'struct
  187. * e820_entry' which on entry has elements in the range [0, *nr_entries)
  188. * valid, and which has space for up to max_nr_entries entries.
  189. * On return, the resulting sanitized E820 map entries will be
  190. * overwritten in the same location, starting at 'entries'.
  191. *
  192. * The integer pointed to by nr_entries must be valid on entry (the
  193. * current number of valid entries located at 'entries'). If the
  194. * sanitizing succeeds the *nr_entries will be updated with the new
  195. * number of valid entries (something no more than max_nr_entries).
  196. *
  197. * The return value from e820__update_table() is zero if it
  198. * successfully 'sanitized' the map entries passed in, and is -1
  199. * if it did nothing, which can happen if either of (1) it was
  200. * only passed one map entry, or (2) any of the input map entries
  201. * were invalid (start + size < start, meaning that the size was
  202. * so big the described memory range wrapped around through zero.)
  203. *
  204. * Visually we're performing the following
  205. * (1,2,3,4 = memory types)...
  206. *
  207. * Sample memory map (w/overlaps):
  208. * ____22__________________
  209. * ______________________4_
  210. * ____1111________________
  211. * _44_____________________
  212. * 11111111________________
  213. * ____________________33__
  214. * ___________44___________
  215. * __________33333_________
  216. * ______________22________
  217. * ___________________2222_
  218. * _________111111111______
  219. * _____________________11_
  220. * _________________4______
  221. *
  222. * Sanitized equivalent (no overlap):
  223. * 1_______________________
  224. * _44_____________________
  225. * ___1____________________
  226. * ____22__________________
  227. * ______11________________
  228. * _________1______________
  229. * __________3_____________
  230. * ___________44___________
  231. * _____________33_________
  232. * _______________2________
  233. * ________________1_______
  234. * _________________4______
  235. * ___________________2____
  236. * ____________________33__
  237. * ______________________4_
  238. */
  239. struct change_member {
  240. /* Pointer to the original entry: */
  241. struct e820_entry *entry;
  242. /* Address for this change point: */
  243. unsigned long long addr;
  244. };
  245. static struct change_member change_point_list[2*E820_MAX_ENTRIES] __initdata;
  246. static struct change_member *change_point[2*E820_MAX_ENTRIES] __initdata;
  247. static struct e820_entry *overlap_list[E820_MAX_ENTRIES] __initdata;
  248. static struct e820_entry new_entries[E820_MAX_ENTRIES] __initdata;
  249. static int __init cpcompare(const void *a, const void *b)
  250. {
  251. struct change_member * const *app = a, * const *bpp = b;
  252. const struct change_member *ap = *app, *bp = *bpp;
  253. /*
  254. * Inputs are pointers to two elements of change_point[]. If their
  255. * addresses are not equal, their difference dominates. If the addresses
  256. * are equal, then consider one that represents the end of its region
  257. * to be greater than one that does not.
  258. */
  259. if (ap->addr != bp->addr)
  260. return ap->addr > bp->addr ? 1 : -1;
  261. return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr);
  262. }
  263. int __init e820__update_table(struct e820_table *table)
  264. {
  265. struct e820_entry *entries = table->entries;
  266. u32 max_nr_entries = ARRAY_SIZE(table->entries);
  267. enum e820_type current_type, last_type;
  268. unsigned long long last_addr;
  269. u32 new_nr_entries, overlap_entries;
  270. u32 i, chg_idx, chg_nr;
  271. /* If there's only one memory region, don't bother: */
  272. if (table->nr_entries < 2)
  273. return -1;
  274. BUG_ON(table->nr_entries > max_nr_entries);
  275. /* Bail out if we find any unreasonable addresses in the map: */
  276. for (i = 0; i < table->nr_entries; i++) {
  277. if (entries[i].addr + entries[i].size < entries[i].addr)
  278. return -1;
  279. }
  280. /* Create pointers for initial change-point information (for sorting): */
  281. for (i = 0; i < 2 * table->nr_entries; i++)
  282. change_point[i] = &change_point_list[i];
  283. /*
  284. * Record all known change-points (starting and ending addresses),
  285. * omitting empty memory regions:
  286. */
  287. chg_idx = 0;
  288. for (i = 0; i < table->nr_entries; i++) {
  289. if (entries[i].size != 0) {
  290. change_point[chg_idx]->addr = entries[i].addr;
  291. change_point[chg_idx++]->entry = &entries[i];
  292. change_point[chg_idx]->addr = entries[i].addr + entries[i].size;
  293. change_point[chg_idx++]->entry = &entries[i];
  294. }
  295. }
  296. chg_nr = chg_idx;
  297. /* Sort change-point list by memory addresses (low -> high): */
  298. sort(change_point, chg_nr, sizeof(*change_point), cpcompare, NULL);
  299. /* Create a new memory map, removing overlaps: */
  300. overlap_entries = 0; /* Number of entries in the overlap table */
  301. new_nr_entries = 0; /* Index for creating new map entries */
  302. last_type = 0; /* Start with undefined memory type */
  303. last_addr = 0; /* Start with 0 as last starting address */
  304. /* Loop through change-points, determining effect on the new map: */
  305. for (chg_idx = 0; chg_idx < chg_nr; chg_idx++) {
  306. /* Keep track of all overlapping entries */
  307. if (change_point[chg_idx]->addr == change_point[chg_idx]->entry->addr) {
  308. /* Add map entry to overlap list (> 1 entry implies an overlap) */
  309. overlap_list[overlap_entries++] = change_point[chg_idx]->entry;
  310. } else {
  311. /* Remove entry from list (order independent, so swap with last): */
  312. for (i = 0; i < overlap_entries; i++) {
  313. if (overlap_list[i] == change_point[chg_idx]->entry)
  314. overlap_list[i] = overlap_list[overlap_entries-1];
  315. }
  316. overlap_entries--;
  317. }
  318. /*
  319. * If there are overlapping entries, decide which
  320. * "type" to use (larger value takes precedence --
  321. * 1=usable, 2,3,4,4+=unusable)
  322. */
  323. current_type = 0;
  324. for (i = 0; i < overlap_entries; i++) {
  325. if (overlap_list[i]->type > current_type)
  326. current_type = overlap_list[i]->type;
  327. }
  328. /* Continue building up new map based on this information: */
  329. if (current_type != last_type || current_type == E820_TYPE_PRAM) {
  330. if (last_type != 0) {
  331. new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr;
  332. /* Move forward only if the new size was non-zero: */
  333. if (new_entries[new_nr_entries].size != 0)
  334. /* No more space left for new entries? */
  335. if (++new_nr_entries >= max_nr_entries)
  336. break;
  337. }
  338. if (current_type != 0) {
  339. new_entries[new_nr_entries].addr = change_point[chg_idx]->addr;
  340. new_entries[new_nr_entries].type = current_type;
  341. last_addr = change_point[chg_idx]->addr;
  342. }
  343. last_type = current_type;
  344. }
  345. }
  346. /* Copy the new entries into the original location: */
  347. memcpy(entries, new_entries, new_nr_entries*sizeof(*entries));
  348. table->nr_entries = new_nr_entries;
  349. return 0;
  350. }
  351. static int __init __append_e820_table(struct boot_e820_entry *entries, u32 nr_entries)
  352. {
  353. struct boot_e820_entry *entry = entries;
  354. while (nr_entries) {
  355. u64 start = entry->addr;
  356. u64 size = entry->size;
  357. u64 end = start + size - 1;
  358. u32 type = entry->type;
  359. /* Ignore the entry on 64-bit overflow: */
  360. if (start > end && likely(size))
  361. return -1;
  362. e820__range_add(start, size, type);
  363. entry++;
  364. nr_entries--;
  365. }
  366. return 0;
  367. }
  368. /*
  369. * Copy the BIOS E820 map into a safe place.
  370. *
  371. * Sanity-check it while we're at it..
  372. *
  373. * If we're lucky and live on a modern system, the setup code
  374. * will have given us a memory map that we can use to properly
  375. * set up memory. If we aren't, we'll fake a memory map.
  376. */
  377. static int __init append_e820_table(struct boot_e820_entry *entries, u32 nr_entries)
  378. {
  379. /* Only one memory region (or negative)? Ignore it */
  380. if (nr_entries < 2)
  381. return -1;
  382. return __append_e820_table(entries, nr_entries);
  383. }
  384. static u64 __init
  385. __e820__range_update(struct e820_table *table, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
  386. {
  387. u64 end;
  388. unsigned int i;
  389. u64 real_updated_size = 0;
  390. BUG_ON(old_type == new_type);
  391. if (size > (ULLONG_MAX - start))
  392. size = ULLONG_MAX - start;
  393. end = start + size;
  394. printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", start, end - 1);
  395. e820_print_type(old_type);
  396. pr_cont(" ==> ");
  397. e820_print_type(new_type);
  398. pr_cont("\n");
  399. for (i = 0; i < table->nr_entries; i++) {
  400. struct e820_entry *entry = &table->entries[i];
  401. u64 final_start, final_end;
  402. u64 entry_end;
  403. if (entry->type != old_type)
  404. continue;
  405. entry_end = entry->addr + entry->size;
  406. /* Completely covered by new range? */
  407. if (entry->addr >= start && entry_end <= end) {
  408. entry->type = new_type;
  409. real_updated_size += entry->size;
  410. continue;
  411. }
  412. /* New range is completely covered? */
  413. if (entry->addr < start && entry_end > end) {
  414. __e820__range_add(table, start, size, new_type);
  415. __e820__range_add(table, end, entry_end - end, entry->type);
  416. entry->size = start - entry->addr;
  417. real_updated_size += size;
  418. continue;
  419. }
  420. /* Partially covered: */
  421. final_start = max(start, entry->addr);
  422. final_end = min(end, entry_end);
  423. if (final_start >= final_end)
  424. continue;
  425. __e820__range_add(table, final_start, final_end - final_start, new_type);
  426. real_updated_size += final_end - final_start;
  427. /*
  428. * Left range could be head or tail, so need to update
  429. * its size first:
  430. */
  431. entry->size -= final_end - final_start;
  432. if (entry->addr < final_start)
  433. continue;
  434. entry->addr = final_end;
  435. }
  436. return real_updated_size;
  437. }
  438. u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
  439. {
  440. return __e820__range_update(e820_table, start, size, old_type, new_type);
  441. }
  442. static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
  443. {
  444. return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
  445. }
  446. /* Remove a range of memory from the E820 table: */
  447. u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type)
  448. {
  449. int i;
  450. u64 end;
  451. u64 real_removed_size = 0;
  452. if (size > (ULLONG_MAX - start))
  453. size = ULLONG_MAX - start;
  454. end = start + size;
  455. printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", start, end - 1);
  456. if (check_type)
  457. e820_print_type(old_type);
  458. pr_cont("\n");
  459. for (i = 0; i < e820_table->nr_entries; i++) {
  460. struct e820_entry *entry = &e820_table->entries[i];
  461. u64 final_start, final_end;
  462. u64 entry_end;
  463. if (check_type && entry->type != old_type)
  464. continue;
  465. entry_end = entry->addr + entry->size;
  466. /* Completely covered? */
  467. if (entry->addr >= start && entry_end <= end) {
  468. real_removed_size += entry->size;
  469. memset(entry, 0, sizeof(*entry));
  470. continue;
  471. }
  472. /* Is the new range completely covered? */
  473. if (entry->addr < start && entry_end > end) {
  474. e820__range_add(end, entry_end - end, entry->type);
  475. entry->size = start - entry->addr;
  476. real_removed_size += size;
  477. continue;
  478. }
  479. /* Partially covered: */
  480. final_start = max(start, entry->addr);
  481. final_end = min(end, entry_end);
  482. if (final_start >= final_end)
  483. continue;
  484. real_removed_size += final_end - final_start;
  485. /*
  486. * Left range could be head or tail, so need to update
  487. * the size first:
  488. */
  489. entry->size -= final_end - final_start;
  490. if (entry->addr < final_start)
  491. continue;
  492. entry->addr = final_end;
  493. }
  494. return real_removed_size;
  495. }
  496. void __init e820__update_table_print(void)
  497. {
  498. if (e820__update_table(e820_table))
  499. return;
  500. pr_info("modified physical RAM map:\n");
  501. e820__print_table("modified");
  502. }
  503. static void __init e820__update_table_kexec(void)
  504. {
  505. e820__update_table(e820_table_kexec);
  506. }
  507. #define MAX_GAP_END 0x100000000ull
  508. /*
  509. * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB).
  510. */
  511. static int __init e820_search_gap(unsigned long *gapstart, unsigned long *gapsize)
  512. {
  513. unsigned long long last = MAX_GAP_END;
  514. int i = e820_table->nr_entries;
  515. int found = 0;
  516. while (--i >= 0) {
  517. unsigned long long start = e820_table->entries[i].addr;
  518. unsigned long long end = start + e820_table->entries[i].size;
  519. /*
  520. * Since "last" is at most 4GB, we know we'll
  521. * fit in 32 bits if this condition is true:
  522. */
  523. if (last > end) {
  524. unsigned long gap = last - end;
  525. if (gap >= *gapsize) {
  526. *gapsize = gap;
  527. *gapstart = end;
  528. found = 1;
  529. }
  530. }
  531. if (start < last)
  532. last = start;
  533. }
  534. return found;
  535. }
  536. /*
  537. * Search for the biggest gap in the low 32 bits of the E820
  538. * memory space. We pass this space to the PCI subsystem, so
  539. * that it can assign MMIO resources for hotplug or
  540. * unconfigured devices in.
  541. *
  542. * Hopefully the BIOS let enough space left.
  543. */
  544. __init void e820__setup_pci_gap(void)
  545. {
  546. unsigned long gapstart, gapsize;
  547. int found;
  548. gapsize = 0x400000;
  549. found = e820_search_gap(&gapstart, &gapsize);
  550. if (!found) {
  551. #ifdef CONFIG_X86_64
  552. gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
  553. pr_err("Cannot find an available gap in the 32-bit address range\n");
  554. pr_err("PCI devices with unassigned 32-bit BARs may not work!\n");
  555. #else
  556. gapstart = 0x10000000;
  557. #endif
  558. }
  559. /*
  560. * e820__reserve_resources_late() protects stolen RAM already:
  561. */
  562. pci_mem_start = gapstart;
  563. pr_info("[mem %#010lx-%#010lx] available for PCI devices\n",
  564. gapstart, gapstart + gapsize - 1);
  565. }
  566. /*
  567. * Called late during init, in free_initmem().
  568. *
  569. * Initial e820_table and e820_table_kexec are largish __initdata arrays.
  570. *
  571. * Copy them to a (usually much smaller) dynamically allocated area that is
  572. * sized precisely after the number of e820 entries.
  573. *
  574. * This is done after we've performed all the fixes and tweaks to the tables.
  575. * All functions which modify them are __init functions, which won't exist
  576. * after free_initmem().
  577. */
  578. __init void e820__reallocate_tables(void)
  579. {
  580. struct e820_table *n;
  581. int size;
  582. size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries;
  583. n = kmalloc(size, GFP_KERNEL);
  584. BUG_ON(!n);
  585. memcpy(n, e820_table, size);
  586. e820_table = n;
  587. size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries;
  588. n = kmalloc(size, GFP_KERNEL);
  589. BUG_ON(!n);
  590. memcpy(n, e820_table_kexec, size);
  591. e820_table_kexec = n;
  592. size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries;
  593. n = kmalloc(size, GFP_KERNEL);
  594. BUG_ON(!n);
  595. memcpy(n, e820_table_firmware, size);
  596. e820_table_firmware = n;
  597. }
  598. /*
  599. * Because of the small fixed size of struct boot_params, only the first
  600. * 128 E820 memory entries are passed to the kernel via boot_params.e820_table,
  601. * the remaining (if any) entries are passed via the SETUP_E820_EXT node of
  602. * struct setup_data, which is parsed here.
  603. */
  604. void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
  605. {
  606. int entries;
  607. struct boot_e820_entry *extmap;
  608. struct setup_data *sdata;
  609. sdata = early_memremap(phys_addr, data_len);
  610. entries = sdata->len / sizeof(*extmap);
  611. extmap = (struct boot_e820_entry *)(sdata->data);
  612. __append_e820_table(extmap, entries);
  613. e820__update_table(e820_table);
  614. memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
  615. memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
  616. early_memunmap(sdata, data_len);
  617. pr_info("extended physical RAM map:\n");
  618. e820__print_table("extended");
  619. }
  620. /*
  621. * Find the ranges of physical addresses that do not correspond to
  622. * E820 RAM areas and register the corresponding pages as 'nosave' for
  623. * hibernation (32-bit) or software suspend and suspend to RAM (64-bit).
  624. *
  625. * This function requires the E820 map to be sorted and without any
  626. * overlapping entries.
  627. */
  628. void __init e820__register_nosave_regions(unsigned long limit_pfn)
  629. {
  630. int i;
  631. unsigned long pfn = 0;
  632. for (i = 0; i < e820_table->nr_entries; i++) {
  633. struct e820_entry *entry = &e820_table->entries[i];
  634. if (pfn < PFN_UP(entry->addr))
  635. register_nosave_region(pfn, PFN_UP(entry->addr));
  636. pfn = PFN_DOWN(entry->addr + entry->size);
  637. if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
  638. register_nosave_region(PFN_UP(entry->addr), pfn);
  639. if (pfn >= limit_pfn)
  640. break;
  641. }
  642. }
  643. #ifdef CONFIG_ACPI
  644. /*
  645. * Register ACPI NVS memory regions, so that we can save/restore them during
  646. * hibernation and the subsequent resume:
  647. */
  648. static int __init e820__register_nvs_regions(void)
  649. {
  650. int i;
  651. for (i = 0; i < e820_table->nr_entries; i++) {
  652. struct e820_entry *entry = &e820_table->entries[i];
  653. if (entry->type == E820_TYPE_NVS)
  654. acpi_nvs_register(entry->addr, entry->size);
  655. }
  656. return 0;
  657. }
  658. core_initcall(e820__register_nvs_regions);
  659. #endif
  660. /*
  661. * Allocate the requested number of bytes with the requsted alignment
  662. * and return (the physical address) to the caller. Also register this
  663. * range in the 'kexec' E820 table as a reserved range.
  664. *
  665. * This allows kexec to fake a new mptable, as if it came from the real
  666. * system.
  667. */
  668. u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
  669. {
  670. u64 addr;
  671. addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
  672. if (addr) {
  673. e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
  674. pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
  675. e820__update_table_kexec();
  676. }
  677. return addr;
  678. }
  679. #ifdef CONFIG_X86_32
  680. # ifdef CONFIG_X86_PAE
  681. # define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT))
  682. # else
  683. # define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
  684. # endif
  685. #else /* CONFIG_X86_32 */
  686. # define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
  687. #endif
  688. /*
  689. * Find the highest page frame number we have available
  690. */
  691. static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type type)
  692. {
  693. int i;
  694. unsigned long last_pfn = 0;
  695. unsigned long max_arch_pfn = MAX_ARCH_PFN;
  696. for (i = 0; i < e820_table->nr_entries; i++) {
  697. struct e820_entry *entry = &e820_table->entries[i];
  698. unsigned long start_pfn;
  699. unsigned long end_pfn;
  700. if (entry->type != type)
  701. continue;
  702. start_pfn = entry->addr >> PAGE_SHIFT;
  703. end_pfn = (entry->addr + entry->size) >> PAGE_SHIFT;
  704. if (start_pfn >= limit_pfn)
  705. continue;
  706. if (end_pfn > limit_pfn) {
  707. last_pfn = limit_pfn;
  708. break;
  709. }
  710. if (end_pfn > last_pfn)
  711. last_pfn = end_pfn;
  712. }
  713. if (last_pfn > max_arch_pfn)
  714. last_pfn = max_arch_pfn;
  715. pr_info("last_pfn = %#lx max_arch_pfn = %#lx\n",
  716. last_pfn, max_arch_pfn);
  717. return last_pfn;
  718. }
  719. unsigned long __init e820__end_of_ram_pfn(void)
  720. {
  721. return e820_end_pfn(MAX_ARCH_PFN, E820_TYPE_RAM);
  722. }
  723. unsigned long __init e820__end_of_low_ram_pfn(void)
  724. {
  725. return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_TYPE_RAM);
  726. }
  727. static void __init early_panic(char *msg)
  728. {
  729. early_printk(msg);
  730. panic(msg);
  731. }
  732. static int userdef __initdata;
  733. /* The "mem=nopentium" boot option disables 4MB page tables on 32-bit kernels: */
  734. static int __init parse_memopt(char *p)
  735. {
  736. u64 mem_size;
  737. if (!p)
  738. return -EINVAL;
  739. if (!strcmp(p, "nopentium")) {
  740. #ifdef CONFIG_X86_32
  741. setup_clear_cpu_cap(X86_FEATURE_PSE);
  742. return 0;
  743. #else
  744. pr_warn("mem=nopentium ignored! (only supported on x86_32)\n");
  745. return -EINVAL;
  746. #endif
  747. }
  748. userdef = 1;
  749. mem_size = memparse(p, &p);
  750. /* Don't remove all memory when getting "mem={invalid}" parameter: */
  751. if (mem_size == 0)
  752. return -EINVAL;
  753. e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
  754. return 0;
  755. }
  756. early_param("mem", parse_memopt);
  757. static int __init parse_memmap_one(char *p)
  758. {
  759. char *oldp;
  760. u64 start_at, mem_size;
  761. if (!p)
  762. return -EINVAL;
  763. if (!strncmp(p, "exactmap", 8)) {
  764. #ifdef CONFIG_CRASH_DUMP
  765. /*
  766. * If we are doing a crash dump, we still need to know
  767. * the real memory size before the original memory map is
  768. * reset.
  769. */
  770. saved_max_pfn = e820__end_of_ram_pfn();
  771. #endif
  772. e820_table->nr_entries = 0;
  773. userdef = 1;
  774. return 0;
  775. }
  776. oldp = p;
  777. mem_size = memparse(p, &p);
  778. if (p == oldp)
  779. return -EINVAL;
  780. userdef = 1;
  781. if (*p == '@') {
  782. start_at = memparse(p+1, &p);
  783. e820__range_add(start_at, mem_size, E820_TYPE_RAM);
  784. } else if (*p == '#') {
  785. start_at = memparse(p+1, &p);
  786. e820__range_add(start_at, mem_size, E820_TYPE_ACPI);
  787. } else if (*p == '$') {
  788. start_at = memparse(p+1, &p);
  789. e820__range_add(start_at, mem_size, E820_TYPE_RESERVED);
  790. } else if (*p == '!') {
  791. start_at = memparse(p+1, &p);
  792. e820__range_add(start_at, mem_size, E820_TYPE_PRAM);
  793. } else if (*p == '%') {
  794. enum e820_type from = 0, to = 0;
  795. start_at = memparse(p + 1, &p);
  796. if (*p == '-')
  797. from = simple_strtoull(p + 1, &p, 0);
  798. if (*p == '+')
  799. to = simple_strtoull(p + 1, &p, 0);
  800. if (*p != '\0')
  801. return -EINVAL;
  802. if (from && to)
  803. e820__range_update(start_at, mem_size, from, to);
  804. else if (to)
  805. e820__range_add(start_at, mem_size, to);
  806. else if (from)
  807. e820__range_remove(start_at, mem_size, from, 1);
  808. else
  809. e820__range_remove(start_at, mem_size, 0, 0);
  810. } else {
  811. e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
  812. }
  813. return *p == '\0' ? 0 : -EINVAL;
  814. }
  815. static int __init parse_memmap_opt(char *str)
  816. {
  817. while (str) {
  818. char *k = strchr(str, ',');
  819. if (k)
  820. *k++ = 0;
  821. parse_memmap_one(str);
  822. str = k;
  823. }
  824. return 0;
  825. }
  826. early_param("memmap", parse_memmap_opt);
  827. /*
  828. * Reserve all entries from the bootloader's extensible data nodes list,
  829. * because if present we are going to use it later on to fetch e820
  830. * entries from it:
  831. */
  832. void __init e820__reserve_setup_data(void)
  833. {
  834. struct setup_data *data;
  835. u64 pa_data;
  836. pa_data = boot_params.hdr.setup_data;
  837. if (!pa_data)
  838. return;
  839. while (pa_data) {
  840. data = early_memremap(pa_data, sizeof(*data));
  841. e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
  842. e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
  843. pa_data = data->next;
  844. early_memunmap(data, sizeof(*data));
  845. }
  846. e820__update_table(e820_table);
  847. e820__update_table(e820_table_kexec);
  848. pr_info("extended physical RAM map:\n");
  849. e820__print_table("reserve setup_data");
  850. }
  851. /*
  852. * Called after parse_early_param(), after early parameters (such as mem=)
  853. * have been processed, in which case we already have an E820 table filled in
  854. * via the parameter callback function(s), but it's not sorted and printed yet:
  855. */
  856. void __init e820__finish_early_params(void)
  857. {
  858. if (userdef) {
  859. if (e820__update_table(e820_table) < 0)
  860. early_panic("Invalid user supplied memory map");
  861. pr_info("user-defined physical RAM map:\n");
  862. e820__print_table("user");
  863. }
  864. }
  865. static const char *__init e820_type_to_string(struct e820_entry *entry)
  866. {
  867. switch (entry->type) {
  868. case E820_TYPE_RESERVED_KERN: /* Fall-through: */
  869. case E820_TYPE_RAM: return "System RAM";
  870. case E820_TYPE_ACPI: return "ACPI Tables";
  871. case E820_TYPE_NVS: return "ACPI Non-volatile Storage";
  872. case E820_TYPE_UNUSABLE: return "Unusable memory";
  873. case E820_TYPE_PRAM: return "Persistent Memory (legacy)";
  874. case E820_TYPE_PMEM: return "Persistent Memory";
  875. case E820_TYPE_RESERVED: return "Reserved";
  876. default: return "Unknown E820 type";
  877. }
  878. }
  879. static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry)
  880. {
  881. switch (entry->type) {
  882. case E820_TYPE_RESERVED_KERN: /* Fall-through: */
  883. case E820_TYPE_RAM: return IORESOURCE_SYSTEM_RAM;
  884. case E820_TYPE_ACPI: /* Fall-through: */
  885. case E820_TYPE_NVS: /* Fall-through: */
  886. case E820_TYPE_UNUSABLE: /* Fall-through: */
  887. case E820_TYPE_PRAM: /* Fall-through: */
  888. case E820_TYPE_PMEM: /* Fall-through: */
  889. case E820_TYPE_RESERVED: /* Fall-through: */
  890. default: return IORESOURCE_MEM;
  891. }
  892. }
  893. static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
  894. {
  895. switch (entry->type) {
  896. case E820_TYPE_ACPI: return IORES_DESC_ACPI_TABLES;
  897. case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE;
  898. case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY;
  899. case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
  900. case E820_TYPE_RESERVED_KERN: /* Fall-through: */
  901. case E820_TYPE_RAM: /* Fall-through: */
  902. case E820_TYPE_UNUSABLE: /* Fall-through: */
  903. case E820_TYPE_RESERVED: /* Fall-through: */
  904. default: return IORES_DESC_NONE;
  905. }
  906. }
  907. static bool __init do_mark_busy(enum e820_type type, struct resource *res)
  908. {
  909. /* this is the legacy bios/dos rom-shadow + mmio region */
  910. if (res->start < (1ULL<<20))
  911. return true;
  912. /*
  913. * Treat persistent memory like device memory, i.e. reserve it
  914. * for exclusive use of a driver
  915. */
  916. switch (type) {
  917. case E820_TYPE_RESERVED:
  918. case E820_TYPE_PRAM:
  919. case E820_TYPE_PMEM:
  920. return false;
  921. case E820_TYPE_RESERVED_KERN:
  922. case E820_TYPE_RAM:
  923. case E820_TYPE_ACPI:
  924. case E820_TYPE_NVS:
  925. case E820_TYPE_UNUSABLE:
  926. default:
  927. return true;
  928. }
  929. }
  930. /*
  931. * Mark E820 reserved areas as busy for the resource manager:
  932. */
  933. static struct resource __initdata *e820_res;
  934. void __init e820__reserve_resources(void)
  935. {
  936. int i;
  937. struct resource *res;
  938. u64 end;
  939. res = alloc_bootmem(sizeof(*res) * e820_table->nr_entries);
  940. e820_res = res;
  941. for (i = 0; i < e820_table->nr_entries; i++) {
  942. struct e820_entry *entry = e820_table->entries + i;
  943. end = entry->addr + entry->size - 1;
  944. if (end != (resource_size_t)end) {
  945. res++;
  946. continue;
  947. }
  948. res->start = entry->addr;
  949. res->end = end;
  950. res->name = e820_type_to_string(entry);
  951. res->flags = e820_type_to_iomem_type(entry);
  952. res->desc = e820_type_to_iores_desc(entry);
  953. /*
  954. * Don't register the region that could be conflicted with
  955. * PCI device BAR resources and insert them later in
  956. * pcibios_resource_survey():
  957. */
  958. if (do_mark_busy(entry->type, res)) {
  959. res->flags |= IORESOURCE_BUSY;
  960. insert_resource(&iomem_resource, res);
  961. }
  962. res++;
  963. }
  964. /* Expose the bootloader-provided memory layout to the sysfs. */
  965. for (i = 0; i < e820_table_firmware->nr_entries; i++) {
  966. struct e820_entry *entry = e820_table_firmware->entries + i;
  967. firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry));
  968. }
  969. }
  970. /*
  971. * How much should we pad the end of RAM, depending on where it is?
  972. */
  973. static unsigned long __init ram_alignment(resource_size_t pos)
  974. {
  975. unsigned long mb = pos >> 20;
  976. /* To 64kB in the first megabyte */
  977. if (!mb)
  978. return 64*1024;
  979. /* To 1MB in the first 16MB */
  980. if (mb < 16)
  981. return 1024*1024;
  982. /* To 64MB for anything above that */
  983. return 64*1024*1024;
  984. }
  985. #define MAX_RESOURCE_SIZE ((resource_size_t)-1)
  986. void __init e820__reserve_resources_late(void)
  987. {
  988. int i;
  989. struct resource *res;
  990. res = e820_res;
  991. for (i = 0; i < e820_table->nr_entries; i++) {
  992. if (!res->parent && res->end)
  993. insert_resource_expand_to_fit(&iomem_resource, res);
  994. res++;
  995. }
  996. /*
  997. * Try to bump up RAM regions to reasonable boundaries, to
  998. * avoid stolen RAM:
  999. */
  1000. for (i = 0; i < e820_table->nr_entries; i++) {
  1001. struct e820_entry *entry = &e820_table->entries[i];
  1002. u64 start, end;
  1003. if (entry->type != E820_TYPE_RAM)
  1004. continue;
  1005. start = entry->addr + entry->size;
  1006. end = round_up(start, ram_alignment(start)) - 1;
  1007. if (end > MAX_RESOURCE_SIZE)
  1008. end = MAX_RESOURCE_SIZE;
  1009. if (start >= end)
  1010. continue;
  1011. printk(KERN_DEBUG "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", start, end);
  1012. reserve_region_with_split(&iomem_resource, start, end, "RAM buffer");
  1013. }
  1014. }
  1015. /*
  1016. * Pass the firmware (bootloader) E820 map to the kernel and process it:
  1017. */
  1018. char *__init e820__memory_setup_default(void)
  1019. {
  1020. char *who = "BIOS-e820";
  1021. /*
  1022. * Try to copy the BIOS-supplied E820-map.
  1023. *
  1024. * Otherwise fake a memory map; one section from 0k->640k,
  1025. * the next section from 1mb->appropriate_mem_k
  1026. */
  1027. if (append_e820_table(boot_params.e820_table, boot_params.e820_entries) < 0) {
  1028. u64 mem_size;
  1029. /* Compare results from other methods and take the one that gives more RAM: */
  1030. if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
  1031. mem_size = boot_params.screen_info.ext_mem_k;
  1032. who = "BIOS-88";
  1033. } else {
  1034. mem_size = boot_params.alt_mem_k;
  1035. who = "BIOS-e801";
  1036. }
  1037. e820_table->nr_entries = 0;
  1038. e820__range_add(0, LOWMEMSIZE(), E820_TYPE_RAM);
  1039. e820__range_add(HIGH_MEMORY, mem_size << 10, E820_TYPE_RAM);
  1040. }
  1041. /* We just appended a lot of ranges, sanitize the table: */
  1042. e820__update_table(e820_table);
  1043. return who;
  1044. }
  1045. /*
  1046. * Calls e820__memory_setup_default() in essence to pick up the firmware/bootloader
  1047. * E820 map - with an optional platform quirk available for virtual platforms
  1048. * to override this method of boot environment processing:
  1049. */
  1050. void __init e820__memory_setup(void)
  1051. {
  1052. char *who;
  1053. /* This is a firmware interface ABI - make sure we don't break it: */
  1054. BUILD_BUG_ON(sizeof(struct boot_e820_entry) != 20);
  1055. who = x86_init.resources.memory_setup();
  1056. memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
  1057. memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
  1058. pr_info("BIOS-provided physical RAM map:\n");
  1059. e820__print_table(who);
  1060. }
  1061. void __init e820__memblock_setup(void)
  1062. {
  1063. int i;
  1064. u64 end;
  1065. /*
  1066. * The bootstrap memblock region count maximum is 128 entries
  1067. * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries
  1068. * than that - so allow memblock resizing.
  1069. *
  1070. * This is safe, because this call happens pretty late during x86 setup,
  1071. * so we know about reserved memory regions already. (This is important
  1072. * so that memblock resizing does no stomp over reserved areas.)
  1073. */
  1074. memblock_allow_resize();
  1075. for (i = 0; i < e820_table->nr_entries; i++) {
  1076. struct e820_entry *entry = &e820_table->entries[i];
  1077. end = entry->addr + entry->size;
  1078. if (end != (resource_size_t)end)
  1079. continue;
  1080. if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
  1081. continue;
  1082. memblock_add(entry->addr, entry->size);
  1083. }
  1084. /* Throw away partial pages: */
  1085. memblock_trim_memory(PAGE_SIZE);
  1086. memblock_dump_all();
  1087. }