// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
#include "bus.h"

static DEFINE_MUTEX(dax_bus_lock);

/*
 * All changes to the dax region configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_region_rwsem);

/*
 * All changes to the dax device configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_dev_rwsem);

#define DAX_NAME_LEN 30
struct dax_id {
	struct list_head list;
	char dev_name[DAX_NAME_LEN];
};

static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
}

#define to_dax_drv(__drv) container_of_const(__drv, struct dax_device_driver, drv)

static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv,
				     const char *dev_name)
{
	struct dax_id *dax_id;

	lockdep_assert_held(&dax_bus_lock);

	list_for_each_entry(dax_id, &dax_drv->ids, list)
		if (sysfs_streq(dax_id->dev_name, dev_name))
			return dax_id;
	return NULL;
}

static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev)
{
	int match;

	mutex_lock(&dax_bus_lock);
	match = !!__dax_match_id(dax_drv, dev_name(dev));
	mutex_unlock(&dax_bus_lock);

	return match;
}

static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev)
{
	enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
		type = DAXDRV_KMEM_TYPE;

	if (dax_drv->type == type)
		return 1;

	/* default to device mode if dax_kmem is disabled */
	if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
	    !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
		return 1;

	return 0;
}

enum id_action {
	ID_REMOVE,
	ID_ADD,
};
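
/*
 * Parse and validate a "daxX.Y" device name written to the driver's
 * new_id / remove_id attributes, and add it to, or drop it from, the
 * driver's manual-binding id list.
 */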
static ssize_t do_id_store(struct device_driver *drv, const char *buf,
			   size_t count, enum id_action action)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);
	unsigned int region_id, id;
	char devname[DAX_NAME_LEN];
	struct dax_id *dax_id;
	ssize_t rc = count;
	int fields;

	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
	if (fields != 2)
		return -EINVAL;
	sprintf(devname, "dax%d.%d", region_id, id);
	if (!sysfs_streq(buf, devname))
		return -EINVAL;

	mutex_lock(&dax_bus_lock);
	dax_id = __dax_match_id(dax_drv, buf);
	if (!dax_id) {
		if (action == ID_ADD) {
			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
			if (dax_id) {
				strscpy(dax_id->dev_name, buf, DAX_NAME_LEN);
				list_add(&dax_id->list, &dax_drv->ids);
			} else
				rc = -ENOMEM;
		}
	} else if (action == ID_REMOVE) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);

	if (rc < 0)
		return rc;
	if (action == ID_ADD)
		rc = driver_attach(drv);
	if (rc)
		return rc;
	return count;
}

static ssize_t new_id_store(struct device_driver *drv, const char *buf,
			    size_t count)
{
	return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);

static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
			       size_t count)
{
	return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *dax_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dax_drv);

static int dax_bus_match(struct device *dev, const struct device_driver *drv);

/*
 * Static dax regions are regions created by an external subsystem, e.g.
 * nvdimm, where a single range is assigned. Their boundaries are set by the
 * external subsystem and are usually limited to one physical memory range.
 * For example, for PMEM it is usually defined by NVDIMM Namespace boundaries
 * (i.e. a single contiguous range).
 *
 * On dynamic dax regions, the assigned region can be partitioned by dax core
 * into multiple subdivisions. A subdivision is represented by one
 * /dev/daxN.M device composed of one or more potentially discontiguous ranges.
 *
 * When allocating a dax region, drivers must set whether it's static
 * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned
 * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
 * devices it is NULL but afterwards allocated by dax core on device ->probe().
 * Care is needed to make sure that dynamic dax devices are torn down with a
 * cleared @pgmap field (see kill_dev_dax()).
 */
static bool is_static(struct dax_region *dax_region)
{
	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}

bool static_dev_dax(struct dev_dax *dev_dax)
{
	return is_static(dev_dax->region);
}
EXPORT_SYMBOL_GPL(static_dev_dax);
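
/* Sum of all allocated ranges; 0 for an unconfigured (seed) device */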
static u64 dev_dax_size(struct dev_dax *dev_dax)
{
	u64 size = 0;
	int i;

	lockdep_assert_held(&dax_dev_rwsem);

	for (i = 0; i < dev_dax->nr_range; i++)
		size += range_len(&dev_dax->ranges[i].range);

	return size;
}

static int dax_bus_probe(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	int rc;
	u64 size;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	if (size == 0 || dev_dax->id < 0)
		return -ENXIO;

	rc = dax_drv->probe(dev_dax);

	if (rc || is_static(dax_region))
		return rc;

	/*
	 * Track new seed creation only after successful probe of the
	 * previous seed.
	 */
	if (dax_region->seed == dev)
		dax_region->seed = NULL;

	return 0;
}

static void dax_bus_remove(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dax_drv->remove)
		dax_drv->remove(dev_dax);
}

static const struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};

static int dax_bus_match(struct device *dev, const struct device_driver *drv)
{
	const struct dax_device_driver *dax_drv = to_dax_drv(drv);

	if (dax_match_id(dax_drv, dev))
		return 1;
	return dax_match_type(dax_drv, dev);
}

/*
 * Rely on the fact that drvdata is set before the attributes are
 * registered, and that the attributes are unregistered before drvdata
 * is cleared to assume that drvdata is always valid.
 */
static ssize_t id_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);

static ssize_t region_size_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%llu\n",
			  (unsigned long long)resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
		region_size_show, NULL);

static ssize_t region_align_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
		__ATTR(align, 0400, region_align_show, NULL);

#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)
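
/* Region capacity minus the sum of all allocated child resources */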
static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
{
	resource_size_t size = resource_size(&dax_region->res);
	struct resource *res;

	lockdep_assert_held(&dax_region_rwsem);

	for_each_dax_region_resource(dax_region, res)
		size -= resource_size(res);
	return size;
}

static ssize_t available_size_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	size = dax_region_avail_size(dax_region);
	up_read(&dax_region_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);

static ssize_t seed_show(struct device *dev,
			 struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *seed;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	seed = dax_region->seed;
	rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
	up_read(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RO(seed);

static ssize_t create_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *youngest;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	youngest = dax_region->youngest;
	rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
	up_read(&dax_region_rwsem);

	return rc;
}

static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);
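
/*
 * Writing 1 to a dynamic region's create attribute provisions a new,
 * 0-sized seed device that userspace can then size and bind.
 */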
static ssize_t create_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long avail;
	ssize_t rc;
	int val;

	if (is_static(dax_region))
		return -EINVAL;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	avail = dax_region_avail_size(dax_region);
	if (avail == 0)
		rc = -ENOSPC;
	else {
		struct dev_dax_data data = {
			.dax_region = dax_region,
			.size = 0,
			.id = -1,
			.memmap_on_memory = false,
		};
		struct dev_dax *dev_dax = __devm_create_dev_dax(&data);

		if (IS_ERR(dev_dax))
			rc = PTR_ERR(dev_dax);
		else {
			/*
			 * In support of crafting multiple new devices
			 * simultaneously multiple seeds can be created,
			 * but only the first one that has not been
			 * successfully bound is tracked as the region
			 * seed.
			 */
			if (!dax_region->seed)
				dax_region->seed = &dev_dax->dev;
			dax_region->youngest = &dev_dax->dev;
			rc = len;
		}
	}
	up_write(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(create);

void kill_dev_dax(struct dev_dax *dev_dax)
{
	struct dax_device *dax_dev = dev_dax->dax_dev;
	struct inode *inode = dax_inode(dax_dev);

	kill_dax(dax_dev);
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);

	/*
	 * Dynamic dax regions have the pgmap allocated via devm_kzalloc()
	 * and thus freed by devm. Clear the pgmap to avoid stale pgmap
	 * ranges on probe() from previous reconfigurations of region devices.
	 */
	if (!static_dev_dax(dev_dax))
		dev_dax->pgmap = NULL;
}
EXPORT_SYMBOL_GPL(kill_dev_dax);
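
/* Release the device's last (highest pgoff) range back to the region */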
static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
	int i = dev_dax->nr_range - 1;
	struct range *range = &dev_dax->ranges[i].range;
	struct dax_region *dax_region = dev_dax->region;

	lockdep_assert_held_write(&dax_region_rwsem);
	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
		(unsigned long long)range->start,
		(unsigned long long)range->end);

	__release_region(&dax_region->res, range->start, range_len(range));
	if (--dev_dax->nr_range == 0) {
		kfree(dev_dax->ranges);
		dev_dax->ranges = NULL;
	}
}

static void free_dev_dax_ranges(struct dev_dax *dev_dax)
{
	while (dev_dax->nr_range)
		trim_dev_dax_range(dev_dax);
}

static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	down_write(&dax_region_rwsem);
	kill_dev_dax(dev_dax);
	device_del(dev);
	free_dev_dax_ranges(dev_dax);
	put_device(dev);
	up_write(&dax_region_rwsem);
}

static void dax_region_free(struct kref *kref)
{
	struct dax_region *dax_region;

	dax_region = container_of(kref, struct dax_region, kref);
	kfree(dax_region);
}

static void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}

/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region;
	int rc = dev_dax->id;

	lockdep_assert_held_write(&dax_dev_rwsem);

	if (!dev_dax->dyn_id || dev_dax->id < 0)
		return -1;
	dax_region = dev_dax->region;
	ida_free(&dax_region->ida, dev_dax->id);
	dax_region_put(dax_region);
	dev_dax->id = -1;
	return rc;
}

static int free_dev_dax_id(struct dev_dax *dev_dax)
{
	int rc;

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;
	rc = __free_dev_dax_id(dev_dax);
	up_write(&dax_dev_rwsem);
	return rc;
}

static int alloc_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;
	int id;

	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
	if (id < 0)
		return id;
	kref_get(&dax_region->kref);
	dev_dax->dyn_id = true;
	dev_dax->id = id;
	return id;
}
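
/*
 * Userspace-initiated teardown of a dynamic device: only an idle
 * (unbound, 0-sized) device may be deleted, and device-id 0 is always
 * preserved.
 */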
static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	down_write(&dax_dev_rwsem);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	up_write(&dax_dev_rwsem);
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);

static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dax_region *dax_region = dev_get_drvdata(dev);

	if (is_static(dax_region))
		if (a == &dev_attr_available_size.attr
		    || a == &dev_attr_create.attr
		    || a == &dev_attr_seed.attr
		    || a == &dev_attr_delete.attr)
			return 0;
	return a->mode;
}

static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};

static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};

static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};

static void dax_region_unregister(void *region)
{
	struct dax_region *dax_region = region;

	sysfs_remove_groups(&dax_region->dev->kobj,
			dax_region_attribute_groups);
	dax_region_put(dax_region);
}
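
/**
 * alloc_dax_region - create a new dax region bound to @parent
 * @parent: device providing the physical range and drvdata storage
 * @region_id: id used to name child devices (dax<region_id>.<N>)
 * @range: physical address range backing the region
 * @target_node: NUMA node associated with the region's memory
 * @align: allocation and mapping alignment for devices in this region
 * @flags: IORESOURCE_DAX_* flags, e.g. IORESOURCE_DAX_STATIC
 *
 * Returns the new region on success, NULL on failure.
 */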
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
		struct range *range, int target_node, unsigned int align,
		unsigned long flags)
{
	struct dax_region *dax_region;

	/*
	 * The DAX core assumes that it can store its private data in
	 * parent->driver_data. This WARN is a reminder / safeguard for
	 * developers of device-dax drivers.
	 */
	if (dev_get_drvdata(parent)) {
		dev_WARN(parent, "dax core failed to setup private data\n");
		return NULL;
	}

	if (!IS_ALIGNED(range->start, align)
			|| !IS_ALIGNED(range_len(range), align))
		return NULL;

	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
	if (!dax_region)
		return NULL;

	dev_set_drvdata(parent, dax_region);
	kref_init(&dax_region->kref);
	dax_region->id = region_id;
	dax_region->align = align;
	dax_region->dev = parent;
	dax_region->target_node = target_node;
	ida_init(&dax_region->ida);
	dax_region->res = (struct resource) {
		.start = range->start,
		.end = range->end,
		.flags = IORESOURCE_MEM | flags,
	};

	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
		kfree(dax_region);
		return NULL;
	}

	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
		return NULL;
	return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);

static void dax_mapping_release(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct device *parent = dev->parent;
	struct dev_dax *dev_dax = to_dev_dax(parent);

	ida_free(&dev_dax->ida, mapping->id);
	kfree(mapping);
	put_device(parent);
}

static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);

	dev_dbg(dev, "%s\n", __func__);

	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_unregister(dev);
}

static struct dev_dax_range *get_dax_range(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	int rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return NULL;
	if (mapping->range_id < 0) {
		up_write(&dax_region_rwsem);
		return NULL;
	}

	return &dev_dax->ranges[mapping->range_id];
}

static void put_dax_range(void)
{
	up_write(&dax_region_rwsem);
}

static ssize_t start_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(start, 0400, start_show, NULL);

static ssize_t end_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(end, 0400, end_show, NULL);

static ssize_t pgoff_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);

static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};

static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};

static const struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};
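
/* Publish a "mappingN" child device describing one allocated range */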
static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
{
	struct dax_region *dax_region = dev_dax->region;
	struct dax_mapping *mapping;
	struct device *dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
				"region disabled\n"))
		return -ENXIO;

	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;
	mapping->range_id = range_id;
	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
	if (mapping->id < 0) {
		kfree(mapping);
		return -ENOMEM;
	}
	dev_dax->ranges[range_id].mapping = mapping;
	dev = &mapping->dev;
	device_initialize(dev);
	dev->parent = &dev_dax->dev;
	get_device(dev->parent);
	dev->type = &dax_mapping_type;
	dev_set_name(dev, "mapping%d", mapping->id);
	rc = device_add(dev);
	if (rc) {
		put_device(dev);
		return rc;
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
			dev);
	if (rc)
		return rc;
	return 0;
}
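
/*
 * Reserve @size bytes at @start from the region and append the result to
 * dev_dax->ranges; a 0-sized request is only valid for an empty (seed)
 * device.
 */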
static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
		resource_size_t size)
{
	struct dax_region *dax_region = dev_dax->region;
	struct resource *res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct dev_dax_range *ranges;
	unsigned long pgoff = 0;
	struct resource *alloc;
	int i, rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	/* handle the seed alloc special case */
	if (!size) {
		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
					"0-size allocation must be first\n"))
			return -EBUSY;
		/* nr_range == 0 is elsewhere special cased as 0-size device */
		return 0;
	}

	alloc = __request_region(res, start, size, dev_name(dev), 0);
	if (!alloc)
		return -ENOMEM;

	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
			* (dev_dax->nr_range + 1), GFP_KERNEL);
	if (!ranges) {
		__release_region(res, alloc->start, resource_size(alloc));
		return -ENOMEM;
	}

	for (i = 0; i < dev_dax->nr_range; i++)
		pgoff += PHYS_PFN(range_len(&ranges[i].range));
	dev_dax->ranges = ranges;
	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
		.pgoff = pgoff,
		.range = {
			.start = alloc->start,
			.end = alloc->end,
		},
	};

	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
			&alloc->start, &alloc->end);
	/*
	 * A dev_dax instance must be registered before mapping device
	 * children can be added. Defer to devm_create_dev_dax() to add
	 * the initial mapping device.
	 */
	if (!device_is_registered(&dev_dax->dev))
		return 0;

	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
	if (rc)
		trim_dev_dax_range(dev_dax);

	return rc;
}

static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
{
	int last_range = dev_dax->nr_range - 1;
	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
	bool is_shrink = resource_size(res) > size;
	struct range *range = &dax_range->range;
	struct device *dev = &dev_dax->dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
		return -EINVAL;

	rc = adjust_resource(res, range->start, size);
	if (rc)
		return rc;

	*range = (struct range) {
		.start = range->start,
		.end = range->start + size - 1,
	};

	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
			last_range, (unsigned long long) range->start,
			(unsigned long long) range->end);

	return 0;
}

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}

static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
{
	/*
	 * The minimum mapping granularity for a device instance is a
	 * single subsection, unless the arch says otherwise.
	 */
	return IS_ALIGNED(size, max_t(unsigned long,
				dev_dax->align, memremap_compat_align()));
}
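
/*
 * Shrink from the end of the device: whole trailing ranges are released,
 * and at most the final (highest pgoff) range is partially adjusted.
 */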
static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		if (shrink >= range_len(range)) {
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}

/*
 * Only allow adjustments that preserve the relative pgoff of existing
 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 */
static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
{
	struct dev_dax_range *last;
	int i;

	if (dev_dax->nr_range == 0)
		return false;
	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
		return false;
	last = &dev_dax->ranges[dev_dax->nr_range - 1];
	if (last->range.start != res->start || last->range.end != res->end)
		return false;
	for (i = 0; i < dev_dax->nr_range - 1; i++) {
		struct dev_dax_range *dax_range = &dev_dax->ranges[i];

		if (dax_range->pgoff > last->pgoff)
			return false;
	}

	return true;
}
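
/*
 * Grow or shrink @dev_dax to @size bytes: new capacity is taken first-fit
 * from unused gaps in the region, extending the device's last range in
 * place when adjust_ok() allows it.
 */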
static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource.
	 */
retry:
	first = region_res->child;
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}

	if (rc)
		return rc;
	to_alloc -= alloc;
	if (to_alloc)
		goto retry;
	return 0;
}

static ssize_t size_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	ssize_t rc;
	unsigned long long val;
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	rc = kstrtoull(buf, 0, &val);
	if (rc)
		return rc;

	if (!alloc_is_aligned(dev_dax, val)) {
		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
		return -EINVAL;
	}

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		rc = -ENXIO;
		goto err_region;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		goto err_region;

	rc = dev_dax_resize(dax_region, dev_dax, val);

	up_write(&dax_dev_rwsem);
err_region:
	up_write(&dax_region_rwsem);

	if (rc == 0)
		return len;
	return rc;
}
static DEVICE_ATTR_RW(size);
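
/* Parse a hexadecimal "<start>-<end>" pair into @range */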
static ssize_t range_parse(const char *opt, size_t len, struct range *range)
{
	unsigned long long addr = 0;
	char *start, *end, *str;
	ssize_t rc = -EINVAL;

	str = kstrdup(opt, GFP_KERNEL);
	if (!str)
		return rc;

	end = str;
	start = strsep(&end, "-");
	if (!start || !end)
		goto err;

	rc = kstrtoull(start, 16, &addr);
	if (rc)
		goto err;
	range->start = addr;

	rc = kstrtoull(end, 16, &addr);
	if (rc)
		goto err;
	range->end = addr;

err:
	kfree(str);
	return rc;
}
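
/*
 * Allow userspace of a dynamic region to request that a specific,
 * possibly discontiguous, physical range be added to the device.
 */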
static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	size_t to_alloc;
	struct range r;
	ssize_t rc;

	rc = range_parse(buf, len, &r);
	if (rc)
		return rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return -ENXIO;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}

	to_alloc = range_len(&r);
	if (alloc_is_aligned(dev_dax, to_alloc))
		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_WO(mapping);

static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->align);
}
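
/* Reject an align change that any existing allocation does not satisfy */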
static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = 0; i < dev_dax->nr_range; i++) {
		size_t len = range_len(&dev_dax->ranges[i].range);

		if (!alloc_is_aligned(dev_dax, len)) {
			dev_dbg(dev, "%s: align %u invalid for range %d\n",
				__func__, dev_dax->align, i);
			return -EINVAL;
		}
	}

	return 0;
}

static ssize_t align_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long val, align_save;
	ssize_t rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return -ENXIO;

	if (!dax_align_valid(val))
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return -ENXIO;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}
	if (dev->driver) {
		rc = -EBUSY;
		goto out_unlock;
	}

	align_save = dev_dax->align;
	dev_dax->align = val;
	rc = dev_dax_validate_align(dev_dax);
	if (rc)
		dev_dax->align = align_save;
out_unlock:
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);
	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(align);

static int dev_dax_target_node(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;

	return dax_region->target_node;
}

static ssize_t target_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
}
static DEVICE_ATTR_RO(target_node);

static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long long start;

	if (dev_dax->nr_range < 1)
		start = dax_region->res.start;
	else
		start = dev_dax->ranges[0].range.start;

	return sysfs_emit(buf, "%#llx\n", start);
}
static DEVICE_ATTR(resource, 0400, resource_show, NULL);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static ssize_t memmap_on_memory_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
}

static ssize_t memmap_on_memory_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	bool val;
	int rc;

	rc = kstrtobool(buf, &val);
	if (rc)
		return rc;

	if (val == true && !mhp_supports_memmap_on_memory()) {
		dev_dbg(dev, "memmap_on_memory is not available\n");
		return -EOPNOTSUPP;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;

	if (dev_dax->memmap_on_memory != val && dev->driver &&
	    to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
		up_write(&dax_dev_rwsem);
		return -EBUSY;
	}

	dev_dax->memmap_on_memory = val;
	up_write(&dax_dev_rwsem);

	return len;
}
static DEVICE_ATTR_RW(memmap_on_memory);

static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
		return 0;
	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
		return 0;
	if (a == &dev_attr_mapping.attr && is_static(dax_region))
		return 0;
	if ((a == &dev_attr_align.attr ||
	     a == &dev_attr_size.attr) && is_static(dax_region))
		return 0444;
	return a->mode;
}

static struct attribute *dev_dax_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_size.attr,
	&dev_attr_mapping.attr,
	&dev_attr_target_node.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_numa_node.attr,
	&dev_attr_memmap_on_memory.attr,
	NULL,
};

static const struct attribute_group dev_dax_attribute_group = {
	.attrs = dev_dax_attributes,
	.is_visible = dev_dax_visible,
};

static const struct attribute_group *dax_attribute_groups[] = {
	&dev_dax_attribute_group,
	NULL,
};

static void dev_dax_release(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_device *dax_dev = dev_dax->dax_dev;

	put_dax(dax_dev);
	free_dev_dax_id(dev_dax);
	kfree(dev_dax->pgmap);
	kfree(dev_dax);
}

static const struct device_type dev_dax_type = {
	.release = dev_dax_release,
	.groups = dax_attribute_groups,
};
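
/*
 * Core creation path, called with dax_region_rwsem held for write:
 * allocates the device id and initial range, then registers the char
 * device and its initial mapping child.
 */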
static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dax_region *dax_region = data->dax_region;
	struct device *parent = dax_region->dev;
	struct dax_device *dax_dev;
	struct dev_dax *dev_dax;
	struct inode *inode;
	struct device *dev;
	int rc;

	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
	if (!dev_dax)
		return ERR_PTR(-ENOMEM);

	dev_dax->region = dax_region;
	if (is_static(dax_region)) {
		if (dev_WARN_ONCE(parent, data->id < 0,
				"dynamic id specified to static region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		dev_dax->id = data->id;
	} else {
		if (dev_WARN_ONCE(parent, data->id >= 0,
				"static id specified to dynamic region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		rc = alloc_dev_dax_id(dev_dax);
		if (rc < 0)
			goto err_id;
	}

	dev = &dev_dax->dev;
	device_initialize(dev);
	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);

	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
	if (rc)
		goto err_range;

	if (data->pgmap) {
		dev_WARN_ONCE(parent, !is_static(dax_region),
			"custom dev_pagemap requires a static dax_region\n");

		dev_dax->pgmap = kmemdup(data->pgmap,
				sizeof(struct dev_pagemap), GFP_KERNEL);
		if (!dev_dax->pgmap) {
			rc = -ENOMEM;
			goto err_pgmap;
		}
	}

	/*
	 * No dax_operations since there is no access to this device outside of
	 * mmap of the resulting character device.
	 */
	dax_dev = alloc_dax(dev_dax, NULL);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto err_alloc_dax;
	}
	set_dax_synchronous(dax_dev);
	set_dax_nocache(dax_dev);
	set_dax_nomc(dax_dev);

	/* a device_dax instance is dead while the driver is not attached */
	kill_dax(dax_dev);

	dev_dax->dax_dev = dax_dev;
	dev_dax->target_node = dax_region->target_node;
	dev_dax->align = dax_region->align;
	ida_init(&dev_dax->ida);

	dev_dax->memmap_on_memory = data->memmap_on_memory;

	inode = dax_inode(dax_dev);
	dev->devt = inode->i_rdev;
	dev->bus = &dax_bus_type;
	dev->parent = parent;
	dev->type = &dev_dax_type;

	rc = device_add(dev);
	if (rc) {
		kill_dev_dax(dev_dax);
		put_device(dev);
		return ERR_PTR(rc);
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
	if (rc)
		return ERR_PTR(rc);

	/* register mapping device for the initial allocation range */
	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
		rc = devm_register_dax_mapping(dev_dax, 0);
		if (rc)
			return ERR_PTR(rc);
	}

	return dev_dax;

err_alloc_dax:
	kfree(dev_dax->pgmap);
err_pgmap:
	free_dev_dax_ranges(dev_dax);
err_range:
	free_dev_dax_id(dev_dax);
err_id:
	kfree(dev_dax);

	return ERR_PTR(rc);
}

struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dev_dax *dev_dax;

	down_write(&dax_region_rwsem);
	dev_dax = __devm_create_dev_dax(data);
	up_write(&dax_region_rwsem);

	return dev_dax;
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);

int __dax_driver_register(struct dax_device_driver *dax_drv,
		struct module *module, const char *mod_name)
{
	struct device_driver *drv = &dax_drv->drv;

	/*
	 * dax_bus_probe() calls dax_drv->probe() unconditionally.
	 * So better be safe than sorry and ensure it is provided.
	 */
	if (!dax_drv->probe)
		return -EINVAL;

	INIT_LIST_HEAD(&dax_drv->ids);
	drv->owner = module;
	drv->name = mod_name;
	drv->mod_name = mod_name;
	drv->bus = &dax_bus_type;

	return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);

void dax_driver_unregister(struct dax_device_driver *dax_drv)
{
	struct device_driver *drv = &dax_drv->drv;
	struct dax_id *dax_id, *_id;

	mutex_lock(&dax_bus_lock);
	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);
	driver_unregister(drv);
}
EXPORT_SYMBOL_GPL(dax_driver_unregister);

int __init dax_bus_init(void)
{
	return bus_register(&dax_bus_type);
}

void __exit dax_bus_exit(void)
{
	bus_unregister(&dax_bus_type);
}