region.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
  3. #include <linux/memregion.h>
  4. #include <linux/genalloc.h>
  5. #include <linux/device.h>
  6. #include <linux/module.h>
  7. #include <linux/memory.h>
  8. #include <linux/slab.h>
  9. #include <linux/uuid.h>
  10. #include <linux/sort.h>
  11. #include <linux/idr.h>
  12. #include <linux/memory-tiers.h>
  13. #include <cxlmem.h>
  14. #include <cxl.h>
  15. #include "core.h"
  16. /**
  17. * DOC: cxl core region
  18. *
  19. * CXL Regions represent mapped memory capacity in system physical address
  20. * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
  21. * Memory ranges, Regions represent the active mapped capacity by the HDM
  22. * Decoder Capability structures throughout the Host Bridges, Switches, and
  23. * Endpoints in the topology.
  24. *
  25. * Region configuration has ordering constraints. UUID may be set at any time
  26. * but is only visible for persistent regions.
  27. * 1. Interleave granularity
  28. * 2. Interleave size
  29. * 3. Decoder targets
  30. */
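/*
 * Illustrative example (editor's sketch, not part of the original source):
 * a typical pmem region setup from user space might look like the
 * following, where decoder names, sizes, and the uuid are placeholders:
 *
 *   echo region0 > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *   echo $(uuidgen) > /sys/bus/cxl/devices/region0/uuid
 *   echo 256 > /sys/bus/cxl/devices/region0/interleave_granularity
 *   echo 2 > /sys/bus/cxl/devices/region0/interleave_ways
 *   echo $((512 << 20)) > /sys/bus/cxl/devices/region0/size
 *   echo decoder3.0 > /sys/bus/cxl/devices/region0/target0
 *   echo decoder4.0 > /sys/bus/cxl/devices/region0/target1
 *   echo 1 > /sys/bus/cxl/devices/region0/commit
 */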
  31. static struct cxl_region *to_cxl_region(struct device *dev);
  32. #define __ACCESS_ATTR_RO(_level, _name) { \
  33. .attr = { .name = __stringify(_name), .mode = 0444 }, \
  34. .show = _name##_access##_level##_show, \
  35. }
  36. #define ACCESS_DEVICE_ATTR_RO(level, name) \
  37. struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
  38. #define ACCESS_ATTR_RO(level, attrib) \
  39. static ssize_t attrib##_access##level##_show(struct device *dev, \
  40. struct device_attribute *attr, \
  41. char *buf) \
  42. { \
  43. struct cxl_region *cxlr = to_cxl_region(dev); \
  44. \
  45. if (cxlr->coord[level].attrib == 0) \
  46. return -ENOENT; \
  47. \
  48. return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib); \
  49. } \
  50. static ACCESS_DEVICE_ATTR_RO(level, attrib)
  51. ACCESS_ATTR_RO(0, read_bandwidth);
  52. ACCESS_ATTR_RO(0, read_latency);
  53. ACCESS_ATTR_RO(0, write_bandwidth);
  54. ACCESS_ATTR_RO(0, write_latency);
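/*
 * Expansion sketch (editor's note, not part of the original source):
 * ACCESS_ATTR_RO(0, read_bandwidth) above expands to roughly:
 *
 *   static ssize_t read_bandwidth_access0_show(struct device *dev,
 *                                              struct device_attribute *attr,
 *                                              char *buf)
 *   {
 *           struct cxl_region *cxlr = to_cxl_region(dev);
 *
 *           if (cxlr->coord[0].read_bandwidth == 0)
 *                   return -ENOENT;
 *           return sysfs_emit(buf, "%u\n", cxlr->coord[0].read_bandwidth);
 *   }
 *   static struct device_attribute dev_attr_access0_read_bandwidth = {
 *           .attr = { .name = "read_bandwidth", .mode = 0444 },
 *           .show = read_bandwidth_access0_show,
 *   };
 *
 * i.e. a read-only "read_bandwidth" file in the region's access0 group.
 */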
  55. #define ACCESS_ATTR_DECLARE(level, attrib) \
  56. (&dev_attr_access##level##_##attrib.attr)
  57. static struct attribute *access0_coordinate_attrs[] = {
  58. ACCESS_ATTR_DECLARE(0, read_bandwidth),
  59. ACCESS_ATTR_DECLARE(0, write_bandwidth),
  60. ACCESS_ATTR_DECLARE(0, read_latency),
  61. ACCESS_ATTR_DECLARE(0, write_latency),
  62. NULL
  63. };
  64. ACCESS_ATTR_RO(1, read_bandwidth);
  65. ACCESS_ATTR_RO(1, read_latency);
  66. ACCESS_ATTR_RO(1, write_bandwidth);
  67. ACCESS_ATTR_RO(1, write_latency);
  68. static struct attribute *access1_coordinate_attrs[] = {
  69. ACCESS_ATTR_DECLARE(1, read_bandwidth),
  70. ACCESS_ATTR_DECLARE(1, write_bandwidth),
  71. ACCESS_ATTR_DECLARE(1, read_latency),
  72. ACCESS_ATTR_DECLARE(1, write_latency),
  73. NULL
  74. };
  75. #define ACCESS_VISIBLE(level) \
  76. static umode_t cxl_region_access##level##_coordinate_visible( \
  77. struct kobject *kobj, struct attribute *a, int n) \
  78. { \
  79. struct device *dev = kobj_to_dev(kobj); \
  80. struct cxl_region *cxlr = to_cxl_region(dev); \
  81. \
  82. if (a == &dev_attr_access##level##_read_latency.attr && \
  83. cxlr->coord[level].read_latency == 0) \
  84. return 0; \
  85. \
  86. if (a == &dev_attr_access##level##_write_latency.attr && \
  87. cxlr->coord[level].write_latency == 0) \
  88. return 0; \
  89. \
  90. if (a == &dev_attr_access##level##_read_bandwidth.attr && \
  91. cxlr->coord[level].read_bandwidth == 0) \
  92. return 0; \
  93. \
  94. if (a == &dev_attr_access##level##_write_bandwidth.attr && \
  95. cxlr->coord[level].write_bandwidth == 0) \
  96. return 0; \
  97. \
  98. return a->mode; \
  99. }
  100. ACCESS_VISIBLE(0);
  101. ACCESS_VISIBLE(1);
  102. static const struct attribute_group cxl_region_access0_coordinate_group = {
  103. .name = "access0",
  104. .attrs = access0_coordinate_attrs,
  105. .is_visible = cxl_region_access0_coordinate_visible,
  106. };
  107. static const struct attribute_group *get_cxl_region_access0_group(void)
  108. {
  109. return &cxl_region_access0_coordinate_group;
  110. }
  111. static const struct attribute_group cxl_region_access1_coordinate_group = {
  112. .name = "access1",
  113. .attrs = access1_coordinate_attrs,
  114. .is_visible = cxl_region_access1_coordinate_visible,
  115. };
  116. static const struct attribute_group *get_cxl_region_access1_group(void)
  117. {
  118. return &cxl_region_access1_coordinate_group;
  119. }
  120. static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
  121. char *buf)
  122. {
  123. struct cxl_region *cxlr = to_cxl_region(dev);
  124. struct cxl_region_params *p = &cxlr->params;
  125. ssize_t rc;
  126. rc = down_read_interruptible(&cxl_region_rwsem);
  127. if (rc)
  128. return rc;
  129. if (cxlr->mode != CXL_DECODER_PMEM)
  130. rc = sysfs_emit(buf, "\n");
  131. else
  132. rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
  133. up_read(&cxl_region_rwsem);
  134. return rc;
  135. }
  136. static int is_dup(struct device *match, void *data)
  137. {
  138. struct cxl_region_params *p;
  139. struct cxl_region *cxlr;
  140. uuid_t *uuid = data;
  141. if (!is_cxl_region(match))
  142. return 0;
  143. lockdep_assert_held(&cxl_region_rwsem);
  144. cxlr = to_cxl_region(match);
  145. p = &cxlr->params;
  146. if (uuid_equal(&p->uuid, uuid)) {
  147. dev_dbg(match, "already has uuid: %pUb\n", uuid);
  148. return -EBUSY;
  149. }
  150. return 0;
  151. }
  152. static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
  153. const char *buf, size_t len)
  154. {
  155. struct cxl_region *cxlr = to_cxl_region(dev);
  156. struct cxl_region_params *p = &cxlr->params;
  157. uuid_t temp;
  158. ssize_t rc;
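/* A 36-character UUID string plus the trailing newline from the sysfs write is expected. */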
  159. if (len != UUID_STRING_LEN + 1)
  160. return -EINVAL;
  161. rc = uuid_parse(buf, &temp);
  162. if (rc)
  163. return rc;
  164. if (uuid_is_null(&temp))
  165. return -EINVAL;
  166. rc = down_write_killable(&cxl_region_rwsem);
  167. if (rc)
  168. return rc;
  169. if (uuid_equal(&p->uuid, &temp))
  170. goto out;
  171. rc = -EBUSY;
  172. if (p->state >= CXL_CONFIG_ACTIVE)
  173. goto out;
  174. rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
  175. if (rc < 0)
  176. goto out;
  177. uuid_copy(&p->uuid, &temp);
  178. out:
  179. up_write(&cxl_region_rwsem);
  180. if (rc)
  181. return rc;
  182. return len;
  183. }
  184. static DEVICE_ATTR_RW(uuid);
  185. static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
  186. struct cxl_region *cxlr)
  187. {
  188. return xa_load(&port->regions, (unsigned long)cxlr);
  189. }
  190. static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
  191. {
  192. if (!cpu_cache_has_invalidate_memregion()) {
  193. if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
  194. dev_info_once(
  195. &cxlr->dev,
  196. "Bypassing cpu_cache_invalidate_memregion() for testing!\n");
  197. return 0;
  198. } else {
  199. dev_WARN(&cxlr->dev,
  200. "Failed to synchronize CPU cache state\n");
  201. return -ENXIO;
  202. }
  203. }
  204. cpu_cache_invalidate_memregion(IORES_DESC_CXL);
  205. return 0;
  206. }
  207. static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
  208. {
  209. struct cxl_region_params *p = &cxlr->params;
  210. int i;
  211. /*
  212. * Before region teardown attempt to flush, evict any data cached for
  213. * this region, or scream loudly about missing arch / platform support
  214. * for CXL teardown.
  215. */
  216. cxl_region_invalidate_memregion(cxlr);
  217. for (i = count - 1; i >= 0; i--) {
  218. struct cxl_endpoint_decoder *cxled = p->targets[i];
  219. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  220. struct cxl_port *iter = cxled_to_port(cxled);
  221. struct cxl_dev_state *cxlds = cxlmd->cxlds;
  222. struct cxl_ep *ep;
  223. if (cxlds->rcd)
  224. goto endpoint_reset;
  225. while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
  226. iter = to_cxl_port(iter->dev.parent);
  227. for (ep = cxl_ep_load(iter, cxlmd); iter;
  228. iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
  229. struct cxl_region_ref *cxl_rr;
  230. struct cxl_decoder *cxld;
  231. cxl_rr = cxl_rr_load(iter, cxlr);
  232. cxld = cxl_rr->decoder;
  233. if (cxld->reset)
  234. cxld->reset(cxld);
  235. set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
  236. }
  237. endpoint_reset:
  238. cxled->cxld.reset(&cxled->cxld);
  239. set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
  240. }
  241. /* all decoders associated with this region have been torn down */
  242. clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
  243. }
  244. static int commit_decoder(struct cxl_decoder *cxld)
  245. {
  246. struct cxl_switch_decoder *cxlsd = NULL;
  247. if (cxld->commit)
  248. return cxld->commit(cxld);
  249. if (is_switch_decoder(&cxld->dev))
  250. cxlsd = to_cxl_switch_decoder(&cxld->dev);
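/* Only a single-target (pass-through) switch decoder may legitimately lack ->commit(). */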
  251. if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
  252. "->commit() is required\n"))
  253. return -ENXIO;
  254. return 0;
  255. }
  256. static int cxl_region_decode_commit(struct cxl_region *cxlr)
  257. {
  258. struct cxl_region_params *p = &cxlr->params;
  259. int i, rc = 0;
  260. for (i = 0; i < p->nr_targets; i++) {
  261. struct cxl_endpoint_decoder *cxled = p->targets[i];
  262. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  263. struct cxl_region_ref *cxl_rr;
  264. struct cxl_decoder *cxld;
  265. struct cxl_port *iter;
  266. struct cxl_ep *ep;
  267. /* commit bottom up */
  268. for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
  269. iter = to_cxl_port(iter->dev.parent)) {
  270. cxl_rr = cxl_rr_load(iter, cxlr);
  271. cxld = cxl_rr->decoder;
  272. rc = commit_decoder(cxld);
  273. if (rc)
  274. break;
  275. }
  276. if (rc) {
  277. /* programming @iter failed, teardown */
  278. for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
  279. iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
  280. cxl_rr = cxl_rr_load(iter, cxlr);
  281. cxld = cxl_rr->decoder;
  282. if (cxld->reset)
  283. cxld->reset(cxld);
  284. }
  285. cxled->cxld.reset(&cxled->cxld);
  286. goto err;
  287. }
  288. }
  289. return 0;
  290. err:
  291. /* undo the targets that were successfully committed */
  292. cxl_region_decode_reset(cxlr, i);
  293. return rc;
  294. }
  295. static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
  296. const char *buf, size_t len)
  297. {
  298. struct cxl_region *cxlr = to_cxl_region(dev);
  299. struct cxl_region_params *p = &cxlr->params;
  300. bool commit;
  301. ssize_t rc;
  302. rc = kstrtobool(buf, &commit);
  303. if (rc)
  304. return rc;
  305. rc = down_write_killable(&cxl_region_rwsem);
  306. if (rc)
  307. return rc;
  308. /* Already in the requested state? */
  309. if (commit && p->state >= CXL_CONFIG_COMMIT)
  310. goto out;
  311. if (!commit && p->state < CXL_CONFIG_COMMIT)
  312. goto out;
  313. /* Not ready to commit? */
  314. if (commit && p->state < CXL_CONFIG_ACTIVE) {
  315. rc = -ENXIO;
  316. goto out;
  317. }
  318. /*
  319. * Invalidate caches before region setup to drop any speculative
  320. * consumption of this address space
  321. */
  322. rc = cxl_region_invalidate_memregion(cxlr);
  323. if (rc)
  324. goto out;
  325. if (commit) {
  326. rc = cxl_region_decode_commit(cxlr);
  327. if (rc == 0)
  328. p->state = CXL_CONFIG_COMMIT;
  329. } else {
  330. p->state = CXL_CONFIG_RESET_PENDING;
  331. up_write(&cxl_region_rwsem);
  332. device_release_driver(&cxlr->dev);
  333. down_write(&cxl_region_rwsem);
  334. /*
  335. * The lock was dropped, so need to revalidate that the reset is
  336. * still pending.
  337. */
  338. if (p->state == CXL_CONFIG_RESET_PENDING) {
  339. cxl_region_decode_reset(cxlr, p->interleave_ways);
  340. p->state = CXL_CONFIG_ACTIVE;
  341. }
  342. }
  343. out:
  344. up_write(&cxl_region_rwsem);
  345. if (rc)
  346. return rc;
  347. return len;
  348. }
  349. static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
  350. char *buf)
  351. {
  352. struct cxl_region *cxlr = to_cxl_region(dev);
  353. struct cxl_region_params *p = &cxlr->params;
  354. ssize_t rc;
  355. rc = down_read_interruptible(&cxl_region_rwsem);
  356. if (rc)
  357. return rc;
  358. rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
  359. up_read(&cxl_region_rwsem);
  360. return rc;
  361. }
  362. static DEVICE_ATTR_RW(commit);
  363. static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
  364. int n)
  365. {
  366. struct device *dev = kobj_to_dev(kobj);
  367. struct cxl_region *cxlr = to_cxl_region(dev);
  368. /*
  369. * Support tooling that expects to find a 'uuid' attribute for all
  370. * regions regardless of mode.
  371. */
  372. if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
  373. return 0444;
  374. return a->mode;
  375. }
  376. static ssize_t interleave_ways_show(struct device *dev,
  377. struct device_attribute *attr, char *buf)
  378. {
  379. struct cxl_region *cxlr = to_cxl_region(dev);
  380. struct cxl_region_params *p = &cxlr->params;
  381. ssize_t rc;
  382. rc = down_read_interruptible(&cxl_region_rwsem);
  383. if (rc)
  384. return rc;
  385. rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
  386. up_read(&cxl_region_rwsem);
  387. return rc;
  388. }
  389. static const struct attribute_group *get_cxl_region_target_group(void);
  390. static ssize_t interleave_ways_store(struct device *dev,
  391. struct device_attribute *attr,
  392. const char *buf, size_t len)
  393. {
  394. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
  395. struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
  396. struct cxl_region *cxlr = to_cxl_region(dev);
  397. struct cxl_region_params *p = &cxlr->params;
  398. unsigned int val, save;
  399. int rc;
  400. u8 iw;
  401. rc = kstrtouint(buf, 0, &val);
  402. if (rc)
  403. return rc;
  404. rc = ways_to_eiw(val, &iw);
  405. if (rc)
  406. return rc;
  407. /*
  408. * Even for x3, x6, and x12 interleaves the region interleave must be a
  409. * power of 2 multiple of the host bridge interleave.
  410. */
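/*
 * Example (illustrative): with a x3 host bridge interleave, region ways of
 * 3, 6, or 12 are accepted, while 9 is rejected since 9 / 3 = 3 is not a
 * power of 2.
 */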
  411. if (!is_power_of_2(val / cxld->interleave_ways) ||
  412. (val % cxld->interleave_ways)) {
  413. dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
  414. return -EINVAL;
  415. }
  416. rc = down_write_killable(&cxl_region_rwsem);
  417. if (rc)
  418. return rc;
  419. if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
  420. rc = -EBUSY;
  421. goto out;
  422. }
  423. save = p->interleave_ways;
  424. p->interleave_ways = val;
  425. rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
  426. if (rc)
  427. p->interleave_ways = save;
  428. out:
  429. up_write(&cxl_region_rwsem);
  430. if (rc)
  431. return rc;
  432. return len;
  433. }
  434. static DEVICE_ATTR_RW(interleave_ways);
  435. static ssize_t interleave_granularity_show(struct device *dev,
  436. struct device_attribute *attr,
  437. char *buf)
  438. {
  439. struct cxl_region *cxlr = to_cxl_region(dev);
  440. struct cxl_region_params *p = &cxlr->params;
  441. ssize_t rc;
  442. rc = down_read_interruptible(&cxl_region_rwsem);
  443. if (rc)
  444. return rc;
  445. rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
  446. up_read(&cxl_region_rwsem);
  447. return rc;
  448. }
  449. static ssize_t interleave_granularity_store(struct device *dev,
  450. struct device_attribute *attr,
  451. const char *buf, size_t len)
  452. {
  453. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
  454. struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
  455. struct cxl_region *cxlr = to_cxl_region(dev);
  456. struct cxl_region_params *p = &cxlr->params;
  457. int rc, val;
  458. u16 ig;
  459. rc = kstrtoint(buf, 0, &val);
  460. if (rc)
  461. return rc;
  462. rc = granularity_to_eig(val, &ig);
  463. if (rc)
  464. return rc;
  465. /*
  466. * When the host-bridge is interleaved, disallow region granularity !=
  467. * root granularity. Regions with a granularity less than the root
  468. * interleave result in needing multiple endpoints to support a single
  469. * slot in the interleave (possible to support in the future). Regions
  470. * with a granularity greater than the root interleave result in invalid
  471. * DPA translations (invalid to support).
  472. */
  473. if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
  474. return -EINVAL;
  475. rc = down_write_killable(&cxl_region_rwsem);
  476. if (rc)
  477. return rc;
  478. if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
  479. rc = -EBUSY;
  480. goto out;
  481. }
  482. p->interleave_granularity = val;
  483. out:
  484. up_write(&cxl_region_rwsem);
  485. if (rc)
  486. return rc;
  487. return len;
  488. }
  489. static DEVICE_ATTR_RW(interleave_granularity);
  490. static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
  491. char *buf)
  492. {
  493. struct cxl_region *cxlr = to_cxl_region(dev);
  494. struct cxl_region_params *p = &cxlr->params;
  495. u64 resource = -1ULL;
  496. ssize_t rc;
  497. rc = down_read_interruptible(&cxl_region_rwsem);
  498. if (rc)
  499. return rc;
  500. if (p->res)
  501. resource = p->res->start;
  502. rc = sysfs_emit(buf, "%#llx\n", resource);
  503. up_read(&cxl_region_rwsem);
  504. return rc;
  505. }
  506. static DEVICE_ATTR_RO(resource);
  507. static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
  508. char *buf)
  509. {
  510. struct cxl_region *cxlr = to_cxl_region(dev);
  511. return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
  512. }
  513. static DEVICE_ATTR_RO(mode);
  514. static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
  515. {
  516. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
  517. struct cxl_region_params *p = &cxlr->params;
  518. struct resource *res;
  519. u64 remainder = 0;
  520. lockdep_assert_held_write(&cxl_region_rwsem);
  521. /* Nothing to do... */
  522. if (p->res && resource_size(p->res) == size)
  523. return 0;
  524. /* To change size the old size must be freed first */
  525. if (p->res)
  526. return -EBUSY;
  527. if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
  528. return -EBUSY;
  529. /* ways, granularity and uuid (if PMEM) need to be set before HPA */
  530. if (!p->interleave_ways || !p->interleave_granularity ||
  531. (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
  532. return -ENXIO;
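/*
 * The size must be a 256M multiple per interleave way, e.g. a 2-way region
 * (illustrative) must be sized in 512M increments.
 */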
  533. div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
  534. if (remainder)
  535. return -EINVAL;
  536. res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
  537. dev_name(&cxlr->dev));
  538. if (IS_ERR(res)) {
  539. dev_dbg(&cxlr->dev,
  540. "HPA allocation error (%ld) for size:%pap in %s %pr\n",
  541. PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
  542. return PTR_ERR(res);
  543. }
  544. p->res = res;
  545. p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
  546. return 0;
  547. }
  548. static void cxl_region_iomem_release(struct cxl_region *cxlr)
  549. {
  550. struct cxl_region_params *p = &cxlr->params;
  551. if (device_is_registered(&cxlr->dev))
  552. lockdep_assert_held_write(&cxl_region_rwsem);
  553. if (p->res) {
  554. /*
  555. * Autodiscovered regions may not have been able to insert their
  556. * resource.
  557. */
  558. if (p->res->parent)
  559. remove_resource(p->res);
  560. kfree(p->res);
  561. p->res = NULL;
  562. }
  563. }
  564. static int free_hpa(struct cxl_region *cxlr)
  565. {
  566. struct cxl_region_params *p = &cxlr->params;
  567. lockdep_assert_held_write(&cxl_region_rwsem);
  568. if (!p->res)
  569. return 0;
  570. if (p->state >= CXL_CONFIG_ACTIVE)
  571. return -EBUSY;
  572. cxl_region_iomem_release(cxlr);
  573. p->state = CXL_CONFIG_IDLE;
  574. return 0;
  575. }
  576. static ssize_t size_store(struct device *dev, struct device_attribute *attr,
  577. const char *buf, size_t len)
  578. {
  579. struct cxl_region *cxlr = to_cxl_region(dev);
  580. u64 val;
  581. int rc;
  582. rc = kstrtou64(buf, 0, &val);
  583. if (rc)
  584. return rc;
  585. rc = down_write_killable(&cxl_region_rwsem);
  586. if (rc)
  587. return rc;
  588. if (val)
  589. rc = alloc_hpa(cxlr, val);
  590. else
  591. rc = free_hpa(cxlr);
  592. up_write(&cxl_region_rwsem);
  593. if (rc)
  594. return rc;
  595. return len;
  596. }
  597. static ssize_t size_show(struct device *dev, struct device_attribute *attr,
  598. char *buf)
  599. {
  600. struct cxl_region *cxlr = to_cxl_region(dev);
  601. struct cxl_region_params *p = &cxlr->params;
  602. u64 size = 0;
  603. ssize_t rc;
  604. rc = down_read_interruptible(&cxl_region_rwsem);
  605. if (rc)
  606. return rc;
  607. if (p->res)
  608. size = resource_size(p->res);
  609. rc = sysfs_emit(buf, "%#llx\n", size);
  610. up_read(&cxl_region_rwsem);
  611. return rc;
  612. }
  613. static DEVICE_ATTR_RW(size);
  614. static struct attribute *cxl_region_attrs[] = {
  615. &dev_attr_uuid.attr,
  616. &dev_attr_commit.attr,
  617. &dev_attr_interleave_ways.attr,
  618. &dev_attr_interleave_granularity.attr,
  619. &dev_attr_resource.attr,
  620. &dev_attr_size.attr,
  621. &dev_attr_mode.attr,
  622. NULL,
  623. };
  624. static const struct attribute_group cxl_region_group = {
  625. .attrs = cxl_region_attrs,
  626. .is_visible = cxl_region_visible,
  627. };
  628. static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
  629. {
  630. struct cxl_region_params *p = &cxlr->params;
  631. struct cxl_endpoint_decoder *cxled;
  632. int rc;
  633. rc = down_read_interruptible(&cxl_region_rwsem);
  634. if (rc)
  635. return rc;
  636. if (pos >= p->interleave_ways) {
  637. dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
  638. p->interleave_ways);
  639. rc = -ENXIO;
  640. goto out;
  641. }
  642. cxled = p->targets[pos];
  643. if (!cxled)
  644. rc = sysfs_emit(buf, "\n");
  645. else
  646. rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
  647. out:
  648. up_read(&cxl_region_rwsem);
  649. return rc;
  650. }
  651. static int check_commit_order(struct device *dev, const void *data)
  652. {
  653. struct cxl_decoder *cxld = to_cxl_decoder(dev);
  654. /*
  655. * if port->commit_end is not the only free decoder, then out of
  656. * order shutdown has occurred, block further allocations until
  657. * that is resolved
  658. */
  659. if (((cxld->flags & CXL_DECODER_F_ENABLE) == 0))
  660. return -EBUSY;
  661. return 0;
  662. }
  663. static int match_free_decoder(struct device *dev, void *data)
  664. {
  665. struct cxl_port *port = to_cxl_port(dev->parent);
  666. struct cxl_decoder *cxld;
  667. int rc;
  668. if (!is_switch_decoder(dev))
  669. return 0;
  670. cxld = to_cxl_decoder(dev);
  671. if (cxld->id != port->commit_end + 1)
  672. return 0;
  673. if (cxld->region) {
  674. dev_dbg(dev->parent,
  675. "next decoder to commit (%s) is already reserved (%s)\n",
  676. dev_name(dev), dev_name(&cxld->region->dev));
  677. return 0;
  678. }
  679. rc = device_for_each_child_reverse_from(dev->parent, dev, NULL,
  680. check_commit_order);
  681. if (rc) {
  682. dev_dbg(dev->parent,
  683. "unable to allocate %s due to out of order shutdown\n",
  684. dev_name(dev));
  685. return 0;
  686. }
  687. return 1;
  688. }
  689. static int match_auto_decoder(struct device *dev, void *data)
  690. {
  691. struct cxl_region_params *p = data;
  692. struct cxl_decoder *cxld;
  693. struct range *r;
  694. if (!is_switch_decoder(dev))
  695. return 0;
  696. cxld = to_cxl_decoder(dev);
  697. r = &cxld->hpa_range;
  698. if (p->res && p->res->start == r->start && p->res->end == r->end)
  699. return 1;
  700. return 0;
  701. }
  702. static struct cxl_decoder *
  703. cxl_region_find_decoder(struct cxl_port *port,
  704. struct cxl_endpoint_decoder *cxled,
  705. struct cxl_region *cxlr)
  706. {
  707. struct device *dev;
  708. if (port == cxled_to_port(cxled))
  709. return &cxled->cxld;
  710. if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
  711. dev = device_find_child(&port->dev, &cxlr->params,
  712. match_auto_decoder);
  713. else
  714. dev = device_find_child(&port->dev, NULL, match_free_decoder);
  715. if (!dev)
  716. return NULL;
  717. /*
  718. * This decoder stays pinned (registered) as long as the endpoint decoder is
  719. * registered, and endpoint decoder unregistration holds the
  720. * cxl_region_rwsem over unregister events, so no need to hold on to
  721. * this extra reference.
  722. */
  723. put_device(dev);
  724. return to_cxl_decoder(dev);
  725. }
  726. static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
  727. struct cxl_decoder *cxld)
  728. {
  729. struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
  730. struct cxl_decoder *cxld_iter = rr->decoder;
  731. /*
  732. * Allow the out of order assembly of auto-discovered regions.
  733. * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
  734. * in HPA order. Confirm that the decoder with the lesser HPA
  735. * starting address has the lesser id.
  736. */
  737. dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
  738. dev_name(&cxld->dev), cxld->id,
  739. dev_name(&cxld_iter->dev), cxld_iter->id);
  740. if (cxld_iter->id > cxld->id)
  741. return true;
  742. return false;
  743. }
  744. static struct cxl_region_ref *
  745. alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
  746. struct cxl_endpoint_decoder *cxled)
  747. {
  748. struct cxl_region_params *p = &cxlr->params;
  749. struct cxl_region_ref *cxl_rr, *iter;
  750. unsigned long index;
  751. int rc;
  752. xa_for_each(&port->regions, index, iter) {
  753. struct cxl_region_params *ip = &iter->region->params;
  754. if (!ip->res || ip->res->start < p->res->start)
  755. continue;
  756. if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
  757. struct cxl_decoder *cxld;
  758. cxld = cxl_region_find_decoder(port, cxled, cxlr);
  759. if (auto_order_ok(port, iter->region, cxld))
  760. continue;
  761. }
  762. dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
  763. dev_name(&port->dev),
  764. dev_name(&iter->region->dev), ip->res, p->res);
  765. return ERR_PTR(-EBUSY);
  766. }
  767. cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
  768. if (!cxl_rr)
  769. return ERR_PTR(-ENOMEM);
  770. cxl_rr->port = port;
  771. cxl_rr->region = cxlr;
  772. cxl_rr->nr_targets = 1;
  773. xa_init(&cxl_rr->endpoints);
  774. rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
  775. if (rc) {
  776. dev_dbg(&cxlr->dev,
  777. "%s: failed to track region reference: %d\n",
  778. dev_name(&port->dev), rc);
  779. kfree(cxl_rr);
  780. return ERR_PTR(rc);
  781. }
  782. return cxl_rr;
  783. }
  784. static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
  785. {
  786. struct cxl_region *cxlr = cxl_rr->region;
  787. struct cxl_decoder *cxld = cxl_rr->decoder;
  788. if (!cxld)
  789. return;
  790. dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
  791. if (cxld->region == cxlr) {
  792. cxld->region = NULL;
  793. put_device(&cxlr->dev);
  794. }
  795. }
  796. static void free_region_ref(struct cxl_region_ref *cxl_rr)
  797. {
  798. struct cxl_port *port = cxl_rr->port;
  799. struct cxl_region *cxlr = cxl_rr->region;
  800. cxl_rr_free_decoder(cxl_rr);
  801. xa_erase(&port->regions, (unsigned long)cxlr);
  802. xa_destroy(&cxl_rr->endpoints);
  803. kfree(cxl_rr);
  804. }
  805. static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
  806. struct cxl_endpoint_decoder *cxled)
  807. {
  808. int rc;
  809. struct cxl_port *port = cxl_rr->port;
  810. struct cxl_region *cxlr = cxl_rr->region;
  811. struct cxl_decoder *cxld = cxl_rr->decoder;
  812. struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
  813. if (ep) {
  814. rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
  815. GFP_KERNEL);
  816. if (rc)
  817. return rc;
  818. }
  819. cxl_rr->nr_eps++;
  820. if (!cxld->region) {
  821. cxld->region = cxlr;
  822. get_device(&cxlr->dev);
  823. }
  824. return 0;
  825. }
  826. static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
  827. struct cxl_endpoint_decoder *cxled,
  828. struct cxl_region_ref *cxl_rr)
  829. {
  830. struct cxl_decoder *cxld;
  831. cxld = cxl_region_find_decoder(port, cxled, cxlr);
  832. if (!cxld) {
  833. dev_dbg(&cxlr->dev, "%s: no decoder available\n",
  834. dev_name(&port->dev));
  835. return -EBUSY;
  836. }
  837. if (cxld->region) {
  838. dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
  839. dev_name(&port->dev), dev_name(&cxld->dev),
  840. dev_name(&cxld->region->dev));
  841. return -EBUSY;
  842. }
  843. /*
  844. * Endpoints should already match the region type, but backstop that
  845. * assumption with an assertion. Switch-decoders change mapping-type
  846. * based on what is mapped when they are assigned to a region.
  847. */
  848. dev_WARN_ONCE(&cxlr->dev,
  849. port == cxled_to_port(cxled) &&
  850. cxld->target_type != cxlr->type,
  851. "%s:%s mismatch decoder type %d -> %d\n",
  852. dev_name(&cxled_to_memdev(cxled)->dev),
  853. dev_name(&cxld->dev), cxld->target_type, cxlr->type);
  854. cxld->target_type = cxlr->type;
  855. cxl_rr->decoder = cxld;
  856. return 0;
  857. }
  858. /**
  859. * cxl_port_attach_region() - track a region's interest in a port by endpoint
  860. * @port: port to add a new region reference 'struct cxl_region_ref'
  861. * @cxlr: region to attach to @port
  862. * @cxled: endpoint decoder used to create or further pin a region reference
  863. * @pos: interleave position of @cxled in @cxlr
  864. *
  865. * The attach event is an opportunity to validate CXL decode setup
  866. * constraints and record metadata needed for programming HDM decoders,
  867. * in particular decoder target lists.
  868. *
  869. * The steps are:
  870. *
  871. * - validate that there are no other regions with a higher HPA already
  872. * associated with @port
  873. * - establish a region reference if one is not already present
  874. *
  875. * - additionally allocate a decoder instance that will host @cxlr on
  876. * @port
  877. *
  878. * - pin the region reference by the endpoint
  879. * - account for how many entries in @port's target list are needed to
  880. * cover all of the added endpoints.
  881. */
  882. static int cxl_port_attach_region(struct cxl_port *port,
  883. struct cxl_region *cxlr,
  884. struct cxl_endpoint_decoder *cxled, int pos)
  885. {
  886. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  887. struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
  888. struct cxl_region_ref *cxl_rr;
  889. bool nr_targets_inc = false;
  890. struct cxl_decoder *cxld;
  891. unsigned long index;
  892. int rc = -EBUSY;
  893. lockdep_assert_held_write(&cxl_region_rwsem);
  894. cxl_rr = cxl_rr_load(port, cxlr);
  895. if (cxl_rr) {
  896. struct cxl_ep *ep_iter;
  897. int found = 0;
  898. /*
  899. * Walk the existing endpoints that have been attached to
  900. * @cxlr at @port and see if they share the same 'next' port
  901. * in the downstream direction. I.e. endpoints that share common
  902. * upstream switch.
  903. */
  904. xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
  905. if (ep_iter == ep)
  906. continue;
  907. if (ep_iter->next == ep->next) {
  908. found++;
  909. break;
  910. }
  911. }
  912. /*
  913. * New target port, or @port is an endpoint port that always
  914. * accounts its own local decode as a target.
  915. */
  916. if (!found || !ep->next) {
  917. cxl_rr->nr_targets++;
  918. nr_targets_inc = true;
  919. }
  920. } else {
  921. cxl_rr = alloc_region_ref(port, cxlr, cxled);
  922. if (IS_ERR(cxl_rr)) {
  923. dev_dbg(&cxlr->dev,
  924. "%s: failed to allocate region reference\n",
  925. dev_name(&port->dev));
  926. return PTR_ERR(cxl_rr);
  927. }
  928. nr_targets_inc = true;
  929. rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
  930. if (rc)
  931. goto out_erase;
  932. }
  933. cxld = cxl_rr->decoder;
  934. /*
  935. * the number of targets should not exceed the target_count
  936. * of the decoder
  937. */
  938. if (is_switch_decoder(&cxld->dev)) {
  939. struct cxl_switch_decoder *cxlsd;
  940. cxlsd = to_cxl_switch_decoder(&cxld->dev);
  941. if (cxl_rr->nr_targets > cxlsd->nr_targets) {
  942. dev_dbg(&cxlr->dev,
  943. "%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
  944. dev_name(port->uport_dev), dev_name(&port->dev),
  945. dev_name(&cxld->dev), dev_name(&cxlmd->dev),
  946. dev_name(&cxled->cxld.dev), pos,
  947. cxlsd->nr_targets);
  948. rc = -ENXIO;
  949. goto out_erase;
  950. }
  951. }
  952. rc = cxl_rr_ep_add(cxl_rr, cxled);
  953. if (rc) {
  954. dev_dbg(&cxlr->dev,
  955. "%s: failed to track endpoint %s:%s reference\n",
  956. dev_name(&port->dev), dev_name(&cxlmd->dev),
  957. dev_name(&cxld->dev));
  958. goto out_erase;
  959. }
  960. dev_dbg(&cxlr->dev,
  961. "%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
  962. dev_name(port->uport_dev), dev_name(&port->dev),
  963. dev_name(&cxld->dev), dev_name(&cxlmd->dev),
  964. dev_name(&cxled->cxld.dev), pos,
  965. ep ? ep->next ? dev_name(ep->next->uport_dev) :
  966. dev_name(&cxlmd->dev) :
  967. "none",
  968. cxl_rr->nr_eps, cxl_rr->nr_targets);
  969. return 0;
  970. out_erase:
  971. if (nr_targets_inc)
  972. cxl_rr->nr_targets--;
  973. if (cxl_rr->nr_eps == 0)
  974. free_region_ref(cxl_rr);
  975. return rc;
  976. }
  977. static void cxl_port_detach_region(struct cxl_port *port,
  978. struct cxl_region *cxlr,
  979. struct cxl_endpoint_decoder *cxled)
  980. {
  981. struct cxl_region_ref *cxl_rr;
  982. struct cxl_ep *ep = NULL;
  983. lockdep_assert_held_write(&cxl_region_rwsem);
  984. cxl_rr = cxl_rr_load(port, cxlr);
  985. if (!cxl_rr)
  986. return;
  987. /*
  988. * Endpoint ports do not carry cxl_ep references, and they
  989. * never target more than one endpoint by definition
  990. */
  991. if (cxl_rr->decoder == &cxled->cxld)
  992. cxl_rr->nr_eps--;
  993. else
  994. ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
  995. if (ep) {
  996. struct cxl_ep *ep_iter;
  997. unsigned long index;
  998. int found = 0;
  999. cxl_rr->nr_eps--;
  1000. xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
  1001. if (ep_iter->next == ep->next) {
  1002. found++;
  1003. break;
  1004. }
  1005. }
  1006. if (!found)
  1007. cxl_rr->nr_targets--;
  1008. }
  1009. if (cxl_rr->nr_eps == 0)
  1010. free_region_ref(cxl_rr);
  1011. }
  1012. static int check_last_peer(struct cxl_endpoint_decoder *cxled,
  1013. struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
  1014. int distance)
  1015. {
  1016. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  1017. struct cxl_region *cxlr = cxl_rr->region;
  1018. struct cxl_region_params *p = &cxlr->params;
  1019. struct cxl_endpoint_decoder *cxled_peer;
  1020. struct cxl_port *port = cxl_rr->port;
  1021. struct cxl_memdev *cxlmd_peer;
  1022. struct cxl_ep *ep_peer;
  1023. int pos = cxled->pos;
  1024. /*
  1025. * If this position wants to share a dport with the last endpoint mapped
  1026. * then that endpoint, at index 'position - distance', must also be
  1027. * mapped by this dport.
  1028. */
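/*
 * Example (illustrative): with distance 2, the endpoint at position 3 must
 * sit behind the same dport as the endpoint already mapped at position 1.
 */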
  1029. if (pos < distance) {
  1030. dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
  1031. dev_name(port->uport_dev), dev_name(&port->dev),
  1032. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
  1033. return -ENXIO;
  1034. }
  1035. cxled_peer = p->targets[pos - distance];
  1036. cxlmd_peer = cxled_to_memdev(cxled_peer);
  1037. ep_peer = cxl_ep_load(port, cxlmd_peer);
  1038. if (ep->dport != ep_peer->dport) {
  1039. dev_dbg(&cxlr->dev,
  1040. "%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
  1041. dev_name(port->uport_dev), dev_name(&port->dev),
  1042. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
  1043. dev_name(&cxlmd_peer->dev),
  1044. dev_name(&cxled_peer->cxld.dev));
  1045. return -ENXIO;
  1046. }
  1047. return 0;
  1048. }
  1049. static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
  1050. {
  1051. struct cxl_port *port = to_cxl_port(cxld->dev.parent);
  1052. struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
  1053. unsigned int interleave_mask;
  1054. u8 eiw;
  1055. u16 eig;
  1056. int high_pos, low_pos;
  1057. if (!test_bit(iw, &cxlhdm->iw_cap_mask))
  1058. return -ENXIO;
  1059. /*
  1060. * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection),
  1061. * if eiw < 8:
  1062. * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
  1063. * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
  1064. *
  1065. * When eiw is 0, all the bits of HPAOFFSET[51:0] are used directly;
  1066. * there are no interleave bits.
  1067. *
  1068. * if eiw >= 8:
  1069. * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
  1070. * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
  1071. *
  1072. * When eiw is 8, all the bits of HPAOFFSET[51:0] are used directly;
  1073. * there are no interleave bits.
  1074. */
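/*
 * Worked example (editor's sketch): iw = 4, ig = 1024 gives eiw = 2 and
 * eig = 2, so the interleave selection bits are HPA[11:10], i.e.
 * GENMASK(eiw + eig + 7, eig + 8) = GENMASK(11, 10). Those bits must be
 * covered by the port's HDM interleave_mask capability checked below.
 */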
  1075. ways_to_eiw(iw, &eiw);
  1076. if (eiw == 0 || eiw == 8)
  1077. return 0;
  1078. granularity_to_eig(ig, &eig);
  1079. if (eiw > 8)
  1080. high_pos = eiw + eig - 1;
  1081. else
  1082. high_pos = eiw + eig + 7;
  1083. low_pos = eig + 8;
  1084. interleave_mask = GENMASK(high_pos, low_pos);
  1085. if (interleave_mask & ~cxlhdm->interleave_mask)
  1086. return -ENXIO;
  1087. return 0;
  1088. }
  1089. static int cxl_port_setup_targets(struct cxl_port *port,
  1090. struct cxl_region *cxlr,
  1091. struct cxl_endpoint_decoder *cxled)
  1092. {
  1093. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
  1094. int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
  1095. struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
  1096. struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
  1097. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  1098. struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
  1099. struct cxl_region_params *p = &cxlr->params;
  1100. struct cxl_decoder *cxld = cxl_rr->decoder;
  1101. struct cxl_switch_decoder *cxlsd;
  1102. struct cxl_port *iter = port;
  1103. u16 eig, peig;
  1104. u8 eiw, peiw;
  1105. /*
  1106. * While root level decoders support x3, x6, x12, switch level
  1107. * decoders only support powers of 2 up to x16.
  1108. */
  1109. if (!is_power_of_2(cxl_rr->nr_targets)) {
  1110. dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
  1111. dev_name(port->uport_dev), dev_name(&port->dev),
  1112. cxl_rr->nr_targets);
  1113. return -EINVAL;
  1114. }
  1115. cxlsd = to_cxl_switch_decoder(&cxld->dev);
  1116. if (cxl_rr->nr_targets_set) {
  1117. int i, distance = 1;
  1118. struct cxl_region_ref *cxl_rr_iter;
  1119. /*
  1120. * The "distance" between peer downstream ports represents which
  1121. * endpoint positions in the region interleave a given port can
  1122. * host.
  1123. *
  1124. * For example, at the root of a hierarchy the distance is
  1125. * always 1 as every index targets a different host-bridge. At
  1126. * each subsequent switch level those ports map every Nth region
  1127. * position where N is the width of the switch == distance.
  1128. */
  1129. do {
  1130. cxl_rr_iter = cxl_rr_load(iter, cxlr);
  1131. distance *= cxl_rr_iter->nr_targets;
  1132. iter = to_cxl_port(iter->dev.parent);
  1133. } while (!is_cxl_root(iter));
  1134. distance *= cxlrd->cxlsd.cxld.interleave_ways;
  1135. for (i = 0; i < cxl_rr->nr_targets_set; i++)
  1136. if (ep->dport == cxlsd->target[i]) {
  1137. rc = check_last_peer(cxled, ep, cxl_rr,
  1138. distance);
  1139. if (rc)
  1140. return rc;
  1141. goto out_target_set;
  1142. }
  1143. goto add_target;
  1144. }
  1145. if (is_cxl_root(parent_port)) {
  1146. /*
  1147. * Root decoder IG is always set to value in CFMWS which
  1148. * may be different than this region's IG. We can use the
  1149. * region's IG here since interleave_granularity_store()
  1150. * does not allow interleaved host-bridges with
  1151. * root IG != region IG.
  1152. */
  1153. parent_ig = p->interleave_granularity;
  1154. parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
  1155. /*
  1156. * For purposes of address bit routing, use power-of-2 math for
  1157. * switch ports.
  1158. */
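/* e.g. (illustrative) a x3 root is treated as x1 here, x6 as x2, x12 as x4 */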
  1159. if (!is_power_of_2(parent_iw))
  1160. parent_iw /= 3;
  1161. } else {
  1162. struct cxl_region_ref *parent_rr;
  1163. struct cxl_decoder *parent_cxld;
  1164. parent_rr = cxl_rr_load(parent_port, cxlr);
  1165. parent_cxld = parent_rr->decoder;
  1166. parent_ig = parent_cxld->interleave_granularity;
  1167. parent_iw = parent_cxld->interleave_ways;
  1168. }
  1169. rc = granularity_to_eig(parent_ig, &peig);
  1170. if (rc) {
  1171. dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
  1172. dev_name(parent_port->uport_dev),
  1173. dev_name(&parent_port->dev), parent_ig);
  1174. return rc;
  1175. }
  1176. rc = ways_to_eiw(parent_iw, &peiw);
  1177. if (rc) {
  1178. dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
  1179. dev_name(parent_port->uport_dev),
  1180. dev_name(&parent_port->dev), parent_iw);
  1181. return rc;
  1182. }
  1183. iw = cxl_rr->nr_targets;
  1184. rc = ways_to_eiw(iw, &eiw);
  1185. if (rc) {
  1186. dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
  1187. dev_name(port->uport_dev), dev_name(&port->dev), iw);
  1188. return rc;
  1189. }
  1190. /*
  1191. * Interleave granularity is a multiple of @parent_port granularity.
  1192. * Multiplier is the parent port interleave ways.
  1193. */
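/* e.g. (illustrative) parent_ig = 256 with parent_iw = 2 yields ig = 512 */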
  1194. rc = granularity_to_eig(parent_ig * parent_iw, &eig);
  1195. if (rc) {
  1196. dev_dbg(&cxlr->dev,
  1197. "%s: invalid granularity calculation (%d * %d)\n",
  1198. dev_name(&parent_port->dev), parent_ig, parent_iw);
  1199. return rc;
  1200. }
  1201. rc = eig_to_granularity(eig, &ig);
  1202. if (rc) {
  1203. dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
  1204. dev_name(port->uport_dev), dev_name(&port->dev),
  1205. 256 << eig);
  1206. return rc;
  1207. }
  1208. if (iw > 8 || iw > cxlsd->nr_targets) {
  1209. dev_dbg(&cxlr->dev,
  1210. "%s:%s:%s: ways: %d overflows targets: %d\n",
  1211. dev_name(port->uport_dev), dev_name(&port->dev),
  1212. dev_name(&cxld->dev), iw, cxlsd->nr_targets);
  1213. return -ENXIO;
  1214. }
  1215. if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
  1216. if (cxld->interleave_ways != iw ||
  1217. cxld->interleave_granularity != ig ||
  1218. cxld->hpa_range.start != p->res->start ||
  1219. cxld->hpa_range.end != p->res->end ||
  1220. ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
  1221. dev_err(&cxlr->dev,
  1222. "%s:%s %s expected iw: %d ig: %d %pr\n",
  1223. dev_name(port->uport_dev), dev_name(&port->dev),
  1224. __func__, iw, ig, p->res);
  1225. dev_err(&cxlr->dev,
  1226. "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
  1227. dev_name(port->uport_dev), dev_name(&port->dev),
  1228. __func__, cxld->interleave_ways,
  1229. cxld->interleave_granularity,
  1230. (cxld->flags & CXL_DECODER_F_ENABLE) ?
  1231. "enabled" :
  1232. "disabled",
  1233. cxld->hpa_range.start, cxld->hpa_range.end);
  1234. return -ENXIO;
  1235. }
  1236. } else {
  1237. rc = check_interleave_cap(cxld, iw, ig);
  1238. if (rc) {
  1239. dev_dbg(&cxlr->dev,
  1240. "%s:%s iw: %d ig: %d is not supported\n",
  1241. dev_name(port->uport_dev),
  1242. dev_name(&port->dev), iw, ig);
  1243. return rc;
  1244. }
  1245. cxld->interleave_ways = iw;
  1246. cxld->interleave_granularity = ig;
  1247. cxld->hpa_range = (struct range) {
  1248. .start = p->res->start,
  1249. .end = p->res->end,
  1250. };
  1251. }
  1252. dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
  1253. dev_name(&port->dev), iw, ig);
  1254. add_target:
  1255. if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
  1256. dev_dbg(&cxlr->dev,
  1257. "%s:%s: targets full trying to add %s:%s at %d\n",
  1258. dev_name(port->uport_dev), dev_name(&port->dev),
  1259. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
  1260. return -ENXIO;
  1261. }
  1262. if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
  1263. if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
  1264. dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
  1265. dev_name(port->uport_dev), dev_name(&port->dev),
  1266. dev_name(&cxlsd->cxld.dev),
  1267. dev_name(ep->dport->dport_dev),
  1268. cxl_rr->nr_targets_set);
  1269. return -ENXIO;
  1270. }
  1271. } else
  1272. cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
  1273. inc = 1;
  1274. out_target_set:
  1275. cxl_rr->nr_targets_set += inc;
  1276. dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
  1277. dev_name(port->uport_dev), dev_name(&port->dev),
  1278. cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
  1279. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
  1280. return 0;
  1281. }
  1282. static void cxl_port_reset_targets(struct cxl_port *port,
  1283. struct cxl_region *cxlr)
  1284. {
  1285. struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
  1286. struct cxl_decoder *cxld;
  1287. /*
  1288. * After the last endpoint has been detached the entire cxl_rr may now
  1289. * be gone.
  1290. */
  1291. if (!cxl_rr)
  1292. return;
  1293. cxl_rr->nr_targets_set = 0;
  1294. cxld = cxl_rr->decoder;
  1295. cxld->hpa_range = (struct range) {
  1296. .start = 0,
  1297. .end = -1,
  1298. };
  1299. }
  1300. static void cxl_region_teardown_targets(struct cxl_region *cxlr)
  1301. {
  1302. struct cxl_region_params *p = &cxlr->params;
  1303. struct cxl_endpoint_decoder *cxled;
  1304. struct cxl_dev_state *cxlds;
  1305. struct cxl_memdev *cxlmd;
  1306. struct cxl_port *iter;
  1307. struct cxl_ep *ep;
  1308. int i;
  1309. /*
  1310. * In the auto-discovery case skip automatic teardown since the
  1311. * address space is already active
  1312. */
  1313. if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
  1314. return;
  1315. for (i = 0; i < p->nr_targets; i++) {
  1316. cxled = p->targets[i];
  1317. cxlmd = cxled_to_memdev(cxled);
  1318. cxlds = cxlmd->cxlds;
  1319. if (cxlds->rcd)
  1320. continue;
  1321. iter = cxled_to_port(cxled);
  1322. while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
  1323. iter = to_cxl_port(iter->dev.parent);
  1324. for (ep = cxl_ep_load(iter, cxlmd); iter;
  1325. iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
  1326. cxl_port_reset_targets(iter, cxlr);
  1327. }
  1328. }
  1329. static int cxl_region_setup_targets(struct cxl_region *cxlr)
  1330. {
  1331. struct cxl_region_params *p = &cxlr->params;
  1332. struct cxl_endpoint_decoder *cxled;
  1333. struct cxl_dev_state *cxlds;
  1334. int i, rc, rch = 0, vh = 0;
  1335. struct cxl_memdev *cxlmd;
  1336. struct cxl_port *iter;
  1337. struct cxl_ep *ep;
  1338. for (i = 0; i < p->nr_targets; i++) {
  1339. cxled = p->targets[i];
  1340. cxlmd = cxled_to_memdev(cxled);
  1341. cxlds = cxlmd->cxlds;
  1342. /* validate that all targets agree on topology */
  1343. if (!cxlds->rcd) {
  1344. vh++;
  1345. } else {
  1346. rch++;
  1347. continue;
  1348. }
  1349. iter = cxled_to_port(cxled);
  1350. while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
  1351. iter = to_cxl_port(iter->dev.parent);
  1352. /*
  1353. * Descend the topology tree programming / validating
  1354. * targets while looking for conflicts.
  1355. */
  1356. for (ep = cxl_ep_load(iter, cxlmd); iter;
  1357. iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
  1358. rc = cxl_port_setup_targets(iter, cxlr, cxled);
  1359. if (rc) {
  1360. cxl_region_teardown_targets(cxlr);
  1361. return rc;
  1362. }
  1363. }
  1364. }
  1365. if (rch && vh) {
  1366. dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
  1367. cxl_region_teardown_targets(cxlr);
  1368. return -ENXIO;
  1369. }
  1370. return 0;
  1371. }
  1372. static int cxl_region_validate_position(struct cxl_region *cxlr,
  1373. struct cxl_endpoint_decoder *cxled,
  1374. int pos)
  1375. {
  1376. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  1377. struct cxl_region_params *p = &cxlr->params;
  1378. int i;
  1379. if (pos < 0 || pos >= p->interleave_ways) {
  1380. dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
  1381. p->interleave_ways);
  1382. return -ENXIO;
  1383. }
  1384. if (p->targets[pos] == cxled)
  1385. return 0;
  1386. if (p->targets[pos]) {
  1387. struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
  1388. struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
  1389. dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
  1390. pos, dev_name(&cxlmd_target->dev),
  1391. dev_name(&cxled_target->cxld.dev));
  1392. return -EBUSY;
  1393. }
  1394. for (i = 0; i < p->interleave_ways; i++) {
  1395. struct cxl_endpoint_decoder *cxled_target;
  1396. struct cxl_memdev *cxlmd_target;
  1397. cxled_target = p->targets[i];
  1398. if (!cxled_target)
  1399. continue;
  1400. cxlmd_target = cxled_to_memdev(cxled_target);
  1401. if (cxlmd_target == cxlmd) {
  1402. dev_dbg(&cxlr->dev,
  1403. "%s already specified at position %d via: %s\n",
  1404. dev_name(&cxlmd->dev), pos,
  1405. dev_name(&cxled_target->cxld.dev));
  1406. return -EBUSY;
  1407. }
  1408. }
  1409. return 0;
  1410. }
  1411. static int cxl_region_attach_position(struct cxl_region *cxlr,
  1412. struct cxl_root_decoder *cxlrd,
  1413. struct cxl_endpoint_decoder *cxled,
  1414. const struct cxl_dport *dport, int pos)
  1415. {
  1416. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  1417. struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
  1418. struct cxl_decoder *cxld = &cxlsd->cxld;
  1419. int iw = cxld->interleave_ways;
  1420. struct cxl_port *iter;
  1421. int rc;
  1422. if (dport != cxlrd->cxlsd.target[pos % iw]) {
  1423. dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
  1424. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  1425. dev_name(&cxlrd->cxlsd.cxld.dev));
  1426. return -ENXIO;
  1427. }
  1428. for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
  1429. iter = to_cxl_port(iter->dev.parent)) {
  1430. rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
  1431. if (rc)
  1432. goto err;
  1433. }
  1434. return 0;
  1435. err:
  1436. for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
  1437. iter = to_cxl_port(iter->dev.parent))
  1438. cxl_port_detach_region(iter, cxlr, cxled);
  1439. return rc;
  1440. }
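/*
 * Illustrative example of the pos % iw check above: with a 2-way root
 * decoder and a 4-way region, endpoints at region positions 0 and 2 must
 * both route through root target[0], and positions 1 and 3 through
 * target[1] (compare the 4-way topology sketch in cxl_calc_interleave_pos()
 * below).
 */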
  1441. static int cxl_region_attach_auto(struct cxl_region *cxlr,
  1442. struct cxl_endpoint_decoder *cxled, int pos)
  1443. {
  1444. struct cxl_region_params *p = &cxlr->params;
  1445. if (cxled->state != CXL_DECODER_STATE_AUTO) {
  1446. dev_err(&cxlr->dev,
  1447. "%s: unable to add decoder to autodetected region\n",
  1448. dev_name(&cxled->cxld.dev));
  1449. return -EINVAL;
  1450. }
  1451. if (pos >= 0) {
  1452. dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
  1453. dev_name(&cxled->cxld.dev), pos);
  1454. return -EINVAL;
  1455. }
  1456. if (p->nr_targets >= p->interleave_ways) {
  1457. dev_err(&cxlr->dev, "%s: no more target slots available\n",
  1458. dev_name(&cxled->cxld.dev));
  1459. return -ENXIO;
  1460. }
  1461. /*
  1462. * Temporarily record the endpoint decoder into the target array. Yes,
  1463. * this means that userspace can view devices in the wrong position
  1464. * before the region activates, and must be careful to understand when
  1465. * it might be racing region autodiscovery.
  1466. */
  1467. pos = p->nr_targets;
  1468. p->targets[pos] = cxled;
  1469. cxled->pos = pos;
  1470. p->nr_targets++;
  1471. return 0;
  1472. }
  1473. static int cmp_interleave_pos(const void *a, const void *b)
  1474. {
  1475. struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
  1476. struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
  1477. return cxled_a->pos - cxled_b->pos;
  1478. }
  1479. static struct cxl_port *next_port(struct cxl_port *port)
  1480. {
  1481. if (!port->parent_dport)
  1482. return NULL;
  1483. return port->parent_dport->port;
  1484. }
  1485. static int match_switch_decoder_by_range(struct device *dev, void *data)
  1486. {
  1487. struct cxl_switch_decoder *cxlsd;
  1488. struct range *r1, *r2 = data;
  1489. if (!is_switch_decoder(dev))
  1490. return 0;
  1491. cxlsd = to_cxl_switch_decoder(dev);
  1492. r1 = &cxlsd->cxld.hpa_range;
  1493. if (is_root_decoder(dev))
  1494. return range_contains(r1, r2);
  1495. return (r1->start == r2->start && r1->end == r2->end);
  1496. }
  1497. static int find_pos_and_ways(struct cxl_port *port, struct range *range,
  1498. int *pos, int *ways)
  1499. {
  1500. struct cxl_switch_decoder *cxlsd;
  1501. struct cxl_port *parent;
  1502. struct device *dev;
  1503. int rc = -ENXIO;
  1504. parent = next_port(port);
  1505. if (!parent)
  1506. return rc;
  1507. dev = device_find_child(&parent->dev, range,
  1508. match_switch_decoder_by_range);
  1509. if (!dev) {
  1510. dev_err(port->uport_dev,
  1511. "failed to find decoder mapping %#llx-%#llx\n",
  1512. range->start, range->end);
  1513. return rc;
  1514. }
  1515. cxlsd = to_cxl_switch_decoder(dev);
  1516. *ways = cxlsd->cxld.interleave_ways;
  1517. for (int i = 0; i < *ways; i++) {
  1518. if (cxlsd->target[i] == port->parent_dport) {
  1519. *pos = i;
  1520. rc = 0;
  1521. break;
  1522. }
  1523. }
  1524. put_device(dev);
  1525. return rc;
  1526. }
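/*
 * Illustrative example: for an endpoint whose parent_dport is the second
 * entry in a 2-way parent decoder's target list, this returns *pos = 1 and
 * *ways = 2 -- the per-level inputs that cxl_calc_interleave_pos() folds
 * into a region position.
 */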
  1527. /**
  1528. * cxl_calc_interleave_pos() - calculate an endpoint position in a region
  1529. * @cxled: endpoint decoder member of given region
  1530. *
  1531. * The endpoint position is calculated by traversing the topology from
  1532. * the endpoint to the root decoder and iteratively applying this
  1533. * calculation:
  1534. *
  1535. * position = position * parent_ways + parent_pos;
  1536. *
  1537. * ...where @position is inferred from switch and root decoder target lists.
  1538. *
  1539. * Return: position >= 0 on success
  1540. * -ENXIO on failure
  1541. */
  1542. static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
  1543. {
  1544. struct cxl_port *iter, *port = cxled_to_port(cxled);
  1545. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  1546. struct range *range = &cxled->cxld.hpa_range;
  1547. int parent_ways = 0, parent_pos = 0, pos = 0;
  1548. int rc;
	/*
	 * Example: the expected interleave order of the 4-way region shown
	 * below is: mem0, mem2, mem1, mem3
	 *
	 *                root_port
	 *               /         \
	 *      host_bridge_0    host_bridge_1
	 *        |       |        |       |
	 *       mem0    mem1     mem2    mem3
	 *
	 * In the example the calculator will iterate twice. The first
	 * iteration uses the mem position in the host-bridge and the ways of
	 * the host-bridge to generate the first, or local, position. The
	 * second iteration uses the host-bridge position in the root_port and
	 * the ways of the root_port to refine the position.
	 *
	 * A trace of the calculation per endpoint looks like this:
	 * mem0: pos = 0 * 2 + 0    mem2: pos = 0 * 2 + 0
	 *       pos = 0 * 2 + 0          pos = 0 * 2 + 1
	 *       pos: 0                   pos: 1
	 *
	 * mem1: pos = 0 * 2 + 1    mem3: pos = 0 * 2 + 1
	 *       pos = 1 * 2 + 0          pos = 1 * 2 + 1
	 *       pos: 2                   pos: 3
	 *
	 * Note that while this example is simple, the method applies to more
	 * complex topologies, including those with switches.
	 */
  1577. /* Iterate from endpoint to root_port refining the position */
  1578. for (iter = port; iter; iter = next_port(iter)) {
  1579. if (is_cxl_root(iter))
  1580. break;
  1581. rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
  1582. if (rc)
  1583. return rc;
  1584. pos = pos * parent_ways + parent_pos;
  1585. }
  1586. dev_dbg(&cxlmd->dev,
  1587. "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
  1588. dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
  1589. dev_name(&port->dev), range->start, range->end, pos);
  1590. return pos;
  1591. }
  1592. static int cxl_region_sort_targets(struct cxl_region *cxlr)
  1593. {
  1594. struct cxl_region_params *p = &cxlr->params;
  1595. int i, rc = 0;
  1596. for (i = 0; i < p->nr_targets; i++) {
  1597. struct cxl_endpoint_decoder *cxled = p->targets[i];
  1598. cxled->pos = cxl_calc_interleave_pos(cxled);
  1599. /*
  1600. * Record that sorting failed, but still continue to calc
  1601. * cxled->pos so that follow-on code paths can reliably
  1602. * do p->targets[cxled->pos] to self-reference their entry.
  1603. */
  1604. if (cxled->pos < 0)
  1605. rc = -ENXIO;
  1606. }
  1607. /* Keep the cxlr target list in interleave position order */
  1608. sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
  1609. cmp_interleave_pos, NULL);
  1610. dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
  1611. return rc;
  1612. }
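/*
 * Illustrative note: after sorting, p->targets[] is in interleave-position
 * order, so for the 4-way example documented in cxl_calc_interleave_pos()
 * the array reads mem0, mem2, mem1, mem3 (positions 0..3).
 */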
  1613. static int cxl_region_attach(struct cxl_region *cxlr,
  1614. struct cxl_endpoint_decoder *cxled, int pos)
  1615. {
  1616. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
  1617. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  1618. struct cxl_region_params *p = &cxlr->params;
  1619. struct cxl_port *ep_port, *root_port;
  1620. struct cxl_dport *dport;
  1621. int rc = -ENXIO;
  1622. rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
  1623. p->interleave_granularity);
  1624. if (rc) {
  1625. dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
  1626. dev_name(&cxled->cxld.dev), p->interleave_ways,
  1627. p->interleave_granularity);
  1628. return rc;
  1629. }
  1630. if (cxled->mode != cxlr->mode) {
  1631. dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
  1632. dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
  1633. return -EINVAL;
  1634. }
  1635. if (cxled->mode == CXL_DECODER_DEAD) {
  1636. dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
  1637. return -ENODEV;
  1638. }
  1639. /* all full of members, or interleave config not established? */
  1640. if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
  1641. dev_dbg(&cxlr->dev, "region already active\n");
  1642. return -EBUSY;
  1643. } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
  1644. dev_dbg(&cxlr->dev, "interleave config missing\n");
  1645. return -ENXIO;
  1646. }
  1647. if (p->nr_targets >= p->interleave_ways) {
  1648. dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
  1649. p->nr_targets);
  1650. return -EINVAL;
  1651. }
  1652. ep_port = cxled_to_port(cxled);
  1653. root_port = cxlrd_to_port(cxlrd);
  1654. dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
  1655. if (!dport) {
  1656. dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
  1657. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  1658. dev_name(cxlr->dev.parent));
  1659. return -ENXIO;
  1660. }
  1661. if (cxled->cxld.target_type != cxlr->type) {
  1662. dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
  1663. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  1664. cxled->cxld.target_type, cxlr->type);
  1665. return -ENXIO;
  1666. }
  1667. if (!cxled->dpa_res) {
  1668. dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
  1669. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
  1670. return -ENXIO;
  1671. }
  1672. if (resource_size(cxled->dpa_res) * p->interleave_ways !=
  1673. resource_size(p->res)) {
  1674. dev_dbg(&cxlr->dev,
  1675. "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
  1676. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  1677. (u64)resource_size(cxled->dpa_res), p->interleave_ways,
  1678. (u64)resource_size(p->res));
  1679. return -EINVAL;
  1680. }
  1681. cxl_region_perf_data_calculate(cxlr, cxled);
  1682. if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
  1683. int i;
  1684. rc = cxl_region_attach_auto(cxlr, cxled, pos);
  1685. if (rc)
  1686. return rc;
  1687. /* await more targets to arrive... */
  1688. if (p->nr_targets < p->interleave_ways)
  1689. return 0;
  1690. /*
  1691. * All targets are here, which implies all PCI enumeration that
  1692. * affects this region has been completed. Walk the topology to
  1693. * sort the devices into their relative region decode position.
  1694. */
  1695. rc = cxl_region_sort_targets(cxlr);
  1696. if (rc)
  1697. return rc;
  1698. for (i = 0; i < p->nr_targets; i++) {
  1699. cxled = p->targets[i];
  1700. ep_port = cxled_to_port(cxled);
  1701. dport = cxl_find_dport_by_dev(root_port,
  1702. ep_port->host_bridge);
  1703. rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
  1704. dport, i);
  1705. if (rc)
  1706. return rc;
  1707. }
  1708. rc = cxl_region_setup_targets(cxlr);
  1709. if (rc)
  1710. return rc;
  1711. /*
  1712. * If target setup succeeds in the autodiscovery case
  1713. * then the region is already committed.
  1714. */
  1715. p->state = CXL_CONFIG_COMMIT;
  1716. cxl_region_shared_upstream_bandwidth_update(cxlr);
  1717. return 0;
  1718. }
  1719. rc = cxl_region_validate_position(cxlr, cxled, pos);
  1720. if (rc)
  1721. return rc;
  1722. rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
  1723. if (rc)
  1724. return rc;
  1725. p->targets[pos] = cxled;
  1726. cxled->pos = pos;
  1727. p->nr_targets++;
  1728. if (p->nr_targets == p->interleave_ways) {
  1729. rc = cxl_region_setup_targets(cxlr);
  1730. if (rc)
  1731. return rc;
  1732. p->state = CXL_CONFIG_ACTIVE;
  1733. cxl_region_shared_upstream_bandwidth_update(cxlr);
  1734. }
  1735. cxled->cxld.interleave_ways = p->interleave_ways;
  1736. cxled->cxld.interleave_granularity = p->interleave_granularity;
  1737. cxled->cxld.hpa_range = (struct range) {
  1738. .start = p->res->start,
  1739. .end = p->res->end,
  1740. };
  1741. if (p->nr_targets != p->interleave_ways)
  1742. return 0;
  1743. /*
  1744. * Test the auto-discovery position calculator function
  1745. * against this successfully created user-defined region.
  1746. * A fail message here means that this interleave config
  1747. * will fail when presented as CXL_REGION_F_AUTO.
  1748. */
  1749. for (int i = 0; i < p->nr_targets; i++) {
  1750. struct cxl_endpoint_decoder *cxled = p->targets[i];
  1751. int test_pos;
  1752. test_pos = cxl_calc_interleave_pos(cxled);
  1753. dev_dbg(&cxled->cxld.dev,
  1754. "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
  1755. (test_pos == cxled->pos) ? "success" : "fail",
  1756. test_pos, cxled->pos);
  1757. }
  1758. return 0;
  1759. }
  1760. static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
  1761. {
  1762. struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
  1763. struct cxl_region *cxlr = cxled->cxld.region;
  1764. struct cxl_region_params *p;
  1765. int rc = 0;
  1766. lockdep_assert_held_write(&cxl_region_rwsem);
  1767. if (!cxlr)
  1768. return 0;
  1769. p = &cxlr->params;
  1770. get_device(&cxlr->dev);
  1771. if (p->state > CXL_CONFIG_ACTIVE) {
  1772. cxl_region_decode_reset(cxlr, p->interleave_ways);
  1773. p->state = CXL_CONFIG_ACTIVE;
  1774. }
  1775. for (iter = ep_port; !is_cxl_root(iter);
  1776. iter = to_cxl_port(iter->dev.parent))
  1777. cxl_port_detach_region(iter, cxlr, cxled);
  1778. if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
  1779. p->targets[cxled->pos] != cxled) {
  1780. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  1781. dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
  1782. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  1783. cxled->pos);
  1784. goto out;
  1785. }
  1786. if (p->state == CXL_CONFIG_ACTIVE) {
  1787. p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
  1788. cxl_region_teardown_targets(cxlr);
  1789. }
  1790. p->targets[cxled->pos] = NULL;
  1791. p->nr_targets--;
  1792. cxled->cxld.hpa_range = (struct range) {
  1793. .start = 0,
  1794. .end = -1,
  1795. };
  1796. /* notify the region driver that one of its targets has departed */
  1797. up_write(&cxl_region_rwsem);
  1798. device_release_driver(&cxlr->dev);
  1799. down_write(&cxl_region_rwsem);
  1800. out:
  1801. put_device(&cxlr->dev);
  1802. return rc;
  1803. }
  1804. void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
  1805. {
  1806. down_write(&cxl_region_rwsem);
  1807. cxled->mode = CXL_DECODER_DEAD;
  1808. cxl_region_detach(cxled);
  1809. up_write(&cxl_region_rwsem);
  1810. }
  1811. static int attach_target(struct cxl_region *cxlr,
  1812. struct cxl_endpoint_decoder *cxled, int pos,
  1813. unsigned int state)
  1814. {
  1815. int rc = 0;
  1816. if (state == TASK_INTERRUPTIBLE)
  1817. rc = down_write_killable(&cxl_region_rwsem);
  1818. else
  1819. down_write(&cxl_region_rwsem);
  1820. if (rc)
  1821. return rc;
  1822. down_read(&cxl_dpa_rwsem);
  1823. rc = cxl_region_attach(cxlr, cxled, pos);
  1824. up_read(&cxl_dpa_rwsem);
  1825. up_write(&cxl_region_rwsem);
  1826. return rc;
  1827. }
  1828. static int detach_target(struct cxl_region *cxlr, int pos)
  1829. {
  1830. struct cxl_region_params *p = &cxlr->params;
  1831. int rc;
  1832. rc = down_write_killable(&cxl_region_rwsem);
  1833. if (rc)
  1834. return rc;
  1835. if (pos >= p->interleave_ways) {
  1836. dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
  1837. p->interleave_ways);
  1838. rc = -ENXIO;
  1839. goto out;
  1840. }
  1841. if (!p->targets[pos]) {
  1842. rc = 0;
  1843. goto out;
  1844. }
  1845. rc = cxl_region_detach(p->targets[pos]);
  1846. out:
  1847. up_write(&cxl_region_rwsem);
  1848. return rc;
  1849. }
  1850. static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
  1851. size_t len)
  1852. {
  1853. int rc;
  1854. if (sysfs_streq(buf, "\n"))
  1855. rc = detach_target(cxlr, pos);
  1856. else {
  1857. struct device *dev;
  1858. dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
  1859. if (!dev)
  1860. return -ENODEV;
  1861. if (!is_endpoint_decoder(dev)) {
  1862. rc = -EINVAL;
  1863. goto out;
  1864. }
  1865. rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
  1866. TASK_INTERRUPTIBLE);
  1867. out:
  1868. put_device(dev);
  1869. }
  1870. if (rc < 0)
  1871. return rc;
  1872. return len;
  1873. }
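/*
 * Illustrative usage from userspace (paths and decoder names are examples,
 * not guaranteed):
 *
 *   # attach endpoint decoder2.0 at position 0 of region0
 *   echo decoder2.0 > /sys/bus/cxl/devices/region0/target0
 *
 *   # detach whatever occupies position 0 (writes a bare newline)
 *   echo "" > /sys/bus/cxl/devices/region0/target0
 */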
  1874. #define TARGET_ATTR_RW(n) \
  1875. static ssize_t target##n##_show( \
  1876. struct device *dev, struct device_attribute *attr, char *buf) \
  1877. { \
  1878. return show_targetN(to_cxl_region(dev), buf, (n)); \
  1879. } \
  1880. static ssize_t target##n##_store(struct device *dev, \
  1881. struct device_attribute *attr, \
  1882. const char *buf, size_t len) \
  1883. { \
  1884. return store_targetN(to_cxl_region(dev), buf, (n), len); \
  1885. } \
  1886. static DEVICE_ATTR_RW(target##n)
  1887. TARGET_ATTR_RW(0);
  1888. TARGET_ATTR_RW(1);
  1889. TARGET_ATTR_RW(2);
  1890. TARGET_ATTR_RW(3);
  1891. TARGET_ATTR_RW(4);
  1892. TARGET_ATTR_RW(5);
  1893. TARGET_ATTR_RW(6);
  1894. TARGET_ATTR_RW(7);
  1895. TARGET_ATTR_RW(8);
  1896. TARGET_ATTR_RW(9);
  1897. TARGET_ATTR_RW(10);
  1898. TARGET_ATTR_RW(11);
  1899. TARGET_ATTR_RW(12);
  1900. TARGET_ATTR_RW(13);
  1901. TARGET_ATTR_RW(14);
  1902. TARGET_ATTR_RW(15);
  1903. static struct attribute *target_attrs[] = {
  1904. &dev_attr_target0.attr,
  1905. &dev_attr_target1.attr,
  1906. &dev_attr_target2.attr,
  1907. &dev_attr_target3.attr,
  1908. &dev_attr_target4.attr,
  1909. &dev_attr_target5.attr,
  1910. &dev_attr_target6.attr,
  1911. &dev_attr_target7.attr,
  1912. &dev_attr_target8.attr,
  1913. &dev_attr_target9.attr,
  1914. &dev_attr_target10.attr,
  1915. &dev_attr_target11.attr,
  1916. &dev_attr_target12.attr,
  1917. &dev_attr_target13.attr,
  1918. &dev_attr_target14.attr,
  1919. &dev_attr_target15.attr,
  1920. NULL,
  1921. };
  1922. static umode_t cxl_region_target_visible(struct kobject *kobj,
  1923. struct attribute *a, int n)
  1924. {
  1925. struct device *dev = kobj_to_dev(kobj);
  1926. struct cxl_region *cxlr = to_cxl_region(dev);
  1927. struct cxl_region_params *p = &cxlr->params;
  1928. if (n < p->interleave_ways)
  1929. return a->mode;
  1930. return 0;
  1931. }
  1932. static const struct attribute_group cxl_region_target_group = {
  1933. .attrs = target_attrs,
  1934. .is_visible = cxl_region_target_visible,
  1935. };
  1936. static const struct attribute_group *get_cxl_region_target_group(void)
  1937. {
  1938. return &cxl_region_target_group;
  1939. }
  1940. static const struct attribute_group *region_groups[] = {
  1941. &cxl_base_attribute_group,
  1942. &cxl_region_group,
  1943. &cxl_region_target_group,
  1944. &cxl_region_access0_coordinate_group,
  1945. &cxl_region_access1_coordinate_group,
  1946. NULL,
  1947. };
  1948. static void cxl_region_release(struct device *dev)
  1949. {
  1950. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
  1951. struct cxl_region *cxlr = to_cxl_region(dev);
  1952. int id = atomic_read(&cxlrd->region_id);
  1953. /*
  1954. * Try to reuse the recently idled id rather than the cached
  1955. * next id to prevent the region id space from increasing
  1956. * unnecessarily.
  1957. */
  1958. if (cxlr->id < id)
  1959. if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
  1960. memregion_free(id);
  1961. goto out;
  1962. }
  1963. memregion_free(cxlr->id);
  1964. out:
  1965. put_device(dev->parent);
  1966. kfree(cxlr);
  1967. }
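/*
 * Illustrative example of the id reuse above: if region5 is released while
 * the root decoder's cached next id is 6, the cmpxchg rolls the cache back
 * to 5 and frees id 6, so the next region create reuses "region5".
 */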
  1968. const struct device_type cxl_region_type = {
  1969. .name = "cxl_region",
  1970. .release = cxl_region_release,
  1971. .groups = region_groups
  1972. };
  1973. bool is_cxl_region(struct device *dev)
  1974. {
  1975. return dev->type == &cxl_region_type;
  1976. }
  1977. EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
  1978. static struct cxl_region *to_cxl_region(struct device *dev)
  1979. {
  1980. if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
  1981. "not a cxl_region device\n"))
  1982. return NULL;
  1983. return container_of(dev, struct cxl_region, dev);
  1984. }
  1985. static void unregister_region(void *_cxlr)
  1986. {
  1987. struct cxl_region *cxlr = _cxlr;
  1988. struct cxl_region_params *p = &cxlr->params;
  1989. int i;
  1990. device_del(&cxlr->dev);
  1991. /*
  1992. * Now that region sysfs is shutdown, the parameter block is now
  1993. * read-only, so no need to hold the region rwsem to access the
  1994. * region parameters.
  1995. */
  1996. for (i = 0; i < p->interleave_ways; i++)
  1997. detach_target(cxlr, i);
  1998. cxl_region_iomem_release(cxlr);
  1999. put_device(&cxlr->dev);
  2000. }
  2001. static struct lock_class_key cxl_region_key;
  2002. static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
  2003. {
  2004. struct cxl_region *cxlr;
  2005. struct device *dev;
  2006. cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
  2007. if (!cxlr) {
  2008. memregion_free(id);
  2009. return ERR_PTR(-ENOMEM);
  2010. }
  2011. dev = &cxlr->dev;
  2012. device_initialize(dev);
  2013. lockdep_set_class(&dev->mutex, &cxl_region_key);
  2014. dev->parent = &cxlrd->cxlsd.cxld.dev;
  2015. /*
  2016. * Keep root decoder pinned through cxl_region_release to fixup
  2017. * region id allocations
  2018. */
  2019. get_device(dev->parent);
  2020. device_set_pm_not_required(dev);
  2021. dev->bus = &cxl_bus_type;
  2022. dev->type = &cxl_region_type;
  2023. cxlr->id = id;
  2024. return cxlr;
  2025. }
  2026. static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
  2027. {
  2028. int cset = 0;
  2029. int rc;
  2030. for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
  2031. if (cxlr->coord[i].read_bandwidth) {
  2032. rc = 0;
  2033. if (cxl_need_node_perf_attrs_update(nid))
  2034. node_set_perf_attrs(nid, &cxlr->coord[i], i);
  2035. else
  2036. rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
  2037. if (rc == 0)
  2038. cset++;
  2039. }
  2040. }
  2041. if (!cset)
  2042. return false;
  2043. rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
  2044. if (rc)
  2045. dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
  2046. rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
  2047. if (rc)
  2048. dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
  2049. return true;
  2050. }
  2051. static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
  2052. unsigned long action, void *arg)
  2053. {
  2054. struct cxl_region *cxlr = container_of(nb, struct cxl_region,
  2055. memory_notifier);
  2056. struct memory_notify *mnb = arg;
  2057. int nid = mnb->status_change_nid;
  2058. int region_nid;
  2059. if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
  2060. return NOTIFY_DONE;
  2061. /*
  2062. * No need to hold cxl_region_rwsem; region parameters are stable
  2063. * within the cxl_region driver.
  2064. */
  2065. region_nid = phys_to_target_node(cxlr->params.res->start);
  2066. if (nid != region_nid)
  2067. return NOTIFY_DONE;
  2068. if (!cxl_region_update_coordinates(cxlr, nid))
  2069. return NOTIFY_DONE;
  2070. return NOTIFY_OK;
  2071. }
  2072. static int cxl_region_calculate_adistance(struct notifier_block *nb,
  2073. unsigned long nid, void *data)
  2074. {
  2075. struct cxl_region *cxlr = container_of(nb, struct cxl_region,
  2076. adist_notifier);
  2077. struct access_coordinate *perf;
  2078. int *adist = data;
  2079. int region_nid;
  2080. /*
  2081. * No need to hold cxl_region_rwsem; region parameters are stable
  2082. * within the cxl_region driver.
  2083. */
  2084. region_nid = phys_to_target_node(cxlr->params.res->start);
  2085. if (nid != region_nid)
  2086. return NOTIFY_OK;
  2087. perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
  2088. if (mt_perf_to_adistance(perf, adist))
  2089. return NOTIFY_OK;
  2090. return NOTIFY_STOP;
  2091. }
  2092. /**
  2093. * devm_cxl_add_region - Adds a region to a decoder
  2094. * @cxlrd: root decoder
  2095. * @id: memregion id to create, or memregion_free() on failure
  2096. * @mode: mode for the endpoint decoders of this region
  2097. * @type: select whether this is an expander or accelerator (type-2 or type-3)
  2098. *
  2099. * This is the second step of region initialization. Regions exist within an
  2100. * address space which is mapped by a @cxlrd.
  2101. *
 * Return: a new &struct cxl_region on success, else an ERR_PTR() encoded
 * negative error code. The region will be named "regionZ" where Z is the
 * unique region number.
  2104. */
  2105. static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
  2106. int id,
  2107. enum cxl_decoder_mode mode,
  2108. enum cxl_decoder_type type)
  2109. {
  2110. struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
  2111. struct cxl_region *cxlr;
  2112. struct device *dev;
  2113. int rc;
  2114. cxlr = cxl_region_alloc(cxlrd, id);
  2115. if (IS_ERR(cxlr))
  2116. return cxlr;
  2117. cxlr->mode = mode;
  2118. cxlr->type = type;
  2119. dev = &cxlr->dev;
  2120. rc = dev_set_name(dev, "region%d", id);
  2121. if (rc)
  2122. goto err;
  2123. rc = device_add(dev);
  2124. if (rc)
  2125. goto err;
  2126. rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
  2127. if (rc)
  2128. return ERR_PTR(rc);
  2129. dev_dbg(port->uport_dev, "%s: created %s\n",
  2130. dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
  2131. return cxlr;
  2132. err:
  2133. put_device(dev);
  2134. return ERR_PTR(rc);
  2135. }
  2136. static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
  2137. {
  2138. return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
  2139. }
  2140. static ssize_t create_pmem_region_show(struct device *dev,
  2141. struct device_attribute *attr, char *buf)
  2142. {
  2143. return __create_region_show(to_cxl_root_decoder(dev), buf);
  2144. }
  2145. static ssize_t create_ram_region_show(struct device *dev,
  2146. struct device_attribute *attr, char *buf)
  2147. {
  2148. return __create_region_show(to_cxl_root_decoder(dev), buf);
  2149. }
  2150. static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
  2151. enum cxl_decoder_mode mode, int id)
  2152. {
  2153. int rc;
  2154. switch (mode) {
  2155. case CXL_DECODER_RAM:
  2156. case CXL_DECODER_PMEM:
  2157. break;
  2158. default:
  2159. dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
  2160. return ERR_PTR(-EINVAL);
  2161. }
  2162. rc = memregion_alloc(GFP_KERNEL);
  2163. if (rc < 0)
  2164. return ERR_PTR(rc);
  2165. if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
  2166. memregion_free(rc);
  2167. return ERR_PTR(-EBUSY);
  2168. }
  2169. return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
  2170. }
  2171. static ssize_t create_pmem_region_store(struct device *dev,
  2172. struct device_attribute *attr,
  2173. const char *buf, size_t len)
  2174. {
  2175. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
  2176. struct cxl_region *cxlr;
  2177. int rc, id;
  2178. rc = sscanf(buf, "region%d\n", &id);
  2179. if (rc != 1)
  2180. return -EINVAL;
  2181. cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
  2182. if (IS_ERR(cxlr))
  2183. return PTR_ERR(cxlr);
  2184. return len;
  2185. }
  2186. DEVICE_ATTR_RW(create_pmem_region);
  2187. static ssize_t create_ram_region_store(struct device *dev,
  2188. struct device_attribute *attr,
  2189. const char *buf, size_t len)
  2190. {
  2191. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
  2192. struct cxl_region *cxlr;
  2193. int rc, id;
  2194. rc = sscanf(buf, "region%d\n", &id);
  2195. if (rc != 1)
  2196. return -EINVAL;
  2197. cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
  2198. if (IS_ERR(cxlr))
  2199. return PTR_ERR(cxlr);
  2200. return len;
  2201. }
  2202. DEVICE_ATTR_RW(create_ram_region);
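/*
 * Illustrative usage from userspace (paths are examples): read the attribute
 * to learn the next region name, then write that name back to claim it:
 *
 *   cat  /sys/bus/cxl/devices/decoder0.0/create_ram_region   # "region0"
 *   echo region0 > /sys/bus/cxl/devices/decoder0.0/create_ram_region
 *
 * The cmpxchg in __create_region() causes a stale or racing write of the
 * same name to fail with -EBUSY rather than create a duplicate.
 */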
  2203. static ssize_t region_show(struct device *dev, struct device_attribute *attr,
  2204. char *buf)
  2205. {
  2206. struct cxl_decoder *cxld = to_cxl_decoder(dev);
  2207. ssize_t rc;
  2208. rc = down_read_interruptible(&cxl_region_rwsem);
  2209. if (rc)
  2210. return rc;
  2211. if (cxld->region)
  2212. rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
  2213. else
  2214. rc = sysfs_emit(buf, "\n");
  2215. up_read(&cxl_region_rwsem);
  2216. return rc;
  2217. }
  2218. DEVICE_ATTR_RO(region);
  2219. static struct cxl_region *
  2220. cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
  2221. {
  2222. struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
  2223. struct device *region_dev;
  2224. region_dev = device_find_child_by_name(&cxld->dev, name);
  2225. if (!region_dev)
  2226. return ERR_PTR(-ENODEV);
  2227. return to_cxl_region(region_dev);
  2228. }
  2229. static ssize_t delete_region_store(struct device *dev,
  2230. struct device_attribute *attr,
  2231. const char *buf, size_t len)
  2232. {
  2233. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
  2234. struct cxl_port *port = to_cxl_port(dev->parent);
  2235. struct cxl_region *cxlr;
  2236. cxlr = cxl_find_region_by_name(cxlrd, buf);
  2237. if (IS_ERR(cxlr))
  2238. return PTR_ERR(cxlr);
  2239. devm_release_action(port->uport_dev, unregister_region, cxlr);
  2240. put_device(&cxlr->dev);
  2241. return len;
  2242. }
  2243. DEVICE_ATTR_WO(delete_region);
  2244. static void cxl_pmem_region_release(struct device *dev)
  2245. {
  2246. struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
  2247. int i;
  2248. for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
  2249. struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
  2250. put_device(&cxlmd->dev);
  2251. }
  2252. kfree(cxlr_pmem);
  2253. }
  2254. static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
  2255. &cxl_base_attribute_group,
  2256. NULL,
  2257. };
  2258. const struct device_type cxl_pmem_region_type = {
  2259. .name = "cxl_pmem_region",
  2260. .release = cxl_pmem_region_release,
  2261. .groups = cxl_pmem_region_attribute_groups,
  2262. };
  2263. bool is_cxl_pmem_region(struct device *dev)
  2264. {
  2265. return dev->type == &cxl_pmem_region_type;
  2266. }
  2267. EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
  2268. struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
  2269. {
  2270. if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
  2271. "not a cxl_pmem_region device\n"))
  2272. return NULL;
  2273. return container_of(dev, struct cxl_pmem_region, dev);
  2274. }
  2275. EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
  2276. struct cxl_poison_context {
  2277. struct cxl_port *port;
  2278. enum cxl_decoder_mode mode;
  2279. u64 offset;
  2280. };
  2281. static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
  2282. struct cxl_poison_context *ctx)
  2283. {
  2284. struct cxl_dev_state *cxlds = cxlmd->cxlds;
  2285. u64 offset, length;
  2286. int rc = 0;
  2287. /*
  2288. * Collect poison for the remaining unmapped resources
  2289. * after poison is collected by committed endpoints.
  2290. *
  2291. * Knowing that PMEM must always follow RAM, get poison
  2292. * for unmapped resources based on the last decoder's mode:
  2293. * ram: scan remains of ram range, then any pmem range
  2294. * pmem: scan remains of pmem range
  2295. */
  2296. if (ctx->mode == CXL_DECODER_RAM) {
  2297. offset = ctx->offset;
  2298. length = resource_size(&cxlds->ram_res) - offset;
  2299. rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
  2300. if (rc == -EFAULT)
  2301. rc = 0;
  2302. if (rc)
  2303. return rc;
  2304. }
  2305. if (ctx->mode == CXL_DECODER_PMEM) {
  2306. offset = ctx->offset;
  2307. length = resource_size(&cxlds->dpa_res) - offset;
  2308. if (!length)
  2309. return 0;
  2310. } else if (resource_size(&cxlds->pmem_res)) {
  2311. offset = cxlds->pmem_res.start;
  2312. length = resource_size(&cxlds->pmem_res);
  2313. } else {
  2314. return 0;
  2315. }
  2316. return cxl_mem_get_poison(cxlmd, offset, length, NULL);
  2317. }
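/*
 * Illustrative example of the scan order above: if the last committed
 * decoder was ram mode and ended at DPA offset X, poison is read for
 * [X, end of ram_res) and then for all of pmem_res; if it was pmem mode,
 * only [X, end of dpa_res) remains to be read.
 */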
  2318. static int poison_by_decoder(struct device *dev, void *arg)
  2319. {
  2320. struct cxl_poison_context *ctx = arg;
  2321. struct cxl_endpoint_decoder *cxled;
  2322. struct cxl_memdev *cxlmd;
  2323. u64 offset, length;
  2324. int rc = 0;
  2325. if (!is_endpoint_decoder(dev))
  2326. return rc;
  2327. cxled = to_cxl_endpoint_decoder(dev);
  2328. if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
  2329. return rc;
  2330. /*
  2331. * Regions are only created with single mode decoders: pmem or ram.
  2332. * Linux does not support mixed mode decoders. This means that
  2333. * reading poison per endpoint decoder adheres to the requirement
  2334. * that poison reads of pmem and ram must be separated.
  2335. * CXL 3.0 Spec 8.2.9.8.4.1
  2336. */
  2337. if (cxled->mode == CXL_DECODER_MIXED) {
  2338. dev_dbg(dev, "poison list read unsupported in mixed mode\n");
  2339. return rc;
  2340. }
  2341. cxlmd = cxled_to_memdev(cxled);
  2342. if (cxled->skip) {
  2343. offset = cxled->dpa_res->start - cxled->skip;
  2344. length = cxled->skip;
  2345. rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
  2346. if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
  2347. rc = 0;
  2348. if (rc)
  2349. return rc;
  2350. }
  2351. offset = cxled->dpa_res->start;
  2352. length = cxled->dpa_res->end - offset + 1;
  2353. rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
  2354. if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
  2355. rc = 0;
  2356. if (rc)
  2357. return rc;
  2358. /* Iterate until commit_end is reached */
  2359. if (cxled->cxld.id == ctx->port->commit_end) {
  2360. ctx->offset = cxled->dpa_res->end + 1;
  2361. ctx->mode = cxled->mode;
  2362. return 1;
  2363. }
  2364. return 0;
  2365. }
  2366. int cxl_get_poison_by_endpoint(struct cxl_port *port)
  2367. {
  2368. struct cxl_poison_context ctx;
  2369. int rc = 0;
  2370. ctx = (struct cxl_poison_context) {
  2371. .port = port
  2372. };
  2373. rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
  2374. if (rc == 1)
  2375. rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
  2376. &ctx);
  2377. return rc;
  2378. }
  2379. struct cxl_dpa_to_region_context {
  2380. struct cxl_region *cxlr;
  2381. u64 dpa;
  2382. };
  2383. static int __cxl_dpa_to_region(struct device *dev, void *arg)
  2384. {
  2385. struct cxl_dpa_to_region_context *ctx = arg;
  2386. struct cxl_endpoint_decoder *cxled;
  2387. struct cxl_region *cxlr;
  2388. u64 dpa = ctx->dpa;
  2389. if (!is_endpoint_decoder(dev))
  2390. return 0;
  2391. cxled = to_cxl_endpoint_decoder(dev);
  2392. if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
  2393. return 0;
  2394. if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
  2395. return 0;
  2396. /*
  2397. * Stop the region search (return 1) when an endpoint mapping is
  2398. * found. The region may not be fully constructed so offering
  2399. * the cxlr in the context structure is not guaranteed.
  2400. */
  2401. cxlr = cxled->cxld.region;
  2402. if (cxlr)
  2403. dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
  2404. dev_name(&cxlr->dev));
  2405. else
  2406. dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
  2407. dev_name(dev));
  2408. ctx->cxlr = cxlr;
  2409. return 1;
  2410. }
  2411. struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa)
  2412. {
  2413. struct cxl_dpa_to_region_context ctx;
  2414. struct cxl_port *port;
  2415. ctx = (struct cxl_dpa_to_region_context) {
  2416. .dpa = dpa,
  2417. };
  2418. port = cxlmd->endpoint;
  2419. if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port))
  2420. device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
  2421. return ctx.cxlr;
  2422. }
  2423. static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
  2424. {
  2425. struct cxl_region_params *p = &cxlr->params;
  2426. int gran = p->interleave_granularity;
  2427. int ways = p->interleave_ways;
  2428. u64 offset;
  2429. /* Is the hpa in an expected chunk for its pos(-ition) */
  2430. offset = hpa - p->res->start;
  2431. offset = do_div(offset, gran * ways);
  2432. if ((offset >= pos * gran) && (offset < (pos + 1) * gran))
  2433. return true;
  2434. dev_dbg(&cxlr->dev,
  2435. "Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);
  2436. return false;
  2437. }
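/*
 * Illustrative example: with gran = 256 and ways = 2, HPA offsets repeat in
 * 512-byte super-chunks; an endpoint at pos 1 only legitimately owns the
 * [256, 512) half of each super-chunk, so any other offset fails the check.
 */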
  2438. u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
  2439. u64 dpa)
  2440. {
  2441. struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
  2442. u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
  2443. struct cxl_region_params *p = &cxlr->params;
  2444. struct cxl_endpoint_decoder *cxled = NULL;
  2445. u16 eig = 0;
  2446. u8 eiw = 0;
  2447. int pos;
  2448. for (int i = 0; i < p->nr_targets; i++) {
  2449. cxled = p->targets[i];
  2450. if (cxlmd == cxled_to_memdev(cxled))
  2451. break;
  2452. }
  2453. if (!cxled || cxlmd != cxled_to_memdev(cxled))
  2454. return ULLONG_MAX;
  2455. pos = cxled->pos;
  2456. ways_to_eiw(p->interleave_ways, &eiw);
  2457. granularity_to_eig(p->interleave_granularity, &eig);
  2458. /*
  2459. * The device position in the region interleave set was removed
  2460. * from the offset at HPA->DPA translation. To reconstruct the
  2461. * HPA, place the 'pos' in the offset.
  2462. *
  2463. * The placement of 'pos' in the HPA is determined by interleave
  2464. * ways and granularity and is defined in the CXL Spec 3.0 Section
  2465. * 8.2.4.19.13 Implementation Note: Device Decode Logic
  2466. */
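	/*
	 * Worked example (illustrative numbers): 2 ways (eiw = 1), 256B
	 * granularity (eig = 0), pos = 1, dpa_offset = 0x100:
	 *   upper bits: (0x100 & mask_upper) << 1 = 0x200
	 *   pos field:  1 << (0 + 8)              = 0x100
	 *   low bits:   0x100 & GENMASK(7, 0)     = 0x0
	 * so hpa_offset = 0x300, i.e. the second 256B chunk of the second
	 * 512B super-chunk, which decodes back to this device at DPA 0x100.
	 */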
  2467. /* Remove the dpa base */
  2468. dpa_offset = dpa - cxl_dpa_resource_start(cxled);
  2469. mask_upper = GENMASK_ULL(51, eig + 8);
  2470. if (eiw < 8) {
  2471. hpa_offset = (dpa_offset & mask_upper) << eiw;
  2472. hpa_offset |= pos << (eig + 8);
  2473. } else {
  2474. bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
  2475. bits_upper = bits_upper * 3;
  2476. hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
  2477. }
  2478. /* The lower bits remain unchanged */
  2479. hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
  2480. /* Apply the hpa_offset to the region base address */
  2481. hpa = hpa_offset + p->res->start;
  2482. /* Root decoder translation overrides typical modulo decode */
  2483. if (cxlrd->hpa_to_spa)
  2484. hpa = cxlrd->hpa_to_spa(cxlrd, hpa);
  2485. if (hpa < p->res->start || hpa > p->res->end) {
  2486. dev_dbg(&cxlr->dev,
  2487. "Addr trans fail: hpa 0x%llx not in region\n", hpa);
  2488. return ULLONG_MAX;
  2489. }
  2490. /* Simple chunk check, by pos & gran, only applies to modulo decodes */
  2491. if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
  2492. return ULLONG_MAX;
  2493. return hpa;
  2494. }
  2495. static struct lock_class_key cxl_pmem_region_key;
  2496. static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
  2497. {
  2498. struct cxl_region_params *p = &cxlr->params;
  2499. struct cxl_nvdimm_bridge *cxl_nvb;
  2500. struct device *dev;
  2501. int i;
  2502. guard(rwsem_read)(&cxl_region_rwsem);
  2503. if (p->state != CXL_CONFIG_COMMIT)
  2504. return -ENXIO;
  2505. struct cxl_pmem_region *cxlr_pmem __free(kfree) =
  2506. kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), GFP_KERNEL);
  2507. if (!cxlr_pmem)
  2508. return -ENOMEM;
  2509. cxlr_pmem->hpa_range.start = p->res->start;
  2510. cxlr_pmem->hpa_range.end = p->res->end;
  2511. /* Snapshot the region configuration underneath the cxl_region_rwsem */
  2512. cxlr_pmem->nr_mappings = p->nr_targets;
  2513. for (i = 0; i < p->nr_targets; i++) {
  2514. struct cxl_endpoint_decoder *cxled = p->targets[i];
  2515. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  2516. struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
  2517. /*
  2518. * Regions never span CXL root devices, so by definition the
  2519. * bridge for one device is the same for all.
  2520. */
  2521. if (i == 0) {
  2522. cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
  2523. if (!cxl_nvb)
  2524. return -ENODEV;
  2525. cxlr->cxl_nvb = cxl_nvb;
  2526. }
  2527. m->cxlmd = cxlmd;
  2528. get_device(&cxlmd->dev);
  2529. m->start = cxled->dpa_res->start;
  2530. m->size = resource_size(cxled->dpa_res);
  2531. m->position = i;
  2532. }
  2533. dev = &cxlr_pmem->dev;
  2534. device_initialize(dev);
  2535. lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
  2536. device_set_pm_not_required(dev);
  2537. dev->parent = &cxlr->dev;
  2538. dev->bus = &cxl_bus_type;
  2539. dev->type = &cxl_pmem_region_type;
  2540. cxlr_pmem->cxlr = cxlr;
  2541. cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem);
  2542. return 0;
  2543. }
  2544. static void cxl_dax_region_release(struct device *dev)
  2545. {
  2546. struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
  2547. kfree(cxlr_dax);
  2548. }
  2549. static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
  2550. &cxl_base_attribute_group,
  2551. NULL,
  2552. };
  2553. const struct device_type cxl_dax_region_type = {
  2554. .name = "cxl_dax_region",
  2555. .release = cxl_dax_region_release,
  2556. .groups = cxl_dax_region_attribute_groups,
  2557. };
  2558. static bool is_cxl_dax_region(struct device *dev)
  2559. {
  2560. return dev->type == &cxl_dax_region_type;
  2561. }
  2562. struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
  2563. {
  2564. if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
  2565. "not a cxl_dax_region device\n"))
  2566. return NULL;
  2567. return container_of(dev, struct cxl_dax_region, dev);
  2568. }
  2569. EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
  2570. static struct lock_class_key cxl_dax_region_key;
  2571. static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
  2572. {
  2573. struct cxl_region_params *p = &cxlr->params;
  2574. struct cxl_dax_region *cxlr_dax;
  2575. struct device *dev;
  2576. down_read(&cxl_region_rwsem);
  2577. if (p->state != CXL_CONFIG_COMMIT) {
  2578. cxlr_dax = ERR_PTR(-ENXIO);
  2579. goto out;
  2580. }
  2581. cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
  2582. if (!cxlr_dax) {
  2583. cxlr_dax = ERR_PTR(-ENOMEM);
  2584. goto out;
  2585. }
  2586. cxlr_dax->hpa_range.start = p->res->start;
  2587. cxlr_dax->hpa_range.end = p->res->end;
  2588. dev = &cxlr_dax->dev;
  2589. cxlr_dax->cxlr = cxlr;
  2590. device_initialize(dev);
  2591. lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
  2592. device_set_pm_not_required(dev);
  2593. dev->parent = &cxlr->dev;
  2594. dev->bus = &cxl_bus_type;
  2595. dev->type = &cxl_dax_region_type;
  2596. out:
  2597. up_read(&cxl_region_rwsem);
  2598. return cxlr_dax;
  2599. }
  2600. static void cxlr_pmem_unregister(void *_cxlr_pmem)
  2601. {
  2602. struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
  2603. struct cxl_region *cxlr = cxlr_pmem->cxlr;
  2604. struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
  2605. /*
  2606. * Either the bridge is in ->remove() context under the device_lock(),
  2607. * or cxlr_release_nvdimm() is cancelling the bridge's release action
  2608. * for @cxlr_pmem and doing it itself (while manually holding the bridge
  2609. * lock).
  2610. */
  2611. device_lock_assert(&cxl_nvb->dev);
  2612. cxlr->cxlr_pmem = NULL;
  2613. cxlr_pmem->cxlr = NULL;
  2614. device_unregister(&cxlr_pmem->dev);
  2615. }
  2616. static void cxlr_release_nvdimm(void *_cxlr)
  2617. {
  2618. struct cxl_region *cxlr = _cxlr;
  2619. struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
  2620. scoped_guard(device, &cxl_nvb->dev) {
  2621. if (cxlr->cxlr_pmem)
  2622. devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
  2623. cxlr->cxlr_pmem);
  2624. }
  2625. cxlr->cxl_nvb = NULL;
  2626. put_device(&cxl_nvb->dev);
  2627. }
  2628. /**
  2629. * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
  2630. * @cxlr: parent CXL region for this pmem region bridge device
  2631. *
 * Return: 0 on success, negative error code on failure.
  2633. */
  2634. static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
  2635. {
  2636. struct cxl_pmem_region *cxlr_pmem;
  2637. struct cxl_nvdimm_bridge *cxl_nvb;
  2638. struct device *dev;
  2639. int rc;
  2640. rc = cxl_pmem_region_alloc(cxlr);
  2641. if (rc)
  2642. return rc;
  2643. cxlr_pmem = cxlr->cxlr_pmem;
  2644. cxl_nvb = cxlr->cxl_nvb;
  2645. dev = &cxlr_pmem->dev;
  2646. rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
  2647. if (rc)
  2648. goto err;
  2649. rc = device_add(dev);
  2650. if (rc)
  2651. goto err;
  2652. dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
  2653. dev_name(dev));
  2654. scoped_guard(device, &cxl_nvb->dev) {
  2655. if (cxl_nvb->dev.driver)
  2656. rc = devm_add_action_or_reset(&cxl_nvb->dev,
  2657. cxlr_pmem_unregister,
  2658. cxlr_pmem);
  2659. else
  2660. rc = -ENXIO;
  2661. }
  2662. if (rc)
  2663. goto err_bridge;
  2664. /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
  2665. return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
  2666. err:
  2667. put_device(dev);
  2668. err_bridge:
  2669. put_device(&cxl_nvb->dev);
  2670. cxlr->cxl_nvb = NULL;
  2671. return rc;
  2672. }
  2673. static void cxlr_dax_unregister(void *_cxlr_dax)
  2674. {
  2675. struct cxl_dax_region *cxlr_dax = _cxlr_dax;
  2676. device_unregister(&cxlr_dax->dev);
  2677. }
  2678. static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
  2679. {
  2680. struct cxl_dax_region *cxlr_dax;
  2681. struct device *dev;
  2682. int rc;
  2683. cxlr_dax = cxl_dax_region_alloc(cxlr);
  2684. if (IS_ERR(cxlr_dax))
  2685. return PTR_ERR(cxlr_dax);
  2686. dev = &cxlr_dax->dev;
  2687. rc = dev_set_name(dev, "dax_region%d", cxlr->id);
  2688. if (rc)
  2689. goto err;
  2690. rc = device_add(dev);
  2691. if (rc)
  2692. goto err;
  2693. dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
  2694. dev_name(dev));
  2695. return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
  2696. cxlr_dax);
  2697. err:
  2698. put_device(dev);
  2699. return rc;
  2700. }
  2701. static int match_root_decoder_by_range(struct device *dev, void *data)
  2702. {
  2703. struct range *r1, *r2 = data;
  2704. struct cxl_root_decoder *cxlrd;
  2705. if (!is_root_decoder(dev))
  2706. return 0;
  2707. cxlrd = to_cxl_root_decoder(dev);
  2708. r1 = &cxlrd->cxlsd.cxld.hpa_range;
  2709. return range_contains(r1, r2);
  2710. }
  2711. static int match_region_by_range(struct device *dev, void *data)
  2712. {
  2713. struct cxl_region_params *p;
  2714. struct cxl_region *cxlr;
  2715. struct range *r = data;
  2716. int rc = 0;
  2717. if (!is_cxl_region(dev))
  2718. return 0;
  2719. cxlr = to_cxl_region(dev);
  2720. p = &cxlr->params;
  2721. down_read(&cxl_region_rwsem);
  2722. if (p->res && p->res->start == r->start && p->res->end == r->end)
  2723. rc = 1;
  2724. up_read(&cxl_region_rwsem);
  2725. return rc;
  2726. }
  2727. /* Establish an empty region covering the given HPA range */
  2728. static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
  2729. struct cxl_endpoint_decoder *cxled)
  2730. {
  2731. struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
  2732. struct cxl_port *port = cxlrd_to_port(cxlrd);
  2733. struct range *hpa = &cxled->cxld.hpa_range;
  2734. struct cxl_region_params *p;
  2735. struct cxl_region *cxlr;
  2736. struct resource *res;
  2737. int rc;
  2738. do {
  2739. cxlr = __create_region(cxlrd, cxled->mode,
  2740. atomic_read(&cxlrd->region_id));
  2741. } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
  2742. if (IS_ERR(cxlr)) {
  2743. dev_err(cxlmd->dev.parent,
  2744. "%s:%s: %s failed assign region: %ld\n",
  2745. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  2746. __func__, PTR_ERR(cxlr));
  2747. return cxlr;
  2748. }
  2749. down_write(&cxl_region_rwsem);
  2750. p = &cxlr->params;
  2751. if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
  2752. dev_err(cxlmd->dev.parent,
  2753. "%s:%s: %s autodiscovery interrupted\n",
  2754. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  2755. __func__);
  2756. rc = -EBUSY;
  2757. goto err;
  2758. }
  2759. set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
  2760. res = kmalloc(sizeof(*res), GFP_KERNEL);
  2761. if (!res) {
  2762. rc = -ENOMEM;
  2763. goto err;
  2764. }
  2765. *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
  2766. dev_name(&cxlr->dev));
  2767. rc = insert_resource(cxlrd->res, res);
  2768. if (rc) {
  2769. /*
  2770. * Platform-firmware may not have split resources like "System
  2771. * RAM" on CXL window boundaries see cxl_region_iomem_release()
  2772. */
  2773. dev_warn(cxlmd->dev.parent,
  2774. "%s:%s: %s %s cannot insert resource\n",
  2775. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
  2776. __func__, dev_name(&cxlr->dev));
  2777. }
  2778. p->res = res;
  2779. p->interleave_ways = cxled->cxld.interleave_ways;
  2780. p->interleave_granularity = cxled->cxld.interleave_granularity;
  2781. p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
  2782. rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
  2783. if (rc)
  2784. goto err;
  2785. dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
  2786. dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
  2787. dev_name(&cxlr->dev), p->res, p->interleave_ways,
  2788. p->interleave_granularity);
  2789. /* ...to match put_device() in cxl_add_to_region() */
  2790. get_device(&cxlr->dev);
  2791. up_write(&cxl_region_rwsem);
  2792. return cxlr;
  2793. err:
  2794. up_write(&cxl_region_rwsem);
  2795. devm_release_action(port->uport_dev, unregister_region, cxlr);
  2796. return ERR_PTR(rc);
  2797. }
int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct range *hpa = &cxled->cxld.hpa_range;
	struct cxl_decoder *cxld = &cxled->cxld;
	struct device *cxlrd_dev, *region_dev;
	struct cxl_root_decoder *cxlrd;
	struct cxl_region_params *p;
	struct cxl_region *cxlr;
	bool attach = false;
	int rc;

	cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
				      match_root_decoder_by_range);
	if (!cxlrd_dev) {
		dev_err(cxlmd->dev.parent,
			"%s:%s no CXL window for range %#llx:%#llx\n",
			dev_name(&cxlmd->dev), dev_name(&cxld->dev),
			cxld->hpa_range.start, cxld->hpa_range.end);
		return -ENXIO;
	}

	cxlrd = to_cxl_root_decoder(cxlrd_dev);

	/*
	 * Ensure that if multiple threads race to construct_region() for @hpa,
	 * one does the construction and the others add to that.
	 */
	mutex_lock(&cxlrd->range_lock);
	region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
				       match_region_by_range);
	if (!region_dev) {
		cxlr = construct_region(cxlrd, cxled);
		region_dev = &cxlr->dev;
	} else
		cxlr = to_cxl_region(region_dev);
	mutex_unlock(&cxlrd->range_lock);

	rc = PTR_ERR_OR_ZERO(cxlr);
	if (rc)
		goto out;
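
	/*
	 * Attach at the next available target position (-1); the commit-state
	 * check below decides whether the region is ready to be probed.
	 */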
	attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);

	down_read(&cxl_region_rwsem);
	p = &cxlr->params;
	attach = p->state == CXL_CONFIG_COMMIT;
	up_read(&cxl_region_rwsem);

	if (attach) {
		/*
		 * If device_attach() fails, the range may still be active via
		 * the platform-firmware memory map; otherwise the driver for
		 * regions is local to this file, so driver matching can't fail.
		 */
		if (device_attach(&cxlr->dev) < 0)
			dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
				p->res);
	}

	put_device(region_dev);
out:
	put_device(cxlrd_dev);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
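
/*
 * Callback for walk_iomem_res_desc(); a non-zero return stops the walk,
 * signalling that part of the region is already online as 'System RAM'.
 */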
static int is_system_ram(struct resource *res, void *arg)
{
	struct cxl_region *cxlr = arg;
	struct cxl_region_params *p = &cxlr->params;

	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
	return 1;
}
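
/* devm teardown action: unwind the notifiers registered in cxl_region_probe() */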
static void shutdown_notifiers(void *_cxlr)
{
	struct cxl_region *cxlr = _cxlr;

	unregister_memory_notifier(&cxlr->memory_notifier);
	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
}
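
/*
 * Enable a committed region: register memory hotplug and abstract-distance
 * notifiers, then hand off to the pmem or dax sub-driver depending on the
 * region's mode.
 */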
static int cxl_region_probe(struct device *dev)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	int rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc) {
		dev_dbg(&cxlr->dev, "probe interrupted\n");
		return rc;
	}

	if (p->state < CXL_CONFIG_COMMIT) {
		dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
		rc = -ENXIO;
		goto out;
	}

	if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
		dev_err(&cxlr->dev,
			"failed to activate, re-commit region and retry\n");
		rc = -ENXIO;
		goto out;
	}

	/*
	 * From this point on, any path that changes the region's state away
	 * from CXL_CONFIG_COMMIT is also responsible for releasing the driver.
	 */
out:
	up_read(&cxl_region_rwsem);

	if (rc)
		return rc;

	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
	register_memory_notifier(&cxlr->memory_notifier);

	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
	cxlr->adist_notifier.priority = 100;
	register_mt_adistance_algorithm(&cxlr->adist_notifier);

	rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
	if (rc)
		return rc;

	switch (cxlr->mode) {
	case CXL_DECODER_PMEM:
		return devm_cxl_add_pmem_region(cxlr);
	case CXL_DECODER_RAM:
		/*
		 * The region cannot be managed by CXL if any portion of
		 * it is already online as 'System RAM'
		 */
		if (walk_iomem_res_desc(IORES_DESC_NONE,
					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
					p->res->start, p->res->end, cxlr,
					is_system_ram) > 0)
			return 0;
		return devm_cxl_add_dax_region(cxlr);
	default:
		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
			cxlr->mode);
		return -ENXIO;
	}
}

static struct cxl_driver cxl_region_driver = {
	.name = "cxl_region",
	.probe = cxl_region_probe,
	.id = CXL_DEVICE_REGION,
};
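
/* Registration entry points for the cxl_region driver */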
int cxl_region_init(void)
{
	return cxl_driver_register(&cxl_region_driver);
}

void cxl_region_exit(void)
{
	cxl_driver_unregister(&cxl_region_driver);
}

MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(DEVMEM);
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);