// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2023 Intel Corporation. All rights reserved. */
#include <linux/acpi.h>
#include <linux/xarray.h>
#include <linux/fw_table.h>
#include <linux/node.h>
#include <linux/overflow.h>
#include "cxlpci.h"
#include "cxlmem.h"
#include "core.h"
#include "cxl.h"

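/*
 * State collected per DSMAS subtable while parsing an endpoint CDAT: the DPA
 * range the structure describes, the DSMAD handle used to match DSLBIS
 * entries, the raw CDAT coordinates plus the coordinates combined with the
 * path to the endpoint, and the QoS class resolved for the range.
 */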
struct dsmas_entry {
	struct range dpa_range;
	u8 handle;
	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
	struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX];
	int entries;
	int qos_class;
};

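/*
 * Multiply a DSLBIS/SSLBIS entry by its entry_base_unit and, for the latency
 * types, divide by 1000 so that picosecond-encoded values come out in
 * nanoseconds. For example, an entry of 300 with a base unit of 100 encodes
 * 30000 ps, which normalizes to 30 ns.
 */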
static u32 cdat_normalize(u16 entry, u64 base, u8 type)
{
	u32 value;

	/*
	 * Check for invalid and overflow values
	 */
	if (entry == 0xffff || !entry)
		return 0;
	else if (base > (UINT_MAX / (entry)))
		return 0;

	/*
	 * CDAT fields follow the format of HMAT fields. See table 5 Device
	 * Scoped Latency and Bandwidth Information Structure in Coherent Device
	 * Attribute Table (CDAT) Specification v1.01.
	 */
	value = entry * base;
	switch (type) {
	case ACPI_HMAT_ACCESS_LATENCY:
	case ACPI_HMAT_READ_LATENCY:
	case ACPI_HMAT_WRITE_LATENCY:
		value = DIV_ROUND_UP(value, 1000);
		break;
	default:
		break;
	}

	return value;
}

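/*
 * cdat_table_parse() callback for DSMAS (Device Scoped Memory Affinity
 * Structure) subtables: allocate a dsmas_entry for the described DPA range
 * and insert it into the xarray indexed by the DSMAD handle.
 */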
static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
			      const unsigned long end)
{
	struct acpi_cdat_header *hdr = &header->cdat;
	struct acpi_cdat_dsmas *dsmas;
	int size = sizeof(*hdr) + sizeof(*dsmas);
	struct xarray *dsmas_xa = arg;
	struct dsmas_entry *dent;
	u16 len;
	int rc;

	len = le16_to_cpu((__force __le16)hdr->length);
	if (len != size || (unsigned long)hdr + len > end) {
		pr_warn("Malformed DSMAS table length: (%u:%u)\n", size, len);
		return -EINVAL;
	}

	/* Skip common header */
	dsmas = (struct acpi_cdat_dsmas *)(hdr + 1);

	dent = kzalloc(sizeof(*dent), GFP_KERNEL);
	if (!dent)
		return -ENOMEM;

	dent->handle = dsmas->dsmad_handle;
	dent->dpa_range.start = le64_to_cpu((__force __le64)dsmas->dpa_base_address);
	dent->dpa_range.end = le64_to_cpu((__force __le64)dsmas->dpa_base_address) +
			      le64_to_cpu((__force __le64)dsmas->dpa_length) - 1;

	rc = xa_insert(dsmas_xa, dent->handle, dent, GFP_KERNEL);
	if (rc) {
		kfree(dent);
		return rc;
	}

	return 0;
}

static void __cxl_access_coordinate_set(struct access_coordinate *coord,
					int access, unsigned int val)
{
	switch (access) {
	case ACPI_HMAT_ACCESS_LATENCY:
		coord->read_latency = val;
		coord->write_latency = val;
		break;
	case ACPI_HMAT_READ_LATENCY:
		coord->read_latency = val;
		break;
	case ACPI_HMAT_WRITE_LATENCY:
		coord->write_latency = val;
		break;
	case ACPI_HMAT_ACCESS_BANDWIDTH:
		coord->read_bandwidth = val;
		coord->write_bandwidth = val;
		break;
	case ACPI_HMAT_READ_BANDWIDTH:
		coord->read_bandwidth = val;
		break;
	case ACPI_HMAT_WRITE_BANDWIDTH:
		coord->write_bandwidth = val;
		break;
	}
}

static void cxl_access_coordinate_set(struct access_coordinate *coord,
				      int access, unsigned int val)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
		__cxl_access_coordinate_set(&coord[i], access, val);
}

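/*
 * cdat_table_parse() callback for DSLBIS (Device Scoped Latency and Bandwidth
 * Information Structure) subtables: look up the dsmas_entry that matches the
 * DSMAS handle and record the normalized latency or bandwidth value in its
 * CDAT coordinates.
 */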
static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
			       const unsigned long end)
{
	struct acpi_cdat_header *hdr = &header->cdat;
	struct acpi_cdat_dslbis *dslbis;
	int size = sizeof(*hdr) + sizeof(*dslbis);
	struct xarray *dsmas_xa = arg;
	struct dsmas_entry *dent;
	__le64 le_base;
	__le16 le_val;
	u64 val;
	u16 len;

	len = le16_to_cpu((__force __le16)hdr->length);
	if (len != size || (unsigned long)hdr + len > end) {
		pr_warn("Malformed DSLBIS table length: (%u:%u)\n", size, len);
		return -EINVAL;
	}

	/* Skip common header */
	dslbis = (struct acpi_cdat_dslbis *)(hdr + 1);

	/* Skip unrecognized data type */
	if (dslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH)
		return 0;

	/* Not a memory type, skip */
	if ((dslbis->flags & ACPI_HMAT_MEMORY_HIERARCHY) != ACPI_HMAT_MEMORY)
		return 0;

	dent = xa_load(dsmas_xa, dslbis->handle);
	if (!dent) {
		pr_warn("No matching DSMAS entry for DSLBIS entry.\n");
		return 0;
	}

	le_base = (__force __le64)dslbis->entry_base_unit;
	le_val = (__force __le16)dslbis->entry[0];
	val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
			     dslbis->data_type);

	cxl_access_coordinate_set(dent->cdat_coord, dslbis->data_type, val);

	return 0;
}

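/*
 * cdat_table_parse() returns the number of subtables it handled. Map that to
 * 0 on success and -ENOENT when no subtable of the requested type was found.
 */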
static int cdat_table_parse_output(int rc)
{
	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ENOENT;

	return 0;
}

static int cxl_cdat_endpoint_process(struct cxl_port *port,
				     struct xarray *dsmas_xa)
{
	int rc;

	rc = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler,
			      dsmas_xa, port->cdat.table, port->cdat.length);
	rc = cdat_table_parse_output(rc);
	if (rc)
		return rc;

	rc = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler,
			      dsmas_xa, port->cdat.table, port->cdat.length);
	return cdat_table_parse_output(rc);
}

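/*
 * For each parsed DSMAS range, combine the device's CDAT coordinates with the
 * access coordinates of the path from the CPU to the endpoint, then use the
 * CXL root's ->qos_class() op to resolve a QoS class for the combined CPU
 * coordinates.
 */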
static int cxl_port_perf_data_calculate(struct cxl_port *port,
					struct xarray *dsmas_xa)
{
	struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
	struct dsmas_entry *dent;
	int valid_entries = 0;
	unsigned long index;
	int rc;

	rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
	if (rc) {
		dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
		return rc;
	}

	struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);

	if (!cxl_root)
		return -ENODEV;

	if (!cxl_root->ops || !cxl_root->ops->qos_class)
		return -EOPNOTSUPP;

	xa_for_each(dsmas_xa, index, dent) {
		int qos_class;

		cxl_coordinates_combine(dent->coord, dent->cdat_coord, ep_c);
		dent->entries = 1;
		rc = cxl_root->ops->qos_class(cxl_root,
					      &dent->coord[ACCESS_COORDINATE_CPU],
					      1, &qos_class);
		if (rc != 1)
			continue;

		valid_entries++;
		dent->qos_class = qos_class;
	}

	if (!valid_entries)
		return -ENOENT;

	return 0;
}

static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
			      struct cxl_dpa_perf *dpa_perf)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		dpa_perf->coord[i] = dent->coord[i];
		dpa_perf->cdat_coord[i] = dent->cdat_coord[i];
	}
	dpa_perf->dpa_range = dent->dpa_range;
	dpa_perf->qos_class = dent->qos_class;
	dev_dbg(dev,
		"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
		dent->dpa_range.start, dpa_perf->qos_class,
		dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
		dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
		dent->coord[ACCESS_COORDINATE_CPU].read_latency,
		dent->coord[ACCESS_COORDINATE_CPU].write_latency);
}

static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
				     struct xarray *dsmas_xa)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	struct device *dev = cxlds->dev;
	struct range pmem_range = {
		.start = cxlds->pmem_res.start,
		.end = cxlds->pmem_res.end,
	};
	struct range ram_range = {
		.start = cxlds->ram_res.start,
		.end = cxlds->ram_res.end,
	};
	struct dsmas_entry *dent;
	unsigned long index;

	xa_for_each(dsmas_xa, index, dent) {
		if (resource_size(&cxlds->ram_res) &&
		    range_contains(&ram_range, &dent->dpa_range))
			update_perf_entry(dev, dent, &mds->ram_perf);
		else if (resource_size(&cxlds->pmem_res) &&
			 range_contains(&pmem_range, &dent->dpa_range))
			update_perf_entry(dev, dent, &mds->pmem_perf);
		else
			dev_dbg(dev, "no partition for dsmas dpa: %#llx\n",
				dent->dpa_range.start);
	}
}

static int match_cxlrd_qos_class(struct device *dev, void *data)
{
	int dev_qos_class = *(int *)data;
	struct cxl_root_decoder *cxlrd;

	if (!is_root_decoder(dev))
		return 0;

	cxlrd = to_cxl_root_decoder(dev);

	if (cxlrd->qos_class == CXL_QOS_CLASS_INVALID)
		return 0;

	if (cxlrd->qos_class == dev_qos_class)
		return 1;

	return 0;
}

static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf)
{
	*dpa_perf = (struct cxl_dpa_perf) {
		.qos_class = CXL_QOS_CLASS_INVALID,
	};
}

static bool cxl_qos_match(struct cxl_port *root_port,
			  struct cxl_dpa_perf *dpa_perf)
{
	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
		return false;

	if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class,
				   match_cxlrd_qos_class))
		return false;

	return true;
}

static int match_cxlrd_hb(struct device *dev, void *data)
{
	struct device *host_bridge = data;
	struct cxl_switch_decoder *cxlsd;
	struct cxl_root_decoder *cxlrd;

	if (!is_root_decoder(dev))
		return 0;

	cxlrd = to_cxl_root_decoder(dev);
	cxlsd = &cxlrd->cxlsd;

	guard(rwsem_read)(&cxl_region_rwsem);
	for (int i = 0; i < cxlsd->nr_targets; i++) {
		if (host_bridge == cxlsd->target[i]->dport_dev)
			return 1;
	}

	return 0;
}

static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	struct cxl_port *root_port;
	int rc;

	struct cxl_root *cxl_root __free(put_cxl_root) =
		find_cxl_root(cxlmd->endpoint);

	if (!cxl_root)
		return -ENODEV;

	root_port = &cxl_root->port;

	/* Check that the QTG IDs are all sane between end device and root decoders */
	if (!cxl_qos_match(root_port, &mds->ram_perf))
		reset_dpa_perf(&mds->ram_perf);
	if (!cxl_qos_match(root_port, &mds->pmem_perf))
		reset_dpa_perf(&mds->pmem_perf);

	/* Check to make sure that the device's host bridge is under a root decoder */
	rc = device_for_each_child(&root_port->dev,
				   cxlmd->endpoint->host_bridge, match_cxlrd_hb);
	if (!rc) {
		reset_dpa_perf(&mds->ram_perf);
		reset_dpa_perf(&mds->pmem_perf);
	}

	return rc;
}

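/*
 * Cleanup helper paired with the DEFINE_FREE() below so that the dsmas
 * xarray contents are released automatically when a scope annotated with
 * __free(dsmas) exits.
 */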
static void discard_dsmas(struct xarray *xa)
{
	unsigned long index;
	void *ent;

	xa_for_each(xa, index, ent) {
		xa_erase(xa, index);
		kfree(ent);
	}
	xa_destroy(xa);
}
DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T))

void cxl_endpoint_parse_cdat(struct cxl_port *port)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct xarray __dsmas_xa;
	struct xarray *dsmas_xa __free(dsmas) = &__dsmas_xa;
	int rc;

	xa_init(&__dsmas_xa);
	if (!port->cdat.table)
		return;

	rc = cxl_cdat_endpoint_process(port, dsmas_xa);
	if (rc < 0) {
		dev_dbg(&port->dev, "Failed to parse CDAT: %d\n", rc);
		return;
	}

	rc = cxl_port_perf_data_calculate(port, dsmas_xa);
	if (rc) {
		dev_dbg(&port->dev, "Failed to do perf coord calculations.\n");
		return;
	}

	cxl_memdev_set_qos_class(cxlds, dsmas_xa);
	cxl_qos_class_verify(cxlmd);
	cxl_memdev_update_perf(cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);

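/*
 * cdat_table_parse() callback for SSLBIS (Switch Scoped Latency and Bandwidth
 * Information Structure) subtables: for each port pair entry, determine which
 * downstream port the value applies to (or "any port") and record the
 * normalized latency or bandwidth in the matching dport's access coordinates.
 */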
static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
			       const unsigned long end)
{
	struct acpi_cdat_sslbis_table {
		struct acpi_cdat_header header;
		struct acpi_cdat_sslbis sslbis_header;
		struct acpi_cdat_sslbe entries[];
	} *tbl = (struct acpi_cdat_sslbis_table *)header;
	int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header);
	struct acpi_cdat_sslbis *sslbis;
	struct cxl_port *port = arg;
	struct device *dev = &port->dev;
	int remain, entries, i;
	u16 len;

	len = le16_to_cpu((__force __le16)header->cdat.length);
	remain = len - size;
	if (!remain || remain % sizeof(tbl->entries[0]) ||
	    (unsigned long)header + len > end) {
		dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len);
		return -EINVAL;
	}

	sslbis = &tbl->sslbis_header;
	/* Unrecognized data type, we can skip */
	if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH)
		return 0;

	entries = remain / sizeof(tbl->entries[0]);
	if (struct_size(tbl, entries, entries) != len)
		return -EINVAL;

	for (i = 0; i < entries; i++) {
		u16 x = le16_to_cpu((__force __le16)tbl->entries[i].portx_id);
		u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id);
		__le64 le_base;
		__le16 le_val;
		struct cxl_dport *dport;
		unsigned long index;
		u16 dsp_id;
		u64 val;

		switch (x) {
		case ACPI_CDAT_SSLBIS_US_PORT:
			dsp_id = y;
			break;
		case ACPI_CDAT_SSLBIS_ANY_PORT:
			switch (y) {
			case ACPI_CDAT_SSLBIS_US_PORT:
				dsp_id = x;
				break;
			case ACPI_CDAT_SSLBIS_ANY_PORT:
				dsp_id = ACPI_CDAT_SSLBIS_ANY_PORT;
				break;
			default:
				dsp_id = y;
				break;
			}
			break;
		default:
			dsp_id = x;
			break;
		}

		le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
		le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
		val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
				     sslbis->data_type);

		xa_for_each(&port->dports, index, dport) {
			if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
			    dsp_id == dport->port_id) {
				cxl_access_coordinate_set(dport->coord,
							  sslbis->data_type,
							  val);
			}
		}
	}

	return 0;
}

void cxl_switch_parse_cdat(struct cxl_port *port)
{
	int rc;

	if (!port->cdat.table)
		return;

	rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler,
			      port, port->cdat.table, port->cdat.length);
	rc = cdat_table_parse_output(rc);
	if (rc)
		dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc);
}
EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);

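/*
 * Combining rule for two coordinates along a path: bandwidth is limited by
 * the slower hop (min), latency accumulates across hops (sum). A zero
 * bandwidth means "unknown" and leaves the output bandwidth unchanged.
 */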
static void __cxl_coordinates_combine(struct access_coordinate *out,
				      struct access_coordinate *c1,
				      struct access_coordinate *c2)
{
	if (c1->write_bandwidth && c2->write_bandwidth)
		out->write_bandwidth = min(c1->write_bandwidth,
					   c2->write_bandwidth);
	out->write_latency = c1->write_latency + c2->write_latency;

	if (c1->read_bandwidth && c2->read_bandwidth)
		out->read_bandwidth = min(c1->read_bandwidth,
					  c2->read_bandwidth);
	out->read_latency = c1->read_latency + c2->read_latency;
}

/**
 * cxl_coordinates_combine - Combine the two input coordinates
 *
 * @out: Output coordinate of c1 and c2 combined
 * @c1: input coordinates
 * @c2: input coordinates
 */
void cxl_coordinates_combine(struct access_coordinate *out,
			     struct access_coordinate *c1,
			     struct access_coordinate *c2)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
		__cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
}

MODULE_IMPORT_NS(CXL);

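/*
 * Unlike cxl_coordinates_combine(), which takes the min along a path, summing
 * bandwidth is used when aggregating parallel contributors that share the
 * same upstream device.
 */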
static void cxl_bandwidth_add(struct access_coordinate *coord,
			      struct access_coordinate *c1,
			      struct access_coordinate *c2)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		coord[i].read_bandwidth = c1[i].read_bandwidth +
					  c2[i].read_bandwidth;
		coord[i].write_bandwidth = c1[i].write_bandwidth +
					   c2[i].write_bandwidth;
	}
}

static bool dpa_perf_contains(struct cxl_dpa_perf *perf,
			      struct resource *dpa_res)
{
	struct range dpa = {
		.start = dpa_res->start,
		.end = dpa_res->end,
	};

	return range_contains(&perf->dpa_range, &dpa);
}

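/*
 * Select the ram or pmem performance data for an endpoint decoder based on
 * the decoder mode, and only if that data actually covers the decoder's DPA
 * resource.
 */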
static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled,
					       enum cxl_decoder_mode mode)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
	struct cxl_dpa_perf *perf;

	switch (mode) {
	case CXL_DECODER_RAM:
		perf = &mds->ram_perf;
		break;
	case CXL_DECODER_PMEM:
		perf = &mds->pmem_perf;
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	if (!dpa_perf_contains(perf, cxled->dpa_res))
		return ERR_PTR(-EINVAL);

	return perf;
}

/*
 * Transient context for containing the current calculation of bandwidth when
 * walking the port hierarchy to deal with a shared upstream link.
 */
struct cxl_perf_ctx {
	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
	struct cxl_port *port;
};

/**
 * cxl_endpoint_gather_bandwidth - collect all the endpoint bandwidth in an xarray
 * @cxlr: CXL region for the bandwidth calculation
 * @cxled: endpoint decoder to start on
 * @usp_xa: (output) the xarray that collects all the bandwidth coordinates
 *          indexed by the upstream device with data of 'struct cxl_perf_ctx'.
 * @gp_is_root: (output) bool of whether the grandparent is cxl root.
 *
 * Return: 0 for success or -errno
 *
 * Collects aggregated endpoint bandwidth and stores the bandwidth in
 * an xarray indexed by the upstream device of the switch or the RP
 * device. Each endpoint's contribution is the minimum of the bandwidth from
 * the DSLBIS of the endpoint CDAT, the endpoint upstream link bandwidth, and
 * the bandwidth from the SSLBIS of the switch CDAT for the switch upstream
 * port to the downstream port that's associated with the endpoint. If the
 * device is directly connected to a RP, then no SSLBIS is involved.
 */
static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr,
					 struct cxl_endpoint_decoder *cxled,
					 struct xarray *usp_xa,
					 bool *gp_is_root)
{
	struct cxl_port *endpoint = to_cxl_port(cxled->cxld.dev.parent);
	struct cxl_port *parent_port = to_cxl_port(endpoint->dev.parent);
	struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent);
	struct access_coordinate pci_coord[ACCESS_COORDINATE_MAX];
	struct access_coordinate sw_coord[ACCESS_COORDINATE_MAX];
	struct access_coordinate ep_coord[ACCESS_COORDINATE_MAX];
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	struct cxl_perf_ctx *perf_ctx;
	struct cxl_dpa_perf *perf;
	unsigned long index;
	void *ptr;
	int rc;

	if (!dev_is_pci(cxlds->dev))
		return -ENODEV;

	if (cxlds->rcd)
		return -ENODEV;

	perf = cxled_get_dpa_perf(cxled, cxlr->mode);
	if (IS_ERR(perf))
		return PTR_ERR(perf);

	gp_port = to_cxl_port(parent_port->dev.parent);
	*gp_is_root = is_cxl_root(gp_port);

	/*
	 * If the grandparent is cxl root, then index is the root port,
	 * otherwise it's the parent switch upstream device.
	 */
	if (*gp_is_root)
		index = (unsigned long)endpoint->parent_dport->dport_dev;
	else
		index = (unsigned long)parent_port->uport_dev;

	perf_ctx = xa_load(usp_xa, index);
	if (!perf_ctx) {
		struct cxl_perf_ctx *c __free(kfree) =
			kzalloc(sizeof(*perf_ctx), GFP_KERNEL);

		if (!c)
			return -ENOMEM;
		ptr = xa_store(usp_xa, index, c, GFP_KERNEL);
		if (xa_is_err(ptr))
			return xa_err(ptr);
		perf_ctx = no_free_ptr(c);
		perf_ctx->port = parent_port;
	}

	/* Direct upstream link from EP bandwidth */
	rc = cxl_pci_get_bandwidth(pdev, pci_coord);
	if (rc < 0)
		return rc;

	/*
	 * Min of upstream link bandwidth and Endpoint CDAT bandwidth from
	 * DSLBIS.
	 */
	cxl_coordinates_combine(ep_coord, pci_coord, perf->cdat_coord);

	/*
	 * If grandparent port is root, then there's no switch involved and
	 * the endpoint is connected to a root port.
	 */
	if (!*gp_is_root) {
		/*
		 * Retrieve the switch SSLBIS for switch downstream port
		 * associated with the endpoint bandwidth.
		 */
		rc = cxl_port_get_switch_dport_bandwidth(endpoint, sw_coord);
		if (rc)
			return rc;

		/*
		 * Min of the earlier coordinates with the switch SSLBIS
		 * bandwidth
		 */
		cxl_coordinates_combine(ep_coord, ep_coord, sw_coord);
	}

	/*
	 * Aggregate the computed bandwidth with the current aggregated bandwidth
	 * of the endpoints with the same switch upstream device or RP.
	 */
	cxl_bandwidth_add(perf_ctx->coord, perf_ctx->coord, ep_coord);

	return 0;
}

static void free_perf_xa(struct xarray *xa)
{
	struct cxl_perf_ctx *ctx;
	unsigned long index;

	if (!xa)
		return;

	xa_for_each(xa, index, ctx)
		kfree(ctx);
	xa_destroy(xa);
	kfree(xa);
}
DEFINE_FREE(free_perf_xa, struct xarray *, if (_T) free_perf_xa(_T))

/**
 * cxl_switch_gather_bandwidth - collect all the bandwidth at switch level in an xarray
 * @cxlr: The region being operated on
 * @input_xa: xarray indexed by upstream device of a switch with data of 'struct
 *            cxl_perf_ctx'
 * @gp_is_root: (output) bool of whether the grandparent is cxl root.
 *
 * Return: a xarray of resulting cxl_perf_ctx per parent switch or root port
 *         or ERR_PTR(-errno)
 *
 * Iterate through the xarray. Take the minimum of the downstream calculated
 * bandwidth, the upstream link bandwidth, and the SSLBIS of the upstream
 * switch, if one exists. Sum the resulting bandwidth under the switch upstream
 * device or a RP device. The function can be called iteratively over multiple
 * levels of switches if they are present.
 */
static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
						  struct xarray *input_xa,
						  bool *gp_is_root)
{
	struct xarray *res_xa __free(free_perf_xa) =
		kzalloc(sizeof(*res_xa), GFP_KERNEL);
	struct access_coordinate coords[ACCESS_COORDINATE_MAX];
	struct cxl_perf_ctx *ctx, *us_ctx;
	unsigned long index, us_index;
	int dev_count = 0;
	int gp_count = 0;
	void *ptr;
	int rc;

	if (!res_xa)
		return ERR_PTR(-ENOMEM);
	xa_init(res_xa);

	xa_for_each(input_xa, index, ctx) {
		struct device *dev = (struct device *)index;
		struct cxl_port *port = ctx->port;
		struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
		struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent);
		struct cxl_dport *dport = port->parent_dport;
		bool is_root = false;

		dev_count++;
		if (is_cxl_root(gp_port)) {
			is_root = true;
			gp_count++;
		}

		/*
		 * If the grandparent is cxl root, then index is the root port,
		 * otherwise it's the parent switch upstream device.
		 */
		if (is_root)
			us_index = (unsigned long)port->parent_dport->dport_dev;
		else
			us_index = (unsigned long)parent_port->uport_dev;

		us_ctx = xa_load(res_xa, us_index);
		if (!us_ctx) {
			struct cxl_perf_ctx *n __free(kfree) =
				kzalloc(sizeof(*n), GFP_KERNEL);

			if (!n)
				return ERR_PTR(-ENOMEM);

			ptr = xa_store(res_xa, us_index, n, GFP_KERNEL);
			if (xa_is_err(ptr))
				return ERR_PTR(xa_err(ptr));
			us_ctx = no_free_ptr(n);
			us_ctx->port = parent_port;
		}

		/*
		 * If the device isn't an upstream PCIe port, there's something
		 * wrong with the topology.
		 */
		if (!dev_is_pci(dev))
			return ERR_PTR(-EINVAL);

		/* Retrieve the upstream link bandwidth */
		rc = cxl_pci_get_bandwidth(to_pci_dev(dev), coords);
		if (rc)
			return ERR_PTR(-ENXIO);

		/*
		 * Take the min of downstream bandwidth and the upstream link
		 * bandwidth.
		 */
		cxl_coordinates_combine(coords, coords, ctx->coord);

		/*
		 * Take the min of the calculated bandwidth and the upstream
		 * switch SSLBIS bandwidth if there's a parent switch
		 */
		if (!is_root)
			cxl_coordinates_combine(coords, coords, dport->coord);

		/*
		 * Aggregate the calculated bandwidth common to an upstream
		 * switch.
		 */
		cxl_bandwidth_add(us_ctx->coord, us_ctx->coord, coords);
	}

	/* Asymmetric topology detected. */
	if (gp_count) {
		if (gp_count != dev_count) {
			dev_dbg(&cxlr->dev,
				"Asymmetric hierarchy detected, bandwidth not updated\n");
			return ERR_PTR(-EOPNOTSUPP);
		}
		*gp_is_root = true;
	}

	return no_free_ptr(res_xa);
}

/**
 * cxl_rp_gather_bandwidth - handle the root port level bandwidth collection
 * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated
 *      below each root port device.
 *
 * Return: xarray that holds cxl_perf_ctx per host bridge or ERR_PTR(-errno)
 */
static struct xarray *cxl_rp_gather_bandwidth(struct xarray *xa)
{
	struct xarray *hb_xa __free(free_perf_xa) =
		kzalloc(sizeof(*hb_xa), GFP_KERNEL);
	struct cxl_perf_ctx *ctx;
	unsigned long index;

	if (!hb_xa)
		return ERR_PTR(-ENOMEM);
	xa_init(hb_xa);

	xa_for_each(xa, index, ctx) {
		struct cxl_port *port = ctx->port;
		unsigned long hb_index = (unsigned long)port->uport_dev;
		struct cxl_perf_ctx *hb_ctx;
		void *ptr;

		hb_ctx = xa_load(hb_xa, hb_index);
		if (!hb_ctx) {
			struct cxl_perf_ctx *n __free(kfree) =
				kzalloc(sizeof(*n), GFP_KERNEL);

			if (!n)
				return ERR_PTR(-ENOMEM);
			ptr = xa_store(hb_xa, hb_index, n, GFP_KERNEL);
			if (xa_is_err(ptr))
				return ERR_PTR(xa_err(ptr));
			hb_ctx = no_free_ptr(n);
			hb_ctx->port = port;
		}

		cxl_bandwidth_add(hb_ctx->coord, hb_ctx->coord, ctx->coord);
	}

	return no_free_ptr(hb_xa);
}

/**
 * cxl_hb_gather_bandwidth - handle the host bridge level bandwidth collection
 * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated
 *      below each host bridge.
 *
 * Return: xarray that holds cxl_perf_ctx per ACPI0017 device or ERR_PTR(-errno)
 */
static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
{
	struct xarray *mw_xa __free(free_perf_xa) =
		kzalloc(sizeof(*mw_xa), GFP_KERNEL);
	struct cxl_perf_ctx *ctx;
	unsigned long index;

	if (!mw_xa)
		return ERR_PTR(-ENOMEM);
	xa_init(mw_xa);

	xa_for_each(xa, index, ctx) {
		struct cxl_port *port = ctx->port;
		struct cxl_port *parent_port;
		struct cxl_perf_ctx *mw_ctx;
		struct cxl_dport *dport;
		unsigned long mw_index;
		void *ptr;

		parent_port = to_cxl_port(port->dev.parent);
		mw_index = (unsigned long)parent_port->uport_dev;

		mw_ctx = xa_load(mw_xa, mw_index);
		if (!mw_ctx) {
			struct cxl_perf_ctx *n __free(kfree) =
				kzalloc(sizeof(*n), GFP_KERNEL);

			if (!n)
				return ERR_PTR(-ENOMEM);
			ptr = xa_store(mw_xa, mw_index, n, GFP_KERNEL);
			if (xa_is_err(ptr))
				return ERR_PTR(xa_err(ptr));
			mw_ctx = no_free_ptr(n);
		}

		dport = port->parent_dport;
		cxl_coordinates_combine(ctx->coord, ctx->coord, dport->coord);
		cxl_bandwidth_add(mw_ctx->coord, mw_ctx->coord, ctx->coord);
	}

	return no_free_ptr(mw_xa);
}

/**
 * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
 * @cxlr: The region being operated on
 * @input_xa: xarray holding cxl_perf_ctx with calculated bandwidth per ACPI0017 instance
 */
static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
					struct xarray *input_xa)
{
	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
	struct cxl_perf_ctx *ctx;
	unsigned long index;

	memset(coord, 0, sizeof(coord));
	xa_for_each(input_xa, index, ctx)
		cxl_bandwidth_add(coord, coord, ctx->coord);

	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		cxlr->coord[i].read_bandwidth = coord[i].read_bandwidth;
		cxlr->coord[i].write_bandwidth = coord[i].write_bandwidth;
	}
}

/**
 * cxl_region_shared_upstream_bandwidth_update - Recalculate the bandwidth for
 *						 the region
 * @cxlr: the cxl region to recalculate
 *
 * The function walks the topology from the bottom up and calculates the
 * bandwidth. It starts at the endpoints, processes the switches if any, then
 * the root port level, then the host bridge level, and finally aggregates at
 * the region.
 */
void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr)
{
	struct xarray *working_xa;
	int root_count = 0;
	bool is_root;
	int rc;

	lockdep_assert_held(&cxl_dpa_rwsem);

	struct xarray *usp_xa __free(free_perf_xa) =
		kzalloc(sizeof(*usp_xa), GFP_KERNEL);

	if (!usp_xa)
		return;

	xa_init(usp_xa);

	/* Collect bandwidth data from all the endpoints. */
	for (int i = 0; i < cxlr->params.nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = cxlr->params.targets[i];

		is_root = false;
		rc = cxl_endpoint_gather_bandwidth(cxlr, cxled, usp_xa, &is_root);
		if (rc)
			return;
		root_count += is_root;
	}

	/* Detect asymmetric hierarchy with some direct attached endpoints. */
	if (root_count && root_count != cxlr->params.nr_targets) {
		dev_dbg(&cxlr->dev,
			"Asymmetric hierarchy detected, bandwidth not updated\n");
		return;
	}

	/*
	 * Walk up one or more switches to deal with the bandwidth of the
	 * switches if they exist. Endpoints directly attached to RPs skip
	 * over this part.
	 */
	if (!root_count) {
		do {
			working_xa = cxl_switch_gather_bandwidth(cxlr, usp_xa,
								 &is_root);
			if (IS_ERR(working_xa))
				return;
			free_perf_xa(usp_xa);
			usp_xa = working_xa;
		} while (!is_root);
	}

	/* Handle the bandwidth at the root port of the hierarchy */
	working_xa = cxl_rp_gather_bandwidth(usp_xa);
	if (IS_ERR(working_xa))
		return;
	free_perf_xa(usp_xa);
	usp_xa = working_xa;

	/* Handle the bandwidth at the host bridge of the hierarchy */
	working_xa = cxl_hb_gather_bandwidth(usp_xa);
	if (IS_ERR(working_xa))
		return;
	free_perf_xa(usp_xa);
	usp_xa = working_xa;

	/*
	 * Aggregate all the bandwidth collected per CFMWS (ACPI0017) and
	 * update the region bandwidth with the final calculated values.
	 */
	cxl_region_update_bandwidth(cxlr, usp_xa);
}

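/*
 * Fold one endpoint decoder's performance data into the region-wide
 * coordinates: bandwidth of interleaved endpoints adds up, while the region
 * latency is bounded by the worst (largest) endpoint latency.
 */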
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
				    struct cxl_endpoint_decoder *cxled)
{
	struct cxl_dpa_perf *perf;

	lockdep_assert_held(&cxl_dpa_rwsem);

	perf = cxled_get_dpa_perf(cxled, cxlr->mode);
	if (IS_ERR(perf))
		return;

	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		/* Get total bandwidth and the worst latency for the cxl region */
		cxlr->coord[i].read_latency = max_t(unsigned int,
						    cxlr->coord[i].read_latency,
						    perf->coord[i].read_latency);
		cxlr->coord[i].write_latency = max_t(unsigned int,
						     cxlr->coord[i].write_latency,
						     perf->coord[i].write_latency);
		cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
		cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
	}
}

int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
				       enum access_coordinate_class access)
{
	return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
}

bool cxl_need_node_perf_attrs_update(int nid)
{
	return !acpi_node_backed_by_real_pxm(nid);
}