route.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/kconfig.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

#include <trace/events/mctp.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;
static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);

/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
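/* Find a local socket bound to the network, message type and destination EID
 * of an incoming skb. Called under the RCU read lock; returns NULL if no
 * bound socket matches.
 */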
static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *mh;
	struct sock *sk;
	u8 type;

	WARN_ON(!rcu_read_lock_held());

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	sk_for_each_rcu(sk, &net->mctp.binds) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (!mctp_address_matches(msk->bind_addr, mh->dest))
			continue;

		return msk;
	}

	return NULL;
}
/* A note on the key allocations.
 *
 * struct net->mctp.keys contains our set of currently-allocated keys for
 * MCTP tag management. The lookup tuple for these is the peer EID,
 * local EID and MCTP tag.
 *
 * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
 * broadcast message is sent, we may receive responses from any peer EID.
 * Because the broadcast dest address is equivalent to ANY, we create
 * a key with (local = local-eid, peer = ANY). This allows a match on the
 * incoming broadcast responses from any peer.
 *
 * We perform lookups when packets are received, and when tags are allocated
 * in two scenarios:
 *
 *  - when a packet is sent, with a locally-owned tag: we need to find an
 *    unused tag value for the (local, peer) EID pair.
 *
 *  - when a tag is manually allocated: we need to find an unused tag value
 *    for the peer EID, but don't have a specific local EID at that stage.
 *
 * In the latter case, on successful allocation, we end up with a tag with
 * (local = ANY, peer = peer-eid).
 *
 * So, the key set allows both a local EID of ANY, as well as a peer EID of
 * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
 * The matching (in mctp_key_match()) during lookup allows the match value to
 * be ANY in either the dest or source addresses.
 *
 * When allocating (+ inserting) a tag, we need to check for conflicts amongst
 * the existing tag set. This requires matching either exactly on the local
 * and peer addresses, or either being ANY.
 */
static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
			   mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
	if (key->net != net)
		return false;

	if (!mctp_address_matches(key->local_addr, local))
		return false;

	if (!mctp_address_matches(key->peer_addr, peer))
		return false;

	if (key->tag != tag)
		return false;

	return true;
}

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   unsigned int netid, mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, netid, mh->dest, peer, tag))
			continue;

		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  unsigned int net,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->net = net;
	key->peer_addr = peer;
	key->local_addr = local;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);
	sock_hold(key->sk);

	return key;
}

void mctp_key_unref(struct mctp_sk_key *key)
{
	unsigned long flags;

	if (!refcount_dec_and_test(&key->refs))
		return;

	/* even though no refs exist here, the lock allows us to stay
	 * consistent with the locking requirement of mctp_dev_release_key
	 */
	spin_lock_irqsave(&key->lock, flags);
	mctp_dev_release_key(key->dev, key);
	spin_unlock_irqrestore(&key->lock, flags);

	sock_put(key->sk);
	kfree(key);
}
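/* Insert a key into the net and socket key lists, checking for conflicts
 * against existing valid keys on the same (net, local, peer, tag) tuple, and
 * arming the socket's key expiry timer.
 */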
static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	if (sock_flag(&msk->sk, SOCK_DEAD)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->net, key->local_addr,
				   key->peer_addr, key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		refcount_inc(&key->refs);
		key->expiry = jiffies + mctp_key_lifetime;
		timer_reduce(&msk->key_expiry, key->expiry);

		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

out_unlock:
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}

/* Helper for mctp_route_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 */
static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
			       unsigned long flags, unsigned long reason)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	skb = key->reasm_head;
	key->reasm_head = NULL;

	if (!key->manual_alloc) {
		key->reasm_dead = true;
		key->valid = false;
		mctp_dev_release_key(key->dev, key);
	}
	spin_unlock_irqrestore(&key->lock, flags);

	if (!key->manual_alloc) {
		spin_lock_irqsave(&net->mctp.keys_lock, flags);
		if (!hlist_unhashed(&key->hlist)) {
			hlist_del_init(&key->hlist);
			hlist_del_init(&key->sklist);
			mctp_key_unref(key);
		}
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	/* and one for the local reference */
	mctp_key_unref(key);

	kfree_skb(skb);
}

#ifdef CONFIG_MCTP_FLOWS
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
	struct mctp_flow *flow;

	flow = skb_ext_add(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	refcount_inc(&key->refs);
	flow->key = key;
}

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
	struct mctp_sk_key *key;
	struct mctp_flow *flow;

	flow = skb_ext_find(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	key = flow->key;

	if (WARN_ON(key->dev && key->dev != dev))
		return;

	mctp_dev_set_key(dev, key);
}
#else
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
#endif
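/* Append an incoming fragment to the key's reassembly queue, checking that
 * its sequence number is the expected next value and that the reassembled
 * message stays within mctp_message_maxlen.
 */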
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		key->reasm_head = skb;
		key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
		key->last_seq = this_seq;
		return 0;
	}

	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		return -EINVAL;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		return -EINVAL;

	skb->next = NULL;
	skb->sk = NULL;
	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;
}
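/* Input path for locally-destined packets: match the packet to a socket or
 * an in-progress reassembly key, queue fragments, and deliver complete
 * messages to the owning socket.
 */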
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_sk_key *key, *any_key = NULL;
	struct net *net = dev_net(skb->dev);
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned int netid;
	unsigned long f;
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* We may be receiving a locally-routed packet; drop source sk
	 * accounting.
	 *
	 * From here, we will either queue the skb - either to a frag_queue, or
	 * to a receiving socket. When that succeeds, we clear the skb pointer;
	 * a non-NULL skb on exit will be otherwise unowned, and hence
	 * kfree_skb()-ed.
	 */
	skb_orphan(skb);

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	netid = mctp_cb(skb)->net;
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto out;

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src,dest,tag).
	 * we hold a ref on the key, and key->lock held.
	 */
	key = mctp_lookup_key(net, skb, netid, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 *
			 * this lookup requires key->peer to be MCTP_ADDR_ANY,
			 * it doesn't match just any key->peer.
			 */
			any_key = mctp_lookup_key(net, skb, netid,
						  MCTP_ADDR_ANY, &f);
			if (any_key) {
				msk = container_of(any_key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&any_key->lock, f);
			}
		}

		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(&msk->sk, skb);
			if (!rc)
				skb = NULL;
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_REPLIED);
				key = NULL;
			}
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop; we'll free the skb on exit from
			 * this function.
			 */
			rc = mctp_key_add(key, msk);
			if (!rc) {
				trace_mctp_key_acquire(key);
				skb = NULL;
			}

			/* we don't need to release key->lock on exit, so
			 * clean up here and suppress the unlock via
			 * setting to NULL
			 */
			mctp_key_unref(key);
			key = NULL;

		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_INVALIDATED);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
				if (!rc)
					skb = NULL;
			}
		}

	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head)
			rc = -EINVAL;
		else
			rc = mctp_frag_queue(key, skb);

		if (rc)
			goto out_unlock;

		/* we've queued; the queue owns the skb now */
		skb = NULL;

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
			if (!rc)
				key->reasm_head = NULL;
			__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
			key = NULL;
		}

	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
	if (any_key)
		mctp_key_unref(any_key);
out:
	kfree_skb(skb);
	return rc;
}

static unsigned int mctp_route_mtu(struct mctp_route *rt)
{
	return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
}
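/* Transmit a single, fully-formed MCTP packet on the route's device:
 * resolve the link-layer destination, build the hardware header, and hand
 * the skb to the device transmit queue.
 */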
static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *hdr = mctp_hdr(skb);
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	unsigned int mtu;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);

	mtu = READ_ONCE(skb->dev->mtu);
	if (skb->len > mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (cb->ifindex) {
		/* direct route; use the hwaddr we stashed in sendmsg */
		if (cb->halen != skb->dev->addr_len) {
			/* sanity check, sendmsg should have already caught this */
			kfree_skb(skb);
			return -EMSGSIZE;
		}
		daddr = cb->haddr;
	} else {
		/* If lookup fails let the device handle daddr==NULL */
		if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
			daddr = daddr_buf;
	}

	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc < 0) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	mctp_flow_prepare_output(skb, route->dev);

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	rt->output = mctp_route_discard;

	return rt;
}

unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	key->expiry = jiffies + mctp_key_lifetime;
	timer_reduce(&msk->key_expiry, key->expiry);

	/* we hold the net->key_lock here, allowing updates to both
	 * the net and sk lists
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	refcount_inc(&key->refs);
}
/* Allocate a locally-owned tag value for (local, peer), and reserve
 * it for the socket msk
 */
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
					 unsigned int netid,
					 mctp_eid_t local, mctp_eid_t peer,
					 bool manual, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (peer == MCTP_ADDR_NULL)
		peer = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* tags are net-specific */
		if (tmp->net != netid)
			continue;

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		/* Since we're avoiding conflicting entries, match peer and
		 * local addresses, including with a wildcard on ANY. See
		 * 'A note on key allocations' for background.
		 */
		if (peer != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->peer_addr, peer))
			continue;

		if (local != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->local_addr, local))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		trace_mctp_key_acquire(key);

		key->manual_alloc = manual;
		*tagp = key->tag;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits) {
		mctp_key_unref(key);
		return ERR_PTR(-EBUSY);
	}

	return key;
}
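/* Find an existing manually-allocated key matching the requested tag and
 * destination EID, taking a reference for the caller.
 */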
static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
						    unsigned int netid,
						    mctp_eid_t daddr,
						    u8 req_tag, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;

	req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
	key = NULL;

	spin_lock_irqsave(&mns->keys_lock, flags);

	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		if (tmp->net != netid)
			continue;

		if (tmp->tag != req_tag)
			continue;

		if (!mctp_address_matches(tmp->peer_addr, daddr))
			continue;

		if (!tmp->manual_alloc)
			continue;

		spin_lock(&tmp->lock);
		if (tmp->valid) {
			key = tmp;
			refcount_inc(&key->refs);
			spin_unlock(&tmp->lock);
			break;
		}
		spin_unlock(&tmp->lock);
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!key)
		return ERR_PTR(-ENOENT);

	if (tagp)
		*tagp = key->tag;

	return key;
}

/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return READ_ONCE(rt->dev->net) == net &&
		rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	ASSERT_RTNL();
	return rt1->dev->net == rt2->dev->net &&
		rt1->min == rt2->min &&
		rt1->max == rt2->max;
}
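/* Find a route for (dnet, daddr), taking a reference on the returned route;
 * release with mctp_route_release().
 */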
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
				     mctp_eid_t daddr)
{
	struct mctp_route *tmp, *rt = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		/* TODO: add metrics */
		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
			if (refcount_inc_not_zero(&tmp->refs)) {
				rt = tmp;
				break;
			}
		}
	}

	rcu_read_unlock();

	return rt;
}

static struct mctp_route *mctp_route_lookup_null(struct net *net,
						 struct net_device *dev)
{
	struct mctp_route *tmp, *rt = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
		    refcount_inc_not_zero(&tmp->refs)) {
			rt = tmp;
			break;
		}
	}

	rcu_read_unlock();

	return rt;
}
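/* Fragment a message into MTU-sized packets, copying the header into each
 * fragment, setting SOM/EOM flags and the packet sequence number, and
 * passing each fragment to the route's output function.
 */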
static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size, headroom;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* keep same headroom as the original skb */
	headroom = skb_headroom(skb);

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, headroom);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* we need to copy the extensions, for MCTP flow data */
		skb_ext_copy(skb2, skb);

		/* do route */
		rc = rt->output(rt, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	consume_skb(skb);
	return rc;
}
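/* Transmit a locally-generated message: resolve the source address and tag,
 * build the MCTP header, and send directly or via fragmentation depending
 * on the route MTU. The skb is consumed in all cases.
 */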
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_route tmp_rt = {0};
	struct mctp_sk_key *key;
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int netid;
	unsigned int mtu;
	mctp_eid_t saddr;
	bool ext_rt;
	int rc;
	u8 tag;

	rc = -ENODEV;

	if (rt) {
		ext_rt = false;
		if (WARN_ON(!rt->dev))
			goto out_release;

	} else if (cb->ifindex) {
		struct net_device *dev;

		ext_rt = true;
		rt = &tmp_rt;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
		if (!dev) {
			rcu_read_unlock();
			goto out_free;
		}
		rt->dev = __mctp_dev_get(dev);
		rcu_read_unlock();

		if (!rt->dev)
			goto out_release;

		/* establish temporary route - we set up enough to keep
		 * mctp_route_output happy
		 */
		rt->output = mctp_route_output;
		rt->mtu = 0;

	} else {
		rc = -EINVAL;
		goto out_free;
	}

	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
	if (rt->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = rt->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
	netid = READ_ONCE(rt->dev->net);

	if (rc)
		goto out_release;

	if (req_tag & MCTP_TAG_OWNER) {
		if (req_tag & MCTP_TAG_PREALLOC)
			key = mctp_lookup_prealloc_tag(msk, netid, daddr,
						       req_tag, &tag);
		else
			key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
						   false, &tag);

		if (IS_ERR(key)) {
			rc = PTR_ERR(key);
			goto out_release;
		}
		mctp_skb_set_flow(skb, key);
		/* done with the key in this scope */
		mctp_key_unref(key);
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		key = NULL;
		tag = req_tag & MCTP_TAG_MASK;
	}

	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = rt->dev->dev;

	/* cb->net will have been set on initial ingress */
	cb->src = saddr;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = mctp_route_mtu(rt);

	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
			MCTP_HDR_FLAG_EOM | tag;
		rc = rt->output(rt, skb);
	} else {
		rc = mctp_do_fragment_route(rt, skb, mtu, tag);
	}

	/* route output functions consume the skb, even on error */
	skb = NULL;

out_release:
	if (!ext_rt)
		mctp_route_release(rt);

	mctp_dev_put(tmp_rt.dev);

out_free:
	kfree_skb(skb);
	return rc;
}

/* route management */
static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			  unsigned int daddr_extent, unsigned int mtu,
			  unsigned char type)
{
	int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *ert;

	if (!mctp_address_unicast(daddr_start))
		return -EINVAL;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	switch (type) {
	case RTN_LOCAL:
		rtfn = mctp_route_input;
		break;
	case RTN_UNICAST:
		rtfn = mctp_route_output;
		break;
	default:
		return -EINVAL;
	}

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	rt->dev = mdev;
	mctp_dev_hold(rt->dev);
	rt->type = type;
	rt->output = rtfn;

	ASSERT_RTNL();
	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert)) {
			mctp_route_release(rt);
			return -EEXIST;
		}
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}

static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			     unsigned int daddr_extent, unsigned char type)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev &&
		    rt->min == daddr_start && rt->max == daddr_end &&
		    rt->type == type) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}
/* Incoming packet-handling */
static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_route *rt;
	struct mctp_hdr *mh;

	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	/* source must be valid unicast or null; drop reserved ranges and
	 * broadcast
	 */
	if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
		goto err_drop;

	/* dest address: as above, but allow broadcast */
	if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
	      mctp_address_broadcast(mh->dest)))
		goto err_drop;

	/* MCTP drivers must populate halen/haddr */
	if (dev->type == ARPHRD_MCTP) {
		cb = mctp_cb(skb);
	} else {
		cb = __mctp_cb(skb);
		cb->halen = 0;
	}
	cb->net = READ_ONCE(mdev->net);
	cb->ifindex = dev->ifindex;

	rt = mctp_route_lookup(net, cb->net, mh->dest);

	/* NULL EID, but addressed to our physical address */
	if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rt = mctp_route_lookup_null(net, dev);

	if (!rt)
		goto err_drop;

	rt->output(rt, skb);
	mctp_route_release(rt);
	mctp_dev_put(mdev);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	mctp_dev_put(mdev);
	return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};
/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST]		= { .type = NLA_U8 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_OIF]		= { .type = NLA_U32 },
};

/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
 * tb must hold RTA_MAX+1 elements.
 */
static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack,
			      struct nlattr **tb, struct rtmsg **rtm,
			      struct mctp_dev **mdev, mctp_eid_t *daddr_start)
{
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	unsigned int ifindex;
	int rc;

	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
			 rta_mctp_policy, extack);
	if (rc < 0) {
		NL_SET_ERR_MSG(extack, "incorrect format");
		return rc;
	}

	if (!tb[RTA_DST]) {
		NL_SET_ERR_MSG(extack, "dst EID missing");
		return -EINVAL;
	}
	*daddr_start = nla_get_u8(tb[RTA_DST]);

	if (!tb[RTA_OIF]) {
		NL_SET_ERR_MSG(extack, "ifindex missing");
		return -EINVAL;
	}
	ifindex = nla_get_u32(tb[RTA_OIF]);

	*rtm = nlmsg_data(nlh);
	if ((*rtm)->rtm_family != AF_MCTP) {
		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
		return -EINVAL;
	}

	dev = __dev_get_by_index(net, ifindex);
	if (!dev) {
		NL_SET_ERR_MSG(extack, "bad ifindex");
		return -ENODEV;
	}
	*mdev = mctp_dev_get_rtnl(dev);
	if (!*mdev)
		return -ENODEV;

	if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack, "no routes to loopback");
		return -EINVAL;
	}

	return 0;
}

static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
	[RTAX_MTU]		= { .type = NLA_U32 },
};

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	struct nlattr *tbx[RTAX_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	unsigned int mtu;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	if (rtm->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
		return -EINVAL;
	}

	mtu = 0;
	if (tb[RTA_METRICS]) {
		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
				      rta_metrics_policy, NULL);
		if (rc < 0)
			return rc;
		if (tbx[RTAX_MTU])
			mtu = nla_get_u32(tbx[RTAX_MTU]);
	}

	rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
			    rtm->rtm_type);
	return rc;
}

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	/* we only have unicast routes */
	if (rtm->rtm_type != RTN_UNICAST)
		return -EINVAL;

	rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
	return rc;
}
static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *hdr;
	void *metrics;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->rtm_family = AF_MCTP;

	/* we use the _len fields as a number of EIDs, rather than
	 * a number of bits in the address
	 */
	hdr->rtm_dst_len = rt->max - rt->min;
	hdr->rtm_src_len = 0;
	hdr->rtm_tos = 0;
	hdr->rtm_table = RT_TABLE_DEFAULT;
	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
	hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (rt->dev) {
		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
			goto cancel;
	}

	/* TODO: conditional neighbour physaddr? */

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}

	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}
/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	INIT_HLIST_HEAD(&ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
	return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
};

int __init mctp_routes_init(void)
{
	int err;

	dev_add_pack(&mctp_packet_type);

	err = register_pernet_subsys(&mctp_net_ops);
	if (err)
		goto err_pernet;

	err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
	if (err)
		goto err_rtnl;

	return 0;

err_rtnl:
	unregister_pernet_subsys(&mctp_net_ops);
err_pernet:
	dev_remove_pack(&mctp_packet_type);
	return err;
}

void mctp_routes_exit(void)
{
	rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
	unregister_pernet_subsys(&mctp_net_ops);
	dev_remove_pack(&mctp_packet_type);
}

#if IS_ENABLED(CONFIG_MCTP_TEST)
#include "test/route-test.c"
#endif