route.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/kconfig.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

#include <trace/events/mctp.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;
static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);

/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *mh;
	struct sock *sk;
	u8 type;

	WARN_ON(!rcu_read_lock_held());

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	sk_for_each_rcu(sk, &net->mctp.binds) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (!mctp_address_matches(msk->bind_addr, mh->dest))
			continue;

		return msk;
	}

	return NULL;
}

/* A note on the key allocations.
 *
 * struct net->mctp.keys contains our set of currently-allocated keys for
 * MCTP tag management. The lookup tuple for these is the peer EID,
 * local EID and MCTP tag.
 *
 * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
 * broadcast message is sent, we may receive responses from any peer EID.
 * Because the broadcast dest address is equivalent to ANY, we create
 * a key with (local = local-eid, peer = ANY). This allows a match on the
 * incoming broadcast responses from any peer.
 *
 * We perform lookups when packets are received, and when tags are allocated
 * in two scenarios:
 *
 * - when a packet is sent, with a locally-owned tag: we need to find an
 *   unused tag value for the (local, peer) EID pair.
 *
 * - when a tag is manually allocated: we need to find an unused tag value
 *   for the peer EID, but don't have a specific local EID at that stage.
 *
 * In the latter case, on successful allocation, we end up with a tag with
 * (local = ANY, peer = peer-eid).
 *
 * So, the key set allows both a local EID of ANY, as well as a peer EID of
 * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
 * The matching (in mctp_key_match()) during lookup allows the match value to
 * be ANY in either the dest or source addresses.
 *
 * When allocating (+ inserting) a tag, we need to check for conflicts amongst
 * the existing tag set. This requires matching either exactly on the local
 * and peer addresses, or either being ANY.
 */
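
/* For illustration: given a stored key (local 8, peer ANY, tag 1), an
 * allocation attempt for (local 8, peer 9) cannot reuse tag value 1, since
 * the ANY peer matches any peer EID; mctp_alloc_local_tag() below treats
 * such an entry as a conflict.
 */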
static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
			   mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
	if (key->net != net)
		return false;

	if (!mctp_address_matches(key->local_addr, local))
		return false;

	if (!mctp_address_matches(key->peer_addr, peer))
		return false;

	if (key->tag != tag)
		return false;

	return true;
}

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   unsigned int netid, mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, netid, mh->dest, peer, tag))
			continue;

		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  unsigned int net,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->net = net;
	key->peer_addr = peer;
	key->local_addr = local;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);
	sock_hold(key->sk);

	return key;
}
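
/* Drops one reference; on the final put, this releases the device key state
 * and the socket hold taken in mctp_key_alloc().
 */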
void mctp_key_unref(struct mctp_sk_key *key)
{
	unsigned long flags;

	if (!refcount_dec_and_test(&key->refs))
		return;

	/* even though no refs exist here, the lock allows us to stay
	 * consistent with the locking requirement of mctp_dev_release_key
	 */
	spin_lock_irqsave(&key->lock, flags);
	mctp_dev_release_key(key->dev, key);
	spin_unlock_irqrestore(&key->lock, flags);

	sock_put(key->sk);
	kfree(key);
}

static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	if (sock_flag(&msk->sk, SOCK_DEAD)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->net, key->local_addr,
				   key->peer_addr, key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		refcount_inc(&key->refs);
		key->expiry = jiffies + mctp_key_lifetime;
		timer_reduce(&msk->key_expiry, key->expiry);

		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

out_unlock:
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}

/* Helper for mctp_route_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 */
static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
			       unsigned long flags, unsigned long reason)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	skb = key->reasm_head;
	key->reasm_head = NULL;

	if (!key->manual_alloc) {
		key->reasm_dead = true;
		key->valid = false;
		mctp_dev_release_key(key->dev, key);
	}
	spin_unlock_irqrestore(&key->lock, flags);

	if (!key->manual_alloc) {
		spin_lock_irqsave(&net->mctp.keys_lock, flags);
		if (!hlist_unhashed(&key->hlist)) {
			hlist_del_init(&key->hlist);
			hlist_del_init(&key->sklist);
			mctp_key_unref(key);
		}
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	/* and one for the local reference */
	mctp_key_unref(key);

	kfree_skb(skb);
}

#ifdef CONFIG_MCTP_FLOWS
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
	struct mctp_flow *flow;

	flow = skb_ext_add(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	refcount_inc(&key->refs);
	flow->key = key;
}

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
	struct mctp_sk_key *key;
	struct mctp_flow *flow;

	flow = skb_ext_find(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	key = flow->key;

	if (key->dev) {
		WARN_ON(key->dev != dev);
		return;
	}

	mctp_dev_set_key(dev, key);
}
#else
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb,
				     struct mctp_dev *dev) {}
#endif
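
/* With CONFIG_MCTP_FLOWS, an outbound skb carries an skb extension
 * referencing its tag key, which mctp_flow_prepare_output() hands to the
 * transmitting device; without the option, the stubs above make flow
 * tracking a no-op.
 */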

/* takes ownership of skb, both in success and failure cases */
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		/* Since we're manipulating the shared frag_list, ensure it
		 * isn't shared with any other SKBs. In the cloned case,
		 * this will free the skb; callers can no longer access it
		 * safely.
		 */
		key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
		if (!key->reasm_head)
			return -ENOMEM;

		key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list);
		key->last_seq = this_seq;

		return 0;
	}
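
	/* fragment sequence numbers are two bits wide (MCTP_HDR_SEQ_MASK),
	 * so the expected value wraps modulo four
	 */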
	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		goto err_free;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		goto err_free;

	skb->next = NULL;
	skb->sk = NULL;
	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;

err_free:
	kfree_skb(skb);
	return -EINVAL;
}

static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_sk_key *key, *any_key = NULL;
	struct net *net = dev_net(skb->dev);
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned int netid;
	unsigned long f;
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* We may be receiving a locally-routed packet; drop source sk
	 * accounting.
	 *
	 * From here, we will either queue the skb - either to a frag_queue, or
	 * to a receiving socket. When that succeeds, we clear the skb pointer;
	 * a non-NULL skb on exit will be otherwise unowned, and hence
	 * kfree_skb()-ed.
	 */
	skb_orphan(skb);

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	netid = mctp_cb(skb)->net;
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto out;

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src, dest, tag).
	 * we hold a ref on the key, and key->lock held.
	 */
	key = mctp_lookup_key(net, skb, netid, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 *
			 * this lookup requires key->peer to be MCTP_ADDR_ANY,
			 * it doesn't match just any key->peer.
			 */
			any_key = mctp_lookup_key(net, skb, netid,
						  MCTP_ADDR_ANY, &f);
			if (any_key) {
				msk = container_of(any_key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&any_key->lock, f);
			}
		}

		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(&msk->sk, skb);
			if (!rc)
				skb = NULL;
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_REPLIED);
				key = NULL;
			}
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);
			skb = NULL;

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop.
			 */
			rc = mctp_key_add(key, msk);
			if (!rc)
				trace_mctp_key_acquire(key);

			/* we don't need to release key->lock on exit, so
			 * clean up here and suppress the unlock via
			 * setting to NULL
			 */
			mctp_key_unref(key);
			key = NULL;

		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_INVALIDATED);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
				skb = NULL;
			}
		}

	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head) {
			rc = -EINVAL;
		} else {
			rc = mctp_frag_queue(key, skb);
			skb = NULL;
		}

		if (rc)
			goto out_unlock;

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
			if (!rc)
				key->reasm_head = NULL;
			__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
			key = NULL;
		}

	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
	if (any_key)
		mctp_key_unref(any_key);
out:
	kfree_skb(skb);
	return rc;
}
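
/* A per-route MTU of zero means "use the device MTU"; mctp_local_output()
 * relies on this when it builds a temporary route for extended addressing.
 */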
static unsigned int mctp_route_mtu(struct mctp_route *rt)
{
	return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
}

static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *hdr = mctp_hdr(skb);
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	unsigned int mtu;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);

	mtu = READ_ONCE(skb->dev->mtu);
	if (skb->len > mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (cb->ifindex) {
		/* direct route; use the hwaddr we stashed in sendmsg */
		if (cb->halen != skb->dev->addr_len) {
			/* sanity check, sendmsg should have already caught this */
			kfree_skb(skb);
			return -EMSGSIZE;
		}
		daddr = cb->haddr;
	} else {
		/* If lookup fails let the device handle daddr==NULL */
		if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
			daddr = daddr_buf;
	}

	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc < 0) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	mctp_flow_prepare_output(skb, route->dev);

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	rt->output = mctp_route_discard;

	return rt;
}

unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

/* tag management */
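/* MCTP tag values are three bits wide, so at most eight tags can be
 * allocated at once for a given (net, local EID, peer EID) tuple;
 * mctp_alloc_local_tag() below searches that space.
 */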
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	key->expiry = jiffies + mctp_key_lifetime;
	timer_reduce(&msk->key_expiry, key->expiry);

	/* we hold the net->mctp.keys_lock here, allowing updates to both
	 * the net and sk lists
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	refcount_inc(&key->refs);
}

/* Allocate a locally-owned tag value for (local, peer), and reserve
 * it for the socket msk
 */
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
					 unsigned int netid,
					 mctp_eid_t local, mctp_eid_t peer,
					 bool manual, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (peer == MCTP_ADDR_NULL)
		peer = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/* 8 possible tag values */
	tagbits = 0xff;
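
	/* tagbits is a candidate bitmask: bit n set means tag value n is
	 * still free; __ffs() below then picks the lowest free value
	 */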
	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* tags are net-specific */
		if (tmp->net != netid)
			continue;

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		/* Since we're avoiding conflicting entries, match peer and
		 * local addresses, including with a wildcard on ANY. See
		 * 'A note on key allocations' for background.
		 */
		if (peer != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->peer_addr, peer))
			continue;

		if (local != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->local_addr, local))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		trace_mctp_key_acquire(key);

		key->manual_alloc = manual;
		*tagp = key->tag;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits) {
		mctp_key_unref(key);
		return ERR_PTR(-EBUSY);
	}

	return key;
}

static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
						    unsigned int netid,
						    mctp_eid_t daddr,
						    u8 req_tag, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;

	req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
	key = NULL;

	spin_lock_irqsave(&mns->keys_lock, flags);

	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		if (tmp->net != netid)
			continue;

		if (tmp->tag != req_tag)
			continue;

		if (!mctp_address_matches(tmp->peer_addr, daddr))
			continue;

		if (!tmp->manual_alloc)
			continue;

		spin_lock(&tmp->lock);
		if (tmp->valid) {
			key = tmp;
			refcount_inc(&key->refs);
			spin_unlock(&tmp->lock);
			break;
		}
		spin_unlock(&tmp->lock);
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!key)
		return ERR_PTR(-ENOENT);

	if (tagp)
		*tagp = key->tag;

	return key;
}

/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return READ_ONCE(rt->dev->net) == net &&
		rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	ASSERT_RTNL();
	return rt1->dev->net == rt2->dev->net &&
		rt1->min == rt2->min &&
		rt1->max == rt2->max;
}

struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
				     mctp_eid_t daddr)
{
	struct mctp_route *tmp, *rt = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		/* TODO: add metrics */
		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
			if (refcount_inc_not_zero(&tmp->refs)) {
				rt = tmp;
				break;
			}
		}
	}

	rcu_read_unlock();

	return rt;
}
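
/* Note that lookup is by list order: the first route whose [min, max]
 * range contains daddr wins; there is no longest-prefix or metric-based
 * selection yet (see the TODO above).
 */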

static struct mctp_route *mctp_route_lookup_null(struct net *net,
						 struct net_device *dev)
{
	struct mctp_route *tmp, *rt = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
		    refcount_inc_not_zero(&tmp->refs)) {
			rt = tmp;
			break;
		}
	}

	rcu_read_unlock();

	return rt;
}

static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size, headroom;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* keep same headroom as the original skb */
	headroom = skb_headroom(skb);

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, headroom);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* we need to copy the extensions, for MCTP flow data */
		skb_ext_copy(skb2, skb);

		/* do route */
		rc = rt->output(rt, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	consume_skb(skb);
	return rc;
}

int mctp_local_output(struct sock *sk, struct mctp_route *rt,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_route tmp_rt = {0};
	struct mctp_sk_key *key;
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int netid;
	unsigned int mtu;
	mctp_eid_t saddr;
	bool ext_rt;
	int rc;
	u8 tag;

	rc = -ENODEV;

	if (rt) {
		ext_rt = false;
		if (WARN_ON(!rt->dev))
			goto out_release;

	} else if (cb->ifindex) {
		struct net_device *dev;

		ext_rt = true;
		rt = &tmp_rt;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
		if (!dev) {
			rcu_read_unlock();
			goto out_free;
		}
		rt->dev = __mctp_dev_get(dev);
		rcu_read_unlock();

		if (!rt->dev)
			goto out_release;

		/* establish temporary route - we set up enough to keep
		 * mctp_route_output happy
		 */
		rt->output = mctp_route_output;
		rt->mtu = 0;

	} else {
		rc = -EINVAL;
		goto out_free;
	}

	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
	if (rt->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = rt->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
	netid = READ_ONCE(rt->dev->net);

	if (rc)
		goto out_release;

	if (req_tag & MCTP_TAG_OWNER) {
		if (req_tag & MCTP_TAG_PREALLOC)
			key = mctp_lookup_prealloc_tag(msk, netid, daddr,
						       req_tag, &tag);
		else
			key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
						   false, &tag);

		if (IS_ERR(key)) {
			rc = PTR_ERR(key);
			goto out_release;
		}
		mctp_skb_set_flow(skb, key);
		/* done with the key in this scope */
		mctp_key_unref(key);
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		key = NULL;
		tag = req_tag & MCTP_TAG_MASK;
	}

	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = rt->dev->dev;

	/* cb->net will have been set on initial ingress */
	cb->src = saddr;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = mctp_route_mtu(rt);

	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
			MCTP_HDR_FLAG_EOM | tag;
		rc = rt->output(rt, skb);
	} else {
		rc = mctp_do_fragment_route(rt, skb, mtu, tag);
	}

	/* route output functions consume the skb, even on error */
	skb = NULL;

out_release:
	if (!ext_rt)
		mctp_route_release(rt);

	mctp_dev_put(tmp_rt.dev);

out_free:
	kfree_skb(skb);
	return rc;
}

/* route management */
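/* Routes cover an inclusive EID range [daddr_start, daddr_start +
 * daddr_extent]; the extent checks below keep the range inside valid
 * unicast space (0xff is the broadcast EID).
 */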
static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			  unsigned int daddr_extent, unsigned int mtu,
			  unsigned char type)
{
	int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *ert;

	if (!mctp_address_unicast(daddr_start))
		return -EINVAL;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	switch (type) {
	case RTN_LOCAL:
		rtfn = mctp_route_input;
		break;
	case RTN_UNICAST:
		rtfn = mctp_route_output;
		break;
	default:
		return -EINVAL;
	}

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	rt->dev = mdev;
	mctp_dev_hold(rt->dev);
	rt->type = type;
	rt->output = rtfn;

	ASSERT_RTNL();
	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert)) {
			mctp_route_release(rt);
			return -EEXIST;
		}
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}

static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			     unsigned int daddr_extent, unsigned char type)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev &&
		    rt->min == daddr_start && rt->max == daddr_end &&
		    rt->type == type) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}

/* Incoming packet-handling */
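/* Packets arrive here from the core network stack via the ETH_P_MCTP
 * packet_type registered below; after validation they are dispatched
 * through the routing table - RTN_LOCAL routes feed mctp_route_input(),
 * RTN_UNICAST routes feed mctp_route_output().
 */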
static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_route *rt;
	struct mctp_hdr *mh;

	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	/* source must be valid unicast or null; drop reserved ranges and
	 * broadcast
	 */
	if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
		goto err_drop;

	/* dest address: as above, but allow broadcast */
	if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
	      mctp_address_broadcast(mh->dest)))
		goto err_drop;

	/* MCTP drivers must populate halen/haddr */
	if (dev->type == ARPHRD_MCTP) {
		cb = mctp_cb(skb);
	} else {
		cb = __mctp_cb(skb);
		cb->halen = 0;
	}
	cb->net = READ_ONCE(mdev->net);
	cb->ifindex = dev->ifindex;

	rt = mctp_route_lookup(net, cb->net, mh->dest);

	/* NULL EID, but addressed to our physical address */
	if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rt = mctp_route_lookup_null(net, dev);

	if (!rt)
		goto err_drop;

	rt->output(rt, skb);
	mctp_route_release(rt);
	mctp_dev_put(mdev);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	mctp_dev_put(mdev);
	return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};

/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST] = { .type = NLA_U8 },
	[RTA_METRICS] = { .type = NLA_NESTED },
	[RTA_OIF] = { .type = NLA_U32 },
};

/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
 * tb must hold RTA_MAX+1 elements.
 */
static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack,
			      struct nlattr **tb, struct rtmsg **rtm,
			      struct mctp_dev **mdev, mctp_eid_t *daddr_start)
{
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	unsigned int ifindex;
	int rc;

	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
			 rta_mctp_policy, extack);
	if (rc < 0) {
		NL_SET_ERR_MSG(extack, "incorrect format");
		return rc;
	}

	if (!tb[RTA_DST]) {
		NL_SET_ERR_MSG(extack, "dst EID missing");
		return -EINVAL;
	}
	*daddr_start = nla_get_u8(tb[RTA_DST]);

	if (!tb[RTA_OIF]) {
		NL_SET_ERR_MSG(extack, "ifindex missing");
		return -EINVAL;
	}
	ifindex = nla_get_u32(tb[RTA_OIF]);

	*rtm = nlmsg_data(nlh);
	if ((*rtm)->rtm_family != AF_MCTP) {
		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
		return -EINVAL;
	}

	dev = __dev_get_by_index(net, ifindex);
	if (!dev) {
		NL_SET_ERR_MSG(extack, "bad ifindex");
		return -ENODEV;
	}
	*mdev = mctp_dev_get_rtnl(dev);
	if (!*mdev)
		return -ENODEV;

	if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack, "no routes to loopback");
		return -EINVAL;
	}

	return 0;
}

static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
	[RTAX_MTU] = { .type = NLA_U32 },
};

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	struct nlattr *tbx[RTAX_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	unsigned int mtu;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	if (rtm->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
		return -EINVAL;
	}

	mtu = 0;
	if (tb[RTA_METRICS]) {
		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
				      rta_metrics_policy, NULL);
		if (rc < 0)
			return rc;
		if (tbx[RTAX_MTU])
			mtu = nla_get_u32(tbx[RTAX_MTU]);
	}

	rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
			    rtm->rtm_type);
	return rc;
}

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	/* we only have unicast routes */
	if (rtm->rtm_type != RTN_UNICAST)
		return -EINVAL;

	rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
	return rc;
}

static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *hdr;
	void *metrics;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->rtm_family = AF_MCTP;

	/* we use the _len fields as a number of EIDs, rather than
	 * a number of bits in the address
	 */
	hdr->rtm_dst_len = rt->max - rt->min;
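	/* e.g. a route covering EIDs 8-11 is reported with rtm_dst_len == 3 */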
	hdr->rtm_src_len = 0;
	hdr->rtm_tos = 0;
	hdr->rtm_table = RT_TABLE_DEFAULT;
	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
	hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (rt->dev) {
		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
			goto cancel;
	}

	/* TODO: conditional neighbour physaddr? */

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}

	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	INIT_HLIST_HEAD(&ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
	return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
};

int __init mctp_routes_init(void)
{
	int err;

	dev_add_pack(&mctp_packet_type);

	err = register_pernet_subsys(&mctp_net_ops);
	if (err)
		goto err_pernet;

	err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
	if (err)
		goto err_rtnl;

	return 0;

err_rtnl:
	unregister_pernet_subsys(&mctp_net_ops);
err_pernet:
	dev_remove_pack(&mctp_packet_type);
	return err;
}

void mctp_routes_exit(void)
{
	rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
	unregister_pernet_subsys(&mctp_net_ops);
	dev_remove_pack(&mctp_packet_type);
}

#if IS_ENABLED(CONFIG_MCTP_TEST)
#include "test/route-test.c"
#endif