// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet_diag.c	Module for monitoring INET transport protocol sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/time.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
#include <net/bpf_sk_storage.h>
#include <net/netlink.h>

#include <linux/inet.h>
#include <linux/stddef.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

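/*
 * For orientation, this is roughly how a userspace tool (ss-like) talks to
 * this module: it opens a NETLINK_SOCK_DIAG socket and sends a
 * SOCK_DIAG_BY_FAMILY request whose payload is struct inet_diag_req_v2.
 * A minimal sketch, illustrative only (error handling elided):
 *
 *        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
 *        struct {
 *                struct nlmsghdr nlh;
 *                struct inet_diag_req_v2 req;
 *        } msg = {
 *                .nlh = {
 *                        .nlmsg_len   = sizeof(msg),
 *                        .nlmsg_type  = SOCK_DIAG_BY_FAMILY,
 *                        .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *                },
 *                .req = {
 *                        .sdiag_family   = AF_INET,
 *                        .sdiag_protocol = IPPROTO_TCP,
 *                        .idiag_states   = -1,   // all TCP states
 *                },
 *        };
 *        send(fd, &msg, sizeof(msg), 0);
 *        // then recv() NLM_F_MULTI replies until NLMSG_DONE
 */
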
static const struct inet_diag_handler __rcu **inet_diag_table;

struct inet_diag_entry {
        const __be32 *saddr;
        const __be32 *daddr;
        u16 sport;
        u16 dport;
        u16 family;
        u16 userlocks;
        u32 ifindex;
        u32 mark;
#ifdef CONFIG_SOCK_CGROUP_DATA
        u64 cgroup_id;
#endif
};

static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
        const struct inet_diag_handler *handler;

        if (proto < 0 || proto >= IPPROTO_MAX)
                return NULL;

        if (!READ_ONCE(inet_diag_table[proto]))
                sock_load_diag_module(AF_INET, proto);

        rcu_read_lock();
        handler = rcu_dereference(inet_diag_table[proto]);
        if (handler && !try_module_get(handler->owner))
                handler = NULL;
        rcu_read_unlock();

        return handler;
}

static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
        module_put(handler->owner);
}

void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
{
        r->idiag_family = sk->sk_family;

        r->id.idiag_sport = htons(sk->sk_num);
        r->id.idiag_dport = sk->sk_dport;
        r->id.idiag_if = sk->sk_bound_dev_if;
        sock_diag_save_cookie(sk, r->id.idiag_cookie);

#if IS_ENABLED(CONFIG_IPV6)
        if (sk->sk_family == AF_INET6) {
                *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
                *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
        } else
#endif
        {
                memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
                memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));

                r->id.idiag_src[0] = sk->sk_rcv_saddr;
                r->id.idiag_dst[0] = sk->sk_daddr;
        }
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);

static size_t inet_sk_attr_size(struct sock *sk,
                                const struct inet_diag_req_v2 *req,
                                bool net_admin)
{
        const struct inet_diag_handler *handler;
        size_t aux = 0;

        rcu_read_lock();
        handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]);
        DEBUG_NET_WARN_ON_ONCE(!handler);
        if (handler && handler->idiag_get_aux_size)
                aux = handler->idiag_get_aux_size(sk, net_admin);
        rcu_read_unlock();

        return nla_total_size(sizeof(struct tcp_info))
                + nla_total_size(sizeof(struct inet_diag_msg))
                + inet_diag_msg_attrs_size()
                + nla_total_size(sizeof(struct inet_diag_meminfo))
                + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
                + nla_total_size(TCP_CA_NAME_MAX)
                + nla_total_size(sizeof(struct tcpvegas_info))
                + aux
                + 64;
}

int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
                             struct inet_diag_msg *r, int ext,
                             struct user_namespace *user_ns,
                             bool net_admin)
{
        const struct inet_sock *inet = inet_sk(sk);
        struct inet_diag_sockopt inet_sockopt;

        if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
                goto errout;

        /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
         * hence this needs to be included regardless of socket family.
         */
        if (ext & (1 << (INET_DIAG_TOS - 1)))
                if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0)
                        goto errout;

#if IS_ENABLED(CONFIG_IPV6)
        if (r->idiag_family == AF_INET6) {
                if (ext & (1 << (INET_DIAG_TCLASS - 1)))
                        if (nla_put_u8(skb, INET_DIAG_TCLASS,
                                       inet6_sk(sk)->tclass) < 0)
                                goto errout;

                if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
                    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
                        goto errout;
        }
#endif

        if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
                goto errout;

        if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
            ext & (1 << (INET_DIAG_TCLASS - 1))) {
                u32 classid = 0;

#ifdef CONFIG_SOCK_CGROUP_DATA
                classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif
                /* Fallback to socket priority if class id isn't set.
                 * Classful qdiscs use it as direct reference to class.
                 * For cgroup2 classid is always zero.
                 */
                if (!classid)
                        classid = READ_ONCE(sk->sk_priority);

                if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
                        goto errout;
        }

#ifdef CONFIG_SOCK_CGROUP_DATA
        if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID,
                              cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)),
                              INET_DIAG_PAD))
                goto errout;
#endif

        r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
        r->idiag_inode = sock_i_ino(sk);

        memset(&inet_sockopt, 0, sizeof(inet_sockopt));
        inet_sockopt.recverr = inet_test_bit(RECVERR, sk);
        inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk);
        inet_sockopt.freebind = inet_test_bit(FREEBIND, sk);
        inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk);
        inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk);
        inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk);
        inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk);
        inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk);
        inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
        inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk);
        inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk);
        if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
                    &inet_sockopt))
                goto errout;

        return 0;

errout:
        return 1;
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);

static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
                                 struct nlattr **req_nlas)
{
        struct nlattr *nla;
        int remaining;

        nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) {
                int type = nla_type(nla);

                if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32))
                        return -EINVAL;

                if (type < __INET_DIAG_REQ_MAX)
                        req_nlas[type] = nla;
        }
        return 0;
}

static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req,
                                  const struct inet_diag_dump_data *data)
{
        if (data->req_nlas[INET_DIAG_REQ_PROTOCOL])
                return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]);
        return req->sdiag_protocol;
}

#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                      struct sk_buff *skb, struct netlink_callback *cb,
                      const struct inet_diag_req_v2 *req,
                      u16 nlmsg_flags, bool net_admin)
{
        const struct tcp_congestion_ops *ca_ops;
        const struct inet_diag_handler *handler;
        struct inet_diag_dump_data *cb_data;
        int ext = req->idiag_ext;
        struct inet_diag_msg *r;
        struct nlmsghdr *nlh;
        struct nlattr *attr;
        void *info = NULL;
        int protocol;

        cb_data = cb->data;
        protocol = inet_diag_get_protocol(req, cb_data);

        /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */
        handler = rcu_dereference_protected(inet_diag_table[protocol], 1);
        DEBUG_NET_WARN_ON_ONCE(!handler);
        if (!handler)
                return -ENXIO;

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                        cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
        if (!nlh)
                return -EMSGSIZE;

        r = nlmsg_data(nlh);
        BUG_ON(!sk_fullsock(sk));

        inet_diag_msg_common_fill(r, sk);
        r->idiag_state = sk->sk_state;
        r->idiag_timer = 0;
        r->idiag_retrans = 0;
        r->idiag_expires = 0;

        if (inet_diag_msg_attrs_fill(sk, skb, r, ext,
                                     sk_user_ns(NETLINK_CB(cb->skb).sk),
                                     net_admin))
                goto errout;

        if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
                struct inet_diag_meminfo minfo = {
                        .idiag_rmem = sk_rmem_alloc_get(sk),
                        .idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
                        .idiag_fmem = sk_forward_alloc_get(sk),
                        .idiag_tmem = sk_wmem_alloc_get(sk),
                };

                if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
                        goto errout;
        }

        if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
                if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
                        goto errout;

        /*
         * RAW sockets might have user-defined protocols assigned,
         * so report the one supplied on socket creation.
         */
        if (sk->sk_type == SOCK_RAW) {
                if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
                        goto errout;
        }

        if (!icsk) {
                handler->idiag_get_info(sk, r, NULL);
                goto out;
        }

        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
            icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                r->idiag_timer = 1;
                r->idiag_retrans = icsk->icsk_retransmits;
                r->idiag_expires =
                        jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                r->idiag_timer = 4;
                r->idiag_retrans = icsk->icsk_probes_out;
                r->idiag_expires =
                        jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
        } else if (timer_pending(&sk->sk_timer)) {
                r->idiag_timer = 2;
                r->idiag_retrans = icsk->icsk_probes_out;
                r->idiag_expires =
                        jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
        }

        if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
                attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
                                         handler->idiag_info_size,
                                         INET_DIAG_PAD);
                if (!attr)
                        goto errout;

                info = nla_data(attr);
        }

        if (ext & (1 << (INET_DIAG_CONG - 1))) {
                int err = 0;

                rcu_read_lock();
                ca_ops = READ_ONCE(icsk->icsk_ca_ops);
                if (ca_ops)
                        err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
                rcu_read_unlock();
                if (err < 0)
                        goto errout;
        }

        handler->idiag_get_info(sk, r, info);

        if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
                if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
                        goto errout;

        if (sk->sk_state < TCP_TIME_WAIT) {
                union tcp_cc_info info;
                size_t sz = 0;
                int attr;

                rcu_read_lock();
                ca_ops = READ_ONCE(icsk->icsk_ca_ops);
                if (ca_ops && ca_ops->get_info)
                        sz = ca_ops->get_info(sk, ext, &attr, &info);
                rcu_read_unlock();
                if (sz && nla_put(skb, attr, sz, &info) < 0)
                        goto errout;
        }

        /* Keep it at the end for potential retry with a larger skb,
         * or else do best-effort fitting, which is only done for the
         * first_nlmsg.
         */
        if (cb_data->bpf_stg_diag) {
                bool first_nlmsg = ((unsigned char *)nlh == skb->data);
                unsigned int prev_min_dump_alloc;
                unsigned int total_nla_size = 0;
                unsigned int msg_len;
                int err;

                msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
                err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb,
                                              INET_DIAG_SK_BPF_STORAGES,
                                              &total_nla_size);

                if (!err)
                        goto out;

                total_nla_size += msg_len;
                prev_min_dump_alloc = cb->min_dump_alloc;
                if (total_nla_size > prev_min_dump_alloc)
                        cb->min_dump_alloc = min_t(u32, total_nla_size,
                                                   MAX_DUMP_ALLOC_SIZE);

                if (!first_nlmsg)
                        goto errout;

                if (cb->min_dump_alloc > prev_min_dump_alloc)
                        /* Retry with pskb_expand_head() with
                         * __GFP_DIRECT_RECLAIM
                         */
                        goto errout;

                WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc);

                /* Send what we have for this sk
                 * and move on to the next sk in the following
                 * dump()
                 */
        }

out:
        nlmsg_end(skb, nlh);
        return 0;

errout:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);

static int inet_twsk_diag_fill(struct sock *sk,
                               struct sk_buff *skb,
                               struct netlink_callback *cb,
                               u16 nlmsg_flags, bool net_admin)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct inet_diag_msg *r;
        struct nlmsghdr *nlh;
        long tmo;

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
                        cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
                        sizeof(*r), nlmsg_flags);
        if (!nlh)
                return -EMSGSIZE;

        r = nlmsg_data(nlh);
        BUG_ON(tw->tw_state != TCP_TIME_WAIT);

        inet_diag_msg_common_fill(r, sk);
        r->idiag_retrans = 0;

        r->idiag_state = READ_ONCE(tw->tw_substate);
        r->idiag_timer = 3;
        tmo = tw->tw_timer.expires - jiffies;
        r->idiag_expires = jiffies_delta_to_msecs(tmo);
        r->idiag_rqueue = 0;
        r->idiag_wqueue = 0;
        r->idiag_uid = 0;
        r->idiag_inode = 0;

        if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
                                     tw->tw_mark)) {
                nlmsg_cancel(skb, nlh);
                return -EMSGSIZE;
        }

        nlmsg_end(skb, nlh);
        return 0;
}

static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
                              struct netlink_callback *cb,
                              u16 nlmsg_flags, bool net_admin)
{
        struct request_sock *reqsk = inet_reqsk(sk);
        struct inet_diag_msg *r;
        struct nlmsghdr *nlh;
        long tmo;

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                        cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
        if (!nlh)
                return -EMSGSIZE;

        r = nlmsg_data(nlh);
        inet_diag_msg_common_fill(r, sk);
        r->idiag_state = TCP_SYN_RECV;
        r->idiag_timer = 1;
        r->idiag_retrans = reqsk->num_retrans;

        BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
                     offsetof(struct sock, sk_cookie));

        tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
        r->idiag_expires = jiffies_delta_to_msecs(tmo);
        r->idiag_rqueue = 0;
        r->idiag_wqueue = 0;
        r->idiag_uid = 0;
        r->idiag_inode = 0;

        if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
                                     inet_rsk(reqsk)->ir_mark)) {
                nlmsg_cancel(skb, nlh);
                return -EMSGSIZE;
        }

        nlmsg_end(skb, nlh);
        return 0;
}

static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
                        struct netlink_callback *cb,
                        const struct inet_diag_req_v2 *r,
                        u16 nlmsg_flags, bool net_admin)
{
        if (sk->sk_state == TCP_TIME_WAIT)
                return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

        if (sk->sk_state == TCP_NEW_SYN_RECV)
                return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

        return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
                                 net_admin);
}

struct sock *inet_diag_find_one_icsk(struct net *net,
                                     struct inet_hashinfo *hashinfo,
                                     const struct inet_diag_req_v2 *req)
{
        struct sock *sk;

        rcu_read_lock();
        if (req->sdiag_family == AF_INET)
                sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
                                 req->id.idiag_dport, req->id.idiag_src[0],
                                 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
        else if (req->sdiag_family == AF_INET6) {
                if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
                    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
                        sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
                                         req->id.idiag_dport, req->id.idiag_src[3],
                                         req->id.idiag_sport, req->id.idiag_if);
                else
                        sk = inet6_lookup(net, hashinfo, NULL, 0,
                                          (struct in6_addr *)req->id.idiag_dst,
                                          req->id.idiag_dport,
                                          (struct in6_addr *)req->id.idiag_src,
                                          req->id.idiag_sport,
                                          req->id.idiag_if);
        }
#endif
        else {
                rcu_read_unlock();
                return ERR_PTR(-EINVAL);
        }
        rcu_read_unlock();

        if (!sk)
                return ERR_PTR(-ENOENT);

        if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
                sock_gen_put(sk);
                return ERR_PTR(-ENOENT);
        }

        return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);

int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
                            struct netlink_callback *cb,
                            const struct inet_diag_req_v2 *req)
{
        struct sk_buff *in_skb = cb->skb;
        bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
        struct net *net = sock_net(in_skb->sk);
        struct sk_buff *rep;
        struct sock *sk;
        int err;

        sk = inet_diag_find_one_icsk(net, hashinfo, req);
        if (IS_ERR(sk))
                return PTR_ERR(sk);

        rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
        if (!rep) {
                err = -ENOMEM;
                goto out;
        }

        err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
        if (err < 0) {
                WARN_ON(err == -EMSGSIZE);
                nlmsg_free(rep);
                goto out;
        }
        err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);

out:
        if (sk)
                sock_gen_put(sk);

        return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);

static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
                               const struct nlmsghdr *nlh,
                               int hdrlen,
                               const struct inet_diag_req_v2 *req)
{
        const struct inet_diag_handler *handler;
        struct inet_diag_dump_data dump_data;
        int err, protocol;

        memset(&dump_data, 0, sizeof(dump_data));
        err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas);
        if (err)
                return err;

        protocol = inet_diag_get_protocol(req, &dump_data);

        handler = inet_diag_lock_handler(protocol);
        if (!handler)
                return -ENOENT;

        if (cmd == SOCK_DIAG_BY_FAMILY) {
                struct netlink_callback cb = {
                        .nlh = nlh,
                        .skb = in_skb,
                        .data = &dump_data,
                };
                err = handler->dump_one(&cb, req);
        } else if (cmd == SOCK_DESTROY && handler->destroy) {
                err = handler->destroy(in_skb, req);
        } else {
                err = -EOPNOTSUPP;
        }
        inet_diag_unlock_handler(handler);

        return err;
}

static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
        int words = bits >> 5;

        bits &= 0x1f;

        if (words) {
                if (memcmp(a1, a2, words << 2))
                        return 0;
        }
        if (bits) {
                __be32 w1, w2;
                __be32 mask;

                w1 = a1[words];
                w2 = a2[words];

                mask = htonl((0xffffffff) << (32 - bits));

                if ((w1 ^ w2) & mask)
                        return 0;
        }

        return 1;
}

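/*
 * Worked example for the matcher above: for a /12 IPv4 prefix, bits = 12,
 * so words = 0 full 32-bit words are memcmp()ed and the remaining 12 bits
 * are checked under mask = htonl(0xfff00000).  Against the prefix
 * 10.16.0.0/12, both 10.16.0.0 (0x0a100000) and 10.31.255.255 (0x0a1fffff)
 * compare equal under that mask, while 10.32.0.0 (0x0a200000) does not.
 */
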
static int inet_diag_bc_run(const struct nlattr *_bc,
                            const struct inet_diag_entry *entry)
{
        const void *bc = nla_data(_bc);
        int len = nla_len(_bc);

        while (len > 0) {
                int yes = 1;
                const struct inet_diag_bc_op *op = bc;

                switch (op->code) {
                case INET_DIAG_BC_NOP:
                        break;
                case INET_DIAG_BC_JMP:
                        yes = 0;
                        break;
                case INET_DIAG_BC_S_EQ:
                        yes = entry->sport == op[1].no;
                        break;
                case INET_DIAG_BC_S_GE:
                        yes = entry->sport >= op[1].no;
                        break;
                case INET_DIAG_BC_S_LE:
                        yes = entry->sport <= op[1].no;
                        break;
                case INET_DIAG_BC_D_EQ:
                        yes = entry->dport == op[1].no;
                        break;
                case INET_DIAG_BC_D_GE:
                        yes = entry->dport >= op[1].no;
                        break;
                case INET_DIAG_BC_D_LE:
                        yes = entry->dport <= op[1].no;
                        break;
                case INET_DIAG_BC_AUTO:
                        yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
                        break;
                case INET_DIAG_BC_S_COND:
                case INET_DIAG_BC_D_COND: {
                        const struct inet_diag_hostcond *cond;
                        const __be32 *addr;

                        cond = (const struct inet_diag_hostcond *)(op + 1);
                        if (cond->port != -1 &&
                            cond->port != (op->code == INET_DIAG_BC_S_COND ?
                                           entry->sport : entry->dport)) {
                                yes = 0;
                                break;
                        }

                        if (op->code == INET_DIAG_BC_S_COND)
                                addr = entry->saddr;
                        else
                                addr = entry->daddr;

                        if (cond->family != AF_UNSPEC &&
                            cond->family != entry->family) {
                                if (entry->family == AF_INET6 &&
                                    cond->family == AF_INET) {
                                        if (addr[0] == 0 && addr[1] == 0 &&
                                            addr[2] == htonl(0xffff) &&
                                            bitstring_match(addr + 3,
                                                            cond->addr,
                                                            cond->prefix_len))
                                                break;
                                }
                                yes = 0;
                                break;
                        }

                        if (cond->prefix_len == 0)
                                break;
                        if (bitstring_match(addr, cond->addr,
                                            cond->prefix_len))
                                break;
                        yes = 0;
                        break;
                }
                case INET_DIAG_BC_DEV_COND: {
                        u32 ifindex;

                        ifindex = *((const u32 *)(op + 1));
                        if (ifindex != entry->ifindex)
                                yes = 0;
                        break;
                }
                case INET_DIAG_BC_MARK_COND: {
                        struct inet_diag_markcond *cond;

                        cond = (struct inet_diag_markcond *)(op + 1);
                        if ((entry->mark & cond->mask) != cond->mark)
                                yes = 0;
                        break;
                }
#ifdef CONFIG_SOCK_CGROUP_DATA
                case INET_DIAG_BC_CGROUP_COND: {
                        u64 cgroup_id;

                        cgroup_id = get_unaligned((const u64 *)(op + 1));
                        if (cgroup_id != entry->cgroup_id)
                                yes = 0;
                        break;
                }
#endif
                }

                if (yes) {
                        len -= op->yes;
                        bc += op->yes;
                } else {
                        len -= op->no;
                        bc += op->no;
                }
        }
        return len == 0;
}

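/*
 * Illustration (not kernel code): each inet_diag_bc_op carries two relative
 * jump offsets, "yes" and "no", in bytes from the current op.  A filter
 * matches when execution walks exactly off the end (len == 0); a failing
 * comparison typically jumps to len + 4, driving len negative and rejecting
 * the socket.  A userspace program for "sport >= 1024 && sport <= 4096"
 * could be laid out as two 8-byte condition pairs, with the port value
 * riding in op[1].no:
 *
 *        struct inet_diag_bc_op prog[4] = {
 *                { INET_DIAG_BC_S_GE, 8, 20 },   // at off 0, reject = 16+4
 *                { 0, 0, 1024 },                 // port holder, never run
 *                { INET_DIAG_BC_S_LE, 8, 12 },   // at off 8, reject = 8+4
 *                { 0, 0, 4096 },
 *        };
 *
 * sent as the INET_DIAG_REQ_BYTECODE attribute of a dump request.
 */
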
/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
 */
static void entry_fill_addrs(struct inet_diag_entry *entry,
                             const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
        if (sk->sk_family == AF_INET6) {
                entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
                entry->daddr = sk->sk_v6_daddr.s6_addr32;
        } else
#endif
        {
                entry->saddr = &sk->sk_rcv_saddr;
                entry->daddr = &sk->sk_daddr;
        }
}

int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
        struct inet_sock *inet = inet_sk(sk);
        struct inet_diag_entry entry;

        if (!bc)
                return 1;

        entry.family = sk->sk_family;
        entry_fill_addrs(&entry, sk);
        entry.sport = inet->inet_num;
        entry.dport = ntohs(inet->inet_dport);
        entry.ifindex = sk->sk_bound_dev_if;
        entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
        if (sk_fullsock(sk))
                entry.mark = READ_ONCE(sk->sk_mark);
        else if (sk->sk_state == TCP_NEW_SYN_RECV)
                entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
        else if (sk->sk_state == TCP_TIME_WAIT)
                entry.mark = inet_twsk(sk)->tw_mark;
        else
                entry.mark = 0;
#ifdef CONFIG_SOCK_CGROUP_DATA
        entry.cgroup_id = sk_fullsock(sk) ?
                          cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
#endif

        return inet_diag_bc_run(bc, &entry);
}
EXPORT_SYMBOL_GPL(inet_diag_bc_sk);

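/* valid_cc() below checks a backward-computed jump target (cc bytes before
 * the end of the program): it walks the "yes" chain from the start and only
 * accepts the target if it coincides with an op boundary on that chain, so
 * a crafted filter cannot jump into the middle of an instruction.
 */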
static int valid_cc(const void *bc, int len, int cc)
{
        while (len >= 0) {
                const struct inet_diag_bc_op *op = bc;

                if (cc > len)
                        return 0;
                if (cc == len)
                        return 1;
                if (op->yes < 4 || op->yes & 3)
                        return 0;
                len -= op->yes;
                bc += op->yes;
        }
        return 0;
}

/* data is u32 ifindex */
static bool valid_devcond(const struct inet_diag_bc_op *op, int len,
                          int *min_len)
{
        /* Check ifindex space. */
        *min_len += sizeof(u32);
        if (len < *min_len)
                return false;

        return true;
}

/* Validate an inet_diag_hostcond. */
static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
                           int *min_len)
{
        struct inet_diag_hostcond *cond;
        int addr_len;

        /* Check hostcond space. */
        *min_len += sizeof(struct inet_diag_hostcond);
        if (len < *min_len)
                return false;
        cond = (struct inet_diag_hostcond *)(op + 1);

        /* Check address family and address length. */
        switch (cond->family) {
        case AF_UNSPEC:
                addr_len = 0;
                break;
        case AF_INET:
                addr_len = sizeof(struct in_addr);
                break;
        case AF_INET6:
                addr_len = sizeof(struct in6_addr);
                break;
        default:
                return false;
        }
        *min_len += addr_len;
        if (len < *min_len)
                return false;

        /* Check prefix length (in bits) vs address length (in bytes). */
        if (cond->prefix_len > 8 * addr_len)
                return false;

        return true;
}

/* Validate a port comparison operator. */
static bool valid_port_comparison(const struct inet_diag_bc_op *op,
                                  int len, int *min_len)
{
        /* Port comparisons put the port in a follow-on inet_diag_bc_op. */
        *min_len += sizeof(struct inet_diag_bc_op);
        if (len < *min_len)
                return false;

        return true;
}

static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
                           int *min_len)
{
        *min_len += sizeof(struct inet_diag_markcond);
        return len >= *min_len;
}

#ifdef CONFIG_SOCK_CGROUP_DATA
static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
                             int *min_len)
{
        *min_len += sizeof(u64);
        return len >= *min_len;
}
#endif

static int inet_diag_bc_audit(const struct nlattr *attr,
                              const struct sk_buff *skb)
{
        bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
        const void *bytecode, *bc;
        int bytecode_len, len;

        if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
                return -EINVAL;

        bytecode = bc = nla_data(attr);
        len = bytecode_len = nla_len(attr);

        while (len > 0) {
                int min_len = sizeof(struct inet_diag_bc_op);
                const struct inet_diag_bc_op *op = bc;

                switch (op->code) {
                case INET_DIAG_BC_S_COND:
                case INET_DIAG_BC_D_COND:
                        if (!valid_hostcond(bc, len, &min_len))
                                return -EINVAL;
                        break;
                case INET_DIAG_BC_DEV_COND:
                        if (!valid_devcond(bc, len, &min_len))
                                return -EINVAL;
                        break;
                case INET_DIAG_BC_S_EQ:
                case INET_DIAG_BC_S_GE:
                case INET_DIAG_BC_S_LE:
                case INET_DIAG_BC_D_EQ:
                case INET_DIAG_BC_D_GE:
                case INET_DIAG_BC_D_LE:
                        if (!valid_port_comparison(bc, len, &min_len))
                                return -EINVAL;
                        break;
                case INET_DIAG_BC_MARK_COND:
                        if (!net_admin)
                                return -EPERM;
                        if (!valid_markcond(bc, len, &min_len))
                                return -EINVAL;
                        break;
#ifdef CONFIG_SOCK_CGROUP_DATA
                case INET_DIAG_BC_CGROUP_COND:
                        if (!valid_cgroupcond(bc, len, &min_len))
                                return -EINVAL;
                        break;
#endif
                case INET_DIAG_BC_AUTO:
                case INET_DIAG_BC_JMP:
                case INET_DIAG_BC_NOP:
                        break;
                default:
                        return -EINVAL;
                }

                if (op->code != INET_DIAG_BC_NOP) {
                        if (op->no < min_len || op->no > len + 4 || op->no & 3)
                                return -EINVAL;
                        if (op->no < len &&
                            !valid_cc(bytecode, bytecode_len, len - op->no))
                                return -EINVAL;
                }

                if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
                        return -EINVAL;
                bc += op->yes;
                len -= op->yes;
        }

        return len == 0 ? 0 : -EINVAL;
}

static void twsk_build_assert(void)
{
        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
                     offsetof(struct sock, sk_family));

        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
                     offsetof(struct inet_sock, inet_num));

        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
                     offsetof(struct inet_sock, inet_dport));

        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
                     offsetof(struct inet_sock, inet_rcv_saddr));

        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
                     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
                     offsetof(struct sock, sk_v6_rcv_saddr));

        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
                     offsetof(struct sock, sk_v6_daddr));
#endif
}

void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
                         struct netlink_callback *cb,
                         const struct inet_diag_req_v2 *r)
{
        bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
        struct inet_diag_dump_data *cb_data = cb->data;
        struct net *net = sock_net(skb->sk);
        u32 idiag_states = r->idiag_states;
        int i, num, s_i, s_num;
        struct nlattr *bc;
        struct sock *sk;

        bc = cb_data->inet_diag_nla_bc;
        if (idiag_states & TCPF_SYN_RECV)
                idiag_states |= TCPF_NEW_SYN_RECV;
        s_i = cb->args[1];
        s_num = num = cb->args[2];

        if (cb->args[0] == 0) {
                if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
                        goto skip_listen_ht;

                for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
                        struct inet_listen_hashbucket *ilb;
                        struct hlist_nulls_node *node;

                        num = 0;
                        ilb = &hashinfo->lhash2[i];

                        if (hlist_nulls_empty(&ilb->nulls_head)) {
                                s_num = 0;
                                continue;
                        }
                        spin_lock(&ilb->lock);
                        sk_nulls_for_each(sk, node, &ilb->nulls_head) {
                                struct inet_sock *inet = inet_sk(sk);

                                if (!net_eq(sock_net(sk), net))
                                        continue;

                                if (num < s_num) {
                                        num++;
                                        continue;
                                }

                                if (r->sdiag_family != AF_UNSPEC &&
                                    sk->sk_family != r->sdiag_family)
                                        goto next_listen;

                                if (r->id.idiag_sport != inet->inet_sport &&
                                    r->id.idiag_sport)
                                        goto next_listen;

                                if (!inet_diag_bc_sk(bc, sk))
                                        goto next_listen;

                                if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
                                                      cb, r, NLM_F_MULTI,
                                                      net_admin) < 0) {
                                        spin_unlock(&ilb->lock);
                                        goto done;
                                }

next_listen:
                                ++num;
                        }
                        spin_unlock(&ilb->lock);

                        s_num = 0;
                }
skip_listen_ht:
                cb->args[0] = 1;
                s_i = num = s_num = 0;
        }

/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
 * with bh disabled.
 */
#define SKARR_SZ 16

        /* Dump bound but inactive (not listening, connecting, etc.) sockets */
        if (cb->args[0] == 1) {
                if (!(idiag_states & TCPF_BOUND_INACTIVE))
                        goto skip_bind_ht;

                for (i = s_i; i < hashinfo->bhash_size; i++) {
                        struct inet_bind_hashbucket *ibb;
                        struct inet_bind2_bucket *tb2;
                        struct sock *sk_arr[SKARR_SZ];
                        int num_arr[SKARR_SZ];
                        int idx, accum, res;

resume_bind_walk:
                        num = 0;
                        accum = 0;
                        ibb = &hashinfo->bhash2[i];

                        if (hlist_empty(&ibb->chain)) {
                                s_num = 0;
                                continue;
                        }
                        spin_lock_bh(&ibb->lock);
                        inet_bind_bucket_for_each(tb2, &ibb->chain) {
                                if (!net_eq(ib2_net(tb2), net))
                                        continue;

                                sk_for_each_bound(sk, &tb2->owners) {
                                        struct inet_sock *inet = inet_sk(sk);

                                        if (num < s_num)
                                                goto next_bind;

                                        if (sk->sk_state != TCP_CLOSE ||
                                            !inet->inet_num)
                                                goto next_bind;

                                        if (r->sdiag_family != AF_UNSPEC &&
                                            r->sdiag_family != sk->sk_family)
                                                goto next_bind;

                                        if (!inet_diag_bc_sk(bc, sk))
                                                goto next_bind;

                                        sock_hold(sk);
                                        num_arr[accum] = num;
                                        sk_arr[accum] = sk;
                                        if (++accum == SKARR_SZ)
                                                goto pause_bind_walk;
next_bind:
                                        num++;
                                }
                        }
pause_bind_walk:
                        spin_unlock_bh(&ibb->lock);

                        res = 0;
                        for (idx = 0; idx < accum; idx++) {
                                if (res >= 0) {
                                        res = inet_sk_diag_fill(sk_arr[idx],
                                                                NULL, skb, cb,
                                                                r, NLM_F_MULTI,
                                                                net_admin);
                                        if (res < 0)
                                                num = num_arr[idx];
                                }
                                sock_put(sk_arr[idx]);
                        }
                        if (res < 0)
                                goto done;

                        cond_resched();

                        if (accum == SKARR_SZ) {
                                s_num = num + 1;
                                goto resume_bind_walk;
                        }

                        s_num = 0;
                }
skip_bind_ht:
                cb->args[0] = 2;
                s_i = num = s_num = 0;
        }

        if (!(idiag_states & ~TCPF_LISTEN))
                goto out;

        for (i = s_i; i <= hashinfo->ehash_mask; i++) {
                struct inet_ehash_bucket *head = &hashinfo->ehash[i];
                spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
                struct hlist_nulls_node *node;
                struct sock *sk_arr[SKARR_SZ];
                int num_arr[SKARR_SZ];
                int idx, accum, res;

                if (hlist_nulls_empty(&head->chain))
                        continue;

                if (i > s_i)
                        s_num = 0;

next_chunk:
                num = 0;
                accum = 0;
                spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &head->chain) {
                        int state;

                        if (!net_eq(sock_net(sk), net))
                                continue;
                        if (num < s_num)
                                goto next_normal;
                        state = (sk->sk_state == TCP_TIME_WAIT) ?
                                READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
                        if (!(idiag_states & (1 << state)))
                                goto next_normal;
                        if (r->sdiag_family != AF_UNSPEC &&
                            sk->sk_family != r->sdiag_family)
                                goto next_normal;
                        if (r->id.idiag_sport != htons(sk->sk_num) &&
                            r->id.idiag_sport)
                                goto next_normal;
                        if (r->id.idiag_dport != sk->sk_dport &&
                            r->id.idiag_dport)
                                goto next_normal;
                        twsk_build_assert();

                        if (!inet_diag_bc_sk(bc, sk))
                                goto next_normal;

                        if (!refcount_inc_not_zero(&sk->sk_refcnt))
                                goto next_normal;

                        num_arr[accum] = num;
                        sk_arr[accum] = sk;
                        if (++accum == SKARR_SZ)
                                break;
next_normal:
                        ++num;
                }
                spin_unlock_bh(lock);
                res = 0;
                for (idx = 0; idx < accum; idx++) {
                        if (res >= 0) {
                                res = sk_diag_fill(sk_arr[idx], skb, cb, r,
                                                   NLM_F_MULTI, net_admin);
                                if (res < 0)
                                        num = num_arr[idx];
                        }
                        sock_gen_put(sk_arr[idx]);
                }
                if (res < 0)
                        break;
                cond_resched();
                if (accum == SKARR_SZ) {
                        s_num = num + 1;
                        goto next_chunk;
                }
        }

done:
        cb->args[1] = i;
        cb->args[2] = num;
out:
        ;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);

static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
                            const struct inet_diag_req_v2 *r)
{
        struct inet_diag_dump_data *cb_data = cb->data;
        const struct inet_diag_handler *handler;
        u32 prev_min_dump_alloc;
        int protocol, err = 0;

        protocol = inet_diag_get_protocol(r, cb_data);

again:
        prev_min_dump_alloc = cb->min_dump_alloc;
        handler = inet_diag_lock_handler(protocol);
        if (handler) {
                handler->dump(skb, cb, r);
                inet_diag_unlock_handler(handler);
        } else {
                err = -ENOENT;
        }
        /* The skb is not large enough to fit one sk info and
         * inet_sk_diag_fill() has requested for a larger skb.
         */
        if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) {
                err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL);
                if (!err)
                        goto again;
        }

        return err ? : skb->len;
}

static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh));
}

static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
{
        const struct nlmsghdr *nlh = cb->nlh;
        struct inet_diag_dump_data *cb_data;
        struct sk_buff *skb = cb->skb;
        struct nlattr *nla;
        int err;

        cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
        if (!cb_data)
                return -ENOMEM;

        err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas);
        if (err) {
                kfree(cb_data);
                return err;
        }
        nla = cb_data->inet_diag_nla_bc;
        if (nla) {
                err = inet_diag_bc_audit(nla, skb);
                if (err) {
                        kfree(cb_data);
                        return err;
                }
        }

        nla = cb_data->inet_diag_nla_bpf_stgs;
        if (nla) {
                struct bpf_sk_storage_diag *bpf_stg_diag;

                bpf_stg_diag = bpf_sk_storage_diag_alloc(nla);
                if (IS_ERR(bpf_stg_diag)) {
                        kfree(cb_data);
                        return PTR_ERR(bpf_stg_diag);
                }
                cb_data->bpf_stg_diag = bpf_stg_diag;
        }

        cb->data = cb_data;
        return 0;
}

static int inet_diag_dump_start(struct netlink_callback *cb)
{
        return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2));
}

static int inet_diag_dump_start_compat(struct netlink_callback *cb)
{
        return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req));
}

static int inet_diag_dump_done(struct netlink_callback *cb)
{
        struct inet_diag_dump_data *cb_data = cb->data;

        bpf_sk_storage_diag_free(cb_data->bpf_stg_diag);
        kfree(cb->data);

        return 0;
}

static int inet_diag_type2proto(int type)
{
        switch (type) {
        case TCPDIAG_GETSOCK:
                return IPPROTO_TCP;
        case DCCPDIAG_GETSOCK:
                return IPPROTO_DCCP;
        default:
                return 0;
        }
}

static int inet_diag_dump_compat(struct sk_buff *skb,
                                 struct netlink_callback *cb)
{
        struct inet_diag_req *rc = nlmsg_data(cb->nlh);
        struct inet_diag_req_v2 req;

        req.sdiag_family = AF_UNSPEC; /* compatibility */
        req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
        req.idiag_ext = rc->idiag_ext;
        req.pad = 0;
        req.idiag_states = rc->idiag_states;
        req.id = rc->id;

        return __inet_diag_dump(skb, cb, &req);
}

static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
                                      const struct nlmsghdr *nlh)
{
        struct inet_diag_req *rc = nlmsg_data(nlh);
        struct inet_diag_req_v2 req;

        req.sdiag_family = rc->idiag_family;
        req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
        req.idiag_ext = rc->idiag_ext;
        req.pad = 0;
        req.idiag_states = rc->idiag_states;
        req.id = rc->id;

        return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh,
                                   sizeof(struct inet_diag_req), &req);
}

static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        int hdrlen = sizeof(struct inet_diag_req);
        struct net *net = sock_net(skb->sk);

        if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
            nlmsg_len(nlh) < hdrlen)
                return -EINVAL;

        if (nlh->nlmsg_flags & NLM_F_DUMP) {
                struct netlink_dump_control c = {
                        .start = inet_diag_dump_start_compat,
                        .done = inet_diag_dump_done,
                        .dump = inet_diag_dump_compat,
                };
                return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
        }

        return inet_diag_get_exact_compat(skb, nlh);
}

static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
{
        int hdrlen = sizeof(struct inet_diag_req_v2);
        struct net *net = sock_net(skb->sk);

        if (nlmsg_len(h) < hdrlen)
                return -EINVAL;

        if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
            h->nlmsg_flags & NLM_F_DUMP) {
                struct netlink_dump_control c = {
                        .start = inet_diag_dump_start,
                        .done = inet_diag_dump_done,
                        .dump = inet_diag_dump,
                };
                return netlink_dump_start(net->diag_nlsk, skb, h, &c);
        }

        return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen,
                                   nlmsg_data(h));
}

static int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
{
        const struct inet_diag_handler *handler;
        struct nlmsghdr *nlh;
        struct nlattr *attr;
        struct inet_diag_msg *r;
        void *info = NULL;
        int err = 0;

        nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
        if (!nlh)
                return -ENOMEM;

        r = nlmsg_data(nlh);
        memset(r, 0, sizeof(*r));
        inet_diag_msg_common_fill(r, sk);
        if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM)
                r->id.idiag_sport = inet_sk(sk)->inet_sport;
        r->idiag_state = sk->sk_state;

        if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
                nlmsg_cancel(skb, nlh);
                return err;
        }

        handler = inet_diag_lock_handler(sk->sk_protocol);
        if (!handler) {
                nlmsg_cancel(skb, nlh);
                return -ENOENT;
        }

        attr = handler->idiag_info_size
                ? nla_reserve_64bit(skb, INET_DIAG_INFO,
                                    handler->idiag_info_size,
                                    INET_DIAG_PAD)
                : NULL;
        if (attr)
                info = nla_data(attr);

        handler->idiag_get_info(sk, r, info);
        inet_diag_unlock_handler(handler);

        nlmsg_end(skb, nlh);
        return 0;
}

static const struct sock_diag_handler inet_diag_handler = {
        .owner = THIS_MODULE,
        .family = AF_INET,
        .dump = inet_diag_handler_cmd,
        .get_info = inet_diag_handler_get_info,
        .destroy = inet_diag_handler_cmd,
};

static const struct sock_diag_handler inet6_diag_handler = {
        .owner = THIS_MODULE,
        .family = AF_INET6,
        .dump = inet_diag_handler_cmd,
        .get_info = inet_diag_handler_get_info,
        .destroy = inet_diag_handler_cmd,
};

int inet_diag_register(const struct inet_diag_handler *h)
{
        const __u16 type = h->idiag_type;

        if (type >= IPPROTO_MAX)
                return -EINVAL;

        return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type],
                        NULL, h) ? 0 : -EEXIST;
}
EXPORT_SYMBOL_GPL(inet_diag_register);

void inet_diag_unregister(const struct inet_diag_handler *h)
{
        const __u16 type = h->idiag_type;

        if (type >= IPPROTO_MAX)
                return;

        xchg((const struct inet_diag_handler **)&inet_diag_table[type],
             NULL);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);

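/*
 * A protocol module plugs into the table above roughly the way tcp_diag
 * does; a minimal sketch with a hypothetical "foo" protocol (callbacks and
 * IPPROTO_FOO are placeholders):
 *
 *        static const struct inet_diag_handler foo_diag_handler = {
 *                .owner           = THIS_MODULE,
 *                .dump            = foo_diag_dump,
 *                .dump_one        = foo_diag_dump_one,
 *                .idiag_get_info  = foo_diag_get_info,
 *                .idiag_type      = IPPROTO_FOO,
 *                .idiag_info_size = sizeof(struct foo_info),
 *        };
 *
 *        static int __init foo_diag_init(void)
 *        {
 *                return inet_diag_register(&foo_diag_handler);
 *        }
 *
 *        static void __exit foo_diag_exit(void)
 *        {
 *                inet_diag_unregister(&foo_diag_handler);
 *        }
 */
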
static const struct sock_diag_inet_compat inet_diag_compat = {
        .owner = THIS_MODULE,
        .fn = inet_diag_rcv_msg_compat,
};

static int __init inet_diag_init(void)
{
        const int inet_diag_table_size = (IPPROTO_MAX *
                                          sizeof(struct inet_diag_handler *));
        int err = -ENOMEM;

        inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
        if (!inet_diag_table)
                goto out;

        err = sock_diag_register(&inet_diag_handler);
        if (err)
                goto out_free_nl;

        err = sock_diag_register(&inet6_diag_handler);
        if (err)
                goto out_free_inet;

        sock_diag_register_inet_compat(&inet_diag_compat);
out:
        return err;

out_free_inet:
        sock_diag_unregister(&inet_diag_handler);
out_free_nl:
        kfree(inet_diag_table);
        goto out;
}

static void __exit inet_diag_exit(void)
{
        sock_diag_unregister(&inet6_diag_handler);
        sock_diag_unregister(&inet_diag_handler);
        sock_diag_unregister_inet_compat(&inet_diag_compat);
        kfree(inet_diag_table);
}

module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);