netvsc_drv.c 74 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * Authors:
  6. * Haiyang Zhang <haiyangz@microsoft.com>
  7. * Hank Janssen <hjanssen@microsoft.com>
  8. */
  9. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10. #include <linux/init.h>
  11. #include <linux/atomic.h>
  12. #include <linux/ethtool.h>
  13. #include <linux/module.h>
  14. #include <linux/highmem.h>
  15. #include <linux/device.h>
  16. #include <linux/io.h>
  17. #include <linux/delay.h>
  18. #include <linux/netdevice.h>
  19. #include <linux/inetdevice.h>
  20. #include <linux/etherdevice.h>
  21. #include <linux/pci.h>
  22. #include <linux/skbuff.h>
  23. #include <linux/if_vlan.h>
  24. #include <linux/in.h>
  25. #include <linux/slab.h>
  26. #include <linux/rtnetlink.h>
  27. #include <linux/netpoll.h>
  28. #include <linux/bpf.h>
  29. #include <net/arp.h>
  30. #include <net/route.h>
  31. #include <net/sock.h>
  32. #include <net/pkt_sched.h>
  33. #include <net/checksum.h>
  34. #include <net/ip6_checksum.h>
  35. #include "hyperv_net.h"
  36. #define RING_SIZE_MIN 64
  37. #define LINKCHANGE_INT (2 * HZ)
  38. #define VF_TAKEOVER_INT (HZ / 10)
  39. /* Macros to define the context of vf registration */
  40. #define VF_REG_IN_PROBE 1
  41. #define VF_REG_IN_NOTIFIER 2
  42. static unsigned int ring_size __ro_after_init = 128;
  43. module_param(ring_size, uint, 0444);
  44. MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
  45. unsigned int netvsc_ring_bytes __ro_after_init;
  46. static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
  47. NETIF_MSG_LINK | NETIF_MSG_IFUP |
  48. NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
  49. NETIF_MSG_TX_ERR;
  50. static int debug = -1;
  51. module_param(debug, int, 0444);
  52. MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
  53. static LIST_HEAD(netvsc_dev_list);
  54. static void netvsc_change_rx_flags(struct net_device *net, int change)
  55. {
  56. struct net_device_context *ndev_ctx = netdev_priv(net);
  57. struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
  58. int inc;
  59. if (!vf_netdev)
  60. return;
  61. if (change & IFF_PROMISC) {
  62. inc = (net->flags & IFF_PROMISC) ? 1 : -1;
  63. dev_set_promiscuity(vf_netdev, inc);
  64. }
  65. if (change & IFF_ALLMULTI) {
  66. inc = (net->flags & IFF_ALLMULTI) ? 1 : -1;
  67. dev_set_allmulti(vf_netdev, inc);
  68. }
  69. }
  70. static void netvsc_set_rx_mode(struct net_device *net)
  71. {
  72. struct net_device_context *ndev_ctx = netdev_priv(net);
  73. struct net_device *vf_netdev;
  74. struct netvsc_device *nvdev;
  75. rcu_read_lock();
  76. vf_netdev = rcu_dereference(ndev_ctx->vf_netdev);
  77. if (vf_netdev) {
  78. dev_uc_sync(vf_netdev, net);
  79. dev_mc_sync(vf_netdev, net);
  80. }
  81. nvdev = rcu_dereference(ndev_ctx->nvdev);
  82. if (nvdev)
  83. rndis_filter_update(nvdev);
  84. rcu_read_unlock();
  85. }
  86. static void netvsc_tx_enable(struct netvsc_device *nvscdev,
  87. struct net_device *ndev)
  88. {
  89. nvscdev->tx_disable = false;
  90. virt_wmb(); /* ensure queue wake up mechanism is on */
  91. netif_tx_wake_all_queues(ndev);
  92. }
  93. static int netvsc_open(struct net_device *net)
  94. {
  95. struct net_device_context *ndev_ctx = netdev_priv(net);
  96. struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
  97. struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
  98. struct rndis_device *rdev;
  99. int ret = 0;
  100. netif_carrier_off(net);
  101. /* Open up the device */
  102. ret = rndis_filter_open(nvdev);
  103. if (ret != 0) {
  104. netdev_err(net, "unable to open device (ret %d).\n", ret);
  105. return ret;
  106. }
  107. rdev = nvdev->extension;
  108. if (!rdev->link_state) {
  109. netif_carrier_on(net);
  110. netvsc_tx_enable(nvdev, net);
  111. }
  112. if (vf_netdev) {
  113. /* Setting synthetic device up transparently sets
  114. * slave as up. If open fails, then slave will be
  115. * still be offline (and not used).
  116. */
  117. ret = dev_open(vf_netdev, NULL);
  118. if (ret)
  119. netdev_warn(net,
  120. "unable to open slave: %s: %d\n",
  121. vf_netdev->name, ret);
  122. }
  123. return 0;
  124. }
  125. static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
  126. {
  127. unsigned int retry = 0;
  128. int i;
  129. /* Ensure pending bytes in ring are read */
  130. for (;;) {
  131. u32 aread = 0;
  132. for (i = 0; i < nvdev->num_chn; i++) {
  133. struct vmbus_channel *chn
  134. = nvdev->chan_table[i].channel;
  135. if (!chn)
  136. continue;
  137. /* make sure receive not running now */
  138. napi_synchronize(&nvdev->chan_table[i].napi);
  139. aread = hv_get_bytes_to_read(&chn->inbound);
  140. if (aread)
  141. break;
  142. aread = hv_get_bytes_to_read(&chn->outbound);
  143. if (aread)
  144. break;
  145. }
  146. if (aread == 0)
  147. return 0;
  148. if (++retry > RETRY_MAX)
  149. return -ETIMEDOUT;
  150. usleep_range(RETRY_US_LO, RETRY_US_HI);
  151. }
  152. }
  153. static void netvsc_tx_disable(struct netvsc_device *nvscdev,
  154. struct net_device *ndev)
  155. {
  156. if (nvscdev) {
  157. nvscdev->tx_disable = true;
  158. virt_wmb(); /* ensure txq will not wake up after stop */
  159. }
  160. netif_tx_disable(ndev);
  161. }
  162. static int netvsc_close(struct net_device *net)
  163. {
  164. struct net_device_context *net_device_ctx = netdev_priv(net);
  165. struct net_device *vf_netdev
  166. = rtnl_dereference(net_device_ctx->vf_netdev);
  167. struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
  168. int ret;
  169. netvsc_tx_disable(nvdev, net);
  170. /* No need to close rndis filter if it is removed already */
  171. if (!nvdev)
  172. return 0;
  173. ret = rndis_filter_close(nvdev);
  174. if (ret != 0) {
  175. netdev_err(net, "unable to close device (ret %d).\n", ret);
  176. return ret;
  177. }
  178. ret = netvsc_wait_until_empty(nvdev);
  179. if (ret)
  180. netdev_err(net, "Ring buffer not empty after closing rndis\n");
  181. if (vf_netdev)
  182. dev_close(vf_netdev);
  183. return ret;
  184. }
  185. static inline void *init_ppi_data(struct rndis_message *msg,
  186. u32 ppi_size, u32 pkt_type)
  187. {
  188. struct rndis_packet *rndis_pkt = &msg->msg.pkt;
  189. struct rndis_per_packet_info *ppi;
  190. rndis_pkt->data_offset += ppi_size;
  191. ppi = (void *)rndis_pkt + rndis_pkt->per_pkt_info_offset
  192. + rndis_pkt->per_pkt_info_len;
  193. ppi->size = ppi_size;
  194. ppi->type = pkt_type;
  195. ppi->internal = 0;
  196. ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
  197. rndis_pkt->per_pkt_info_len += ppi_size;
  198. return ppi + 1;
  199. }
  200. static inline int netvsc_get_tx_queue(struct net_device *ndev,
  201. struct sk_buff *skb, int old_idx)
  202. {
  203. const struct net_device_context *ndc = netdev_priv(ndev);
  204. struct sock *sk = skb->sk;
  205. int q_idx;
  206. q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) &
  207. (VRSS_SEND_TAB_SIZE - 1)];
  208. /* If queue index changed record the new value */
  209. if (q_idx != old_idx &&
  210. sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
  211. sk_tx_queue_set(sk, q_idx);
  212. return q_idx;
  213. }
  214. /*
  215. * Select queue for transmit.
  216. *
  217. * If a valid queue has already been assigned, then use that.
  218. * Otherwise compute tx queue based on hash and the send table.
  219. *
  220. * This is basically similar to default (netdev_pick_tx) with the added step
  221. * of using the host send_table when no other queue has been assigned.
  222. *
  223. * TODO support XPS - but get_xps_queue not exported
  224. */
  225. static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
  226. {
  227. int q_idx = sk_tx_queue_get(skb->sk);
  228. if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
  229. /* If forwarding a packet, we use the recorded queue when
  230. * available for better cache locality.
  231. */
  232. if (skb_rx_queue_recorded(skb))
  233. q_idx = skb_get_rx_queue(skb);
  234. else
  235. q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
  236. }
  237. return q_idx;
  238. }
  239. static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
  240. struct net_device *sb_dev)
  241. {
  242. struct net_device_context *ndc = netdev_priv(ndev);
  243. struct net_device *vf_netdev;
  244. u16 txq;
  245. rcu_read_lock();
  246. vf_netdev = rcu_dereference(ndc->vf_netdev);
  247. if (vf_netdev) {
  248. const struct net_device_ops *vf_ops = vf_netdev->netdev_ops;
  249. if (vf_ops->ndo_select_queue)
  250. txq = vf_ops->ndo_select_queue(vf_netdev, skb, sb_dev);
  251. else
  252. txq = netdev_pick_tx(vf_netdev, skb, NULL);
  253. /* Record the queue selected by VF so that it can be
  254. * used for common case where VF has more queues than
  255. * the synthetic device.
  256. */
  257. qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq;
  258. } else {
  259. txq = netvsc_pick_tx(ndev, skb);
  260. }
  261. rcu_read_unlock();
  262. while (txq >= ndev->real_num_tx_queues)
  263. txq -= ndev->real_num_tx_queues;
  264. return txq;
  265. }
  266. static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
  267. struct hv_netvsc_packet *packet,
  268. struct hv_page_buffer *pb)
  269. {
  270. int frags = skb_shinfo(skb)->nr_frags;
  271. int i;
  272. /* The packet is laid out thus:
  273. * 1. hdr: RNDIS header and PPI
  274. * 2. skb linear data
  275. * 3. skb fragment data
  276. */
  277. pb[0].offset = offset_in_hvpage(hdr);
  278. pb[0].len = len;
  279. pb[0].pfn = virt_to_hvpfn(hdr);
  280. packet->rmsg_size = len;
  281. pb[1].offset = offset_in_hvpage(skb->data);
  282. pb[1].len = skb_headlen(skb);
  283. pb[1].pfn = virt_to_hvpfn(skb->data);
  284. for (i = 0; i < frags; i++) {
  285. skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  286. struct hv_page_buffer *cur_pb = &pb[i + 2];
  287. u64 pfn = page_to_hvpfn(skb_frag_page(frag));
  288. u32 offset = skb_frag_off(frag);
  289. cur_pb->offset = offset_in_hvpage(offset);
  290. cur_pb->len = skb_frag_size(frag);
  291. cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT);
  292. }
  293. return frags + 2;
  294. }
  295. static int count_skb_frag_slots(struct sk_buff *skb)
  296. {
  297. int i, frags = skb_shinfo(skb)->nr_frags;
  298. int pages = 0;
  299. for (i = 0; i < frags; i++) {
  300. skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  301. unsigned long size = skb_frag_size(frag);
  302. unsigned long offset = skb_frag_off(frag);
  303. /* Skip unused frames from start of page */
  304. offset &= ~HV_HYP_PAGE_MASK;
  305. pages += HVPFN_UP(offset + size);
  306. }
  307. return pages;
  308. }
  309. static int netvsc_get_slots(struct sk_buff *skb)
  310. {
  311. char *data = skb->data;
  312. unsigned int offset = offset_in_hvpage(data);
  313. unsigned int len = skb_headlen(skb);
  314. int slots;
  315. int frag_slots;
  316. slots = DIV_ROUND_UP(offset + len, HV_HYP_PAGE_SIZE);
  317. frag_slots = count_skb_frag_slots(skb);
  318. return slots + frag_slots;
  319. }
  320. static u32 net_checksum_info(struct sk_buff *skb)
  321. {
  322. if (skb->protocol == htons(ETH_P_IP)) {
  323. struct iphdr *ip = ip_hdr(skb);
  324. if (ip->protocol == IPPROTO_TCP)
  325. return TRANSPORT_INFO_IPV4_TCP;
  326. else if (ip->protocol == IPPROTO_UDP)
  327. return TRANSPORT_INFO_IPV4_UDP;
  328. } else {
  329. struct ipv6hdr *ip6 = ipv6_hdr(skb);
  330. if (ip6->nexthdr == IPPROTO_TCP)
  331. return TRANSPORT_INFO_IPV6_TCP;
  332. else if (ip6->nexthdr == IPPROTO_UDP)
  333. return TRANSPORT_INFO_IPV6_UDP;
  334. }
  335. return TRANSPORT_INFO_NOT_IP;
  336. }
  337. /* Send skb on the slave VF device. */
  338. static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
  339. struct sk_buff *skb)
  340. {
  341. struct net_device_context *ndev_ctx = netdev_priv(net);
  342. unsigned int len = skb->len;
  343. int rc;
  344. skb->dev = vf_netdev;
  345. skb_record_rx_queue(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
  346. rc = dev_queue_xmit(skb);
  347. if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) {
  348. struct netvsc_vf_pcpu_stats *pcpu_stats
  349. = this_cpu_ptr(ndev_ctx->vf_stats);
  350. u64_stats_update_begin(&pcpu_stats->syncp);
  351. pcpu_stats->tx_packets++;
  352. pcpu_stats->tx_bytes += len;
  353. u64_stats_update_end(&pcpu_stats->syncp);
  354. } else {
  355. this_cpu_inc(ndev_ctx->vf_stats->tx_dropped);
  356. }
  357. return rc;
  358. }
  359. static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
  360. {
  361. struct net_device_context *net_device_ctx = netdev_priv(net);
  362. struct hv_netvsc_packet *packet = NULL;
  363. int ret;
  364. unsigned int num_data_pgs;
  365. struct rndis_message *rndis_msg;
  366. struct net_device *vf_netdev;
  367. u32 rndis_msg_size;
  368. u32 hash;
  369. struct hv_page_buffer pb[MAX_DATA_RANGES];
  370. /* If VF is present and up then redirect packets to it.
  371. * Skip the VF if it is marked down or has no carrier.
  372. * If netpoll is in uses, then VF can not be used either.
  373. */
  374. vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
  375. if (vf_netdev && netif_running(vf_netdev) &&
  376. netif_carrier_ok(vf_netdev) && !netpoll_tx_running(net) &&
  377. net_device_ctx->data_path_is_vf)
  378. return netvsc_vf_xmit(net, vf_netdev, skb);
  379. /* We will atmost need two pages to describe the rndis
  380. * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
  381. * of pages in a single packet. If skb is scattered around
  382. * more pages we try linearizing it.
  383. */
  384. num_data_pgs = netvsc_get_slots(skb) + 2;
  385. if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
  386. ++net_device_ctx->eth_stats.tx_scattered;
  387. if (skb_linearize(skb))
  388. goto no_memory;
  389. num_data_pgs = netvsc_get_slots(skb) + 2;
  390. if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
  391. ++net_device_ctx->eth_stats.tx_too_big;
  392. goto drop;
  393. }
  394. }
  395. /*
  396. * Place the rndis header in the skb head room and
  397. * the skb->cb will be used for hv_netvsc_packet
  398. * structure.
  399. */
  400. ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
  401. if (ret)
  402. goto no_memory;
  403. /* Use the skb control buffer for building up the packet */
  404. BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
  405. sizeof_field(struct sk_buff, cb));
  406. packet = (struct hv_netvsc_packet *)skb->cb;
  407. packet->q_idx = skb_get_queue_mapping(skb);
  408. packet->total_data_buflen = skb->len;
  409. packet->total_bytes = skb->len;
  410. packet->total_packets = 1;
  411. rndis_msg = (struct rndis_message *)skb->head;
  412. /* Add the rndis header */
  413. rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
  414. rndis_msg->msg_len = packet->total_data_buflen;
  415. rndis_msg->msg.pkt = (struct rndis_packet) {
  416. .data_offset = sizeof(struct rndis_packet),
  417. .data_len = packet->total_data_buflen,
  418. .per_pkt_info_offset = sizeof(struct rndis_packet),
  419. };
  420. rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
  421. hash = skb_get_hash_raw(skb);
  422. if (hash != 0 && net->real_num_tx_queues > 1) {
  423. u32 *hash_info;
  424. rndis_msg_size += NDIS_HASH_PPI_SIZE;
  425. hash_info = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
  426. NBL_HASH_VALUE);
  427. *hash_info = hash;
  428. }
  429. /* When using AF_PACKET we need to drop VLAN header from
  430. * the frame and update the SKB to allow the HOST OS
  431. * to transmit the 802.1Q packet
  432. */
  433. if (skb->protocol == htons(ETH_P_8021Q)) {
  434. u16 vlan_tci;
  435. skb_reset_mac_header(skb);
  436. if (eth_type_vlan(eth_hdr(skb)->h_proto)) {
  437. if (unlikely(__skb_vlan_pop(skb, &vlan_tci) != 0)) {
  438. ++net_device_ctx->eth_stats.vlan_error;
  439. goto drop;
  440. }
  441. __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
  442. /* Update the NDIS header pkt lengths */
  443. packet->total_data_buflen -= VLAN_HLEN;
  444. packet->total_bytes -= VLAN_HLEN;
  445. rndis_msg->msg_len = packet->total_data_buflen;
  446. rndis_msg->msg.pkt.data_len = packet->total_data_buflen;
  447. }
  448. }
  449. if (skb_vlan_tag_present(skb)) {
  450. struct ndis_pkt_8021q_info *vlan;
  451. rndis_msg_size += NDIS_VLAN_PPI_SIZE;
  452. vlan = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
  453. IEEE_8021Q_INFO);
  454. vlan->value = 0;
  455. vlan->vlanid = skb_vlan_tag_get_id(skb);
  456. vlan->cfi = skb_vlan_tag_get_cfi(skb);
  457. vlan->pri = skb_vlan_tag_get_prio(skb);
  458. }
  459. if (skb_is_gso(skb)) {
  460. struct ndis_tcp_lso_info *lso_info;
  461. rndis_msg_size += NDIS_LSO_PPI_SIZE;
  462. lso_info = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
  463. TCP_LARGESEND_PKTINFO);
  464. lso_info->value = 0;
  465. lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
  466. if (skb->protocol == htons(ETH_P_IP)) {
  467. lso_info->lso_v2_transmit.ip_version =
  468. NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
  469. ip_hdr(skb)->tot_len = 0;
  470. ip_hdr(skb)->check = 0;
  471. tcp_hdr(skb)->check =
  472. ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
  473. ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
  474. } else {
  475. lso_info->lso_v2_transmit.ip_version =
  476. NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
  477. tcp_v6_gso_csum_prep(skb);
  478. }
  479. lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
  480. lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
  481. } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
  482. if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) {
  483. struct ndis_tcp_ip_checksum_info *csum_info;
  484. rndis_msg_size += NDIS_CSUM_PPI_SIZE;
  485. csum_info = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
  486. TCPIP_CHKSUM_PKTINFO);
  487. csum_info->value = 0;
  488. csum_info->transmit.tcp_header_offset = skb_transport_offset(skb);
  489. if (skb->protocol == htons(ETH_P_IP)) {
  490. csum_info->transmit.is_ipv4 = 1;
  491. if (ip_hdr(skb)->protocol == IPPROTO_TCP)
  492. csum_info->transmit.tcp_checksum = 1;
  493. else
  494. csum_info->transmit.udp_checksum = 1;
  495. } else {
  496. csum_info->transmit.is_ipv6 = 1;
  497. if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
  498. csum_info->transmit.tcp_checksum = 1;
  499. else
  500. csum_info->transmit.udp_checksum = 1;
  501. }
  502. } else {
  503. /* Can't do offload of this type of checksum */
  504. if (skb_checksum_help(skb))
  505. goto drop;
  506. }
  507. }
  508. /* Start filling in the page buffers with the rndis hdr */
  509. rndis_msg->msg_len += rndis_msg_size;
  510. packet->total_data_buflen = rndis_msg->msg_len;
  511. packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
  512. skb, packet, pb);
  513. /* timestamp packet in software */
  514. skb_tx_timestamp(skb);
  515. ret = netvsc_send(net, packet, rndis_msg, pb, skb, xdp_tx);
  516. if (likely(ret == 0))
  517. return NETDEV_TX_OK;
  518. if (ret == -EAGAIN) {
  519. ++net_device_ctx->eth_stats.tx_busy;
  520. return NETDEV_TX_BUSY;
  521. }
  522. if (ret == -ENOSPC)
  523. ++net_device_ctx->eth_stats.tx_no_space;
  524. drop:
  525. dev_kfree_skb_any(skb);
  526. net->stats.tx_dropped++;
  527. return NETDEV_TX_OK;
  528. no_memory:
  529. ++net_device_ctx->eth_stats.tx_no_memory;
  530. goto drop;
  531. }
  532. static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb,
  533. struct net_device *ndev)
  534. {
  535. return netvsc_xmit(skb, ndev, false);
  536. }
  537. /*
  538. * netvsc_linkstatus_callback - Link up/down notification
  539. */
  540. void netvsc_linkstatus_callback(struct net_device *net,
  541. struct rndis_message *resp,
  542. void *data, u32 data_buflen)
  543. {
  544. struct rndis_indicate_status *indicate = &resp->msg.indicate_status;
  545. struct net_device_context *ndev_ctx = netdev_priv(net);
  546. struct netvsc_reconfig *event;
  547. unsigned long flags;
  548. /* Ensure the packet is big enough to access its fields */
  549. if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_indicate_status)) {
  550. netdev_err(net, "invalid rndis_indicate_status packet, len: %u\n",
  551. resp->msg_len);
  552. return;
  553. }
  554. /* Copy the RNDIS indicate status into nvchan->recv_buf */
  555. memcpy(indicate, data + RNDIS_HEADER_SIZE, sizeof(*indicate));
  556. /* Update the physical link speed when changing to another vSwitch */
  557. if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
  558. u32 speed;
  559. /* Validate status_buf_offset and status_buflen.
  560. *
  561. * Certain (pre-Fe) implementations of Hyper-V's vSwitch didn't account
  562. * for the status buffer field in resp->msg_len; perform the validation
  563. * using data_buflen (>= resp->msg_len).
  564. */
  565. if (indicate->status_buflen < sizeof(speed) ||
  566. indicate->status_buf_offset < sizeof(*indicate) ||
  567. data_buflen - RNDIS_HEADER_SIZE < indicate->status_buf_offset ||
  568. data_buflen - RNDIS_HEADER_SIZE - indicate->status_buf_offset
  569. < indicate->status_buflen) {
  570. netdev_err(net, "invalid rndis_indicate_status packet\n");
  571. return;
  572. }
  573. speed = *(u32 *)(data + RNDIS_HEADER_SIZE + indicate->status_buf_offset) / 10000;
  574. ndev_ctx->speed = speed;
  575. return;
  576. }
  577. /* Handle these link change statuses below */
  578. if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE &&
  579. indicate->status != RNDIS_STATUS_MEDIA_CONNECT &&
  580. indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT)
  581. return;
  582. if (net->reg_state != NETREG_REGISTERED)
  583. return;
  584. event = kzalloc(sizeof(*event), GFP_ATOMIC);
  585. if (!event)
  586. return;
  587. event->event = indicate->status;
  588. spin_lock_irqsave(&ndev_ctx->lock, flags);
  589. list_add_tail(&event->list, &ndev_ctx->reconfig_events);
  590. spin_unlock_irqrestore(&ndev_ctx->lock, flags);
  591. schedule_delayed_work(&ndev_ctx->dwork, 0);
  592. }
  593. /* This function should only be called after skb_record_rx_queue() */
  594. void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
  595. {
  596. int rc;
  597. skb->queue_mapping = skb_get_rx_queue(skb);
  598. __skb_push(skb, ETH_HLEN);
  599. rc = netvsc_xmit(skb, ndev, true);
  600. if (dev_xmit_complete(rc))
  601. return;
  602. dev_kfree_skb_any(skb);
  603. ndev->stats.tx_dropped++;
  604. }
  605. static void netvsc_comp_ipcsum(struct sk_buff *skb)
  606. {
  607. struct iphdr *iph = (struct iphdr *)skb->data;
  608. iph->check = 0;
  609. iph->check = ip_fast_csum(iph, iph->ihl);
  610. }
  611. static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
  612. struct netvsc_channel *nvchan,
  613. struct xdp_buff *xdp)
  614. {
  615. struct napi_struct *napi = &nvchan->napi;
  616. const struct ndis_pkt_8021q_info *vlan = &nvchan->rsc.vlan;
  617. const struct ndis_tcp_ip_checksum_info *csum_info =
  618. &nvchan->rsc.csum_info;
  619. const u32 *hash_info = &nvchan->rsc.hash_info;
  620. u8 ppi_flags = nvchan->rsc.ppi_flags;
  621. struct sk_buff *skb;
  622. void *xbuf = xdp->data_hard_start;
  623. int i;
  624. if (xbuf) {
  625. unsigned int hdroom = xdp->data - xdp->data_hard_start;
  626. unsigned int xlen = xdp->data_end - xdp->data;
  627. unsigned int frag_size = xdp->frame_sz;
  628. skb = build_skb(xbuf, frag_size);
  629. if (!skb) {
  630. __free_page(virt_to_page(xbuf));
  631. return NULL;
  632. }
  633. skb_reserve(skb, hdroom);
  634. skb_put(skb, xlen);
  635. skb->dev = napi->dev;
  636. } else {
  637. skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
  638. if (!skb)
  639. return NULL;
  640. /* Copy to skb. This copy is needed here since the memory
  641. * pointed by hv_netvsc_packet cannot be deallocated.
  642. */
  643. for (i = 0; i < nvchan->rsc.cnt; i++)
  644. skb_put_data(skb, nvchan->rsc.data[i],
  645. nvchan->rsc.len[i]);
  646. }
  647. skb->protocol = eth_type_trans(skb, net);
  648. /* skb is already created with CHECKSUM_NONE */
  649. skb_checksum_none_assert(skb);
  650. /* Incoming packets may have IP header checksum verified by the host.
  651. * They may not have IP header checksum computed after coalescing.
  652. * We compute it here if the flags are set, because on Linux, the IP
  653. * checksum is always checked.
  654. */
  655. if ((ppi_flags & NVSC_RSC_CSUM_INFO) && csum_info->receive.ip_checksum_value_invalid &&
  656. csum_info->receive.ip_checksum_succeeded &&
  657. skb->protocol == htons(ETH_P_IP)) {
  658. /* Check that there is enough space to hold the IP header. */
  659. if (skb_headlen(skb) < sizeof(struct iphdr)) {
  660. kfree_skb(skb);
  661. return NULL;
  662. }
  663. netvsc_comp_ipcsum(skb);
  664. }
  665. /* Do L4 checksum offload if enabled and present. */
  666. if ((ppi_flags & NVSC_RSC_CSUM_INFO) && (net->features & NETIF_F_RXCSUM)) {
  667. if (csum_info->receive.tcp_checksum_succeeded ||
  668. csum_info->receive.udp_checksum_succeeded)
  669. skb->ip_summed = CHECKSUM_UNNECESSARY;
  670. }
  671. if ((ppi_flags & NVSC_RSC_HASH_INFO) && (net->features & NETIF_F_RXHASH))
  672. skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
  673. if (ppi_flags & NVSC_RSC_VLAN) {
  674. u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) |
  675. (vlan->cfi ? VLAN_CFI_MASK : 0);
  676. __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
  677. vlan_tci);
  678. }
  679. return skb;
  680. }
  681. /*
  682. * netvsc_recv_callback - Callback when we receive a packet from the
  683. * "wire" on the specified device.
  684. */
  685. int netvsc_recv_callback(struct net_device *net,
  686. struct netvsc_device *net_device,
  687. struct netvsc_channel *nvchan)
  688. {
  689. struct net_device_context *net_device_ctx = netdev_priv(net);
  690. struct vmbus_channel *channel = nvchan->channel;
  691. u16 q_idx = channel->offermsg.offer.sub_channel_index;
  692. struct sk_buff *skb;
  693. struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats;
  694. struct xdp_buff xdp;
  695. u32 act;
  696. if (net->reg_state != NETREG_REGISTERED)
  697. return NVSP_STAT_FAIL;
  698. act = netvsc_run_xdp(net, nvchan, &xdp);
  699. if (act == XDP_REDIRECT)
  700. return NVSP_STAT_SUCCESS;
  701. if (act != XDP_PASS && act != XDP_TX) {
  702. u64_stats_update_begin(&rx_stats->syncp);
  703. rx_stats->xdp_drop++;
  704. u64_stats_update_end(&rx_stats->syncp);
  705. return NVSP_STAT_SUCCESS; /* consumed by XDP */
  706. }
  707. /* Allocate a skb - TODO direct I/O to pages? */
  708. skb = netvsc_alloc_recv_skb(net, nvchan, &xdp);
  709. if (unlikely(!skb)) {
  710. ++net_device_ctx->eth_stats.rx_no_memory;
  711. return NVSP_STAT_FAIL;
  712. }
  713. skb_record_rx_queue(skb, q_idx);
  714. /*
  715. * Even if injecting the packet, record the statistics
  716. * on the synthetic device because modifying the VF device
  717. * statistics will not work correctly.
  718. */
  719. u64_stats_update_begin(&rx_stats->syncp);
  720. if (act == XDP_TX)
  721. rx_stats->xdp_tx++;
  722. rx_stats->packets++;
  723. rx_stats->bytes += nvchan->rsc.pktlen;
  724. if (skb->pkt_type == PACKET_BROADCAST)
  725. ++rx_stats->broadcast;
  726. else if (skb->pkt_type == PACKET_MULTICAST)
  727. ++rx_stats->multicast;
  728. u64_stats_update_end(&rx_stats->syncp);
  729. if (act == XDP_TX) {
  730. netvsc_xdp_xmit(skb, net);
  731. return NVSP_STAT_SUCCESS;
  732. }
  733. napi_gro_receive(&nvchan->napi, skb);
  734. return NVSP_STAT_SUCCESS;
  735. }
  736. static void netvsc_get_drvinfo(struct net_device *net,
  737. struct ethtool_drvinfo *info)
  738. {
  739. strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
  740. strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
  741. }
  742. static void netvsc_get_channels(struct net_device *net,
  743. struct ethtool_channels *channel)
  744. {
  745. struct net_device_context *net_device_ctx = netdev_priv(net);
  746. struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
  747. if (nvdev) {
  748. channel->max_combined = nvdev->max_chn;
  749. channel->combined_count = nvdev->num_chn;
  750. }
  751. }
  752. /* Alloc struct netvsc_device_info, and initialize it from either existing
  753. * struct netvsc_device, or from default values.
  754. */
  755. static
  756. struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev)
  757. {
  758. struct netvsc_device_info *dev_info;
  759. struct bpf_prog *prog;
  760. dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC);
  761. if (!dev_info)
  762. return NULL;
  763. if (nvdev) {
  764. ASSERT_RTNL();
  765. dev_info->num_chn = nvdev->num_chn;
  766. dev_info->send_sections = nvdev->send_section_cnt;
  767. dev_info->send_section_size = nvdev->send_section_size;
  768. dev_info->recv_sections = nvdev->recv_section_cnt;
  769. dev_info->recv_section_size = nvdev->recv_section_size;
  770. memcpy(dev_info->rss_key, nvdev->extension->rss_key,
  771. NETVSC_HASH_KEYLEN);
  772. prog = netvsc_xdp_get(nvdev);
  773. if (prog) {
  774. bpf_prog_inc(prog);
  775. dev_info->bprog = prog;
  776. }
  777. } else {
  778. dev_info->num_chn = max(VRSS_CHANNEL_DEFAULT,
  779. netif_get_num_default_rss_queues());
  780. dev_info->send_sections = NETVSC_DEFAULT_TX;
  781. dev_info->send_section_size = NETVSC_SEND_SECTION_SIZE;
  782. dev_info->recv_sections = NETVSC_DEFAULT_RX;
  783. dev_info->recv_section_size = NETVSC_RECV_SECTION_SIZE;
  784. }
  785. return dev_info;
  786. }
  787. /* Free struct netvsc_device_info */
  788. static void netvsc_devinfo_put(struct netvsc_device_info *dev_info)
  789. {
  790. if (dev_info->bprog) {
  791. ASSERT_RTNL();
  792. bpf_prog_put(dev_info->bprog);
  793. }
  794. kfree(dev_info);
  795. }
  796. static int netvsc_detach(struct net_device *ndev,
  797. struct netvsc_device *nvdev)
  798. {
  799. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  800. struct hv_device *hdev = ndev_ctx->device_ctx;
  801. int ret;
  802. /* Don't try continuing to try and setup sub channels */
  803. if (cancel_work_sync(&nvdev->subchan_work))
  804. nvdev->num_chn = 1;
  805. netvsc_xdp_set(ndev, NULL, NULL, nvdev);
  806. /* If device was up (receiving) then shutdown */
  807. if (netif_running(ndev)) {
  808. netvsc_tx_disable(nvdev, ndev);
  809. ret = rndis_filter_close(nvdev);
  810. if (ret) {
  811. netdev_err(ndev,
  812. "unable to close device (ret %d).\n", ret);
  813. return ret;
  814. }
  815. ret = netvsc_wait_until_empty(nvdev);
  816. if (ret) {
  817. netdev_err(ndev,
  818. "Ring buffer not empty after closing rndis\n");
  819. return ret;
  820. }
  821. }
  822. netif_device_detach(ndev);
  823. rndis_filter_device_remove(hdev, nvdev);
  824. return 0;
  825. }
  826. static int netvsc_attach(struct net_device *ndev,
  827. struct netvsc_device_info *dev_info)
  828. {
  829. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  830. struct hv_device *hdev = ndev_ctx->device_ctx;
  831. struct netvsc_device *nvdev;
  832. struct rndis_device *rdev;
  833. struct bpf_prog *prog;
  834. int ret = 0;
  835. nvdev = rndis_filter_device_add(hdev, dev_info);
  836. if (IS_ERR(nvdev))
  837. return PTR_ERR(nvdev);
  838. if (nvdev->num_chn > 1) {
  839. ret = rndis_set_subchannel(ndev, nvdev, dev_info);
  840. /* if unavailable, just proceed with one queue */
  841. if (ret) {
  842. nvdev->max_chn = 1;
  843. nvdev->num_chn = 1;
  844. }
  845. }
  846. prog = dev_info->bprog;
  847. if (prog) {
  848. bpf_prog_inc(prog);
  849. ret = netvsc_xdp_set(ndev, prog, NULL, nvdev);
  850. if (ret) {
  851. bpf_prog_put(prog);
  852. goto err1;
  853. }
  854. }
  855. /* In any case device is now ready */
  856. nvdev->tx_disable = false;
  857. netif_device_attach(ndev);
  858. /* Note: enable and attach happen when sub-channels setup */
  859. netif_carrier_off(ndev);
  860. if (netif_running(ndev)) {
  861. ret = rndis_filter_open(nvdev);
  862. if (ret)
  863. goto err2;
  864. rdev = nvdev->extension;
  865. if (!rdev->link_state)
  866. netif_carrier_on(ndev);
  867. }
  868. return 0;
  869. err2:
  870. netif_device_detach(ndev);
  871. err1:
  872. rndis_filter_device_remove(hdev, nvdev);
  873. return ret;
  874. }
  875. static int netvsc_set_channels(struct net_device *net,
  876. struct ethtool_channels *channels)
  877. {
  878. struct net_device_context *net_device_ctx = netdev_priv(net);
  879. struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
  880. unsigned int orig, count = channels->combined_count;
  881. struct netvsc_device_info *device_info;
  882. int ret;
  883. /* We do not support separate count for rx, tx, or other */
  884. if (count == 0 ||
  885. channels->rx_count || channels->tx_count || channels->other_count)
  886. return -EINVAL;
  887. if (!nvdev || nvdev->destroy)
  888. return -ENODEV;
  889. if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5)
  890. return -EINVAL;
  891. if (count > nvdev->max_chn)
  892. return -EINVAL;
  893. orig = nvdev->num_chn;
  894. device_info = netvsc_devinfo_get(nvdev);
  895. if (!device_info)
  896. return -ENOMEM;
  897. device_info->num_chn = count;
  898. ret = netvsc_detach(net, nvdev);
  899. if (ret)
  900. goto out;
  901. ret = netvsc_attach(net, device_info);
  902. if (ret) {
  903. device_info->num_chn = orig;
  904. if (netvsc_attach(net, device_info))
  905. netdev_err(net, "restoring channel setting failed\n");
  906. }
  907. out:
  908. netvsc_devinfo_put(device_info);
  909. return ret;
  910. }
  911. static void netvsc_init_settings(struct net_device *dev)
  912. {
  913. struct net_device_context *ndc = netdev_priv(dev);
  914. ndc->l4_hash = HV_DEFAULT_L4HASH;
  915. ndc->speed = SPEED_UNKNOWN;
  916. ndc->duplex = DUPLEX_FULL;
  917. dev->features = NETIF_F_LRO;
  918. }
  919. static int netvsc_get_link_ksettings(struct net_device *dev,
  920. struct ethtool_link_ksettings *cmd)
  921. {
  922. struct net_device_context *ndc = netdev_priv(dev);
  923. struct net_device *vf_netdev;
  924. vf_netdev = rtnl_dereference(ndc->vf_netdev);
  925. if (vf_netdev)
  926. return __ethtool_get_link_ksettings(vf_netdev, cmd);
  927. cmd->base.speed = ndc->speed;
  928. cmd->base.duplex = ndc->duplex;
  929. cmd->base.port = PORT_OTHER;
  930. return 0;
  931. }
  932. static int netvsc_set_link_ksettings(struct net_device *dev,
  933. const struct ethtool_link_ksettings *cmd)
  934. {
  935. struct net_device_context *ndc = netdev_priv(dev);
  936. struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
  937. if (vf_netdev) {
  938. if (!vf_netdev->ethtool_ops->set_link_ksettings)
  939. return -EOPNOTSUPP;
  940. return vf_netdev->ethtool_ops->set_link_ksettings(vf_netdev,
  941. cmd);
  942. }
  943. return ethtool_virtdev_set_link_ksettings(dev, cmd,
  944. &ndc->speed, &ndc->duplex);
  945. }
  946. static int netvsc_change_mtu(struct net_device *ndev, int mtu)
  947. {
  948. struct net_device_context *ndevctx = netdev_priv(ndev);
  949. struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
  950. struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
  951. int orig_mtu = ndev->mtu;
  952. struct netvsc_device_info *device_info;
  953. int ret = 0;
  954. if (!nvdev || nvdev->destroy)
  955. return -ENODEV;
  956. device_info = netvsc_devinfo_get(nvdev);
  957. if (!device_info)
  958. return -ENOMEM;
  959. /* Change MTU of underlying VF netdev first. */
  960. if (vf_netdev) {
  961. ret = dev_set_mtu(vf_netdev, mtu);
  962. if (ret)
  963. goto out;
  964. }
  965. ret = netvsc_detach(ndev, nvdev);
  966. if (ret)
  967. goto rollback_vf;
  968. WRITE_ONCE(ndev->mtu, mtu);
  969. ret = netvsc_attach(ndev, device_info);
  970. if (!ret)
  971. goto out;
  972. /* Attempt rollback to original MTU */
  973. WRITE_ONCE(ndev->mtu, orig_mtu);
  974. if (netvsc_attach(ndev, device_info))
  975. netdev_err(ndev, "restoring mtu failed\n");
  976. rollback_vf:
  977. if (vf_netdev)
  978. dev_set_mtu(vf_netdev, orig_mtu);
  979. out:
  980. netvsc_devinfo_put(device_info);
  981. return ret;
  982. }
  983. static void netvsc_get_vf_stats(struct net_device *net,
  984. struct netvsc_vf_pcpu_stats *tot)
  985. {
  986. struct net_device_context *ndev_ctx = netdev_priv(net);
  987. int i;
  988. memset(tot, 0, sizeof(*tot));
  989. for_each_possible_cpu(i) {
  990. const struct netvsc_vf_pcpu_stats *stats
  991. = per_cpu_ptr(ndev_ctx->vf_stats, i);
  992. u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
  993. unsigned int start;
  994. do {
  995. start = u64_stats_fetch_begin(&stats->syncp);
  996. rx_packets = stats->rx_packets;
  997. tx_packets = stats->tx_packets;
  998. rx_bytes = stats->rx_bytes;
  999. tx_bytes = stats->tx_bytes;
  1000. } while (u64_stats_fetch_retry(&stats->syncp, start));
  1001. tot->rx_packets += rx_packets;
  1002. tot->tx_packets += tx_packets;
  1003. tot->rx_bytes += rx_bytes;
  1004. tot->tx_bytes += tx_bytes;
  1005. tot->tx_dropped += stats->tx_dropped;
  1006. }
  1007. }
  1008. static void netvsc_get_pcpu_stats(struct net_device *net,
  1009. struct netvsc_ethtool_pcpu_stats *pcpu_tot)
  1010. {
  1011. struct net_device_context *ndev_ctx = netdev_priv(net);
  1012. struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
  1013. int i;
  1014. /* fetch percpu stats of vf */
  1015. for_each_possible_cpu(i) {
  1016. const struct netvsc_vf_pcpu_stats *stats =
  1017. per_cpu_ptr(ndev_ctx->vf_stats, i);
  1018. struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[i];
  1019. unsigned int start;
  1020. do {
  1021. start = u64_stats_fetch_begin(&stats->syncp);
  1022. this_tot->vf_rx_packets = stats->rx_packets;
  1023. this_tot->vf_tx_packets = stats->tx_packets;
  1024. this_tot->vf_rx_bytes = stats->rx_bytes;
  1025. this_tot->vf_tx_bytes = stats->tx_bytes;
  1026. } while (u64_stats_fetch_retry(&stats->syncp, start));
  1027. this_tot->rx_packets = this_tot->vf_rx_packets;
  1028. this_tot->tx_packets = this_tot->vf_tx_packets;
  1029. this_tot->rx_bytes = this_tot->vf_rx_bytes;
  1030. this_tot->tx_bytes = this_tot->vf_tx_bytes;
  1031. }
  1032. /* fetch percpu stats of netvsc */
  1033. for (i = 0; i < nvdev->num_chn; i++) {
  1034. const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
  1035. const struct netvsc_stats_tx *tx_stats;
  1036. const struct netvsc_stats_rx *rx_stats;
  1037. struct netvsc_ethtool_pcpu_stats *this_tot =
  1038. &pcpu_tot[nvchan->channel->target_cpu];
  1039. u64 packets, bytes;
  1040. unsigned int start;
  1041. tx_stats = &nvchan->tx_stats;
  1042. do {
  1043. start = u64_stats_fetch_begin(&tx_stats->syncp);
  1044. packets = tx_stats->packets;
  1045. bytes = tx_stats->bytes;
  1046. } while (u64_stats_fetch_retry(&tx_stats->syncp, start));
  1047. this_tot->tx_bytes += bytes;
  1048. this_tot->tx_packets += packets;
  1049. rx_stats = &nvchan->rx_stats;
  1050. do {
  1051. start = u64_stats_fetch_begin(&rx_stats->syncp);
  1052. packets = rx_stats->packets;
  1053. bytes = rx_stats->bytes;
  1054. } while (u64_stats_fetch_retry(&rx_stats->syncp, start));
  1055. this_tot->rx_bytes += bytes;
  1056. this_tot->rx_packets += packets;
  1057. }
  1058. }
  1059. static void netvsc_get_stats64(struct net_device *net,
  1060. struct rtnl_link_stats64 *t)
  1061. {
  1062. struct net_device_context *ndev_ctx = netdev_priv(net);
  1063. struct netvsc_device *nvdev;
  1064. struct netvsc_vf_pcpu_stats vf_tot;
  1065. int i;
  1066. rcu_read_lock();
  1067. nvdev = rcu_dereference(ndev_ctx->nvdev);
  1068. if (!nvdev)
  1069. goto out;
  1070. netdev_stats_to_stats64(t, &net->stats);
  1071. netvsc_get_vf_stats(net, &vf_tot);
  1072. t->rx_packets += vf_tot.rx_packets;
  1073. t->tx_packets += vf_tot.tx_packets;
  1074. t->rx_bytes += vf_tot.rx_bytes;
  1075. t->tx_bytes += vf_tot.tx_bytes;
  1076. t->tx_dropped += vf_tot.tx_dropped;
  1077. for (i = 0; i < nvdev->num_chn; i++) {
  1078. const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
  1079. const struct netvsc_stats_tx *tx_stats;
  1080. const struct netvsc_stats_rx *rx_stats;
  1081. u64 packets, bytes, multicast;
  1082. unsigned int start;
  1083. tx_stats = &nvchan->tx_stats;
  1084. do {
  1085. start = u64_stats_fetch_begin(&tx_stats->syncp);
  1086. packets = tx_stats->packets;
  1087. bytes = tx_stats->bytes;
  1088. } while (u64_stats_fetch_retry(&tx_stats->syncp, start));
  1089. t->tx_bytes += bytes;
  1090. t->tx_packets += packets;
  1091. rx_stats = &nvchan->rx_stats;
  1092. do {
  1093. start = u64_stats_fetch_begin(&rx_stats->syncp);
  1094. packets = rx_stats->packets;
  1095. bytes = rx_stats->bytes;
  1096. multicast = rx_stats->multicast + rx_stats->broadcast;
  1097. } while (u64_stats_fetch_retry(&rx_stats->syncp, start));
  1098. t->rx_bytes += bytes;
  1099. t->rx_packets += packets;
  1100. t->multicast += multicast;
  1101. }
  1102. out:
  1103. rcu_read_unlock();
  1104. }
  1105. static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
  1106. {
  1107. struct net_device_context *ndc = netdev_priv(ndev);
  1108. struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
  1109. struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
  1110. struct sockaddr *addr = p;
  1111. int err;
  1112. err = eth_prepare_mac_addr_change(ndev, p);
  1113. if (err)
  1114. return err;
  1115. if (!nvdev)
  1116. return -ENODEV;
  1117. if (vf_netdev) {
  1118. err = dev_set_mac_address(vf_netdev, addr, NULL);
  1119. if (err)
  1120. return err;
  1121. }
  1122. err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
  1123. if (!err) {
  1124. eth_commit_mac_addr_change(ndev, p);
  1125. } else if (vf_netdev) {
  1126. /* rollback change on VF */
  1127. memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
  1128. dev_set_mac_address(vf_netdev, addr, NULL);
  1129. }
  1130. return err;
  1131. }
  1132. static const struct {
  1133. char name[ETH_GSTRING_LEN];
  1134. u16 offset;
  1135. } netvsc_stats[] = {
  1136. { "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) },
  1137. { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) },
  1138. { "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) },
  1139. { "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) },
  1140. { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
  1141. { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
  1142. { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
  1143. { "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
  1144. { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
  1145. { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
  1146. { "vlan_error", offsetof(struct netvsc_ethtool_stats, vlan_error) },
  1147. }, pcpu_stats[] = {
  1148. { "cpu%u_rx_packets",
  1149. offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
  1150. { "cpu%u_rx_bytes",
  1151. offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
  1152. { "cpu%u_tx_packets",
  1153. offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
  1154. { "cpu%u_tx_bytes",
  1155. offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
  1156. { "cpu%u_vf_rx_packets",
  1157. offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
  1158. { "cpu%u_vf_rx_bytes",
  1159. offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
  1160. { "cpu%u_vf_tx_packets",
  1161. offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
  1162. { "cpu%u_vf_tx_bytes",
  1163. offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
  1164. }, vf_stats[] = {
  1165. { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
  1166. { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
  1167. { "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) },
  1168. { "vf_tx_bytes", offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) },
  1169. { "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) },
  1170. };
  1171. #define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)
  1172. #define NETVSC_VF_STATS_LEN ARRAY_SIZE(vf_stats)
  1173. /* statistics per queue (rx/tx packets/bytes) */
  1174. #define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
  1175. /* 8 statistics per queue (rx/tx packets/bytes, XDP actions) */
  1176. #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 8)
  1177. static int netvsc_get_sset_count(struct net_device *dev, int string_set)
  1178. {
  1179. struct net_device_context *ndc = netdev_priv(dev);
  1180. struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
  1181. if (!nvdev)
  1182. return -ENODEV;
  1183. switch (string_set) {
  1184. case ETH_SS_STATS:
  1185. return NETVSC_GLOBAL_STATS_LEN
  1186. + NETVSC_VF_STATS_LEN
  1187. + NETVSC_QUEUE_STATS_LEN(nvdev)
  1188. + NETVSC_PCPU_STATS_LEN;
  1189. default:
  1190. return -EINVAL;
  1191. }
  1192. }
  1193. static void netvsc_get_ethtool_stats(struct net_device *dev,
  1194. struct ethtool_stats *stats, u64 *data)
  1195. {
  1196. struct net_device_context *ndc = netdev_priv(dev);
  1197. struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
  1198. const void *nds = &ndc->eth_stats;
  1199. const struct netvsc_stats_tx *tx_stats;
  1200. const struct netvsc_stats_rx *rx_stats;
  1201. struct netvsc_vf_pcpu_stats sum;
  1202. struct netvsc_ethtool_pcpu_stats *pcpu_sum;
  1203. unsigned int start;
  1204. u64 packets, bytes;
  1205. u64 xdp_drop;
  1206. u64 xdp_redirect;
  1207. u64 xdp_tx;
  1208. u64 xdp_xmit;
  1209. int i, j, cpu;
  1210. if (!nvdev)
  1211. return;
  1212. for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
  1213. data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
  1214. netvsc_get_vf_stats(dev, &sum);
  1215. for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
  1216. data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
  1217. for (j = 0; j < nvdev->num_chn; j++) {
  1218. tx_stats = &nvdev->chan_table[j].tx_stats;
  1219. do {
  1220. start = u64_stats_fetch_begin(&tx_stats->syncp);
  1221. packets = tx_stats->packets;
  1222. bytes = tx_stats->bytes;
  1223. xdp_xmit = tx_stats->xdp_xmit;
  1224. } while (u64_stats_fetch_retry(&tx_stats->syncp, start));
  1225. data[i++] = packets;
  1226. data[i++] = bytes;
  1227. data[i++] = xdp_xmit;
  1228. rx_stats = &nvdev->chan_table[j].rx_stats;
  1229. do {
  1230. start = u64_stats_fetch_begin(&rx_stats->syncp);
  1231. packets = rx_stats->packets;
  1232. bytes = rx_stats->bytes;
  1233. xdp_drop = rx_stats->xdp_drop;
  1234. xdp_redirect = rx_stats->xdp_redirect;
  1235. xdp_tx = rx_stats->xdp_tx;
  1236. } while (u64_stats_fetch_retry(&rx_stats->syncp, start));
  1237. data[i++] = packets;
  1238. data[i++] = bytes;
  1239. data[i++] = xdp_drop;
  1240. data[i++] = xdp_redirect;
  1241. data[i++] = xdp_tx;
  1242. }
  1243. pcpu_sum = kvmalloc_array(num_possible_cpus(),
  1244. sizeof(struct netvsc_ethtool_pcpu_stats),
  1245. GFP_KERNEL);
  1246. if (!pcpu_sum)
  1247. return;
  1248. netvsc_get_pcpu_stats(dev, pcpu_sum);
  1249. for_each_present_cpu(cpu) {
  1250. struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
  1251. for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
  1252. data[i++] = *(u64 *)((void *)this_sum
  1253. + pcpu_stats[j].offset);
  1254. }
  1255. kvfree(pcpu_sum);
  1256. }
  1257. static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  1258. {
  1259. struct net_device_context *ndc = netdev_priv(dev);
  1260. struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
  1261. u8 *p = data;
  1262. int i, cpu;
  1263. if (!nvdev)
  1264. return;
  1265. switch (stringset) {
  1266. case ETH_SS_STATS:
  1267. for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
  1268. ethtool_puts(&p, netvsc_stats[i].name);
  1269. for (i = 0; i < ARRAY_SIZE(vf_stats); i++)
  1270. ethtool_puts(&p, vf_stats[i].name);
  1271. for (i = 0; i < nvdev->num_chn; i++) {
  1272. ethtool_sprintf(&p, "tx_queue_%u_packets", i);
  1273. ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
  1274. ethtool_sprintf(&p, "tx_queue_%u_xdp_xmit", i);
  1275. ethtool_sprintf(&p, "rx_queue_%u_packets", i);
  1276. ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
  1277. ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i);
  1278. ethtool_sprintf(&p, "rx_queue_%u_xdp_redirect", i);
  1279. ethtool_sprintf(&p, "rx_queue_%u_xdp_tx", i);
  1280. }
  1281. for_each_present_cpu(cpu) {
  1282. for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++)
  1283. ethtool_sprintf(&p, pcpu_stats[i].name, cpu);
  1284. }
  1285. break;
  1286. }
  1287. }
  1288. static int
  1289. netvsc_get_rss_hash_opts(struct net_device_context *ndc,
  1290. struct ethtool_rxnfc *info)
  1291. {
  1292. const u32 l4_flag = RXH_L4_B_0_1 | RXH_L4_B_2_3;
  1293. info->data = RXH_IP_SRC | RXH_IP_DST;
  1294. switch (info->flow_type) {
  1295. case TCP_V4_FLOW:
  1296. if (ndc->l4_hash & HV_TCP4_L4HASH)
  1297. info->data |= l4_flag;
  1298. break;
  1299. case TCP_V6_FLOW:
  1300. if (ndc->l4_hash & HV_TCP6_L4HASH)
  1301. info->data |= l4_flag;
  1302. break;
  1303. case UDP_V4_FLOW:
  1304. if (ndc->l4_hash & HV_UDP4_L4HASH)
  1305. info->data |= l4_flag;
  1306. break;
  1307. case UDP_V6_FLOW:
  1308. if (ndc->l4_hash & HV_UDP6_L4HASH)
  1309. info->data |= l4_flag;
  1310. break;
  1311. case IPV4_FLOW:
  1312. case IPV6_FLOW:
  1313. break;
  1314. default:
  1315. info->data = 0;
  1316. break;
  1317. }
  1318. return 0;
  1319. }
  1320. static int
  1321. netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
  1322. u32 *rules)
  1323. {
  1324. struct net_device_context *ndc = netdev_priv(dev);
  1325. struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
  1326. if (!nvdev)
  1327. return -ENODEV;
  1328. switch (info->cmd) {
  1329. case ETHTOOL_GRXRINGS:
  1330. info->data = nvdev->num_chn;
  1331. return 0;
  1332. case ETHTOOL_GRXFH:
  1333. return netvsc_get_rss_hash_opts(ndc, info);
  1334. }
  1335. return -EOPNOTSUPP;
  1336. }
  1337. static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
  1338. struct ethtool_rxnfc *info)
  1339. {
  1340. if (info->data == (RXH_IP_SRC | RXH_IP_DST |
  1341. RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
  1342. switch (info->flow_type) {
  1343. case TCP_V4_FLOW:
  1344. ndc->l4_hash |= HV_TCP4_L4HASH;
  1345. break;
  1346. case TCP_V6_FLOW:
  1347. ndc->l4_hash |= HV_TCP6_L4HASH;
  1348. break;
  1349. case UDP_V4_FLOW:
  1350. ndc->l4_hash |= HV_UDP4_L4HASH;
  1351. break;
  1352. case UDP_V6_FLOW:
  1353. ndc->l4_hash |= HV_UDP6_L4HASH;
  1354. break;
  1355. default:
  1356. return -EOPNOTSUPP;
  1357. }
  1358. return 0;
  1359. }
  1360. if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
  1361. switch (info->flow_type) {
  1362. case TCP_V4_FLOW:
  1363. ndc->l4_hash &= ~HV_TCP4_L4HASH;
  1364. break;
  1365. case TCP_V6_FLOW:
  1366. ndc->l4_hash &= ~HV_TCP6_L4HASH;
  1367. break;
  1368. case UDP_V4_FLOW:
  1369. ndc->l4_hash &= ~HV_UDP4_L4HASH;
  1370. break;
  1371. case UDP_V6_FLOW:
  1372. ndc->l4_hash &= ~HV_UDP6_L4HASH;
  1373. break;
  1374. default:
  1375. return -EOPNOTSUPP;
  1376. }
  1377. return 0;
  1378. }
  1379. return -EOPNOTSUPP;
  1380. }
  1381. static int
  1382. netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
  1383. {
  1384. struct net_device_context *ndc = netdev_priv(ndev);
  1385. if (info->cmd == ETHTOOL_SRXFH)
  1386. return netvsc_set_rss_hash_opts(ndc, info);
  1387. return -EOPNOTSUPP;
  1388. }
  1389. static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
  1390. {
  1391. return NETVSC_HASH_KEYLEN;
  1392. }
  1393. static u32 netvsc_rss_indir_size(struct net_device *dev)
  1394. {
  1395. struct net_device_context *ndc = netdev_priv(dev);
  1396. return ndc->rx_table_sz;
  1397. }
  1398. static int netvsc_get_rxfh(struct net_device *dev,
  1399. struct ethtool_rxfh_param *rxfh)
  1400. {
  1401. struct net_device_context *ndc = netdev_priv(dev);
  1402. struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
  1403. struct rndis_device *rndis_dev;
  1404. int i;
  1405. if (!ndev)
  1406. return -ENODEV;
  1407. rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
  1408. rndis_dev = ndev->extension;
  1409. if (rxfh->indir) {
  1410. for (i = 0; i < ndc->rx_table_sz; i++)
  1411. rxfh->indir[i] = ndc->rx_table[i];
  1412. }
  1413. if (rxfh->key)
  1414. memcpy(rxfh->key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN);
  1415. return 0;
  1416. }
  1417. static int netvsc_set_rxfh(struct net_device *dev,
  1418. struct ethtool_rxfh_param *rxfh,
  1419. struct netlink_ext_ack *extack)
  1420. {
  1421. struct net_device_context *ndc = netdev_priv(dev);
  1422. struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
  1423. struct rndis_device *rndis_dev;
  1424. u8 *key = rxfh->key;
  1425. int i;
  1426. if (!ndev)
  1427. return -ENODEV;
  1428. if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
  1429. rxfh->hfunc != ETH_RSS_HASH_TOP)
  1430. return -EOPNOTSUPP;
  1431. rndis_dev = ndev->extension;
  1432. if (rxfh->indir) {
  1433. for (i = 0; i < ndc->rx_table_sz; i++)
  1434. if (rxfh->indir[i] >= ndev->num_chn)
  1435. return -EINVAL;
  1436. for (i = 0; i < ndc->rx_table_sz; i++)
  1437. ndc->rx_table[i] = rxfh->indir[i];
  1438. }
  1439. if (!key) {
  1440. if (!rxfh->indir)
  1441. return 0;
  1442. key = rndis_dev->rss_key;
  1443. }
  1444. return rndis_filter_set_rss_param(rndis_dev, key);
  1445. }
  1446. /* Hyper-V RNDIS protocol does not have ring in the HW sense.
  1447. * It does have pre-allocated receive area which is divided into sections.
  1448. */
  1449. static void __netvsc_get_ringparam(struct netvsc_device *nvdev,
  1450. struct ethtool_ringparam *ring)
  1451. {
  1452. u32 max_buf_size;
  1453. ring->rx_pending = nvdev->recv_section_cnt;
  1454. ring->tx_pending = nvdev->send_section_cnt;
  1455. if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
  1456. max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
  1457. else
  1458. max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
  1459. ring->rx_max_pending = max_buf_size / nvdev->recv_section_size;
  1460. ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE
  1461. / nvdev->send_section_size;
  1462. }
  1463. static void netvsc_get_ringparam(struct net_device *ndev,
  1464. struct ethtool_ringparam *ring,
  1465. struct kernel_ethtool_ringparam *kernel_ring,
  1466. struct netlink_ext_ack *extack)
  1467. {
  1468. struct net_device_context *ndevctx = netdev_priv(ndev);
  1469. struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
  1470. if (!nvdev)
  1471. return;
  1472. __netvsc_get_ringparam(nvdev, ring);
  1473. }
  1474. static int netvsc_set_ringparam(struct net_device *ndev,
  1475. struct ethtool_ringparam *ring,
  1476. struct kernel_ethtool_ringparam *kernel_ring,
  1477. struct netlink_ext_ack *extack)
  1478. {
  1479. struct net_device_context *ndevctx = netdev_priv(ndev);
  1480. struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
  1481. struct netvsc_device_info *device_info;
  1482. struct ethtool_ringparam orig;
  1483. u32 new_tx, new_rx;
  1484. int ret = 0;
  1485. if (!nvdev || nvdev->destroy)
  1486. return -ENODEV;
  1487. memset(&orig, 0, sizeof(orig));
  1488. __netvsc_get_ringparam(nvdev, &orig);
  1489. new_tx = clamp_t(u32, ring->tx_pending,
  1490. NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending);
  1491. new_rx = clamp_t(u32, ring->rx_pending,
  1492. NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending);
  1493. if (new_tx == orig.tx_pending &&
  1494. new_rx == orig.rx_pending)
  1495. return 0; /* no change */
  1496. device_info = netvsc_devinfo_get(nvdev);
  1497. if (!device_info)
  1498. return -ENOMEM;
  1499. device_info->send_sections = new_tx;
  1500. device_info->recv_sections = new_rx;
  1501. ret = netvsc_detach(ndev, nvdev);
  1502. if (ret)
  1503. goto out;
  1504. ret = netvsc_attach(ndev, device_info);
  1505. if (ret) {
  1506. device_info->send_sections = orig.tx_pending;
  1507. device_info->recv_sections = orig.rx_pending;
  1508. if (netvsc_attach(ndev, device_info))
  1509. netdev_err(ndev, "restoring ringparam failed");
  1510. }
  1511. out:
  1512. netvsc_devinfo_put(device_info);
  1513. return ret;
  1514. }
  1515. static netdev_features_t netvsc_fix_features(struct net_device *ndev,
  1516. netdev_features_t features)
  1517. {
  1518. struct net_device_context *ndevctx = netdev_priv(ndev);
  1519. struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
  1520. if (!nvdev || nvdev->destroy)
  1521. return features;
  1522. if ((features & NETIF_F_LRO) && netvsc_xdp_get(nvdev)) {
  1523. features ^= NETIF_F_LRO;
  1524. netdev_info(ndev, "Skip LRO - unsupported with XDP\n");
  1525. }
  1526. return features;
  1527. }
  1528. static int netvsc_set_features(struct net_device *ndev,
  1529. netdev_features_t features)
  1530. {
  1531. netdev_features_t change = features ^ ndev->features;
  1532. struct net_device_context *ndevctx = netdev_priv(ndev);
  1533. struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
  1534. struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
  1535. struct ndis_offload_params offloads;
  1536. int ret = 0;
  1537. if (!nvdev || nvdev->destroy)
  1538. return -ENODEV;
  1539. if (!(change & NETIF_F_LRO))
  1540. goto syncvf;
  1541. memset(&offloads, 0, sizeof(struct ndis_offload_params));
  1542. if (features & NETIF_F_LRO) {
  1543. offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
  1544. offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
  1545. } else {
  1546. offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
  1547. offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
  1548. }
  1549. ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
  1550. if (ret) {
  1551. features ^= NETIF_F_LRO;
  1552. ndev->features = features;
  1553. }
  1554. syncvf:
  1555. if (!vf_netdev)
  1556. return ret;
  1557. vf_netdev->wanted_features = features;
  1558. netdev_update_features(vf_netdev);
  1559. return ret;
  1560. }
  1561. static int netvsc_get_regs_len(struct net_device *netdev)
  1562. {
  1563. return VRSS_SEND_TAB_SIZE * sizeof(u32);
  1564. }
  1565. static void netvsc_get_regs(struct net_device *netdev,
  1566. struct ethtool_regs *regs, void *p)
  1567. {
  1568. struct net_device_context *ndc = netdev_priv(netdev);
  1569. u32 *regs_buff = p;
  1570. /* increase the version, if buffer format is changed. */
  1571. regs->version = 1;
  1572. memcpy(regs_buff, ndc->tx_table, VRSS_SEND_TAB_SIZE * sizeof(u32));
  1573. }
  1574. static u32 netvsc_get_msglevel(struct net_device *ndev)
  1575. {
  1576. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  1577. return ndev_ctx->msg_enable;
  1578. }
  1579. static void netvsc_set_msglevel(struct net_device *ndev, u32 val)
  1580. {
  1581. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  1582. ndev_ctx->msg_enable = val;
  1583. }
  1584. static const struct ethtool_ops ethtool_ops = {
  1585. .get_drvinfo = netvsc_get_drvinfo,
  1586. .get_regs_len = netvsc_get_regs_len,
  1587. .get_regs = netvsc_get_regs,
  1588. .get_msglevel = netvsc_get_msglevel,
  1589. .set_msglevel = netvsc_set_msglevel,
  1590. .get_link = ethtool_op_get_link,
  1591. .get_ethtool_stats = netvsc_get_ethtool_stats,
  1592. .get_sset_count = netvsc_get_sset_count,
  1593. .get_strings = netvsc_get_strings,
  1594. .get_channels = netvsc_get_channels,
  1595. .set_channels = netvsc_set_channels,
  1596. .get_ts_info = ethtool_op_get_ts_info,
  1597. .get_rxnfc = netvsc_get_rxnfc,
  1598. .set_rxnfc = netvsc_set_rxnfc,
  1599. .get_rxfh_key_size = netvsc_get_rxfh_key_size,
  1600. .get_rxfh_indir_size = netvsc_rss_indir_size,
  1601. .get_rxfh = netvsc_get_rxfh,
  1602. .set_rxfh = netvsc_set_rxfh,
  1603. .get_link_ksettings = netvsc_get_link_ksettings,
  1604. .set_link_ksettings = netvsc_set_link_ksettings,
  1605. .get_ringparam = netvsc_get_ringparam,
  1606. .set_ringparam = netvsc_set_ringparam,
  1607. };
  1608. static const struct net_device_ops device_ops = {
  1609. .ndo_open = netvsc_open,
  1610. .ndo_stop = netvsc_close,
  1611. .ndo_start_xmit = netvsc_start_xmit,
  1612. .ndo_change_rx_flags = netvsc_change_rx_flags,
  1613. .ndo_set_rx_mode = netvsc_set_rx_mode,
  1614. .ndo_fix_features = netvsc_fix_features,
  1615. .ndo_set_features = netvsc_set_features,
  1616. .ndo_change_mtu = netvsc_change_mtu,
  1617. .ndo_validate_addr = eth_validate_addr,
  1618. .ndo_set_mac_address = netvsc_set_mac_addr,
  1619. .ndo_select_queue = netvsc_select_queue,
  1620. .ndo_get_stats64 = netvsc_get_stats64,
  1621. .ndo_bpf = netvsc_bpf,
  1622. .ndo_xdp_xmit = netvsc_ndoxdp_xmit,
  1623. };
  1624. /*
  1625. * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link
  1626. * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is
  1627. * present send GARP packet to network peers with netif_notify_peers().
  1628. */
  1629. static void netvsc_link_change(struct work_struct *w)
  1630. {
  1631. struct net_device_context *ndev_ctx =
  1632. container_of(w, struct net_device_context, dwork.work);
  1633. struct hv_device *device_obj = ndev_ctx->device_ctx;
  1634. struct net_device *net = hv_get_drvdata(device_obj);
  1635. unsigned long flags, next_reconfig, delay;
  1636. struct netvsc_reconfig *event = NULL;
  1637. struct netvsc_device *net_device;
  1638. struct rndis_device *rdev;
  1639. bool reschedule = false;
  1640. /* if changes are happening, comeback later */
  1641. if (!rtnl_trylock()) {
  1642. schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);
  1643. return;
  1644. }
  1645. net_device = rtnl_dereference(ndev_ctx->nvdev);
  1646. if (!net_device)
  1647. goto out_unlock;
  1648. rdev = net_device->extension;
  1649. next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
  1650. if (time_is_after_jiffies(next_reconfig)) {
  1651. /* link_watch only sends one notification with current state
  1652. * per second, avoid doing reconfig more frequently. Handle
  1653. * wrap around.
  1654. */
  1655. delay = next_reconfig - jiffies;
  1656. delay = delay < LINKCHANGE_INT ? delay : LINKCHANGE_INT;
  1657. schedule_delayed_work(&ndev_ctx->dwork, delay);
  1658. goto out_unlock;
  1659. }
  1660. ndev_ctx->last_reconfig = jiffies;
  1661. spin_lock_irqsave(&ndev_ctx->lock, flags);
  1662. if (!list_empty(&ndev_ctx->reconfig_events)) {
  1663. event = list_first_entry(&ndev_ctx->reconfig_events,
  1664. struct netvsc_reconfig, list);
  1665. list_del(&event->list);
  1666. reschedule = !list_empty(&ndev_ctx->reconfig_events);
  1667. }
  1668. spin_unlock_irqrestore(&ndev_ctx->lock, flags);
  1669. if (!event)
  1670. goto out_unlock;
  1671. switch (event->event) {
  1672. /* Only the following events are possible due to the check in
  1673. * netvsc_linkstatus_callback()
  1674. */
  1675. case RNDIS_STATUS_MEDIA_CONNECT:
  1676. if (rdev->link_state) {
  1677. rdev->link_state = false;
  1678. netif_carrier_on(net);
  1679. netvsc_tx_enable(net_device, net);
  1680. } else {
  1681. __netdev_notify_peers(net);
  1682. }
  1683. kfree(event);
  1684. break;
  1685. case RNDIS_STATUS_MEDIA_DISCONNECT:
  1686. if (!rdev->link_state) {
  1687. rdev->link_state = true;
  1688. netif_carrier_off(net);
  1689. netvsc_tx_disable(net_device, net);
  1690. }
  1691. kfree(event);
  1692. break;
  1693. case RNDIS_STATUS_NETWORK_CHANGE:
  1694. /* Only makes sense if carrier is present */
  1695. if (!rdev->link_state) {
  1696. rdev->link_state = true;
  1697. netif_carrier_off(net);
  1698. netvsc_tx_disable(net_device, net);
  1699. event->event = RNDIS_STATUS_MEDIA_CONNECT;
  1700. spin_lock_irqsave(&ndev_ctx->lock, flags);
  1701. list_add(&event->list, &ndev_ctx->reconfig_events);
  1702. spin_unlock_irqrestore(&ndev_ctx->lock, flags);
  1703. reschedule = true;
  1704. }
  1705. break;
  1706. }
  1707. rtnl_unlock();
  1708. /* link_watch only sends one notification with current state per
  1709. * second, handle next reconfig event in 2 seconds.
  1710. */
  1711. if (reschedule)
  1712. schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);
  1713. return;
  1714. out_unlock:
  1715. rtnl_unlock();
  1716. }
  1717. static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
  1718. {
  1719. struct net_device_context *net_device_ctx;
  1720. struct net_device *dev;
  1721. dev = netdev_master_upper_dev_get(vf_netdev);
  1722. if (!dev || dev->netdev_ops != &device_ops)
  1723. return NULL; /* not a netvsc device */
  1724. net_device_ctx = netdev_priv(dev);
  1725. if (!rtnl_dereference(net_device_ctx->nvdev))
  1726. return NULL; /* device is removed */
  1727. return dev;
  1728. }
  1729. /* Called when VF is injecting data into network stack.
  1730. * Change the associated network device from VF to netvsc.
  1731. * note: already called with rcu_read_lock
  1732. */
  1733. static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
  1734. {
  1735. struct sk_buff *skb = *pskb;
  1736. struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
  1737. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  1738. struct netvsc_vf_pcpu_stats *pcpu_stats
  1739. = this_cpu_ptr(ndev_ctx->vf_stats);
  1740. skb = skb_share_check(skb, GFP_ATOMIC);
  1741. if (unlikely(!skb))
  1742. return RX_HANDLER_CONSUMED;
  1743. *pskb = skb;
  1744. skb->dev = ndev;
  1745. u64_stats_update_begin(&pcpu_stats->syncp);
  1746. pcpu_stats->rx_packets++;
  1747. pcpu_stats->rx_bytes += skb->len;
  1748. u64_stats_update_end(&pcpu_stats->syncp);
  1749. return RX_HANDLER_ANOTHER;
  1750. }
  1751. static int netvsc_vf_join(struct net_device *vf_netdev,
  1752. struct net_device *ndev, int context)
  1753. {
  1754. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  1755. int ret;
  1756. ret = netdev_rx_handler_register(vf_netdev,
  1757. netvsc_vf_handle_frame, ndev);
  1758. if (ret != 0) {
  1759. netdev_err(vf_netdev,
  1760. "can not register netvsc VF receive handler (err = %d)\n",
  1761. ret);
  1762. goto rx_handler_failed;
  1763. }
  1764. ret = netdev_master_upper_dev_link(vf_netdev, ndev,
  1765. NULL, NULL, NULL);
  1766. if (ret != 0) {
  1767. netdev_err(vf_netdev,
  1768. "can not set master device %s (err = %d)\n",
  1769. ndev->name, ret);
  1770. goto upper_link_failed;
  1771. }
  1772. /* If this registration is called from probe context vf_takeover
  1773. * is taken care of later in probe itself.
  1774. */
  1775. if (context == VF_REG_IN_NOTIFIER)
  1776. schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
  1777. call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
  1778. netdev_info(vf_netdev, "joined to %s\n", ndev->name);
  1779. return 0;
  1780. upper_link_failed:
  1781. netdev_rx_handler_unregister(vf_netdev);
  1782. rx_handler_failed:
  1783. return ret;
  1784. }
  1785. static void __netvsc_vf_setup(struct net_device *ndev,
  1786. struct net_device *vf_netdev)
  1787. {
  1788. int ret;
  1789. /* Align MTU of VF with master */
  1790. ret = dev_set_mtu(vf_netdev, ndev->mtu);
  1791. if (ret)
  1792. netdev_warn(vf_netdev,
  1793. "unable to change mtu to %u\n", ndev->mtu);
  1794. /* set multicast etc flags on VF */
  1795. dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE, NULL);
  1796. /* sync address list from ndev to VF */
  1797. netif_addr_lock_bh(ndev);
  1798. dev_uc_sync(vf_netdev, ndev);
  1799. dev_mc_sync(vf_netdev, ndev);
  1800. netif_addr_unlock_bh(ndev);
  1801. if (netif_running(ndev)) {
  1802. ret = dev_open(vf_netdev, NULL);
  1803. if (ret)
  1804. netdev_warn(vf_netdev,
  1805. "unable to open: %d\n", ret);
  1806. }
  1807. }
  1808. /* Setup VF as slave of the synthetic device.
  1809. * Runs in workqueue to avoid recursion in netlink callbacks.
  1810. */
  1811. static void netvsc_vf_setup(struct work_struct *w)
  1812. {
  1813. struct net_device_context *ndev_ctx
  1814. = container_of(w, struct net_device_context, vf_takeover.work);
  1815. struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
  1816. struct net_device *vf_netdev;
  1817. if (!rtnl_trylock()) {
  1818. schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
  1819. return;
  1820. }
  1821. vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
  1822. if (vf_netdev)
  1823. __netvsc_vf_setup(ndev, vf_netdev);
  1824. rtnl_unlock();
  1825. }
  1826. /* Find netvsc by VF serial number.
  1827. * The PCI hyperv controller records the serial number as the slot kobj name.
  1828. */
  1829. static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
  1830. {
  1831. struct device *parent = vf_netdev->dev.parent;
  1832. struct net_device_context *ndev_ctx;
  1833. struct net_device *ndev;
  1834. struct pci_dev *pdev;
  1835. u32 serial;
  1836. if (!parent || !dev_is_pci(parent))
  1837. return NULL; /* not a PCI device */
  1838. pdev = to_pci_dev(parent);
  1839. if (!pdev->slot) {
  1840. netdev_notice(vf_netdev, "no PCI slot information\n");
  1841. return NULL;
  1842. }
  1843. if (kstrtou32(pci_slot_name(pdev->slot), 10, &serial)) {
  1844. netdev_notice(vf_netdev, "Invalid vf serial:%s\n",
  1845. pci_slot_name(pdev->slot));
  1846. return NULL;
  1847. }
  1848. list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
  1849. if (!ndev_ctx->vf_alloc)
  1850. continue;
  1851. if (ndev_ctx->vf_serial != serial)
  1852. continue;
  1853. ndev = hv_get_drvdata(ndev_ctx->device_ctx);
  1854. if (ndev->addr_len != vf_netdev->addr_len ||
  1855. memcmp(ndev->perm_addr, vf_netdev->perm_addr,
  1856. ndev->addr_len) != 0)
  1857. continue;
  1858. return ndev;
  1859. }
  1860. /* Fallback path to check synthetic vf with help of mac addr.
  1861. * Because this function can be called before vf_netdev is
  1862. * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied
  1863. * from dev_addr, also try to match to its dev_addr.
  1864. * Note: On Hyper-V and Azure, it's not possible to set a MAC address
  1865. * on a VF that matches to the MAC of a unrelated NETVSC device.
  1866. */
  1867. list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
  1868. ndev = hv_get_drvdata(ndev_ctx->device_ctx);
  1869. if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) ||
  1870. ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr))
  1871. return ndev;
  1872. }
  1873. netdev_notice(vf_netdev,
  1874. "no netdev found for vf serial:%u\n", serial);
  1875. return NULL;
  1876. }
  1877. static int netvsc_prepare_bonding(struct net_device *vf_netdev)
  1878. {
  1879. struct net_device *ndev;
  1880. ndev = get_netvsc_byslot(vf_netdev);
  1881. if (!ndev)
  1882. return NOTIFY_DONE;
  1883. /* Set slave flag and no addrconf flag before open
  1884. * to prevent IPv6 addrconf.
  1885. */
  1886. vf_netdev->flags |= IFF_SLAVE;
  1887. vf_netdev->priv_flags |= IFF_NO_ADDRCONF;
  1888. return NOTIFY_DONE;
  1889. }
  1890. static int netvsc_register_vf(struct net_device *vf_netdev, int context)
  1891. {
  1892. struct net_device_context *net_device_ctx;
  1893. struct netvsc_device *netvsc_dev;
  1894. struct bpf_prog *prog;
  1895. struct net_device *ndev;
  1896. int ret;
  1897. if (vf_netdev->addr_len != ETH_ALEN)
  1898. return NOTIFY_DONE;
  1899. ndev = get_netvsc_byslot(vf_netdev);
  1900. if (!ndev)
  1901. return NOTIFY_DONE;
  1902. net_device_ctx = netdev_priv(ndev);
  1903. netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
  1904. if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
  1905. return NOTIFY_DONE;
  1906. /* if synthetic interface is a different namespace,
  1907. * then move the VF to that namespace; join will be
  1908. * done again in that context.
  1909. */
  1910. if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) {
  1911. ret = dev_change_net_namespace(vf_netdev,
  1912. dev_net(ndev), "eth%d");
  1913. if (ret)
  1914. netdev_err(vf_netdev,
  1915. "could not move to same namespace as %s: %d\n",
  1916. ndev->name, ret);
  1917. else
  1918. netdev_info(vf_netdev,
  1919. "VF moved to namespace with: %s\n",
  1920. ndev->name);
  1921. return NOTIFY_DONE;
  1922. }
  1923. netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
  1924. if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
  1925. return NOTIFY_DONE;
  1926. dev_hold(vf_netdev);
  1927. rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
  1928. if (ndev->needed_headroom < vf_netdev->needed_headroom)
  1929. ndev->needed_headroom = vf_netdev->needed_headroom;
  1930. vf_netdev->wanted_features = ndev->features;
  1931. netdev_update_features(vf_netdev);
  1932. prog = netvsc_xdp_get(netvsc_dev);
  1933. netvsc_vf_setxdp(vf_netdev, prog);
  1934. return NOTIFY_OK;
  1935. }
  1936. /* Change the data path when VF UP/DOWN/CHANGE are detected.
  1937. *
  1938. * Typically a UP or DOWN event is followed by a CHANGE event, so
  1939. * net_device_ctx->data_path_is_vf is used to cache the current data path
  1940. * to avoid the duplicate call of netvsc_switch_datapath() and the duplicate
  1941. * message.
  1942. *
  1943. * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network
  1944. * interface, there is only the CHANGE event and no UP or DOWN event.
  1945. */
  1946. static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
  1947. {
  1948. struct net_device_context *net_device_ctx;
  1949. struct netvsc_device *netvsc_dev;
  1950. struct net_device *ndev;
  1951. bool vf_is_up = false;
  1952. int ret;
  1953. if (event != NETDEV_GOING_DOWN)
  1954. vf_is_up = netif_running(vf_netdev);
  1955. ndev = get_netvsc_byref(vf_netdev);
  1956. if (!ndev)
  1957. return NOTIFY_DONE;
  1958. net_device_ctx = netdev_priv(ndev);
  1959. netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
  1960. if (!netvsc_dev)
  1961. return NOTIFY_DONE;
  1962. if (net_device_ctx->data_path_is_vf == vf_is_up)
  1963. return NOTIFY_OK;
  1964. if (vf_is_up && !net_device_ctx->vf_alloc) {
  1965. netdev_info(ndev, "Waiting for the VF association from host\n");
  1966. wait_for_completion(&net_device_ctx->vf_add);
  1967. }
  1968. ret = netvsc_switch_datapath(ndev, vf_is_up);
  1969. if (ret) {
  1970. netdev_err(ndev,
  1971. "Data path failed to switch %s VF: %s, err: %d\n",
  1972. vf_is_up ? "to" : "from", vf_netdev->name, ret);
  1973. return NOTIFY_DONE;
  1974. } else {
  1975. netdev_info(ndev, "Data path switched %s VF: %s\n",
  1976. vf_is_up ? "to" : "from", vf_netdev->name);
  1977. }
  1978. return NOTIFY_OK;
  1979. }
  1980. static int netvsc_unregister_vf(struct net_device *vf_netdev)
  1981. {
  1982. struct net_device *ndev;
  1983. struct net_device_context *net_device_ctx;
  1984. ndev = get_netvsc_byref(vf_netdev);
  1985. if (!ndev)
  1986. return NOTIFY_DONE;
  1987. net_device_ctx = netdev_priv(ndev);
  1988. cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
  1989. netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
  1990. netvsc_vf_setxdp(vf_netdev, NULL);
  1991. reinit_completion(&net_device_ctx->vf_add);
  1992. netdev_rx_handler_unregister(vf_netdev);
  1993. netdev_upper_dev_unlink(vf_netdev, ndev);
  1994. RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
  1995. dev_put(vf_netdev);
  1996. ndev->needed_headroom = RNDIS_AND_PPI_SIZE;
  1997. return NOTIFY_OK;
  1998. }
  1999. static int check_dev_is_matching_vf(struct net_device *event_ndev)
  2000. {
  2001. /* Skip NetVSC interfaces */
  2002. if (event_ndev->netdev_ops == &device_ops)
  2003. return -ENODEV;
  2004. /* Avoid non-Ethernet type devices */
  2005. if (event_ndev->type != ARPHRD_ETHER)
  2006. return -ENODEV;
  2007. /* Avoid Vlan dev with same MAC registering as VF */
  2008. if (is_vlan_dev(event_ndev))
  2009. return -ENODEV;
  2010. /* Avoid Bonding master dev with same MAC registering as VF */
  2011. if (netif_is_bond_master(event_ndev))
  2012. return -ENODEV;
  2013. return 0;
  2014. }
  2015. static int netvsc_probe(struct hv_device *dev,
  2016. const struct hv_vmbus_device_id *dev_id)
  2017. {
  2018. struct net_device *net = NULL, *vf_netdev;
  2019. struct net_device_context *net_device_ctx;
  2020. struct netvsc_device_info *device_info = NULL;
  2021. struct netvsc_device *nvdev;
  2022. int ret = -ENOMEM;
  2023. net = alloc_etherdev_mq(sizeof(struct net_device_context),
  2024. VRSS_CHANNEL_MAX);
  2025. if (!net)
  2026. goto no_net;
  2027. netif_carrier_off(net);
  2028. netvsc_init_settings(net);
  2029. net_device_ctx = netdev_priv(net);
  2030. net_device_ctx->device_ctx = dev;
  2031. net_device_ctx->msg_enable = netif_msg_init(debug, default_msg);
  2032. if (netif_msg_probe(net_device_ctx))
  2033. netdev_dbg(net, "netvsc msg_enable: %d\n",
  2034. net_device_ctx->msg_enable);
  2035. hv_set_drvdata(dev, net);
  2036. INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
  2037. init_completion(&net_device_ctx->vf_add);
  2038. spin_lock_init(&net_device_ctx->lock);
  2039. INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
  2040. INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
  2041. INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work);
  2042. net_device_ctx->vf_stats
  2043. = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
  2044. if (!net_device_ctx->vf_stats)
  2045. goto no_stats;
  2046. net->netdev_ops = &device_ops;
  2047. net->ethtool_ops = &ethtool_ops;
  2048. SET_NETDEV_DEV(net, &dev->device);
  2049. dma_set_min_align_mask(&dev->device, HV_HYP_PAGE_SIZE - 1);
  2050. /* We always need headroom for rndis header */
  2051. net->needed_headroom = RNDIS_AND_PPI_SIZE;
  2052. /* Initialize the number of queues to be 1, we may change it if more
  2053. * channels are offered later.
  2054. */
  2055. netif_set_real_num_tx_queues(net, 1);
  2056. netif_set_real_num_rx_queues(net, 1);
  2057. /* Notify the netvsc driver of the new device */
  2058. device_info = netvsc_devinfo_get(NULL);
  2059. if (!device_info) {
  2060. ret = -ENOMEM;
  2061. goto devinfo_failed;
  2062. }
  2063. /* We must get rtnl lock before scheduling nvdev->subchan_work,
  2064. * otherwise netvsc_subchan_work() can get rtnl lock first and wait
  2065. * all subchannels to show up, but that may not happen because
  2066. * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer()
  2067. * -> ... -> device_add() -> ... -> __device_attach() can't get
  2068. * the device lock, so all the subchannels can't be processed --
  2069. * finally netvsc_subchan_work() hangs forever.
  2070. *
  2071. * The rtnl lock also needs to be held before rndis_filter_device_add()
  2072. * which advertises nvsp_2_vsc_capability / sriov bit, and triggers
  2073. * VF NIC offering and registering. If VF NIC finished register_netdev()
  2074. * earlier it may cause name based config failure.
  2075. */
  2076. rtnl_lock();
  2077. nvdev = rndis_filter_device_add(dev, device_info);
  2078. if (IS_ERR(nvdev)) {
  2079. ret = PTR_ERR(nvdev);
  2080. netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
  2081. goto rndis_failed;
  2082. }
  2083. eth_hw_addr_set(net, device_info->mac_adr);
  2084. if (nvdev->num_chn > 1)
  2085. schedule_work(&nvdev->subchan_work);
  2086. /* hw_features computed in rndis_netdev_set_hwcaps() */
  2087. net->features = net->hw_features |
  2088. NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX |
  2089. NETIF_F_HW_VLAN_CTAG_RX;
  2090. net->vlan_features = net->features;
  2091. netdev_lockdep_set_classes(net);
  2092. net->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
  2093. NETDEV_XDP_ACT_NDO_XMIT;
  2094. /* MTU range: 68 - 1500 or 65521 */
  2095. net->min_mtu = NETVSC_MTU_MIN;
  2096. if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
  2097. net->max_mtu = NETVSC_MTU - ETH_HLEN;
  2098. else
  2099. net->max_mtu = ETH_DATA_LEN;
  2100. nvdev->tx_disable = false;
  2101. ret = register_netdevice(net);
  2102. if (ret != 0) {
  2103. pr_err("Unable to register netdev.\n");
  2104. goto register_failed;
  2105. }
  2106. list_add(&net_device_ctx->list, &netvsc_dev_list);
  2107. /* When the hv_netvsc driver is unloaded and reloaded, the
  2108. * NET_DEVICE_REGISTER for the vf device is replayed before probe
  2109. * is complete. This is because register_netdevice_notifier() gets
  2110. * registered before vmbus_driver_register() so that callback func
  2111. * is set before probe and we don't miss events like NETDEV_POST_INIT
  2112. * So, in this section we try to register the matching vf device that
  2113. * is present as a netdevice, knowing that its register call is not
  2114. * processed in the netvsc_netdev_notifier(as probing is progress and
  2115. * get_netvsc_byslot fails).
  2116. */
  2117. for_each_netdev(dev_net(net), vf_netdev) {
  2118. ret = check_dev_is_matching_vf(vf_netdev);
  2119. if (ret != 0)
  2120. continue;
  2121. if (net != get_netvsc_byslot(vf_netdev))
  2122. continue;
  2123. netvsc_prepare_bonding(vf_netdev);
  2124. netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
  2125. __netvsc_vf_setup(net, vf_netdev);
  2126. break;
  2127. }
  2128. rtnl_unlock();
  2129. netvsc_devinfo_put(device_info);
  2130. return 0;
  2131. register_failed:
  2132. rndis_filter_device_remove(dev, nvdev);
  2133. rndis_failed:
  2134. rtnl_unlock();
  2135. netvsc_devinfo_put(device_info);
  2136. devinfo_failed:
  2137. free_percpu(net_device_ctx->vf_stats);
  2138. no_stats:
  2139. hv_set_drvdata(dev, NULL);
  2140. free_netdev(net);
  2141. no_net:
  2142. return ret;
  2143. }
  2144. static void netvsc_remove(struct hv_device *dev)
  2145. {
  2146. struct net_device_context *ndev_ctx;
  2147. struct net_device *vf_netdev, *net;
  2148. struct netvsc_device *nvdev;
  2149. net = hv_get_drvdata(dev);
  2150. if (net == NULL) {
  2151. dev_err(&dev->device, "No net device to remove\n");
  2152. return;
  2153. }
  2154. ndev_ctx = netdev_priv(net);
  2155. cancel_delayed_work_sync(&ndev_ctx->dwork);
  2156. rtnl_lock();
  2157. cancel_delayed_work_sync(&ndev_ctx->vfns_work);
  2158. nvdev = rtnl_dereference(ndev_ctx->nvdev);
  2159. if (nvdev) {
  2160. cancel_work_sync(&nvdev->subchan_work);
  2161. netvsc_xdp_set(net, NULL, NULL, nvdev);
  2162. }
  2163. /*
  2164. * Call to the vsc driver to let it know that the device is being
  2165. * removed. Also blocks mtu and channel changes.
  2166. */
  2167. vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
  2168. if (vf_netdev)
  2169. netvsc_unregister_vf(vf_netdev);
  2170. if (nvdev)
  2171. rndis_filter_device_remove(dev, nvdev);
  2172. unregister_netdevice(net);
  2173. list_del(&ndev_ctx->list);
  2174. rtnl_unlock();
  2175. hv_set_drvdata(dev, NULL);
  2176. free_percpu(ndev_ctx->vf_stats);
  2177. free_netdev(net);
  2178. }
  2179. static int netvsc_suspend(struct hv_device *dev)
  2180. {
  2181. struct net_device_context *ndev_ctx;
  2182. struct netvsc_device *nvdev;
  2183. struct net_device *net;
  2184. int ret;
  2185. net = hv_get_drvdata(dev);
  2186. ndev_ctx = netdev_priv(net);
  2187. cancel_delayed_work_sync(&ndev_ctx->dwork);
  2188. rtnl_lock();
  2189. cancel_delayed_work_sync(&ndev_ctx->vfns_work);
  2190. nvdev = rtnl_dereference(ndev_ctx->nvdev);
  2191. if (nvdev == NULL) {
  2192. ret = -ENODEV;
  2193. goto out;
  2194. }
  2195. /* Save the current config info */
  2196. ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
  2197. if (!ndev_ctx->saved_netvsc_dev_info) {
  2198. ret = -ENOMEM;
  2199. goto out;
  2200. }
  2201. ret = netvsc_detach(net, nvdev);
  2202. out:
  2203. rtnl_unlock();
  2204. return ret;
  2205. }
  2206. static int netvsc_resume(struct hv_device *dev)
  2207. {
  2208. struct net_device *net = hv_get_drvdata(dev);
  2209. struct net_device_context *net_device_ctx;
  2210. struct netvsc_device_info *device_info;
  2211. int ret;
  2212. rtnl_lock();
  2213. net_device_ctx = netdev_priv(net);
  2214. /* Reset the data path to the netvsc NIC before re-opening the vmbus
  2215. * channel. Later netvsc_netdev_event() will switch the data path to
  2216. * the VF upon the UP or CHANGE event.
  2217. */
  2218. net_device_ctx->data_path_is_vf = false;
  2219. device_info = net_device_ctx->saved_netvsc_dev_info;
  2220. ret = netvsc_attach(net, device_info);
  2221. netvsc_devinfo_put(device_info);
  2222. net_device_ctx->saved_netvsc_dev_info = NULL;
  2223. rtnl_unlock();
  2224. return ret;
  2225. }
  2226. static const struct hv_vmbus_device_id id_table[] = {
  2227. /* Network guid */
  2228. { HV_NIC_GUID, },
  2229. { },
  2230. };
  2231. MODULE_DEVICE_TABLE(vmbus, id_table);
  2232. /* The one and only one */
  2233. static struct hv_driver netvsc_drv = {
  2234. .name = KBUILD_MODNAME,
  2235. .id_table = id_table,
  2236. .probe = netvsc_probe,
  2237. .remove = netvsc_remove,
  2238. .suspend = netvsc_suspend,
  2239. .resume = netvsc_resume,
  2240. .driver = {
  2241. .probe_type = PROBE_FORCE_SYNCHRONOUS,
  2242. },
  2243. };
  2244. /* Set VF's namespace same as the synthetic NIC */
  2245. static void netvsc_event_set_vf_ns(struct net_device *ndev)
  2246. {
  2247. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  2248. struct net_device *vf_netdev;
  2249. int ret;
  2250. vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
  2251. if (!vf_netdev)
  2252. return;
  2253. if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) {
  2254. ret = dev_change_net_namespace(vf_netdev, dev_net(ndev),
  2255. "eth%d");
  2256. if (ret)
  2257. netdev_err(vf_netdev,
  2258. "Cannot move to same namespace as %s: %d\n",
  2259. ndev->name, ret);
  2260. else
  2261. netdev_info(vf_netdev,
  2262. "Moved VF to namespace with: %s\n",
  2263. ndev->name);
  2264. }
  2265. }
  2266. void netvsc_vfns_work(struct work_struct *w)
  2267. {
  2268. struct net_device_context *ndev_ctx =
  2269. container_of(w, struct net_device_context, vfns_work.work);
  2270. struct net_device *ndev;
  2271. if (!rtnl_trylock()) {
  2272. schedule_delayed_work(&ndev_ctx->vfns_work, 1);
  2273. return;
  2274. }
  2275. ndev = hv_get_drvdata(ndev_ctx->device_ctx);
  2276. if (!ndev)
  2277. goto out;
  2278. netvsc_event_set_vf_ns(ndev);
  2279. out:
  2280. rtnl_unlock();
  2281. }
  2282. /*
  2283. * On Hyper-V, every VF interface is matched with a corresponding
  2284. * synthetic interface. The synthetic interface is presented first
  2285. * to the guest. When the corresponding VF instance is registered,
  2286. * we will take care of switching the data path.
  2287. */
  2288. static int netvsc_netdev_event(struct notifier_block *this,
  2289. unsigned long event, void *ptr)
  2290. {
  2291. struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
  2292. struct net_device_context *ndev_ctx;
  2293. int ret = 0;
  2294. if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) {
  2295. ndev_ctx = netdev_priv(event_dev);
  2296. schedule_delayed_work(&ndev_ctx->vfns_work, 0);
  2297. return NOTIFY_DONE;
  2298. }
  2299. ret = check_dev_is_matching_vf(event_dev);
  2300. if (ret != 0)
  2301. return NOTIFY_DONE;
  2302. switch (event) {
  2303. case NETDEV_POST_INIT:
  2304. return netvsc_prepare_bonding(event_dev);
  2305. case NETDEV_REGISTER:
  2306. return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
  2307. case NETDEV_UNREGISTER:
  2308. return netvsc_unregister_vf(event_dev);
  2309. case NETDEV_UP:
  2310. case NETDEV_DOWN:
  2311. case NETDEV_CHANGE:
  2312. case NETDEV_GOING_DOWN:
  2313. return netvsc_vf_changed(event_dev, event);
  2314. default:
  2315. return NOTIFY_DONE;
  2316. }
  2317. }
  2318. static struct notifier_block netvsc_netdev_notifier = {
  2319. .notifier_call = netvsc_netdev_event,
  2320. };
  2321. static void __exit netvsc_drv_exit(void)
  2322. {
  2323. unregister_netdevice_notifier(&netvsc_netdev_notifier);
  2324. vmbus_driver_unregister(&netvsc_drv);
  2325. }
  2326. static int __init netvsc_drv_init(void)
  2327. {
  2328. int ret;
  2329. if (ring_size < RING_SIZE_MIN) {
  2330. ring_size = RING_SIZE_MIN;
  2331. pr_info("Increased ring_size to %u (min allowed)\n",
  2332. ring_size);
  2333. }
  2334. netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096);
  2335. register_netdevice_notifier(&netvsc_netdev_notifier);
  2336. ret = vmbus_driver_register(&netvsc_drv);
  2337. if (ret)
  2338. goto err_vmbus_reg;
  2339. return 0;
  2340. err_vmbus_reg:
  2341. unregister_netdevice_notifier(&netvsc_netdev_notifier);
  2342. return ret;
  2343. }
  2344. MODULE_LICENSE("GPL");
  2345. MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
  2346. module_init(netvsc_drv_init);
  2347. module_exit(netvsc_drv_exit);